aboutsummaryrefslogtreecommitdiffstats
path: root/FuseArchive
diff options
context:
space:
mode:
author: Steve Slaven <bpk@hoopajoo.net> 2009-08-07 05:39:44 (GMT)
committer: Steve Slaven <bpk@hoopajoo.net> 2009-08-07 05:39:44 (GMT)
commit: 16b950060f30fd1793608d2b14f2de235b0d8c83 (patch)
tree: f767dfa09f86de6dbcbf028eb96ba97dfb791b2c /FuseArchive
parent: 706fd70f3464a8b8216880cdd238508d0bdd5215 (diff)
download: fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.zip
fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.tar.gz
fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.tar.bz2
Store a key in the physical file that pulls the data from a chunk, so that
a 100% duplicate file costs nothing more than a key length
Diffstat (limited to 'FuseArchive')
-rw-r--r-- FuseArchive/ChunkFile.py 28
-rw-r--r-- FuseArchive/Serializer.py 11
2 files changed, 35 insertions, 4 deletions
diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py
index e713e10..7cc8e07 100644
--- a/FuseArchive/ChunkFile.py
+++ b/FuseArchive/ChunkFile.py
@@ -92,6 +92,7 @@ class ChunkFile(object):
# fflush early if we're creating a new file since we reference this
# attribute in the routine. At least it gets initialized I guess
self.original_chunks = []
+ self.original_key = None
# TODO: Better flag handling here?
if flags & os.O_RDONLY:
@@ -131,9 +132,19 @@ class ChunkFile(object):
try:
magic = Serializer.loadfh( self.file )
logging.debug( "Got data: %s" % magic )
+
+ # This is just a key to a block to minimize complete
+ # duplicates
+ logging.debug( "Reading chunk to get actual file data" )
+ self.original_key = magic
+ file_chunk = load_chunk( magic )
+ magic = Serializer.loads( file_chunk )
+
self.size = magic[ 'size' ]
self.chunks = magic[ 'chunks' ]
self.chunk_size = magic[ 'chunk_size' ]
+ logging.debug( "Loaded size: %d, chunk size: %d, chunks: %d" %
+ ( self.size, self.chunk_size, len( self.chunks ) ) )
except Exception, e:
logging.critical( self.orig_path + ": " + str( e ) )
else:
@@ -400,11 +411,24 @@ class ChunkFile(object):
+ "\nProbably a bug in write or ftruncate!"
logging.debug( "Size calculated is: %d (0x%x)" % ( self.size, self.size ) )
- Serializer.dumpfh( self.file, {
+ key = save_chunk( Serializer.dumps( {
'size': self.size,
'chunks': self.chunks,
'chunk_size': self.chunk_size
- } )
+ } ) )
+
+ logging.debug( "Saved indirect file to key %s, saving key in main file" % key )
+
+ Serializer.dumpfh( self.file, key )
+
+ # Update file ref counts
+ if key != self.original_key:
+ logging.debug( "File key changed updating references" )
+ if self.original_key != None:
+ unlock_chunk( self.original_key )
+
+ lock_chunk( key )
+ self.original_key = key
# Now update our chunk ref counts
logging.debug( "Updating chunk references" )
diff --git a/FuseArchive/Serializer.py b/FuseArchive/Serializer.py
index b23371c..621d90a 100644
--- a/FuseArchive/Serializer.py
+++ b/FuseArchive/Serializer.py
@@ -17,11 +17,15 @@ class Serializer:
fh.seek( 0 )
f = gzip.GzipFile( None, "wb", gzip_compress_level, fh )
#f = fh
- cPickle.dump( obj, f, -1 )
+ f.write( Serializer.dumps( obj ) )
del f
fh.flush()
@staticmethod
+ def dumps( obj ):
+ return cPickle.dumps( obj, -1 )
+
+ @staticmethod
def load( f ):
if FuseArchive.magic_profiling:
return { 'size': 0, 'chunks': 0, 'chunk_size': 0 }
@@ -37,6 +41,9 @@ class Serializer:
fh.seek( 0 )
f = gzip.GzipFile( None, "rb", gzip_compress_level, fh )
#f = fh
- magic = cPickle.load( f )
+ magic = Serializer.loads( f.read() )
return( magic )
+ @staticmethod
+ def loads( str ):
+ return cPickle.loads( str )