diff options
author | Steve Slaven <bpk@hoopajoo.net> | 2009-08-07 05:39:44 (GMT) |
---|---|---|
committer | Steve Slaven <bpk@hoopajoo.net> | 2009-08-07 05:39:44 (GMT) |
commit | 16b950060f30fd1793608d2b14f2de235b0d8c83 (patch) | |
tree | f767dfa09f86de6dbcbf028eb96ba97dfb791b2c | |
parent | 706fd70f3464a8b8216880cdd238508d0bdd5215 (diff) | |
download | fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.zip fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.tar.gz fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.tar.bz2 |
Store a key in the physical file that pulls the data from a chunk to
minimize 100% file duplicates to be nothing more than a key length
-rw-r--r-- | FuseArchive/ChunkFile.py | 28 | ||||
-rw-r--r-- | FuseArchive/Serializer.py | 11 |
2 files changed, 35 insertions, 4 deletions
diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py index e713e10..7cc8e07 100644 --- a/FuseArchive/ChunkFile.py +++ b/FuseArchive/ChunkFile.py @@ -92,6 +92,7 @@ class ChunkFile(object): # fflush early if we're creating a new file since we reference this # attribute in the routine. At least it gets initialized I guess self.original_chunks = [] + self.original_key = None # TODO: Better flag handling here? if flags & os.O_RDONLY: @@ -131,9 +132,19 @@ class ChunkFile(object): try: magic = Serializer.loadfh( self.file ) logging.debug( "Got data: %s" % magic ) + + # This is just a key to a block to minimize complete + # duplicates + logging.debug( "Reading chunk to get actual file data" ) + self.original_key = magic + file_chunk = load_chunk( magic ) + magic = Serializer.loads( file_chunk ) + self.size = magic[ 'size' ] self.chunks = magic[ 'chunks' ] self.chunk_size = magic[ 'chunk_size' ] + logging.debug( "Loaded size: %d, chunk size: %d, chunks: %d" % + ( self.size, self.chunk_size, len( self.chunks ) ) ) except Exception, e: logging.critical( self.orig_path + ": " + str( e ) ) else: @@ -400,11 +411,24 @@ class ChunkFile(object): + "\nProbably a bug in write or ftruncate!" 
logging.debug( "Size calculated is: %d (0x%x)" % ( self.size, self.size ) ) - Serializer.dumpfh( self.file, { + key = save_chunk( Serializer.dumps( { 'size': self.size, 'chunks': self.chunks, 'chunk_size': self.chunk_size - } ) + } ) ) + + logging.debug( "Saved indirect file to key %s, saving key in main file" % key ) + + Serializer.dumpfh( self.file, key ) + + # Update file ref counts + if key != self.original_key: + logging.debug( "File key changed updating references" ) + if self.original_key != None: + unlock_chunk( self.original_key ) + + lock_chunk( key ) + self.original_key = key # Now update our chunk ref counts logging.debug( "Updating chunk references" ) diff --git a/FuseArchive/Serializer.py b/FuseArchive/Serializer.py index b23371c..621d90a 100644 --- a/FuseArchive/Serializer.py +++ b/FuseArchive/Serializer.py @@ -17,11 +17,15 @@ class Serializer: fh.seek( 0 ) f = gzip.GzipFile( None, "wb", gzip_compress_level, fh ) #f = fh - cPickle.dump( obj, f, -1 ) + f.write( Serializer.dumps( obj ) ) del f fh.flush() @staticmethod + def dumps( obj ): + return cPickle.dumps( obj, -1 ) + + @staticmethod def load( f ): if FuseArchive.magic_profiling: return { 'size': 0, 'chunks': 0, 'chunk_size': 0 } @@ -37,6 +41,9 @@ class Serializer: fh.seek( 0 ) f = gzip.GzipFile( None, "rb", gzip_compress_level, fh ) #f = fh - magic = cPickle.load( f ) + magic = Serializer.loads( f.read() ) return( magic ) + @staticmethod + def loads( str ): + return cPickle.loads( str ) |