Store a key in the physical file that pulls the data from a chunk to

minimize 100% file duplicates to be nothing more than a key length
author: Steve Slaven <bpk@hoopajoo.net> 2009-08-07 05:39:44 (GMT)
committer: Steve Slaven <bpk@hoopajoo.net> 2009-08-07 05:39:44 (GMT)
commit: 16b950060f30fd1793608d2b14f2de235b0d8c83 (patch)
tree: f767dfa09f86de6dbcbf028eb96ba97dfb791b2c /FuseArchive
parent: 706fd70f3464a8b8216880cdd238508d0bdd5215 (diff)
download: fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.zip
fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.tar.gz
fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.tar.bz2
2 files changed, 35 insertions, 4 deletions
diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py
index e713e10..7cc8e07 100644
--- a/FuseArchive/ChunkFile.py
+++ b/FuseArchive/ChunkFile.py
@@ -92,6 +92,7 @@ class ChunkFile(object):
         # fflush early if we're creating a new file since we reference this
         # attribute in the routine.  At least it gets initialized I guess
         self.original_chunks = []
+        self.original_key = None
 
         # TODO: Better flag handling here?
         if flags & os.O_RDONLY:
@@ -131,9 +132,19 @@ class ChunkFile(object):
             try:
                 magic = Serializer.loadfh( self.file )
                 logging.debug( "Got data: %s" % magic )
+
+                # This is just a key to a block to minimize complete
+                # duplicates
+                logging.debug( "Reading chunk to get actual file data" )
+                self.original_key = magic
+                file_chunk = load_chunk( magic )
+                magic = Serializer.loads( file_chunk )
+
                 self.size = magic[ 'size' ]
                 self.chunks = magic[ 'chunks' ]
                 self.chunk_size = magic[ 'chunk_size' ]
+                logging.debug( "Loaded size: %d, chunk size: %d, chunks: %d" %
+                    ( self.size, self.chunk_size, len( self.chunks ) ) )
             except Exception, e:
                 logging.critical( self.orig_path + ": " + str( e ) )
         else:
@@ -400,11 +411,24 @@ class ChunkFile(object):
                 + "\nProbably a bug in write or ftruncate!"
             logging.debug( "Size calculated is: %d (0x%x)" % ( self.size, self.size ) )
 
-            Serializer.dumpfh( self.file, {
+            key = save_chunk( Serializer.dumps( {
                 'size': self.size,
                 'chunks': self.chunks,
                 'chunk_size': self.chunk_size
-            } )
+            } ) )
+
+            logging.debug( "Saved indirect file to key %s, saving key in main file" % key )
+
+            Serializer.dumpfh( self.file, key )
+
+            # Update file ref counts
+            if key != self.original_key:
+                logging.debug( "File key changed updating references" )
+                if self.original_key != None:
+                    unlock_chunk( self.original_key )
+
+                lock_chunk( key )
+                self.original_key = key
 
             # Now update our chunk ref counts
             logging.debug( "Updating chunk references" )
diff --git a/FuseArchive/Serializer.py b/FuseArchive/Serializer.py
index b23371c..621d90a 100644
--- a/FuseArchive/Serializer.py
+++ b/FuseArchive/Serializer.py
@@ -17,11 +17,15 @@ class Serializer:
         fh.seek( 0 )
         f = gzip.GzipFile( None, "wb", gzip_compress_level, fh )
         #f = fh
-        cPickle.dump( obj, f, -1 )
+        f.write( Serializer.dumps( obj ) )
         del f
         fh.flush()
 
     @staticmethod
+    def dumps( obj ):
+        return cPickle.dumps( obj, -1 )
+
+    @staticmethod
     def load( f ):
         if FuseArchive.magic_profiling:
             return { 'size': 0, 'chunks': 0, 'chunk_size': 0 }
@@ -37,6 +41,9 @@ class Serializer:
         fh.seek( 0 )
         f = gzip.GzipFile( None, "rb", gzip_compress_level, fh )
         #f = fh
-        magic = cPickle.load( f )
+        magic = Serializer.loads( f.read() )
         return( magic )
 
+    @staticmethod
+    def loads( str ):
+        return cPickle.loads( str )
author	Steve Slaven <bpk@hoopajoo.net>	2009-08-07 05:39:44 (GMT)
committer	Steve Slaven <bpk@hoopajoo.net>	2009-08-07 05:39:44 (GMT)
commit	16b950060f30fd1793608d2b14f2de235b0d8c83 (patch)
tree	f767dfa09f86de6dbcbf028eb96ba97dfb791b2c /FuseArchive
parent	706fd70f3464a8b8216880cdd238508d0bdd5215 (diff)
download	fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.zip fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.tar.gz fusearchive-16b950060f30fd1793608d2b14f2de235b0d8c83.tar.bz2