author    Steve Slaven <bpk@hoopajoo.net>    2009-08-06 23:23:10 (GMT)
committer Steve Slaven <bpk@hoopajoo.net>    2009-08-06 23:23:10 (GMT)
commit    48c3c17f734f2e548b6c8d0d75a0060306a75ee6 (patch)
tree      f4829510a82181fb62c7f324859a6cc18ca1ec60
parent    2a98d3391ca347317284347e5cd1c3ecbadc3d7e (diff)
Add reference counting so we can delete unused chunks
-rw-r--r--  FuseArchive/Chunk.py               26
-rw-r--r--  FuseArchive/ChunkFile.py           29
-rw-r--r--  FuseArchive/FileSystem.py           2
-rw-r--r--  FuseArchive/Storage/FileSystem.py  58
-rwxr-xr-x  dump_chunk.py                       2
5 files changed, 93 insertions, 24 deletions
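
The idea behind this commit: every serialized chunk starts with a fixed 64-byte header whose last field is a reference count. Taking a reference bumps the count, dropping one decrements it, and a chunk whose count falls to zero is unlinked from storage. A minimal sketch of that header update, assuming the same 'HLBL48x' layout used below (adjust_refcount is an illustrative name, not part of the patch):

    import os, struct

    HFORMAT = 'HLBL48x'   # 64-byte chunk header; the last field is the reference count

    def adjust_refcount(path, delta):
        # Rewrite only the header in place, then free the chunk once unreferenced
        f = open(path, 'r+b')
        fields = list(struct.unpack(HFORMAT, f.read(struct.calcsize(HFORMAT))))
        fields[3] += delta
        f.seek(0)
        f.write(struct.pack(HFORMAT, *fields))
        f.close()
        if fields[3] == 0:
            os.unlink(path)
        return fields[3]
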
diff --git a/FuseArchive/Chunk.py b/FuseArchive/Chunk.py
index 9bb05bd..082df68 100644
--- a/FuseArchive/Chunk.py
+++ b/FuseArchive/Chunk.py
@@ -8,11 +8,11 @@ import struct, zlib, logging
hformat = 'HLBL48x'
compress_level = 6
-header_length = 64
+_header_length = 64
-assert struct.calcsize( hformat ) == header_length, \
- "Header struct must be 64 bytes not %d bytes" % \
- struct.calcsize( hformat )
+assert struct.calcsize( hformat ) == _header_length, \
+ "Header struct must be %d bytes not %d bytes" % \
+ ( _header_length, struct.calcsize( hformat ) )
# This handles serialization and deserialization of compressed chunks with
# some header data
@@ -59,15 +59,15 @@ class Chunk:
@staticmethod
def deserialize(data):
logging.debug( "Deserializing data of length %d" % len( data ) )
- hd = Chunk.parse_header( data[ :header_length ] )
+ hd = Chunk.parse_header( data[ :_header_length ] )
obj = Chunk()
obj.count = hd[ 'count' ]
compression = hd[ 'compression' ]
if compression == 0:
- obj.chunk = data[ header_length: ]
+ obj.chunk = data[ _header_length: ]
elif compression == 1:
- obj.chunk = zlib.decompress( data[ header_length: ] )
+ obj.chunk = zlib.decompress( data[ _header_length: ] )
else:
raise ValueError( "Invalid compression type: %d" % compression )
@@ -87,3 +87,15 @@ class Chunk:
'count': fields[ 3 ]
}
+ # Update the reference count stored in a serialized header; returns a tuple
+ # of the new count and the updated header data
+ @staticmethod
+ def inc_header_ref(data, count):
+ logging.debug( "Incrementing ref count by %d" % count )
+ fields = list( struct.unpack( hformat, data ) )
+ fields[ 3 ] += count
+ return( fields[ 3 ], struct.pack( hformat, *fields ) )
+
+ @staticmethod
+ def header_length():
+ return( _header_length )
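
With the header size now exposed through an accessor rather than a module-level constant, callers can inspect a chunk header without hard-coding its layout. A hedged example of reading a chunk's reference count (chunk_refcount is illustrative, not part of the patch):

    from FuseArchive.Chunk import Chunk

    def chunk_refcount(chunk_path):
        # Read only the fixed-size header and report how many references it holds
        f = open(chunk_path, 'rb')
        header = Chunk.parse_header(f.read(Chunk.header_length()))
        f.close()
        return header['count']
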
diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py
index 051965f..ac39439 100644
--- a/FuseArchive/ChunkFile.py
+++ b/FuseArchive/ChunkFile.py
@@ -1,4 +1,4 @@
-import logging, os, errno, fcntl, fuse, FuseArchive
+import logging, os, errno, fcntl, fuse, FuseArchive, copy
import FuseArchive.Storage.ZipFile, FuseArchive.Storage.FileSystem
from binascii import hexlify
from FuseArchive.Serializer import Serializer
@@ -34,7 +34,10 @@ def flag2mode(flags):
if chunkstyle == 'fs':
load_chunk = FuseArchive.Storage.FileSystem.load_chunk
save_chunk = FuseArchive.Storage.FileSystem.save_chunk
+ lock_chunk = FuseArchive.Storage.FileSystem.lock_chunk
+ unlock_chunk = FuseArchive.Storage.FileSystem.unlock_chunk
elif chunkstyle == 'zip':
+ raise ValueError( "Zip storage doesn't support lock/unlock, make an interface!" )
load_chunk = FuseArchive.Storage.ZipFile.load_chunk
save_chunk = FuseArchive.Storage.ZipFile.save_chunk
else:
@@ -136,6 +139,7 @@ class ChunkFile(object):
self.direct_io = False
self.keep_cache = False
+ self.original_chunks = copy.deepcopy( self.chunks )
logging.debug( "%s init complete" % self )
@@ -210,6 +214,29 @@ class ChunkFile(object):
logging.debug( "Key was %s" % key )
self.dirty_chunks = 0
+ # Has this chunk changed from what was here before?
+ oldkey = None
+ key = self.chunks[ index ]
+
+ changed = False
+ # Is the old chunk list at least this big?
+ if index >= len( self.original_chunks ):
+ logging.debug( "No old chunk at this spot, changed for sure" )
+ changed = True
+ else:
+ oldkey = self.original_chunks[ index ]
+ if oldkey != key:
+ logging.debug( "Key has changed at index %d" % index )
+ changed = True
+ logging.debug( "%s is now %s" % (oldkey, key) )
+
+ if changed:
+ logging.debug( "Chunk at index %d has changed" % index )
+ if oldkey is not None:
+ unlock_chunk( oldkey )
+
+ lock_chunk( key )
+
def read(self, length, offset):
logging.debug( "Reading from %s offset: %d (0x%x) length: %d (0x%d)" %
( self.orig_path, offset, offset, length, length ) )
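
The block added above keeps the on-disk counts honest on every flush: if the key stored at an index differs from what the file originally referenced there, the old chunk gives up a reference and the new one gains one. A condensed restatement with a hypothetical helper name:

    def move_reference(index, chunks, original_chunks, lock_chunk, unlock_chunk):
        # Hand the reference at this block index from the old key to the new one
        key = chunks[index]
        oldkey = original_chunks[index] if index < len(original_chunks) else None
        if oldkey != key:
            if oldkey is not None:
                unlock_chunk(oldkey)   # drop the reference held before this write
            lock_chunk(key)            # take a reference on the chunk now in use
        return key
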
diff --git a/FuseArchive/FileSystem.py b/FuseArchive/FileSystem.py
index dbdc8a3..b393dbb 100644
--- a/FuseArchive/FileSystem.py
+++ b/FuseArchive/FileSystem.py
@@ -34,7 +34,7 @@ class FileSystem(fuse.Fuse):
stats = f.fgetattr()
f.release( 0 )
else:
- logging.debug( "Using os.lstat to get stats" )
+ logging.debug( "Using os.lstat to get stats for %s" % path )
stats = os.lstat( treefile )
return stats
diff --git a/FuseArchive/Storage/FileSystem.py b/FuseArchive/Storage/FileSystem.py
index a6687b5..04c6bb9 100644
--- a/FuseArchive/Storage/FileSystem.py
+++ b/FuseArchive/Storage/FileSystem.py
@@ -4,6 +4,47 @@ from FuseArchive.Chunk import Chunk
magic_depth = 5
+def inc_chunk( key, count ):
+ # Increment this chunk header reference
+ path = _key_to_path( key )
+ logging.debug( "Updating header on %s, ref count + %d" % ( path, count ) )
+ f = open( path, "r+" )
+ data = f.read( Chunk.header_length() )
+ newcount, data = Chunk.inc_header_ref( data, count )
+ f.seek( 0 )
+ f.write( data )
+ f.close()
+ logging.debug( "Count is now: %d" % newcount )
+
+ assert newcount >= 0, "Count is negative?!!"
+
+ if newcount == 0:
+ logging.debug( "Freeing chunk" )
+ os.unlink( path )
+
+def lock_chunk( key ):
+ inc_chunk( key, 1 )
+
+def unlock_chunk( key ):
+ inc_chunk( key, -1 )
+
+def _key_to_path( key ):
+ logging.debug( "Converting key to path" )
+ ( thash, seq ) = key
+ chars = list( thash )
+ logging.debug( chars )
+
+ # Todo: make a digest -> path function to share with deflate
+ hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+ logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
+ subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
+ subpath = '/'.join( subparts )
+ logging.debug( "Subpath: " + subpath )
+
+ subpath += "/%s_%d" % ( hexdigest, seq )
+ return( "./storage/" + subpath )
+
+
# This will write out a data block, it will return a key that can get this
# data back later
def save_chunk( chunk ):
@@ -77,26 +118,15 @@ def load_chunk( key ):
if FuseArchive.magic_profiling:
return ''
- ( thash, seq ) = key
logging.debug( "Begin load_chunk" )
- chars = list( thash )
- logging.debug( chars )
-
- # Todo: make a digest -> path function to share with deflate
- hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
- logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
- subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
- subpath = '/'.join( subparts )
- logging.debug( "Subpath: " + subpath )
-
- subpath += "/%s_%d" % ( hexdigest, seq )
+ subpath = _key_to_path( key )
logging.debug( "Chunk path: " + subpath )
- if os.path.exists( "./storage/" + subpath ):
+ if os.path.exists( subpath ):
logging.debug( "Exporting chunk" )
- readchunk = open( "./storage/" + subpath )
+ readchunk = open( subpath )
chunk = Chunk.deserialize( readchunk.read() ).chunk
readchunk.close()
else:
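
The shared _key_to_path helper fans the raw digest out into magic_depth levels of two-hex-character directories, then appends the full hex digest and sequence number. A standalone sketch of that mapping (key_to_path and the root argument are illustrative):

    magic_depth = 5

    def key_to_path(key, root='./storage'):
        # key is (raw digest bytes, sequence number), as produced by save_chunk
        digest, seq = key
        hexdigest = ''.join(["%02x" % ord(c) for c in digest])
        subdirs = '/'.join(["%02x" % ord(c) for c in digest[:magic_depth]])
        return "%s/%s/%s_%d" % (root, subdirs, hexdigest, seq)
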
diff --git a/dump_chunk.py b/dump_chunk.py
index 8212e62..b28d2e7 100755
--- a/dump_chunk.py
+++ b/dump_chunk.py
@@ -4,7 +4,7 @@ import FuseArchive.Chunk, sys, pickle, os
# This prints some debug info about a chunk
f = FuseArchive.Chunk.Chunk.parse_header( open( sys.argv[ 1 ] ).read(
- FuseArchive.Chunk.header_length ) )
+ FuseArchive.Chunk.Chunk.header_length() ) )
print f
os.system( "ls -l %s" % sys.argv[ 1 ] )