diff options
| author | Steve Slaven <bpk@hoopajoo.net> | 2009-08-06 23:23:10 (GMT) | 
|---|---|---|
| committer | Steve Slaven <bpk@hoopajoo.net> | 2009-08-06 23:23:10 (GMT) | 
| commit | 48c3c17f734f2e548b6c8d0d75a0060306a75ee6 (patch) | |
| tree | f4829510a82181fb62c7f324859a6cc18ca1ec60 | |
| parent | 2a98d3391ca347317284347e5cd1c3ecbadc3d7e (diff) | |
| download | fusearchive-48c3c17f734f2e548b6c8d0d75a0060306a75ee6.zip fusearchive-48c3c17f734f2e548b6c8d0d75a0060306a75ee6.tar.gz fusearchive-48c3c17f734f2e548b6c8d0d75a0060306a75ee6.tar.bz2 | |
Add reference counting so we can delete unused chunks
| -rw-r--r-- | FuseArchive/Chunk.py | 26 | ||||
| -rw-r--r-- | FuseArchive/ChunkFile.py | 29 | ||||
| -rw-r--r-- | FuseArchive/FileSystem.py | 2 | ||||
| -rw-r--r-- | FuseArchive/Storage/FileSystem.py | 58 | ||||
| -rwxr-xr-x | dump_chunk.py | 2 | 
5 files changed, 93 insertions, 24 deletions
```diff
diff --git a/FuseArchive/Chunk.py b/FuseArchive/Chunk.py
index 9bb05bd..082df68 100644
--- a/FuseArchive/Chunk.py
+++ b/FuseArchive/Chunk.py
@@ -8,11 +8,11 @@ import struct, zlib, logging
 
 hformat = 'HLBL48x'
 compress_level = 6
-header_length = 64
+_header_length = 64
 
-assert struct.calcsize( hformat ) == header_length, \
-    "Header struct must be 64 bytes not %d bytes" % \
-    struct.calcsize( hformat )
+assert struct.calcsize( hformat ) == _header_length, \
+    "Header struct must be %d bytes not %d bytes" % \
+    ( _header_length, struct.calcsize( hformat ) )
 
 # This handles serialization and deserialization of compressed chunks with
 # some header data
@@ -59,15 +59,15 @@ class Chunk:
     @staticmethod
     def deserialize(data):
         logging.debug( "Deserializing data of length %d" % len( data ) )
-        hd = Chunk.parse_header( data[ :header_length ] )
+        hd = Chunk.parse_header( data[ :_header_length ] )
         obj = Chunk()
         obj.count = hd[ 'count' ]
 
         compression = hd[ 'compression' ]
         if compression == 0:
-            obj.chunk = data[ header_length: ]
+            obj.chunk = data[ _header_length: ]
         elif compression == 1:
-            obj.chunk = zlib.decompress( data[ header_length: ] )
+            obj.chunk = zlib.decompress( data[ _header_length: ] )
         else:
             raise ValueError( "Invalid compression type: %d" % compression )
 
@@ -87,3 +87,15 @@ class Chunk:
             'count': fields[ 3 ]
         }
 
+    # This is for updating header info, returns a tuple with the new count
+    # + the data
+    @staticmethod
+    def inc_header_ref(data, count):
+        logging.debug( "Incrementing ref count by %d" % count )
+        fields = list( struct.unpack( hformat, data ) )
+        fields[ 3 ] += count
+        return( fields[ 3 ], struct.pack( hformat, *fields ) )
+
+    @staticmethod
+    def header_length():
+        return( _header_length )
diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py
index 051965f..ac39439 100644
--- a/FuseArchive/ChunkFile.py
+++ b/FuseArchive/ChunkFile.py
@@ -1,4 +1,4 @@
-import logging, os, errno, fcntl, fuse, FuseArchive
+import logging, os, errno, fcntl, fuse, FuseArchive, copy
 import FuseArchive.Storage.ZipFile, FuseArchive.Storage.FileSystem
 from binascii import hexlify
 from FuseArchive.Serializer import Serializer
@@ -34,7 +34,10 @@ def flag2mode(flags):
 if chunkstyle == 'fs':
     load_chunk = FuseArchive.Storage.FileSystem.load_chunk
     save_chunk = FuseArchive.Storage.FileSystem.save_chunk
+    lock_chunk = FuseArchive.Storage.FileSystem.lock_chunk
+    unlock_chunk = FuseArchive.Storage.FileSystem.unlock_chunk
 elif chunkstyle == 'zip':
+    raise ValueException( "Zip storage doesn't support lock/unlock, make an inteface!" )
     load_chunk = FuseArchive.Storage.ZipFile.load_chunk
     save_chunk = FuseArchive.Storage.ZipFile.save_chunk
 else:
@@ -136,6 +139,7 @@ class ChunkFile(object):
 
         self.direct_io = False
         self.keep_cache = False
+        self.original_chunks = copy.deepcopy( self.chunks )
 
         logging.debug( "%s init complete" % self )
 
@@ -210,6 +214,29 @@ class ChunkFile(object):
                 logging.debug( "Key was %s" % key )
                 self.dirty_chunks = 0
 
+            # Is this chunk changed from what was here before?
+            oldkey = None
+            key = self.chunks[ index ]
+
+            changed = False
+            # Is the old chunks at least this big?
+            if index >= len( self.original_chunks ):
+                logging.debug( "No old chunk at this spot, changed for sure" )
+                changed = True
+            else:
+                oldkey = self.original_chunks[ index ]
+                if oldkey != key:
+                    logging.debug( "Key has changed at index %d" % index )
+                    changed = True
+                    logging.debug( "%s is now %s" % (oldkey, key) )
+
+            if changed:
+                logging.debug( "Chunk at index %d has changed" % index )
+                if oldkey != None:
+                    unlock_chunk( oldkey )
+
+                lock_chunk( key )
+
     def read(self, length, offset):
         logging.debug( "Reading from %s offset: %d (0x%x) length: %d (0x%d)" %
             ( self.orig_path, offset, offset, length, length ) )
diff --git a/FuseArchive/FileSystem.py b/FuseArchive/FileSystem.py
index dbdc8a3..b393dbb 100644
--- a/FuseArchive/FileSystem.py
+++ b/FuseArchive/FileSystem.py
@@ -34,7 +34,7 @@ class FileSystem(fuse.Fuse):
                 stats = f.fgetattr()
                 f.release( 0 )
         else:
-            logging.debug( "Using os.lstat to get stats" )
+            logging.debug( "Using os.lstat to get stats for %s" % path )
             stats = os.lstat( treefile )
 
         return stats
diff --git a/FuseArchive/Storage/FileSystem.py b/FuseArchive/Storage/FileSystem.py
index a6687b5..04c6bb9 100644
--- a/FuseArchive/Storage/FileSystem.py
+++ b/FuseArchive/Storage/FileSystem.py
@@ -4,6 +4,47 @@ from FuseArchive.Chunk import Chunk
 
 magic_depth = 5
 
+def inc_chunk( key, count ):
+    # Increment this chunk header reference
+    path = _key_to_path( key )
+    logging.debug( "Updating header on %s, ref count + %d" % ( path, count ) )
+    f = open( path, "r+" )
+    data = f.read( Chunk.header_length() )
+    newcount, data = Chunk.inc_header_ref( data, count )
+    f.seek( 0 )
+    f.write( data )
+    f.close()
+    logging.debug( "Count is now: %d" % newcount )
+
+    assert newcount >= 0, "Count is negative?!!"
+
+    if newcount == 0:
+        logging.debug( "Freeing chunk" )
+        os.unlink( path )
+
+def lock_chunk( key ):
+    inc_chunk( key, 1 )
+
+def unlock_chunk( key ):
+    inc_chunk( key, -1 )
+
+def _key_to_path( key ):
+    logging.debug( "Converting key to path" )
+    ( thash, seq ) = key
+    chars = list( thash )
+    logging.debug( chars )
+
+    # Todo: make a digest -> path function to share with deflate
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+    logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
+    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
+    subpath = '/'.join( subparts )
+    logging.debug( "Subpath: " + subpath )
+
+    subpath += "/%s_%d" % ( hexdigest, seq )
+    return( "./storage/" + subpath )
+
+
 # This will write out a data block, it will return a key that can get this
 # data back later
 def save_chunk( chunk ):
@@ -77,26 +118,15 @@ def load_chunk( key ):
     if FuseArchive.magic_profiling:
         return ''
 
-    ( thash, seq ) = key
     logging.debug( "Begin load_chunk" )
 
-    chars = list( thash )
-    logging.debug( chars )
-
-    # Todo: make a digest -> path function to share with deflate
-    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
-    logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
-    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
-    subpath = '/'.join( subparts )
-    logging.debug( "Subpath: " + subpath )
-
-    subpath += "/%s_%d" % ( hexdigest, seq )
+    subpath = _key_to_path( key )
 
     logging.debug( "Chunk path: " + subpath )
 
-    if os.path.exists( "./storage/" + subpath ):
+    if os.path.exists( subpath ):
         logging.debug( "Exporting chunk" )
-        readchunk = open( "./storage/" + subpath )
+        readchunk = open( subpath )
         chunk = Chunk.deserialize( readchunk.read() ).chunk
         readchunk.close()
     else:
diff --git a/dump_chunk.py b/dump_chunk.py
index 8212e62..b28d2e7 100755
--- a/dump_chunk.py
+++ b/dump_chunk.py
@@ -4,7 +4,7 @@ import FuseArchive.Chunk, sys, pickle, os
 
 # This prints some debug info about a chunk
 f = FuseArchive.Chunk.Chunk.parse_header( open( sys.argv[ 1 ] ).read(
-    FuseArchive.Chunk.header_length ) )
+    FuseArchive.Chunk.Chunk.header_length() ) )
 print f
 
 os.system( "ls -l %s" % sys.argv[ 1 ] )
```
