From 48c3c17f734f2e548b6c8d0d75a0060306a75ee6 Mon Sep 17 00:00:00 2001 From: Steve Slaven Date: Thu, 6 Aug 2009 16:23:10 -0700 Subject: Add reference counting so we can delete unused chunks diff --git a/FuseArchive/Chunk.py b/FuseArchive/Chunk.py index 9bb05bd..082df68 100644 --- a/FuseArchive/Chunk.py +++ b/FuseArchive/Chunk.py @@ -8,11 +8,11 @@ import struct, zlib, logging hformat = 'HLBL48x' compress_level = 6 -header_length = 64 +_header_length = 64 -assert struct.calcsize( hformat ) == header_length, \ - "Header struct must be 64 bytes not %d bytes" % \ - struct.calcsize( hformat ) +assert struct.calcsize( hformat ) == _header_length, \ + "Header struct must be %d bytes not %d bytes" % \ + ( _header_length, struct.calcsize( hformat ) ) # This handles serialization and deserialization of compressed chunks with # some header data @@ -59,15 +59,15 @@ class Chunk: @staticmethod def deserialize(data): logging.debug( "Deserializing data of length %d" % len( data ) ) - hd = Chunk.parse_header( data[ :header_length ] ) + hd = Chunk.parse_header( data[ :_header_length ] ) obj = Chunk() obj.count = hd[ 'count' ] compression = hd[ 'compression' ] if compression == 0: - obj.chunk = data[ header_length: ] + obj.chunk = data[ _header_length: ] elif compression == 1: - obj.chunk = zlib.decompress( data[ header_length: ] ) + obj.chunk = zlib.decompress( data[ _header_length: ] ) else: raise ValueError( "Invalid compression type: %d" % compression ) @@ -87,3 +87,15 @@ class Chunk: 'count': fields[ 3 ] } + # This is for updating header info, returns a tuple with the new count + # + the data + @staticmethod + def inc_header_ref(data, count): + logging.debug( "Incrementing ref count by %d" % count ) + fields = list( struct.unpack( hformat, data ) ) + fields[ 3 ] += count + return( fields[ 3 ], struct.pack( hformat, *fields ) ) + + @staticmethod + def header_length(): + return( _header_length ) diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py index 051965f..ac39439 
100644 --- a/FuseArchive/ChunkFile.py +++ b/FuseArchive/ChunkFile.py @@ -1,4 +1,4 @@ -import logging, os, errno, fcntl, fuse, FuseArchive +import logging, os, errno, fcntl, fuse, FuseArchive, copy import FuseArchive.Storage.ZipFile, FuseArchive.Storage.FileSystem from binascii import hexlify from FuseArchive.Serializer import Serializer @@ -34,7 +34,10 @@ def flag2mode(flags): if chunkstyle == 'fs': load_chunk = FuseArchive.Storage.FileSystem.load_chunk save_chunk = FuseArchive.Storage.FileSystem.save_chunk + lock_chunk = FuseArchive.Storage.FileSystem.lock_chunk + unlock_chunk = FuseArchive.Storage.FileSystem.unlock_chunk elif chunkstyle == 'zip': + raise ValueError( "Zip storage doesn't support lock/unlock, make an interface!" ) load_chunk = FuseArchive.Storage.ZipFile.load_chunk save_chunk = FuseArchive.Storage.ZipFile.save_chunk else: @@ -136,6 +139,7 @@ class ChunkFile(object): self.direct_io = False self.keep_cache = False + self.original_chunks = copy.deepcopy( self.chunks ) logging.debug( "%s init complete" % self ) @@ -210,6 +214,29 @@ class ChunkFile(object): logging.debug( "Key was %s" % key ) self.dirty_chunks = 0 + # Is this chunk changed from what was here before? + oldkey = None + key = self.chunks[ index ] + + changed = False + # Is the old chunks at least this big? 
+ if index >= len( self.original_chunks ): + logging.debug( "No old chunk at this spot, changed for sure" ) + changed = True + else: + oldkey = self.original_chunks[ index ] + if oldkey != key: + logging.debug( "Key has changed at index %d" % index ) + changed = True + logging.debug( "%s is now %s" % (oldkey, key) ) + + if changed: + logging.debug( "Chunk at index %d has changed" % index ) + if oldkey != None: + unlock_chunk( oldkey ) + + lock_chunk( key ) + def read(self, length, offset): logging.debug( "Reading from %s offset: %d (0x%x) length: %d (0x%d)" % ( self.orig_path, offset, offset, length, length ) ) diff --git a/FuseArchive/FileSystem.py b/FuseArchive/FileSystem.py index dbdc8a3..b393dbb 100644 --- a/FuseArchive/FileSystem.py +++ b/FuseArchive/FileSystem.py @@ -34,7 +34,7 @@ class FileSystem(fuse.Fuse): stats = f.fgetattr() f.release( 0 ) else: - logging.debug( "Using os.lstat to get stats" ) + logging.debug( "Using os.lstat to get stats for %s" % path ) stats = os.lstat( treefile ) return stats diff --git a/FuseArchive/Storage/FileSystem.py b/FuseArchive/Storage/FileSystem.py index a6687b5..04c6bb9 100644 --- a/FuseArchive/Storage/FileSystem.py +++ b/FuseArchive/Storage/FileSystem.py @@ -4,6 +4,47 @@ from FuseArchive.Chunk import Chunk magic_depth = 5 +def inc_chunk( key, count ): + # Increment this chunk header reference + path = _key_to_path( key ) + logging.debug( "Updating header on %s, ref count + %d" % ( path, count ) ) + f = open( path, "r+" ) + data = f.read( Chunk.header_length() ) + newcount, data = Chunk.inc_header_ref( data, count ) + f.seek( 0 ) + f.write( data ) + f.close() + logging.debug( "Count is now: %d" % newcount ) + + assert newcount >= 0, "Count is negative?!!" 
+ + if newcount == 0: + logging.debug( "Freeing chunk" ) + os.unlink( path ) + +def lock_chunk( key ): + inc_chunk( key, 1 ) + +def unlock_chunk( key ): + inc_chunk( key, -1 ) + +def _key_to_path( key ): + logging.debug( "Converting key to path" ) + ( thash, seq ) = key + chars = list( thash ) + logging.debug( chars ) + + # Todo: make a digest -> path function to share with deflate + hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] ) + logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) ) + subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ] + subpath = '/'.join( subparts ) + logging.debug( "Subpath: " + subpath ) + + subpath += "/%s_%d" % ( hexdigest, seq ) + return( "./storage/" + subpath ) + + # This will write out a data block, it will return a key that can get this # data back later def save_chunk( chunk ): @@ -77,26 +118,15 @@ def load_chunk( key ): if FuseArchive.magic_profiling: return '' - ( thash, seq ) = key logging.debug( "Begin load_chunk" ) - chars = list( thash ) - logging.debug( chars ) - - # Todo: make a digest -> path function to share with deflate - hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] ) - logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) ) - subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ] - subpath = '/'.join( subparts ) - logging.debug( "Subpath: " + subpath ) - - subpath += "/%s_%d" % ( hexdigest, seq ) + subpath = _key_to_path( key ) logging.debug( "Chunk path: " + subpath ) - if os.path.exists( "./storage/" + subpath ): + if os.path.exists( subpath ): logging.debug( "Exporting chunk" ) - readchunk = open( "./storage/" + subpath ) + readchunk = open( subpath ) chunk = Chunk.deserialize( readchunk.read() ).chunk readchunk.close() else: diff --git a/dump_chunk.py b/dump_chunk.py index 8212e62..b28d2e7 100755 --- a/dump_chunk.py +++ b/dump_chunk.py @@ -4,7 +4,7 @@ import FuseArchive.Chunk, sys, pickle, os # This prints some debug info about a chunk f = 
FuseArchive.Chunk.Chunk.parse_header( open( sys.argv[ 1 ] ).read( - FuseArchive.Chunk.header_length ) ) + FuseArchive.Chunk.Chunk.header_length() ) ) print f os.system( "ls -l %s" % sys.argv[ 1 ] ) -- cgit v0.10.2