author    Steve Slaven <bpk@hoopajoo.net>    2009-08-06 23:23:10 (GMT)
committer Steve Slaven <bpk@hoopajoo.net>    2009-08-06 23:23:10 (GMT)
commit    48c3c17f734f2e548b6c8d0d75a0060306a75ee6 (patch)
tree      f4829510a82181fb62c7f324859a6cc18ca1ec60
parent    2a98d3391ca347317284347e5cd1c3ecbadc3d7e (diff)
Add reference counting so we can delete unused chunks
-rw-r--r--  FuseArchive/Chunk.py               26
-rw-r--r--  FuseArchive/ChunkFile.py           29
-rw-r--r--  FuseArchive/FileSystem.py           2
-rw-r--r--  FuseArchive/Storage/FileSystem.py  58
-rwxr-xr-x  dump_chunk.py                       2
5 files changed, 93 insertions, 24 deletions
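
The idea behind this commit: every serialized chunk starts with a fixed 64-byte header whose last field is a reference count. Taking a reference bumps the count, dropping one decrements it, and a chunk whose count falls to zero is unlinked from storage. A minimal sketch of that header update, assuming the same 'HLBL48x' layout used below (adjust_refcount is an illustrative name, not part of the patch):

    import os, struct

    HFORMAT = 'HLBL48x'   # 64-byte chunk header; the last field is the reference count

    def adjust_refcount(path, delta):
        # Rewrite only the header in place, then free the chunk once unreferenced
        f = open(path, 'r+b')
        fields = list(struct.unpack(HFORMAT, f.read(struct.calcsize(HFORMAT))))
        fields[3] += delta
        f.seek(0)
        f.write(struct.pack(HFORMAT, *fields))
        f.close()
        if fields[3] == 0:
            os.unlink(path)
        return fields[3]
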
diff --git a/FuseArchive/Chunk.py b/FuseArchive/Chunk.py
index 9bb05bd..082df68 100644
--- a/FuseArchive/Chunk.py
+++ b/FuseArchive/Chunk.py
@@ -8,11 +8,11 @@ import struct, zlib, logging
hformat = 'HLBL48x'
compress_level = 6
-header_length = 64
+_header_length = 64
-assert struct.calcsize( hformat ) == header_length, \
- "Header struct must be 64 bytes not %d bytes" % \
- struct.calcsize( hformat )
+assert struct.calcsize( hformat ) == _header_length, \
+ "Header struct must be %d bytes not %d bytes" % \
+ ( _header_length, struct.calcsize( hformat ) )
# This handles serialization and deserialization of compressed chunks with
# some header data
@@ -59,15 +59,15 @@ class Chunk:
@staticmethod
def deserialize(data):
logging.debug( "Deserializing data of length %d" % len( data ) )
- hd = Chunk.parse_header( data[ :header_length ] )
+ hd = Chunk.parse_header( data[ :_header_length ] )
obj = Chunk()
obj.count = hd[ 'count' ]
compression = hd[ 'compression' ]
if compression == 0:
- obj.chunk = data[ header_length: ]
+ obj.chunk = data[ _header_length: ]
elif compression == 1:
- obj.chunk = zlib.decompress( data[ header_length: ] )
+ obj.chunk = zlib.decompress( data[ _header_length: ] )
else:
raise ValueError( "Invalid compression type: %d" % compression )
@@ -87,3 +87,15 @@ class Chunk:
'count': fields[ 3 ]
}
+ # Update the reference count stored in a serialized header; returns a tuple
+ # of the new count and the updated header data
+ @staticmethod
+ def inc_header_ref(data, count):
+ logging.debug( "Incrementing ref count by %d" % count )
+ fields = list( struct.unpack( hformat, data ) )
+ fields[ 3 ] += count
+ return( fields[ 3 ], struct.pack( hformat, *fields ) )
+
+ @staticmethod
+ def header_length():
+ return( _header_length )
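
With the header size now exposed through an accessor rather than a module-level constant, callers can inspect a chunk header without hard-coding its layout. A hedged example of reading a chunk's reference count (chunk_refcount is illustrative, not part of the patch):

    from FuseArchive.Chunk import Chunk

    def chunk_refcount(chunk_path):
        # Read only the fixed-size header and report how many references it holds
        f = open(chunk_path, 'rb')
        header = Chunk.parse_header(f.read(Chunk.header_length()))
        f.close()
        return header['count']
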
diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py
index 051965f..ac39439 100644
--- a/FuseArchive/ChunkFile.py
+++ b/FuseArchive/ChunkFile.py
@@ -1,4 +1,4 @@
-import logging, os, errno, fcntl, fuse, FuseArchive
+import logging, os, errno, fcntl, fuse, FuseArchive, copy
import FuseArchive.Storage.ZipFile, FuseArchive.Storage.FileSystem
from binascii import hexlify
from FuseArchive.Serializer import Serializer
@@ -34,7 +34,10 @@ def flag2mode(flags):
if chunkstyle == 'fs':
load_chunk = FuseArchive.Storage.FileSystem.load_chunk
save_chunk = FuseArchive.Storage.FileSystem.save_chunk
+ lock_chunk = FuseArchive.Storage.FileSystem.lock_chunk
+ unlock_chunk = FuseArchive.Storage.FileSystem.unlock_chunk
elif chunkstyle == 'zip':
+ raise ValueError( "Zip storage doesn't support lock/unlock, make an interface!" )
load_chunk = FuseArchive.Storage.ZipFile.load_chunk
save_chunk = FuseArchive.Storage.ZipFile.save_chunk
else:
@@ -136,6 +139,7 @@ class ChunkFile(object):
self.direct_io = False
self.keep_cache = False
+ self.original_chunks = copy.deepcopy( self.chunks )
logging.debug( "%s init complete" % self )
@@ -210,6 +214,29 @@ class ChunkFile(object):
logging.debug( "Key was %s" % key )
self.dirty_chunks = 0
+ # Has this chunk changed from what was here before?
+ oldkey = None
+ key = self.chunks[ index ]
+
+ changed = False
+ # Is the old chunk list at least this big?
+ if index >= len( self.original_chunks ):
+ logging.debug( "No old chunk at this spot, changed for sure" )
+ changed = True
+ else:
+ oldkey = self.original_chunks[ index ]
+ if oldkey != key:
+ logging.debug( "Key has changed at index %d" % index )
+ changed = True
+ logging.debug( "%s is now %s" % (oldkey, key) )
+
+ if changed:
+ logging.debug( "Chunk at index %d has changed" % index )
+ if oldkey is not None:
+ unlock_chunk( oldkey )
+
+ lock_chunk( key )
+
def read(self, length, offset):
logging.debug( "Reading from %s offset: %d (0x%x) length: %d (0x%d)" %
( self.orig_path, offset, offset, length, length ) )
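
The block added above keeps the on-disk counts honest on every flush: if the key stored at an index differs from what the file originally referenced there, the old chunk gives up a reference and the new one gains one. A condensed restatement with a hypothetical helper name:

    def move_reference(index, chunks, original_chunks, lock_chunk, unlock_chunk):
        # Hand the reference at this block index from the old key to the new one
        key = chunks[index]
        oldkey = original_chunks[index] if index < len(original_chunks) else None
        if oldkey != key:
            if oldkey is not None:
                unlock_chunk(oldkey)   # drop the reference held before this write
            lock_chunk(key)            # take a reference on the chunk now in use
        return key
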
diff --git a/FuseArchive/FileSystem.py b/FuseArchive/FileSystem.py
index dbdc8a3..b393dbb 100644
--- a/FuseArchive/FileSystem.py
+++ b/FuseArchive/FileSystem.py
@@ -34,7 +34,7 @@ class FileSystem(fuse.Fuse):
stats = f.fgetattr()
f.release( 0 )
else:
- logging.debug( "Using os.lstat to get stats" )
+ logging.debug( "Using os.lstat to get stats for %s" % path )
stats = os.lstat( treefile )
return stats
diff --git a/FuseArchive/Storage/FileSystem.py b/FuseArchive/Storage/FileSystem.py
index a6687b5..04c6bb9 100644
--- a/FuseArchive/Storage/FileSystem.py
+++ b/FuseArchive/Storage/FileSystem.py
@@ -4,6 +4,47 @@ from FuseArchive.Chunk import Chunk
magic_depth = 5
+def inc_chunk( key, count ):
+ # Increment this chunk header reference
+ path = _key_to_path( key )
+ logging.debug( "Updating header on %s, ref count + %d" % ( path, count ) )
+ f = open( path, "r+" )
+ data = f.read( Chunk.header_length() )
+ newcount, data = Chunk.inc_header_ref( data, count )
+ f.seek( 0 )
+ f.write( data )
+ f.close()
+ logging.debug( "Count is now: %d" % newcount )
+
+ assert newcount >= 0, "Count is negative?!!"
+
+ if newcount == 0:
+ logging.debug( "Freeing chunk" )
+ os.unlink( path )
+
+def lock_chunk( key ):
+ inc_chunk( key, 1 )
+
+def unlock_chunk( key ):
+ inc_chunk( key, -1 )
+
+def _key_to_path( key ):
+ logging.debug( "Converting key to path" )
+ ( thash, seq ) = key
+ chars = list( thash )
+ logging.debug( chars )
+
+ # Todo: make a digest -> path function to share with deflate
+ hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+ logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
+ subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
+ subpath = '/'.join( subparts )
+ logging.debug( "Subpath: " + subpath )
+
+ subpath += "/%s_%d" % ( hexdigest, seq )
+ return( "./storage/" + subpath )
+
+
# This will write out a data block, it will return a key that can get this
# data back later
def save_chunk( chunk ):
@@ -77,26 +118,15 @@ def load_chunk( key ):
if FuseArchive.magic_profiling:
return ''
- ( thash, seq ) = key
logging.debug( "Begin load_chunk" )
- chars = list( thash )
- logging.debug( chars )
-
- # Todo: make a digest -> path function to share with deflate
- hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
- logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
- subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
- subpath = '/'.join( subparts )
- logging.debug( "Subpath: " + subpath )
-
- subpath += "/%s_%d" % ( hexdigest, seq )
+ subpath = _key_to_path( key )
logging.debug( "Chunk path: " + subpath )
- if os.path.exists( "./storage/" + subpath ):
+ if os.path.exists( subpath ):
logging.debug( "Exporting chunk" )
- readchunk = open( "./storage/" + subpath )
+ readchunk = open( subpath )
chunk = Chunk.deserialize( readchunk.read() ).chunk
readchunk.close()
else:
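
The shared _key_to_path helper fans the raw digest out into magic_depth levels of two-hex-character directories, then appends the full hex digest and sequence number. A standalone sketch of that mapping (key_to_path and the root argument are illustrative):

    magic_depth = 5

    def key_to_path(key, root='./storage'):
        # key is (raw digest bytes, sequence number), as produced by save_chunk
        digest, seq = key
        hexdigest = ''.join(["%02x" % ord(c) for c in digest])
        subdirs = '/'.join(["%02x" % ord(c) for c in digest[:magic_depth]])
        return "%s/%s/%s_%d" % (root, subdirs, hexdigest, seq)
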
diff --git a/dump_chunk.py b/dump_chunk.py
index 8212e62..b28d2e7 100755
--- a/dump_chunk.py
+++ b/dump_chunk.py
@@ -4,7 +4,7 @@ import FuseArchive.Chunk, sys, pickle, os
# This prints some debug info about a chunk
f = FuseArchive.Chunk.Chunk.parse_header( open( sys.argv[ 1 ] ).read(
- FuseArchive.Chunk.header_length ) )
+ FuseArchive.Chunk.Chunk.header_length() ) )
print f
os.system( "ls -l %s" % sys.argv[ 1 ] )