From c99221a816afd1414338cd24d92011ebf2f38686 Mon Sep 17 00:00:00 2001
From: Steve Slaven
Date: Mon, 3 Aug 2009 22:00:19 -0700
Subject: Chunk storage handlers

diff --git a/FuseArchive/Storage/FileSystem.py b/FuseArchive/Storage/FileSystem.py
new file mode 100644
index 0000000..8488964
--- /dev/null
+++ b/FuseArchive/Storage/FileSystem.py
@@ -0,0 +1,132 @@
+import logging
+import os
+import sha
+from binascii import hexlify
+
+# NOTE(review): magic_profiling, magic_depth, deep_debug, FuseArchiveChunk and
+# FuseArchiveStream are used below but are neither defined nor imported in
+# this module -- confirm how they are meant to be brought into scope.
+
+# This will write out a data block, it will return a key that can get this
+# data back later
+def save_chunk( chunk ):
+    """Store chunk under ./storage and return its key [ digest, sub ].
+
+    The chunk is filed by its SHA-1 digest: the first magic_depth digest
+    bytes become nested two-hex-digit directories and the file is named
+    <hexdigest>_<sub>, where sub is the first slot that either already holds
+    this chunk or is still free.  When magic_profiling is set nothing is
+    written and [ 0, 0 ] is returned.
+    """
+    if magic_profiling:
+        return( [ 0, 0 ] )
+
+    logging.debug( "Begin save_chunk, length: %d" % len( chunk ) )
+    if deep_debug:
+        logging.debug( "Chunk: %s" % hexlify( chunk ) )
+
+    # Save this hash string, similar to the backuppc algo
+    digest = sha.new( chunk ).digest()
+
+    # Write out our chunk
+    chars = list( digest )
+    logging.debug( chars )
+
+    # We make the hexdigest here, yeah we could just call hexdigest()
+    # but we need to essentially do this same thing to reassemble the
+    # file anyway
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+
+    # Subparts just needs the first N chars
+    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
+
+    logging.debug( subparts )
+    subpath = '/'.join( subparts )
+    logging.debug( "Subpath: " + subpath )
+
+    # Make sure this sub path exists
+    nextpart = "./storage"
+    for part in subparts:
+        nextpart += "/" + part
+        if not os.path.exists( nextpart ):
+            logging.debug( "Creating subdir: " + nextpart )
+            os.mkdir( nextpart )
+
+    # Find a chunk slot
+    sub = 0
+    while True:
+        checkpath = "./storage/%s/%s_%d" % ( subpath, hexdigest, sub )
+        logging.debug( "Checking: " + checkpath )
+        if os.path.exists( checkpath ):
+            # Check if this is our data
+            verify = open( checkpath, "rb" )
+            try:
+                verify_contents = verify.read()
+            finally:
+                verify.close()
+
+            # NOTE(review): existing slots are decoded with
+            # FuseArchiveChunk.deserialize() while new slots below are
+            # written as the raw chunk -- confirm the two formats agree.
+            verify_contents = FuseArchiveChunk.deserialize( verify_contents )
+            if verify_contents.chunk == chunk:
+                logging.debug( "Found existing block" )
+                break
+            else:
+                logging.debug( "Block exists but is not the same" )
+                sub += 1
+        else:
+            # We found a spot, dump our data here
+            logging.debug( "No block here, creating new block" )
+            savechunk = open( checkpath, "wb" )
+            try:
+                savechunk.write( chunk )
+            finally:
+                savechunk.close()
+            break
+
+    logging.debug( "Got chunk slot: %d" % sub )
+    return( [ digest, sub ] )
+
+# This will return a data block by key that was saved previously
+def load_chunk( key ):
+    """Return the data stored under key ( digest, sub ).
+
+    Raises IOError when no file exists for the key.  Returns '' without
+    reading anything when magic_profiling is set.
+    """
+    if magic_profiling:
+        return ''
+
+    ( thash, seq ) = key
+    logging.debug( "Begin load_chunk" )
+
+    chars = list( thash )
+    logging.debug( chars )
+
+    # Todo: make a digest -> path function to share with deflate
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+    logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
+    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
+    subpath = '/'.join( subparts )
+    logging.debug( "Subpath: " + subpath )
+
+    subpath += "/%s_%d" % ( hexdigest, seq )
+
+    logging.debug( "Chunk path: " + subpath )
+
+    if os.path.exists( "./storage/" + subpath ):
+        logging.debug( "Exporting chunk" )
+        readchunk = FuseArchiveStream.open( "./storage/" + subpath )
+        try:
+            chunk = readchunk.read()
+        finally:
+            readchunk.close()
+    else:
+        raise IOError
+
+    if deep_debug:
+        logging.debug( "Load-Chunk: %s" % hexlify( chunk ) )
+
+    return chunk
+
diff --git a/FuseArchive/Storage/ZipFile.py b/FuseArchive/Storage/ZipFile.py
new file mode 100644
index 0000000..c76b5bb
--- /dev/null
+++ b/FuseArchive/Storage/ZipFile.py
@@ -0,0 +1,123 @@
+import logging
+import os
+import sha
+import zipfile
+from binascii import hexlify
+
+# NOTE(review): FuseArchive and deep_debug are used below but are neither
+# defined nor imported in this module -- confirm how they are meant to be
+# brought into scope.
+
+def save_chunk( chunk ):
+    """Store chunk in a zip archive under ./storage; return [ digest, sub ].
+
+    The archive is named after the first four hex digits of the chunk's
+    SHA-1 digest and the member is named <hexdigest>_<sub>, where sub is the
+    first slot that either already holds this chunk or is still free.  When
+    FuseArchive.magic_profiling is set nothing is written and [ 0, 0 ] is
+    returned.
+    """
+    if FuseArchive.magic_profiling:
+        return( [ 0, 0 ] )
+
+    logging.debug( "Begin save_chunk, length: %d" % len( chunk ) )
+    if deep_debug:
+        logging.debug( "Chunk: %s" % hexlify( chunk ) )
+
+    # Save this hash string, similar to the backuppc algo
+    digest = sha.new( chunk ).digest()
+
+    # Write out our chunk
+    chars = list( digest )
+    logging.debug( chars )
+
+    # We make the hexdigest here, yeah we could just call hexdigest()
+    # but we need to essentially do this same thing to reassemble the
+    # file anyway
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+
+    # At most 65536 zip files (first four hex digits of the digest)
+    zipname = hexdigest[ 0:4 ] + ".zip"
+    logging.debug( "Zip name: " + zipname )
+    if not os.path.exists( "./storage/" + zipname ):
+        logging.debug( "Creating intial empty zip" )
+        z = zipfile.ZipFile( "./storage/" + zipname, 'w', zipfile.ZIP_DEFLATED, True )
+        # append mode throws an exception if it's not zip, or maybe it's
+        # just zero-length files
+        z.writestr( 'junk', 'junk' )
+        z.close()
+
+    z = zipfile.ZipFile( "./storage/" + zipname, 'a', zipfile.ZIP_DEFLATED, True )
+    try:
+        # Find a chunk slot
+        sub = 0
+        while True:
+            checkpath = "%s_%d" % ( hexdigest, sub )
+            logging.debug( "Checking: " + checkpath )
+            try:
+                data = z.read( checkpath )
+            except KeyError:
+                # ZipFile.read raises KeyError when there is no such member
+                data = ''
+
+            if len(data):
+                if data == chunk:
+                    logging.debug( "Found existing block" )
+                    break
+                else:
+                    logging.debug( "Block exists but is not the same" )
+                    sub += 1
+            else:
+                # We found a spot, dump our data here
+                logging.debug( "No block here, creating new block" )
+                z.writestr( checkpath, chunk )
+                break
+    finally:
+        # Always close: zipfile only writes the archive's directory on close
+        z.close()
+
+    logging.debug( "Got chunk slot: %d" % sub )
+    return( [ digest, sub ] )
+
+def load_chunk( key ):
+    """Return the chunk previously saved under key ( digest, sub ).
+
+    Raises IOError when the stored member is empty; a missing member
+    surfaces as the KeyError raised by ZipFile.read.  Returns '' without
+    reading anything when FuseArchive.magic_profiling is set.
+    """
+    if FuseArchive.magic_profiling:
+        return ''
+
+    ( thash, seq ) = key
+    logging.debug( "Begin load_chunk" )
+
+    chars = list( thash )
+    logging.debug( chars )
+
+    # Todo: make a digest -> path function to share with deflate
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+
+    zipname = hexdigest[ 0:4 ] + ".zip"
+    logging.debug( "Zip name: " + zipname )
+    z = zipfile.ZipFile( "./storage/" + zipname, 'r', zipfile.ZIP_DEFLATED, True )
+    try:
+        subpath = "%s_%d" % ( hexdigest, seq )
+        logging.debug( "Chunk path: " + subpath )
+        data = z.read( subpath )
+    finally:
+        # Close the archive even when read() raises
+        z.close()
+
+    if len( data ):
+        logging.debug( "Exporting chunk" )
+        chunk = data
+    else:
+        raise IOError
+
+    if deep_debug:
+        logging.debug( "Load-Chunk: %s" % hexlify( chunk ) )
+
+    return chunk
+
+
diff --git a/FuseArchive/Storage/__init__.py b/FuseArchive/Storage/__init__.py
new file mode 100644
index 0000000..e69de29
-- 
cgit v0.10.2