From a62be67068f4a8386f55652906c3889315e19319 Mon Sep 17 00:00:00 2001
From: Steve Slaven
Date: Thu, 23 Jul 2009 11:18:12 -0700
Subject: Moved block/chunk code to separate subs so that it can happen on demand

diff --git a/fusearchive.py b/fusearchive.py
index 67e42ea..78b1c8c 100755
--- a/fusearchive.py
+++ b/fusearchive.py
@@ -41,6 +41,94 @@ def flag2mode(flags):
 
     return m
 
+# This will write out a data block, it will return a key that can get this
+# data back later
+def save_chunk( chunk ):
+    dmsg( 2, "Begin save_chunk" )
+    # Save this hash string, similar to the backuppc algo
+    digest = sha.new( str(len(chunk)) + chunk ).digest()
+
+    # Write out our chunk
+    chars = list( digest )
+    dmsg( 4, chars )
+
+    # We make the hexdigest here, yeah we could just call hexdigest()
+    # but we need to essentially do this same thing to reassemble the
+    # file anyway
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] );
+
+    # Subparts just needs the first N chars
+    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
+
+    dmsg( 4, subparts )
+    subpath = '/'.join( subparts );
+    dmsg( 3, "Subpath: " + subpath )
+
+    # Make sure this sub path exists
+    nextpart = "./storage"
+    for part in subparts:
+        nextpart += "/" + part
+        if not os.path.exists( nextpart ):
+            dmsg( 3, "Creating subdir: " + nextpart )
+            os.mkdir( nextpart )
+
+    # Find a chunk slot
+    sub = 0
+    while True:
+        checkpath = "./storage/" + subpath + "/" + hexdigest + "_" + str( sub )
+        dmsg( 3, "Checking: " + checkpath )
+        if os.path.exists( checkpath ):
+            # Check if this is our data
+            verify = gzip.open( checkpath, "r" )
+            verify_contents = verify.read()
+            verify.close()
+
+            if verify_contents == chunk:
+                dmsg( 3, "Found existing block" )
+                break
+            else:
+                dmsg( 3, "Block exists but is not the same" )
+                sub += 1
+        else:
+            # We found a spot, dump our data here
+            dmsg( 3, "No block here, creating new block" )
+            savechunk = gzip.open( checkpath, "w" )
+            savechunk.write( chunk )
+            savechunk.close()
+            break
+
+    dmsg( 3, "Got chunk slot: " + str( sub ) )
+    return( digest + "_" + str( sub ) )
+
+# This will return a data block by key that was saved previously
+def load_chunk( key ):
+    ( hash, seq ) = key.split( '_' )
+    dmsg( 2, "Begin load_chunk" )
+
+    chars = list( hash )
+    dmsg( 4, chars )
+
+    # Todo: make a digest -> path function to share with deflate
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] );
+    dmsg( 3, "Hash is: " + hash + " sub " + seq )
+    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
+    subpath = '/'.join( subparts );
+    dmsg( 3, "Subpath: " + subpath )
+
+    subpath += "/" + hexdigest + "_" + str( seq );
+
+    dmsg( 3, "Chunk path: " + subpath )
" + subpath ) - - if os.path.exists( "./storage/" + subpath ): - dmsg( 3, "Exporting chunk" ) - readchunk = gzip.open( "./storage/" + subpath ) - out.write( readchunk.read() ) - readchunk.close() - else: - raise IOError + out.write( load_chunk( key ) ) dmsg( 2, "File inflated" ) out.close() @@ -107,60 +174,8 @@ def deflate( src, dest ): if len( chunk ) == 0: break - # Save this hash string, similar to the backuppc algo - digest = sha.new( str(len(chunk)) + chunk ).digest() - - # Write out our chunk - chars = list( digest ) - dmsg( 4, chars ) - - # We make the hexdigest here, yeah we could just call hexdigest() - # but we need to essentially do this same thing to reassemble the - # file anyway - hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] ); - - # Subparts just needs the first N chars - subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ] - - dmsg( 4, subparts ) - subpath = '/'.join( subparts ); - dmsg( 3, "Subpath: " + subpath ) - - # Make sure this sub path exists - nextpart = "./storage" - for part in subparts: - nextpart += "/" + part - if not os.path.exists( nextpart ): - dmsg( 3, "Creating subdir: " + nextpart ) - os.mkdir( nextpart ) - - # Find a chunk slot - sub = 0 - while True: - checkpath = "./storage/" + subpath + "/" + hexdigest + "_" + str( sub ) - dmsg( 3, "Checking: " + checkpath ) - if os.path.exists( checkpath ): - # Check if this is our data - verify = gzip.open( checkpath, "r" ) - verify_contents = verify.read() - verify.close() - - if verify_contents == chunk: - dmsg( 3, "Found existing block" ) - break - else: - dmsg( 3, "Block exists but is not the same" ) - sub += 1 - else: - # We found a spot, dump our data here - dmsg( 3, "No block here, creating new block" ) - savechunk = gzip.open( checkpath, "w" ) - savechunk.write( chunk ) - savechunk.close - break - - dmsg( 3, "Got chunk slot: " + str( sub ) ) - hashs.append( [ digest, sub ] ) + key = save_chunk( chunk ) + hashs.append( key ) inp.close() @@ -365,6 +380,7 @@ class FuseArchive(Fuse): # BUG: If you cp -a a file then quickly ls -l sometimes it doesn't show # up right? like wrong size and stuff? + # Maybe because release doesn't return a fuse message and is async? def release(self, flags): # Deflate the file dmsg( 2, "Release: " + self.orig_path ) -- cgit v0.10.2