From 46d5d21e933f92ce9eaf32dd69e02db562945024 Mon Sep 17 00:00:00 2001
From: Steve Slaven
Date: Wed, 22 Jul 2009 16:13:43 -0700
Subject: This actually stores file chunks now

---
Review notes (below the "---" marker, so not part of the commit message):
 * Layout rebuilt from a whitespace-collapsed copy of this patch; the
   indentation and blank context lines are a best-effort reconstruction.
 * deflate(): "savechunk.close" is now actually called, and chunk data is
   read and written in binary mode.
 * Hunk line counts were recomputed for the added comment lines; the
   "index" hashes are still those of the original commit.

diff --git a/fusearchive.py b/fusearchive.py
index b427a5f..f3b5093 100755
--- a/fusearchive.py
+++ b/fusearchive.py
@@ -7,7 +7,7 @@
 # See the file COPYING.
 #
 
-import os, sys, shutil, fcntl, fuse, re, tempfile
+import os, sys, shutil, fcntl, fuse, re, tempfile, sha, pickle
 from errno import *
 from stat import *
 from fuse import Fuse
@@ -22,6 +22,8 @@ fuse.fuse_python_api = (0, 2)
 
 fuse.feature_assert('stateful_files', 'has_init')
 
+magic_blocksize = 1024 * 32
+magic_depth = 5
 
 def flag2mode(flags):
     md = {os.O_RDONLY: 'r', os.O_WRONLY: 'w', os.O_RDWR: 'w+'}
@@ -32,6 +34,99 @@ def flag2mode(flags):
 
     return m
 
+# Inflate a file: src is a packed file, dest is where the unpacked file
+# should go.  Chunks are assumed to live under ./storage/.
+# NOTE: not implemented yet -- this only prints a marker and leaves dest
+# untouched.
+def inflate( src, dest ):
+    print "inflate!"
+
+# Deflate a file: src is the unpacked file, dest is where the packed
+# version is written.  Chunks are assumed to live under ./storage/.
+#
+# src is read in magic_blocksize pieces.  Each piece is hashed (sha over
+# its length followed by its contents).  The first magic_depth digest
+# bytes become nested two-hex-digit directories under ./storage/ and the
+# remaining bytes, in hex, plus "_N" become the file name; N is the
+# first slot that is free or already holds identical data.  dest gets a
+# pickled { 'data': [ [ digest, N ], ... ] } listing the chunks in order.
+def deflate( src, dest ):
+    print "deflate!"
+    # Chunks are raw bytes, so read the source in binary mode
+    inp = open( src, "rb" )
+
+    hashs = [];
+    # Python has no assignment-in-condition form such as
+    #   while( chunk = inp.read( magic_blocksize ) ):
+    # so loop forever and break on an empty read instead.  See:
+    # http://groups.google.com/group/comp.lang.python/browse_thread/thread/ed25388487b3ac7b
+
+    while True:
+        chunk = inp.read( magic_blocksize )
+        if len( chunk ) == 0:
+            break
+
+        # Save this hash string, similar to the backuppc algo
+        digest = sha.new( str(len(chunk)) + chunk ).digest()
+
+        # Write out our chunk
+        chars = list( digest )
+        print chars
+        subparts = [];
+        for l in range( 0, magic_depth ):
+            subparts.append( "%02x" % ord( chars.pop( 0 ) ) )
+
+        # We make the hexdigest here, yeah we could just call hexdigest()
+        # but we need to essentially do this same thing to reassemble the
+        # file anyway
+        hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] );
+
+        print subparts
+        subpath = '/'.join( subparts );
+        print "Subpath: " + subpath
+
+        # Make sure this sub path exists
+        nextpart = "./storage"
+        for part in subparts:
+            nextpart += "/" + part
+            if not os.path.exists( nextpart ):
+                print "Creating subdir: " + nextpart
+                os.mkdir( nextpart )
+
+        # Find a chunk slot
+        sub = 0
+        while True:
+            checkpath = "./storage/" + subpath + "/" + hexdigest + "_" + str( sub )
+            print "Checking: " + checkpath
+            if os.path.exists( checkpath ):
+                # Check if this is our data
+                verify = open( checkpath, "rb" )
+                verify_contents = verify.read()
+                verify.close()
+
+                if verify_contents == chunk:
+                    print "Found existing block"
+                    break
+                else:
+                    print "Block exists but is not the same"
+                    sub += 1
+            else:
+                # We found a spot, dump our data here
+                print "No block here, creating new block"
+                savechunk = open( checkpath, "wb" )
+                savechunk.write( chunk )
+                # close must be called; a bare "savechunk.close" is a no-op
+                savechunk.close()
+                break
+
+        print "Got chunk slot: " + str( sub )
+        hashs.append( [ digest, sub ] )
+
+    inp.close()
+
+    out = open( dest, "w" )
+    pickle.dump( { 'data': hashs }, out )
+    out.close()
 
 class FuseArchive(Fuse):
 
@@ -150,7 +245,7 @@ class FuseArchive(Fuse):
             #os.close( fdnum );
 
             if os.path.exists( "./tree" + self.orig_path ):
-                shutil.copy( "./tree" + path, self.tmp_name )
+                inflate( "./tree" + path, self.tmp_name )
 
             print "Shadow file: " + self.tmp_name + " for " + self.orig_path
             print "Going to open shadow file with flags: " + str(flags) + " mode " + str(mode)
@@ -191,7 +286,7 @@ class FuseArchive(Fuse):
                     self.tmp_name + " -> " + self.orig_path
                 #pdb.set_trace()
 
-                shutil.copy( self.tmp_name, "./tree" + self.orig_path );
+                deflate( self.tmp_name, "./tree" + self.orig_path );
 
                 print "Deleting old file: " + self.tmp_name
                 os.unlink( self.tmp_name );
-- 
cgit v0.10.2