aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteve Slaven <bpk@hoopajoo.net>2009-07-23 18:18:12 (GMT)
committerSteve Slaven <bpk@hoopajoo.net>2009-07-23 18:18:12 (GMT)
commita62be67068f4a8386f55652906c3889315e19319 (patch)
tree442dc080a71cc8de723b11590c27db03c599231f
parent3187cd8cb6935b4ce2dccd0d60e580da252f871b (diff)
downloadfusearchive-a62be67068f4a8386f55652906c3889315e19319.zip
fusearchive-a62be67068f4a8386f55652906c3889315e19319.tar.gz
fusearchive-a62be67068f4a8386f55652906c3889315e19319.tar.bz2
Moved block/chunk code to separate subs so that it can happen on demand
-rwxr-xr-xfusearchive.py168
1 files changed, 92 insertions, 76 deletions
diff --git a/fusearchive.py b/fusearchive.py
index 67e42ea..78b1c8c 100755
--- a/fusearchive.py
+++ b/fusearchive.py
@@ -41,6 +41,94 @@ def flag2mode(flags):
return m
# This will write out a data block, it will return a key that can get this
# data back later
def save_chunk( chunk ):
    """Store chunk (deduplicated by content) under ./storage.

    Returns a key -- the raw sha1 digest of len+data, "_", and a slot
    number -- that load_chunk() accepts to get the data back.
    """
    dmsg( 2, "Begin save_chunk" )
    # Hash length+data, similar to the backuppc algorithm
    digest = sha.new( str( len( chunk ) ) + chunk ).digest()

    # Write out our chunk
    chars = list( digest )
    dmsg( 4, chars )

    # We make the hexdigest here, yeah we could just call hexdigest()
    # but we need to essentially do this same thing to reassemble the
    # file anyway
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    # Subparts just needs the first N chars -- they pick the directory fan-out
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]

    dmsg( 4, subparts )
    subpath = '/'.join( subparts )
    dmsg( 3, "Subpath: " + subpath )

    # Make sure this sub path exists
    nextpart = "./storage"
    for part in subparts:
        nextpart += "/" + part
        if not os.path.exists( nextpart ):
            dmsg( 3, "Creating subdir: " + nextpart )
            os.mkdir( nextpart )

    # Find a chunk slot: identical data reuses an existing slot, a hash
    # collision with different data moves on to _1, _2, ...
    sub = 0
    while True:
        checkpath = "./storage/" + subpath + "/" + hexdigest + "_" + str( sub )
        dmsg( 3, "Checking: " + checkpath )
        if os.path.exists( checkpath ):
            # Check if this is our data
            verify = gzip.open( checkpath, "r" )
            verify_contents = verify.read()
            verify.close()

            if verify_contents == chunk:
                dmsg( 3, "Found existing block" )
                break
            else:
                dmsg( 3, "Block exists but is not the same" )
                sub += 1
        else:
            # We found a spot, dump our data here
            dmsg( 3, "No block here, creating new block" )
            savechunk = gzip.open( checkpath, "w" )
            savechunk.write( chunk )
            # BUG FIX: was "savechunk.close" (attribute access, never
            # called) -- the gzip file was never flushed/closed
            savechunk.close()
            break

    dmsg( 3, "Got chunk slot: " + str( sub ) )
    return( digest + "_" + str( sub ) )
+
# This will return a data block by key that was saved previously
def load_chunk( key ):
    """Fetch a chunk previously stored by save_chunk().

    key is the raw binary digest + "_" + slot number produced by
    save_chunk().  Raises IOError if the chunk file does not exist.
    """
    # BUG FIX: the digest part of the key is raw binary and can itself
    # contain the byte 0x5f ("_"); key.split( '_' ) would then produce
    # more than two parts and the unpack would raise ValueError.  Split
    # on the LAST underscore only -- that is the one save_chunk appended.
    ( hash, seq ) = key.rsplit( '_', 1 )
    dmsg( 2, "Begin load_chunk" )

    chars = list( hash )
    dmsg( 4, chars )

    # Todo: make a digest -> path function to share with save_chunk
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
    dmsg( 3, "Hash is: " + hash + " sub " + seq )
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
    subpath = '/'.join( subparts )
    dmsg( 3, "Subpath: " + subpath )

    subpath += "/" + hexdigest + "_" + str( seq )

    dmsg( 3, "Chunk path: " + subpath )

    if os.path.exists( "./storage/" + subpath ):
        dmsg( 3, "Exporting chunk" )
        readchunk = gzip.open( "./storage/" + subpath )
        chunk = readchunk.read()
        readchunk.close()
    else:
        raise IOError

    return chunk
+
# Inflate a file, src is a packed file, dest is where the unpacked file
# should go
# we assume our chunks are in storage/
@@ -60,28 +148,7 @@ def inflate( src, dest ):
# Now unserialize the chunks back in to a file
for key in magic[ 'data' ]:
- ( hash, seq ) = key
-
- chars = list( hash )
- dmsg( 4, chars )
-
- # Todo: make a digest -> path function to share with deflate
- hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] );
- subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
- subpath = '/'.join( subparts );
- dmsg( 3, "Subpath: " + subpath )
-
- subpath += "/" + hexdigest + "_" + str( seq );
-
- dmsg( 3, "Chunk path: " + subpath )
-
- if os.path.exists( "./storage/" + subpath ):
- dmsg( 3, "Exporting chunk" )
- readchunk = gzip.open( "./storage/" + subpath )
- out.write( readchunk.read() )
- readchunk.close()
- else:
- raise IOError
+ out.write( load_chunk( key ) )
dmsg( 2, "File inflated" )
out.close()
@@ -107,60 +174,8 @@ def deflate( src, dest ):
if len( chunk ) == 0:
break
- # Save this hash string, similar to the backuppc algo
- digest = sha.new( str(len(chunk)) + chunk ).digest()
-
- # Write out our chunk
- chars = list( digest )
- dmsg( 4, chars )
-
- # We make the hexdigest here, yeah we could just call hexdigest()
- # but we need to essentially do this same thing to reassemble the
- # file anyway
- hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] );
-
- # Subparts just needs the first N chars
- subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
-
- dmsg( 4, subparts )
- subpath = '/'.join( subparts );
- dmsg( 3, "Subpath: " + subpath )
-
- # Make sure this sub path exists
- nextpart = "./storage"
- for part in subparts:
- nextpart += "/" + part
- if not os.path.exists( nextpart ):
- dmsg( 3, "Creating subdir: " + nextpart )
- os.mkdir( nextpart )
-
- # Find a chunk slot
- sub = 0
- while True:
- checkpath = "./storage/" + subpath + "/" + hexdigest + "_" + str( sub )
- dmsg( 3, "Checking: " + checkpath )
- if os.path.exists( checkpath ):
- # Check if this is our data
- verify = gzip.open( checkpath, "r" )
- verify_contents = verify.read()
- verify.close()
-
- if verify_contents == chunk:
- dmsg( 3, "Found existing block" )
- break
- else:
- dmsg( 3, "Block exists but is not the same" )
- sub += 1
- else:
- # We found a spot, dump our data here
- dmsg( 3, "No block here, creating new block" )
- savechunk = gzip.open( checkpath, "w" )
- savechunk.write( chunk )
- savechunk.close
- break
-
- dmsg( 3, "Got chunk slot: " + str( sub ) )
- hashs.append( [ digest, sub ] )
+ key = save_chunk( chunk )
+ hashs.append( key )
inp.close()
@@ -365,6 +380,7 @@ class FuseArchive(Fuse):
# BUG: If you cp -a a file then quickly ls -l sometimes it doesn't show
# up right? like wrong size and stuff?
+ # Maybe because release doesn't return a fuse message and is async?
def release(self, flags):
# Deflate the file
dmsg( 2, "Release: " + self.orig_path )