aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteve Slaven <bpk@hoopajoo.net>2009-08-04 05:00:19 (GMT)
committerSteve Slaven <bpk@hoopajoo.net>2009-08-04 05:00:19 (GMT)
commitc99221a816afd1414338cd24d92011ebf2f38686 (patch)
tree0f112e39f9d3a51a3aa06a5ed2097cfde7d519fb
parent8f02874e8e45d21a90f3c880eaf1aafc3e852951 (diff)
downloadfusearchive-c99221a816afd1414338cd24d92011ebf2f38686.zip
fusearchive-c99221a816afd1414338cd24d92011ebf2f38686.tar.gz
fusearchive-c99221a816afd1414338cd24d92011ebf2f38686.tar.bz2
Chunk storage handlers
-rw-r--r--FuseArchive/Storage/FileSystem.py101
-rw-r--r--FuseArchive/Storage/ZipFile.py96
-rw-r--r--FuseArchive/Storage/__init__.py0
3 files changed, 197 insertions, 0 deletions
diff --git a/FuseArchive/Storage/FileSystem.py b/FuseArchive/Storage/FileSystem.py
new file mode 100644
index 0000000..8488964
--- /dev/null
+++ b/FuseArchive/Storage/FileSystem.py
@@ -0,0 +1,101 @@
# This will write out a data block, it will return a key that can get this
# data back later
def save_chunk( chunk ):
    """Write *chunk* into ./storage, addressed by its SHA digest.

    Returns [ digest, seq ] where digest is the raw SHA digest of the
    chunk and seq is a collision counter: distinct chunks that hash to
    the same digest get consecutive sequence numbers (same idea as the
    BackupPC pool).  Raises OSError/IOError on filesystem failure.
    """
    if magic_profiling:
        return( [ 0, 0 ] )

    logging.debug( "Begin save_chunk, length: %d" % len( chunk ) )
    if deep_debug:
        # Bugfix: this used '+', which appended the hex dump to the
        # literal "%s" instead of substituting it into the message
        logging.debug( "Chunk: %s" % hexlify( chunk ) )

    # Save this hash string, similar to the backuppc algo
    digest = sha.new( chunk ).digest()

    # Write out our chunk
    chars = list( digest )
    logging.debug( chars )

    # We make the hexdigest here, yeah we could just call hexdigest()
    # but we need to essentially do this same thing to reassemble the
    # file anyway
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    # Subparts just needs the first N chars
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]

    logging.debug( subparts )
    subpath = '/'.join( subparts )
    logging.debug( "Subpath: " + subpath )

    # Make sure this sub path exists
    nextpart = "./storage"
    for part in subparts:
        nextpart += "/" + part
        if not os.path.exists( nextpart ):
            logging.debug( "Creating subdir: " + nextpart )
            os.mkdir( nextpart )

    # Find a chunk slot
    sub = 0
    while True:
        checkpath = "./storage/%s/%s_%d" % ( subpath, hexdigest, sub )
        logging.debug( "Checking: " + checkpath )
        if os.path.exists( checkpath ):
            # Check if this is our data
            verify = open( checkpath, "rb" )
            try:
                verify_contents = verify.read()
            finally:
                # Close even if the read blows up
                verify.close()

            # NOTE(review): the verify path deserializes the stored file
            # with FuseArchiveChunk, but the write path below stores the
            # raw chunk bytes — one of the two sides looks wrong; confirm
            # the intended on-disk format
            verify_contents = FuseArchiveChunk.deserialize( verify_contents )
            if verify_contents.chunk == chunk:
                logging.debug( "Found existing block" )
                break
            else:
                # Hash collision (or format mismatch); try the next slot
                logging.debug( "Block exists but is not the same" )
                sub += 1
        else:
            # We found a spot, dump our data here
            logging.debug( "No block here, creating new block" )
            savechunk = open( checkpath, "wb" )
            try:
                savechunk.write( chunk )
            finally:
                savechunk.close()
            break

    logging.debug( "Got chunk slot: %d" % sub )
    return( [ digest, sub ] )
+
# This will return a data block by key that was saved previously
def load_chunk( key ):
    """Fetch a chunk previously stored by save_chunk().

    key is the [ digest, seq ] pair returned by save_chunk().  Returns
    the raw chunk data; raises IOError if no chunk file exists at the
    derived storage path.
    """
    if magic_profiling:
        return ''

    ( thash, seq ) = key
    logging.debug( "Begin load_chunk" )

    chars = list( thash )
    logging.debug( chars )

    # Todo: make a digest -> path function to share with deflate
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
    logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
    subpath = '/'.join( subparts )
    logging.debug( "Subpath: " + subpath )

    subpath += "/%s_%d" % ( hexdigest, seq )

    logging.debug( "Chunk path: " + subpath )

    if os.path.exists( "./storage/" + subpath ):
        logging.debug( "Exporting chunk" )
        readchunk = FuseArchiveStream.open( "./storage/" + subpath )
        try:
            chunk = readchunk.read()
        finally:
            # Close even if the read blows up
            readchunk.close()
    else:
        # Include the missing path so the failure is diagnosable
        raise IOError( "No chunk at ./storage/" + subpath )

    if deep_debug:
        # Bugfix: '%' instead of the original '+', which never substituted
        logging.debug( "Load-Chunk: %s" % hexlify( chunk ) )

    return chunk
+
diff --git a/FuseArchive/Storage/ZipFile.py b/FuseArchive/Storage/ZipFile.py
new file mode 100644
index 0000000..c76b5bb
--- /dev/null
+++ b/FuseArchive/Storage/ZipFile.py
@@ -0,0 +1,96 @@
+import os, logging
+
def save_chunk( chunk ):
    """Store *chunk* in a pooled zip file keyed by its SHA digest.

    Chunks are grouped into ./storage/<first 4 hex chars>.zip, under
    the member name <hexdigest>_<seq> where seq is a collision counter.
    Returns [ digest, seq ] for later retrieval via load_chunk().
    """
    if FuseArchive.magic_profiling:
        return( [ 0, 0 ] )

    logging.debug( "Begin save_chunk, length: %d" % len( chunk ) )
    if deep_debug:
        # Bugfix: '%' instead of '+' so the hex dump is substituted
        logging.debug( "Chunk: %s" % hexlify( chunk ) )

    # Save this hash string, similar to the backuppc algo
    digest = sha.new( chunk ).digest()

    # Write out our chunk
    chars = list( digest )
    logging.debug( chars )

    # We make the hexdigest here, yeah we could just call hexdigest()
    # but we need to essentially do this same thing to reassemble the
    # file anyway
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    # Should be about max of 32k zip files
    zipname = hexdigest[ 0:4 ] + ".zip"
    logging.debug( "Zip name: " + zipname )
    if not os.path.exists( "./storage/" + zipname ):
        logging.debug( "Creating initial empty zip" )
        z = zipfile.ZipFile( "./storage/" + zipname, 'w', zipfile.ZIP_DEFLATED, True )
        # append mode throws an exception if it's not zip, or maybe it's
        # just zero-length files
        z.writestr( 'junk', 'junk' )
        z.close()

    z = zipfile.ZipFile( "./storage/" + zipname, 'a', zipfile.ZIP_DEFLATED, True )

    try:
        # Find a chunk slot
        sub = 0
        while True:
            checkpath = "%s_%d" % ( hexdigest, sub )
            logging.debug( "Checking: " + checkpath )
            try:
                data = z.read( checkpath )
            except KeyError:
                # Member doesn't exist yet.  The original bare 'except:'
                # also hid real failures (corrupt zip, I/O errors) here.
                data = ''

            if len(data):
                if data == chunk:
                    logging.debug( "Found existing block" )
                    break
                else:
                    # Hash collision; try the next sequence number
                    logging.debug( "Block exists but is not the same" )
                    sub += 1
            else:
                # We found a spot, dump our data here
                # NOTE(review): a zero-length stored chunk is
                # indistinguishable from "missing" on this path — confirm
                # empty chunks cannot occur
                logging.debug( "No block here, creating new block" )
                z.writestr( checkpath, chunk )
                break
    finally:
        # Always release the zip handle, even on error
        z.close()

    logging.debug( "Got chunk slot: %d" % sub )
    return( [ digest, sub ] )
+
def load_chunk( key ):
    """Fetch a chunk stored by save_chunk() from its pool zip file.

    key is the [ digest, seq ] pair returned by save_chunk().  Returns
    the raw chunk data.  Raises IOError if the stored member is empty;
    a missing member propagates zipfile's KeyError, as before.
    """
    if FuseArchive.magic_profiling:
        return ''

    ( thash, seq ) = key
    logging.debug( "Begin load_chunk" )

    chars = list( thash )
    logging.debug( chars )

    # Todo: make a digest -> path function to share with deflate
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    zipname = hexdigest[ 0:4 ] + ".zip"
    logging.debug( "Zip name: " + zipname )
    z = zipfile.ZipFile( "./storage/" + zipname, 'r', zipfile.ZIP_DEFLATED, True )

    try:
        subpath = "%s_%d" % ( hexdigest, seq )
        logging.debug( "Chunk path: " + subpath )
        data = z.read( subpath )
        if len( data ):
            logging.debug( "Exporting chunk" )
            chunk = data
        else:
            # NOTE(review): a legitimately empty chunk also lands here —
            # confirm zero-length chunks cannot be stored
            raise IOError( "Zero-length chunk member: " + subpath )
    finally:
        # Always release the zip handle, even when z.read raises
        z.close()

    if deep_debug:
        # Bugfix: '%' instead of '+' so the hex dump is substituted
        logging.debug( "Load-Chunk: %s" % hexlify( chunk ) )

    return chunk
+
+
diff --git a/FuseArchive/Storage/__init__.py b/FuseArchive/Storage/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/FuseArchive/Storage/__init__.py