aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Steve Slaven <bpk@hoopajoo.net> 2009-07-24 21:15:23 (GMT)
committer Steve Slaven <bpk@hoopajoo.net> 2009-07-24 21:15:23 (GMT)
commit 54c2eb333a65d8b16f7edca0b7d0909f76088829 (patch)
tree b237b681a4bfc3211bec532d9133f44d83360f14
parent 1c0b8383fb01a03d8c18665f4f9837c3288bae6b (diff)
download fusearchive-54c2eb333a65d8b16f7edca0b7d0909f76088829.zip
fusearchive-54c2eb333a65d8b16f7edca0b7d0909f76088829.tar.gz
fusearchive-54c2eb333a65d8b16f7edca0b7d0909f76088829.tar.bz2
Testing out using zip files for chunk storage to eliminate some of the huge
overhead of having thousands of tiny files and directories
-rwxr-xr-xfusearchive.py106
1 files changed, 103 insertions, 3 deletions
diff --git a/fusearchive.py b/fusearchive.py
index 4a860ee..faf465a 100755
--- a/fusearchive.py
+++ b/fusearchive.py
@@ -8,7 +8,7 @@
# See the file COPYING.
#
-import os, sys, fcntl, fuse, sha, cPickle, gzip, errno
+import os, sys, fcntl, fuse, sha, cPickle, gzip, errno, zipfile
from fuse import Fuse
import pdb
@@ -42,9 +42,15 @@ def dmsg(level,message):
if level <= debug_level:
print str(level) + ": " + str(message)
+def save_chunk( chunk ):
+    """Store a chunk via the zip-backed writer and hand back its key."""
+    key = _save_chunk_zip( chunk )
+    return key
+
+def load_chunk( key ):
+    """Fetch the chunk stored under key via the zip-backed reader."""
+    chunk = _load_chunk_zip( key )
+    return chunk
+
# This will write out a data block, it will return a key that can get this
# data back later
-def save_chunk( chunk ):
+def _save_chunk_fs( chunk ):
if magic_profiling:
return( [ 0, 0 ] )
@@ -107,8 +113,68 @@ def save_chunk( chunk ):
dmsg( 3, "Got chunk slot: " + str( sub ) )
return( [ digest, sub ] )
+def _save_chunk_zip( chunk ):
+    """Store chunk inside a zip archive under ./storage and return its key.
+
+    The archive is named after the first four hex digits of the chunk's
+    sha digest; the entry inside it is named "<hexdigest>_<slot>".  Slots
+    are probed from 0 upward: an entry holding identical data is reused,
+    an entry holding different data (a digest collision) is skipped, and
+    the first missing entry is written.
+
+    Returns the list [ digest, slot ], where digest is the raw (binary)
+    sha digest and slot is the integer that was chosen.  When
+    magic_profiling is set nothing is stored and [ 0, 0 ] is returned.
+
+    NOTE(review): an entry that reads back as '' is treated the same as a
+    missing entry, so a zero-length chunk is written again on every call.
+    """
+    if magic_profiling:
+        return( [ 0, 0 ] )
+
+    dmsg( 2, "Begin save_chunk, length: " + str( len( chunk ) ) )
+    if debug_level > 4:
+        dmsg( 5, "Chunk: " + str( chunk ) )
+
+    # Save this hash string, similar to the backuppc algo
+    digest = sha.new( chunk ).digest()
+
+    # Write out our chunk
+    chars = list( digest )
+    dmsg( 4, chars )
+
+    # We make the hexdigest here, yeah we could just call hexdigest()
+    # but we need to essentially do this same thing to reassemble the
+    # file anyway
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+
+    # Four hex digits pick the archive, so at most 16**4 = 65536 zip files
+    zipname = hexdigest[ 0:4 ] + ".zip"
+    zippath = "./storage/" + zipname
+    dmsg( 3, "Zip name: " + zipname )
+    if not os.path.exists( zippath ):
+        dmsg( 3, "Creating intial empty zip" )
+        z = zipfile.ZipFile( zippath, 'w', zipfile.ZIP_DEFLATED, True )
+        try:
+            # append mode throws an exception if it's not zip, or maybe
+            # it's just zero-length files, so seed the archive with a
+            # placeholder entry
+            z.writestr( 'junk', 'junk' )
+        finally:
+            z.close()
+
+    z = zipfile.ZipFile( zippath, 'a', zipfile.ZIP_DEFLATED, True )
+    try:
+        # Find a chunk slot
+        sub = 0
+        while True:
+            checkpath = hexdigest + "_" + str( sub )
+            dmsg( 3, "Checking: " + checkpath )
+            try:
+                data = z.read( checkpath )
+            except KeyError:
+                # Only "no such entry" means the slot is free; any other
+                # error (e.g. a corrupt archive) must not be mistaken
+                # for an empty slot, so it is allowed to propagate
+                data = ''
+
+            if len(data):
+                if data == chunk:
+                    dmsg( 3, "Found existing block" )
+                    break
+                else:
+                    dmsg( 3, "Block exists but is not the same" )
+                    sub += 1
+            else:
+                # We found a spot, dump our data here
+                dmsg( 3, "No block here, creating new block" )
+                z.writestr( checkpath, chunk )
+                break
+    finally:
+        # Always close, even on error, so the handle is not leaked and
+        # the central directory is flushed for entries already written
+        z.close()
+
+    dmsg( 3, "Got chunk slot: " + str( sub ) )
+    return( [ digest, sub ] )
+
# This will return a data block by key that was saved previously
-def load_chunk( key ):
+def _load_chunk_fs( key ):
if magic_profiling:
return ''
@@ -142,6 +208,40 @@ def load_chunk( key ):
return chunk
+def _load_chunk_zip( key ):
+    """Return the chunk data previously stored under key in a zip archive.
+
+    key is the pair ( digest, slot ): digest is the raw (binary) digest
+    string and slot the integer suffix of the entry.  The archive is
+    ./storage/<first four hex digits of the digest>.zip and the entry is
+    "<hexdigest>_<slot>".
+
+    Raises IOError if the entry exists but is empty; a missing entry
+    surfaces as the KeyError raised by ZipFile.read.  When magic_profiling
+    is set nothing is read and '' is returned.
+    """
+    if magic_profiling:
+        return ''
+
+    ( thash, seq ) = key
+    dmsg( 2, "Begin load_chunk" )
+
+    chars = list( thash )
+    dmsg( 4, chars )
+
+    # Todo: make a digest -> path function to share with deflate
+    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
+
+    zipname = hexdigest[ 0:4 ] + ".zip"
+    dmsg( 3, "Zip name: " + zipname )
+    z = zipfile.ZipFile( "./storage/" + zipname, 'r', zipfile.ZIP_DEFLATED, True )
+    try:
+        subpath = hexdigest + "_" + str( seq )
+        dmsg( 3, "Chunk path: " + subpath )
+        chunk = z.read( subpath )
+    finally:
+        # Close on every path, including when read() raises for a
+        # missing entry, so the archive handle is never leaked
+        z.close()
+
+    if not len( chunk ):
+        raise IOError( "Empty chunk: " + subpath )
+
+    dmsg( 3, "Exporting chunk" )
+    if debug_level > 4:
+        dmsg( 5, "Load-Chunk: " + str( chunk ) )
+
+    return chunk
+
+
class FuseArchiveStream:
"""This just allows switching out writer classes easily"""
@staticmethod