diff options
-rwxr-xr-x | fusearchive.py | 218 |
1 files changed, 110 insertions, 108 deletions
diff --git a/fusearchive.py b/fusearchive.py index 00a156d..f4e97d8 100755 --- a/fusearchive.py +++ b/fusearchive.py @@ -8,7 +8,8 @@ # See the file COPYING. # -import os, sys, fcntl, fuse, sha, cPickle, gzip, errno, zipfile, stat +import os, sys, fcntl, fuse, sha, cPickle, gzip, errno +import zipfile, stat, logging from fuse import Fuse import pdb @@ -21,10 +22,15 @@ fuse.fuse_python_api = (0, 2) fuse.feature_assert('stateful_files', 'has_init') + +logging.basicConfig( level = logging.DEBUG, + format = '%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(module)s:%(funcName)s() %(message)s', + filename = sys.argv[ 0 ] + ".log", + filemode = 'w' ) + magic_profiling = False enable_stats = False enable_psyco = False -debug_level = 3 # These control some of the file output magic_blocksize = 1024 * 128 @@ -48,10 +54,6 @@ def flag2mode(flags): return m -def dmsg(level,message): - if level <= debug_level: - print str(level) + ": " + str(message) - def save_chunk( chunk ): if chunkstyle == 'fs': return _save_chunk_fs( chunk ) @@ -74,16 +76,16 @@ def _save_chunk_fs( chunk ): if magic_profiling: return( [ 0, 0 ] ) - dmsg( 2, "Begin save_chunk, length: " + str( len( chunk ) ) ) + logging.debug( "Begin save_chunk, length: " + str( len( chunk ) ) ) if debug_level > 4: - dmsg( 5, "Chunk: " + str( chunk ) ) + logging.debug( "Chunk: " + str( chunk ) ) # Save this hash string, similar to the backuppc algo digest = sha.new( chunk ).digest() # Write out our chunk chars = list( digest ) - dmsg( 4, chars ) + logging.debug( chars ) # We make the hexdigest here, yeah we could just call hexdigest() # but we need to essentially do this same thing to reassemble the @@ -93,23 +95,23 @@ def _save_chunk_fs( chunk ): # Subparts just needs the first N chars subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ] - dmsg( 4, subparts ) + logging.debug( subparts ) subpath = '/'.join( subparts ) - dmsg( 3, "Subpath: " + subpath ) + logging.debug( "Subpath: " + subpath ) # Make sure this sub path exists nextpart = "./storage" for part in subparts: nextpart += "/" + part if not os.path.exists( nextpart ): - dmsg( 3, "Creating subdir: " + nextpart ) + logging.debug( "Creating subdir: " + nextpart ) os.mkdir( nextpart ) # Find a chunk slot sub = 0 while True: checkpath = "./storage/" + subpath + "/" + hexdigest + "_" + str( sub ) - dmsg( 3, "Checking: " + checkpath ) + logging.debug( "Checking: " + checkpath ) if os.path.exists( checkpath ): # Check if this is our data verify = FuseArchiveStream.open( checkpath, "rb" ) @@ -117,36 +119,36 @@ def _save_chunk_fs( chunk ): verify.close() if verify_contents == chunk: - dmsg( 3, "Found existing block" ) + logging.debug( "Found existing block" ) break else: - dmsg( 3, "Block exists but is not the same" ) + logging.debug( "Block exists but is not the same" ) sub += 1 else: # We found a spot, dump our data here - dmsg( 3, "No block here, creating new block" ) + logging.debug( "No block here, creating new block" ) savechunk = FuseArchiveStream.open( checkpath, "wb" ) savechunk.write( chunk ) savechunk.close() break - dmsg( 3, "Got chunk slot: " + str( sub ) ) + logging.debug( "Got chunk slot: " + str( sub ) ) return( [ digest, sub ] ) def _save_chunk_zip( chunk ): if magic_profiling: return( [ 0, 0 ] ) - dmsg( 2, "Begin save_chunk, length: " + str( len( chunk ) ) ) + logging.debug( "Begin save_chunk, length: " + str( len( chunk ) ) ) if debug_level > 4: - dmsg( 5, "Chunk: " + str( chunk ) ) + logging.debug( "Chunk: " + str( chunk ) ) # Save this hash string, similar to the backuppc algo digest = sha.new( chunk ).digest() # Write out our chunk chars = list( digest ) - dmsg( 4, chars ) + logging.debug( chars ) # We make the hexdigest here, yeah we could just call hexdigest() # but we need to essentially do this same thing to reassemble the @@ -155,9 +157,9 @@ def _save_chunk_zip( chunk ): # Should be about max of 32k zip files zipname = hexdigest[ 0:4 ] + ".zip" - dmsg( 3, "Zip name: " + zipname ) + logging.debug( "Zip name: " + zipname ) if not os.path.exists( "./storage/" + zipname ): - dmsg( 3, "Creating intial empty zip" ) + logging.debug( "Creating intial empty zip" ) z = zipfile.ZipFile( "./storage/" + zipname, 'w', zipfile.ZIP_DEFLATED, True ) # append mode throws an exception if it's not zip, or maybe it's # just zero-length files @@ -170,7 +172,7 @@ def _save_chunk_zip( chunk ): sub = 0 while True: checkpath = hexdigest + "_" + str( sub ) - dmsg( 3, "Checking: " + checkpath ) + logging.debug( "Checking: " + checkpath ) try: data = z.read( checkpath ) except: @@ -178,19 +180,19 @@ def _save_chunk_zip( chunk ): if len(data): if data == chunk: - dmsg( 3, "Found existing block" ) + logging.debug( "Found existing block" ) break else: - dmsg( 3, "Block exists but is not the same" ) + logging.debug( "Block exists but is not the same" ) sub += 1 else: # We found a spot, dump our data here - dmsg( 3, "No block here, creating new block" ) + logging.debug( "No block here, creating new block" ) z.writestr( checkpath, chunk ) break z.close() - dmsg( 3, "Got chunk slot: " + str( sub ) ) + logging.debug( "Got chunk slot: " + str( sub ) ) return( [ digest, sub ] ) # This will return a data block by key that was saved previously @@ -199,24 +201,24 @@ def _load_chunk_fs( key ): return '' ( thash, seq ) = key - dmsg( 2, "Begin load_chunk" ) + logging.debug( "Begin load_chunk" ) chars = list( thash ) - dmsg( 4, chars ) + logging.debug( chars ) # Todo: make a digest -> path function to share with deflate hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] ) - dmsg( 3, "Hash is: " + str(hexdigest) + " sub " + str(seq) ) + logging.debug( "Hash is: " + str(hexdigest) + " sub " + str(seq) ) subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ] subpath = '/'.join( subparts ) - dmsg( 3, "Subpath: " + subpath ) + logging.debug( "Subpath: " + subpath ) subpath += "/" + hexdigest + "_" + str( seq ) - dmsg( 3, "Chunk path: " + subpath ) + logging.debug( "Chunk path: " + subpath ) if os.path.exists( "./storage/" + subpath ): - dmsg( 3, "Exporting chunk" ) + logging.debug( "Exporting chunk" ) readchunk = FuseArchiveStream.open( "./storage/" + subpath ) chunk = readchunk.read() readchunk.close() @@ -224,7 +226,7 @@ def _load_chunk_fs( key ): raise IOError if debug_level > 4: - dmsg( 5, "Load-Chunk: " + str( chunk ) ) + logging.debug( "Load-Chunk: " + str( chunk ) ) return chunk @@ -233,30 +235,30 @@ def _load_chunk_zip( key ): return '' ( thash, seq ) = key - dmsg( 2, "Begin load_chunk" ) + logging.debug( "Begin load_chunk" ) chars = list( thash ) - dmsg( 4, chars ) + logging.debug( chars ) # Todo: make a digest -> path function to share with deflate hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] ) zipname = hexdigest[ 0:4 ] + ".zip" - dmsg( 3, "Zip name: " + zipname ) + logging.debug( "Zip name: " + zipname ) z = zipfile.ZipFile( "./storage/" + zipname, 'r', zipfile.ZIP_DEFLATED, True ) subpath = hexdigest + "_" + str( seq ) - dmsg( 3, "Chunk path: " + subpath ) + logging.debug( "Chunk path: " + subpath ) data = z.read( subpath ) if len( data ): - dmsg( 3, "Exporting chunk" ) + logging.debug( "Exporting chunk" ) chunk = data else: z.close() raise IOError if debug_level > 4: - dmsg( 5, "Load-Chunk: " + str( chunk ) ) + logging.debug( "Load-Chunk: " + str( chunk ) ) z.close() return chunk @@ -338,13 +340,13 @@ class FuseArchive(Fuse): treefile = "./tree" + path if os.path.isfile( treefile ): - dmsg( 3, "Delegating getattr to FuserArchiveFile" ) + logging.debug( "Delegating getattr to FuserArchiveFile" ) f = self.FuseArchiveFile( path, os.O_RDONLY, 0 ) stats = f.fgetattr() f.release( 0 ) else: - dmsg( 3, "Using os.lstat to get stats" ) + logging.debug( "Using os.lstat to get stats" ) stats = os.lstat( treefile ) return stats @@ -379,7 +381,7 @@ class FuseArchive(Fuse): def truncate(self, path, len): # Truncate using the ftruncate on the file - dmsg( 2, "Using FuseArchiveFile to truncate " + path + " to " + str(len) ) + logging.debug( "Using FuseArchiveFile to truncate " + path + " to " + str(len) ) f = self.FuseArchiveFile( path, os.O_WRONLY | os.O_APPEND, 0 ) f.ftruncate(len) f.release( 0 ) @@ -452,7 +454,7 @@ class FuseArchive(Fuse): def __init__(self, path, flags, *mode): # Inflate the file - dmsg( 1, "Init file: " + path ) + logging.debug( "Init file: " + path ) self.orig_path = path # init rw and offset @@ -497,22 +499,22 @@ class FuseArchive(Fuse): # Open the file now and keep the fh around so that cp -a on r/o # files works (in the create a read-only file for writing case) src = "./tree" + path - dmsg( 3, "Saving fh for " + src ) + logging.debug( "Saving fh for " + src ) self.file = os.fdopen( os.open( src, flags, *mode ), flag2mode( flags ) ) if preexist: # Read in file info table - dmsg( 3, "Unpickling: " + str( self.file ) ) + logging.debug( "Unpickling: " + str( self.file ) ) # TODO: return an IO error if inflating fails magic = FuseArchiveSerializer.loadfh( self.file ) - dmsg( 3, "Got data: " + str( magic ) ) + logging.debug( "Got data: " + str( magic ) ) self.size = magic[ 'size' ] self.chunks = magic[ 'chunks' ] self.chunk_size = magic[ 'chunk_size' ] else: if self.wr: - dmsg( 2, "File doesn't exist and we're going to write, creating temp empty file" ) + logging.debug( "File doesn't exist and we're going to write, creating temp empty file" ) self.modified = True self.flush() @@ -520,39 +522,39 @@ class FuseArchive(Fuse): self.keep_cache = False #pdb.set_trace() - dmsg( 3, str(self) + " init complete" ) + logging.debug( str(self) + " init complete" ) def _load_chunk( self, index ): # If the current chunk is the same as the chunk we're loading # just return if index == self.chunk_index: - dmsg( 3, "Load chunk is same as current chunk, all done" ) + logging.debug( "Load chunk is same as current chunk, all done" ) return # Save this chunk if modified self._save_chunk() - dmsg( 3, "Loading chunk " + str(index) ) + logging.debug( "Loading chunk " + str(index) ) key = None size = len( self.chunks ) if index >= size: - dmsg( 3, "Index doesn't exist" ) + logging.debug( "Index doesn't exist" ) else: key = self.chunks[ index ] if key: if isinstance( key, str ): - dmsg( 3, "Found cached dirty page" ) + logging.debug( "Found cached dirty page" ) self.chunk = key else: - dmsg( 3, "Index: " + str( key ) ) + logging.debug( "Index: " + str( key ) ) self.chunk = load_chunk( key ) else: - dmsg( 3, "No chunk at this index, loading nothing" ) + logging.debug( "No chunk at this index, loading nothing" ) self.chunk = '' - dmsg( 3, "Loaded chunk of length: " + str( len( self.chunk ) ) ) + logging.debug( "Loaded chunk of length: " + str( len( self.chunk ) ) ) self.chunk_index = index self.chunk_modified = False @@ -569,8 +571,8 @@ class FuseArchive(Fuse): if isinstance( self.chunks[ self.chunk_index ], list ) or \ len( self.chunks[ self.chunk_index ] ) == 0: self.dirty_chunks += 1 - dmsg( 3, "Dirty chunks is now: " + str( self.dirty_chunks ) ) - dmsg( 3, "Dirty flush at: " + str( dirty_flush ) ) + logging.debug( "Dirty chunks is now: " + str( self.dirty_chunks ) ) + logging.debug( "Dirty flush at: " + str( dirty_flush ) ) # Save the dirty chunk temporarily in memory self.chunks[ self.chunk_index ] = self.chunk @@ -583,14 +585,14 @@ class FuseArchive(Fuse): def _flush_chunks(self): for index in range( len( self.chunks ) ): if isinstance( self.chunks[ index ], str ): - dmsg( 3, "Flushing chunk at " + str( index ) ) + logging.debug( "Flushing chunk at " + str( index ) ) key = save_chunk( self.chunks[ index ] ) self.chunks[ index ] = key - dmsg( 3, "Key was " + str( key ) ) + logging.debug( "Key was " + str( key ) ) self.dirty_chunks = 0 def read(self, length, offset): - dmsg( 3, "Reading from " + self.orig_path + " offset: " + str( offset ) + logging.debug( "Reading from " + self.orig_path + " offset: " + str( offset ) + " length: " + str( length ) ) data_read = 0 @@ -601,7 +603,7 @@ class FuseArchive(Fuse): # Keep reading chunks until we have at least this much data while data_read < length and not is_eof: - dmsg( 3, "Pulling chunk data: " + str( index ) ) + logging.debug( "Pulling chunk data: " + str( index ) ) self._load_chunk( index ) if len(self.chunk): chunk_remaining = len(self.chunk) - rest @@ -610,29 +612,29 @@ class FuseArchive(Fuse): if data_left < chunk_remaining: to_read = data_left - dmsg( 3, "chunk_remaining: " + str( chunk_remaining ) ) - dmsg( 3, "data_left: " + str( data_left ) ) - dmsg( 3, "data_read: " + str( data_read ) ) - dmsg( 3, "rest: " + str( rest ) ) - dmsg( 3, "Copying " + str(to_read) + " bytes" ) + logging.debug( "chunk_remaining: " + str( chunk_remaining ) ) + logging.debug( "data_left: " + str( data_left ) ) + logging.debug( "data_read: " + str( data_read ) ) + logging.debug( "rest: " + str( rest ) ) + logging.debug( "Copying " + str(to_read) + " bytes" ) data += self.chunk[ rest:(rest+to_read) ] data_read += to_read index += 1 rest = 0 else: - dmsg( 3, "No more chunk data, bye" ) + logging.debug( "No more chunk data, bye" ) is_eof = True - dmsg( 3, "Returning " + str( len( data ) ) + " bytes of data" ) - dmsg( 3, "Internal count was: " + str( data_read ) ) + logging.debug( "Returning " + str( len( data ) ) + " bytes of data" ) + logging.debug( "Internal count was: " + str( data_read ) ) return data def write(self, buf, offset): if magic_profiling: return len( buf ) - dmsg( 3, "Writing to " + self.orig_path + " offset: " + str( offset ) ) + logging.debug( "Writing to " + self.orig_path + " offset: " + str( offset ) ) index = int( offset / self.chunk_size ) rest = offset % self.chunk_size @@ -640,31 +642,31 @@ class FuseArchive(Fuse): buf_offset = 0 buf_len = len(buf) - dmsg( 3, "Length: " + str( buf_len ) ) + logging.debug( "Length: " + str( buf_len ) ) while( buf_offset < buf_len ): - dmsg( 3, "Pulling in chunk for writing: " + str(index) ) + logging.debug( "Pulling in chunk for writing: " + str(index) ) self._load_chunk( index ) buf_remain = buf_len - buf_offset chunk_remain = self.chunk_size - rest - dmsg( 3, "buf_remain: " + str(buf_remain) ) - dmsg( 3, "chunk_remain: " + str(chunk_remain) ) + logging.debug( "buf_remain: " + str(buf_remain) ) + logging.debug( "chunk_remain: " + str(chunk_remain) ) if chunk_remain < buf_remain: - dmsg( 3, "Writing " + str( chunk_remain ) + " bytes, buffer boundry" ) + logging.debug( "Writing " + str( chunk_remain ) + " bytes, buffer boundry" ) this_len = chunk_remain else: - dmsg( 3, "Writing final " + str( buf_remain ) + " bytes" ) + logging.debug( "Writing final " + str( buf_remain ) + " bytes" ) this_len = buf_remain - dmsg( 3, "Bytes to copy: " + str(this_len) ) - dmsg( 3, " buf offset: " + str(buf_offset) ) - dmsg( 3, " chunk offset: " + str(rest) ) + logging.debug( "Bytes to copy: " + str(this_len) ) + logging.debug( " buf offset: " + str(buf_offset) ) + logging.debug( " chunk offset: " + str(rest) ) if debug_level > 4: - dmsg( 5, "Pre-Buf: " + str(buf) ) - dmsg( 5, "Pre-Chunk: " + str(self.chunk) ) + logging.debug( "Pre-Buf: " + str(buf) ) + logging.debug( "Pre-Chunk: " + str(self.chunk) ) # Since python doesn't do in-place reassignment like you # can with splice() we will reconstruct the data by joining @@ -676,8 +678,8 @@ class FuseArchive(Fuse): self.chunk[ (rest + this_len): ] if debug_level > 4: - dmsg( 5, "Post-Buf: " + str(buf) ) - dmsg( 5, "Post-Chunk: " + str(self.chunk) ) + logging.debug( "Post-Buf: " + str(buf) ) + logging.debug( "Post-Chunk: " + str(self.chunk) ) buf_offset += this_len @@ -687,7 +689,7 @@ class FuseArchive(Fuse): self.chunk_modified = True self.modified = True - dmsg( 3, "Chunk size is now: " + str(len(self.chunk)) ) + logging.debug( "Chunk size is now: " + str(len(self.chunk)) ) return len(buf) # BUG: If you cp -a a file then quickly ls -l sometimes it doesn't show @@ -695,13 +697,13 @@ class FuseArchive(Fuse): # Maybe because release doesn't return a fuse message and is async? def release(self, flags): # Deflate the file - dmsg( 2, "Release: " + self.orig_path ) + logging.debug( "Release: " + self.orig_path ) self.flush() self.file.close() def _fflush(self): if self.wr and self.modified: - dmsg( 3, "_fflush!" ) + logging.debug( "_fflush!" ) # Save our main data self._save_chunk() @@ -713,15 +715,15 @@ class FuseArchive(Fuse): numchunks = len( self.chunks ) if numchunks > 0: # Load the last chunk - dmsg( 3, "We have " + str(numchunks) + " chunks, calculating size" ) + logging.debug( "We have " + str(numchunks) + " chunks, calculating size" ) self._load_chunk( numchunks - 1 ) self.size = ( numchunks - 1 ) * self.chunk_size + \ len( self.chunk ) else: - dmsg( 3, "No chunks, setting size to zero" ) + logging.debug( "No chunks, setting size to zero" ) self.size = 0 - dmsg( 3, "Size calculated is: " + str( self.size ) ) + logging.debug( "Size calculated is: " + str( self.size ) ) FuseArchiveSerializer.dumpfh( self.file, { 'size': self.size, @@ -737,7 +739,7 @@ class FuseArchive(Fuse): # Currently we treat fsync as flush since we don't keep any data # hanging around anyway in fh stuff def fsync(self, isfsyncfile): - dmsg( 2, "fsync " + self.orig_path ) + logging.debug( "fsync " + self.orig_path ) self._fflush() #if isfsyncfile and hasattr(os, 'fdatasync'): # os.fdatasync(self.fd) @@ -745,11 +747,11 @@ class FuseArchive(Fuse): # os.fsync(self.fd) def flush(self): - dmsg( 2, "flush " + self.orig_path ) + logging.debug( "flush " + self.orig_path ) self._fflush() def fgetattr(self): - dmsg( 3, "Overridding fgetattr" ) + logging.debug( "Overridding fgetattr" ) stats = FuseArchiveStat( os.lstat( "./tree" + self.orig_path ) ) if self.modified: @@ -768,23 +770,23 @@ class FuseArchive(Fuse): curr_chunks = len( self.chunks ) need_chunks = ( length / self.chunk_size ) extra_bytes = length % self.chunk_size - dmsg( 3, "Ftruncate - " + str(length) ) - dmsg( 3, " - curr_chunks: " + str( curr_chunks ) ) - dmsg( 3, " - need_chunks: " + str( need_chunks ) ) - dmsg( 3, " - extra_bytes: " + str( extra_bytes ) ) + logging.debug( "Ftruncate - " + str(length) ) + logging.debug( " - curr_chunks: " + str( curr_chunks ) ) + logging.debug( " - need_chunks: " + str( need_chunks ) ) + logging.debug( " - extra_bytes: " + str( extra_bytes ) ) if extra_bytes: - dmsg( 3, "Need an extra chunk" ) + logging.debug( "Need an extra chunk" ) need_chunks += 1 self._load_chunk( 0 ) pdb.set_trace() if need_chunks == 0: - dmsg( 3, "Creating 0 chunk file" ) + logging.debug( "Creating 0 chunk file" ) self.chunks = [] elif need_chunks > curr_chunks: - dmsg( 3, "Need to pad out file" ) + logging.debug( "Need to pad out file" ) # Just write out null bytes to the length requested self.flush() @@ -794,18 +796,18 @@ class FuseArchive(Fuse): if tmp_size + to_write > length: to_write = length - tmp_size - dmsg( 3, "Writing " + str( to_write ) + " bytes to extend file to " + str( length ) ) + logging.debug( "Writing " + str( to_write ) + " bytes to extend file to " + str( length ) ) self.write( "\0" * to_write, tmp_size + 1 ) tmp_size += to_write else: - dmsg( 3, "Truncating chunks" ) + logging.debug( "Truncating chunks" ) while True: - dmsg( 3, "Need chunks: " + str( need_chunks ) + " curr: " + str( curr_chunks ) ) + logging.debug( "Need chunks: " + str( need_chunks ) + " curr: " + str( curr_chunks ) ) if need_chunks == curr_chunks: break - dmsg( 3, "Deleting chunk " + str( self.chunk_index ) ) + logging.debug( "Deleting chunk " + str( self.chunk_index ) ) self.chunks.pop() curr_chunks = len( self.chunks ) @@ -813,15 +815,15 @@ class FuseArchive(Fuse): # last chunk if len( self.chunks ): self._load_chunk( len( self.chunks ) - 1 ) - dmsg( 3, "Loaded final chunk, len: " + str( len( self.chunk ) ) ) + logging.debug( "Loaded final chunk, len: " + str( len( self.chunk ) ) ) # Now truncate or extend this item if len( self.chunk ) < extra_bytes: remaining = extra_bytes - len(self.chunk) - dmsg( 3, "Extending final chunk " + str( remaining ) + " bytes" ) + logging.debug( "Extending final chunk " + str( remaining ) + " bytes" ) self.chunk += "\0" * remaining else: - dmsg( 3, "Truncating final chunk to " + str( extra_bytes ) ) + logging.debug( "Truncating final chunk to " + str( extra_bytes ) ) self.chunk = self.chunk[ :extra_bytes ] self.modified = True @@ -829,7 +831,7 @@ class FuseArchive(Fuse): self._fflush() def lock(self, cmd, owner, **kw): - dmsg( 3, "WARNING: locking unsupported" ) + logging.debug( "WARNING: locking unsupported" ) return 1 # The code here is much rather just a demonstration of the locking |