From ea8e86da16ed863858dd1b3d4ede9e087c8d8a0c Mon Sep 17 00:00:00 2001 From: Steve Slaven Date: Thu, 23 Jul 2009 13:41:08 -0700 Subject: Modified to do chunking on demand, runs but file writing not working diff --git a/fusearchive.py b/fusearchive.py index 9b6364f..92717cf 100755 --- a/fusearchive.py +++ b/fusearchive.py @@ -26,7 +26,7 @@ fuse.feature_assert('stateful_files', 'has_init') magic_blocksize = 1024 * 32 magic_depth = 5 -debug_level = 2 +debug_level = 4 def dmsg(level,message): if level <= debug_level: @@ -187,7 +187,7 @@ def deflate( src, dest ): out.close() class FuseArchiveStat(fuse.Stat): - def __init__(self, stat, overstat): + def __init__(self, stat): self.st_mode = stat.st_mode self.st_ino = stat.st_ino self.st_dev = stat.st_dev @@ -195,13 +195,17 @@ class FuseArchiveStat(fuse.Stat): self.st_nlink = stat.st_nlink self.st_uid = stat.st_uid self.st_gid = stat.st_gid - self.st_size = overstat.st_size + self.st_size = stat.st_size self.st_atime = stat.st_atime self.st_mtime = stat.st_mtime self.st_ctime = stat.st_mtime + self.st_blocks = stat.st_blocks + self.st_blksize = stat.st_blksize + + def overstat( self, size ): + self.st_size = size # Yeah we shouldn't always just add 1 self.st_blocks = int( self.st_size / 512 ) + 1 - self.st_blksize = stat.st_blksize class FuseArchive(Fuse): @@ -226,7 +230,8 @@ class FuseArchive(Fuse): inp.close() dmsg( 3, "Overridding getattr" ) - stats = FuseArchiveStat( stats, magic[ 'stat' ] ) + stats = FuseArchiveStat( stats ) + stats.overstat( magic[ 'size' ] ) return stats @@ -335,47 +340,124 @@ class FuseArchive(Fuse): # Inflate the file dmsg( 1, "Init file: " + path ) self.orig_path = path; - ( fdnum, self.tmp_name ) = tempfile.mkstemp(); - #os.close( fdnum ); + + # init rw and offset + self.offset = 0 + self.read = False + self.write = False + self.size = 0 + + # This is the current in-memory chunk and offset in to data[] + self.chunk = None + self.chunk_index = 0 + self.chunk_modified = False + self.chunk_size = magic_blocksize + + # The chunk table + self.chunks = [] + + # TODO: Better flag handling here? + md = flag2mode( flags ) + if re.match( '^r', md ): + self.read = True + elif re.match( '^w', md ): + self.write = True + elif re.match( '^a', md ): + self.write = True + self.offset = -1 if os.path.exists( "./tree" + self.orig_path ): - inflate( "./tree" + path, self.tmp_name ) + # Read in file info table + src = "./tree" + self.orig_path + dmsg( 3, "Unpickling: " + src ) + # TODO: return an IO error if inflating fails + inp = gzip.open( src, "r" ) + magic = pickle.load( inp ) + inp.close() + dmsg( 3, "Got data: " + str( magic ) ) + self.size = magic[ 'size' ] + self.chunks = magic[ 'chunks' ] + self.chunk_size = magic[ 'chunk_size' ] else: if re.match( '(a|w)', flag2mode( flags ) ): dmsg( 2, "File doesn't exist and we're going to write, creating temp empty file" ) - deflate( "/dev/null", "./tree" + path ) + self.flush() - dmsg( 2, "Shadow file: " + self.tmp_name + " for " + self.orig_path ) - dmsg( 3, "Going to open shadow file with flags: " + str(flags) + " mode " + str(mode) ) - # pdb.set_trace() - dmsg( 3, "Flag2mode is: " + str( flag2mode( flags ) ) ) + self.direct_io = False + self.keep_cache = False - # Just use the fdnum they gave us instead of reopening it, - # since that might fail - # fdnum = os.open( self.tmp_name, flags, *mode ) - #print "Got fdnum: " + str(fdnum) - self.file = os.fdopen( fdnum, flag2mode( flags ) ) - dmsg( 3, "Open" ) + dmsg( 3, str(self) + " init complete" ) - self.fd = self.file.fileno() + def _load_chunk( self, index ): + # Save this chunk if modified + if self.chunk_modified: + self._save_chunk() - self.direct_io = False - self.keep_cache = False + dmsg( 3, "Loading chunk " + str(index) ) + key = None - self.modified = False + try: + key = self.chunks[ index ] + except IndexError: + dmsg( 3, "Index doesn't exist" ) - dmsg( 3, str(self) + " init complete" ) + if key: + dmsg( 3, "Index: " + str( key ) ) + self.chunk = load_chunk( key ) + else: + dmsg( 3, "No chunk at this index, loading nothing" ) + self.chunk = None + + self.chunk_index = index + self.chunk_modified = False + + def _save_chunk(): + dmsg( 3, "Saving chunk " + self.chunk_index ) + key = save_chunk( self.chunk ) + self.chunks[ index ] = key + dmsg( 3, "Key was " + str( key ) ) def read(self, length, offset): - dmsg( 3, "Reading from " + self.orig_path ) - self.file.seek(offset) - return self.file.read(length) + dmsg( 3, "Reading from " + self.orig_path + " offset: " + str( offset ) + + " length: " + str( length ) ) + + data_read = 0 + data = None + index = int( offset / self.chunk_size ) + rest = offset % self.chunk_size + + # Keep reading chunks until we have at least this much data + while data_read < length: + dmsg( 3, "Pulling chunk data" ) + self._load_chunk( index ) + chunk_remaining = self.chunk_size - rest + data += substr( self.chunk[ rest:chunk_remaining ] ) + data_read = len( data ) + index += 1 + rest = 0 + + return data[ :length ] def write(self, buf, offset): - dmsg( 3, "Writing to " + self.orig_path ) - self.file.seek(offset) - self.file.write(buf) - self.modified = True + dmsg( 3, "Writing to " + self.orig_path + " offset: " + str( offset ) ) + + index = int( offset / self.chunk_size ) + rest = offset % self.chunk_size + + buf_offset = 0 + buf_len = len(buf) + + dmg( 3, "Length: " + str( buf_len ) ) + while( buf_offset < buf_len ): + dmsg( 3, "Pulling in chunk for writing" ) + self._load_chunk( index ) + buf_remain = buf_len - buf_offset + if rest < buf_remain: + dmsg( 3, "Writing " + str( rest ) + " bytes, buffer boundry" ) + else: + dmsg( 3, "Writing final " + str( buf_remain ) + " bytes" ) + + self.chunk_modified = True return len(buf) # BUG: If you cp -a a file then quickly ls -l sometimes it doesn't show @@ -384,44 +466,51 @@ class FuseArchive(Fuse): def release(self, flags): # Deflate the file dmsg( 2, "Release: " + self.orig_path ) - self.file.close() - - if self.modified: - dmsg( 2, "Copying working file back to storage: " + \ - self.tmp_name + " -> " + self.orig_path ) - - #pdb.set_trace() - deflate( self.tmp_name, "./tree" + self.orig_path ) - else: - dmsg( 2, "File not modified, not copying back" ) - - dmsg( 2, "Deleting old file: " + self.tmp_name ) - os.unlink( self.tmp_name ); + self.flush() def _fflush(self): - if 'w' in self.file.mode or 'a' in self.file.mode: - self.file.flush() - + if self.write: + dmsg( 3, "_fflush!" ) + # Save our main data + out = gzip.open( "./tree" + self.orig_path, "w" ) + pickle.dump( { + 'size': self.size, + 'chunks': self.chunks, + 'chunk_size': self.chunk_size + }, out ) + out.close() + + + # Currently we treat fsync as flush since we don't keep any data + # hanging around anyway in fh stuff def fsync(self, isfsyncfile): + dmsg( 2, "fsync " + self.orig_path ) self._fflush() - if isfsyncfile and hasattr(os, 'fdatasync'): - os.fdatasync(self.fd) - else: - os.fsync(self.fd) + #if isfsyncfile and hasattr(os, 'fdatasync'): + # os.fdatasync(self.fd) + #else: + # os.fsync(self.fd) def flush(self): + dmsg( 2, "flush " + self.orig_path ) self._fflush() - # cf. xmp_flush() in fusexmp_fh.c - os.close(os.dup(self.fd)) def fgetattr(self): - return os.fstat(self.fd) + print "WARNING: fgetattr is broken!!!!" + return os.lstat( "./tree" + self.orig_path ) def ftruncate(self, len): - self.modified = True - self.file.truncate(len) + self.chunks = [] + self._load_chunk( 0 ) + self._fflush() + + if len > 0: + print "WARNING: ftruncate is broken!!!" def lock(self, cmd, owner, **kw): + dmsg( 3, "WARNING: locking unsupported" ) + return + # The code here is much rather just a demonstration of the locking # API than something which actually was seen to be useful. -- cgit v0.10.2