#!/usr/bin/env python

# Copyright (C) 2001  Jeff Epler
# Copyright (C) 2006  Csaba Henk
# Copyright (C) 2009  Steve Slaven
#
# This program can be distributed under the terms of the GNU LGPL.
# See the file COPYING.
#

import os, sys, fcntl, fuse, sha, cPickle, gzip, errno
import zipfile, logging
from fuse import Fuse
from binascii import hexlify
#import pdb

if not hasattr(fuse, '__version__'):
    raise RuntimeError, \
        "your fuse-py doesn't know of fuse.__version__, probably it's too old."

fuse.fuse_python_api = (0, 2)

fuse.feature_assert('stateful_files', 'has_init')

#log_level = logging.DEBUG
log_level = logging.WARNING

logging.basicConfig( level = log_level,
                     format = '%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(module)s:%(funcName)s() %(message)s',
                     stream = sys.stderr,
                     filemode = 'w' )

magic_profiling = False
enable_stats = False
enable_psyco = False
deep_debug = False

# These control some of the file output
magic_blocksize = 1024 * 128
# Use a tiny block size to debug writes, so you can use a smaller test file
#magic_blocksize = 1024
magic_depth = 5
gzip_compress_level = 6
chunkstyle = 'fs'

# Memory for dirty blocks, per file (1M)
dirty_size = 1024 * 1024 * 1
# This is the number of actual blocks in that size
dirty_flush = int( dirty_size / magic_blocksize )

# This is a cache of open files by inode, to fix the lseek == size problem.
# That causes a failure in fsx-linux because for lseek(fd, 0, SEEK_END) it
# apparently does a getattr to find the file length and then subtracts the
# offset from that to pass to write or whatever, since the offset is passed
# to write and we don't maintain one internally.  xmp.py also fails this
# test.
dirty_cache = {}

def flag2mode(flags):
    md = {os.O_RDONLY: 'r', os.O_WRONLY: 'w', os.O_RDWR: 'w+'}
    m = md[flags & (os.O_RDONLY | os.O_WRONLY | os.O_RDWR)]

    if flags & os.O_APPEND:
        m = m.replace('w', 'a', 1)

    return m

def save_chunk( chunk ):
    if chunkstyle == 'fs':
        return _save_chunk_fs( chunk )
    elif chunkstyle == 'zip':
        return _save_chunk_zip( chunk )
    else:
        raise ValueError( 'Unknown chunk style' )

def load_chunk( key ):
    if chunkstyle == 'fs':
        return _load_chunk_fs( key )
    elif chunkstyle == 'zip':
        return _load_chunk_zip( key )
    else:
        raise ValueError( 'Unknown chunk style' )

# This will write out a data block, it will return a key that can get this
# data back later
def _save_chunk_fs( chunk ):
    if magic_profiling:
        return( [ 0, 0 ] )

    logging.debug( "Begin save_chunk, length: %d" % len( chunk ) )
    if deep_debug:
        logging.debug( "Chunk: %s" % hexlify( chunk ) )

    # Save this hash string, similar to the backuppc algo
    digest = sha.new( chunk ).digest()

    # Write out our chunk
    chars = list( digest )
    logging.debug( chars )

    # We make the hexdigest here, yeah we could just call hexdigest()
    # but we need to essentially do this same thing to reassemble the
    # file anyway
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    # Subparts just needs the first N chars
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]

    logging.debug( subparts )
    subpath = '/'.join( subparts )
    logging.debug( "Subpath: " + subpath )

    # Make sure this sub path exists
    nextpart = "./storage"
    for part in subparts:
        nextpart += "/" + part
        if not os.path.exists( nextpart ):
            logging.debug( "Creating subdir: " + nextpart )
            os.mkdir( nextpart )

    # Find a chunk slot
    sub = 0
    while True:
        checkpath = "./storage/%s/%s_%d" % ( subpath, hexdigest, sub )
        logging.debug( "Checking: " + checkpath )
        if os.path.exists( checkpath ):
            # Check if this is our data
            verify = FuseArchiveStream.open( checkpath, "rb" )
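            # Read the existing chunk back and compare byte-for-byte; if it
            # differs (a hash collision), we advance to the next _<sub> slot
            # for this digest instead of overwriting it.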
            verify_contents = verify.read()
            verify.close()

            if verify_contents == chunk:
                logging.debug( "Found existing block" )
                break
            else:
                logging.debug( "Block exists but is not the same" )
                sub += 1
        else:
            # We found a spot, dump our data here
            logging.debug( "No block here, creating new block" )
            savechunk = FuseArchiveStream.open( checkpath, "wb" )
            savechunk.write( chunk )
            savechunk.close()
            break

    logging.debug( "Got chunk slot: %d" % sub )
    return( [ digest, sub ] )

def _save_chunk_zip( chunk ):
    if magic_profiling:
        return( [ 0, 0 ] )

    logging.debug( "Begin save_chunk, length: %d" % len( chunk ) )
    if deep_debug:
        logging.debug( "Chunk: %s" % hexlify( chunk ) )

    # Save this hash string, similar to the backuppc algo
    digest = sha.new( chunk ).digest()

    # Write out our chunk
    chars = list( digest )
    logging.debug( chars )

    # We make the hexdigest here, yeah we could just call hexdigest()
    # but we need to essentially do this same thing to reassemble the
    # file anyway
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    # Should be about max of 32k zip files
    zipname = hexdigest[ 0:4 ] + ".zip"
    logging.debug( "Zip name: " + zipname )
    if not os.path.exists( "./storage/" + zipname ):
        logging.debug( "Creating initial empty zip" )
        z = zipfile.ZipFile( "./storage/" + zipname, 'w',
                             zipfile.ZIP_DEFLATED, True )
        # append mode throws an exception if it's not a zip, or maybe it's
        # just zero-length files
        z.writestr( 'junk', 'junk' )
        z.close()

    z = zipfile.ZipFile( "./storage/" + zipname, 'a', zipfile.ZIP_DEFLATED, True )

    # Find a chunk slot
    sub = 0
    while True:
        checkpath = "%s_%d" % ( hexdigest, sub )
        logging.debug( "Checking: " + checkpath )

        try:
            data = z.read( checkpath )
        except KeyError:
            data = ''

        if len( data ):
            if data == chunk:
                logging.debug( "Found existing block" )
                break
            else:
                logging.debug( "Block exists but is not the same" )
                sub += 1
        else:
            # We found a spot, dump our data here
            logging.debug( "No block here, creating new block" )
            z.writestr( checkpath, chunk )
            break

    z.close()

    logging.debug( "Got chunk slot: %d" % sub )
    return( [ digest, sub ] )

# This will return a data block by key that was saved previously
def _load_chunk_fs( key ):
    if magic_profiling:
        return ''

    ( thash, seq ) = key
    logging.debug( "Begin load_chunk" )

    chars = list( thash )
    logging.debug( chars )

    # Todo: make a digest -> path function to share with deflate
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
    logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
    subpath = '/'.join( subparts )
    logging.debug( "Subpath: " + subpath )

    subpath += "/%s_%d" % ( hexdigest, seq )
    logging.debug( "Chunk path: " + subpath )

    if os.path.exists( "./storage/" + subpath ):
        logging.debug( "Exporting chunk" )
        readchunk = FuseArchiveStream.open( "./storage/" + subpath )
        chunk = readchunk.read()
        readchunk.close()
    else:
        raise IOError

    if deep_debug:
        logging.debug( "Load-Chunk: %s" % hexlify( chunk ) )

    return chunk

def _load_chunk_zip( key ):
    if magic_profiling:
        return ''

    ( thash, seq ) = key
    logging.debug( "Begin load_chunk" )

    chars = list( thash )
    logging.debug( chars )

    # Todo: make a digest -> path function to share with deflate
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    zipname = hexdigest[ 0:4 ] + ".zip"
    logging.debug( "Zip name: " + zipname )

    z = zipfile.ZipFile( "./storage/" + zipname, 'r', zipfile.ZIP_DEFLATED, True )

    subpath = "%s_%d" % ( hexdigest, seq )
    logging.debug( "Chunk path: " + subpath )

    data = z.read( subpath )
    if len( data ):
        logging.debug( "Exporting chunk" )
        chunk = data
    else:
        z.close()
        raise IOError

    if deep_debug:
        logging.debug( "Load-Chunk: %s" % hexlify( chunk ) )

    z.close()
    return chunk

class FuseArchiveStream:
    """This just allows switching out writer classes easily"""

    @staticmethod
    def open( path, mode = 'r' ):
        fh = gzip.open( path, mode, gzip_compress_level )
        #fh = open( path, mode )
        return fh

class FuseArchiveSerializer:
    """This lets us experiment with different main file serializers"""

    @staticmethod
    def dump( f, obj ):
        out = FuseArchiveStream.open( f, "wb" )
        FuseArchiveSerializer.dumpfh( out, obj )  # new file format
        out.close()

    @staticmethod
    def dumpfh( fh, obj ):
        logging.debug( "Going to serialize %s to %s" % ( obj, fh ) )
        fh.truncate( 0 )
        fh.seek( 0 )
        f = gzip.GzipFile( None, "wb", gzip_compress_level, fh )
        #f = fh
        cPickle.dump( obj, f, -1 )
        del f
        fh.flush()

    @staticmethod
    def load( f ):
        if magic_profiling:
            return { 'size': 0, 'chunks': 0, 'chunk_size': 0 }

        inp = FuseArchiveStream.open( f, "rb" )
        magic = FuseArchiveSerializer.loadfh( inp )
        inp.close()
        return magic

    @staticmethod
    def loadfh( fh ):
        logging.debug( "Going to load from %s" % fh )
        fh.seek( 0 )
        f = gzip.GzipFile( None, "rb", gzip_compress_level, fh )
        #f = fh
        magic = cPickle.load( f )
        return( magic )

class FuseArchiveStat(fuse.Stat):
    def __init__(self, stat):
        self.st_mode = stat.st_mode
        self.st_ino = stat.st_ino
        self.st_dev = stat.st_dev
        self.st_rdev = stat.st_rdev
        self.st_nlink = stat.st_nlink
        self.st_uid = stat.st_uid
        self.st_gid = stat.st_gid
        self.st_size = stat.st_size
        self.st_atime = stat.st_atime
        self.st_mtime = stat.st_mtime
        self.st_ctime = stat.st_ctime
        self.st_blocks = stat.st_blocks
        self.st_blksize = stat.st_blksize

    def overstat( self, size ):
        self.st_size = size
        # Yeah we shouldn't always just add 1
        self.st_blocks = int( self.st_size / 512 ) + 1

class FuseArchive(Fuse):
    def __init__(self, *args, **kw):
        Fuse.__init__(self, *args, **kw)
        self.root = None

    # Fix getattr and fgetattr too?
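    # On-disk layout, relative to self.root (as implemented below):
    #   ./tree    - mirrors the mounted namespace; each regular file is a
    #               gzipped cPickle of { 'size', 'chunks', 'chunk_size' }
    #   ./storage - content-addressed chunk store: nested sha1-prefix dirs of
    #               gzip files, or 4-hex-digit .zip bundles for chunkstyle 'zip'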
    def getattr(self, path):
        treefile = "./tree" + path

        if os.path.isfile( treefile ):
            logging.debug( "Delegating getattr to FuseArchiveFile for " + path )

            # Check in the dirty cache first (to handle lseek and the
            # relatively broken implementation in fuse/python)
            if path in dirty_cache:
                logging.info( "WORKAROUND: lseek appears to do a getattr if whence is SEEK_END, using dirty cache object" )
                f = dirty_cache[ path ]
                stats = f.fgetattr()
                # no release, it's still being used
            else:
                f = self.FuseArchiveFile( path, os.O_RDONLY, 0 )
                stats = f.fgetattr()
                f.release( 0 )
        else:
            logging.debug( "Using os.lstat to get stats" )
            stats = os.lstat( treefile )

        return stats

    def readlink(self, path):
        return os.readlink("./tree" + path)

    def readdir(self, path, offset):
        for e in os.listdir("./tree" + path):
            yield fuse.Direntry(e)

    def unlink(self, path):
        os.unlink("./tree" + path)

    def rmdir(self, path):
        os.rmdir("./tree" + path)

    def symlink(self, path, path1):
        os.symlink(path, "./tree" + path1)

    def rename(self, path, path1):
        os.rename("./tree" + path, "./tree" + path1)

    def link(self, path, path1):
        os.link("./tree" + path, "./tree" + path1)

    def chmod(self, path, mode):
        os.chmod("./tree" + path, mode)

    def chown(self, path, user, group):
        os.chown("./tree" + path, user, group)

    def truncate(self, path, len):
        # Truncate using ftruncate on the file
        logging.debug( "Using FuseArchiveFile to truncate %s to %d" % ( path, len ) )
        f = self.FuseArchiveFile( path, os.O_RDWR, 0 )
        f.ftruncate( len )
        f.release( 0 )

    def mknod(self, path, mode, dev):
        os.mknod("./tree" + path, mode, dev)

    def mkdir(self, path, mode):
        os.mkdir("./tree" + path, mode)

    def utime(self, path, times):
        os.utime("./tree" + path, times)

    # The following utimens method would do the same as the above utime method.
    # We can't make it better though as the Python stdlib doesn't know of
    # subsecond precision in access/modify times.
    #
    # def utimens(self, path, ts_acc, ts_mod):
    #     os.utime("." + path, (ts_acc.tv_sec, ts_mod.tv_sec))

    def access(self, path, mode):
        if not os.access("./tree" + path, mode):
            return -errno.EACCES

    # This is how we could add stub extended attribute handlers...
    # (We can't have ones which aptly delegate requests to the underlying fs
    # because Python lacks a standard xattr interface.)
    #
    # def getxattr(self, path, name, size):
    #     val = name.swapcase() + '@' + path
    #     if size == 0:
    #         # We are asked for size of the value.
    #         return len(val)
    #     return val
    #
    # def listxattr(self, path, size):
    #     # We use the "user" namespace to please XFS utils
    #     aa = ["user." + a for a in ("foo", "bar")]
    #     if size == 0:
    #         # We are asked for size of the attr list, ie. joint size of attrs
    #         # plus null separators.
    #         return len("".join(aa)) + len(aa)
    #     return aa

    def statfs(self):
        """
        Should return an object with statvfs attributes (f_bsize, f_frsize...).
        Eg., the return value of os.statvfs() is such a thing (since py 2.2).
        If you are not reusing an existing statvfs object, start with
        fuse.StatVFS(), and define the attributes.
        To provide usable information (ie., you want sensible df(1)
        output), you are suggested to specify the following attributes:

            - f_bsize - preferred size of file blocks, in bytes
            - f_frsize - fundamental size of file blocks, in bytes
                [if you have no idea, use the same as blocksize]
            - f_blocks - total number of blocks in the filesystem
            - f_bfree - number of free blocks
            - f_files - total number of file inodes
            - f_ffree - number of free file inodes
        """

        return os.statvfs(".")

    def fsinit(self):
        os.chdir(self.root)

    class FuseArchiveFile(object):

        def __init__(self, path, flags, *mode):
            # Inflate the file
            logging.debug( "Init file: " + path )
            self.orig_path = path

            # init rw and offset
            self.rd = False
            self.wr = False
            self.size = 0
            self.modified = False

            # This is the current in-memory chunk and offset into data[]
            self.chunk_cache = {}
            self.chunk = ''
            self.chunk_index = -1
            self.chunk_modified = False
            self.chunk_size = magic_blocksize
            self.dirty_chunks = 0

            # The chunk table
            self.chunks = []

            # TODO: Better flag handling here?
            accmode = flags & (os.O_RDONLY | os.O_WRONLY | os.O_RDWR)
            if accmode == os.O_RDONLY:
                self.rd = True
            elif accmode == os.O_WRONLY:
                self.wr = True
            elif accmode == os.O_RDWR:
                self.rd = True
                self.wr = True

            if flags & os.O_APPEND:
                self.wr = True

            if os.path.exists( "./tree" + self.orig_path ):
                preexist = True
            else:
                preexist = False

            # Open the file now and keep the fh around so that cp -a on r/o
            # files works (in the create a read-only file for writing case)
            src = "./tree" + path
            logging.debug( "Saving fh for " + src )
            nflags = os.O_RDWR | os.O_APPEND
            if flags & os.O_CREAT:
                logging.debug( "Adding O_CREAT" )
                nflags = nflags | os.O_CREAT

            self.file = os.fdopen( os.open( src, nflags, *mode ),
                                   flag2mode( nflags ) )

            if preexist:
                # Read in file info table
                logging.debug( "Unpickling: %s" % self.file )
                # TODO: return an IO error if inflating fails
                try:
                    magic = FuseArchiveSerializer.loadfh( self.file )
                    logging.debug( "Got data: %s" % magic )
                    self.size = magic[ 'size' ]
                    self.chunks = magic[ 'chunks' ]
                    self.chunk_size = magic[ 'chunk_size' ]
                except Exception, e:
                    logging.critical( self.orig_path + ": " + str( e ) )
            else:
                if self.wr:
                    logging.debug( "File doesn't exist and we're going to write, creating temp empty file" )
                    self.modified = True
                    self.flush()

            self.direct_io = False
            self.keep_cache = False

            logging.debug( "%s init complete" % self )

        def _load_chunk( self, index ):
            # If the current chunk is the same as the chunk we're loading,
            # just return
            logging.debug( "_load_chunk: %d" % index )
            if index == self.chunk_index:
                logging.debug( "Load chunk is same as current chunk, all done" )
                return

            # Save this chunk if modified
            self._save_chunk()

            logging.debug( "Loading chunk %d" % index )
            key = None

            size = len( self.chunks )
            if index >= size:
                logging.debug( "Index doesn't exist" )
            else:
                key = self.chunks[ index ]

            if key:
                if isinstance( key, str ):
                    logging.debug( "Found cached dirty page" )
                    self.chunk = key
                else:
                    logging.debug( "Index: %s" % key )
                    self.chunk = load_chunk( key )
            else:
                logging.debug( "No chunk at this index, loading nothing" )
                self.chunk = ''

            logging.debug( "Loaded chunk of length: %d" % len( self.chunk ) )

            self.chunk_index = index
            self.chunk_modified = False

        # This simply puts the chunk data inside our current chunks at chunk_index
        def _save_chunk(self):
            if self.chunk_modified:
                logging.debug( "Saving chunk %d" % self.chunk_index )

                # Make sure we have room for this chunk
                size = len( self.chunks )
                if self.chunk_index >= size:
                    self.chunks.extend( [ '' ] * ( self.chunk_index - size + 1 ) )

                # Increment dirty chunks if we had a key here already
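                # (a list entry here is an already-flushed [digest, seq] key;
                # a non-empty string is dirty data that was already counted)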
                if isinstance( self.chunks[ self.chunk_index ], list ) or \
                        len( self.chunks[ self.chunk_index ] ) == 0:
                    self.dirty_chunks += 1

                logging.debug( "Dirty chunks is now: %d" % self.dirty_chunks )
                logging.debug( "Dirty flush at: %d" % dirty_flush )

                # Save the dirty chunk temporarily in memory
                self.chunks[ self.chunk_index ] = self.chunk

                # Flush if we have too many dirty chunks
                if self.dirty_chunks > dirty_flush:
                    self._flush_chunks()

        # This flushes any cached chunks
        def _flush_chunks(self):
            for index in range( len( self.chunks ) ):
                if isinstance( self.chunks[ index ], str ):
                    logging.debug( "Flushing chunk at %d" % index )
                    key = save_chunk( self.chunks[ index ] )
                    self.chunks[ index ] = key
                    logging.debug( "Key was %s" % key )
            self.dirty_chunks = 0

        def read(self, length, offset):
            logging.debug( "Reading from %s offset: %d (0x%x) length: %d (0x%x)" %
                           ( self.orig_path, offset, offset, length, length ) )

            data_read = 0
            data = ''
            index = int( offset / self.chunk_size )
            rest = offset % self.chunk_size
            is_eof = False

            # Keep reading chunks until we have at least this much data
            while data_read < length and not is_eof:
                logging.debug( "Pulling chunk data: %d" % index )
                self._load_chunk( index )
                if len( self.chunk ):
                    chunk_remaining = len( self.chunk ) - rest
                    to_read = chunk_remaining
                    data_left = length - data_read
                    if data_left < chunk_remaining:
                        to_read = data_left

                    logging.debug( "chunk_remaining: %d" % chunk_remaining )
                    logging.debug( "data_left: %d" % data_left )
                    logging.debug( "data_read: %d" % data_read )
                    logging.debug( "rest: %d" % rest )
                    logging.debug( "Copying %d bytes" % to_read )

                    data += self.chunk[ rest:( rest + to_read ) ]
                    data_read += to_read
                    index += 1
                    rest = 0
                else:
                    logging.debug( "No more chunk data, bye" )
                    is_eof = True

            logging.debug( "Returning %d bytes of data" % len( data ) )
            logging.debug( "Internal count was: %d" % data_read )
            return data

        def write(self, buf, offset):
            if magic_profiling:
                return len( buf )

            logging.debug( "Writing to %s offset: %d (0x%x) length: %d (0x%x)" %
                           ( self.orig_path, offset, offset, len( buf ), len( buf ) ) )

            index = int( offset / self.chunk_size )
            rest = offset % self.chunk_size

            logging.debug( "This chunk falls on index: %d rest: %d" % ( index, rest ) )
            logging.debug( "We have %d chunks" % len( self.chunks ) )
            logging.debug( "File size is: %d" % self.size )

            # If index is higher than the number of blocks we currently have,
            # it's a seek hole, so we need to extend our blocks out.  We know
            # these need to essentially be zeroed up to this size since they
            # don't exist yet.
            if len( self.chunks ) - 1 < index:
                logging.debug( "Not enough chunks %d, need %d, extending" %
                               ( len( self.chunks ), index + 1 ) )
                this_index = 0
                while this_index < index:
                    self._load_chunk( this_index )
                    fill_null = self.chunk_size - len( self.chunk )
                    logging.debug( "Filling this chunk with null, bytes: %d" % fill_null )
                    self.chunk += "\0" * fill_null
                    logging.debug( "Chunk is now: %d bytes" % len( self.chunk ) )
                    self.chunk_modified = True
                    self._save_chunk()
                    this_index += 1

            self._load_chunk( index )

            # Now check if this chunk needs to be extended
            if len( self.chunk ) < rest:
                fill_null = rest - len( self.chunk )
                logging.debug( "Filling final chunk with null, bytes: %d" % fill_null )
                self.chunk += "\0" * fill_null
                self.chunk_modified = True
                self._save_chunk()

            buf_offset = 0
            buf_len = len( buf )

            logging.debug( "Length: %d" % buf_len )
            while buf_offset < buf_len:
                logging.debug( "Pulling in chunk for writing: %d" % index )
                self._load_chunk( index )
                buf_remain = buf_len - buf_offset
                chunk_remain = self.chunk_size - rest

                logging.debug( "buf_remain: %d" % buf_remain )
logging.debug( "chunk_remain: %d" % chunk_remain ) if chunk_remain < buf_remain: logging.debug( "Writing %d bytes, buffer boundry" % chunk_remain ) this_len = chunk_remain else: logging.debug( "Writing final %d bytes" % buf_remain ) this_len = buf_remain logging.debug( "Bytes to copy: %d" % this_len ) logging.debug( " buf offset: %d" % buf_offset ) logging.debug( " chunk offset: %d" % rest ) if deep_debug: logging.debug( "Pre-Buf: %s" % hexlify(buf) ) logging.debug( "Pre-Chunk: %s" % hexlify(self.chunk) ) # Since python doesn't do in-place reassignment like you # can with splice() we will reconstruct the data by joining # stuff by offsets (first chars to skip, then our joining # buf chunk, the everything that would have been after it) self.chunk = self.chunk[ :rest ] + \ buf[ buf_offset:(buf_offset+this_len) ] + \ self.chunk[ (rest + this_len): ] if deep_debug: logging.debug( "Post-Buf: %s" % hexlify(buf) ) logging.debug( "Post-Chunk: %s" % hexlify(self.chunk) ) buf_offset += this_len # Advance to next block rest = 0 index += 1 self.chunk_modified = True self._save_chunk() self.modified = True if offset + len(buf) > self.size: self.size = offset + len(buf) logging.debug( "This chunk size is now: %d" % len( self.chunk ) ) logging.debug( "File size is now: %d" % self.size ) logging.debug( "Num Chunks: %d" % len( self.chunks ) ) # Mark us in the dirty cache dirty_cache[ self.orig_path ] = self return len(buf) # BUG: If you cp -a a file then quickly ls -l sometimes it doesn't show # up right? like wrong size and stuff? # Maybe because release doesn't return a fuse message and is async? def release(self, flags): # Deflate the file logging.debug( "Release: " + self.orig_path ) self.flush() self.file.close() def _fflush(self): if self.wr and self.modified: logging.debug( "_fflush!" ) # Save our main data self._save_chunk() # And flush any cached chunks self._flush_chunks() save_size = self.size # Figure out our size based on the number of chunks + the # len of the final chunk numchunks = len( self.chunks ) if numchunks > 0: # Load the last chunk logging.debug( "We have %d chunks, calculating size" % numchunks ) self._load_chunk( numchunks - 1 ) self.size = ( numchunks - 1 ) * self.chunk_size + \ len( self.chunk ) else: logging.debug( "No chunks, setting size to zero" ) self.size = 0 # If this assert fails then write/ftruncate failed to set # things up right somewhere assert save_size == self.size, "Calculated size of " \ + self.orig_path + " = " + str( self.size ) \ + " doesn't match internal size " + str( save_size ) \ + "\nProbably a bug in write or ftruncate!" logging.debug( "Size calculated is: %d (0x%x)" % ( self.size, self.size ) ) FuseArchiveSerializer.dumpfh( self.file, { 'size': self.size, 'chunks': self.chunks, 'chunk_size': self.chunk_size } ) # Not dirty anymore if self.orig_path in dirty_cache: del dirty_cache[ self.orig_path ] logging.debug( "_fflush exit" ) return 1 # Currently we treat fsync as flush since we don't keep any data # hanging around anyway in fh stuff def fsync(self, isfsyncfile): logging.debug( "fsync " + self.orig_path ) self._fflush() #if isfsyncfile and hasattr(os, 'fdatasync'): # os.fdatasync(self.fd) #else: # os.fsync(self.fd) def flush(self): logging.debug( "flush " + self.orig_path ) self._fflush() def fgetattr(self): logging.debug( "Overridding fgetattr" ) stats = FuseArchiveStat( os.lstat( "./tree" + self.orig_path ) ) # Fixed in write? #if self.modified: # We would need to fsync here to recalc size, but don't do # it unless modified? 
                # otherwise simple getattr will be rewriting a ton of files
            #    print "WARNING: self.modified causes fgetattr to be incorrect!"

            stats.overstat( self.size )
            return stats

        def ftruncate(self, length):
            if not self.wr:
                return -errno.EIO

            curr_chunks = len( self.chunks )
            need_chunks = ( length / self.chunk_size )
            extra_bytes = length % self.chunk_size
            logging.debug( "Ftruncate - %d (0x%x)" % ( length, length ) )
            logging.debug( " - self.size: %d" % self.size )
            logging.debug( " - curr_chunks: %d" % curr_chunks )
            logging.debug( " - need_chunks: %d" % need_chunks )
            logging.debug( " - extra_bytes: %d" % extra_bytes )

            if extra_bytes:
                logging.debug( "Need an extra chunk" )
                need_chunks += 1

            self._load_chunk( 0 )

            if length == 0:
                logging.debug( "Creating 0 chunk file" )
                self.chunks = []
                self.chunk = ''
            elif self.size <= length:
                logging.debug( "Need to pad out file, writing/seeking to %d" % length )

                # Just write out null bytes to the length requested, write
                # will do this for us if we specify the offset
                self.write( '', length )
            else:
                logging.debug( "Truncating chunks" )
                while True:
                    logging.debug( "Need chunks: %d curr: %d" % ( need_chunks, curr_chunks ) )
                    if need_chunks == curr_chunks:
                        break

                    logging.debug( "Deleting chunk %d" % self.chunk_index )
                    self.chunks.pop()
                    curr_chunks = len( self.chunks )

                # Now make sure this chunk is the right size, first load the
                # last chunk
                if len( self.chunks ):
                    self._load_chunk( len( self.chunks ) - 1 )
                    logging.debug( "Loaded final chunk, len: %d" % len( self.chunk ) )

                    # Now truncate this item if needed
                    if len( self.chunk ) > extra_bytes:
                        logging.debug( "Truncating final chunk to %d" % extra_bytes )
                        self.chunk = self.chunk[ :extra_bytes ]
                        logging.debug( "Chunk is now: %d bytes" % len( self.chunk ) )

            self.chunk_modified = True
            self.modified = True
            self.size = length
            self._load_chunk( 0 )

            logging.debug( "ftruncate complete" )
            self._fflush()

        def lock(self, cmd, owner, **kw):
            logging.debug( "WARNING: locking unsupported" )
            return 1

            # The code here is much rather just a demonstration of the locking
            # API than something which actually was seen to be useful.

            # Advisory file locking is pretty messy in Unix, and the Python
            # interface to this doesn't make it better.
            # We can't do fcntl(2)/F_GETLK from Python in a platform independent
            # way. The following implementation *might* work under Linux.
            #
            # if cmd == fcntl.F_GETLK:
            #     import struct
            #
            #     lockdata = struct.pack('hhQQi', kw['l_type'], os.SEEK_SET,
            #                            kw['l_start'], kw['l_len'], kw['l_pid'])
            #     ld2 = fcntl.fcntl(self.fd, fcntl.F_GETLK, lockdata)
            #     flockfields = ('l_type', 'l_whence', 'l_start', 'l_len', 'l_pid')
            #     uld2 = struct.unpack('hhQQi', ld2)
            #     res = {}
            #     for i in xrange(len(uld2)):
            #         res[flockfields[i]] = uld2[i]
            #
            #     return fuse.Flock(**res)

            # Convert fcntl-ish lock parameters to Python's weird
            # lockf(3)/flock(2) medley locking API...
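            # NOTE: everything from here down is unreachable because of the
            # early "return 1" above; it is kept only as a reference for how
            # the locking API could be wired up.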
            op = { fcntl.F_UNLCK : fcntl.LOCK_UN,
                   fcntl.F_RDLCK : fcntl.LOCK_SH,
                   fcntl.F_WRLCK : fcntl.LOCK_EX }[kw['l_type']]
            if cmd == fcntl.F_GETLK:
                return -errno.EOPNOTSUPP
            elif cmd == fcntl.F_SETLK:
                if op != fcntl.LOCK_UN:
                    op |= fcntl.LOCK_NB
            elif cmd == fcntl.F_SETLKW:
                pass
            else:
                return -errno.EINVAL

            fcntl.lockf(self.fd, op, kw['l_start'], kw['l_len'])

    def main(self, *a, **kw):
        self.file_class = self.FuseArchiveFile

        # This is where fragments go
        if not os.path.exists( 'storage' ):
            os.mkdir( 'storage' )

        # This is where the real files exist
        if not os.path.exists( 'tree' ):
            os.mkdir( 'tree' )

        return Fuse.main(self, *a, **kw)

def main():
    usage = """
Userspace nullfs-alike: mirror the filesystem tree from some point on.

""" + Fuse.fusage

    server = FuseArchive(version="%prog " + fuse.__version__,
                         usage=usage,
                         dash_s_do='setsingle')

    server.multithreaded = False

    server.parse(values=server, errex=1)

    if len(server.parser.largs) != 2:
        print "Usage: " + sys.argv[0] + " storageDirectory mountDirectory"
        sys.exit(1)

    server.root = server.parser.largs[0]

    try:
        if server.fuse_args.mount_expected():
            os.chdir(server.root)
    except OSError:
        print >> sys.stderr, "can't enter root of underlying filesystem"
        sys.exit(1)

    server.main()

if __name__ == '__main__':
    if enable_psyco:
        # Import Psyco if available
        try:
            import psyco
            psyco.full()
        except ImportError:
            pass

    if enable_stats:
        import hotshot
        prof = hotshot.Profile( "fusearchive_stats" )
        prof.runcall(main)
        prof.close()
    else:
        main()
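# Example invocation (a sketch; the paths below are placeholders, not part of
# the project):
#   mkdir -p /var/archive /mnt/archive
#   ./fusearchive.py /var/archive /mnt/archive
# The 'storage' (content-addressed chunks) and 'tree' (pickled metadata stubs)
# directories are created inside the storage directory on first mount.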