#!/usr/bin/env python

#    Copyright (C) 2001  Jeff Epler
#    Copyright (C) 2006  Csaba Henk
#    Copyright (C) 2009  Steve Slaven
#
#    This program can be distributed under the terms of the GNU LGPL.
#    See the file COPYING.
#

"""FUSE filesystem that stores files as lists of de-duplicated chunks.

Every regular file visible through the mount is kept under ``./tree`` as a
gzip'd pickle holding the file's stat result and a list of chunk keys.  The
chunk contents themselves live under ``./storage`` as gzip'd files named
after the SHA-1 of their contents.  Both directories are relative to the
storage directory given on the command line.

NOTE: this module targets Python 2 (it relies on the ``sha`` module and on
``str`` digests).
"""

import os, sys, shutil, fcntl, fuse, re
import tempfile, sha, pickle, gzip
from errno import *
from stat import *
from fuse import Fuse

import pdb

if not hasattr(fuse, '__version__'):
    raise RuntimeError(
        "your fuse-py doesn't know of fuse.__version__, probably it's too old.")

fuse.fuse_python_api = (0, 2)

fuse.feature_assert('stateful_files', 'has_init')

# Size in bytes of the pieces a file is split into by deflate().
magic_blocksize = 1024 * 32
# Number of leading digest bytes used as nested directory levels in storage.
magic_depth = 5
# Messages with a level above this are suppressed by dmsg().
debug_level = 2


def dmsg(level, message):
    """Print *message* prefixed by *level* when level <= debug_level.

    *message* is passed through str() so that callers may hand in lists or
    other non-string objects without raising TypeError.
    """
    if level <= debug_level:
        print(str(level) + ": " + str(message))


def flag2mode(flags):
    """Translate os.open()-style *flags* into a stdio mode string.

    Returns 'r', 'w' or 'w+' depending on the access mode, with the 'w'
    replaced by 'a' when O_APPEND is set.
    """
    md = {os.O_RDONLY: 'r', os.O_WRONLY: 'w', os.O_RDWR: 'w+'}
    m = md[flags & (os.O_RDONLY | os.O_WRONLY | os.O_RDWR)]

    # Must be a bitwise AND: "flags | os.O_APPEND" is always non-zero, which
    # turned every writable handle into an append-mode handle.
    if flags & os.O_APPEND:
        m = m.replace('w', 'a', 1)

    return m


def _chunk_location(digest, seq):
    """Return (directory, file path) of chunk *digest* / *seq* in ./storage.

    The first magic_depth bytes of the digest, hex encoded, form the nested
    directory; the file is named "<hexdigest>_<seq>".
    """
    hex_bytes = ["%02x" % ord(x) for x in digest]
    directory = "./storage/" + "/".join(hex_bytes[:magic_depth])
    return directory, directory + "/" + "".join(hex_bytes) + "_" + str(seq)


# This will write out a data block, it will return a key that can get this
# data back later
def save_chunk(chunk):
    """Store *chunk* under ./storage and return its key [digest, sub].

    *digest* is the raw SHA-1 of the chunk length (as decimal text) followed
    by the chunk.  *sub* is the slot number: slots holding different data
    with the same digest are skipped, a slot already holding identical data
    is reused, and the first free slot is written otherwise.
    """
    dmsg(2, "Begin save_chunk")
    # Save this hash string, similar to the backuppc algo
    digest = sha.new(str(len(chunk)) + chunk).digest()

    # Make sure the sub path exists
    directory = _chunk_location(digest, 0)[0]
    dmsg(3, "Subpath: " + directory)
    if not os.path.isdir(directory):
        dmsg(3, "Creating subdir: " + directory)
        os.makedirs(directory)

    # Find a chunk slot
    sub = 0
    while True:
        checkpath = _chunk_location(digest, sub)[1]
        dmsg(3, "Checking: " + checkpath)

        if not os.path.exists(checkpath):
            # We found a spot, dump our data here
            dmsg(3, "No block here, creating new block")
            savechunk = gzip.open(checkpath, "wb")
            try:
                savechunk.write(chunk)
            finally:
                # The original referenced .close without calling it, so the
                # gzip stream was never explicitly finished.
                savechunk.close()
            break

        # Check if this is our data
        verify = gzip.open(checkpath, "rb")
        try:
            verify_contents = verify.read()
        finally:
            verify.close()

        if verify_contents == chunk:
            dmsg(3, "Found existing block")
            break

        dmsg(3, "Block exists but is not the same")
        sub += 1

    dmsg(3, "Got chunk slot: " + str(sub))
    return [digest, sub]


# This will return a data block by key that was saved previously
def load_chunk(key):
    """Return the chunk contents stored under *key* by save_chunk().

    *key* is the (digest, slot) pair returned by save_chunk().  Raises
    IOError when the chunk file cannot be opened or read.
    """
    digest, seq = key
    dmsg(2, "Begin load_chunk")

    chunk_path = _chunk_location(digest, seq)[1]
    # seq is an int, so it has to be converted before concatenation.
    dmsg(3, "Chunk path: " + chunk_path + " sub " + str(seq))

    dmsg(3, "Exporting chunk")
    # gzip.open raises IOError itself when the file is missing.
    readchunk = gzip.open(chunk_path, "rb")
    try:
        return readchunk.read()
    finally:
        readchunk.close()


# Inflate a file, src is a packed file, dest is where the unpacked file
# should go
# we assume our chunks are in storage/
def inflate(src, dest):
    """Rebuild the file described by packed file *src* into *dest*.

    *src* is a gzip'd pickle as written by deflate(); every key in its
    'data' list is loaded with load_chunk() and appended to *dest*.
    """
    dmsg(1, "inflate!")

    dmsg(3, "Unpickling: " + src)
    # TODO: return an IO error if inflating fails
    # NOTE: pickle.load is only safe because ./tree is written by this
    # program; never point it at untrusted data.
    inp = gzip.open(src, "rb")
    try:
        magic = pickle.load(inp)
    finally:
        inp.close()
    dmsg(3, "Got data: " + str(magic))

    # Now unserialize the chunks back in to a file
    out = open(dest, "wb")
    try:
        for key in magic['data']:
            out.write(load_chunk(key))
    finally:
        out.close()

    dmsg(2, "File inflated")


# TODO: deflate only if the file has been modified

# Deflate a file, src is the unpacked file, dest is where we want to pack
# to, and we assume storage/ is where chunks are stored
def deflate(src, dest):
    """Split *src* into chunks, store them, and write the packed file *dest*.

    *dest* receives a gzip'd pickle of a dict with the keys 'stat' (the
    os.stat() result of *src*) and 'data' (the list of chunk keys, in file
    order).
    """
    dmsg(2, "deflate!")
    hashs = []

    inp = open(src, "rb")
    try:
        # Python 2 has no assignment inside a condition, so read until an
        # empty result signals end of file.
        while True:
            chunk = inp.read(magic_blocksize)
            if not chunk:
                break
            hashs.append(save_chunk(chunk))
    finally:
        inp.close()

    out = gzip.open(dest, "wb")
    try:
        pickle.dump({
            'stat': os.stat(src),
            'data': hashs,
        }, out)
    finally:
        out.close()


class FuseArchiveStat(fuse.Stat):
    """Stat object combining a packed file's own stat with its real size.

    All fields are taken from *stat* (the packed file in ./tree) except
    st_size, which comes from *overstat* (the stat stored at deflate time),
    and st_blocks, which is derived from that size.
    """

    def __init__(self, stat, overstat):
        self.st_mode = stat.st_mode
        self.st_ino = stat.st_ino
        self.st_dev = stat.st_dev
        self.st_rdev = stat.st_rdev
        self.st_nlink = stat.st_nlink
        self.st_uid = stat.st_uid
        self.st_gid = stat.st_gid
        self.st_size = overstat.st_size
        self.st_atime = stat.st_atime
        self.st_mtime = stat.st_mtime
        # Was copied from st_mtime, which looks like a typo.
        self.st_ctime = stat.st_ctime
        # Number of 512 byte blocks, rounded up.
        self.st_blocks = (self.st_size + 511) // 512
        self.st_blksize = stat.st_blksize


class FuseArchive(Fuse):
    """Fuse server mirroring ./tree, with file contents kept in ./storage."""

    def __init__(self, *args, **kw):

        Fuse.__init__(self, *args, **kw)
        # Storage directory; set by the module level main().
        self.root = None

    # Fix getattr and fgetattr to?
    def getattr(self, path):
        """Return the stat of *path*, with the unpacked size for files."""
        treefile = "./tree" + path
        stats = os.lstat(treefile)

        # Test the lstat result rather than os.path.isfile(): the latter
        # follows symlinks and would try to unpickle the link target.
        if S_ISREG(stats.st_mode):
            dmsg(3, "Reading file to get size: " + path)

            # Override size
            # NOTE: pickle.load is only safe on files this program wrote.
            inp = gzip.open(treefile)
            try:
                magic = pickle.load(inp)
            finally:
                inp.close()

            dmsg(3, "Overridding getattr")
            stats = FuseArchiveStat(stats, magic['stat'])

        return stats

    def readlink(self, path):
        return os.readlink("./tree" + path)

    def readdir(self, path, offset):
        for e in os.listdir("./tree" + path):
            yield fuse.Direntry(e)

    def unlink(self, path):
        os.unlink("./tree" + path)

    def rmdir(self, path):
        os.rmdir("./tree" + path)

    def symlink(self, path, path1):
        # The link target is stored verbatim; only the link name is mapped.
        os.symlink(path, "./tree" + path1)

    def rename(self, path, path1):
        os.rename("./tree" + path, "./tree" + path1)

    def link(self, path, path1):
        os.link("./tree" + path, "./tree" + path1)

    def chmod(self, path, mode):
        os.chmod("./tree" + path, mode)

    def chown(self, path, user, group):
        os.chown("./tree" + path, user, group)

    def truncate(self, path, len):
        """Truncate *path* to *len* bytes via a temporary file handle."""
        # Truncate using the ftruncate on the file
        dmsg(2, "Using FuseArchiveFile to truncate " + path + " to " + str(len))
        f = self.FuseArchiveFile(path, os.O_APPEND, 0)
        f.ftruncate(len)
        f.release(0)

    def mknod(self, path, mode, dev):
        os.mknod("./tree" + path, mode, dev)

    def mkdir(self, path, mode):
        os.mkdir("./tree" + path, mode)

    def utime(self, path, times):
        os.utime("./tree" + path, times)

#    The following utimens method would do the same as the above utime method.
#    We can't make it better though as the Python stdlib doesn't know of
#    subsecond preciseness in access/modify times.
#
#    def utimens(self, path, ts_acc, ts_mod):
#      os.utime("." + path, (ts_acc.tv_sec, ts_mod.tv_sec))

    def access(self, path, mode):
        if not os.access("./tree" + path, mode):
            return -EACCES

#    This is how we could add stub extended attribute handlers...
#    (We can't have ones which aptly delegate requests to the underlying fs
#    because Python lacks a standard xattr interface.)
#
#    def getxattr(self, path, name, size):
#        val = name.swapcase() + '@' + path
#        if size == 0:
#            # We are asked for size of the value.
#            return len(val)
#        return val
#
#    def listxattr(self, path, size):
#        # We use the "user" namespace to please XFS utils
#        aa = ["user." + a for a in ("foo", "bar")]
#        if size == 0:
#            # We are asked for size of the attr list, ie. joint size of attrs
#            # plus null separators.
#            return len("".join(aa)) + len(aa)
#        return aa

    def statfs(self):
        """
        Should return an object with statvfs attributes (f_bsize, f_frsize...).
        Eg., the return value of os.statvfs() is such a thing (since py 2.2).
        If you are not reusing an existing statvfs object, start with
        fuse.StatVFS(), and define the attributes.

        To provide usable information (ie., you want sensible df(1)
        output, you are suggested to specify the following attributes:

            - f_bsize - preferred size of file blocks, in bytes
            - f_frsize - fundamental size of file blocks, in bytes
                [if you have no idea, use the same as blocksize]
            - f_blocks - total number of blocks in the filesystem
            - f_bfree - number of free blocks
            - f_files - total number of file inodes
            - f_ffree - number of free file inodes
        """

        return os.statvfs(".")

    def fsinit(self):
        os.chdir(self.root)

    class FuseArchiveFile(object):
        """Open-file state: a temporary unpacked copy of one packed file."""

        def __init__(self, path, flags, *mode):
            """Unpack *path* into a temporary file and open it.

            If the packed file does not exist and *flags* ask for writing,
            an empty packed file is created.  *mode* is accepted for
            interface compatibility and only logged.
            """
            # Inflate the file
            dmsg(1, "Init file: " + path)
            self.orig_path = path

            (fdnum, self.tmp_name) = tempfile.mkstemp()
            fmode = flag2mode(flags)

            try:
                if os.path.exists("./tree" + self.orig_path):
                    inflate("./tree" + path, self.tmp_name)
                elif re.match('(a|w)', fmode):
                    dmsg(2, "File doesn't exist and we're going to write, creating temp empty file")
                    deflate("/dev/null", "./tree" + path)
            except Exception:
                # Don't leak the descriptor or the temp file when unpacking
                # fails; the error itself is passed on unchanged.
                os.close(fdnum)
                os.unlink(self.tmp_name)
                raise

            dmsg(2, "Shadow file: " + self.tmp_name + " for " + self.orig_path)
            dmsg(3, "Going to open shadow file with flags: " + str(flags) + " mode " + str(mode))
            dmsg(3, "Flag2mode is: " + str(fmode))

            # Just use the fdnum they gave us instead of reopening it,
            # since that might fail
            self.file = os.fdopen(fdnum, fmode)
            dmsg(3, "Open")

            self.fd = self.file.fileno()

            self.direct_io = False
            self.keep_cache = False

            # Set by write()/ftruncate(); release() only repacks when True.
            self.modified = False

            dmsg(3, str(self) + " init complete")

        def read(self, length, offset):
            dmsg(3, "Reading from " + self.orig_path)
            self.file.seek(offset)
            return self.file.read(length)

        def write(self, buf, offset):
            dmsg(3, "Writing to " + self.orig_path)
            self.file.seek(offset)
            self.file.write(buf)
            self.modified = True
            return len(buf)

        # BUG: If you cp -a a file then quickly ls -l sometimes it doesn't show
        # up right? like wrong size and stuff?
        # Maybe because release doesn't return a fuse message and is async?
        def release(self, flags):
            """Close the temp file, repack it if modified, and delete it."""
            # Deflate the file
            dmsg(2, "Release: " + self.orig_path)
            self.file.close()

            if self.modified:
                dmsg(2, "Copying working file back to storage: " +
                     self.tmp_name + " -> " + self.orig_path)
                deflate(self.tmp_name, "./tree" + self.orig_path)
            else:
                dmsg(2, "File not modified, not copying back")

            dmsg(2, "Deleting old file: " + self.tmp_name)
            os.unlink(self.tmp_name)

        def _fflush(self):
            if 'w' in self.file.mode or 'a' in self.file.mode:
                self.file.flush()

        def fsync(self, isfsyncfile):
            self._fflush()
            if isfsyncfile and hasattr(os, 'fdatasync'):
                os.fdatasync(self.fd)
            else:
                os.fsync(self.fd)

        def flush(self):
            self._fflush()
            # cf. xmp_flush() in fusexmp_fh.c
            os.close(os.dup(self.fd))

        def fgetattr(self):
            # Note: this is the stat of the temporary file, not of the
            # packed file in ./tree.
            return os.fstat(self.fd)

        def ftruncate(self, len):
            self.modified = True
            self.file.truncate(len)

        def lock(self, cmd, owner, **kw):
            """Apply an fcntl-style lock request to the temporary file.

            Returns -EOPNOTSUPP for F_GETLK and -EINVAL for unknown
            commands; otherwise the lock is taken with fcntl.lockf().
            """
            # The code here is much rather just a demonstration of the locking
            # API than something which actually was seen to be useful.

            # Advisory file locking is pretty messy in Unix, and the Python
            # interface to this doesn't make it better.
            # We can't do fcntl(2)/F_GETLK from Python in a platform
            # independent way. The following implementation *might* work
            # under Linux.
            #
            # if cmd == fcntl.F_GETLK:
            #     import struct
            #
            #     lockdata = struct.pack('hhQQi', kw['l_type'], os.SEEK_SET,
            #                            kw['l_start'], kw['l_len'], kw['l_pid'])
            #     ld2 = fcntl.fcntl(self.fd, fcntl.F_GETLK, lockdata)
            #     flockfields = ('l_type', 'l_whence', 'l_start', 'l_len', 'l_pid')
            #     uld2 = struct.unpack('hhQQi', ld2)
            #     res = {}
            #     for i in xrange(len(uld2)):
            #          res[flockfields[i]] = uld2[i]
            #
            #     return fuse.Flock(**res)

            # Convert fcntl-ish lock parameters to Python's weird
            # lockf(3)/flock(2) medley locking API...
            op = {fcntl.F_UNLCK: fcntl.LOCK_UN,
                  fcntl.F_RDLCK: fcntl.LOCK_SH,
                  fcntl.F_WRLCK: fcntl.LOCK_EX}[kw['l_type']]
            if cmd == fcntl.F_GETLK:
                return -EOPNOTSUPP
            elif cmd == fcntl.F_SETLK:
                if op != fcntl.LOCK_UN:
                    op |= fcntl.LOCK_NB
            elif cmd == fcntl.F_SETLKW:
                pass
            else:
                return -EINVAL

            # fcntl.lockf takes the length before the start offset; the two
            # arguments used to be passed the other way round.
            fcntl.lockf(self.fd, op, kw['l_len'], kw['l_start'])

    def main(self, *a, **kw):
        """Create ./storage and ./tree if needed, then run the Fuse loop."""

        self.file_class = self.FuseArchiveFile

        # This is where fragments go
        if not os.path.exists('storage'):
            os.mkdir('storage')

        # This is where the real files exist
        if not os.path.exists('tree'):
            os.mkdir('tree')

        return Fuse.main(self, *a, **kw)


def main():
    """Parse the command line and run the filesystem server."""

    usage = """
Userspace nullfs-alike: mirror the filesystem tree from some point on.

""" + Fuse.fusage

    server = FuseArchive(version="%prog " + fuse.__version__,
                         usage=usage,
                         dash_s_do='setsingle')

    server.multithreaded = False

    server.parse(values=server, errex=1)

    if len(server.parser.largs) != 2:
        print("Usage: " + sys.argv[0] + " storageDirectory mountDirectory")
        sys.exit(1)

    server.root = server.parser.largs[0]

    try:
        if server.fuse_args.mount_expected():
            os.chdir(server.root)
    except OSError:
        sys.stderr.write("can't enter root of underlying filesystem\n")
        sys.exit(1)

    server.main()


if __name__ == '__main__':
    main()