#!/usr/bin/env python

#    Copyright (C) 2001  Jeff Epler
#    Copyright (C) 2006  Csaba Henk
#
#    This program can be distributed under the terms of the GNU LGPL.
#    See the file COPYING.
#

"""Chunk-deduplicating FUSE filesystem built on the fuse-python bindings.

Layout inside the storage directory (the process chdir()s into it):

  ./tree/     mirrors the mounted hierarchy; every regular file is a pickled
              manifest ``{'stat': <os.stat of the unpacked file>,
              'data': [[digest, slot], ...]}``
  ./storage/  holds the chunks, fanned out over ``magic_depth`` directory
              levels derived from the first bytes of each chunk digest

Opening a file inflates it into a temporary "shadow" file; releasing it
deflates the shadow file back into chunks plus a manifest.

NOTE: this is Python 2 code (it relies on the ``sha`` module and on digests
being ``str`` objects).
"""

import os
import sys
import shutil
import fcntl
import re
import tempfile
import sha
import pickle
import pdb
from errno import *
from stat import *

import fuse
from fuse import Fuse


if not hasattr(fuse, '__version__'):
    raise RuntimeError(
        "your fuse-py doesn't know of fuse.__version__, probably it's too old.")

fuse.fuse_python_api = (0, 2)

fuse.feature_assert('stateful_files', 'has_init')

# Size in bytes of the chunks a file is split into.
magic_blocksize = 1024 * 32
# Number of leading digest bytes used as nested directory names.
magic_depth = 5


def flag2mode(flags):
    """Translate open(2) access flags into a stdio mode string.

    Returns 'r', 'w' or 'w+' for O_RDONLY, O_WRONLY and O_RDWR; the leading
    'w' becomes 'a' when O_APPEND is set.
    """
    md = {os.O_RDONLY: 'r', os.O_WRONLY: 'w', os.O_RDWR: 'w+'}
    m = md[flags & (os.O_RDONLY | os.O_WRONLY | os.O_RDWR)]

    # Must be a bit test: "flags | os.O_APPEND" is always true and would
    # force append mode on every writable handle.
    if flags & os.O_APPEND:
        m = m.replace('w', 'a', 1)

    return m


def _hexdigest(digest):
    """Return the raw digest string rendered as lowercase hex."""
    return ''.join(["%02x" % ord(x) for x in digest])


def _chunk_subdirs(digest):
    """Return the directory names (one per leading digest byte) of a chunk."""
    return ["%02x" % ord(x) for x in digest[:magic_depth]]


def _chunk_relpath(digest, seq):
    """Return the path of chunk (digest, seq) relative to ./storage/."""
    return '/'.join(_chunk_subdirs(digest)) + "/" + _hexdigest(digest) \
        + "_" + str(seq)


def _store_chunk(digest, chunk):
    """Store chunk under ./storage/ and return the slot number it occupies.

    Slots 0, 1, 2, ... for the same digest are probed in order; an existing
    slot is reused only when its contents equal ``chunk``, otherwise the
    next slot is tried and the first free one is written.
    """
    # Make sure the fan-out directories exist
    nextpart = "./storage"
    for part in _chunk_subdirs(digest):
        nextpart += "/" + part
        if not os.path.exists(nextpart):
            print("Creating subdir: " + nextpart)
            os.mkdir(nextpart)

    # Find a chunk slot
    sub = 0
    while True:
        checkpath = "./storage/" + _chunk_relpath(digest, sub)
        print("Checking: " + checkpath)

        if not os.path.exists(checkpath):
            # We found a spot, dump our data here
            print("No block here, creating new block")
            savechunk = open(checkpath, "wb")
            try:
                savechunk.write(chunk)
            finally:
                savechunk.close()
            return sub

        # Check if this is our data
        verify = open(checkpath, "rb")
        try:
            verify_contents = verify.read()
        finally:
            verify.close()

        if verify_contents == chunk:
            print("Found existing block")
            return sub

        print("Block exists but is not the same")
        sub += 1


def inflate(src, dest):
    """Rebuild an unpacked file from a manifest.

    src is the packed (manifest) file, dest is where the unpacked file is
    written. Chunks are looked up under ./storage/.

    A zero-length src (e.g. a placeholder left by mknod) yields an empty
    dest. Raises IOError(EIO) when a referenced chunk is missing.
    """
    print("inflate!")

    if os.path.getsize(src) == 0:
        # Nothing was ever deflated into this entry: logically empty file.
        open(dest, "wb").close()
        print("File inflated")
        return

    print("Unpickling: " + src)
    # NOTE(review): pickle.load executes arbitrary code from the manifest;
    # only safe while ./tree is written exclusively by this program.
    inp = open(src, "rb")
    try:
        magic = pickle.load(inp)
    finally:
        inp.close()
    print("Got data: " + str(magic))

    # Now unserialize the chunks back in to a file
    out = open(dest, "wb")
    try:
        for (digest, seq) in magic['data']:
            print(list(digest))
            print("Subpath: " + '/'.join(_chunk_subdirs(digest)))
            subpath = _chunk_relpath(digest, seq)
            print("Chunk path: " + subpath)

            chunkpath = "./storage/" + subpath
            if not os.path.exists(chunkpath):
                raise IOError(EIO, "Missing chunk", chunkpath)

            print("Exporting chunk")
            readchunk = open(chunkpath, "rb")
            try:
                out.write(readchunk.read())
            finally:
                readchunk.close()
    finally:
        out.close()

    print("File inflated")


# TODO: deflate only if the file has been modified
def deflate(src, dest):
    """Pack a file into chunks plus a manifest.

    src is the unpacked file, dest is where the manifest is written. The
    file is read in magic_blocksize pieces; each piece is stored under
    ./storage/ (re-using an identical existing chunk) and the manifest
    records os.stat(src) together with the ordered [digest, slot] list.
    """
    print("deflate!")
    hashs = []

    inp = open(src, "rb")
    try:
        while True:
            chunk = inp.read(magic_blocksize)
            if not chunk:
                break

            # Hash the length together with the data, similar to the
            # backuppc algo
            digest = sha.new(str(len(chunk)) + chunk).digest()

            print(list(digest))
            print(_chunk_subdirs(digest))
            print("Subpath: " + '/'.join(_chunk_subdirs(digest)))

            sub = _store_chunk(digest, chunk)
            print("Got chunk slot: " + str(sub))
            hashs.append([digest, sub])
    finally:
        inp.close()

    out = open(dest, "wb")
    try:
        pickle.dump({'stat': os.stat(src), 'data': hashs}, out)
    finally:
        out.close()


class FuseArchiveStat(fuse.Stat):
    """Stat of a tree entry with the size taken from the unpacked file.

    stat is the lstat() result of the manifest in ./tree; overstat is the
    stat stored in the manifest, of which only st_size is used.
    """

    def __init__(self, stat, overstat):
        self.st_mode = stat.st_mode
        self.st_ino = stat.st_ino
        self.st_dev = stat.st_dev
        self.st_rdev = stat.st_rdev
        self.st_nlink = stat.st_nlink
        self.st_uid = stat.st_uid
        self.st_gid = stat.st_gid
        self.st_size = overstat.st_size
        self.st_atime = stat.st_atime
        self.st_mtime = stat.st_mtime
        self.st_ctime = stat.st_ctime
        # Number of 512-byte blocks, rounded up
        self.st_blocks = (self.st_size + 511) // 512
        self.st_blksize = stat.st_blksize


class FuseArchive(Fuse):
    """Filesystem operations; every path is resolved below ./tree."""

    def __init__(self, *args, **kw):

        Fuse.__init__(self, *args, **kw)
        self.root = None

    def getattr(self, path):
        """Return the lstat of path, reporting the unpacked size for files."""
        treefile = "./tree" + path
        stats = os.lstat(treefile)

        # Test the lstat result itself so a symlink is never followed and
        # unpickled; an empty entry has no manifest to read.
        if S_ISREG(stats.st_mode) and stats.st_size > 0:
            print("Reading file to get size: " + path)

            # Override size
            inp = open(treefile, "rb")
            try:
                magic = pickle.load(inp)
            finally:
                inp.close()
            print("Overridding getattr")
            stats = FuseArchiveStat(stats, magic['stat'])

        return stats

    def readlink(self, path):
        return os.readlink("./tree" + path)

    def readdir(self, path, offset):
        for e in os.listdir("./tree" + path):
            yield fuse.Direntry(e)

    def unlink(self, path):
        os.unlink("./tree" + path)

    def rmdir(self, path):
        os.rmdir("./tree" + path)

    def symlink(self, path, path1):
        os.symlink(path, "./tree" + path1)

    def rename(self, path, path1):
        os.rename("./tree" + path, "./tree" + path1)

    def link(self, path, path1):
        os.link("./tree" + path, "./tree" + path1)

    def chmod(self, path, mode):
        os.chmod("./tree" + path, mode)

    def chown(self, path, user, group):
        os.chown("./tree" + path, user, group)

    def truncate(self, path, len):
        """Truncate the logical contents of path to len bytes.

        The entry in ./tree is a manifest, so it cannot be truncated in
        place: the file is inflated to a temporary file, truncated there
        and deflated back.
        """
        treefile = "./tree" + path
        (fdnum, tmp_name) = tempfile.mkstemp()
        os.close(fdnum)
        try:
            inflate(treefile, tmp_name)
            f = open(tmp_name, "ab")
            try:
                f.truncate(len)
            finally:
                f.close()
            deflate(tmp_name, treefile)
        finally:
            os.unlink(tmp_name)

    def mknod(self, path, mode, dev):
        os.mknod("./tree" + path, mode, dev)

    def mkdir(self, path, mode):
        os.mkdir("./tree" + path, mode)

    def utime(self, path, times):
        os.utime("./tree" + path, times)

#    The following utimens method would do the same as the above utime method.
#    We can't make it better though as the Python stdlib doesn't know of
#    subsecond preciseness in access/modify times.
#
#    def utimens(self, path, ts_acc, ts_mod):
#      os.utime("." + path, (ts_acc.tv_sec, ts_mod.tv_sec))

    def access(self, path, mode):
        if not os.access("./tree" + path, mode):
            return -EACCES

#    This is how we could add stub extended attribute handlers...
#    (We can't have ones which aptly delegate requests to the underlying fs
#    because Python lacks a standard xattr interface.)
#
#    def getxattr(self, path, name, size):
#        val = name.swapcase() + '@' + path
#        if size == 0:
#            # We are asked for size of the value.
#            return len(val)
#        return val
#
#    def listxattr(self, path, size):
#        # We use the "user" namespace to please XFS utils
#        aa = ["user." + a for a in ("foo", "bar")]
#        if size == 0:
#            # We are asked for size of the attr list, ie. joint size of attrs
#            # plus null separators.
#            return len("".join(aa)) + len(aa)
#        return aa

    def statfs(self):
        """
        Should return an object with statvfs attributes (f_bsize, f_frsize...).
        Eg., the return value of os.statvfs() is such a thing (since py 2.2).
        If you are not reusing an existing statvfs object, start with
        fuse.StatVFS(), and define the attributes.

        To provide usable information (ie., you want sensible df(1)
        output), you are suggested to specify the following attributes:

            - f_bsize - preferred size of file blocks, in bytes
            - f_frsize - fundamental size of file blocks, in bytes
                [if you have no idea, use the same as blocksize]
            - f_blocks - total number of blocks in the filesystem
            - f_bfree - number of free blocks
            - f_files - total number of file inodes
            - f_ffree - number of free file inodes
        """

        return os.statvfs(".")

    def fsinit(self):
        os.chdir(self.root)

    class FuseArchiveFile(object):
        """Open-file handle backed by an inflated temporary shadow file."""

        def __init__(self, path, flags, *mode):
            # Inflate the file
            print("Init file: " + path)
            self.orig_path = path

            (fdnum, self.tmp_name) = tempfile.mkstemp()
            try:
                if os.path.exists("./tree" + self.orig_path):
                    inflate("./tree" + path, self.tmp_name)
            except Exception:
                # Don't leak the descriptor or the shadow file on failure
                os.close(fdnum)
                os.unlink(self.tmp_name)
                raise

            print("Shadow file: " + self.tmp_name + " for " + self.orig_path)
            print("Going to open shadow file with flags: " + str(flags)
                  + " mode " + str(mode))
            print("Flag2mode is: " + str(flag2mode(flags)))

            # Just use the fdnum they gave us instead of reopening it,
            # since that might fail
            self.file = os.fdopen(fdnum, flag2mode(flags))
            print("Open")

            self.fd = self.file.fileno()

            self.direct_io = False
            self.keep_cache = False

            print(str(self) + " init complete")

        def read(self, length, offset):
            print("Reading from " + self.orig_path)
            self.file.seek(offset)
            return self.file.read(length)

        def write(self, buf, offset):
            print("Writing to " + self.orig_path)
            self.file.seek(offset)
            self.file.write(buf)
            return len(buf)

        def release(self, flags):
            # Deflate the file
            print("Release: " + self.orig_path)
            self.file.close()

            print("Copying working file back to storage: " +
                  self.tmp_name + " -> " + self.orig_path)

            try:
                deflate(self.tmp_name, "./tree" + self.orig_path)
            finally:
                # Remove the shadow file even when deflating failed
                print("Deleting old file: " + self.tmp_name)
                os.unlink(self.tmp_name)

        def _fflush(self):
            if 'w' in self.file.mode or 'a' in self.file.mode:
                self.file.flush()

        def fsync(self, isfsyncfile):
            self._fflush()
            if isfsyncfile and hasattr(os, 'fdatasync'):
                os.fdatasync(self.fd)
            else:
                os.fsync(self.fd)

        def flush(self):
            self._fflush()
            # cf. xmp_flush() in fusexmp_fh.c
            os.close(os.dup(self.fd))

        def fgetattr(self):
            return os.fstat(self.fd)

        def ftruncate(self, len):
            self.file.truncate(len)

        def lock(self, cmd, owner, **kw):
            # The code here is much rather just a demonstration of the locking
            # API than something which actually was seen to be useful.

            # Advisory file locking is pretty messy in Unix, and the Python
            # interface to this doesn't make it better.
            # We can't do fcntl(2)/F_GETLK from Python in a platform independent
            # way. The following implementation *might* work under Linux.
            #
            # if cmd == fcntl.F_GETLK:
            #     import struct
            #
            #     lockdata = struct.pack('hhQQi', kw['l_type'], os.SEEK_SET,
            #                            kw['l_start'], kw['l_len'], kw['l_pid'])
            #     ld2 = fcntl.fcntl(self.fd, fcntl.F_GETLK, lockdata)
            #     flockfields = ('l_type', 'l_whence', 'l_start', 'l_len', 'l_pid')
            #     uld2 = struct.unpack('hhQQi', ld2)
            #     res = {}
            #     for i in xrange(len(uld2)):
            #          res[flockfields[i]] = uld2[i]
            #
            #     return fuse.Flock(**res)

            # Convert fcntl-ish lock parameters to Python's weird
            # lockf(3)/flock(2) medley locking API...
            op = {fcntl.F_UNLCK: fcntl.LOCK_UN,
                  fcntl.F_RDLCK: fcntl.LOCK_SH,
                  fcntl.F_WRLCK: fcntl.LOCK_EX}[kw['l_type']]
            if cmd == fcntl.F_GETLK:
                return -EOPNOTSUPP
            elif cmd == fcntl.F_SETLK:
                if op != fcntl.LOCK_UN:
                    op |= fcntl.LOCK_NB
            elif cmd == fcntl.F_SETLKW:
                pass
            else:
                return -EINVAL

            fcntl.lockf(self.fd, op, kw['l_start'], kw['l_len'])

    def main(self, *a, **kw):
        """Create ./storage and ./tree if needed, then run the FUSE loop."""

        self.file_class = self.FuseArchiveFile

        # This is where fragments go
        if not os.path.exists('storage'):
            os.mkdir('storage')

        # This is where the real files exist
        if not os.path.exists('tree'):
            os.mkdir('tree')

        return Fuse.main(self, *a, **kw)


def main():
    """Parse the command line, enter the storage directory and serve."""

    usage = """
Userspace nullfs-alike: mirror the filesystem tree from some point on.

""" + Fuse.fusage

    server = FuseArchive(version="%prog " + fuse.__version__,
                         usage=usage,
                         dash_s_do='setsingle')

    server.multithreaded = False

    server.parse(values=server, errex=1)

    if len(server.parser.largs) != 2:
        print("Usage: " + sys.argv[0] + " storageDirectory mountDirectory")
        sys.exit(1)

    server.root = server.parser.largs[0]

    try:
        if server.fuse_args.mount_expected():
            os.chdir(server.root)
    except OSError:
        sys.stderr.write("can't enter root of underlying filesystem\n")
        sys.exit(1)

    server.main()


if __name__ == '__main__':
    main()