diff options
| -rwxr-xr-x | fusearchive.py | 168 | 
1 files changed, 92 insertions, 76 deletions
# This will write out a data block, it will return a key that can get this
# data back later
def save_chunk( chunk ):
    """Store chunk as a gzip file under ./storage and return its key.

    The chunk is hashed (length prefix + data, similar to the backuppc
    algo).  The first magic_depth digest bytes, hex encoded, name the nested
    sub directories; the full hex digest plus a "_<slot>" suffix names the
    file.  If a file with the same digest already exists but holds different
    data, the next slot number is tried, so identical chunks are stored once
    and colliding chunks get separate slots.

    Returns the raw (binary) digest, an underscore and the slot number as
    one string; pass it to load_chunk() to get the data back.
    """
    dmsg( 2, "Begin save_chunk" )
    # Save this hash string, similar to the backuppc algo
    digest = sha.new( str(len(chunk)) + chunk ).digest()

    # Write out our chunk
    chars = list( digest )
    dmsg( 4, chars )

    # We make the hexdigest here, yeah we could just call hexdigest()
    # but we need to essentially do this same thing to reassemble the
    # file anyway
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    # Subparts just needs the first N chars
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]

    dmsg( 4, subparts )
    subpath = '/'.join( subparts )
    dmsg( 3, "Subpath: " + subpath )

    # Make sure this sub path exists, one level at a time
    nextpart = "./storage"
    for part in subparts:
        nextpart += "/" + part
        if not os.path.exists( nextpart ):
            dmsg( 3, "Creating subdir: " + nextpart )
            os.mkdir( nextpart )

    # Find a chunk slot
    sub = 0
    while True:
        checkpath = "./storage/" + subpath + "/" + hexdigest + "_" + str( sub )
        dmsg( 3, "Checking: " + checkpath )
        if os.path.exists( checkpath ):
            # Check if this is our data; always release the handle, even
            # if the read fails
            verify = gzip.open( checkpath, "r" )
            try:
                verify_contents = verify.read()
            finally:
                verify.close()

            if verify_contents == chunk:
                dmsg( 3, "Found existing block" )
                break

            dmsg( 3, "Block exists but is not the same" )
            sub += 1
        else:
            # We found a spot, dump our data here
            dmsg( 3, "No block here, creating new block" )
            savechunk = gzip.open( checkpath, "w" )
            try:
                savechunk.write( chunk )
            finally:
                # close() must actually be called: it is what flushes the
                # compressed data and the gzip trailer to disk
                savechunk.close()
            break

    dmsg( 3, "Got chunk slot: " + str( sub ) )
    return digest + "_" + str( sub )

# This will return a data block by key that was saved previously
def load_chunk( key ):
    """Return the data stored under key, as returned by save_chunk().

    key is the raw digest, an underscore and the slot number.  Raises
    IOError when no chunk file exists for that key.
    """
    # The raw digest is binary and may itself contain "_" bytes, so only
    # the last underscore separates it from the slot number
    ( digest, seq ) = key.rsplit( '_', 1 )
    dmsg( 2, "Begin load_chunk" )

    chars = list( digest )
    dmsg( 4, chars )

    # Todo: make a digest -> path function to share with deflate
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )
    dmsg( 3, "Hash is: " + digest + " sub " + seq )
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]
    subpath = '/'.join( subparts )
    dmsg( 3, "Subpath: " + subpath )

    subpath += "/" + hexdigest + "_" + str( seq )

    dmsg( 3, "Chunk path: " + subpath )

    chunkpath = "./storage/" + subpath
    if not os.path.exists( chunkpath ):
        raise IOError( "Missing chunk: " + chunkpath )

    dmsg( 3, "Exporting chunk" )
    readchunk = gzip.open( chunkpath )
    try:
        chunk = readchunk.read()
    finally:
        readchunk.close()

    return chunk
"./storage/" + subpath ) -            out.write( readchunk.read() ) -            readchunk.close() -        else: -            raise IOError +        out.write( load_chunk( key ) )      dmsg( 2, "File inflated" )      out.close() @@ -107,60 +174,8 @@ def deflate( src, dest ):          if len( chunk ) == 0:              break -        # Save this hash string, similar to the backuppc algo -        digest = sha.new( str(len(chunk)) + chunk ).digest() - -        # Write out our chunk -        chars = list( digest ) -        dmsg( 4, chars ) - -        # We make the hexdigest here, yeah we could just call hexdigest() -        # but we need to essentially do this same thing to reassemble the -        # file anyway -        hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] ); - -        # Subparts just needs the first N chars -        subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ] - -        dmsg( 4, subparts ) -        subpath = '/'.join( subparts ); -        dmsg( 3, "Subpath: " + subpath ) - -        # Make sure this sub path exists -        nextpart = "./storage" -        for part in subparts: -            nextpart += "/" + part -            if not os.path.exists( nextpart ): -                dmsg( 3, "Creating subdir: " + nextpart ) -                os.mkdir( nextpart ) - -        # Find a chunk slot -        sub = 0 -        while True: -            checkpath = "./storage/" + subpath + "/" + hexdigest + "_" + str( sub ) -            dmsg( 3, "Checking: " + checkpath ) -            if os.path.exists( checkpath ): -                # Check if this is our data -                verify = gzip.open( checkpath, "r" ) -                verify_contents = verify.read() -                verify.close() - -                if verify_contents == chunk: -                    dmsg( 3, "Found existing block" ) -                    break -                else: -                    dmsg( 3, "Block exists but is not the same" ) -                    sub += 1 -       
     else: -                # We found a spot, dump our data here -                dmsg( 3, "No block here, creating new block" ) -                savechunk = gzip.open( checkpath, "w" ) -                savechunk.write( chunk ) -                savechunk.close -                break - -        dmsg( 3, "Got chunk slot: " + str( sub ) ) -        hashs.append( [ digest, sub ] ) +        key = save_chunk( chunk ) +        hashs.append( key )      inp.close() @@ -365,6 +380,7 @@ class FuseArchive(Fuse):          # BUG: If you cp -a a file then quickly ls -l sometimes it doesn't show          # up right?  like wrong size and stuff? +        # Maybe because release doesn't return a fuse message and is async?          def release(self, flags):              # Deflate the file              dmsg( 2, "Release: " + self.orig_path ) | 
