aboutsummaryrefslogtreecommitdiffstats
path: root/FuseArchive/Storage/ZipFile.py
blob: 83898ef14b9cbcd39ce197a3b901beca1cf17764 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os, logging, FuseArchive, zipfile, sha
from binascii import hexlify

def save_chunk( chunk ):
    """Store a chunk in a content-addressed zip under ./storage/.

    The chunk is keyed by its SHA-1 digest (similar to the backuppc
    algorithm).  Digest collisions are resolved by appending an integer
    sub-index to the member name, so identical chunks share a slot and
    differing chunks with the same digest get successive slots.

    Returns [ digest, sub ] where digest is the raw SHA-1 digest string
    and sub is the collision slot this chunk occupies.
    """
    if FuseArchive.magic_profiling:
        return( [ 0, 0 ] )

    logging.debug( "Begin save_chunk, length: %d" % len( chunk ) )
    if FuseArchive.deep_debug:
        # Bug fix: was '"Chunk: %s" + hexlify(...)', which logged the
        # literal "%s" and never substituted the hex dump.
        logging.debug( "Chunk: %s" % hexlify( chunk ) )

    # Save this hash string, similar to the backuppc algo
    digest = sha.new( chunk ).digest()

    # hexlify (already imported) is equivalent to joining "%02x" % ord(b)
    # over every byte of the digest.
    hexdigest = hexlify( digest )

    # Should be about max of 32k zip files
    zipname = hexdigest[ 0:4 ] + ".zip"
    zippath = "./storage/" + zipname
    logging.debug( "Zip name: " + zipname )
    if not os.path.exists( zippath ):
        logging.debug( "Creating intial empty zip" )
        z = zipfile.ZipFile( zippath, 'w', zipfile.ZIP_DEFLATED, True )
        # append mode throws an exception if it's not zip, or maybe it's
        # just zero-length files
        z.writestr( 'junk', 'junk' )
        z.close()

    z = zipfile.ZipFile( zippath, 'a', zipfile.ZIP_DEFLATED, True )

    try:
        # Find a chunk slot: probe hexdigest_0, hexdigest_1, ... until we
        # hit our own data or an unused member name.
        sub = 0
        while True:
            checkpath = "%s_%d" % ( hexdigest, sub )
            logging.debug( "Checking: " + checkpath )
            try:
                data = z.read( checkpath )
            except KeyError:
                # ZipFile.read raises KeyError for a missing member;
                # the old bare "except:" hid every other failure too.
                data = ''

            if len( data ):
                if data == chunk:
                    logging.debug( "Found existing block" )
                    break
                else:
                    logging.debug( "Block exists but is not the same" )
                    sub += 1
            else:
                # We found a spot, dump our data here
                logging.debug( "No block here, creating new block" )
                z.writestr( checkpath, chunk )
                break
    finally:
        # Always release the archive, even if read/writestr blows up.
        z.close()

    logging.debug( "Got chunk slot: %d" % sub )
    return( [ digest, sub ] )

def load_chunk( key ):
    """Fetch a chunk previously stored by save_chunk.

    key is a ( digest, seq ) pair as returned by save_chunk: the raw
    SHA-1 digest string plus the collision slot number.

    Raises IOError if the chunk's archive lacks the member or the
    member is empty.  (Previously a missing member escaped as the
    zipfile module's KeyError and leaked the open archive handle.)
    """
    if FuseArchive.magic_profiling:
        return ''

    ( thash, seq ) = key
    logging.debug( "Begin load_chunk" )

    # Todo: make a digest -> path function to share with deflate
    hexdigest = hexlify( thash )

    zipname = hexdigest[ 0:4 ] + ".zip"
    logging.debug( "Zip name: " + zipname )
    z = zipfile.ZipFile( "./storage/" + zipname, 'r', zipfile.ZIP_DEFLATED, True )

    try:
        subpath = "%s_%d" % ( hexdigest, seq )
        logging.debug( "Chunk path: " + subpath )
        try:
            chunk = z.read( subpath )
        except KeyError:
            # ZipFile.read raises KeyError for an unknown member name;
            # surface it to callers as the intended IOError.
            raise IOError( "chunk %s not found in %s" % ( subpath, zipname ) )

        if not len( chunk ):
            # A zero-length member means the chunk is effectively missing.
            raise IOError( "chunk %s is empty in %s" % ( subpath, zipname ) )

        logging.debug( "Exporting chunk" )
        if FuseArchive.deep_debug:
            # Bug fix: was '"Load-Chunk: %s" + hexlify(...)', which logged
            # the literal "%s" and never substituted the hex dump.
            logging.debug( "Load-Chunk: %s" % hexlify( chunk ) )
    finally:
        # Close the archive on every exit path (the old code leaked it
        # whenever read() raised).
        z.close()

    return chunk