aboutsummaryrefslogtreecommitdiffstats
path: root/FuseArchive/Storage/FileSystem.py
blob: b5d5222776793d55a0a5c41d91c079c96790c482 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import FuseArchive, logging, os, sha, sys
from binascii import hexlify
from FuseArchive.Chunk import Chunk

# Number of leading digest bytes used as nested directory levels when
# fanning chunk files out under ./storage (see _key_to_path and save_chunk).
magic_depth = 5

def inc_chunk( key, count ):
    """Adjust the on-disk reference count of the chunk at `key` by `count`.

    key:   ( digest, sequence ) tuple as returned by save_chunk.
    count: signed delta to apply (+1 from lock_chunk, -1 from unlock_chunk).

    When the count drops to zero the chunk file is deleted.
    """
    # Increment this chunk header reference
    path = _key_to_path( key )
    logging.debug( "Updating header on %s, ref count + %d" % ( path, count ) )

    # Chunk files are written in binary mode (see save_chunk), so the
    # header must be updated in binary mode too ("r+b", not "r+"), or the
    # bytes would not round-trip on platforms with text-mode translation.
    f = open( path, "r+b" )
    try:
        data = f.read( Chunk.header_length() )
        newcount, data = Chunk.inc_header_ref( data, count )
        f.seek( 0 )
        f.write( data )
    finally:
        # Close even if the header update raises, so the handle never leaks.
        f.close()
    logging.debug( "Count is now: %d" % newcount )

    assert newcount >= 0, "Count is negative?!!"

    if newcount == 0:
        logging.debug( "Freeing chunk" )
        os.unlink( path )

        # TODO: it would be nice if we were to clear out empty directories
        # here

def lock_chunk( key ):
    """Take one reference on the chunk identified by `key`."""
    inc_chunk( key, 1 )

def unlock_chunk( key ):
    """Drop one reference on the chunk identified by `key`."""
    inc_chunk( key, -1 )

def _key_to_path( key ):
    """Map a ( digest, sequence ) chunk key to its file path under ./storage."""
    logging.debug( "Converting key to path" )
    thash, seq = key
    chars = list( thash )
    logging.debug( chars )

    # Todo: make a digest -> path function to share with deflate
    hexdigest = ''.join( "%02x" % ord( c ) for c in chars )
    logging.debug( "Hash is: %s sub %d" % ( hexdigest, seq ) )

    # Fan out over the first magic_depth bytes so directories stay small.
    subpath = '/'.join( "%02x" % ord( c ) for c in chars[ :magic_depth ] )
    logging.debug( "Subpath: " + subpath )

    return "./storage/" + subpath + "/%s_%d" % ( hexdigest, seq )


# This will write out a data block, it will return a key that can get this
# data back later
def save_chunk( chunk ):
    """Store `chunk` under ./storage, deduplicated by SHA-1 content hash.

    Returns [ digest, sub ]: the raw SHA-1 digest of the data and the
    collision slot number, suitable for passing to load_chunk.
    """
    if FuseArchive.magic_profiling:
        return( [ 0, 0 ] )

    logging.debug( "Begin save_chunk, length: %d" % len( chunk ) )
    if FuseArchive.deep_debug:
        # Was '"Chunk: %s" + hexlify(...)', which concatenated instead of
        # formatting and left a literal "%s" in the log line.
        logging.debug( "Chunk: %s" % hexlify( chunk ) )

    # Save this hash string, similar to the backuppc algo
    digest = sha.new( chunk ).digest()

    # Write out our chunk
    chars = list( digest )
    logging.debug( chars )

    # We make the hexdigest here, yeah we could just call hexdigest()
    # but we need to essentially do this same thing to reassemble the
    # file anyway
    hexdigest = ''.join( [ "%02x" % ord( x ) for x in chars ] )

    # Subparts just needs the first N chars
    subparts = [ "%02x" % ord( x ) for x in chars[ :magic_depth ] ]

    logging.debug( subparts )
    subpath = '/'.join( subparts )
    logging.debug( "Subpath: " + subpath )

    # Make sure this sub path exists
    nextpart = "./storage"
    for part in subparts:
        nextpart += "/" + part
        if not os.path.exists( nextpart ):
            logging.debug( "Creating subdir: " + nextpart )
            os.mkdir( nextpart )

    # Find a chunk slot: scan sequence numbers until we either hit our own
    # data (dedup) or an empty slot (new block).
    sub = 0
    while True:
        checkpath = "./storage/%s/%s_%d" % ( subpath, hexdigest, sub )
        logging.debug( "Checking: " + checkpath )
        if os.path.exists( checkpath ):
            # Check if this is our data
            verify = open( checkpath, "rb" )
            try:
                verify_contents = verify.read()
            finally:
                verify.close()

            verify_contents = Chunk.deserialize( verify_contents )
            if verify_contents.chunk == chunk:
                logging.debug( "Found existing block" )
                break
            else:
                # Same digest, different data: hash collision, try next slot
                logging.debug( "Block exists but is not the same" )
                sub += 1
        else:
            # We found a spot, dump our data here
            logging.debug( "No block here, creating new block" )
            # Renamed from "save_chunk", which shadowed this function itself.
            new_chunk = Chunk()
            new_chunk.chunk = chunk
            outfile = open( checkpath, "wb" )
            try:
                outfile.write( new_chunk.serialize() )
            finally:
                outfile.close()
            break

    logging.debug( "Got chunk slot: %d" % sub )
    return( [ digest, sub ] )

# This will return a data block by key that was saved previously
def load_chunk( key ):
    """Read back the chunk stored under `key` ( digest, sequence ).

    Raises IOError if no chunk file exists for the key.
    """
    if FuseArchive.magic_profiling:
        return ''

    logging.debug( "Begin load_chunk" )

    subpath = _key_to_path( key )

    logging.debug( "Chunk path: " + subpath )

    if not os.path.exists( subpath ):
        raise IOError( "No chunk stored at %s" % subpath )

    logging.debug( "Exporting chunk" )
    # Chunks are written with "wb" (see save_chunk), so they must be read
    # back in binary mode too; the original text-mode open would corrupt
    # binary data on platforms with newline translation.
    readchunk = open( subpath, "rb" )
    try:
        chunk = Chunk.deserialize( readchunk.read() ).chunk
    finally:
        readchunk.close()

    if FuseArchive.deep_debug:
        # Was '"Load-Chunk: %s" + hexlify(...)', which concatenated instead
        # of formatting and left a literal "%s" in the log line.
        logging.debug( "Load-Chunk: %s" % hexlify( chunk ) )

    return chunk