FuseArchive/Chunk.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

import struct, zlib, logging

# On-disk chunk header layout, in field order:
#   H    Format version
#   L    Payload block size (so you don't have to uncompress it to see)
#   B    Data compression type (0 = none, 1 = zlib)
#   L    Ref count (number of files that use this)
#   48x  filler to pad out to 64 bytes for future expansion
#
# The leading '<' selects standard field sizes with no implicit alignment, so
# the header is exactly 64 bytes on every platform.  The native-mode format
# 'HLBL48x' is only 64 bytes where a C long is 4 bytes wide; where it is 8
# bytes wide the struct grows to 80 bytes and the assert below fails at import
# time.  The explicit 'xx' / 'xxx' pad bytes reproduce the padding that native
# alignment inserted on such 4-byte-long builds, so field offsets are
# unchanged from that layout.
# NOTE(review): '<' also fixes the byte order to little-endian; this assumes
# existing archives were written on little-endian hosts -- confirm.
hformat = '<HxxLBxxxL48x'
compress_level = 6

_header_length = 64

# Guard against accidental edits to hformat that would change the header size.
assert struct.calcsize( hformat ) == _header_length, \
    "Header struct must be %d bytes not %d bytes" % \
    ( _header_length, struct.calcsize( hformat ) )

# This handles serialization and deserialization of compressed chunks with
# some header data
class Chunk:
    """A block of payload data together with its header fields.

    Instance fields:
      chunk -- the uncompressed payload bytes
      count -- the reference count written into the header
    """

    def __init__(self):
        # A bytes literal rather than '': the payload is appended to the
        # output of struct.pack() and fed to zlib, both of which deal in
        # bytes, so an empty chunk has to be bytes too.
        self.chunk = b''
        self.count = 0

    #####
    #   Need a way to serialize/deserialize just headers?  so we can update
    #   just counts, etc?
    #   Count needs to be tracked when we unlink?
    #####

    def serialize(self, compression = 1):
        """Return the header followed by the (optionally compressed) payload.

        compression -- 0 stores the payload raw, 1 compresses it with zlib
                       at the module's compress_level.

        The size recorded in the header is the length of the uncompressed
        payload.  Raises ValueError for any other compression type.
        """
        # Reject unsupported types before packing, so that values that do
        # not fit the one-byte header field also fail with ValueError
        # instead of a struct.error from struct.pack().
        if compression not in ( 0, 1 ):
            raise ValueError( "Invalid compression type: %d" % compression )

        payload_len = len( self.chunk )

        logging.debug( "Packing header: len: %d compression: %d count: %d",
            payload_len, compression, self.count )

        # First header field is the format version, currently always 0.
        header = struct.pack( hformat,
            0,
            payload_len,
            compression,
            self.count
        )

        if compression == 0:
            logging.debug( "Saving chunk data raw" )
            body = self.chunk
        else:
            logging.debug( "Saving chunk data using zlib" )
            body = zlib.compress( self.chunk, compress_level )

        data = header + body

        logging.debug( "Returning serialized block, size is %d", len( data ) )

        return data

    @staticmethod
    def deserialize(data):
        """Convert the output of serialize() back into a Chunk object.

        The leading header bytes are parsed for the ref count and the
        compression type; everything after the header is the payload.
        Raises ValueError if the header names an unknown compression type.
        """
        logging.debug( "Deserializing data of length %d", len( data ) )
        hd = Chunk.parse_header( data[ :_header_length ] )
        payload = data[ _header_length: ]

        obj = Chunk()
        obj.count = hd[ 'count' ]

        compression = hd[ 'compression' ]
        if compression == 0:
            obj.chunk = payload
        elif compression == 1:
            obj.chunk = zlib.decompress( payload )
        else:
            raise ValueError( "Invalid compression type: %d" % compression )

        return obj

    @staticmethod
    def parse_header(data):
        """Return a dict of the header fields found in data.

        Useful when you don't want to unserialize the whole thing just to
        see some attributes, which could involve uncompressing the payload.
        data is handed straight to struct.unpack(), so it must be exactly
        one packed header long.

        Keys: 'version', 'size', 'compression', 'count'.
        """
        logging.debug( "Parsing header of length %d", len( data ) )
        fields = struct.unpack( hformat, data )
        return {
            'version': fields[ 0 ],
            'size': fields[ 1 ],
            'compression': fields[ 2 ],
            'count': fields[ 3 ]
        }

    @staticmethod
    def inc_header_ref(data, count):
        """Add count to the ref count stored in a packed header.

        data must be exactly one packed header.  Returns a tuple of
        ( new ref count, re-packed header ); the other fields are carried
        over unchanged.
        """
        logging.debug( "Incrementing ref count by %d", count )
        fields = list( struct.unpack( hformat, data ) )
        # Index 3 is the ref count field of the header.
        fields[ 3 ] += count
        return( fields[ 3 ], struct.pack( hformat, *fields ) )

    @staticmethod
    def header_length():
        """Return the size in bytes of a packed header."""
        return( _header_length )