1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
import struct, zlib, logging
# Header fields, in the order they are packed:
#   Format version
#   Payload block size (so you don't have to uncompress it to see)
#   Data compression type (0 = none, 1 = zlib)
#   Ref count (number of files that use this)
#   Filler to pad out to 64 bytes for future expansion
# struct layout of the fixed-size chunk header: version (H), payload size (L),
# compression type (B), ref count (L), then zero padding up to 64 bytes.
#
# The '<' prefix selects little-endian byte order with standard sizes and no
# implicit alignment.  Without a prefix struct uses the platform's native C
# sizes and alignment, so wherever C long is 8 bytes the header is no longer
# 64 bytes and the assertion below fails at import time.  The explicit
# '2x' / '3x' pad bytes keep the fields at offsets 0, 4, 8 and 12, which is
# where native alignment places them when long is 4 bytes, so headers written
# with the old native format on such little-endian hosts stay readable.
# NOTE(review): headers written by a big-endian host with the old native
# format would not parse with this layout -- confirm none exist.
hformat = '<H2xLB3xL48x'

# zlib compression level used when a chunk payload is stored compressed.
compress_level = 6

# Size in bytes of the packed header that precedes every chunk payload.
_header_length = 64

# Sanity check that the format string and the advertised length agree.
assert struct.calcsize(hformat) == _header_length, \
    "Header struct must be %d bytes not %d bytes" % \
    (_header_length, struct.calcsize(hformat))
# This handles serialization and deserialization of compressed chunks with
# some header data
class Chunk:
    """A payload of bytes plus a reference count, with (de)serialization.

    The serialized form is a fixed-size header packed with ``hformat``
    followed by the payload, stored either raw or zlib-compressed.

    Attributes:
      chunk -- the uncompressed payload bytes
      count -- the reference count stored in / read from the header
    """

    def __init__(self):
        # The payload must be bytes: serialize() appends it to the packed
        # header and may hand it to zlib, and both reject text strings on
        # Python 3.  (On Python 2, b'' is the same object type as ''.)
        self.chunk = b''
        self.count = 0

    #####
    # Need a way to serialize/deserialize just headers? so we can update
    # just counts, etc?
    # Count needs to be tracked when we unlink?
    #####

    def serialize(self, compression = 1):
        """Return the packed header followed by the payload.

        compression selects how the payload is stored: 0 writes it raw,
        1 writes it zlib-compressed at ``compress_level``.  The header
        records format version 0, the uncompressed payload length, the
        compression type and the current reference count.

        Raises ValueError for any other compression type that still fits
        the header field; the header is packed first, so a value the
        struct format cannot hold fails there instead.
        """
        size = len(self.chunk)
        logging.debug("Packing header: len: %d compression: %d count: %d",
                      size, compression, self.count)
        header = struct.pack(hformat, 0, size, compression, self.count)

        if compression == 0:
            logging.debug("Saving chunk data raw")
            payload = self.chunk
        elif compression == 1:
            logging.debug("Saving chunk data using zlib")
            payload = zlib.compress(self.chunk, compress_level)
        else:
            raise ValueError("Invalid compression type: %d" % compression)

        data = header + payload
        logging.debug("Returning serialized block, size is %d", len(data))
        return data

    @staticmethod
    def deserialize(data):
        """Convert the output of serialize() back into a Chunk object.

        The first ``_header_length`` bytes are parsed as the header; the
        rest is taken as the payload and decompressed if the header says
        it was stored with zlib.

        Raises ValueError if the header names an unknown compression type.
        """
        logging.debug("Deserializing data of length %d", len(data))
        header = Chunk.parse_header(data[:_header_length])

        obj = Chunk()
        obj.count = header['count']

        compression = header['compression']
        body = data[_header_length:]
        if compression == 0:
            obj.chunk = body
        elif compression == 1:
            obj.chunk = zlib.decompress(body)
        else:
            raise ValueError("Invalid compression type: %d" % compression)
        return obj

    @staticmethod
    def parse_header(data):
        """Return the header fields of a packed header as a dict.

        Useful when only some attributes are wanted, since it avoids
        decompressing the payload.  data must be exactly the packed
        header (header_length() bytes), not a whole serialized chunk.

        The dict has the keys 'version', 'size', 'compression', 'count'.
        """
        logging.debug("Parsing header of length %d", len(data))
        version, size, compression, count = struct.unpack(hformat, data)
        return {
            'version': version,
            'size': size,
            'compression': compression,
            'count': count
        }

    @staticmethod
    def inc_header_ref(data, count):
        """Add count to the reference count stored in a packed header.

        data must be exactly the packed header.  Returns a tuple of
        (new reference count, re-packed header bytes); data itself is
        not modified.
        """
        logging.debug("Incrementing ref count by %d", count)
        fields = list(struct.unpack(hformat, data))
        # The reference count is the fourth packed field.
        fields[3] += count
        return (fields[3], struct.pack(hformat, *fields))

    @staticmethod
    def header_length():
        """Return the size in bytes of the packed header."""
        return _header_length
|