From 1af5ec1e05c0cd5dfa1737d0822aaaa73a5a9833 Mon Sep 17 00:00:00 2001
From: Steve Slaven
Date: Mon, 3 Aug 2009 22:15:54 -0700
Subject: Added binary format chunk serializer

---
Review notes (below the "---" marker, so not part of the commit message):
 * Chunk.py now derives the header length with struct.calcsize() instead of
   assuming 64 bytes.
 * FileSystem.py calls Chunk.unserialize(); Chunk defines no deserialize().
 * Indentation was rebuilt from a whitespace-collapsed copy of this patch.
   The FileSystem.py context depth is a guess; confirm against the tree or
   apply with "git apply --ignore-whitespace".  The blob ids on the "index"
   lines still describe the original contents.

diff --git a/FuseArchive/Chunk.py b/FuseArchive/Chunk.py
new file mode 100644
index 0000000..c011382
--- /dev/null
+++ b/FuseArchive/Chunk.py
@@ -0,0 +1,84 @@
+import struct, zlib
+
+# Header layout, in hformat order (native byte order, sizes and alignment):
+#   H    Format version
+#   H    Payload block size (so you don't have to uncompress it to see)
+#   B    Data compression type (0 = none, 1 = zlib)
+#   L    Ref count (number of files that use this)
+#   52x  filler to pad out to 64 bytes for future expansion
+hformat = 'HHBL52x'
+
+# Real length of the packed header.  Native alignment means this is only 64
+# where a C long is 4 bytes (it is 68 where a long is 8 bytes), so slice
+# with hsize instead of a literal 64.
+hsize = struct.calcsize( hformat )
+compress_level = 6
+
+# This handles serialization and deserialization of compressed chunks with
+# some header data
+class Chunk:
+    def __init__(self):
+        # Uncompressed payload
+        self.chunk = ''
+        # Ref count written to / read from the header
+        self.count = 0
+
+    #####
+    # Need a way to serialize/deserialize just headers?  so we can update
+    # just counts, etc?
+    # Count needs to be tracked when we unlink?
+    #####
+
+    # Returns a string representing the serialized class to be sent to a
+    # file: the packed header followed by the payload.  compression picks
+    # the payload encoding (0 = none, 1 = zlib).
+    def serialize(self, compression = 1):
+        data = struct.pack( hformat,
+            0,
+            len( self.chunk ),
+            compression,
+            self.count
+        )
+
+        if compression == 0:
+            data += self.chunk
+        elif compression == 1:
+            data += zlib.compress( self.chunk, compress_level )
+        else:
+            raise ValueError( "Invalid compression type: %d" % compression )
+
+        return data
+
+    # Converts the output of serialize back to a chunk object.  Raises
+    # ValueError if the header names an unknown compression type.
+    @staticmethod
+    def unserialize(data):
+        hd = Chunk.parse_header( data[ :hsize ] )
+        obj = Chunk()
+        obj.count = hd[ 'count' ]
+
+        payload = data[ hsize: ]
+        compression = hd[ 'compression' ]
+        if compression == 0:
+            obj.chunk = payload
+        elif compression == 1:
+            obj.chunk = zlib.decompress( payload )
+        else:
+            raise ValueError( "Invalid compression type: %d" % compression )
+
+        return obj
+
+    # Returns a dict of the header data, in case you don't want to
+    # unserialize the whole thing to see some attributes which would
+    # involve potentially uncompressing some data.  data must be exactly
+    # the header bytes (hsize long), as struct.unpack requires.
+    @staticmethod
+    def parse_header(data):
+        fields = struct.unpack( hformat, data )
+        return {
+            'version': fields[ 0 ],
+            'size': fields[ 1 ],
+            'compression': fields[ 2 ],
+            'count': fields[ 3 ]
+        }
+
diff --git a/FuseArchive/Storage/FileSystem.py b/FuseArchive/Storage/FileSystem.py
index 4310481..547e567 100644
--- a/FuseArchive/Storage/FileSystem.py
+++ b/FuseArchive/Storage/FileSystem.py
@@ -1,5 +1,6 @@
 import FuseArchive, logging, os, sha
 from binascii import hexlify
+from FuseArchive.Chunk import Chunk
 
 magic_depth = 5
 
@@ -51,7 +52,7 @@ def save_chunk( chunk ):
             verify_contents = verify.read()
             verify.close()
 
-            verify_contents = FuseArchiveChunk.deserialize( verify_contents )
+            verify_contents = Chunk.unserialize( verify_contents )
             if verify_contents.chunk == chunk:
                 logging.debug( "Found existing block" )
                 break
-- 
cgit v0.10.2