diff options
| -rw-r--r-- | FuseArchive/ChunkBuffer.py | 21 | ||||
| -rw-r--r-- | FuseArchive/ChunkFile.py | 106 | 
2 files changed, 88 insertions, 39 deletions
diff --git a/FuseArchive/ChunkBuffer.py b/FuseArchive/ChunkBuffer.py new file mode 100644 index 0000000..4ef6370 --- /dev/null +++ b/FuseArchive/ChunkBuffer.py @@ -0,0 +1,21 @@ +import logging + +# Handle efficient operations on a non-fixed length buffer like appending, +# replacing, reading chunks, etc +class ChunkBuffer: +    def __init__( self, data = '' ): +        logging.debug( "Creating chunkbuffer: %s" % data ) +        self.chunk = list( data ) + +    def append( self, s ): +        self.chunk.extend( list( s ) ) + +    def replace( self, s, start, end ): +        self.chunk + +    def length( self ): +        return len( self.chunk ) + +    def string(self): +        logging.debug( "Stringifying: %s" % self.chunk ) +        return ''.join( self.chunk ) diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py index 6a0ea34..ca7ee2b 100644 --- a/FuseArchive/ChunkFile.py +++ b/FuseArchive/ChunkFile.py @@ -2,17 +2,16 @@ import logging, os, errno, fcntl, fuse, FuseArchive, copy  import FuseArchive.Storage.ZipFile, FuseArchive.Storage.FileSystem  from binascii import hexlify  from FuseArchive.Serializer import Serializer +from ChunkBuffer import ChunkBuffer  # These control some of the file output -magic_blocksize = 1024 * 128 +magic_blocksize = 1024 * 1024 * 5  # Use a tiny block size to debug writes, so you can use a smaller test file  #magic_blocksize = 1024  chunkstyle = 'fs' -# Memory for dirty blocks, per file (1M) -dirty_size = 1024 * 1024 * 1;  # This is the number of actualy blocks in that size -dirty_flush = int( dirty_size / magic_blocksize ) +dirty_flush = 5 * magic_blocksize  # This is a cache of open files by inode, to fix the lseek == size problem  # this causes a failure in fsx-linux becuase to to lseek(fd,0,seek_end) it @@ -78,8 +77,8 @@ class ChunkFile(object):          self.modified = False          # This is the current in-memory chunk and offset in to data[] -        self.chunk_cache = {}; -        self.chunk = '' +        self.chunk_cache = {} +        self.chunk = ChunkBuffer()          self.chunk_index = -1          self.chunk_modified = False          self.chunk_size = magic_blocksize @@ -181,17 +180,17 @@ class ChunkFile(object):              key = self.chunks[ index ]          if key: -            if isinstance( key, str ): +            if isinstance( key, ChunkBuffer ):                  logging.debug( "Found cached dirty page" )                  self.chunk = key              else:                  logging.debug( "Index: %s" % key ) -                self.chunk = load_chunk( key ) +                self.chunk = ChunkBuffer( load_chunk( key ) )          else:              logging.debug( "No chunk at this index, loading nothing" ) -            self.chunk = '' +            self.chunk = ChunkBuffer() -        logging.debug( "Loaded chunk of length: %d" % len( self.chunk ) ) +        logging.debug( "Loaded chunk of length: %d" % self.chunk.length() )          self.chunk_index = index          self.chunk_modified = False @@ -204,11 +203,12 @@ class ChunkFile(object):              # Make sure we have room for this chunk              size = len( self.chunks )              if self.chunk_index >= size: -                self.chunks.extend( [ '' ] * ( self.chunk_index  -size + 1 ) ) +                self.chunks.extend( [ ChunkBuffer() ] * ( self.chunk_index  -size + 1 ) )              # Increment dirty chunks if we had a key here already +            logging.debug( "Chunk is: %s" % self.chunks[ self.chunk_index ] );              if isinstance( self.chunks[ self.chunk_index ], list ) or \ -                    len( self.chunks[ self.chunk_index ] ) == 0: +                    self.chunks[ self.chunk_index ].length() == 0:                  self.dirty_chunks += 1                  logging.debug( "Dirty chunks is now: %d" % self.dirty_chunks )                  logging.debug( "Dirty flush at: %d" % dirty_flush ) @@ -223,14 +223,34 @@ class ChunkFile(object):      # This flushes any cached chunks      def _flush_chunks(self):          for index in range( len( self.chunks ) ): -            if isinstance( self.chunks[ index ], str ): +            if isinstance( self.chunks[ index ], ChunkBuffer ):                  logging.debug( "Flushing chunk at %d" % index ) -                key = save_chunk( self.chunks[ index ] ) +                key = save_chunk( self.chunks[ index ].string() )                  self.chunks[ index ] = key                  logging.debug( "Key was %s" % key )                  self.dirty_chunks = 0 -        self._update_chunk_references() +                # If we had an old chunk here, free it +                if len(self.original_chunks) >= index + 1: +                    oldkey = self.original_chunks[ index ] +                    if oldkey != key: +                        # Free this chunk +                        unlock_chunk( oldkey ) +                        # And keep this chunk +                        lock_chunk( key ) +                    # Else chunk didn't change, don't relock or anything +                else: +                    # We did not have a chunk here so lock this chunk +                    lock_chunk( key ) +                    # And extend original chunks by 1 (we are walking +                    # sequentially so we don't need to worry about padding +                    # out intermediate chunks) +                    self.original_chunks.extend( [ ChunkBuffer() ] ) + +                # And update the key in original chunks +                self.original_chunks[ index ] = key + +        #self._update_chunk_references()      def read(self, length, offset):          logging.debug( "Reading from %s offset: %d (0x%x) length: %d (0x%d)" % @@ -246,8 +266,8 @@ class ChunkFile(object):          while data_read < length and not is_eof:              logging.debug( "Pulling chunk data: %d" % index )              self._load_chunk( index ) -            if len(self.chunk): -                chunk_remaining = len(self.chunk) - rest +            if self.chunk.length(): +                chunk_remaining = self.chunk.length() - rest                  to_read = chunk_remaining                  data_left = length - data_read                  if data_left < chunk_remaining: @@ -259,7 +279,7 @@ class ChunkFile(object):                  logging.debug( "rest: %d" % rest )                  logging.debug( "Copying %d bytes" % to_read ) -                data += self.chunk[ rest:(rest+to_read) ] +                data += self.chunk.string()[ rest:(rest+to_read) ]                  data_read += to_read                  index += 1                  rest = 0 @@ -298,21 +318,20 @@ class ChunkFile(object):              while this_index < index:                  self._load_chunk( this_index ) -                fill_null = self.chunk_size - len(self.chunk) +                fill_null = self.chunk_size - self.chunk.length()                  logging.debug( "Filling this chunk with null, bytes: %d" % fill_null ) -                self.chunk += "\0" * fill_null -                logging.debug( "Chunk is now: %d bytes" % len( self.chunk) ) +                self.chunk.append( "\0" * fill_null ) +                logging.debug( "Chunk is now: %d bytes" % self.chunk.length() )                  self.chunk_modified = True                  self._save_chunk()                  this_index += 1          self._load_chunk( index ) -        # Now check if this chunk needs to be extended -        if len( self.chunk ) < rest: -            fill_null = rest - len(self.chunk) +        if self.chunk.length() < rest: +            fill_null = rest - self.chunk.length()              logging.debug( "Filling final chunk with null, bytes: %d" % fill_null ) -            self.chunk += "\0" * fill_null +            self.chunk.append( "\0" * fill_null )              self.chunk_modified = True              self._save_chunk() @@ -344,13 +363,22 @@ class ChunkFile(object):                  logging.debug( "Pre-Buf: %s" % hexlify(buf) )                  logging.debug( "Pre-Chunk: %s" % hexlify(self.chunk) ) -            # Since python doesn't do in-place reassignment like you -            # can with splice() we will reconstruct the data by joining -            # stuff by offsets (first chars to skip, then our joining -            # buf chunk, the everything that would have been after it) -            self.chunk = self.chunk[ :rest ] + \ -                buf[ buf_offset:(buf_offset+this_len) ] + \ -                self.chunk[ (rest + this_len): ] +            # Check if we are appending only, appends are much faster than +            # splicing up string +            if self.chunk.length() == rest and len( buf ) <= this_len: +                logging.debug( "Doing quick append" ) +                self.chunk.append( buf ) +            else: +                logging.debug( "SLOOOOW!  Doing string splice" ) +                # Since python doesn't do in-place reassignment like you +                # can with splice() we will reconstruct the data by joining +                # stuff by offsets (first chars to skip, then our joining +                # buf chunk, the everything that would have been after it) + +                # This sucks for moving around data, it is very slow! +                self.chunk.replace( buf[ buf_offset:(buf_offset+this_len) ], +                    rest, rest + this_len ) +              if FuseArchive.deep_debug:                  logging.debug( "Post-Buf: %s" % hexlify(buf) ) @@ -368,7 +396,7 @@ class ChunkFile(object):          if offset + len(buf) > self.size:              self.size = offset + len(buf) -        logging.debug( "This chunk size is now: %d" % len( self.chunk ) ) +        logging.debug( "This chunk size is now: %d" % self.chunk.length() )          logging.debug( "File size is now: %d" % self.size )          logging.debug( "Num Chunks: %d" % len( self.chunks ) ) @@ -405,7 +433,7 @@ class ChunkFile(object):                  logging.debug( "We have %d chunks, calculating size" % numchunks )                  self._load_chunk( numchunks - 1 )                  self.size = ( numchunks - 1 ) * self.chunk_size + \ -                    len( self.chunk ) +                    self.chunk.length()              else:                  logging.debug( "No chunks, setting size to zero" )                  self.size = 0 @@ -546,7 +574,7 @@ class ChunkFile(object):          if length == 0:              logging.debug( "Creating 0 chunk file" )              self.chunks = [] -            self.chunk = '' +            self.chunk = ChunkBuffer()          elif self.size <= length:              logging.debug( "Need to pad out file, writing/seeking to %d" % length ) @@ -567,13 +595,13 @@ class ChunkFile(object):              # last chunk              if len( self.chunks ):                  self._load_chunk( len( self.chunks ) - 1 ) -                logging.debug( "Loaded final chunk, len: %d" % len( self.chunk ) ) +                logging.debug( "Loaded final chunk, len: %d" % self.chunk.length() )              # Now truncate this item if needed -            if len( self.chunk ) > extra_bytes: +            if self.chunk.length() > extra_bytes:                  logging.debug( "Truncating final chunk to %d" % extra_bytes ) -                self.chunk = self.chunk[ :extra_bytes ] -                logging.debug( "Chunk is now: %d bytes" % len( self.chunk ) ) +                self.chunk.truncate( extra_bytes ) +                logging.debug( "Chunk is now: %d bytes" % self.chunk.length() )          self.chunk_modified = True          self.modified = True  | 
