aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteve Slaven <bpk@hoopajoo.net>2009-11-02 23:36:12 (GMT)
committerSteve Slaven <bpk@hoopajoo.net>2009-11-02 23:36:12 (GMT)
commitb4f754a596f8d262d0f3089c37bf47c73d8ccfc1 (patch)
tree11d2287b9bb38961b216c4b03de9c722eaee0a6f
parentc44db1796c8389d89acd4122da6ffdd72998d6a0 (diff)
downloadfusearchive-b4f754a596f8d262d0f3089c37bf47c73d8ccfc1.zip
fusearchive-b4f754a596f8d262d0f3089c37bf47c73d8ccfc1.tar.gz
fusearchive-b4f754a596f8d262d0f3089c37bf47c73d8ccfc1.tar.bz2
Use a stringbuffer list instead of appending strings; apparently Python's
string handling is very slow when appending to or slicing large strings
-rw-r--r--FuseArchive/ChunkBuffer.py21
-rw-r--r--FuseArchive/ChunkFile.py106
2 files changed, 88 insertions, 39 deletions
diff --git a/FuseArchive/ChunkBuffer.py b/FuseArchive/ChunkBuffer.py
new file mode 100644
index 0000000..4ef6370
--- /dev/null
+++ b/FuseArchive/ChunkBuffer.py
@@ -0,0 +1,21 @@
+import logging
+
+# Handle efficient operations on a non-fixed length buffer like appending,
+# replacing, reading chunks, etc
+class ChunkBuffer:
+ def __init__( self, data = '' ):
+ logging.debug( "Creating chunkbuffer: %s" % data )
+ self.chunk = list( data )
+
+ def append( self, s ):
+ self.chunk.extend( list( s ) )
+
+ def replace( self, s, start, end ):
+ self.chunk
+
+ def length( self ):
+ return len( self.chunk )
+
+ def string(self):
+ logging.debug( "Stringifying: %s" % self.chunk )
+ return ''.join( self.chunk )
diff --git a/FuseArchive/ChunkFile.py b/FuseArchive/ChunkFile.py
index 6a0ea34..ca7ee2b 100644
--- a/FuseArchive/ChunkFile.py
+++ b/FuseArchive/ChunkFile.py
@@ -2,17 +2,16 @@ import logging, os, errno, fcntl, fuse, FuseArchive, copy
import FuseArchive.Storage.ZipFile, FuseArchive.Storage.FileSystem
from binascii import hexlify
from FuseArchive.Serializer import Serializer
+from ChunkBuffer import ChunkBuffer
# These control some of the file output
-magic_blocksize = 1024 * 128
+magic_blocksize = 1024 * 1024 * 5
# Use a tiny block size to debug writes, so you can use a smaller test file
#magic_blocksize = 1024
chunkstyle = 'fs'
-# Memory for dirty blocks, per file (1M)
-dirty_size = 1024 * 1024 * 1;
# This is the number of actualy blocks in that size
-dirty_flush = int( dirty_size / magic_blocksize )
+dirty_flush = 5 * magic_blocksize
# This is a cache of open files by inode, to fix the lseek == size problem
# this causes a failure in fsx-linux becuase to to lseek(fd,0,seek_end) it
@@ -78,8 +77,8 @@ class ChunkFile(object):
self.modified = False
# This is the current in-memory chunk and offset in to data[]
- self.chunk_cache = {};
- self.chunk = ''
+ self.chunk_cache = {}
+ self.chunk = ChunkBuffer()
self.chunk_index = -1
self.chunk_modified = False
self.chunk_size = magic_blocksize
@@ -181,17 +180,17 @@ class ChunkFile(object):
key = self.chunks[ index ]
if key:
- if isinstance( key, str ):
+ if isinstance( key, ChunkBuffer ):
logging.debug( "Found cached dirty page" )
self.chunk = key
else:
logging.debug( "Index: %s" % key )
- self.chunk = load_chunk( key )
+ self.chunk = ChunkBuffer( load_chunk( key ) )
else:
logging.debug( "No chunk at this index, loading nothing" )
- self.chunk = ''
+ self.chunk = ChunkBuffer()
- logging.debug( "Loaded chunk of length: %d" % len( self.chunk ) )
+ logging.debug( "Loaded chunk of length: %d" % self.chunk.length() )
self.chunk_index = index
self.chunk_modified = False
@@ -204,11 +203,12 @@ class ChunkFile(object):
# Make sure we have room for this chunk
size = len( self.chunks )
if self.chunk_index >= size:
- self.chunks.extend( [ '' ] * ( self.chunk_index -size + 1 ) )
+ self.chunks.extend( [ ChunkBuffer() ] * ( self.chunk_index -size + 1 ) )
# Increment dirty chunks if we had a key here already
+ logging.debug( "Chunk is: %s" % self.chunks[ self.chunk_index ] );
if isinstance( self.chunks[ self.chunk_index ], list ) or \
- len( self.chunks[ self.chunk_index ] ) == 0:
+ self.chunks[ self.chunk_index ].length() == 0:
self.dirty_chunks += 1
logging.debug( "Dirty chunks is now: %d" % self.dirty_chunks )
logging.debug( "Dirty flush at: %d" % dirty_flush )
@@ -223,14 +223,34 @@ class ChunkFile(object):
# This flushes any cached chunks
def _flush_chunks(self):
for index in range( len( self.chunks ) ):
- if isinstance( self.chunks[ index ], str ):
+ if isinstance( self.chunks[ index ], ChunkBuffer ):
logging.debug( "Flushing chunk at %d" % index )
- key = save_chunk( self.chunks[ index ] )
+ key = save_chunk( self.chunks[ index ].string() )
self.chunks[ index ] = key
logging.debug( "Key was %s" % key )
self.dirty_chunks = 0
- self._update_chunk_references()
+ # If we had an old chunk here, free it
+ if len(self.original_chunks) >= index + 1:
+ oldkey = self.original_chunks[ index ]
+ if oldkey != key:
+ # Free this chunk
+ unlock_chunk( oldkey )
+ # And keep this chunk
+ lock_chunk( key )
+ # Else chunk didn't change, don't relock or anything
+ else:
+ # We did not have a chunk here so lock this chunk
+ lock_chunk( key )
+ # And extend original chunks by 1 (we are walking
+ # sequentially so we don't need to worry about padding
+ # out intermediate chunks)
+ self.original_chunks.extend( [ ChunkBuffer() ] )
+
+ # And update the key in original chunks
+ self.original_chunks[ index ] = key
+
+ #self._update_chunk_references()
def read(self, length, offset):
logging.debug( "Reading from %s offset: %d (0x%x) length: %d (0x%d)" %
@@ -246,8 +266,8 @@ class ChunkFile(object):
while data_read < length and not is_eof:
logging.debug( "Pulling chunk data: %d" % index )
self._load_chunk( index )
- if len(self.chunk):
- chunk_remaining = len(self.chunk) - rest
+ if self.chunk.length():
+ chunk_remaining = self.chunk.length() - rest
to_read = chunk_remaining
data_left = length - data_read
if data_left < chunk_remaining:
@@ -259,7 +279,7 @@ class ChunkFile(object):
logging.debug( "rest: %d" % rest )
logging.debug( "Copying %d bytes" % to_read )
- data += self.chunk[ rest:(rest+to_read) ]
+ data += self.chunk.string()[ rest:(rest+to_read) ]
data_read += to_read
index += 1
rest = 0
@@ -298,21 +318,20 @@ class ChunkFile(object):
while this_index < index:
self._load_chunk( this_index )
- fill_null = self.chunk_size - len(self.chunk)
+ fill_null = self.chunk_size - self.chunk.length()
logging.debug( "Filling this chunk with null, bytes: %d" % fill_null )
- self.chunk += "\0" * fill_null
- logging.debug( "Chunk is now: %d bytes" % len( self.chunk) )
+ self.chunk.append( "\0" * fill_null )
+ logging.debug( "Chunk is now: %d bytes" % self.chunk.length() )
self.chunk_modified = True
self._save_chunk()
this_index += 1
self._load_chunk( index )
- # Now check if this chunk needs to be extended
- if len( self.chunk ) < rest:
- fill_null = rest - len(self.chunk)
+ if self.chunk.length() < rest:
+ fill_null = rest - self.chunk.length()
logging.debug( "Filling final chunk with null, bytes: %d" % fill_null )
- self.chunk += "\0" * fill_null
+ self.chunk.append( "\0" * fill_null )
self.chunk_modified = True
self._save_chunk()
@@ -344,13 +363,22 @@ class ChunkFile(object):
logging.debug( "Pre-Buf: %s" % hexlify(buf) )
logging.debug( "Pre-Chunk: %s" % hexlify(self.chunk) )
- # Since python doesn't do in-place reassignment like you
- # can with splice() we will reconstruct the data by joining
- # stuff by offsets (first chars to skip, then our joining
- # buf chunk, the everything that would have been after it)
- self.chunk = self.chunk[ :rest ] + \
- buf[ buf_offset:(buf_offset+this_len) ] + \
- self.chunk[ (rest + this_len): ]
+ # Check if we are appending only, appends are much faster than
+ # splicing up string
+ if self.chunk.length() == rest and len( buf ) <= this_len:
+ logging.debug( "Doing quick append" )
+ self.chunk.append( buf )
+ else:
+ logging.debug( "SLOOOOW! Doing string splice" )
+ # Since python doesn't do in-place reassignment like you
+ # can with splice() we will reconstruct the data by joining
+ # stuff by offsets (first chars to skip, then our joining
+ # buf chunk, the everything that would have been after it)
+
+ # This sucks for moving around data, it is very slow!
+ self.chunk.replace( buf[ buf_offset:(buf_offset+this_len) ],
+ rest, rest + this_len )
+
if FuseArchive.deep_debug:
logging.debug( "Post-Buf: %s" % hexlify(buf) )
@@ -368,7 +396,7 @@ class ChunkFile(object):
if offset + len(buf) > self.size:
self.size = offset + len(buf)
- logging.debug( "This chunk size is now: %d" % len( self.chunk ) )
+ logging.debug( "This chunk size is now: %d" % self.chunk.length() )
logging.debug( "File size is now: %d" % self.size )
logging.debug( "Num Chunks: %d" % len( self.chunks ) )
@@ -405,7 +433,7 @@ class ChunkFile(object):
logging.debug( "We have %d chunks, calculating size" % numchunks )
self._load_chunk( numchunks - 1 )
self.size = ( numchunks - 1 ) * self.chunk_size + \
- len( self.chunk )
+ self.chunk.length()
else:
logging.debug( "No chunks, setting size to zero" )
self.size = 0
@@ -546,7 +574,7 @@ class ChunkFile(object):
if length == 0:
logging.debug( "Creating 0 chunk file" )
self.chunks = []
- self.chunk = ''
+ self.chunk = ChunkBuffer()
elif self.size <= length:
logging.debug( "Need to pad out file, writing/seeking to %d" % length )
@@ -567,13 +595,13 @@ class ChunkFile(object):
# last chunk
if len( self.chunks ):
self._load_chunk( len( self.chunks ) - 1 )
- logging.debug( "Loaded final chunk, len: %d" % len( self.chunk ) )
+ logging.debug( "Loaded final chunk, len: %d" % self.chunk.length() )
# Now truncate this item if needed
- if len( self.chunk ) > extra_bytes:
+ if self.chunk.length() > extra_bytes:
logging.debug( "Truncating final chunk to %d" % extra_bytes )
- self.chunk = self.chunk[ :extra_bytes ]
- logging.debug( "Chunk is now: %d bytes" % len( self.chunk ) )
+ self.chunk.truncate( extra_bytes )
+ logging.debug( "Chunk is now: %d bytes" % self.chunk.length() )
self.chunk_modified = True
self.modified = True