author     Ludovic Pouzenc <lpouzenc@gmail.com>  2015-07-14 21:45:37 +0200
committer  Ludovic Pouzenc <lpouzenc@gmail.com>  2015-07-14 21:45:37 +0200
commit     734a807df22cc085a4f711964cffae623fe648f1 (patch)
tree       6c219f39bf5d780504e2ca18df5775376dbaefbc
parent     259c938a7796f1aa4a8d2d9477aaed8bc72293e8 (diff)
download   raidguessfs-734a807df22cc085a4f711964cffae623fe648f1.tar.gz
           raidguessfs-734a807df22cc085a4f711964cffae623fe648f1.tar.bz2
           raidguessfs-734a807df22cc085a4f711964cffae623fe648f1.zip
find_files: load all ref files in memory (in a dict) to find every
matching sector with one read and one dict access per sector
-rw-r--r--  mytasks.py  37
1 file changed, 21 insertions, 16 deletions
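The commit message describes the new strategy: every 512-byte sector of the reference files is indexed once in a dict keyed by its raw content, so the later disk scan costs one read and one dict lookup per sector instead of one comparison per reference file. Below is a minimal standalone sketch of that idea; it is not the RaidGuessFS code, and the SECTOR constant, the function names and the example paths are illustrative assumptions.

# Standalone sketch of the dict-based sector matching described above.
# Not the RaidGuessFS code: SECTOR, the function names and the example
# paths are illustrative assumptions.

SECTOR = 512

def build_ref_index(ref_paths):
    """Map each non-empty reference sector to its (ref_no, byte offset)."""
    index = {}
    for ref_no, path in enumerate(ref_paths):
        with open(path, 'rb') as fd:
            while True:
                offset = fd.tell()
                data = fd.read(SECTOR)
                if not data:
                    break
                if data == b'\0' * len(data):
                    continue                               # all-zero sectors match everywhere, skip them
                index.setdefault(data, (ref_no, offset))   # keep the first occurrence, as the patch does
    return index

def scan_disk(disk_path, index):
    """Yield (ref_no, ref_offset, disk_offset) for every sector found in the index."""
    with open(disk_path, 'rb') as fd:
        while True:
            offset = fd.tell()
            data = fd.read(SECTOR)
            if not data:
                break
            hit = index.get(data)
            if hit is not None:
                yield hit[0], hit[1], offset

if __name__ == '__main__':
    idx = build_ref_index(['ref_a.bin', 'ref_b.bin'])      # hypothetical reference files
    for ref_no, ref_off, disk_off in scan_disk('disk0.img', idx):
        print("ref %d @ 0x%011x matches disk @ 0x%011x" % (ref_no, ref_off, disk_off))

The trade-off is memory: the whole reference set stays resident in the dict, but the per-sector lookup no longer grows with the number of reference files, which is what the patch below implements inside do_find_files().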
diff --git a/mytasks.py b/mytasks.py
index 80b04ed..dcb53a8 100644
--- a/mytasks.py
+++ b/mytasks.py
@@ -27,19 +27,25 @@ def do_find_files(d,state):
         state['state'] = 'initializing'
         ref_paths = state['filepaths']
         ref_count = len(ref_paths)
-        ref_fds = [None]*ref_count
-        ref_sizes = [None]*ref_count
-        ref_offset = [None]*ref_count
-        ref_cur_sect = [None]*ref_count
+        ref_big_hash = {}
         for ref_no in range(ref_count):
             path = state['filepaths'][ref_no]
-            logging.debug("Try to open reffile '%s'"%path)
-            ref_offset[ref_no] = 0
-            ref_sizes[ref_no] = os.lstat(path).st_size
-            ref_fds[ref_no] = open(path, "r")
-            ref_fds[ref_no].seek(0)
-            ref_cur_sect[ref_no] = ref_fds[ref_no].read(512)
-            logging.debug("Opened reffile '%s'"%path)
+            logging.debug("Try to open ref. file '%s'"%path)
+            with open(path, 'rb') as fd:
+                logging.info("Loading ref. file '%s'"%path)
+                while True:
+                    ref_offset = fd.tell()
+                    data = fd.read(512)
+                    if not data:
+                        break
+                    if data == '\0'*512:
+                        logging.info("Ignoring empty sector in '%s'@0x%011x"%(path,ref_offset))
+                    elif data in ref_big_hash:
+                        (prev_ref_no, prev_ref_offset) = ref_big_hash[data]
+                        logging.info("Non-unique sector found in ref. files ('%s'@0x%011x and '%s'@0x%011x)"%
+                            (prev_ref_no, prev_ref_offset, ref_no, ref_offset))
+                    else:
+                        ref_big_hash[data] = (ref_no, ref_offset)
 
         start = 0
         end = min(d.disks_size)
@@ -54,14 +60,12 @@ def do_find_files(d,state):
             for disk_no in range(d.disk_count):
                 d.disks[disk_no].seek(offset)
                 data = d.disks[disk_no].read(512)
-                for ref_no in range(ref_count):
-                    #TODO : handle buffers shorter than 512
-                    if data == ref_cur_sect[ref_no]:
-                        #TODO : look for the rest of the file
+                if data in ref_big_hash:
                         f = state['found']
                         if len(f) < 200:
                             # TODO: aggregate the matches
-                            f.append((ref_paths[ref_no],ref_offset[ref_no],disk_no,offset))
+                            (ref_no, ref_offset) = ref_big_hash[data]
+                            f.append((ref_no,ref_offset,disk_no,offset))
                             state['found'] = f
                         else:
                             state['state'] = 'aborted'
@@ -69,6 +73,7 @@ def do_find_files(d,state):
             if offset % one_percent == 0:
                 state['progress'] = state['progress'] + 1
 
+        ref_big_hash.clear()
         state['state'] = 'finished'
         state['progress'] = 100
     except Exception as e: