From 2a5966a6517e3e99062c5796200389fb591091f6 Mon Sep 17 00:00:00 2001
From: Ludovic Pouzenc
Date: Fri, 21 Aug 2015 17:54:11 +0200
Subject: Adding RAID 5+0 support, memory fixes and improvements for big disks

---
 mydisks.py     |  5 +++--
 myraid.py      | 38 +++++++++++++++++++++++++++++++++-----
 mytasks.py     | 44 ++++++++++++++++++++++++++++++++------------
 raidguessfs.py |  8 ++++----
 4 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/mydisks.py b/mydisks.py
index 4ca21bf..4a79c81 100644
--- a/mydisks.py
+++ b/mydisks.py
@@ -74,8 +74,9 @@ class MyDisks():
 
     def is_readable(self,disk_no,offset,size):
-        import random
-        return random.randint(0,100) > 1 # FIXME : implement this (parse ddrescue log files)
+        return True
+        #import random
+        #return random.randint(0,100) > 1 # FIXME : implement this (parse ddrescue log files)
 
     def read(self,disk_no,offset,size):
         self.disks[disk_no].seek(offset)
diff --git a/myraid.py b/myraid.py
index 75402d9..4a6ad6a 100644
--- a/myraid.py
+++ b/myraid.py
@@ -79,6 +79,7 @@ class MyRaid():
         self.raid_disk_count = 0
         self.raid_layout = 'ls'
         self.raid_disks = []
+        self.nested_subraid = 2
 
     def get_raid_start(self):
         return self.raid_start
@@ -182,7 +183,7 @@ class MyRaid():
         if raid_type in ['1','5', '5+0']:
             result = ''.join( [ '0x%011x %c\n'%( addr, MyRaid.xor_blocks(self.raid_disks, addr, self.raid_sector_size)[0])
-                for addr in range(start, end, self.raid_sector_size)
+                for addr in xrange(start, end, self.raid_sector_size)
                 ])
         else:
             result = None
@@ -195,7 +196,7 @@ class MyRaid():
         """Returns actual RAID data"""
         if raid_type == '0':
             segment_no = offset / self.raid_chunk_size
-            segment_off = offset % self.raid_chunk_size
+            segment_off = offset % self.raid_chunk_size
             stripe_no = segment_no / self.raid_disk_count
             par_disk = -1
             data_disk = segment_no % self.raid_disk_count
@@ -213,7 +214,7 @@ class MyRaid():
 
         elif raid_type == '5':
             segment_no = offset / self.raid_chunk_size
-            segment_off = offset % self.raid_chunk_size
+            segment_off = offset % self.raid_chunk_size
             stripe_no = segment_no / (self.raid_disk_count-1)
 
             if self.raid_layout in ['ls','la']:
@@ -229,8 +230,35 @@ class MyRaid():
                 data_disk = data_disk + 1
 
             off_disk = self.raid_start + stripe_no * self.raid_chunk_size + segment_off
-            # Note : self make shorter read than asked but convince the reader to be chunck aligned, which is great
+            # Note : could return error-free reads shorter than asked, but that convinces the caller to stay chunk aligned, which is great for perf
             size2 = min(size, (segment_no+1) * self.raid_chunk_size - offset)
+
+        elif raid_type == '5+0':
+            subraid_disk_count = self.raid_disk_count / self.nested_subraid
+            segment_no = offset / self.raid_chunk_size
+            segment_off = offset % self.raid_chunk_size
+            stripe_no = segment_no / (self.raid_disk_count - self.nested_subraid)      # segment_no / 12
+            subraid_no = (segment_no / (subraid_disk_count-1) ) % self.nested_subraid  # (segment_no/6) mod 2
+
+            if self.raid_layout in ['ls','la']:
+                subraid_par_disk = (subraid_disk_count-1) - (stripe_no % subraid_disk_count)
+            else: # self.raid_layout in ['rs','ra']:
+                subraid_par_disk = stripe_no % subraid_disk_count
+
+            if self.raid_layout in ['ls','rs']:
+                subraid_data_disk = (subraid_par_disk+1 + (segment_no % (subraid_disk_count-1)) ) % subraid_disk_count
+            else: # self.raid_layout in ['la','ra']:
+                subraid_data_disk = segment_no % (subraid_disk_count-1)
+                if subraid_data_disk >= subraid_par_disk:
+                    subraid_data_disk = subraid_data_disk + 1
+
+            par_disk = subraid_no * subraid_disk_count + subraid_par_disk
+            data_disk = subraid_no * subraid_disk_count + subraid_data_disk
+
+            off_disk = self.raid_start + stripe_no * self.raid_chunk_size + segment_off
+            # Note : could return error-free reads shorter than asked, but that convinces the caller to stay chunk aligned, which is great for perf
+            size2 = min(size, (segment_no+1) * self.raid_chunk_size - offset)
+
         else:
             raise Exception('Unimplemented read_raid_result() for raid_type == %s', raid_type)
@@ -253,7 +281,7 @@ class MyRaid():
                 other_fds.remove(data_fd)
 
             data_arr = []
-            for s in range(off_disk, off_disk+size2, self.raid_sector_size):
+            for s in xrange(off_disk, off_disk+size2, self.raid_sector_size):
                 if self.d.is_readable(self.raid_disk_order[data_disk],s,self.raid_sector_size):
                     # Current sector is readable from data disk, read it
                     logging.debug('-> 0x%011x : readable'%s)
diff --git a/mytasks.py b/mytasks.py
index 225064b..21ae365 100644
--- a/mytasks.py
+++ b/mytasks.py
@@ -61,17 +61,18 @@ def do_find_files(d,state):
 
         start = 0
         end = min(d.disks_size)
-        one_percent = (end - start) / 100
-        one_percent = one_percent + ( (-one_percent)%512 )
-        logging.debug("start/end/1pc : %i / %i / %i"%(start,end,one_percent))
+        one_per_thousand = (end - start) / 1000
+        one_per_thousand = one_per_thousand + ( (-one_per_thousand)%512 )
+        logging.debug("start/end/1pm : %i / %i / %i"%(start,end,one_per_thousand))
 
         state['found'] = []
-        state['progress'] = 0
+        state['progress'] = 0.0
         state['raw_matches'] = 0
         state['state'] = 'searching'
         raw_matches = []
-        for offset in range(start, end, 512):
+        for offset in xrange(start, end, 512):
             for disk_no in range(d.disk_count):
+                # Read disks sector by sector and try to match sectors present in ref_files (through ref_big_hash)
                 d.disks[disk_no].seek(offset)
                 data = d.disks[disk_no].read(512)
                 if data in ref_big_hash:
@@ -83,13 +84,32 @@ def do_find_files(d,state):
                         'disk_offset': offset
                     }
                     raw_matches.append(a)
-                    logging.info('raw_matches.append(%s)'%a)
+                    logging.info("raw_matches.append(disk%02i@0x%011x <=> '%s'@0x%011x)"%(a['disk_no'], a['disk_offset'], a['ref_no'], a['ref_offset']))
                     state['raw_matches'] = len(raw_matches)
-            if offset % one_percent == 0:
-                state['progress'] = state['progress'] + 1
+
+            if offset % one_per_thousand == 0:
+                state['progress'] = state['progress'] + 0.1
 
         agg = state['found']
         state['state'] = 'aggregating'
+
+        # Blacklist ref_file sectors that are far too common in disk data
+        idx_matches = {}
+        for rm in raw_matches:
+            k = (rm['ref_no'], rm['ref_offset'])
+            if k in idx_matches:
+                idx_matches[k] += 1
+            else:
+                idx_matches[k] = 1
+
+        for k in idx_matches.keys():
+            if idx_matches[k] > 5:
+                logging.info("Skipping too many raw_matches for '%s'@0x%011x"%k)
+                logging.debug("Before filter : %i"%len(raw_matches))
+                raw_matches = filter(lambda rm: k != (rm['ref_no'], rm['ref_offset']), raw_matches)
+                logging.debug("After filter : %i"%len(raw_matches))
+
+        # Aggregate raw_matches
         found = True
         while found:
             found = False
@@ -163,15 +183,15 @@ def do_find_files(d,state):
 
         # end while not found
 
-        if len(agg) < 200:
-            state['found'] = agg
+        if len(agg) < 10000:
+            state['found'] = filter(lambda x: len(x['block_list']) > 1, agg)
             state['state'] = 'searching'
         else:
            state['state'] = 'aborted'
             raise Exception('Aborting after too many matches')
 
         state['state'] = 'finished'
-        state['progress'] = 100
+        state['progress'] = 100.0
     except Exception as e:
         logging.exception(e)
     finally:
@@ -195,7 +215,7 @@ def do_find_bootsect(d,state):
         state['found'] = []
         state['progress'] = 0
         state['state'] = 'searching'
-        for offset in range(start, end, 512):
+        for offset in xrange(start, end, 512):
             for disk_no in range(d.disk_count):
                 d.disks[disk_no].seek(offset)
                 data = d.disks[disk_no].read(512)
diff --git a/raidguessfs.py b/raidguessfs.py
index 4494fde..bb26b3f 100755
--- a/raidguessfs.py
+++ b/raidguessfs.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2.7
 
 # RaidGuessFS, a FUSE pseudo-filesystem to guess RAID parameters of a damaged device
 # Copyright (C) 2015 Ludovic Pouzenc
@@ -366,10 +366,10 @@ RaidGuessFS is a pseudo-filesystem that allows to guess parameters and disk order
 """
     fuse.fuse_python_api = (0, 2)
 
-    LOG_FILENAME = "raidguessfs.log"
+    LOG_FILENAME = "/tmp/raidguessfs.log"
     #logging.basicConfig(filename=LOG_FILENAME,level=logging.WARN,)
-    logging.basicConfig(filename=LOG_FILENAME,level=logging.INFO,)
-    #logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG,)
+    #logging.basicConfig(filename=LOG_FILENAME,level=logging.INFO,)
+    logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG,)
 
     server = RaidGuessFS(version="%prog " + fuse.__version__,usage=usage,dash_s_do='setsingle')
     server.multithreaded = False
-- 
cgit v1.2.3
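
Below is a standalone sketch of the offset-to-disk arithmetic behind the new raid_type == '5+0' branch in myraid.py above. It is illustrative only: the map_raid50() helper and its example geometry (6 disks split into 2 nested RAID 5 sub-arrays, 64 KiB chunks, left-symmetric 'ls' layout) are assumptions chosen for the demo, not values taken from the patch, and integer division is written with // so the sketch also runs under Python 3.

def map_raid50(offset, raid_start=0, chunk_size=65536,
               disk_count=6, nested_subraid=2, layout='ls'):
    """Map a logical RAID 5+0 offset to (data_disk, parity_disk, disk_offset, max_read)."""
    subraid_disk_count = disk_count // nested_subraid          # disks per nested RAID 5
    segment_no  = offset // chunk_size
    segment_off = offset %  chunk_size
    # A full stripe holds one data chunk per non-parity disk of every sub-array.
    stripe_no  = segment_no // (disk_count - nested_subraid)
    # Which nested RAID 5 sub-array holds this data segment.
    subraid_no = (segment_no // (subraid_disk_count - 1)) % nested_subraid

    # Parity rotation inside the sub-array (left vs right layouts).
    if layout in ('ls', 'la'):
        subraid_par_disk = (subraid_disk_count - 1) - (stripe_no % subraid_disk_count)
    else:  # 'rs', 'ra'
        subraid_par_disk = stripe_no % subraid_disk_count

    # Data disk inside the sub-array (symmetric vs asymmetric layouts).
    if layout in ('ls', 'rs'):
        subraid_data_disk = (subraid_par_disk + 1
                             + segment_no % (subraid_disk_count - 1)) % subraid_disk_count
    else:  # 'la', 'ra'
        subraid_data_disk = segment_no % (subraid_disk_count - 1)
        if subraid_data_disk >= subraid_par_disk:
            subraid_data_disk += 1

    data_disk = subraid_no * subraid_disk_count + subraid_data_disk
    par_disk  = subraid_no * subraid_disk_count + subraid_par_disk
    disk_off  = raid_start + stripe_no * chunk_size + segment_off
    max_read  = (segment_no + 1) * chunk_size - offset         # stay chunk aligned
    return data_disk, par_disk, disk_off, max_read

if __name__ == '__main__':
    # Walk the first stripe and a half, chunk by chunk, to watch the parity rotate.
    for off in range(0, 6 * 65536, 65536):
        print(off, map_raid50(off))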