summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Pouzenc <ludovic.pouzenc@univ-jfc.fr>2015-08-21 17:54:11 +0200
committerLudovic Pouzenc <ludovic.pouzenc@univ-jfc.fr>2015-08-21 17:54:11 +0200
commit2a5966a6517e3e99062c5796200389fb591091f6 (patch)
treec19c9989814212899ce1d3821d89cc717ac805c8
parent0028794b08c1deba0554d4a90bbace69cde599ca (diff)
downloadraidguessfs-2a5966a6517e3e99062c5796200389fb591091f6.tar.gz
raidguessfs-2a5966a6517e3e99062c5796200389fb591091f6.tar.bz2
raidguessfs-2a5966a6517e3e99062c5796200389fb591091f6.zip
Adding RAID 5+0 support, memory fixes and improvements for big disks
-rw-r--r--mydisks.py5
-rw-r--r--myraid.py38
-rw-r--r--mytasks.py44
-rwxr-xr-xraidguessfs.py8
4 files changed, 72 insertions, 23 deletions
diff --git a/mydisks.py b/mydisks.py
index 4ca21bf..4a79c81 100644
--- a/mydisks.py
+++ b/mydisks.py
@@ -74,8 +74,9 @@ class MyDisks():
def is_readable(self,disk_no,offset,size):
- import random
- return random.randint(0,100) > 1 # FIXME : implement this (parse ddrescue log files)
+ return True
+ #import random
+ #return random.randint(0,100) > 1 # FIXME : implement this (parse ddrescue log files)
def read(self,disk_no,offset,size):
self.disks[disk_no].seek(offset)
diff --git a/myraid.py b/myraid.py
index 75402d9..4a6ad6a 100644
--- a/myraid.py
+++ b/myraid.py
@@ -79,6 +79,7 @@ class MyRaid():
self.raid_disk_count = 0
self.raid_layout = 'ls'
self.raid_disks = []
+ self.nested_subraid = 2
def get_raid_start(self):
return self.raid_start
@@ -182,7 +183,7 @@ class MyRaid():
if raid_type in ['1','5', '5+0']:
result = ''.join(
[ '0x%011x %c\n'%( addr, MyRaid.xor_blocks(self.raid_disks, addr, self.raid_sector_size)[0])
- for addr in range(start, end, self.raid_sector_size)
+ for addr in xrange(start, end, self.raid_sector_size)
])
else:
result = None
@@ -195,7 +196,7 @@ class MyRaid():
"""Returns actual RAID data"""
if raid_type == '0':
segment_no = offset / self.raid_chunk_size
- segment_off = offset % self.raid_chunk_size
+ segment_off = offset % self.raid_chunk_size
stripe_no = segment_no / self.raid_disk_count
par_disk = -1
data_disk = segment_no % self.raid_disk_count
@@ -213,7 +214,7 @@ class MyRaid():
elif raid_type == '5':
segment_no = offset / self.raid_chunk_size
- segment_off = offset % self.raid_chunk_size
+ segment_off = offset % self.raid_chunk_size
stripe_no = segment_no / (self.raid_disk_count-1)
if self.raid_layout in ['ls','la']:
@@ -229,8 +230,35 @@ class MyRaid():
data_disk = data_disk + 1
off_disk = self.raid_start + stripe_no * self.raid_chunk_size + segment_off
- # Note : self make shorter read than asked but convince the reader to be chunck aligned, which is great
+ # Note : could make error-free shorter reads than asked but convince the reader to be chunck aligned, which is great for perf
size2 = min(size, (segment_no+1) * self.raid_chunk_size - offset)
+
+ elif raid_type == '5+0':
+ subraid_disk_count = self.raid_disk_count / self.nested_subraid
+ segment_no = offset / self.raid_chunk_size
+ segment_off = offset % self.raid_chunk_size
+ stripe_no = segment_no / (self.raid_disk_count - self.nested_subraid) # segment_no / 12
+ subraid_no = (segment_no / (subraid_disk_count-1) ) % self.nested_subraid # (segment_no/6) mod 2
+
+ if self.raid_layout in ['ls','la']:
+ subraid_par_disk = (subraid_disk_count-1) - (stripe_no % subraid_disk_count)
+ else: # self.raid_layout in ['rs','ra']:
+ subraid_par_disk = stripe_no % subraid_disk_count
+
+ if self.raid_layout in ['ls','rs']:
+ subraid_data_disk = (subraid_par_disk+1 + (segment_no % (subraid_disk_count-1)) ) % subraid_disk_count
+ else: # self.raid_layout in ['la','ra']:
+ subraid_data_disk = segment_no % (subraid_disk_count-1)
+ if subraid_data_disk >= subraid_par_disk:
+ subraid_data_disk = subraid_data_disk + 1
+
+ par_disk = subraid_no * subraid_disk_count + subraid_par_disk
+ data_disk = subraid_no * subraid_disk_count + subraid_data_disk
+
+ off_disk = self.raid_start + stripe_no * self.raid_chunk_size + segment_off
+ # Note : could make error-free shorter reads than asked but convince the reader to be chunck aligned, which is great for perf
+ size2 = min(size, (segment_no+1) * self.raid_chunk_size - offset)
+
else:
raise Exception('Unimplemented read_raid_result() for raid_type == %s', raid_type)
@@ -253,7 +281,7 @@ class MyRaid():
other_fds.remove(data_fd)
data_arr = []
- for s in range(off_disk, off_disk+size2, self.raid_sector_size):
+ for s in xrange(off_disk, off_disk+size2, self.raid_sector_size):
if self.d.is_readable(self.raid_disk_order[data_disk],s,self.raid_sector_size):
# Current sector is readable from data disk, read it
logging.debug('-> 0x%011x : readable'%s)
diff --git a/mytasks.py b/mytasks.py
index 225064b..21ae365 100644
--- a/mytasks.py
+++ b/mytasks.py
@@ -61,17 +61,18 @@ def do_find_files(d,state):
start = 0
end = min(d.disks_size)
- one_percent = (end - start) / 100
- one_percent = one_percent + ( (-one_percent)%512 )
- logging.debug("start/end/1pc : %i / %i / %i"%(start,end,one_percent))
+ one_per_thousand = (end - start) / 1000
+ one_per_thousand = one_per_thousand + ( (-one_per_thousand)%512 )
+ logging.debug("start/end/1pm : %i / %i / %i"%(start,end,one_per_thousand))
state['found'] = []
- state['progress'] = 0
+ state['progress'] = 0.0
state['raw_matches'] = 0
state['state'] = 'searching'
raw_matches = []
- for offset in range(start, end, 512):
+ for offset in xrange(start, end, 512):
for disk_no in range(d.disk_count):
+ # Read disks sector by sector and try to match sectors present in ref_files (through ref_big_hash)
d.disks[disk_no].seek(offset)
data = d.disks[disk_no].read(512)
if data in ref_big_hash:
@@ -83,13 +84,32 @@ def do_find_files(d,state):
'disk_offset': offset
}
raw_matches.append(a)
- logging.info('raw_matches.append(%s)'%a)
+ logging.info("raw_matches.append(disk%02i@0x%011x <=> '%s'@0x%011x)"%(a['disk_no'], a['disk_offset'], a['ref_no'], a['ref_offset']))
state['raw_matches'] = len(raw_matches)
- if offset % one_percent == 0:
- state['progress'] = state['progress'] + 1
+
+ if offset % one_per_thousand == 0:
+ state['progress'] = state['progress'] + 0.1
agg = state['found']
state['state'] = 'aggregating'
+
+ # Blacklist ref_file sectors that is far too common in disk data
+ idx_matches = {}
+ for rm in raw_matches:
+ k = (rm['ref_no'], rm['ref_offset'])
+ if k in idx_matches:
+ idx_matches[k] += 1
+ else:
+ idx_matches[k] = 1
+
+ for k in idx_matches.keys():
+ if idx_matches[k] > 5:
+ logging.info("Skipping too many raw_matches for '%s'@0x%011x"%k)
+ logging.debug("Before filter : %i"%len(raw_matches))
+ raw_matches = filter(lambda rm: k != (rm['ref_no'], rm['ref_offset']), raw_matches)
+ logging.debug("After filter : %i"%len(raw_matches))
+
+ # Aggregate raw_matches
found = True
while found:
found = False
@@ -163,15 +183,15 @@ def do_find_files(d,state):
# end while not found
- if len(agg) < 200:
- state['found'] = agg
+ if len(agg) < 10000:
+ state['found'] = filter(lambda x: len(x['block_list']) > 1, agg)
state['state'] = 'searching'
else:
state['state'] = 'aborted'
raise Exception('Aborting after too many matches')
state['state'] = 'finished'
- state['progress'] = 100
+ state['progress'] = 100.0
except Exception as e:
logging.exception(e)
finally:
@@ -195,7 +215,7 @@ def do_find_bootsect(d,state):
state['found'] = []
state['progress'] = 0
state['state'] = 'searching'
- for offset in range(start, end, 512):
+ for offset in xrange(start, end, 512):
for disk_no in range(d.disk_count):
d.disks[disk_no].seek(offset)
data = d.disks[disk_no].read(512)
diff --git a/raidguessfs.py b/raidguessfs.py
index 4494fde..bb26b3f 100755
--- a/raidguessfs.py
+++ b/raidguessfs.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2.7
# RaidGuessFS, a FUSE pseudo-filesystem to guess RAID parameters of a damaged device
# Copyright (C) 2015 Ludovic Pouzenc <ludovic@pouzenc.fr>
@@ -366,10 +366,10 @@ RaidGuessFS is a pseudo-filesystem that allows to guess parameters and disk orde
"""
fuse.fuse_python_api = (0, 2)
- LOG_FILENAME = "raidguessfs.log"
+ LOG_FILENAME = "/tmp/raidguessfs.log"
#logging.basicConfig(filename=LOG_FILENAME,level=logging.WARN,)
- logging.basicConfig(filename=LOG_FILENAME,level=logging.INFO,)
- #logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG,)
+ #logging.basicConfig(filename=LOG_FILENAME,level=logging.INFO,)
+ logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG,)
server = RaidGuessFS(version="%prog " + fuse.__version__,usage=usage,dash_s_do='setsingle')
server.multithreaded = False