#!/usr/bin/env python
# RaidGuessFS, a FUSE pseudo-filesystem to guess RAID parameters of a damaged device
# Copyright (C) 2015 Ludovic Pouzenc
#
# This file is part of RaidGuessFS.
#
# RaidGuessFS is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# RaidGuessFS is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with RaidGuessFS. If not, see <http://www.gnu.org/licenses/>.

import os, multiprocessing, binascii, logging
import mydisks

def do_find_files(d, state):
    logging.info("Enter do_find_files()")
    ref_big_hash = {}
    try:
        state['state'] = 'loading'
        ref_paths = state['filepaths']
        ref_count = len(ref_paths)

        # Index every non-empty, unique sector of each reference file by its raw content
        for ref_no in range(ref_count):
            path = ref_paths[ref_no]
            logging.debug("Try to open ref. file '%s'" % path)
            with open(path, 'rb') as fd:
                logging.info("Loading ref. file '%s'" % path)
                warn_empty = True
                warn_dup = True
                while True:
                    ref_offset = fd.tell()
                    data = fd.read(512)
                    if not data:
                        break
                    if data == '\0'*512:
                        # All-zero sectors are far too common to identify anything
                        if warn_empty:
                            logging.info("Ignoring empty sector in '%s'@0x%011x" % (path, ref_offset))
                            warn_empty = False
                        else:
                            logging.debug("Ignoring empty sector in '%s'@0x%011x" % (path, ref_offset))
                    elif data in ref_big_hash:
                        (prev_ref_no, prev_ref_offset) = ref_big_hash[data]
                        if warn_dup:
                            logging.info("Non-unique sector found in ref. files ('%s'@0x%011x and '%s'@0x%011x)"
                                % (ref_paths[prev_ref_no], prev_ref_offset, path, ref_offset))
                            warn_dup = False
                        else:
                            logging.debug("Non-unique sector found in ref. files ('%s'@0x%011x and '%s'@0x%011x)"
                                % (ref_paths[prev_ref_no], prev_ref_offset, path, ref_offset))
                    else:
                        # Hash in memory the whole read sector and store its provenance
                        ref_big_hash[data] = (ref_no, ref_offset)

        start = 0
        # FIXME : make it tunable
        start = 0x00132870000
        end = min(d.disks_size)
        one_per_thousand = (end - start) / 1000
        # Round up to a multiple of the sector size so the modulo test below can match
        one_per_thousand = one_per_thousand + ((-one_per_thousand) % 512)
        logging.debug("start/end/1pm : %i / %i / %i" % (start, end, one_per_thousand))

        state['found'] = []
        state['progress'] = 0.0
        state['raw_matches'] = 0
        state['state'] = 'searching'
        raw_matches = []
        for offset in xrange(start, end, 512):
            # Read disks sector by sector and try to match sectors present
            # in ref. files (through ref_big_hash)
            for disk_no in range(d.disk_count):
                d.disks[disk_no].seek(offset)
                data = d.disks[disk_no].read(512)
                if data in ref_big_hash:
                    (ref_no, ref_offset) = ref_big_hash[data]
                    a = {
                        'ref_no':      ref_no,
                        'ref_offset':  ref_offset,
                        'disk_no':     disk_no,
                        'disk_offset': offset
                    }
                    raw_matches.append(a)
                    logging.info("raw_matches.append(disk%02i@0x%011x <=> '%s'@0x%011x)"
                        % (a['disk_no'], a['disk_offset'], a['ref_no'], a['ref_offset']))
                    state['raw_matches'] = len(raw_matches)
            if offset % one_per_thousand == 0:
                state['progress'] = state['progress'] + 0.1

        agg = state['found']
        state['state'] = 'aggregating'

        # Blacklist ref. file sectors that are far too common in disk data
        idx_matches = {}
        for rm in raw_matches:
            k = (rm['ref_no'], rm['ref_offset'])
            if k in idx_matches:
                idx_matches[k] += 1
            else:
                idx_matches[k] = 1
        for k in idx_matches.keys():
            if idx_matches[k] > 5:
                logging.info("Skipping too many raw_matches for '%s'@0x%011x" % k)
                logging.debug("Before filter : %i" % len(raw_matches))
                raw_matches = filter(lambda rm: k != (rm['ref_no'], rm['ref_offset']), raw_matches)
                logging.debug("After filter : %i" % len(raw_matches))

        # Aggregate raw_matches into runs of consecutive reference sectors.
        # block_list items are (disk_no, start_offset, end_offset) half-open
        # byte ranges on the member disks.
        found = True
        while found:
            found = False
            i = 0
            rm_len = len(raw_matches)
            while not found and i < rm_len:
                x = raw_matches[i]

                # Try to find an aggregated item that ends just before our ref_offset
                if not found:
                    for a in agg:
                        if a['ref_no'] == x['ref_no'] and x['ref_offset'] == a['ref_offset_end']:
                            a['ref_offset_end'] = x['ref_offset'] + 512
                            b = a['block_list'].pop()
                            if b[0] == x['disk_no'] and b[2] == x['disk_offset']:
                                # Contiguous on disk too : extend the last block
                                a['block_list'].append((b[0], b[1], x['disk_offset'] + 512))
                            else:
                                a['block_list'].append(b)
                                a['block_list'].append((x['disk_no'], x['disk_offset'], x['disk_offset'] + 512))
                            x['consumed'] = True
                            found = True
                            break

                # Try to find an aggregated item that starts just after our ref_offset
                if not found:
                    for a in agg:
                        if a['ref_no'] == x['ref_no'] and x['ref_offset'] + 512 == a['ref_offset_start']:
                            a['ref_offset_start'] = x['ref_offset']
                            b = a['block_list'].pop(0)
                            if b[0] == x['disk_no'] and b[1] == x['disk_offset'] + 512:
                                # Contiguous on disk too : extend the first block
                                a['block_list'].insert(0, (b[0], x['disk_offset'], b[2]))
                            else:
                                a['block_list'].insert(0, b)
                                a['block_list'].insert(0, (x['disk_no'], x['disk_offset'], x['disk_offset'] + 512))
                            x['consumed'] = True
                            found = True
                            break

                # Try to find another match right before or after the current one
                # to create a new aggregate
                if not found:
                    for x2 in raw_matches:
                        if x2 != x and x2['ref_no'] == x['ref_no'] \
                                and abs(x2['ref_offset'] - x['ref_offset']) == 512:
                            if x2['ref_offset'] > x['ref_offset']:
                                first, last = x, x2
                            else:
                                first, last = x2, x
                            if first['disk_no'] == last['disk_no'] \
                                    and last['disk_offset'] == first['disk_offset'] + 512:
                                # Contiguous on disk : a single two-sector block
                                block_list = [(first['disk_no'], first['disk_offset'], last['disk_offset'] + 512)]
                            else:
                                block_list = [
                                    (first['disk_no'], first['disk_offset'], first['disk_offset'] + 512),
                                    (last['disk_no'],  last['disk_offset'],  last['disk_offset'] + 512)
                                ]
                            a = {
                                'ref_no':           x['ref_no'],
                                'ref_offset_start': first['ref_offset'],
                                'ref_offset_end':   last['ref_offset'] + 512,
                                'block_list':       block_list
                            }
                            logging.info('agg.append(%s)' % a)
                            agg.append(a)
                            x['consumed'] = True
                            x2['consumed'] = True
                            found = True
                            break

                # Orphans are not injected in agg
                i += 1
            # end while (x = raw_matches[i])

            # Remove all consumed items from raw_matches before the next iteration
            if found:
                raw_matches = filter(lambda x: not 'consumed' in x, raw_matches)
                state['raw_matches'] = len(raw_matches)
        # end while found

        if len(agg) < 10000:
            # Only multi-block aggregates are interesting : they show where a
            # file crosses from one disk region to another
            state['found'] = filter(lambda x: len(x['block_list']) > 1, agg)
            state['state'] = 'searching'
        else:
            state['state'] = 'aborted'
            raise Exception('Aborting after too many matches')

        state['state'] = 'finished'
        state['progress'] = 100.0
    except Exception as e:
        logging.exception(e)
    finally:
        ref_big_hash.clear()
    logging.info("Exit. do_find_files()")
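# Illustration (added note, with made-up offsets) : the shape of one entry of
# state['found'] produced above. A reference file whose first four sectors
# were located as two runs on two member disks would yield something like :
#
#   { 'ref_no': 0,
#     'ref_offset_start': 0x00000000000, 'ref_offset_end': 0x00000000800,
#     'block_list': [ (0, 0x00132870000, 0x00132870400),     # disk 0, 2 sectors
#                     (1, 0x00132870000, 0x00132870400) ] }  # disk 1, 2 sectors
#
# A block_list with more than one entry means consecutive file sectors jump
# between disks or offsets, which is what exposes the RAID chunk size and
# member order, hence the len(x['block_list']) > 1 filter above.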
def do_find_bootsect(d, state):
    logging.info("Enter do_find_bootsect()")
    try:
        state['state'] = 'initializing'
        ref_sig = binascii.unhexlify('55AA')
        ref_bootflags = (binascii.unhexlify('00'), binascii.unhexlify('80'))
        ref_parttypes = (binascii.unhexlify('FB'), binascii.unhexlify('FC'))

        start = 0
        end = min(d.disks_size)
        one_per_thousand = (end - start) / 1000
        one_per_thousand = one_per_thousand + ((-one_per_thousand) % 512)
        logging.debug("start/end/1pm : %i / %i / %i" % (start, end, one_per_thousand))

        state['found'] = []
        state['progress'] = 0.0
        state['state'] = 'searching'
        for offset in xrange(start, end, 512):
            for disk_no in range(d.disk_count):
                d.disks[disk_no].seek(offset)
                data = d.disks[disk_no].read(512)
                if data[0x1fe:] == ref_sig:
                    # Found magic 55AA
                    logging.debug("find_bootsect : found magic at disk%02i@0x%011x (%s)"
                        % (disk_no, offset, binascii.hexlify(data[0x1be+0x0])))
                    if data[0x1be+0x0] in ref_bootflags:
                        # Partition 1 : valid flags for bootable
                        logging.debug("find_bootsect : found correct part1 flag byte at disk%02i@0x%011x"
                            % (disk_no, offset))
                        # FIXME : make part type detection parametrable
                        if data[0x1be+0x4] in ref_parttypes:
                            # Partition 1 : type VMware
                            logging.debug("find_bootsect : found correct part1 type at disk%02i@0x%011x"
                                % (disk_no, offset))
                            f = state['found']
                            if len(f) < 200:
                                f.append((disk_no, offset))
                                # Reassign so a multiprocessing Manager dict sees the mutation
                                state['found'] = f
                            else:
                                state['state'] = 'aborted'
                                raise Exception('Aborting after too many matches')
            if offset % one_per_thousand == 0:
                state['progress'] = state['progress'] + 0.1

        state['progress'] = 100.0
        state['state'] = 'finished'
    except Exception as e:
        logging.exception(e)
    logging.info("Exit. do_find_bootsect()")
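# Reference for the magic offsets tested above (standard MBR layout ; added
# note, not in the original source) :
#
#   0x1be           first partition table entry (16 bytes)
#   0x1be + 0x00    status byte : 0x80 bootable, 0x00 inactive
#   0x1be + 0x04    partition type ; 0xFB and 0xFC are the VMware VMFS and
#                   VMware swap types, which is what this scan targets
#   0x1fe           two-byte boot signature 55 AA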
class MyTasks():
    """Auxiliary class, managing long or background tasks"""

    TASK_NAMES = ['find_bootsect', 'find_files']

    def __init__(self, mydisks):
        self.tasks = []
        self.d = mydisks
        self.find_files_pathlist = []
        m = multiprocessing.Manager()
        self.find_bootsect_state = m.dict()
        self.find_bootsect_process = None
        self.find_files_state = m.dict()
        self.find_files_process = None

    def get_find_files_pathlist(self):
        return self.find_files_pathlist

    def get_find_files_pathlist_str(self):
        return '\n'.join(self.find_files_pathlist)

    def task_start(self, task_name):
        if task_name == 'find_files':
            self.find_files_state['filepaths'] = list(self.find_files_pathlist)
            self.find_files_process = multiprocessing.Process(
                target=do_find_files,
                args=(self.d, self.find_files_state))
            self.find_files_process.start()
        elif task_name == 'find_bootsect':
            self.find_bootsect_process = multiprocessing.Process(
                target=do_find_bootsect,
                args=(self.d, self.find_bootsect_state))
            self.find_bootsect_process.start()
        else:
            raise ValueError('Valid task names are : %s' % ','.join(MyTasks.TASK_NAMES))

    def task_kill(self, task_name):
        if task_name == 'find_bootsect':
            if self.find_bootsect_process is not None and self.find_bootsect_process.is_alive():
                self.find_bootsect_process.terminate()
        elif task_name == 'find_files':
            if self.find_files_process is not None and self.find_files_process.is_alive():
                self.find_files_process.terminate()
        else:
            raise ValueError('Valid task names are : %s' % ','.join(MyTasks.TASK_NAMES))

    def append_find_files_pathlist(self, pathlist):
        # TODO : should receive a list, make changes in raidguessfs.py
        self.find_files_pathlist.extend(pathlist.split('\n'))

    def set_find_files_pathlist(self, new_find_files_pathlist):
        self.find_files_pathlist = new_find_files_pathlist

    def read_find_bootsect(self):
        if self.find_bootsect_process is None:
            return 'This task has never been started\n'
        else:
            return '%s\n' % self.find_bootsect_state

    def read_find_files(self):
        if self.find_files_process is None:
            return 'This task has never been started\n'
        else:
            return '%s\n' % self.find_files_state
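# Minimal self-test sketch (an illustrative addition, not part of the original
# tool ; Python 2, like the rest of this module). It fakes the few attributes
# of mydisks.MyDisks that the task functions actually use (disks, disk_count,
# disks_size), so do_find_bootsect() can be exercised against two small
# in-memory images without real devices or a FUSE mount.
if __name__ == '__main__':
    import io

    logging.basicConfig(level=logging.DEBUG)

    class FakeDisks(object):
        """Stand-in for mydisks.MyDisks backed by in-memory disk images"""
        def __init__(self, images):
            self.disks = [io.BytesIO(img) for img in images]
            self.disk_count = len(images)
            self.disks_size = [len(img) for img in images]

    # Forge one 512-byte MBR-like sector : bootable VMware-type partition 1,
    # plus the 55AA signature that do_find_bootsect() expects
    sector = bytearray(512)
    sector[0x1be]      = 0x80   # partition 1 status : bootable
    sector[0x1be+0x04] = 0xfb   # partition 1 type : VMware
    sector[0x1fe]      = 0x55   # boot signature, byte 1
    sector[0x1ff]      = 0xaa   # boot signature, byte 2

    # Disk 0 carries the forged sector at offset 0 ; disk 1 is blank.
    # 1 MiB images keep one_per_thousand non-zero (the progress code divides
    # the scan range by 1000, so tiny images would yield a zero step).
    disk0 = bytes(sector) + '\0' * (1024*1024 - 512)
    disk1 = '\0' * (1024*1024)

    state = {}
    do_find_bootsect(FakeDisks([disk0, disk1]), state)
    print(state['found'])       # expected : [(0, 0)]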