#!/usr/bin/env python
# RaidGuessFS, a FUSE pseudo-filesystem to guess RAID parameters of a damaged device
# Copyright (C) 2015 Ludovic Pouzenc
#
# This file is part of RaidGuessFS.
#
# RaidGuessFS is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# RaidGuessFS is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with RaidGuessFS. If not, see <http://www.gnu.org/licenses/>.

import os, multiprocessing, binascii, logging
import mydisks

def do_find_files(d, state):
    logging.info("Enter do_find_files()")
    ref_big_hash = {}
    try:
        state['state'] = 'loading'
        ref_paths = state['filepaths']
        ref_count = len(ref_paths)

        # Index every non-empty, unique sector of each reference file by its raw content
        for ref_no in range(ref_count):
            path = ref_paths[ref_no]
            logging.debug("Try to open ref. file '%s'" % path)
            with open(path, 'rb') as fd:
                logging.info("Loading ref. file '%s'" % path)
                warn_empty = True
                warn_dup = True
                while True:
                    ref_offset = fd.tell()
                    data = fd.read(512)
                    if not data:
                        break
                    if data == '\0'*512:
                        # All-zero sectors are far too common to identify anything
                        if warn_empty:
                            logging.info("Ignoring empty sector in '%s'@0x%011x" % (path, ref_offset))
                            warn_empty = False
                        else:
                            logging.debug("Ignoring empty sector in '%s'@0x%011x" % (path, ref_offset))
                    elif data in ref_big_hash:
                        (prev_ref_no, prev_ref_offset) = ref_big_hash[data]
                        if warn_dup:
                            logging.info("Non-unique sector found in ref. files ('%s'@0x%011x and '%s'@0x%011x)"
                                % (ref_paths[prev_ref_no], prev_ref_offset, path, ref_offset))
                            warn_dup = False
                        else:
                            logging.debug("Non-unique sector found in ref. files ('%s'@0x%011x and '%s'@0x%011x)"
                                % (ref_paths[prev_ref_no], prev_ref_offset, path, ref_offset))
                    else:
                        # Hash in memory the whole read sector and store its provenance
                        ref_big_hash[data] = (ref_no, ref_offset)

        start = 0
        # FIXME : make it tunable
        start = 0x00132870000
        end = min(d.disks_size)
        one_per_thousand = (end - start) / 1000
        # Round up to a multiple of the sector size so the modulo test below can match
        one_per_thousand = one_per_thousand + ((-one_per_thousand) % 512)
        logging.debug("start/end/1pm : %i / %i / %i" % (start, end, one_per_thousand))

        state['found'] = []
        state['progress'] = 0.0
        state['raw_matches'] = 0
        state['state'] = 'searching'
        raw_matches = []
        for offset in xrange(start, end, 512):
            # Read disks sector by sector and try to match sectors present
            # in ref. files (through ref_big_hash)
            for disk_no in range(d.disk_count):
                d.disks[disk_no].seek(offset)
                data = d.disks[disk_no].read(512)
                if data in ref_big_hash:
                    (ref_no, ref_offset) = ref_big_hash[data]
                    a = {
                        'ref_no':      ref_no,
                        'ref_offset':  ref_offset,
                        'disk_no':     disk_no,
                        'disk_offset': offset
                    }
                    raw_matches.append(a)
                    logging.info("raw_matches.append(disk%02i@0x%011x <=> '%s'@0x%011x)"
                        % (a['disk_no'], a['disk_offset'], a['ref_no'], a['ref_offset']))
                    state['raw_matches'] = len(raw_matches)
            if offset % one_per_thousand == 0:
                state['progress'] = state['progress'] + 0.1

        agg = state['found']
        state['state'] = 'aggregating'

        # Blacklist ref. file sectors that are far too common in disk data
        idx_matches = {}
        for rm in raw_matches:
            k = (rm['ref_no'], rm['ref_offset'])
            if k in idx_matches:
                idx_matches[k] += 1
            else:
                idx_matches[k] = 1
        for k in idx_matches.keys():
            if idx_matches[k] > 5:
                logging.info("Skipping too many raw_matches for '%s'@0x%011x" % k)
                logging.debug("Before filter : %i" % len(raw_matches))
                raw_matches = filter(lambda rm: k != (rm['ref_no'], rm['ref_offset']), raw_matches)
                logging.debug("After filter : %i" % len(raw_matches))

        # Aggregate raw_matches into runs of consecutive reference sectors.
        # block_list items are (disk_no, start_offset, end_offset) half-open
        # byte ranges on the member disks.
        found = True
        while found:
            found = False
            i = 0
            rm_len = len(raw_matches)
            while not found and i < rm_len:
                x = raw_matches[i]

                # Try to find an aggregated item that ends just before our ref_offset
                if not found:
                    for a in agg:
                        if a['ref_no'] == x['ref_no'] and x['ref_offset'] == a['ref_offset_end']:
                            a['ref_offset_end'] = x['ref_offset'] + 512
                            b = a['block_list'].pop()
                            if b[0] == x['disk_no'] and b[2] == x['disk_offset']:
                                # Contiguous on disk too : extend the last block
                                a['block_list'].append((b[0], b[1], x['disk_offset'] + 512))
                            else:
                                a['block_list'].append(b)
                                a['block_list'].append((x['disk_no'], x['disk_offset'], x['disk_offset'] + 512))
                            x['consumed'] = True
                            found = True
                            break

                # Try to find an aggregated item that starts just after our ref_offset
                if not found:
                    for a in agg:
                        if a['ref_no'] == x['ref_no'] and x['ref_offset'] + 512 == a['ref_offset_start']:
                            a['ref_offset_start'] = x['ref_offset']
                            b = a['block_list'].pop(0)
                            if b[0] == x['disk_no'] and b[1] == x['disk_offset'] + 512:
                                # Contiguous on disk too : extend the first block
                                a['block_list'].insert(0, (b[0], x['disk_offset'], b[2]))
                            else:
                                a['block_list'].insert(0, b)
                                a['block_list'].insert(0, (x['disk_no'], x['disk_offset'], x['disk_offset'] + 512))
                            x['consumed'] = True
                            found = True
                            break

                # Try to find another match right before or after the current one
                # to create a new aggregate
                if not found:
                    for x2 in raw_matches:
                        if x2 != x and x2['ref_no'] == x['ref_no'] \
                                and abs(x2['ref_offset'] - x['ref_offset']) == 512:
                            if x2['ref_offset'] > x['ref_offset']:
                                first, last = x, x2
                            else:
                                first, last = x2, x
                            if first['disk_no'] == last['disk_no'] \
                                    and last['disk_offset'] == first['disk_offset'] + 512:
                                # Contiguous on disk : a single two-sector block
                                block_list = [(first['disk_no'], first['disk_offset'], last['disk_offset'] + 512)]
                            else:
                                block_list = [
                                    (first['disk_no'], first['disk_offset'], first['disk_offset'] + 512),
                                    (last['disk_no'],  last['disk_offset'],  last['disk_offset'] + 512)
                                ]
                            a = {
                                'ref_no':           x['ref_no'],
                                'ref_offset_start': first['ref_offset'],
                                'ref_offset_end':   last['ref_offset'] + 512,
                                'block_list':       block_list
                            }
                            logging.info('agg.append(%s)' % a)
                            agg.append(a)
                            x['consumed'] = True
                            x2['consumed'] = True
                            found = True
                            break

                # Orphans are not injected in agg
                i += 1
            # end while (x = raw_matches[i])

            # Remove all consumed items from raw_matches before the next iteration
            if found:
                raw_matches = filter(lambda x: not 'consumed' in x, raw_matches)
                state['raw_matches'] = len(raw_matches)
        # end while found

        if len(agg) < 10000:
            # Only multi-block aggregates are interesting : they show where a
            # file crosses from one disk region to another
            state['found'] = filter(lambda x: len(x['block_list']) > 1, agg)
            state['state'] = 'searching'
        else:
            state['state'] = 'aborted'
            raise Exception('Aborting after too many matches')

        state['state'] = 'finished'
        state['progress'] = 100.0
    except Exception as e:
        logging.exception(e)
    finally:
        ref_big_hash.clear()
    logging.info("Exit. do_find_files()")
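# Illustration (added note, with made-up offsets) : the shape of one entry of
# state['found'] produced above. A reference file whose first four sectors
# were located as two runs on two member disks would yield something like :
#
#   { 'ref_no': 0,
#     'ref_offset_start': 0x00000000000, 'ref_offset_end': 0x00000000800,
#     'block_list': [ (0, 0x00132870000, 0x00132870400),     # disk 0, 2 sectors
#                     (1, 0x00132870000, 0x00132870400) ] }  # disk 1, 2 sectors
#
# A block_list with more than one entry means consecutive file sectors jump
# between disks or offsets, which is what exposes the RAID chunk size and
# member order, hence the len(x['block_list']) > 1 filter above.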
def do_find_bootsect(d, state):
    logging.info("Enter do_find_bootsect()")
    try:
        state['state'] = 'initializing'
        ref_sig = binascii.unhexlify('55AA')
        ref_bootflags = (binascii.unhexlify('00'), binascii.unhexlify('80'))
        ref_parttypes = (binascii.unhexlify('FB'), binascii.unhexlify('FC'))

        start = 0
        end = min(d.disks_size)
        one_per_thousand = (end - start) / 1000
        one_per_thousand = one_per_thousand + ((-one_per_thousand) % 512)
        logging.debug("start/end/1pm : %i / %i / %i" % (start, end, one_per_thousand))

        state['found'] = []
        state['progress'] = 0.0
        state['state'] = 'searching'
        for offset in xrange(start, end, 512):
            for disk_no in range(d.disk_count):
                d.disks[disk_no].seek(offset)
                data = d.disks[disk_no].read(512)
                if data[0x1fe:] == ref_sig:
                    # Found magic 55AA
                    logging.debug("find_bootsect : found magic at disk%02i@0x%011x (%s)"
                        % (disk_no, offset, binascii.hexlify(data[0x1be+0x0])))
                    if data[0x1be+0x0] in ref_bootflags:
                        # Partition 1 : valid flags for bootable
                        logging.debug("find_bootsect : found correct part1 flag byte at disk%02i@0x%011x"
                            % (disk_no, offset))
                        # FIXME : make part type detection parametrable
                        if data[0x1be+0x4] in ref_parttypes:
                            # Partition 1 : type VMware
                            logging.debug("find_bootsect : found correct part1 type at disk%02i@0x%011x"
                                % (disk_no, offset))
                            f = state['found']
                            if len(f) < 200:
                                f.append((disk_no, offset))
                                # Reassign so a multiprocessing Manager dict sees the mutation
                                state['found'] = f
                            else:
                                state['state'] = 'aborted'
                                raise Exception('Aborting after too many matches')
            if offset % one_per_thousand == 0:
                state['progress'] = state['progress'] + 0.1

        state['progress'] = 100.0
        state['state'] = 'finished'
    except Exception as e:
        logging.exception(e)
    logging.info("Exit. do_find_bootsect()")
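# Reference for the magic offsets tested above (standard MBR layout ; added
# note, not in the original source) :
#
#   0x1be           first partition table entry (16 bytes)
#   0x1be + 0x00    status byte : 0x80 bootable, 0x00 inactive
#   0x1be + 0x04    partition type ; 0xFB and 0xFC are the VMware VMFS and
#                   VMware swap types, which is what this scan targets
#   0x1fe           two-byte boot signature 55 AA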
class MyTasks():
    """Auxiliary class, managing long or background tasks"""

    TASK_NAMES = ['find_bootsect', 'find_files']

    def __init__(self, mydisks):
        self.tasks = []
        self.d = mydisks
        self.find_files_pathlist = []
        m = multiprocessing.Manager()
        self.find_bootsect_state = m.dict()
        self.find_bootsect_process = None
        self.find_files_state = m.dict()
        self.find_files_process = None

    def get_find_files_pathlist(self):
        return self.find_files_pathlist

    def get_find_files_pathlist_str(self):
        return '\n'.join(self.find_files_pathlist)

    def task_start(self, task_name):
        if task_name == 'find_files':
            self.find_files_state['filepaths'] = list(self.find_files_pathlist)
            self.find_files_process = multiprocessing.Process(
                target=do_find_files,
                args=(self.d, self.find_files_state))
            self.find_files_process.start()
        elif task_name == 'find_bootsect':
            self.find_bootsect_process = multiprocessing.Process(
                target=do_find_bootsect,
                args=(self.d, self.find_bootsect_state))
            self.find_bootsect_process.start()
        else:
            raise ValueError('Valid task names are : %s' % ','.join(MyTasks.TASK_NAMES))

    def task_kill(self, task_name):
        if task_name == 'find_bootsect':
            if self.find_bootsect_process is not None and self.find_bootsect_process.is_alive():
                self.find_bootsect_process.terminate()
        elif task_name == 'find_files':
            if self.find_files_process is not None and self.find_files_process.is_alive():
                self.find_files_process.terminate()
        else:
            raise ValueError('Valid task names are : %s' % ','.join(MyTasks.TASK_NAMES))

    def append_find_files_pathlist(self, pathlist):
        # TODO : should receive a list, make changes in raidguessfs.py
        self.find_files_pathlist.extend(pathlist.split('\n'))

    def set_find_files_pathlist(self, new_find_files_pathlist):
        self.find_files_pathlist = new_find_files_pathlist

    def read_find_bootsect(self):
        if self.find_bootsect_process is None:
            return 'This task has never been started\n'
        else:
            return '%s\n' % self.find_bootsect_state

    def read_find_files(self):
        if self.find_files_process is None:
            return 'This task has never been started\n'
        else:
            return '%s\n' % self.find_files_state
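# Minimal self-test sketch (an illustrative addition, not part of the original
# tool ; Python 2, like the rest of this module). It fakes the few attributes
# of mydisks.MyDisks that the task functions actually use (disks, disk_count,
# disks_size), so do_find_bootsect() can be exercised against two small
# in-memory images without real devices or a FUSE mount.
if __name__ == '__main__':
    import io

    logging.basicConfig(level=logging.DEBUG)

    class FakeDisks(object):
        """Stand-in for mydisks.MyDisks backed by in-memory disk images"""
        def __init__(self, images):
            self.disks = [io.BytesIO(img) for img in images]
            self.disk_count = len(images)
            self.disks_size = [len(img) for img in images]

    # Forge one 512-byte MBR-like sector : bootable VMware-type partition 1,
    # plus the 55AA signature that do_find_bootsect() expects
    sector = bytearray(512)
    sector[0x1be]      = 0x80   # partition 1 status : bootable
    sector[0x1be+0x04] = 0xfb   # partition 1 type : VMware
    sector[0x1fe]      = 0x55   # boot signature, byte 1
    sector[0x1ff]      = 0xaa   # boot signature, byte 2

    # Disk 0 carries the forged sector at offset 0 ; disk 1 is blank.
    # 1 MiB images keep one_per_thousand non-zero (the progress code divides
    # the scan range by 1000, so tiny images would yield a zero step).
    disk0 = bytes(sector) + '\0' * (1024*1024 - 512)
    disk1 = '\0' * (1024*1024)

    state = {}
    do_find_bootsect(FakeDisks([disk0, disk1]), state)
    print(state['found'])       # expected : [(0, 0)]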