From 560f4cdf61a0259af57e276e4759d164327882db Mon Sep 17 00:00:00 2001
From: Ludovic Pouzenc <lpouzenc@gmail.com>
Date: Sun, 12 Jul 2015 12:23:10 +0200
Subject: task find_files : first impl. task find_bootsect : added "state"

---
 mytasks.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/mytasks.py b/mytasks.py
index 235a1be..80b04ed 100644
--- a/mytasks.py
+++ b/mytasks.py
@@ -18,13 +18,58 @@
 # You should have received a copy of the GNU General Public License
 # along with RaidGuessFS. If not, see <http://www.gnu.org/licenses/>
 
-import multiprocessing, binascii, logging
+import os, multiprocessing, binascii, logging
 import mydisks
 
 def do_find_files(d,state):
     logging.info("Enter do_find_files()")
     try:
-        state['TODO'] = 'Not yet implemented'
+        state['state'] = 'initializing'
+        ref_paths = state['filepaths']
+        ref_count = len(ref_paths)
+        ref_fds = [None]*ref_count
+        ref_sizes = [None]*ref_count
+        ref_offset = [None]*ref_count
+        ref_cur_sect = [None]*ref_count
+        for ref_no in range(ref_count):
+            path = state['filepaths'][ref_no]
+            logging.debug("Try to open reffile '%s'"%path)
+            ref_offset[ref_no] = 0
+            ref_sizes[ref_no] = os.lstat(path).st_size
+            ref_fds[ref_no] = open(path, "r")
+            ref_fds[ref_no].seek(0)
+            ref_cur_sect[ref_no] = ref_fds[ref_no].read(512)
+            logging.debug("Opened reffile '%s'"%path)
+
+        start = 0
+        end = min(d.disks_size)
+        one_percent = (end - start) / 100
+        one_percent = one_percent + ( (-one_percent)%512 )
+        logging.debug("start/end/1pc : %i / %i / %i"%(start,end,one_percent))
+
+        state['found'] = []
+        state['progress'] = 0
+        state['state'] = 'searching'
+        for offset in range(start, end, 512):
+            for disk_no in range(d.disk_count):
+                d.disks[disk_no].seek(offset)
+                data = d.disks[disk_no].read(512)
+                for ref_no in range(ref_count):
+                    #TODO : handle buffers shorter than 512
+                    if data == ref_cur_sect[ref_no]:
+                        #TODO : search for the rest of the file
+                        f = state['found']
+                        if len(f) < 200:
+                            # TODO aggregate the matches
+                            f.append((ref_paths[ref_no],ref_offset[ref_no],disk_no,offset))
+                            state['found'] = f
+                        else:
+                            state['state'] = 'aborted'
+                            raise Exception('Aborting after too many matches')
+            if offset % one_percent == 0:
+                state['progress'] = state['progress'] + 1
+
+        state['state'] = 'finished'
         state['progress'] = 100
     except Exception as e:
         logging.exception(e)
@@ -34,6 +79,7 @@ def do_find_files(d,state):
 def do_find_bootsect(d,state):
     logging.info("Enter do_find_bootsect()")
     try:
+        state['state'] = 'initializing'
         ref_sig = binascii.unhexlify('55AA')
 
         start = 0
@@ -44,6 +90,7 @@ def do_find_bootsect(d,state):
 
         state['found'] = []
         state['progress'] = 0
+        state['state'] = 'searching'
         for offset in range(start, end, 512):
             for disk_no in range(d.disk_count):
                 d.disks[disk_no].seek(offset)
@@ -55,11 +102,14 @@ def do_find_bootsect(d,state):
                         f.append((disk_no,offset))
                         state['found'] = f
                     else:
+                        state['state'] = 'aborted'
                         raise Exception('Aborting after too many matches')
 
             if offset % one_percent == 0:
                 state['progress'] = state['progress'] + 1
+
         state['progress'] = 100
+        state['state'] = 'finished'
     except Exception as e:
         logging.exception(e)
     logging.info("Exit. do_find_bootsect()")
@@ -89,6 +139,7 @@ class MyTasks():
 
     def task_start(self, task_name):
         if task_name == 'find_files':
+            self.find_files_state['filepaths'] = list(self.find_files_pathlist)
             self.find_files_process = multiprocessing.Process(
                     target = do_find_files,
                     args = (self.d, self.find_files_state)
@@ -113,8 +164,9 @@ class MyTasks():
         else:
             raise ValueError('Valid task names are : %s'%','.join(MyTasks.TASK_NAMES))
 
-    def append_find_files_pathlist(self, path):
-        self.find_files_pathlist.append(path)
+    def append_find_files_pathlist(self, pathlist):
+        # TODO : should receive a list, make changes in raidguessfs.py
+        self.find_files_pathlist.extend(pathlist.split('\n'))
 
     def set_find_files_pathlist(self, new_find_files_pathlist):
         self.find_files_pathlist = new_find_files_pathlist
-- 
cgit v1.2.3