From 5534a6c1353b00cfebe6f8a155b6420687858843 Mon Sep 17 00:00:00 2001
From: Ludovic Pouzenc <lpouzenc@gmail.com>
Date: Sat, 4 Jul 2015 22:31:56 +0200
Subject: myraid: lecture RAID sur disque de donnée ou par calcul de parité
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mydisks.py     |   5 ++
 myraid.py      | 224 +++++++++++++++++++++++++++++++++------------------------
 raidguessfs.py |   6 +-
 3 files changed, 137 insertions(+), 98 deletions(-)

diff --git a/mydisks.py b/mydisks.py
index c3b7716..b249198 100644
--- a/mydisks.py
+++ b/mydisks.py
@@ -68,6 +68,11 @@ class MyDisks():
                 self.disks_size[d] = 0
         logging.debug("Exit. open_disks()")
 
+
+    def is_readable(self,disk_no,offset,size):
+        import random
+        return random.randint(0,100) > 1 # FIXME : implement this (parse ddrescue log files)
+
     def read(self,disk_no,offset,size):
         self.disks[disk_no].seek(offset)
         return self.disks[disk_no].read(size)
diff --git a/myraid.py b/myraid.py
index 3706c36..75402d9 100644
--- a/myraid.py
+++ b/myraid.py
@@ -19,21 +19,66 @@
 # along with RaidGuessFS. If not, see <http://www.gnu.org/licenses/>
 
 import logging, numpy
+import mydisks
 
 class MyRaid():
     """Auxiliary class, managing RAID layer"""
     RAID_TYPES = [ '0', '1', '5', '5+0' ]
     RAID5_LAYOUTS = [ 'la', 'ra', 'ls', 'rs' ]
 
+    @staticmethod
+    def xor_blocks(fd_list, offset, size):
+        """Compute bitwise XOR against a bunch of disks slice"""
+        logging.info("Enter xor_blocks(fd_list(%i),0x%011x,%d)"%(len(fd_list), offset, size))
+        
+        if size % 8 != 0:
+            raise ValueError('xor_blocks : size must be multiple of 8')
+        dt = numpy.dtype('<Q8')
+
+        fd_list[0].seek(offset)
+        str_b1=fd_list[0].read(size)
+        numpy_b1 = numpy.fromstring(str_b1, dtype=dt)
+        all_zero = (numpy.count_nonzero(numpy_b1) == 0 )
+        any_zero = all_zero
+
+        for fd in fd_list[1:]:
+            fd.seek(offset)
+            str_b2=fd.read(size)
+            numpy_b2 = numpy.fromstring(str_b2, dtype=dt)
+            b2_zero = (numpy.count_nonzero(numpy_b2) == 0 )
+            if all_zero == True:
+                all_zero = b2_zero
+            if any_zero == False:
+                any_zero = b2_zero
+
+            numpy.bitwise_xor(numpy_b1,numpy_b2,numpy_b1)
+
+        if all_zero == True:
+            result = 'z'
+        elif numpy.count_nonzero(numpy_b1) == 0:
+            if any_zero:
+                result = 'g'
+            else:
+                result = 'G'
+        else:
+            result = 'b'
+
+        logging.info("Exit. xor_blocks(fd_list,%d,%d)"%(offset, size))
+        #import binascii
+        #logging.debug(binascii.hexlify(numpy_b1))
+        return (result,numpy_b1)
+
+
     def __init__(self, *args, **kwargs):
-        self.disks = []
+        self.d = None
         self.raid_start = 0
         self.raid_end = 0
-        self.raid_sector_size = 512
+        self.raid_sector_size = 512 # TODO : should be self.d.sector_size
         self.raid_chunk_size = 65536
         self.raid_disk_order = []
         self.raid_disk_count = 0
         self.raid_layout = 'ls'
+        self.raid_disks = []
 
     def get_raid_start(self):
         return self.raid_start
@@ -53,8 +98,10 @@ class MyRaid():
     def get_raid_layout(self):
         return self.raid_layout
 
-    def set_disks(self, disks):
-        self.disks = disks
+    def set_disks(self, new_mydisks):
+        # FIXME : self.d doesn't need to be updated here (it should be passed to __init__ instead)
+        self.d = new_mydisks
+        self.set_raid_disk_order(range(self.d.disk_count))
 
     def set_raid_start(self, new_raid_start):
         """Update the start offset of raid data on underlying disks"""
@@ -70,19 +117,19 @@ class MyRaid():
 
     def set_raid_disk_order(self, new_raid_disk_order):
         """Update the raid logical disk order"""
-        card=len(self.disks)
-        check=[0]*card
+        check=[0] * self.d.disk_count
         for item in new_raid_disk_order:
             d = int(item)
-            if not 0 <= d < card:
-                raise ValueError('Value out of range : %i [0,%i]'%(d,card-1))
+            if not 0 <= d < self.d.disk_count:
+                raise ValueError('Value out of range : %i [0,%i]'%(d,self.d.disk_count-1))
             check[d]=check[d]+1
         
-        for d in range(card):
+        for d in range(self.d.disk_count):
             if check[d] != 1 and check[d] != 0:
                 raise ValueError('Disk %i appears %i times (must be 0 or 1)'%(d,check[d]))
-        self.raid_disk_order = new_raid_disk_order
         self.raid_disk_count = len(new_raid_disk_order)
+        self.raid_disk_order = new_raid_disk_order
+        self.raid_disks = [ self.d.disks[i] for i in self.raid_disk_order ]
 
     def set_raid_layout(self, new_raid_layout):
         if new_raid_layout in MyRaid.RAID5_LAYOUTS:
@@ -91,88 +138,50 @@ class MyRaid():
             raise ValueError('raid_layout has to be one of %s'%' '.join(RAID_LAYOUTS))
 
     def sizeof_raid_result(self, raid_type):
-        size = self.raid_end - self.raid_start
-        if size <= 0 :
-            return 0
-        else:
-            return {
-                '0'  : size * self.raid_disk_count,
-                '1'  : size if self.raid_disk_count == 2 else 0,
-                '5'  : size * (self.raid_disk_count - 1) if self.raid_disk_count >= 3 else 0,
-                '5+0': size * (self.raid_disk_count - 2) if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 else 0,
-                }[raid_type]
+        size = max(0, self.raid_end - self.raid_start)
+        return {
+            '0'  : size * self.raid_disk_count,
+            '1'  : size if self.raid_disk_count == 2 else 0,
+            '5'  : size * (self.raid_disk_count - 1) if self.raid_disk_count >= 3 else 0,
+            '5+0': size * (self.raid_disk_count - 2) if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 else 0,
+            }[raid_type]
 
     def sizeof_disk_xor(self, raid_type):
-        size = self.raid_end - self.raid_start
-        if size <= 0:
-            return 0
-        else:
-            return {
-                '0'  : 0, # TODO Could contain some plain text error message
-                '1'  : size if self.raid_disk_count == 2 else 0,
-                '5'  : size if self.raid_disk_count >= 3 else 0,
-                '5+0': size if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 else 0,
-                }[raid_type]
+        return max(0, self.raid_end - self.raid_start)
 
     def sizeof_disk_parity(self, raid_type):
-        return self.sizeof_disk_xor(raid_type) / self.raid_sector_size * 16
-
-    def xor_blocks(self,fd_list, offset, size):
-        """Compute bitwise XOR against a bunch of disks slice"""
-        logging.info("Enter xor_blocks(fd_list,%d,%d)"%(offset, size))
-        
-        if size % 8 != 0:
-            raise ValueError('xor_blocks : size must be multiple of 8')
-        dt = numpy.dtype('<Q8')
-
-        fd_list[0].seek(offset)
-        str_b1=fd_list[0].read(size)
-        numpy_b1 = numpy.fromstring(str_b1, dtype=dt)
-        all_zero = (numpy.count_nonzero(numpy_b1) == 0 )
-        any_zero = all_zero
-
-        for fd in fd_list[1:]:
-            fd.seek(offset)
-            str_b2=fd.read(size)
-            numpy_b2 = numpy.fromstring(str_b2, dtype=dt)
-            b2_zero = (numpy.count_nonzero(numpy_b2) == 0 )
-            if all_zero == True:
-                all_zero = b2_zero
-            if any_zero == False:
-                any_zero = b2_zero
-
-            numpy.bitwise_xor(numpy_b1,numpy_b2,numpy_b1)
-
-        if all_zero == True:
-            result = 'z'
-        elif numpy.count_nonzero(numpy_b1) == 0:
-            if any_zero:
-                result = 'g'
-            else:
-                result = 'G'
-        else:
-            result = 'b'
-
-        logging.info("Exit. xor_blocks(fd_list,%d,%d)"%(offset, size))
-        #import binascii
-        #logging.warn(binascii.hexlify(numpy_b1))
-        return (result,numpy_b1)
+        size = max(0, self.raid_end - self.raid_start) / self.raid_sector_size * 16
+        return {
+            '0'  : 64,
+            '1'  : size if self.raid_disk_count == 2 else 64,
+            '5'  : size if self.raid_disk_count >= 3 else 64,
+            '5+0': size if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 else 64,
+            }[raid_type]
 
     def read_disk_xor(self,raid_type,offset,size):
-        raid_disks = [ self.disks[i] for i in self.raid_disk_order ]
-        return self.xor_blocks(raid_disks,offset,size)[1].tostring()
+        """Returns raw bitwise XOR against a bunch of disks slice"""
+        return MyRaid.xor_blocks(self.raid_disks,offset,size)[1].tostring()
 
     def read_disk_parity(self,raid_type,offset,size):
         """Returns textual information about parity status of each sector"""
         logging.warn("Enter read_disk_parity(%s,%d,%d)"%(raid_type,offset,size))
-        raid_disks = [ self.disks[i] for i in self.raid_disk_order ]
+        msg = {
+                '0'  : 'There no notion of parity in RAID 0 mode\n',
+                '1'  : None if self.raid_disk_count == 2 else 'Wrong disk count (should be 2)\n',
+                '5'  : None if self.raid_disk_count >= 3 else 'Wrong disk count (should be >=3)\n',
+                '5+0': None if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0
+                            else 'Wrong disk count (should be >=6 and even)\n',
+                }[raid_type]
+        if msg:
+            return msg[offset:offset+size]
+
         start = self.raid_start + offset * self.raid_sector_size / 16
         end = start + size * self.raid_sector_size / 16
 
         #TODO : improove for nested levels
         if raid_type in ['1','5', '5+0']:
             result = ''.join(
-                    [ '0x%011x %c\n'%( addr, self.xor_blocks(raid_disks, addr, self.raid_sector_size)[0])
+                    [ '0x%011x %c\n'%( addr, MyRaid.xor_blocks(self.raid_disks, addr, self.raid_sector_size)[0])
                             for addr in range(start, end, self.raid_sector_size)
                     ])
         else:
@@ -184,15 +193,12 @@ class MyRaid():
 
     def read_raid_result(self,raid_type,offset,size):
         """Returns actual RAID data"""
-        raid_disks = [ self.disks[i] for i in self.raid_disk_order ] # TODO A garder en attribut ?
-        disk_count = len(self.raid_disk_order) # TODO doublon ?
-
         if raid_type == '0':
             segment_no = offset / self.raid_chunk_size
             segment_off = offset % self.raid_chunk_size 
-            stripe_no = segment_no / disk_count
+            stripe_no = segment_no / self.raid_disk_count
             par_disk = -1
-            data_disk = segment_no % disk_count
+            data_disk = segment_no % self.raid_disk_count
             off_disk = self.raid_start + stripe_no * self.raid_chunk_size + segment_off
             size2 = min(size, (segment_no+1) * self.raid_chunk_size - offset)
 
@@ -208,35 +214,63 @@ class MyRaid():
         elif raid_type == '5':
             segment_no = offset / self.raid_chunk_size
             segment_off = offset % self.raid_chunk_size 
-            stripe_no = segment_no / (disk_count-1)
+            stripe_no = segment_no / (self.raid_disk_count-1)
 
             if self.raid_layout in ['ls','la']:
-                par_disk = (disk_count-1) - (stripe_no % disk_count)
+                par_disk = (self.raid_disk_count-1) - (stripe_no % self.raid_disk_count)
             else: # self.raid_layout in ['rs','ra']:
-                par_disk = stripe_no % disk_count
+                par_disk = stripe_no % self.raid_disk_count
 
             if self.raid_layout in ['ls','rs']:
-                data_disk = (par_disk+1 + (segment_no % (disk_count-1)) ) % disk_count
+                data_disk = (par_disk+1 + (segment_no % (self.raid_disk_count-1)) ) % self.raid_disk_count
             else: # self.raid_layout in ['la','ra']:
-                data_disk = segment_no % (disk_count-1)
+                data_disk = segment_no % (self.raid_disk_count-1)
                 if data_disk >= par_disk:
                     data_disk = data_disk + 1
 
             off_disk = self.raid_start + stripe_no * self.raid_chunk_size + segment_off
+            # Note : this makes shorter reads than asked, but pushes the reader to be chunk aligned, which is great
             size2 = min(size, (segment_no+1) * self.raid_chunk_size - offset)
+        else: # any raid_type not handled by the branches above
+            raise Exception('Unimplemented read_raid_result() for raid_type == %s' % raid_type)
 
-        logging.info("raid.read_result(%s): offset=%d,segment_no=%d,segment_off=%d,stripe_no=%d,par_disk=%d,data_disk=%d,off_disk=%d,size2=%d,segment_off+size2=%d" 
+        logging.debug("raid.read_result(%s): offset=%d,segment_no=%d,segment_off=%d,stripe_no=%d,par_disk=%d,data_disk=%d,off_disk=%d,size2=%d,segment_off+size2=%d" 
         % (raid_type,offset,segment_no,segment_off,stripe_no,par_disk,data_disk,off_disk,size2,segment_off+size2) )
 
-        #TODO recorver from parity if damaged sectors in data_disk
-        data_fd = raid_disks[data_disk]
-        data_fd.seek(off_disk)
-        data = data_fd.read(size2)
+        data_fd = self.raid_disks[data_disk]
 
-        # This kills performance but don't make short reads before EOF
-        #if size2 > 0 and size2 < size:
-        #    data += self.read_result(self,raid_type,offset+size2,size-size2)
+        if self.d.is_readable(self.raid_disk_order[data_disk],off_disk,size2):
+            # No damaged sectors until the end of the chunk, so just read the data disk
+            data_fd.seek(off_disk)
+            data = data_fd.read(size2)
+        else:
+            logging.warn('Try to recovering damaged chunck (raid_offset: 0x%011x, data_disk: %i, disk_offset: 0x%011x'
+                    % (offset, self.raid_disk_order[data_disk], off_disk) )
+            # Damaged sectors : check / recover every sector, XORing the other disks when needed
+            other_disks = list(self.raid_disk_order)
+            other_disks.remove(self.raid_disk_order[data_disk])
+            other_fds = list(self.raid_disks)
+            other_fds.remove(data_fd)
+
+            data_arr = []
+            for s in range(off_disk, off_disk+size2, self.raid_sector_size):
+                if self.d.is_readable(self.raid_disk_order[data_disk],s,self.raid_sector_size):
+                    # Current sector is readable from data disk, read it at its own offset (s, not off_disk)
+                    logging.debug('-> 0x%011x : readable'%s)
+                    data_fd.seek(s)
+                    data_arr.append(data_fd.read(self.raid_sector_size))
+                else:
+                    # Sector is dead on data disk : needs redundancy (none in RAID 0) and all other disks readable at s
+                    recoverable = raid_type != '0' and all([
+                        self.d.is_readable(other_disk,s,self.raid_sector_size) for other_disk in other_disks
+                    ])
+                    if recoverable:
+                        logging.info('-> 0x%011x : recoverable'%s)
+                        data_arr.append( MyRaid.xor_blocks(other_fds, s,self.raid_sector_size)[1].tostring() )
+                    else:
+                        logging.warn('-> 0x%011x : unrecoverable'%s)
+                        data_arr.append( '\0' * self.raid_sector_size)
+            data = ''.join(data_arr)[:size2] # whole sectors were read : never return more than size2 bytes
 
         return data
 
-
diff --git a/raidguessfs.py b/raidguessfs.py
index 409ef0c..94f36e7 100755
--- a/raidguessfs.py
+++ b/raidguessfs.py
@@ -147,7 +147,7 @@ class RaidGuessFS(fuse.Fuse):
         self.d.set_disk_count(i)
         self.d.open_disks()
         self._refresh_disk_dentries()
-        self.raid.set_disks(self.d.disks)
+        self.raid.set_disks(self.d)
         self.raid.set_raid_end(min(self.d.disks_size)-1)
         self.update_raid_disk_order(range(i))
 
@@ -372,8 +372,8 @@ RaidGuessFS is a pseudo-filesystem that allows to guess parameters and disk orde
 
     LOG_FILENAME = "raidguessfs.log"
     #logging.basicConfig(filename=LOG_FILENAME,level=logging.WARN,)
-    #logging.basicConfig(filename=LOG_FILENAME,level=logging.INFO,)
-    logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG,)
+    logging.basicConfig(filename=LOG_FILENAME,level=logging.INFO,)
+    #logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG,)
 
     server = RaidGuessFS(version="%prog " + fuse.__version__,usage=usage,dash_s_do='setsingle')
     server.multithreaded = False
-- 
cgit v1.2.3