From c992a6efbd34e065026052f47b42668abd5870d1 Mon Sep 17 00:00:00 2001
From: GAUTREAU Guillaume <guillaume.gautreau@free.fr>
Date: Mon, 5 Mar 2018 12:03:04 +0100
Subject: [PATCH] Bug fix: when the position of the initialization values in the
 m file does not match the mf output file, the partitions are set to undefined

---
 ppanggolin/command_line.py |  2 +-
 ppanggolin/ppanggolin.py   | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/ppanggolin/command_line.py b/ppanggolin/command_line.py
index 3f72c4e..454fb95 100755
--- a/ppanggolin/command_line.py
+++ b/ppanggolin/command_line.py
@@ -433,7 +433,7 @@ def __main__():
     #-------------
 
     #-------------
-    logging.getLogger().info("Partitionning...")
+    logging.getLogger().info("Partitioning...")
 
     start_partitioning = time()
     pan.partition(nem_dir_path    = OUTPUTDIR+NEM_DIR,
diff --git a/ppanggolin/ppanggolin.py b/ppanggolin/ppanggolin.py
index 89191b1..e07fbed 100755
--- a/ppanggolin/ppanggolin.py
+++ b/ppanggolin/ppanggolin.py
@@ -326,11 +326,15 @@ def __str__(self):
 
         if self.pan_size != 0:
             pan_str += "Pan-genome size:"+str(self.pan_size)+"\n"
+            pan_str += "\n"
             pan_str += "Exact core-genome size:"+str(len(self.partitions["core_exact"]))+"\n"
             pan_str += "Exact variable-genome size:"+str(self.pan_size-len(self.partitions["core_exact"]))+"\n"
+            pan_str += "\n"
             pan_str += "Persistent genome size:"+str(len(self.partitions["persistent"]))+"\n"
             pan_str += "Shell genome size:"+str(len(self.partitions["shell"]))+"\n"
             pan_str += "Cloud genome cloud:"+str(len(self.partitions["cloud"]))+"\n"
+            pan_str += "\n"
+            pan_str += "Genome with undefined partition:"+str(len(self.partitions["undefined"]))+"\n"
         else:
             pan_str += "No partitioning have been performed on this Pangenome instance\n"
             pan_str += "Run the partitioning function to obtain more detailled statistics...\n"
@@ -1390,6 +1394,9 @@ def run_partitioning(nem_dir_path, graph, organisms, pan_size, beta, free_disper
             partition[shell_k]      = "S"#SHELL
             partition[cloud_k]      = "C"#CLOUD
 
+            if partition[0] != "P" or partition[1] != "S" or partition[2] != "C":
+                raise ValueError("vector mu_k and epsilon_k value in the mf file are not relevant with the initialisation value in the m file")
+
             for i, line in enumerate(partitions_nem_file):
                 elements = [float(el) for el in line.split()]
                 max_prob = max([float(el) for el in elements])
@@ -1406,6 +1413,9 @@ def run_partitioning(nem_dir_path, graph, organisms, pan_size, beta, free_disper
             #logging.getLogger().debug(index.keys())
     except FileNotFoundError:
         logging.getLogger().warning("Statistical partitioning do not works (the number of organisms used is probably too low), see logs here to obtain more details "+nem_dir_path+"/nem_file.log")
+    except ValueError:
+        ## return the default partitions_list, which corresponds to undefined
+        pass
     return(dict(zip(index_fam.keys(), partitions_list)))