Skip to content

Commit

Permalink
completed ortholog template + updated reports
Browse files Browse the repository at this point in the history
  • Loading branch information
valearna committed Jul 9, 2018
1 parent 9fd3be9 commit 684099d
Show file tree
Hide file tree
Showing 8 changed files with 235 additions and 120 deletions.
5 changes: 3 additions & 2 deletions config_wb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ generic_data_fetcher:
cache_location: "gene_descriptions_cache"
# set 'data_fetcher' to 'agr_data_fetcher' to generate descriptions for AGR or 'wb_data_fetcher' for WormBase
data_fetcher: "wb_data_fetcher"
textpresso_api_token: "ZfwjASOC2utOr01iXHeD"

# options for WormBase gene description generation - used when data_fetcher is set to 'wb_data_fetcher'
wb_data_fetcher:
Expand All @@ -24,7 +25,7 @@ wb_data_fetcher:
human:
full_name: "Homo sapiens"
b_malayi:
full_name: "Brugia Malayi"
full_name: "Brugia malayi"
project_id: "PRJNA10729"
ortholog:
- c_elegans
Expand Down Expand Up @@ -82,7 +83,7 @@ wb_data_fetcher:
- o_volvulus
main_sister_species: c_elegans
t_muris:
full_name: "Trichuris Muris"
full_name: "Trichuris muris"
project_id: "PRJEB126"
ortholog:
- c_elegans
Expand Down
8 changes: 8 additions & 0 deletions genedescriptions/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ def __init__(self, file_path):
with open(file_path) as conf_file:
self.config = yaml.load(conf_file)

def get_textpresso_api_token(self) -> str:
    """Return the Textpresso API token from the loaded configuration.

    The token is read from the ``generic_data_fetcher`` section of the
    YAML configuration, under the ``textpresso_api_token`` key.

    Returns:
        str: the Textpresso API token
    """
    generic_section = self.config["generic_data_fetcher"]
    return generic_section["textpresso_api_token"]

def get_data_fetcher(self) -> str:
"""get the data fetcher type from the configuration file
Expand Down
34 changes: 19 additions & 15 deletions genedescriptions/data_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,25 +429,29 @@ def get_best_orthologs_for_gene(self, gene_id: str, orth_species_full_name: List
for curr_orth_fullname in orth_species_full_name:
if curr_orth_fullname in self.orthologs[gene_id]:
orthologs = self.orthologs[gene_id][curr_orth_fullname]
# for human orthologs, take only those predicted by more than 1 method
if len(orth_species_full_name) == 1 and orth_species_full_name[0] == "Homo sapiens":
orthologs = [ortholog for ortholog in orthologs if len(ortholog[2].split(";")) > 1]
orthologs_keys = []
if len(orthologs) > 1:
for ortholog in orthologs:
if len(orthologs) > 0:
if len(orthologs) > 1:
for ortholog in orthologs:
if sister_species_data_fetcher:
orthologs_keys.append([ortholog[0], ortholog[1], len(ortholog[2].split(";")),
len(sister_species_data_fetcher.get_annotations_for_gene(
gene_id=ortholog[0], annot_type=DataType.GO,
priority_list=ecode_priority_list))])
else:
orthologs_keys.append([ortholog[0], ortholog[1], len(ortholog[2].split(";"))])
if sister_species_data_fetcher:
orthologs_keys.append([ortholog[0], ortholog[1], len(ortholog[2].split(";")),
len(sister_species_data_fetcher.get_annotations_for_gene(
gene_id=ortholog[0], annot_type=DataType.GO,
priority_list=ecode_priority_list))])
best_orthologs = [sorted(orthologs_keys, key=lambda x: (x[2], x[3]), reverse=True)[0][0:2]]
else:
orthologs_keys.append([ortholog[0], ortholog[1], len(ortholog[2].split(";"))])
if sister_species_data_fetcher:
best_orthologs = [sorted(orthologs_keys, key=lambda x: (x[2], x[3]), reverse=True)[0][0:2]]
best_orthologs = [[orth_key[0], orth_key[1]] for orth_key in
sorted(orthologs_keys, key=lambda x: x[2], reverse=True) if
orth_key[2] == max([orth[2] for orth in orthologs_keys])]
else:
best_orthologs = [[orth_key[0], orth_key[1]] for orth_key in
sorted(orthologs_keys, key=lambda x: x[2], reverse=True) if
orth_key[2] == max([orth[2] for orth in orthologs_keys])]
else:
best_orthologs = [[orthologs[0][0], orthologs[0][1]]]
break
best_orthologs = [[orthologs[0][0], orthologs[0][1]]]
break
return best_orthologs, curr_orth_fullname

def load_all_data_from_file(self, go_terms_replacement_regex: Dict[str, str] = None,
Expand Down
Loading

0 comments on commit 684099d

Please sign in to comment.