From 409940e81486b91b0c1e82bb5bc96c9c2c6beb61 Mon Sep 17 00:00:00 2001 From: luissian Date: Sat, 17 Feb 2024 19:48:31 +0100 Subject: [PATCH] fixed linting --- taranis/__main__.py | 7 ------- taranis/blast.py | 4 ++-- taranis/eval_cluster.py | 1 - taranis/reference_alleles.py | 38 +----------------------------------- 4 files changed, 3 insertions(+), 47 deletions(-) diff --git a/taranis/__main__.py b/taranis/__main__.py index 8b9a920..f7655e1 100644 --- a/taranis/__main__.py +++ b/taranis/__main__.py @@ -338,13 +338,6 @@ def reference_alleles( sys.exit(1) """Create the reference alleles from the schema """ results = [] - """ - for f_file in schema_files: - - ref_alleles = taranis.reference_alleles.ReferenceAlleles(f_file, output, eval_cluster, kmer_size, sketch_size) - c_data = ref_alleles.create_ref_alleles() - results.append(c_data) - """ with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor: futures = [ executor.submit( diff --git a/taranis/blast.py b/taranis/blast.py index 7054913..a51d2ad 100644 --- a/taranis/blast.py +++ b/taranis/blast.py @@ -118,8 +118,8 @@ def run_blast( else: out, _ = cline() except Exception as e: - # log.error("Unable to run blast for %s ", self.out_blast_dir) - # log.error(e) + log.error("Unable to run blast for %s ", self.out_blast_dir) + log.error(e) stderr.print(f"[red] Unable to run blast {self.out_blast_dir}") exit(1) return out.splitlines() diff --git a/taranis/eval_cluster.py b/taranis/eval_cluster.py index 04d6c59..5f30820 100644 --- a/taranis/eval_cluster.py +++ b/taranis/eval_cluster.py @@ -6,7 +6,6 @@ import taranis.utils import taranis.blast from Bio import SeqIO -import pdb log = logging.getLogger(__name__) stderr = rich.console.Console( diff --git a/taranis/reference_alleles.py b/taranis/reference_alleles.py index ded1221..5646455 100644 --- a/taranis/reference_alleles.py +++ b/taranis/reference_alleles.py @@ -9,7 +9,6 @@ import taranis.clustering import taranis.eval_cluster from Bio import SeqIO -import
pdb log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -81,39 +80,6 @@ def create_distance_matrix(self) -> list: def processing_cluster_data( self, cluster_data: np.array, cluster_ptrs: np.array, position_to_allele: dict ) -> dict: - """As per result of ClusterDistance methods, the - reference alleles are saved to file and statistics information is - returned - - Returns: - list: two dictionaires are returned, cluster_data having statistics - and reference_alleles, where keys are cluster number and value - the reference allele for the cluster - """ - """ - # dist_matrix_np, postition_to_allele = self.create_distance_matrix() - - - # log.debug("Processing distance matrix for $s", self.fasta_file) - # distance_obj = taranis.distance.DistanceMatrix(self.fasta_file, self.kmer_size, self.sketch_size) - # mash_distance_df = distance_obj.create_matrix() - # log.debug(f"Created distance matrix for {self.fasta_file}") - # fetch the allele position into array - - postition_to_allele = { - x: mash_distance_df.columns[x] for x in range(len(mash_distance_df.columns)) - } - # convert the triangle matrix into full data matrix - matrix_np = mash_distance_df.to_numpy() - t_matrix_np = matrix_np.transpose() - matrix_np = t_matrix_np + matrix_np - # At this point minimal distance is 0. For clustering requires to be 1 - # the oposite. 
- dist_matrix_np = (matrix_np - 1) * -1 - """ - - # convert the center pointer to allele name and create list to get - # sequences reference_alleles = [] for cluster_id, values in cluster_data.items(): @@ -176,7 +142,7 @@ def create_ref_alleles(self) -> dict: dist_matrix_np, self.locus_name, ) - # pdb.set_trace() + for resolution in np.arange(self.cluster_resolution, 1, 0.025): cluster_ptrs, cluster_data = self.cluster_obj.create_clusters( round(resolution, 3) @@ -194,13 +160,11 @@ def create_ref_alleles(self) -> dict: evaluation_obj = taranis.eval_cluster.EvaluateCluster( self.fasta_file, self.locus_name, self.output ) - # pdb.set_trace() evaluation_result = evaluation_obj.evaluate_clusters( allele_data["alleles_in_cluster"], allele_data["cluster_data"], ref_fasta_file, ) - # pdb.set_trace() if evaluation_result["result"] == "OK" or resolution >= 1: # delete blast database used for evaluation _ = evaluation_obj.delete_blast_db_folder()