Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes in allele calling module #21

Merged
merged 19 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
f5644df
fixed comments, added eval id as parameter for reference alleles
saramonzon Apr 11, 2024
8f1646a
added eval_identity to parallel execution function
saramonzon Apr 15, 2024
5b78cd1
added left eval_id to EvaluateCluster call
saramonzon Apr 15, 2024
e5bf0d0
variable renaming and pseudocode
saramonzon Apr 16, 2024
f39ce11
first draft code for prot conversion and extend sequence fix
saramonzon Apr 16, 2024
dc581c2
fixed bug when niph/niphem, removed checking allele match as not impo…
saramonzon Apr 16, 2024
11f6750
fixed update classification for niph/niphem, fixed wrong indent in fi…
saramonzon Apr 16, 2024
ac89a98
changed exact match detection from grep to biopython
saramonzon Apr 16, 2024
24195b2
removed grep execution
saramonzon Apr 16, 2024
8eda7fd
TPR when any protein translation error, fixed bug when b_split_data i…
saramonzon Apr 16, 2024
f8ce715
added some tweaks when strand is -, and some linting
saramonzon Apr 16, 2024
b6e19da
fixed LNF in allele_match.tsv output, variable renaming, comment for …
saramonzon Apr 17, 2024
f12fdb5
renaming, changed output of allele details from list to dict, moved e…
saramonzon Apr 17, 2024
7143e20
fixed wrong unpack in extend seq find function call
saramonzon Apr 17, 2024
1913567
sort locus_names when printing results. Added search for EXC match af…
saramonzon Apr 18, 2024
2c29700
function organization, linting and added function when sequence is no…
saramonzon Apr 19, 2024
f1032ca
added comments
saramonzon Apr 19, 2024
878bffd
linting
saramonzon Apr 19, 2024
c5f785f
variable renaming from allele to locus for clarity
saramonzon Apr 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 16 additions & 26 deletions taranis/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def analyze_schema(
"--eval-cluster/--no-eval-cluster",
required=False,
default=True,
help="Evaluate if the reference alleles match against blast with a 90% identity",
help="Evaluate if the reference alleles match against blast with the identity set in eval-identity param",
)
@click.option(
"-k",
Expand All @@ -304,6 +304,14 @@ def analyze_schema(
default=0.75,
help="Resolution value used for clustering.",
)
@click.option(
"-e",
"--eval-identity",
required=False,
type=float,
default=85,
help="Resolution value used for clustering.",
)
@click.option(
"--seed",
required=False,
Expand Down Expand Up @@ -332,6 +340,7 @@ def reference_alleles(
kmer_size: int,
sketch_size: int,
cluster_resolution: float,
eval_identity: float,
seed: int,
cpus: int,
force: bool,
Expand Down Expand Up @@ -362,6 +371,7 @@ def reference_alleles(
kmer_size,
sketch_size,
cluster_resolution,
eval_identity,
seed,
)
for f_file in schema_files
Expand Down Expand Up @@ -404,12 +414,12 @@ def reference_alleles(
)
@click.option(
"-t",
"--threshold",
"--hit_lenght_perc",
required=False,
nargs=1,
default=0.8,
type=float,
help="Threshold value to consider in blast. Values from 0 to 1. default 0.8",
help="Threshold value to consider in blast hit percentage regarding the reference length. Values from 0 to 1. default 0.8",
)
@click.option(
"-p",
Expand Down Expand Up @@ -484,7 +494,7 @@ def allele_calling(
reference: str,
annotation: str,
assemblies: list,
threshold: float,
hit_lenght_perc: float,
perc_identity: int,
output: str,
force: bool,
Expand All @@ -507,7 +517,6 @@ def allele_calling(
if not force:
_ = taranis.utils.prompt_user_if_folder_exists(output)
# Filter fasta files from reference folder
# ref_alleles = glob.glob(os.path.join(reference, "*.fasta"))
max_cpus = taranis.utils.cpus_available()
if cpus > max_cpus:
stderr.print("[red] Number of CPUs bigger than the CPUs available")
Expand Down Expand Up @@ -536,7 +545,7 @@ def allele_calling(
schema,
prediction_data,
schema_ref_files,
threshold,
hit_lenght_perc,
perc_identity,
output,
inf_allele_obj,
Expand All @@ -553,32 +562,13 @@ def allele_calling(
except Exception as e:
print(e)
continue
"""
for assembly_file in assemblies:
results.append(
taranis.allele_calling.parallel_execution(
assembly_file,
schema,
prediction_data,
schema_ref_files,
threshold,
perc_identity,
output,
inf_allele_obj,
snp,
alignment,
proteine_threshold,
increase_sequence,
)
)
"""

_ = taranis.allele_calling.collect_data(
results, output, snp, alignment, schema_ref_files, cpus
)
finish = time.perf_counter()
print(f"Allele calling finish in {round((finish-start)/60, 2)} minutes")
log.info("Allele calling finish in %s minutes", round((finish - start) / 60, 2))
# sample_allele_obj.analyze_sample()


@taranis_cli.command(help_priority=3)
Expand Down
Loading
Loading