From 9f54970ce1b1a5b78e999c183591018a23d27da1 Mon Sep 17 00:00:00 2001 From: JLSteenwyk Date: Thu, 8 Aug 2024 16:38:02 -0700 Subject: [PATCH] set some unit tests --- .github/workflows/ci.yml | 22 +++++++++++++++++++ orthohmm/args_processing.py | 5 +++-- orthohmm/helpers.py | 9 ++++---- orthohmm/orthohmm.py | 16 ++++++++++++-- .../orthohmm_single_copy_orthogroups/OG2.fa | 18 +++++++-------- tests/unit/test_args_parsing.py | 10 +++++++++ tests/unit/test_parser.py | 6 ++--- 7 files changed, 66 insertions(+), 20 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6bec822..1c5efbe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,17 @@ jobs: # install deps for tests pip install pytest pip install pytest-cov + - name: Install HMMER + run: | + brew install hmmer + - name: Install MCL + run: | + mkdir installmcl + cd installmcl + wget https://raw.githubusercontent.com/micans/mcl/main/install-this-mcl.sh -o install-this-mcl + chmod u+x install-this-mcl.sh + ./install-this-mcl.sh + mcl --version - name: Run tests run: | make test.fast @@ -41,6 +52,17 @@ jobs: # install test dependencies pip install pytest pip install pytest-cov + - name: Install HMMER + run: | + brew install hmmer + - name: Install MCL + run: | + mkdir installmcl + cd installmcl + wget https://raw.githubusercontent.com/micans/mcl/main/install-this-mcl.sh -o install-this-mcl + chmod u+x install-this-mcl.sh + ./install-this-mcl.sh + mcl --version - name: Generate coverage report run: | make test.coverage diff --git a/orthohmm/args_processing.py b/orthohmm/args_processing.py index a8504ec..c3ef0a6 100644 --- a/orthohmm/args_processing.py +++ b/orthohmm/args_processing.py @@ -1,4 +1,5 @@ from distutils.spawn import find_executable +import shutil import logging import multiprocessing import os.path @@ -27,7 +28,7 @@ def process_args(args) -> dict: if args.phmmer: phmmer = args.phmmer - if not os.path.isfile(phmmer): + if not shutil.which(phmmer): logger.warning(f"phmmer can't be found at {phmmer}.") sys.exit() else: @@ -50,7 +51,7 @@ def process_args(args) -> dict: if args.mcl: mcl = args.mcl - if not os.path.isfile(mcl): + if not shutil.which(mcl): logger.warning(f"mcl can't be found at {mcl}.") sys.exit() else: diff --git a/orthohmm/helpers.py b/orthohmm/helpers.py index f1b5be8..c331b29 100644 --- a/orthohmm/helpers.py +++ b/orthohmm/helpers.py @@ -2,8 +2,6 @@ import multiprocessing import os import subprocess -import sys -import time from typing import Tuple from Bio import SeqIO @@ -31,6 +29,7 @@ def run_phmmer( pool.join() +# TODO: write unit test def calculate_sequence_lengths( fasta_directory: str, file: str, @@ -56,9 +55,10 @@ def calculate_sequence_lengths( return sequence_lengths +# TODO: write unit test def get_best_hits( df: pd.DataFrame -) -> pd.DataFrame: +) -> Tuple[dict, dict]: best_hits = df.loc[df.groupby("query name")["norm_score"].idxmax()] return best_hits.set_index("query name")["target name"].to_dict(), \ best_hits.set_index("query name")["norm_score"].to_dict() @@ -219,9 +219,10 @@ def determine_network_edges( def execute_mcl( mcl: str, inflation_value: float, + cpu: int, output_directory: str, ) -> None: - cmd = f"{mcl} {output_directory}/working_dir/orthohmm_edges.txt --abc -I {inflation_value} -o {output_directory}/working_dir/orthohmm_edges_clustered.txt" + cmd = f"{mcl} {output_directory}/working_dir/orthohmm_edges.txt -te {cpu} --abc -I {inflation_value} -o {output_directory}/working_dir/orthohmm_edges_clustered.txt" subprocess.run( cmd, shell=True, diff --git a/orthohmm/orthohmm.py b/orthohmm/orthohmm.py index 2d36d28..5038e6e 100644 --- a/orthohmm/orthohmm.py +++ b/orthohmm/orthohmm.py @@ -34,7 +34,7 @@ def execute( mcl: str, inflation_value: float, **kwargs, -): +) -> None: print(textwrap.dedent( f"""\ ____ _ _ _ _ __ __ __ __ @@ -62,6 +62,18 @@ def execute( "reg", "clu", "ov", "env", "dom", "rep", "inc", "description of target" ] + # # wrap execute() in try, except, and finally + # try: + # do_something() + # except Exception as e: + # logger.error(e) + # finally: # guaranteed to execute + # # if I have the temp directory, delete it + # do_cleanup() + + # TODO: support specific temporary directory supplied by user + # Default /tmp/orthohmm-v489q48710ds + # v489q48710ds should be a random string # create working directory if not os.path.isdir(f"{output_directory}/working_dir"): os.mkdir(f"{output_directory}/working_dir") @@ -120,7 +132,7 @@ def execute( # Step 4: Conduct mcl clustering print("Step 4/6: Conducting clustering") - execute_mcl(mcl, inflation_value, output_directory) + execute_mcl(mcl, inflation_value, cpu, output_directory) print(" Completed!\n") # Step 5: Write out orthogroup files diff --git a/tests/expected/orthohmm_single_copy_orthogroups/OG2.fa b/tests/expected/orthohmm_single_copy_orthogroups/OG2.fa index 1b17455..0820320 100644 --- a/tests/expected/orthohmm_single_copy_orthogroups/OG2.fa +++ b/tests/expected/orthohmm_single_copy_orthogroups/OG2.fa @@ -1,4 +1,4 @@ ->ENSMUSP00000058613 +>Mus_musculus.GRCm38.pep.all|ENSMUSP00000058613 MATRSSRRESRLPFLFALVALLPRGALGGGWTQRLHGGPAPLPQDRGFFVVQGDPRDLRLGTHGDAPGAS PAARKPLRTRRSAALQPQPIQVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPKSSDVYVSYD YGKSFSKISEKLNFGVGNNSEAVISQFYHSPADNKRYIFVDAYAQYLWITFDFCSTIHGFSIPFRAADLL @@ -31,7 +31,7 @@ DQDLFYAIAVKDLIRKTDRSYKVRSRNSTVEYSLSKLEPGGKYHIIVQLGNMSKDSSIKITTVSLSAPDA LKIITENDHVLLFWKSLALKEKQFNETRGYEIHMSDSAVNLTAYLGNTTDNFFKVSNLKMGHNYTFTVQA RCLFGSQICGEPAVLLYDELSSGADAAVIQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSF SAFANSHYSSRLGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA ->ENSCAFP00000017344 +>Canis_familiaris.CanFam3.1.pep.all|ENSCAFP00000017344 MATRSSRRESRLPFLLALVALLPPGAVCAAGAQTLLGGRAPLPQERGVRVLRGEARGGPRGAAEAPPRRR RSAALQPEPLQVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPRSSDVYVSYDYGKSFKRISE KLNFGEGNSSEAVIAQFYHSPADNKRYIFADAYAQYLWITFDFCNTLQGFSIPFRAADLLLHSKASDLLL @@ -64,7 +64,7 @@ DLIRKSDRSYKIKSRNSTVEYTLNKLEPGGKYHIIVQLGNMSKDSSIKITTVSLSAPDALKIITENDHVL LFWKSLALKEKHFNESRGYEIHMFDSAMNISAYLGNTTDNFFKISNLKLGHNYTFTVQARCLFGSQICGE PAVLLYDELGSGGGASAFQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSFTAFANSHYSSR LGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA ->ENSTNIP00000017394 +>Tetraodon_nigroviridis.TETRAODON8.pep.all|ENSTNIP00000017394 MAMRRPAAPQPPARSALCLLLLLCVSGSRSSNLHLRAGPARPQNRGFFTVDAIADPAGGDGAARAAAGGA YPPRRSPRSSGESAMPKVYGQANLNDSHNQMVVHWAGEKSNVIVALARDSTGSTGPKSSSVYVSYDYGAS FTQVSDKFRPPADGTGAEDTPVISQFYHSPADNKRYLFVDSGSRVLWSSLDFCRSVRSFRLPFRPSDLLL @@ -97,7 +97,7 @@ YDSPDEPLMYAIHVRDAVRKTERDYKVTTQNNTVEYLLRGLEPGGRYSVSVRLRNMSKEASFSLSTVPLP APEALKILTEDDHVFLFWKSLAVREKGFDEARGYEVHVYDSTTNQTSYLGNTTETYLRVSSLQPGHNYTF SVQARCLVGGQLCGEAALLLYDQLTAEGGDAAGSGQRSGDMAAVVVPVLFMLLLGVCGGLVILYLRHRRL QNNFTAFANSHYNSRLGSAIFSSGDELGDDDEDAPMISGFSDDVPMVIA ->ENSPTRP00000007532 +>Pan_troglodytes.Pan_tro_3.0.pep.all|ENSPTRP00000007532 MATRSSRRESRLPFLFTLVALLPPGALCEVWTQRLHGGSAPLPQDRGFLVVQGDPRELRLWERGDARGAS RADEKPLRRKRSAALQPEPIKVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPKSSDVYVSYD YGKSFKKISDKLNFGVGNRSEAVIAQFYHSPADNKRYIFADAYAQYLWITFDFCNTLQGFSIPFRAADLL @@ -130,7 +130,7 @@ QDLLYAIAVKDLIRKTDRSYKVKSRNSTVEYTLNKLEPGGKYHIIVQLGNMSKDSSIKITTVSLSAPDAL KIITENDHVLLFWKSLALKEKHFNESRGYEIHMFDSAMNITAYLGNTTDNFFKISNLKMGHNYTFTVQAR CLFGSQICGEPAILLYDELGSGADASATQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSFT AFANSHYSSRLGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA ->ENSDARP00000129655 +>Danio_rerio.GRCz11.pep.all|ENSDARP00000129655 MASGQTRKMLALSRCAIYLLLLLVPAAISSTLRLHHDQRFVLPQDRGFSLVSAHLEPAESRVVRLEREVR EASAAHQLRVRRNAAGAPVPNVYGMANLNDSHNQMVVHWAGEKSDVIVALARDSVGATDPKTSSVYVSYD YGANFTPVSEKFQLPREQEDKKQVISQFYHSPADNKRYLFTDTTNSYLWNTFDFCKTVQGFSIPFKPTDL @@ -163,7 +163,7 @@ TYAVHVRDTVRKTESDYKVITQNNTVEYTLKGLEPAGRYSITIRLLNMSKEASYTLSTVPLPAPEALKIL EEEDHVFLFWKSLAVKDRTFNESRGYEVYVHDSVTNSTKCLGNTTETFFRINSLLAGHNYTFSVRARCLL SNQLCGESAVLLYDELGKAAGQNDAASQSGKSEDMAAIVVPVLFLLLVGVCGGLVVLYLRHRRLQHSFTA FANSHYNSRLGSAIFSSGDELGDDDEDAPMISGFSDDVPMVIA ->ENSP00000260197 +>Homo_sapiens.GRCh38.pep.all|ENSP00000260197 MATRSSRRESRLPFLFTLVALLPPGALCEVWTQRLHGGSAPLPQDRGFLVVQGDPRELRLWARGDARGAS RADEKPLRRKRSAALQPEPIKVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPKSSDVYVSYD YGKSFKKISDKLNFGLGNRSEAVIAQFYHSPADNKRYIFADAYAQYLWITFDFCNTLQGFSIPFRAADLL @@ -196,7 +196,7 @@ QDLLYAVAVKDLIRKTDRSYKVKSRNSTVEYTLNKLEPGGKYHIIVQLGNMSKDSSIKITTVSLSAPDAL KIITENDHVLLFWKSLALKEKHFNESRGYEIHMFDSAMNITAYLGNTTDNFFKISNLKMGHNYTFTVQAR CLFGNQICGEPAILLYDELGSGADASATQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSFT AFANSHYSSRLGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA ->ENSCINP00000024362 +>Ciona_intestinalis.KH.pep.all|ENSCINP00000024362 AELHDGNTQLVIHWAGLNSSNIAVLTKNQRSGSVRTSNVYFSYDYGATFSRKISSSSSGSLPQARISDFV HAYHNIDRYIFMDRHTPSLFTTTDSGRNFHHVTPLPFTPSTLTINPVKPWIVIGHDETGKNLFKSSDFGS SWIRIQTRVEKYEFGFSAATSQFETNPDTLYVRRTNLDGSYTILRSETYFQNDIREQVVISHVTDFLLMN @@ -218,7 +218,7 @@ PIYILMGGKFSDLSVVAASSLSVSSNVFLHTATTPTTPIPTLPPHECGSTHLPCRTSGMCYPLVWRCDGV DDCGDDSDEFDCNTTSSTPTAHHLPCFSDLLQFRCANQLCIFRFYKCDGDNDCGDWSDERGCPNGNMSSV PSVAPPTANPMHCVDGYTYCGVGGTRTCILDRFVCDGDNDCGNNADEKNCGQQCGTDQFTCHNGRCIALS LHCNGIDDCSDNSDENCTTVDCGPSFYRCAADNHCILRSSVCNGYPDCSDNADEQGCAE ->ENSGALP00000046461 +>Gallus_gallus.GRCg6a.pep.all|ENSGALP00000046461 MGCSQQVLLHHLLPGEGLSQCFNHLAAHSEHKDPSGGPCPRALTLTLTPQGPLAPGIVSVLFRFECLIPP QQDGTPCSGSRCPFPLQVSLNDSHNQMVVHWAGEKSNVIVALARDSLSLLGPKNSDVYISYDYGKSFKKI SERFSFGDGNSSAVAIAQFYHSPANNQRYIFVDAFVPYLWITTDFCKSIQGFSIPFRAADLLLHSRNPNL @@ -251,7 +251,7 @@ KDLVRKTDKIYKVKTRNSTVEYTIKKLEPGGKYHVIVQLGNMSKESSMMINTVPLSAPDALKIITENDHI LLFWKSLALKESNFNESRGYEVLMFDSLVNRTAYLGNTTENFFKVSNLKIGHNYSFAVRARCLYGGQMCG EPATLLYDELGAGEDSSESKLGRSTDVAAIVVPILFLLLVALGAGFVVLYTRHRRLQSSFTAFANSHYSS RLGSAIFSSGDDLGDEDDEAPMITGFSDDVPMVIA ->ENSMODP00000016287 +>Monodelphis_domestica.ASM229v1.pep.all|ENSMODP00000016287 MATGSSCGEPRLPLLFALVALLPPWALTETRSLQSAGGRAALPQERGFFVLQGGMGQPWLRARRDAAGGA SRPAGEPRPPRTRRSAAAHQLEPVKVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALLGPKSSDVY VSYDYGKTFQKISEKFNFGMGNSSEAVIAQFYHSPADNKRYIFVDAYAQYLWITFDFCSTVQGFSIPFRA diff --git a/tests/unit/test_args_parsing.py b/tests/unit/test_args_parsing.py index a8a11c3..b0785b8 100644 --- a/tests/unit/test_args_parsing.py +++ b/tests/unit/test_args_parsing.py @@ -33,6 +33,16 @@ def test_process_args_output_directory_dne(self, args): with pytest.raises(SystemExit): process_args(args) + def test_process_args_phmmer_not_installed(self, args): + args.phmmer = "phmmer-that-dne" + with pytest.raises(SystemExit): + process_args(args) + + def test_process_args_mcl_not_installed(self, args): + args.mcl = "mcl-that-dne" + with pytest.raises(SystemExit): + process_args(args) + def test_process_args_default_single_copy_threshold(self, args): args.single_copy_threshold = None res = process_args(args) diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index f58a502..5630475 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -10,6 +10,6 @@ def parser(): class TestParser(object): def test_required_only(self, parser): - input_directory = "./tests/samples/" - parsed = parser.parse_args([input_directory]) - assert parsed.input == input_directory + fasta_directory = "./tests/samples/" + parsed = parser.parse_args([fasta_directory]) + assert parsed.fasta_directory == fasta_directory