Skip to content

Commit

Permalink
set some unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
JLSteenwyk committed Aug 8, 2024
1 parent 68ee877 commit 9f54970
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 20 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ jobs:
# install deps for tests
pip install pytest
pip install pytest-cov
- name: Install HMMER
run: |
brew install hmmer
- name: Install MCL
  run: |
    # -p keeps the step from failing if the directory already exists
    mkdir -p installmcl
    cd installmcl
    # Capital -O names the downloaded file; lowercase -o would only redirect
    # wget's log to that file and hide the download output from the CI log.
    wget https://raw.githubusercontent.com/micans/mcl/main/install-this-mcl.sh -O install-this-mcl.sh
    chmod u+x install-this-mcl.sh
    ./install-this-mcl.sh
    # NOTE(review): confirm the installer places mcl on PATH for this check
    mcl --version
- name: Run tests
run: |
make test.fast
Expand All @@ -41,6 +52,17 @@ jobs:
# install test dependencies
pip install pytest
pip install pytest-cov
- name: Install HMMER
run: |
brew install hmmer
- name: Install MCL
  run: |
    # -p keeps the step from failing if the directory already exists
    mkdir -p installmcl
    cd installmcl
    # Capital -O names the downloaded file; lowercase -o would only redirect
    # wget's log to that file and hide the download output from the CI log.
    wget https://raw.githubusercontent.com/micans/mcl/main/install-this-mcl.sh -O install-this-mcl.sh
    chmod u+x install-this-mcl.sh
    ./install-this-mcl.sh
    # NOTE(review): confirm the installer places mcl on PATH for this check
    mcl --version
- name: Generate coverage report
run: |
make test.coverage
Expand Down
5 changes: 3 additions & 2 deletions orthohmm/args_processing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from distutils.spawn import find_executable
import shutil
import logging
import multiprocessing
import os.path
Expand Down Expand Up @@ -27,7 +28,7 @@ def process_args(args) -> dict:

if args.phmmer:
phmmer = args.phmmer
if not os.path.isfile(phmmer):
if not shutil.which(phmmer):
logger.warning(f"phmmer can't be found at {phmmer}.")
sys.exit()
else:
Expand All @@ -50,7 +51,7 @@ def process_args(args) -> dict:

if args.mcl:
mcl = args.mcl
if not os.path.isfile(mcl):
if not shutil.which(mcl):
logger.warning(f"mcl can't be found at {mcl}.")
sys.exit()
else:
Expand Down
9 changes: 5 additions & 4 deletions orthohmm/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import multiprocessing
import os
import subprocess
import sys
import time
from typing import Tuple

from Bio import SeqIO
Expand Down Expand Up @@ -31,6 +29,7 @@ def run_phmmer(
pool.join()


# TODO: write unit test
def calculate_sequence_lengths(
fasta_directory: str,
file: str,
Expand All @@ -56,9 +55,10 @@ def calculate_sequence_lengths(
return sequence_lengths


# TODO: write unit test
def get_best_hits(
    df: pd.DataFrame
) -> Tuple[dict, dict]:
    """Return the best-scoring hit for every query.

    For each distinct value in the "query name" column, the row with the
    largest "norm_score" is selected (the first such row when scores tie).

    Parameters
    ----------
    df
        Table with at least the columns "query name", "target name" and
        "norm_score".

    Returns
    -------
    Tuple[dict, dict]
        Two dictionaries keyed by query name: the first maps to the
        "target name" of the best hit, the second to its "norm_score".
    """
    # Index labels of the top-scoring row in each query group.
    # NOTE(review): assumes df's index labels are unique; with duplicated
    # labels .loc would return more than one row per query - confirm.
    best_rows = df.groupby("query name")["norm_score"].idxmax()

    # Re-index once and derive both mappings from the same frame.
    best_hits = df.loc[best_rows].set_index("query name")
    return best_hits["target name"].to_dict(), \
        best_hits["norm_score"].to_dict()
Expand Down Expand Up @@ -219,9 +219,10 @@ def determine_network_edges(
def execute_mcl(
mcl: str,
inflation_value: float,
cpu: int,
output_directory: str,
) -> None:
cmd = f"{mcl} {output_directory}/working_dir/orthohmm_edges.txt --abc -I {inflation_value} -o {output_directory}/working_dir/orthohmm_edges_clustered.txt"
cmd = f"{mcl} {output_directory}/working_dir/orthohmm_edges.txt -te {cpu} --abc -I {inflation_value} -o {output_directory}/working_dir/orthohmm_edges_clustered.txt"
subprocess.run(
cmd,
shell=True,
Expand Down
16 changes: 14 additions & 2 deletions orthohmm/orthohmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def execute(
mcl: str,
inflation_value: float,
**kwargs,
):
) -> None:
print(textwrap.dedent(
f"""\
____ _ _ _ _ __ __ __ __
Expand Down Expand Up @@ -62,6 +62,18 @@ def execute(
"reg", "clu", "ov", "env", "dom", "rep", "inc", "description of target"
]

# # wrap execute() in try, except, and finally
# try:
# do_something()
# except Exception as e:
# logger.error(e)
# finally: # guaranteed to execute
# # if I have the temp directory, delete it
# do_cleanup()

# TODO: support a temporary directory supplied by the user.
# Default to /tmp/orthohmm-<suffix>, e.g. /tmp/orthohmm-v489q48710ds,
# where <suffix> is a randomly generated string (e.g. via tempfile.mkdtemp).
# create working directory
if not os.path.isdir(f"{output_directory}/working_dir"):
os.mkdir(f"{output_directory}/working_dir")
Expand Down Expand Up @@ -120,7 +132,7 @@ def execute(

# Step 4: Conduct mcl clustering
print("Step 4/6: Conducting clustering")
execute_mcl(mcl, inflation_value, output_directory)
execute_mcl(mcl, inflation_value, cpu, output_directory)
print(" Completed!\n")

# Step 5: Write out orthogroup files
Expand Down
18 changes: 9 additions & 9 deletions tests/expected/orthohmm_single_copy_orthogroups/OG2.fa
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
>ENSMUSP00000058613
>Mus_musculus.GRCm38.pep.all|ENSMUSP00000058613
MATRSSRRESRLPFLFALVALLPRGALGGGWTQRLHGGPAPLPQDRGFFVVQGDPRDLRLGTHGDAPGAS
PAARKPLRTRRSAALQPQPIQVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPKSSDVYVSYD
YGKSFSKISEKLNFGVGNNSEAVISQFYHSPADNKRYIFVDAYAQYLWITFDFCSTIHGFSIPFRAADLL
Expand Down Expand Up @@ -31,7 +31,7 @@ DQDLFYAIAVKDLIRKTDRSYKVRSRNSTVEYSLSKLEPGGKYHIIVQLGNMSKDSSIKITTVSLSAPDA
LKIITENDHVLLFWKSLALKEKQFNETRGYEIHMSDSAVNLTAYLGNTTDNFFKVSNLKMGHNYTFTVQA
RCLFGSQICGEPAVLLYDELSSGADAAVIQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSF
SAFANSHYSSRLGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA
>ENSCAFP00000017344
>Canis_familiaris.CanFam3.1.pep.all|ENSCAFP00000017344
MATRSSRRESRLPFLLALVALLPPGAVCAAGAQTLLGGRAPLPQERGVRVLRGEARGGPRGAAEAPPRRR
RSAALQPEPLQVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPRSSDVYVSYDYGKSFKRISE
KLNFGEGNSSEAVIAQFYHSPADNKRYIFADAYAQYLWITFDFCNTLQGFSIPFRAADLLLHSKASDLLL
Expand Down Expand Up @@ -64,7 +64,7 @@ DLIRKSDRSYKIKSRNSTVEYTLNKLEPGGKYHIIVQLGNMSKDSSIKITTVSLSAPDALKIITENDHVL
LFWKSLALKEKHFNESRGYEIHMFDSAMNISAYLGNTTDNFFKISNLKLGHNYTFTVQARCLFGSQICGE
PAVLLYDELGSGGGASAFQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSFTAFANSHYSSR
LGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA
>ENSTNIP00000017394
>Tetraodon_nigroviridis.TETRAODON8.pep.all|ENSTNIP00000017394
MAMRRPAAPQPPARSALCLLLLLCVSGSRSSNLHLRAGPARPQNRGFFTVDAIADPAGGDGAARAAAGGA
YPPRRSPRSSGESAMPKVYGQANLNDSHNQMVVHWAGEKSNVIVALARDSTGSTGPKSSSVYVSYDYGAS
FTQVSDKFRPPADGTGAEDTPVISQFYHSPADNKRYLFVDSGSRVLWSSLDFCRSVRSFRLPFRPSDLLL
Expand Down Expand Up @@ -97,7 +97,7 @@ YDSPDEPLMYAIHVRDAVRKTERDYKVTTQNNTVEYLLRGLEPGGRYSVSVRLRNMSKEASFSLSTVPLP
APEALKILTEDDHVFLFWKSLAVREKGFDEARGYEVHVYDSTTNQTSYLGNTTETYLRVSSLQPGHNYTF
SVQARCLVGGQLCGEAALLLYDQLTAEGGDAAGSGQRSGDMAAVVVPVLFMLLLGVCGGLVILYLRHRRL
QNNFTAFANSHYNSRLGSAIFSSGDELGDDDEDAPMISGFSDDVPMVIA
>ENSPTRP00000007532
>Pan_troglodytes.Pan_tro_3.0.pep.all|ENSPTRP00000007532
MATRSSRRESRLPFLFTLVALLPPGALCEVWTQRLHGGSAPLPQDRGFLVVQGDPRELRLWERGDARGAS
RADEKPLRRKRSAALQPEPIKVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPKSSDVYVSYD
YGKSFKKISDKLNFGVGNRSEAVIAQFYHSPADNKRYIFADAYAQYLWITFDFCNTLQGFSIPFRAADLL
Expand Down Expand Up @@ -130,7 +130,7 @@ QDLLYAIAVKDLIRKTDRSYKVKSRNSTVEYTLNKLEPGGKYHIIVQLGNMSKDSSIKITTVSLSAPDAL
KIITENDHVLLFWKSLALKEKHFNESRGYEIHMFDSAMNITAYLGNTTDNFFKISNLKMGHNYTFTVQAR
CLFGSQICGEPAILLYDELGSGADASATQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSFT
AFANSHYSSRLGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA
>ENSDARP00000129655
>Danio_rerio.GRCz11.pep.all|ENSDARP00000129655
MASGQTRKMLALSRCAIYLLLLLVPAAISSTLRLHHDQRFVLPQDRGFSLVSAHLEPAESRVVRLEREVR
EASAAHQLRVRRNAAGAPVPNVYGMANLNDSHNQMVVHWAGEKSDVIVALARDSVGATDPKTSSVYVSYD
YGANFTPVSEKFQLPREQEDKKQVISQFYHSPADNKRYLFTDTTNSYLWNTFDFCKTVQGFSIPFKPTDL
Expand Down Expand Up @@ -163,7 +163,7 @@ TYAVHVRDTVRKTESDYKVITQNNTVEYTLKGLEPAGRYSITIRLLNMSKEASYTLSTVPLPAPEALKIL
EEEDHVFLFWKSLAVKDRTFNESRGYEVYVHDSVTNSTKCLGNTTETFFRINSLLAGHNYTFSVRARCLL
SNQLCGESAVLLYDELGKAAGQNDAASQSGKSEDMAAIVVPVLFLLLVGVCGGLVVLYLRHRRLQHSFTA
FANSHYNSRLGSAIFSSGDELGDDDEDAPMISGFSDDVPMVIA
>ENSP00000260197
>Homo_sapiens.GRCh38.pep.all|ENSP00000260197
MATRSSRRESRLPFLFTLVALLPPGALCEVWTQRLHGGSAPLPQDRGFLVVQGDPRELRLWARGDARGAS
RADEKPLRRKRSAALQPEPIKVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPKSSDVYVSYD
YGKSFKKISDKLNFGLGNRSEAVIAQFYHSPADNKRYIFADAYAQYLWITFDFCNTLQGFSIPFRAADLL
Expand Down Expand Up @@ -196,7 +196,7 @@ QDLLYAVAVKDLIRKTDRSYKVKSRNSTVEYTLNKLEPGGKYHIIVQLGNMSKDSSIKITTVSLSAPDAL
KIITENDHVLLFWKSLALKEKHFNESRGYEIHMFDSAMNITAYLGNTTDNFFKISNLKMGHNYTFTVQAR
CLFGNQICGEPAILLYDELGSGADASATQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSFT
AFANSHYSSRLGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA
>ENSCINP00000024362
>Ciona_intestinalis.KH.pep.all|ENSCINP00000024362
AELHDGNTQLVIHWAGLNSSNIAVLTKNQRSGSVRTSNVYFSYDYGATFSRKISSSSSGSLPQARISDFV
HAYHNIDRYIFMDRHTPSLFTTTDSGRNFHHVTPLPFTPSTLTINPVKPWIVIGHDETGKNLFKSSDFGS
SWIRIQTRVEKYEFGFSAATSQFETNPDTLYVRRTNLDGSYTILRSETYFQNDIREQVVISHVTDFLLMN
Expand All @@ -218,7 +218,7 @@ PIYILMGGKFSDLSVVAASSLSVSSNVFLHTATTPTTPIPTLPPHECGSTHLPCRTSGMCYPLVWRCDGV
DDCGDDSDEFDCNTTSSTPTAHHLPCFSDLLQFRCANQLCIFRFYKCDGDNDCGDWSDERGCPNGNMSSV
PSVAPPTANPMHCVDGYTYCGVGGTRTCILDRFVCDGDNDCGNNADEKNCGQQCGTDQFTCHNGRCIALS
LHCNGIDDCSDNSDENCTTVDCGPSFYRCAADNHCILRSSVCNGYPDCSDNADEQGCAE
>ENSGALP00000046461
>Gallus_gallus.GRCg6a.pep.all|ENSGALP00000046461
MGCSQQVLLHHLLPGEGLSQCFNHLAAHSEHKDPSGGPCPRALTLTLTPQGPLAPGIVSVLFRFECLIPP
QQDGTPCSGSRCPFPLQVSLNDSHNQMVVHWAGEKSNVIVALARDSLSLLGPKNSDVYISYDYGKSFKKI
SERFSFGDGNSSAVAIAQFYHSPANNQRYIFVDAFVPYLWITTDFCKSIQGFSIPFRAADLLLHSRNPNL
Expand Down Expand Up @@ -251,7 +251,7 @@ KDLVRKTDKIYKVKTRNSTVEYTIKKLEPGGKYHVIVQLGNMSKESSMMINTVPLSAPDALKIITENDHI
LLFWKSLALKESNFNESRGYEVLMFDSLVNRTAYLGNTTENFFKVSNLKIGHNYSFAVRARCLYGGQMCG
EPATLLYDELGAGEDSSESKLGRSTDVAAIVVPILFLLLVALGAGFVVLYTRHRRLQSSFTAFANSHYSS
RLGSAIFSSGDDLGDEDDEAPMITGFSDDVPMVIA
>ENSMODP00000016287
>Monodelphis_domestica.ASM229v1.pep.all|ENSMODP00000016287
MATGSSCGEPRLPLLFALVALLPPWALTETRSLQSAGGRAALPQERGFFVLQGGMGQPWLRARRDAAGGA
SRPAGEPRPPRTRRSAAAHQLEPVKVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALLGPKSSDVY
VSYDYGKTFQKISEKFNFGMGNSSEAVIAQFYHSPADNKRYIFVDAYAQYLWITFDFCSTVQGFSIPFRA
Expand Down
10 changes: 10 additions & 0 deletions tests/unit/test_args_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ def test_process_args_output_directory_dne(self, args):
with pytest.raises(SystemExit):
process_args(args)

def test_process_args_phmmer_not_installed(self, args):
    """process_args should exit when args.phmmer cannot be found."""
    # presumably no executable with this name exists where the tests run
    args.phmmer = "phmmer-that-dne"
    with pytest.raises(SystemExit):
        process_args(args)

def test_process_args_mcl_not_installed(self, args):
    """process_args should exit when args.mcl cannot be found."""
    # presumably no executable with this name exists where the tests run
    args.mcl = "mcl-that-dne"
    with pytest.raises(SystemExit):
        process_args(args)

def test_process_args_default_single_copy_threshold(self, args):
args.single_copy_threshold = None
res = process_args(args)
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ def parser():

class TestParser(object):
    """Checks on the command-line parser supplied by the ``parser`` fixture."""

    def test_required_only(self, parser):
        """A lone positional argument is stored as ``fasta_directory``."""
        fasta_directory = "./tests/samples/"
        parsed = parser.parse_args([fasta_directory])
        assert parsed.fasta_directory == fasta_directory

0 comments on commit 9f54970

Please sign in to comment.