Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify resclust outputs #330

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: ci

on: push
on: pull_request

jobs:
build:
Expand Down
42 changes: 35 additions & 7 deletions src/arctic3d/cli_resclust.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
`linkage` : the linkage strategy.

`criterion` : the criterion to extract the clusters.

`output` : the path where the clusters data are written.
"""
import argparse
import sys
Expand All @@ -36,6 +38,7 @@
get_clustering_dict,
)
from arctic3d.modules.input import Input
from arctic3d.modules.output import create_output_folder


argument_parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -88,6 +91,13 @@
"--chain", help="Segment ID to be considered", required=False
)

# Optional destination for the clusters data: main() only writes to disk
# when a value is supplied for this flag.
argument_parser.add_argument(
    "--output",
    required=False,
    type=str,
    help="Path to the generated output dictionary",
)


def load_args(arguments):
"""
Expand Down Expand Up @@ -128,7 +138,7 @@ def maincli():
cli(argument_parser, main)


def main(input_arg, residue_list, chain, threshold, linkage, criterion):
def main(input_arg, residue_list, chain, threshold, linkage, criterion, output):
"""Main function."""
log.setLevel("INFO")

Expand Down Expand Up @@ -192,14 +202,32 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion):
)

cl_dict = get_clustering_dict(clusters, unique_sorted_resids)
for el in cl_dict.keys():
log.info(
f"cluster {el}, residues"
f" {' '.join([str(res) for res in cl_dict[el]])}"
)

else:
log.info("Only one residue, no clustering performed.")
log.info(f"cluster 1, residues {unique_sorted_resids[0]}")
# fake cluster dict with only one entry
cl_dict = {1: unique_sorted_resids}

# log data
for el in cl_dict.keys():
log.info(
f"cluster {el}, residues"
f" {' '.join([str(res) for res in cl_dict[el]])}"
)

# check if data must be flushed to output file
if output:
# initiate output directory
output_basepath = create_output_folder(output, uniprot_id='resclust')
# write txt file
log.info(f'writing clusters data in "{output_basepath}/Clusters.txt"')
with open(f'{output_basepath}/clustered_residues.out', 'w') as filout:
for el in cl_dict.keys():
filout.write(
f"cluster {el} -> "
f"{' '.join([str(res) for res in cl_dict[el]])}"
"\n"
)


if __name__ == "__main__":
Expand Down
6 changes: 2 additions & 4 deletions src/arctic3d/modules/clustering.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,8 @@ def get_clustering_dict(clusters, ligands):
cl_dict = {}
# loop over clusters
for cl in range(len(clusters)):
if clusters[cl] not in cl_dict.keys():
cl_dict[clusters[cl]] = [ligands[cl]]
else:
cl_dict[clusters[cl]].append(ligands[cl])
cluster_members = cl_dict.setdefault(cl, [])
cluster_members.append(ligands[cl])
log.info(f"Cluster dictionary {cl_dict}")
return cl_dict

Expand Down
22 changes: 22 additions & 0 deletions tests/test_cli_resclust.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import pytest

import os
import shutil

from arctic3d.cli_resclust import main

from . import golden_data
Expand All @@ -22,6 +25,7 @@ def test_resclust_cli(example_pdbpath):
7.0,
"average",
"distance",
None,
)


Expand All @@ -35,6 +39,7 @@ def test_wrong_residue_list(example_pdbpath):
9.0,
"average",
"distance",
None,
)
assert e.type == SystemExit
assert e.value.code == 1
Expand All @@ -49,4 +54,21 @@ def test_resclust_maxclust(example_pdbpath):
2,
"average",
"maxclust",
None,
)


def test_resclust_genoutput(example_pdbpath):
    """Check that `main` writes the clusters file when an output name is given.

    Runs the clustering on six residues with the ``maxclust`` criterion and
    verifies that both the output directory and the
    ``clustered_residues.out`` file inside it exist afterwards.
    """
    output_dir = "resclustout"
    try:
        main(
            example_pdbpath,
            "100,101,102,133,134,135",
            None,
            2,
            "average",
            "maxclust",
            output_dir,
        )
        assert os.path.exists(output_dir)
        assert os.path.exists(
            os.path.join(output_dir, "clustered_residues.out")
        )
    finally:
        # Clean up even when main() or an assertion fails, so a failed run
        # does not leave the directory behind; ignore_errors covers the case
        # where the directory was never created.
        shutil.rmtree(output_dir, ignore_errors=True)

Loading