-
Notifications
You must be signed in to change notification settings - Fork 11
/
secondary_mutants.py
executable file
·150 lines (124 loc) · 7.07 KB
/
secondary_mutants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python
# test run of CWB's MPI-capable PyRosetta module on TACC lonestar
import sys,os,re,argparse
from rosetta import *
from PyRosetta_TACC_MPI import *
from mpi4py import MPI
def _main(args):
parser = argparse.ArgumentParser(description="Predict ddG (ddREU) effects of point mutations to a protein structure. Handles nsAAs, uses MPI")
parser.add_argument('start_pose_pdbs_dir',help='Directory containing starting pose .pdb files')
parser.add_argument('database',help='location of Rosetta database to use')
parser.add_argument('center_residue',type=int,help='residue to center the sphere of residues to mutate, i.e. the primary mutation.')
parser.add_argument('center_residue_ref_pdb',help='pdb to use for calculating CA-CA distances to define sphere of residues to mutate')
parser.add_argument('center_residue_chain',help='pdb chain of the primary mutated residue (will only mutate residues on this chain)')
parser.add_argument('mutate_center_res_to',help="mutant AA of the primary mutant")
parser.add_argument('--mutate_secondary_to','-t',default='nAAs',help='set of AAs to mutate to; \'all\',\'nAAs\',\'nsAAs\', list of three-letter codes (e.g. \"AA1,AA2,...\"')
parser.add_argument('--ddg_out_file','-o',default='ddg_out.txt',help='name of file to output final text (tab-delimited) table of results')
parser.add_argument('--radius','-s',default=10,type=float,help="Radius defining the sphere of residues to mutate. Default=10 Angstroms")
parser.add_argument('--replicates','-r',default=10,type=int,help='Replicate runs (per starting pose). Default=10')
parser.add_argument('--restrict_to_chain','-c',action='store_false',help="Don't restrict to only residues w/in the same chain as center residue")
parser.add_argument('--min_restrict_radius','-d',action='store_true',help='Only minimize residues within the packing radius (Default=FALSE)')
parser.add_argument('--min_cst_sd','-k',default=None,type=float,help='Use bb coord constraints w/ this std. dev. during minimization (recommend 0.5, default=None)')
parser.add_argument('--dump_ref_pdb','-f',action='store_true')
parser.add_argument('--dump_mut_pdb','-m',action='store_true')
parser.add_argument('--dump_pdb_base','-b',default="ddg_out")
parser.add_argument('--verbose','-v',default=1,type=int,help="How much output to show. 0=no output, 1=script progress msgs, 2=1+Rosetta status msgs")
parsed_args = parser.parse_args()
#print parsed_args
start_pose_pdbs_dir = parsed_args.start_pose_pdbs_dir
database = parsed_args.database
center_residue = parsed_args.center_residue
center_residue_ref_pdb = parsed_args.center_residue_ref_pdb
center_residue_chain = parsed_args.center_residue_chain
mutate_center_res_to = parsed_args.mutate_center_res_to
AAs_str = parsed_args.mutate_secondary_to
radius = parsed_args.radius
ddg_out_file = parsed_args.ddg_out_file
nreplicates = parsed_args.replicates
min_cst_sd = parsed_args.min_cst_sd
min_restrict_radius = parsed_args.min_restrict_radius
restrict_to_chain = parsed_args.restrict_to_chain
dump_ref_pdb = parsed_args.dump_ref_pdb
dump_mut_pdb = parsed_args.dump_mut_pdb
dump_pdb_base = parsed_args.dump_pdb_base
verbose = parsed_args.verbose
#
# if len(args) not in (5,6):
# print args
# print "usage: <start_pose_pdbs_dir> <database> <PDB_residue1PDBChain[,residue2,residue3...]|range1Start-range1End[,range2Start-range2End,...]> <AA1[#,AA2,AA3...]|'All'|'nAAs'|'nsAAs'> <replicate_packing_runs> <restrict_to_chain>"
# sys.exit(0)
# elif len(args) == 5:
# (start_pose_pdbs_dir,database,residues_str,AAs_str, nreplicates) = args
# restrict_to_chain = True # by default
# elif len(args) == 6:
# (start_pose_pdbs_dir,database,residues_str, AAs_str, nreplicates,restrict_to_chain) = args
# if restrict_to_chain == "True":
# restrict_to_chain = True
# else:
# restrict_to_chain = False
# nreplicates = int(nreplicates)
AAs = None
if AAs_str.lower() == "all":
AAs = nAAs + nsAAs
elif AAs_str.lower() == "naas":
AAs = nAAs
elif AAs_str.lower() == "nsaas":
AAs = nsAAs
else:
AAs = AAs_str.split(",")
#residues = []
#for r in residues_ls:
# r_sp = r.split('-')
# if len(r_sp) > 1:
# r_ch1 = re.search("(\d+)(\w+)",r_sp[0])
# r_ch2 = re.search("(\d+)(\w+)",r_sp[1])
# r_range_num = range(int(r_ch1.group(1)),int(r_ch2.group(1)))
# assert r_ch1.group(2) == r_ch2.group(2), "When specifying range, chains must be the same: %s" % (r)
# r_range_chain = [r_ch1.group(2),] * len(r_range_num)
# r_range = zip(r_range_num,r_range_chain)
# residues.extend(r_range)
# else:
# r_ch = re.search("(\d+)(\w+)",r)
# residues.append((int(r_ch.group(1)),r_ch.group(2)))
#print residues
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
rosetta_mute = ""
if verbose < 2:
rosetta_mute = " -mute all"
rosetta_options="-ignore_unrecognized_res False -database=%s %s" % (database,rosetta_mute)
start_pose_pdbs = [start_pose_pdbs_dir + "/" + x for x in os.listdir(start_pose_pdbs_dir) if len(x) > 4 and x[-4:] == '.pdb']
if verbose > 0:
print "Using %d replicate starting poses..." % (len(start_pose_pdbs),)
#start_poses.pack_all_poses()
#experiments = []
#for (i,p) in enumerate(start_poses):
cur_exp = SecondaryMutantScanExperimentRunner(start_pose_pdbs,\
rosetta_options,\
comm,\
center_residue,\
mutate_center_res_to,\
center_residue_chain,\
radius,\
center_residue_ref_pdb,\
AAs,\
nreplicates,\
restrict_to_chain,\
min_cst_sd=min_cst_sd,\
min_restrict_radius=min_restrict_radius,\
dump_ref_pdb=dump_ref_pdb,\
dump_mut_pdb=dump_mut_pdb,\
pdb_base=dump_pdb_base,\
PDB_res=True,\
verbose=verbose)
cur_exp.scatter_job()
comm.barrier()
cur_exp.gather_results()
if rank == 0:
print "==============================="
print "All jobs complete, writing output to %s..." % (ddg_out_file,),
cur_exp.dump_ddg_results(ddg_out_file)
print "Done!"
if __name__=='__main__':
_main(sys.argv[1:])