Skip to content

Commit

Permalink
Merge branch 'develop-2.x'
Browse files Browse the repository at this point in the history
  • Loading branch information
Bernhard10 committed Oct 13, 2021
2 parents 1622602 + 371d586 commit 2b7a9a1
Show file tree
Hide file tree
Showing 51 changed files with 147,256 additions and 12,529 deletions.
1 change: 1 addition & 0 deletions doc/apidoc/forgi.graph.bulge_graph.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ forgi\.graph\.bulge\_graph module
:members:
:undoc-members:
:show-inheritance:
:inherited-members:
11 changes: 11 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
Changelog
=========

Changes in Version 2.1

* Cython code for vectors is no longer optional. This allows the setup to fail on errors, instead of hiding them and having them pop up later.
* Store 3 points per nucleotide
* In pymol visualizations the selections for elements are now inside a group
* Performance improvements
* compareRNA can compare more than 2 RNAs. Added additional metirc: stem-rmsd
* visulaization of virtual stem (for broken MLs)
* experimental integration with Vienna RNA package for refolding of missing elements.
* Added compatibility with python 3.7-3.9 (Note: forgi is still compatible with python 2.7)

Changes in Version 2.0
----------------------

Expand Down
6 changes: 3 additions & 3 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@

# General information about the project.
project = u'forgi'
copyright = u'2012-2016 Peter Kerpedjiev <[email protected]>; 2015-2018 Bernhard Thiel <[email protected]>'
copyright = u'2012-2016 Peter Kerpedjiev <[email protected]>; 2015-2021 Bernhard Thiel <[email protected]>; various contributors; University of Vienna, Theoretical Biochemistry Group'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '2.0'
version = '2.1'
# The full version, including alpha/beta/rc tags.
release = '2.0.3'
release = '2.1.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
forgi 2.0
forgi 2.1
=================================

forgi is a python library for manipulating RNA secondary structure. Some examples of its usage can be found in the two tutorials below.
Expand Down
142 changes: 108 additions & 34 deletions examples/compare_RNA.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,78 +14,152 @@


def get_parser():
parser = fuc.get_rna_input_parser("Compare two RNA 3d structures based on RMSD, ACC"
" or other measures", 2, "3d", enable_logging=True)
parser = fuc.get_rna_input_parser("Compare RNA 3d structures based on RMSD, ACC"
" or other measures. If more than 2 filenames are given, compare all files to the first.", "+", "3d", enable_logging=True)
output_group = parser.add_argument_group("Controlling output",
description="Control, based on which measure the two "
"structures will be compared")
output_group.add_argument("--acc", help="Compare based on the Adjacency correlation coefficient"
" ACC", action="store_true")
output_group.add_argument(
"--rmsd", help="Compare based on CG-RMSD", action="store_true")
output_group.add_argument(
"--stem-rmsd", help="Compare based on coarse grained stem RMSD", action="store_true")
output_group.add_argument(
"--pdb-rmsd", help="Compare based on PDB-RMSD", action="store_true")
return parser


def main(args):
with fuc.hide_traceback():
cg1, cg2 = fuc.cgs_from_args(
cgs = fuc.cgs_from_args(
args, rna_type="3d", enable_logging=True)

if not (args.acc or args.rmsd or args.pdb_rmsd):
if len(cgs)<2:
raise ValueError("At least 2 RNA structures required for comparison")
cg1=cgs[0]
if not (args.acc or args.rmsd or args.pdb_rmsd or args.stem_rmsd):
showall = True
else:
showall = False
if showall or args.acc:
if cg1.defines != cg2.defines:
if args.acc:
print(
"Cannot compare two 3d structures that do not correspond to the same RNA.")
sys.exit(1)
else:
adj = ftms.AdjacencyCorrelation(cg1)
print("ACC:\t{:.3f}".format(ftms.mcc(adj.evaluate(cg2))))
if showall or args.rmsd:
print("RMSD:\t{:.3f}".format(ftms.cg_rmsd(cg1, cg2)))
if showall or args.pdb_rmsd:
if not pdb_rmsd(cg1, cg2):
# If --pdb-rmsd was not given, just don't print it.
# If it was given, we exit with non-zero exit status.
if args.pdb_rmsd:
print(
"Cannot calculate PDB-RMSD: The two files do not contain the same chains.")
sys.exit(1)
for cg2 in cgs[1:]:
if showall or args.acc:
if cg1.defines != cg2.defines:
if args.acc:
print(
"Cannot compare two 3d structures that do not correspond to the same RNA.")
sys.exit(1)
else:
adj = ftms.AdjacencyCorrelation(cg1)
print("ACC:\t{:.3f}".format(ftms.mcc(adj.evaluate(cg2))))
if showall or args.rmsd:
print("RMSD:\t{:.3f}".format(ftms.cg_rmsd(cg1, cg2)))
if showall or args.stem_rmsd:
try:
print("Stem:\t{:.3f}".format(ftms.cg_stem_rmsd(cg1, cg2)))
except ftms.Incompareable:
if args.stem_rmsd:
raise
# Otherwise do nothing
if showall or args.pdb_rmsd:
if not pdb_rmsd(cg1, cg2):
# If --pdb-rmsd was not given, just don't print it.
# If it was given, we exit with non-zero exit status.
if args.pdb_rmsd:
print("Cannot calculate PDB-RMSD: "
"The two files do not contain the same chains.")

def pdb_rmsd(cg1, cg2):
if len(cg1.chains)==1==len(cg2.chains):
reslist1 = []
reslist2 = []
chain1, = cg1.chains.values()
chain2, = cg2.chains.values()
reslist1.extend(cr for cr in chain1.get_list()
if cr.resname.strip() in ftup.RNA_RESIDUES)
reslist2.extend(cr for cr in chain2.get_list()
if cr.resname.strip() in ftup.RNA_RESIDUES)
print("PDB-RMSD (single chain):\t{:.3f}".format(
ftup.residuelist_rmsd(reslist1, reslist2, sidechains=True)[1]))
return True
resnames1=[ r.resname.strip() for r in chain1 ]
resnames2=[ r.resname.strip() for r in chain2 ]
log.info("Resname lists are %s, %s", resnames1, resnames2)
if resnames1 == resnames2:
for r in chain1:
if r.resname.strip() in ftup.RNA_RESIDUES:
reslist1.append(r)
for r in chain2:
if r.resname.strip() in ftup.RNA_RESIDUES:
reslist2.append(r)
print("PDB-RMSD (single chain):\t{:.3f}".format(
ftup.residuelist_rmsd(reslist1, reslist2, sidechains=True)[1]))
return True

else:
log.info("Resname lists are different")
reslist1, reslist2 = common_reslists(chain1, chain2)
print("PDB-RMSD (single chain):\t{:.3f}".format(
ftup.residuelist_rmsd(reslist1, reslist2, sidechains=True)[1]))
return True
else:
log.info("Chain lengths are %s and %s",len(cg1.chains), len(cg2.chains) )
common_chains = set(cg1.chains.keys()) & set(cg2.chains.keys())
reslist1 = []
reslist2 = []
for chain in common_chains:
reslist1.extend(cr for cr in cg1.chains[chain].get_list()
if cr.resname.strip() in ftup.RNA_RESIDUES)
reslist2.extend(cr for cr in cg2.chains[chain].get_list()
if cr.resname.strip() in ftup.RNA_RESIDUES)
ext1, ext2 = common_reslists(cg1.chains[chain], cg2.chains[chain])
reslist1.extend(ext1)
reslist2.extend(ext2)
if common_chains:
print("PDB-RMSD (chains {}):\t{:.3f}".format("-".join(common_chains),
ftup.residuelist_rmsd(reslist1, reslist2, sidechains=True)[1]))
return True
return False


def common_reslists(chain1, chain2):
reslist1 = []
reslist2 = []
start1 = chain1.get_list()[0].id[1]
start2 = chain2.get_list()[0].id[1]
offset=start2-start1
mismatches=float("inf")
for offs in [0, offset]:
rl1, rl2, m = _common_reslist_with_offset(chain1, chain2, offs)
if m<mismatches:
mismatches=m
reslist1, reslist2 = rl1, rl2
if mismatches:
log.warning("There were %s point mutations between the two chains", mismatches)
if mismatches>8:
raise ValueError("Could not find mapping between chains!")
return reslist1, reslist2

def _common_reslist_with_offset(chain1, chain2, offset):
log.info("Trying mapping with offset %s", offset)
reslist1 = []
reslist2 = []
for cr2 in chain2.get_list():
mapped_id = cr2.id[0], cr2.id[1]-offset, cr2.id[2]
if mapped_id not in chain1:
log.info("Residue %s (%s) [->%s] not in chain1", cr2.id, cr2.resname.strip(), mapped_id)
log.info("Res %s has atoms %s", mapped_id, cr2.get_list())
mismatch=0
for cr1 in chain1.get_list():
mapped_id = cr1.id[0], cr1.id[1]+offset, cr1.id[2]
cr1_name = cr1.resname.strip()
try:
cr2 = chain2[mapped_id]
except KeyError as e:
log.info("Residue %s (%s) [->%s] not in chain2", cr1.id, cr1_name, mapped_id)
continue
cr2_name = cr2.resname.strip()
if cr1_name != cr2_name:
mismatch+=1
log.info("Residue %s [->%s](chains %s, %s) has different name in the "
"two chains: %s!=%s", cr1.id, mapped_id, chain1.id, chain2.id, cr1_name, cr2_name)
if cr1_name in ftup.RNA_RESIDUES and cr2_name in ftup.RNA_RESIDUES:
reslist1.append(cr1)
reslist2.append(cr2)

return reslist1, reslist2, mismatch



parser = get_parser()
if __name__ == "__main__":
args = parser.parse_args()
Expand Down
13 changes: 13 additions & 0 deletions examples/rnaConvert.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def __init__(self, write_fun, extention_fun, rna_type):
parser.add_argument("-f", "--force", action="store_true",
help="Overwrite files, if they already exist. Note: In case of race conditions, "
"files could be overwritten even if this flag is not provided.")
parser.add_argument("--refold-missing", action="store_true",
help="Only used for conversion from PDB/ MMCIF to secondary structure formats."
"Requires the ViennaRNA library with python bindings installed."
"With this option, missing residues (i.e.residues without 3D coordinates)"
"are included in the output and their secondary structure is predicted."
"This prediction is done with RNAfold on the whole sequence using the "
"secondary structure from the PDB as hard constraints.")


if __name__ == "__main__":
Expand All @@ -97,9 +104,15 @@ def __init__(self, write_fun, extention_fun, rna_type):
else:
filename = None
directory = ""
if args.refold_missing and args.target_type and FILETYPES[args.target_type].rna_type == "3d":
raise ValueError("Option refold-missing cannot"
"be used for output files with 3D information.")

for i, cg in enumerate(cgs):
if not args.to_file and i > 0:
print("\n\n========================================\n\n")
if args.refold_missing:
cg = fuc.with_missing_refolded(cg)
target_str = FILETYPES[args.target_type].convert(cg)
if args.to_file:
if filename:
Expand Down
46 changes: 36 additions & 10 deletions examples/visualize_rna.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def get_parser():
help='Display the virtual atoms')
parser.add_argument('--virtual-residues', default=False,
action='store_true', help='Display the virtual residues as spheres')
parser.add_argument('--virtual-stems', default="",
type=str, help='Display the virtual stems at broken MLS (in this color)')
parser.add_argument('--three-points', default=False,
action='store_true', help='Display the virtual 2 points as spheres')
parser.add_argument('--only-elements', dest='only_elements', default=None,
help='Display only these elements, separated by commas')
parser.add_argument('--no-loops', action="store_true",
Expand Down Expand Up @@ -77,6 +81,7 @@ def get_parser():
help='Start pymol in batch mode')
parser.add_argument('--pymol-file', type=str,
help=argparse.SUPPRESS) # Store the PYMOL file under this name. WARNING: Do not use .pml as file ending!!!
parser.add_argument('--only-first-bg', action="store_true", help=argparse.SUPPRESS)

return parser

Expand All @@ -86,7 +91,8 @@ def pymol_printer_from_args(args):
if args.thin_cylinders:
pp.cylinder_width = 0.5
pp.show_twists = False
pp.display_virtual_residues = args.virtual_residues
pp.display_virtual_residues = args.virtual_residues
pp.display_3_points = args.three_points
pp.virtual_atoms = args.virtual_atoms

if args.only_elements is not None:
Expand All @@ -100,6 +106,17 @@ def pymol_printer_from_args(args):
pp.sidechain_atoms = args.sidechain_atoms
pp.basis = args.basis
pp.rainbow = args.rainbow
pp.plot_virtual_stems = args.virtual_stems
if args.virtual_stems:
if args.virtual_stems not in ftvp.NAMED_COLORS: # A hex value
try:
color = tuple(
int(args.virtual_stems[i:i + 2], 16) / 255 for i in (0, 2, 4))
except:
raise ValueError("Color value '{}' not understood. "
"Either provide a HEX value or "
"one of {}".format(args.virtual_stems, ",".join(ftvp.NAMED_COLORS.keys())))
pp.plot_virtual_stems = color
if args.element_colors:
directives = args.element_colors.split(",")
elem_colors = {}
Expand Down Expand Up @@ -145,12 +162,12 @@ def align_rnas(rnas):
assert ftuv.magnitude(ftuv.get_vector_centroid(rna.get_ordered_virtual_residue_poss()))<10**-5, ftuv.magnitude(ftuv.get_vector_centroid(rna.get_ordered_virtual_residue_poss()))



def main(args):
rnas = fuc.cgs_from_args(args, '+', '3d')
pp = pymol_printer_from_args(args)

if args.align:
print("Aligning RNAs")
align_rnas(rnas)
if args.labels:
label_list = args.labels.split(",")
Expand All @@ -172,9 +189,12 @@ def main(args):

color_modifier = 1.0
log.info("Visualizing {} rnas".format(len(rnas)))
plot_bg = True
for rna in rnas:
pp.add_cg(rna, labels, color_modifier)
color_modifier *= 0.7
pp.add_cg(rna, labels, color_modifier, plot_core_bulge_graph=plot_bg)
if args.only_first_bg:
plot_bg=False
#color_modifier *= 0.7

with make_temp_directory() as tmpdir:
# The file describing the cg-structure as cylinders
Expand All @@ -187,7 +207,9 @@ def main(args):

pdb_fns = []
selections = ""
group_selections=""
for i, rna in enumerate(rnas):
sel_names=[]
if rna.chains:
obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
fn = os.path.join(tmpdir, obj_name + ".cif")
Expand All @@ -202,8 +224,10 @@ def main(args):
for c in chains:
sel.append("( %{} and chain {} and resi {}) ".format(
obj_name, c, "+".join(map(str, (r.resid[1] for r in resids)))))
selections += "select {}, ".format(
d + "_" + obj_name) + " or ".join(sel) + "\n"
sel_name = d + "_" + obj_name
selections += "select {}, {}\n".format(sel_name, " or ".join(sel))
sel_names.append(sel_name)
group_selections+=("cmd.group('sel_{}', '{}')\n".format(obj_name, " ".join(sel_names)))

pymol_cmd = 'hide all\n'
pymol_cmd += 'show cartoon, all\n'
Expand All @@ -214,16 +238,18 @@ def main(args):

for constraint in args.only_elements.split(','):
color = pp.get_element_color(constraint)

for r in cg.define_residue_num_iterator(constraint, seq_ids=True):
pymol_cmd += "show sticks, resi %r\n" % (r[1])
pymol_cmd += "color %s, resi %r\n" % (color, r[1])
for i, rna in enumerate(rnas):
for r in rna.define_residue_num_iterator(constraint, seq_ids=True):
pymol_cmd += "show sticks, resi {}\n".format(r[1])
pymol_cmd += "color {}, resi {}\n".format(color, r[1])

pymol_cmd += 'run %s\n' % (stru_filename)
pymol_cmd += 'bg white\n'
pymol_cmd += 'clip slab, 10000\n'
#pymol_cmd += 'orient\n'
pymol_cmd += selections
pymol_cmd += group_selections

if args.output is not None:
pymol_cmd += 'ray\n'
pymol_cmd += 'png %s\n' % (args.output)
Expand Down
Loading

0 comments on commit 2b7a9a1

Please sign in to comment.