-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathcif2pdb.py
116 lines (95 loc) · 3.77 KB
/
cif2pdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
"""
Script to convert mmCIF files to PDB format.
usage: python cif2pdb.py ciffile [pdbfile]
Requires python BioPython (`pip install biopython`). It should work with recent version of python 2 or 3.
@author Spencer Bliven <[email protected]>
This was downloaded from https://gist.githubusercontent.com/sbliven/b7cc2c5305aa3652a75a580ae7c6ce33/raw/c9478c0ab7eb9ec8af495348a068e7152e03e3e4/cif2pdb.py
on August 14th 2022. Why is there no simple code to do this?
"""
import sys
import argparse
import logging
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB import PDBIO
def int_to_chain(i,base=62):
"""
int_to_chain(int,int) -> str
Converts a positive integer to a chain ID. Chain IDs include uppercase
characters, numbers, and optionally lowercase letters.
i = a positive integer to convert
base = the alphabet size to include. Typically 36 or 62.
"""
if i < 0:
raise ValueError("positive integers only")
if base < 0 or 62 < base:
raise ValueError("Invalid base")
quot = int(i)//base
rem = i%base
if rem < 26:
letter = chr( ord("A") + rem)
elif rem < 36:
letter = str( rem-26)
else:
letter = chr( ord("a") + rem - 36)
if quot == 0:
return letter
else:
return int_to_chain(quot-1,base) + letter
class OutOfChainsError(Exception): pass
def rename_chains(structure):
"""Renames chains to be one-letter chains
Existing one-letter chains will be kept. Multi-letter chains will be truncated
or renamed to the next available letter of the alphabet.
If more than 62 chains are present in the structure, raises an OutOfChainsError
Returns a map between new and old chain IDs, as well as modifying the input structure
"""
next_chain = 0 #
# single-letters stay the same
chainmap = {c.id:c.id for c in structure.get_chains() if len(c.id) == 1}
for o in structure.get_chains():
if len(o.id) != 1:
if o.id[0] not in chainmap:
chainmap[o.id[0]] = o.id
o.id = o.id[0]
else:
c = int_to_chain(next_chain)
while c in chainmap:
next_chain += 1
c = int_to_chain(next_chain)
if next_chain >= 62:
raise OutOfChainsError()
chainmap[c] = o.id
o.id = c
return chainmap
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert mmCIF to PDB format')
parser.add_argument("ciffile",help="mmCIF input file")
parser.add_argument("pdbfile",nargs="?",help="PDB output file. Default based on CIF filename")
parser.add_argument("-v","--verbose", help="Long messages",
dest="verbose",default=False, action="store_true")
args = parser.parse_args()
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG if args.verbose else logging.WARN)
ciffile = args.ciffile
tmppdb = ciffile[::-1].replace('fic.', '', 1)[::-1]
pdbfile = args.pdbfile or tmppdb+".pdb"
#Not sure why biopython needs this to read a cif file
strucid = ciffile[:4] if len(ciffile)>4 else "1xxx"
# Read file
parser = MMCIFParser()
structure = parser.get_structure(strucid, ciffile)
# rename long chains
try:
chainmap = rename_chains(structure)
except OutOfChainsError:
logging.error("Too many chains to represent in PDB format")
sys.exit(1)
if args.verbose:
for new,old in chainmap.items():
if new != old:
logging.info("Renaming chain {0} to {1}".format(old,new))
#Write PDB
io = PDBIO()
io.set_structure(structure)
#TODO What happens with large structures?
io.save(pdbfile)