Skip to content

Commit

Permalink
Select multi-char chains with ChainPDBSelector
Browse files Browse the repository at this point in the history
Rather than taking a single string of one-character
chain IDs, have ChainPDBSelector take a list of
string IDs, so that multiple-character chains can
be selected.
  • Loading branch information
benmwebb committed Sep 28, 2023
1 parent 4728f02 commit a30dc30
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 22 deletions.
27 changes: 19 additions & 8 deletions modules/atom/include/pdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,22 +259,33 @@ class ChainPDBSelector : public NonAlternativePDBSelector {
if (!NonAlternativePDBSelector::get_is_selected(record)) {
return false;
}
char cid = record.get_chain_id()[0];
for (int i = 0; i < (int)chains_.length(); i++) {
if (cid == chains_[i]) return true;
}
return false;
std::string cid = record.get_chain_id();
return std::binary_search(chains_.begin(), chains_.end(), cid);
}
IMP_OBJECT_METHODS(ChainPDBSelector);

//! Allow any of the named chains
/** Chain IDs here, and in mmCIF files, can be any length,
although chains in legacy PDB files are restricted to
a single character.

\param[in] chains IDs of the chains to allow. Each entry is a complete
chain ID; get_is_selected() compares a record's whole
chain ID string against these entries.
\param[in] name Passed through to the NonAlternativePDBSelector base
class constructor.
*/
ChainPDBSelector(Strings chains,
std::string name = "ChainPDBSelector%1%")
: NonAlternativePDBSelector(name), chains_(chains) {
// Keep the IDs sorted: get_is_selected() looks them up with
// std::binary_search, which requires a sorted range.
std::sort(chains_.begin(), chains_.end());
}

#ifndef IMP_DOXYGEN
//! The chain id can be any character in chains
/** \note This limits the selection to single-character chain IDs
(mmCIF files support multiple-character chain names) */
IMPATOM_DEPRECATED_METHOD_DECL(2.20)
ChainPDBSelector(const std::string &chains,
std::string name = "ChainPDBSelector%1%")
: NonAlternativePDBSelector(name), chains_(chains) {}
std::string name = "ChainPDBSelector%1%");
#endif

private:
std::string chains_;
Strings chains_;
};

//! Select all non-water ATOM and HETATM records
Expand Down
10 changes: 10 additions & 0 deletions modules/atom/src/pdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,16 @@ bool HydrogenPDBSelector::is_hydrogen(const PDBRecord &record) const {
(atom_name[0] == 'H' || atom_name[0] == 'D'));
}

// Deprecated constructor taking one string of single-character chain IDs.
// Every character of `chains` becomes its own one-character entry in
// chains_, so "AB" allows chain "A" and chain "B" (not a chain named "AB").
ChainPDBSelector::ChainPDBSelector(const std::string &chains, std::string name)
    : NonAlternativePDBSelector(name) {
  IMPATOM_DEPRECATED_METHOD_DEF(
      2.20, "Pass a list of chain ID strings instead");
  for (const char one_char_id : chains) {
    const std::string id(1, one_char_id);
    chains_.push_back(id);
  }
  // Leave the list ordered, as the list-based constructor does.
  std::sort(chains_.begin(), chains_.end());
}

namespace {
std::string nicename(std::string name) {
boost::filesystem::path path(name);
Expand Down
9 changes: 6 additions & 3 deletions modules/atom/test/input/chaintest.cif
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ _atom_site.auth_comp_id
_atom_site.auth_asym_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
ATOM 1 C CA . ALA A 1 1 ? 953.312 704.510 700.259 1.00 84.31 ? 68 ALA 7 CA 1
ATOM 2 C CA . GLN MA 12 1 ? 862.521 620.909 612.377 1.00 10.00 ? 287 GLN K CA 1
ATOM 2 C CA . GLN B 12 1 ? 862.521 620.909 612.377 1.00 10.00 ? 287 GLN . CA 1
ATOM 1 C CA . ALA A 1 1 ? 953.312 704.510 700.259 1.00 84.31 ? 68 ALA 7 CA 1
ATOM 2 C CA . GLN MA 12 1 ? 862.521 620.909 612.377 1.00 10.00 ? 287 GLN K CA 1
ATOM 3 C CA . GLN B 12 1 ? 862.521 620.909 612.377 1.00 10.00 ? 287 GLN . CA 1
ATOM 4 C CA . ALA ZA 1 1 ? 953.312 704.510 700.259 1.00 84.31 ? 68 ALA Z7 CA 1
ATOM 5 C CA . GLN ZMA 12 1 ? 862.521 620.909 612.377 1.00 10.00 ? 287 GLN ZK CA 1
ATOM 6 C CA . GLN ZB 12 1 ? 862.521 620.909 612.377 1.00 10.00 ? 287 GLN . CA 1
44 changes: 41 additions & 3 deletions modules/atom/test/test_mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,24 +195,62 @@ def test_chain_selector(self):
m = IMP.Model()

mp = IMP.atom.read_mmcif(self.get_input_file_name('chaintest.cif'), m,
IMP.atom.ChainPDBSelector("K"))
IMP.atom.ChainPDBSelector(["K"]))
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['K'])

mp = IMP.atom.read_mmcif(self.get_input_file_name('chaintest.cif'), m,
IMP.atom.ChainPDBSelector("7"))
IMP.atom.ChainPDBSelector(["7"]))
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['7'])

# If no auth-provided chain, select by asym_id
mp = IMP.atom.read_mmcif(self.get_input_file_name('chaintest.cif'), m,
IMP.atom.ChainPDBSelector("B"))
IMP.atom.ChainPDBSelector(["B"]))
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['B'])

def test_chain_selector_multi_char(self):
"""Check reading single chain with multi-char ID from an mmCIF file"""
m = IMP.Model()

# Try deprecated method (single string argument); each character is
# treated as its own chain ID, so this will select chains "Z" and "K"
with IMP.allow_deprecated():
s = IMP.atom.ChainPDBSelector("ZK")
mp = IMP.atom.read_mmcif(
self.get_input_file_name('chaintest.cif'), m, s)
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['K'])

# List form with two one-character IDs: expected to behave the same as
# the deprecated "ZK" string above (only chain "K" is read)
mp = IMP.atom.read_mmcif(self.get_input_file_name('chaintest.cif'), m,
IMP.atom.ChainPDBSelector(["Z", "K"]))
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['K'])

# List form with a single two-character ID: only the chain whose ID is
# exactly "ZK" should be read, not chain "K"
mp = IMP.atom.read_mmcif(self.get_input_file_name('chaintest.cif'), m,
IMP.atom.ChainPDBSelector(["ZK"]))
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['ZK'])

# Same check for another two-character ID
mp = IMP.atom.read_mmcif(self.get_input_file_name('chaintest.cif'), m,
IMP.atom.ChainPDBSelector(["Z7"]))
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['Z7'])

# If no auth-provided chain, select by asym_id
mp = IMP.atom.read_mmcif(self.get_input_file_name('chaintest.cif'), m,
IMP.atom.ChainPDBSelector(["ZB"]))
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['ZB'])


if __name__ == '__main__':
IMP.test.main()
16 changes: 9 additions & 7 deletions modules/atom/test/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,25 +157,27 @@ def test_sel_logic(self):
ab = IMP.atom.get_leaves(mpb)
self.assertEqual(len(ab), len(an) + len(a))
for s in (IMP.atom.AndPDBSelector(IMP.atom.HydrogenPDBSelector(),
IMP.atom.ChainPDBSelector('L')),
IMP.atom.ChainPDBSelector(['L'])),
IMP.atom.HydrogenPDBSelector()
& IMP.atom.ChainPDBSelector('L')):
& IMP.atom.ChainPDBSelector(['L'])):
with self.open_input_file("hydrogen.pdb") as fh:
mpb = IMP.atom.read_pdb(fh, m, s)
ab = IMP.atom.get_leaves(mpb)
self.assertEqual(len(ab), 9)
for s in (IMP.atom.XorPDBSelector(IMP.atom.HydrogenPDBSelector(),
IMP.atom.ChainPDBSelector('L')),
IMP.atom.ChainPDBSelector(['L'])),
IMP.atom.HydrogenPDBSelector()
^ IMP.atom.ChainPDBSelector('L')):
^ IMP.atom.ChainPDBSelector(['L'])):
with self.open_input_file("hydrogen.pdb") as fh:
mpb = IMP.atom.read_pdb(fh, m, s)
ab = IMP.atom.get_leaves(mpb)
self.assertEqual(len(ab), 14)
for s in (IMP.atom.AndPDBSelector(IMP.atom.HydrogenPDBSelector(),
IMP.atom.NotPDBSelector(IMP.atom.ChainPDBSelector('L'))),
for s in (IMP.atom.AndPDBSelector(
IMP.atom.HydrogenPDBSelector(),
IMP.atom.NotPDBSelector(
IMP.atom.ChainPDBSelector(['L']))),
IMP.atom.HydrogenPDBSelector()
- IMP.atom.ChainPDBSelector('L')):
- IMP.atom.ChainPDBSelector(['L'])):
with self.open_input_file("hydrogen.pdb") as fh:
mpb = IMP.atom.read_pdb(fh, m, s)
ab = IMP.atom.get_leaves(mpb)
Expand Down
2 changes: 1 addition & 1 deletion modules/mmcif/pyext/src/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ def _read_coords(self):
"""Read the coordinates for this starting model"""
m = IMP.Model()
# todo: support reading other subsets of the atoms (e.g. CA/CB)
slt = IMP.atom.ChainPDBSelector(self.asym_id) \
slt = IMP.atom.ChainPDBSelector([self.asym_id]) \
& IMP.atom.NonWaterNonHydrogenPDBSelector()
hier = IMP.atom.read_pdb(self.filename, m, slt)
rng = self.asym_unit.seq_id_range
Expand Down

0 comments on commit a30dc30

Please sign in to comment.