From 8adc1c99cee3ac8fe0d5800acfd53e8b7583f9b7 Mon Sep 17 00:00:00 2001 From: marjanalbouye Date: Thu, 18 Jan 2024 10:26:52 -0700 Subject: [PATCH 1/6] add function to identify connections in a snapshot --- cmeutils/gsd_utils.py | 235 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) diff --git a/cmeutils/gsd_utils.py b/cmeutils/gsd_utils.py index 3f843a5..86b1734 100644 --- a/cmeutils/gsd_utils.py +++ b/cmeutils/gsd_utils.py @@ -1,9 +1,12 @@ +import warnings from tempfile import NamedTemporaryFile import freud import gsd.hoomd import hoomd +import networkx as nx import numpy as np +from boltons.setutils import IndexedSet from cmeutils.geometry import moit @@ -374,3 +377,235 @@ def xml_to_gsd(xmlfile, gsdfile): snap.bonds.group = bonds newt.append(snap) print(f"XML data written to {gsdfile}") + + +def identify_snapshot_connections(snapshot): + """Identify angle and dihedral connections in a snapshot from bonds. + + Parameters + ---------- + snapshot : gsd.hoomd.Frame + The snapshot to read in. + + Returns + ------- + gsd.hoomd.Frame + The snapshot with angle and dihedral information added. + """ + if snapshot.bonds.N == 0: + warnings.warn( + "No bonds found in snapshot, hence, no angles or " + "dihedrals will be identified." + ) + return snapshot + bond_groups = snapshot.bonds.group + connection_matches = _find_connections(bond_groups) + + if connection_matches["angles"]: + _fill_connection_info( + snapshot=snapshot, + connections=connection_matches["angles"], + type_="angles", + ) + if connection_matches["dihedrals"]: + _fill_connection_info( + snapshot=snapshot, + connections=connection_matches["dihedrals"], + type_="dihedrals", + ) + return snapshot + + +def _fill_connection_info(snapshot, connections, type_): + p_types = snapshot.particles.types + p_typeid = snapshot.particles.typeid + _connection_types = [] + _connection_typeid = [] + for conn in connections: + type = "-".join([p_types[p_typeid[i]] for i in conn]) + types_inv = type[::-1] + # check if type not in angle_types and types_inv not in angle_types: + if type not in _connection_types or types_inv not in _connection_types: + _connection_types.append(type) + _connection_typeid.append( + max(_connection_typeid) + 1 if _connection_typeid else 0 + ) + else: + _connection_typeid.append(_connection_types.index(type)) + + if type_ == "angles": + snapshot.angles.N = len(connections) + snapshot.angles.M = len(_connection_types) + snapshot.angles.group = connections + snapshot.angles.types = _connection_types + snapshot.angles.typeid = _connection_typeid + elif type_ == "dihedrals": + snapshot.dihedrals.N = len(connections) + snapshot.dihedrals.M = len(_connection_types) + snapshot.dihedrals.group = connections + snapshot.dihedrals.types = _connection_types + snapshot.dihedrals.typeid = _connection_typeid + + +# The following functions are obtained from gmso/utils/connectivity.py with +# minor modifications. +def _find_connections(bonds): + """Identify all possible connections within a topology.""" + compound = nx.Graph() + + for b in bonds: + compound.add_edge(b[0], b[1]) + + compound_line_graph = nx.line_graph(compound) + + angle_matches = _detect_connections(compound_line_graph, type_="angle") + dihedral_matches = _detect_connections( + compound_line_graph, type_="dihedral" + ) + + return { + "angles": angle_matches, + "dihedrals": dihedral_matches, + } + + +def _detect_connections(compound_line_graph, type_="angle"): + EDGES = { + "angle": ((0, 1),), + "dihedral": ((0, 1), (1, 2)), + } + + connection = nx.Graph() + for edge in EDGES[type_]: + assert len(edge) == 2, "Edges should be of length 2" + connection.add_edge(edge[0], edge[1]) + + matcher = nx.algorithms.isomorphism.GraphMatcher( + compound_line_graph, connection + ) + + formatter_fns = { + "angle": _format_subgraph_angle, + "dihedral": _format_subgraph_dihedral, + } + + conn_matches = IndexedSet() + for m in matcher.subgraph_isomorphisms_iter(): + new_connection = formatter_fns[type_](m) + conn_matches.add(new_connection) + if conn_matches: + conn_matches = _trim_duplicates(conn_matches) + + # Do more sorting of individual connection + sorted_conn_matches = list() + for match in conn_matches: + if match[0] < match[-1]: + sorted_conn = match + else: + sorted_conn = match[::-1] + sorted_conn_matches.append(sorted_conn) + + # Final sorting the whole list + if type_ == "angle": + return sorted( + sorted_conn_matches, + key=lambda angle: ( + angle[1], + angle[0], + angle[2], + ), + ) + elif type_ == "dihedral": + return sorted( + sorted_conn_matches, + key=lambda dihedral: ( + dihedral[1], + dihedral[2], + dihedral[0], + dihedral[3], + ), + ) + + +def _get_sorted_by_n_connections(m): + """Return sorted by n connections for the matching graph.""" + small = nx.Graph() + for k, v in m.items(): + small.add_edge(k[0], k[1]) + return sorted(small.adj, key=lambda x: len(small[x])), small + + +def _format_subgraph_angle(m): + """Format the angle subgraph. + + Since we are matching compound line graphs, + back out the actual nodes, not just the edges + + Parameters + ---------- + m : dict + keys are the compound line graph nodes + Values are the sub-graph matches (to the angle, dihedral, or improper) + + Returns + ------- + connection : list of nodes, in order of bonding + (start, middle, end) + """ + (sort_by_n_connections, _) = _get_sorted_by_n_connections(m) + ends = sorted([sort_by_n_connections[0], sort_by_n_connections[1]]) + middle = sort_by_n_connections[2] + return [ + ends[0], + middle, + ends[1], + ] + + +def _format_subgraph_dihedral(m): + """Format the dihedral subgraph. + + Since we are matching compound line graphs, + back out the actual nodes, not just the edges + + Parameters + ---------- + m : dict + keys are the compound line graph nodes + Values are the sub-graph matches (to the angle, dihedral, or improper) + top : gmso.Topology + The original Topology + + Returns + ------- + connection : list of nodes, in order of bonding + (start, mid1, mid2, end) + """ + (sort_by_n_connections, small) = _get_sorted_by_n_connections(m) + start = sort_by_n_connections[0] + if sort_by_n_connections[2] in small.neighbors(start): + mid1 = sort_by_n_connections[2] + mid2 = sort_by_n_connections[3] + else: + mid1 = sort_by_n_connections[3] + mid2 = sort_by_n_connections[2] + + end = sort_by_n_connections[1] + return [start, mid1, mid2, end] + + +def _trim_duplicates(all_matches): + """Remove redundant sub-graph matches. + + Is there a better way to do this? Like when we format the subgraphs, + can we impose an ordering so it's easier to eliminate redundant matches? + """ + trimmed_list = IndexedSet() + for match in all_matches: + if ( + match + and match not in trimmed_list + and match[::-1] not in trimmed_list + ): + trimmed_list.add(match) + return trimmed_list From 1ffe06f479b0340ce65c91ad95e448e8ce561523 Mon Sep 17 00:00:00 2001 From: marjanalbouye Date: Fri, 19 Jan 2024 10:58:18 -0700 Subject: [PATCH 2/6] sort angle and dihedral types --- cmeutils/gsd_utils.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/cmeutils/gsd_utils.py b/cmeutils/gsd_utils.py index 86b1734..dd13ffa 100644 --- a/cmeutils/gsd_utils.py +++ b/cmeutils/gsd_utils.py @@ -422,10 +422,11 @@ def _fill_connection_info(snapshot, connections, type_): _connection_types = [] _connection_typeid = [] for conn in connections: - type = "-".join([p_types[p_typeid[i]] for i in conn]) - types_inv = type[::-1] + conn_sites = [p_types[p_typeid[i]] for i in conn] + sorted_conn_sites = _sort_connection_by_name(conn_sites, type_) + type = "-".join(sorted_conn_sites) # check if type not in angle_types and types_inv not in angle_types: - if type not in _connection_types or types_inv not in _connection_types: + if type not in _connection_types: _connection_types.append(type) _connection_typeid.append( max(_connection_typeid) + 1 if _connection_typeid else 0 @@ -435,13 +436,13 @@ def _fill_connection_info(snapshot, connections, type_): if type_ == "angles": snapshot.angles.N = len(connections) - snapshot.angles.M = len(_connection_types) + snapshot.angles.M = 3 snapshot.angles.group = connections snapshot.angles.types = _connection_types snapshot.angles.typeid = _connection_typeid elif type_ == "dihedrals": snapshot.dihedrals.N = len(connections) - snapshot.dihedrals.M = len(_connection_types) + snapshot.dihedrals.M = 4 snapshot.dihedrals.group = connections snapshot.dihedrals.types = _connection_types snapshot.dihedrals.typeid = _connection_typeid @@ -449,6 +450,18 @@ def _fill_connection_info(snapshot, connections, type_): # The following functions are obtained from gmso/utils/connectivity.py with # minor modifications. +def _sort_connection_by_name(conn_sites, type_): + if type_ == "angles": + site1, site3 = sorted([conn_sites[0], conn_sites[2]]) + return [site1, conn_sites[1], site3] + elif type_ == "dihedrals": + site1, site2, site3, site4 = conn_sites + if site2 > site3 or (site2 == site3 and site1 > site4): + return [site4, site3, site2, site1] + else: + return [site1, site2, site3, site4] + + def _find_connections(bonds): """Identify all possible connections within a topology.""" compound = nx.Graph() @@ -503,7 +516,7 @@ def _detect_connections(compound_line_graph, type_="angle"): sorted_conn = match else: sorted_conn = match[::-1] - sorted_conn_matches.append(sorted_conn) + sorted_conn_matches.append(list(sorted_conn)) # Final sorting the whole list if type_ == "angle": @@ -555,11 +568,11 @@ def _format_subgraph_angle(m): (sort_by_n_connections, _) = _get_sorted_by_n_connections(m) ends = sorted([sort_by_n_connections[0], sort_by_n_connections[1]]) middle = sort_by_n_connections[2] - return [ + return ( ends[0], middle, ends[1], - ] + ) def _format_subgraph_dihedral(m): @@ -591,7 +604,7 @@ def _format_subgraph_dihedral(m): mid2 = sort_by_n_connections[2] end = sort_by_n_connections[1] - return [start, mid1, mid2, end] + return (start, mid1, mid2, end) def _trim_duplicates(all_matches): From fb40419d7781dcebd6ac6213dde71fca7bd23ca0 Mon Sep 17 00:00:00 2001 From: marjanalbouye Date: Fri, 19 Jan 2024 10:58:35 -0700 Subject: [PATCH 3/6] add unit tests for identify connections --- cmeutils/tests/test_gsd.py | 113 +++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/cmeutils/tests/test_gsd.py b/cmeutils/tests/test_gsd.py index e54cc62..8ba514b 100644 --- a/cmeutils/tests/test_gsd.py +++ b/cmeutils/tests/test_gsd.py @@ -5,6 +5,7 @@ import packaging.version import pytest from base_test import BaseTest +from gmso.external import from_mbuild, to_gsd_snapshot from mbuild.formats.hoomd_forcefield import to_hoomdsnapshot from cmeutils.gsd_utils import ( @@ -15,6 +16,7 @@ get_all_types, get_molecule_cluster, get_type_position, + identify_snapshot_connections, snap_delete_types, update_rigid_snapshot, xml_to_gsd, @@ -122,3 +124,114 @@ def test_xml_to_gsd(self, tmp_path, p3ht_gsd, p3ht_xml): old_snap.particles.position == new_snap.particles.position ) assert np.all(old_snap.particles.image == new_snap.particles.image) + + def test_identify_snapshot_connections_benzene(self): + benzene = mb.load("c1ccccc1", smiles=True) + topology = from_mbuild(benzene) + no_connection_snapshot, _ = to_gsd_snapshot(topology) + assert no_connection_snapshot.bonds.N == 12 + assert no_connection_snapshot.angles.N == 0 + assert no_connection_snapshot.dihedrals.N == 0 + updated_snapshot = identify_snapshot_connections(no_connection_snapshot) + + topology.identify_connections() + topology_snapshot, _ = to_gsd_snapshot(topology) + assert updated_snapshot.angles.N == topology_snapshot.angles.N + assert np.array_equal( + sorted( + updated_snapshot.angles.group, + key=lambda angle: ( + angle[1], + angle[0], + angle[2], + ), + ), + sorted( + topology_snapshot.angles.group, + key=lambda angle: ( + angle[1], + angle[0], + angle[2], + ), + ), + ) + assert sorted(updated_snapshot.angles.types) == sorted( + topology_snapshot.angles.types + ) + assert len(updated_snapshot.angles.typeid) == len( + topology_snapshot.angles.typeid + ) + assert updated_snapshot.dihedrals.N == topology_snapshot.dihedrals.N + assert np.array_equal( + sorted( + updated_snapshot.dihedrals.group, + key=lambda angle: ( + angle[1], + angle[0], + angle[2], + ), + ), + sorted( + topology_snapshot.dihedrals.group, + key=lambda angle: ( + angle[1], + angle[0], + angle[2], + ), + ), + ) + assert sorted(updated_snapshot.dihedrals.types) == sorted( + topology_snapshot.dihedrals.types + ) + assert len(updated_snapshot.dihedrals.typeid) == len( + topology_snapshot.dihedrals.typeid + ) + + def test_identify_connection_thiophene(self): + thiophene = mb.load("c1cscc1", smiles=True) + topology = from_mbuild(thiophene) + no_connection_snapshot, _ = to_gsd_snapshot(topology) + updated_snapshot = identify_snapshot_connections(no_connection_snapshot) + assert updated_snapshot.angles.N == 13 + assert sorted(updated_snapshot.angles.types) == sorted( + ["C-S-C", "H-C-S", "C-C-H", "C-C-S", "C-C-C"] + ) + + assert updated_snapshot.dihedrals.N == 16 + assert sorted(updated_snapshot.dihedrals.types) == sorted( + [ + "C-C-C-H", + "C-C-C-C", + "H-C-C-H", + "H-C-S-C", + "H-C-C-S", + "C-C-S-C", + "C-C-C-S", + ] + ) + + def test_identify_connection_no_dihedrals(self): + methane = mb.load("C", smiles=True) + topology = from_mbuild(methane) + no_connection_snapshot, _ = to_gsd_snapshot(topology) + assert no_connection_snapshot.bonds.N != 0 + assert no_connection_snapshot.angles.N == 0 + assert no_connection_snapshot.dihedrals.N == 0 + updated_snapshot = identify_snapshot_connections(no_connection_snapshot) + assert updated_snapshot.angles.N == 6 + assert updated_snapshot.angles.types == ["H-C-H"] + assert updated_snapshot.angles.typeid == [0, 0, 0, 0, 0, 0] + assert updated_snapshot.dihedrals.N == 0 + assert updated_snapshot.dihedrals.types is None + assert updated_snapshot.dihedrals.typeid is None + + def test_identify_connection_no_connections(self): + snapshot = gsd.hoomd.Frame() + snapshot.particles.N = 2 + snapshot.particles.types = ["A", "B"] + snapshot.particles.typeid = [0, 1] + with pytest.warns(UserWarning): + updated_snapshot = identify_snapshot_connections(snapshot) + assert updated_snapshot.bonds.N == 0 + assert updated_snapshot.angles.N == 0 + assert updated_snapshot.dihedrals.N == 0 From 3718891e598f29bad781b1826392af6b5049e22e Mon Sep 17 00:00:00 2001 From: marjanalbouye Date: Fri, 19 Jan 2024 10:59:05 -0700 Subject: [PATCH 4/6] add gmso to environment.yml --- environment-dev.yml | 1 + environment.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/environment-dev.yml b/environment-dev.yml index dad4e3d..bb2ebca 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -3,6 +3,7 @@ channels: - conda-forge dependencies: - freud >=2.13.1 + - gmso >=0.11.2 - fresnel >=0.13.5 - gsd >=3.0 - hoomd >=4.0 diff --git a/environment.yml b/environment.yml index eece9ba..94f99f8 100644 --- a/environment.yml +++ b/environment.yml @@ -3,6 +3,7 @@ channels: - conda-forge dependencies: - freud >=2.13.1 + - gmso >=0.11.2 - fresnel >=0.13.5 - gsd >=3.0 - hoomd >=4.0 From 1b83345ee8a8e57c9fe9d739518f47e9b6e0051a Mon Sep 17 00:00:00 2001 From: chrisjonesBSU Date: Mon, 22 Jan 2024 11:58:26 -0700 Subject: [PATCH 5/6] add pekk cg gsd file, test for pekk angle types --- cmeutils/tests/assets/pekk-cg.gsd | Bin 0 -> 6368 bytes cmeutils/tests/base_test.py | 4 ++++ cmeutils/tests/test_gsd.py | 10 ++++++++++ 3 files changed, 14 insertions(+) create mode 100644 cmeutils/tests/assets/pekk-cg.gsd diff --git a/cmeutils/tests/assets/pekk-cg.gsd b/cmeutils/tests/assets/pekk-cg.gsd new file mode 100644 index 0000000000000000000000000000000000000000..1a15bdf0a9d0f2e8be689dec43e48a48c528cf7f GIT binary patch literal 6368 zcmeHMTSyd97#{C#Udy!gl1WLF(VB{6sB=b34OW!Ou28b;uG@yX>#l;On0GV{Z3Po5 zFccq_6zxJ@a?YSBky%zkL5UBBQCVaU78cq3&a4hSln)XZGt1BSpYxsb{r`TuPybA) zCl)@06|wLXaX=7jxx}4c+b$fiCNxHs?6gjosR{^_h01nWPA?l5?k@ZEzdLAeDdou} zoE;L&uEicgD&VaMn?>H5&-{2E*4BjGBJa;<0fPHBg8LS}QNY_0OZ6?hyB8+}mF)yP z|ML-f8#o@;_5yuNe+4{m%kmC_{x)*gh%&)_N5Opy|BdHCPr@pQkLZBtfarkefarke zfarkefarkefarkefat(#a3D>Wm9Ev~8WcHNT~@GCmq+Loh8%5LhT0gMB*=5l>ea^I z)>@TdBU51%@_L<73zCFVm!$$J4mp{noR#1f8iOuZzZwh^dlVh`j~viWMo=V@P*HnM z#R$I^%-iA!xp3=`In3KuG6uqXV$C64e~rZ5qp>R$s>o-trCnWk=1CQ9cAH@8KAZ4V z(Gh&JwS#%@aRRsH`r@d`66SWiD}Fo1&^0KC`SGQUepld0qwH%qKhiULHnlB!;7ua= zD#lFev#yhR^C;T#DF9{nJE6eWd#U5S8OS^mk19KBsf2x3kzu+B{mkj3e5z?YUsQp= z505Z&QO!8@VKGjUVdi{GBOaVRfXf}582ZLJTvoLozdUn^d62KhH6OgN{{dDR$Gikacp^^_1DC}e(IsIrW6<6VjOo{HO zP^zZNHz%U!-QH+=doh)NNP&ir%F&(VCTjd^HF6$ILCTNq)Umn-R1#8(c6dCdx+8C) zn)XUGrW~hWo*K7PPRQ9<$!EF$L8!Tg4LI+_qce*z*LC61l|`5{yYUF`0$^?ouLc0u zu>liq4<6UE2y=88VFSXb8YaXrG6s~g;mC$Jk3KBIgxi-#KNg`U=m&a%KA;Dv54E8# X)P#Ca3+g}(& Date: Mon, 22 Jan 2024 18:59:08 +0000 Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cmeutils/tests/test_gsd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmeutils/tests/test_gsd.py b/cmeutils/tests/test_gsd.py index a84550e..46cf35a 100644 --- a/cmeutils/tests/test_gsd.py +++ b/cmeutils/tests/test_gsd.py @@ -239,7 +239,7 @@ def test_identify_connection_no_connections(self): def test_identify_connections_pekk_cg(self, pekk_cg_gsd): with gsd.hoomd.open(pekk_cg_gsd) as traj: snap = traj[0] - assert snap.angles.types == [] + assert snap.angles.types == [] snap_with_connections = identify_snapshot_connections(snap) assert "K-E-K" in snap_with_connections.angles.types assert "E-K-K" in snap_with_connections.angles.types