Skip to content
This repository has been archived by the owner on Jul 10, 2024. It is now read-only.

Commit

Permalink
changed tie-breaking to be more explicitly consistent with previous m…
Browse files Browse the repository at this point in the history
…echanism
  • Loading branch information
tylerperyea committed Apr 15, 2019
1 parent 4297bf8 commit 9558e17
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 22 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<groupId>gov.nih.ncats</groupId>
<artifactId>lychi</artifactId>
<packaging>jar</packaging>
<version>0.5.1</version>
<version>0.5.1ISOTOPE_FIX</version>
<name>Lychi</name>

<repositories>
Expand Down
59 changes: 40 additions & 19 deletions src/main/java/lychi/LyChIStandardizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -2708,6 +2708,31 @@ public static String hashKey (Molecule mol, String sep) {
return keys[0]+sep+keys[1]+sep+keys[2]+sep+keys[3];
}


/**
 * Builds a simplified, re-canonicalized copy of the given molecule.
 *
 * <p>The input is cloned and left untouched. On the clone, every atom has
 * its radical, charge, flags and mass number reset to 0, and is tagged with
 * an atom map equal to its original index + 1. Every bond has its stereo
 * flags (relative to its two end nodes) cleared. The stripped clone is then
 * passed through {@code ChemUtil.canonicalSMILES} into a fresh molecule,
 * which is returned.
 *
 * <p>Because the atom maps carry the original index (1-based), a caller can
 * take any atom of the returned molecule and recover the index it had in
 * {@code m} via {@code getAtomMap() - 1}.
 *
 * @param m the molecule to simplify; it is cloned, not modified
 * @return a new molecule populated by {@code ChemUtil.canonicalSMILES} from
 *         the stripped clone
 */
private static Molecule getLayer3Equivalent(Molecule m){
    Molecule m0 = m.cloneMolecule();

    // Only the atom count is needed here; no per-atom array is required.
    int atomCount = m0.getAtomCount();
    for (int i = 0; i < atomCount; ++i) {
        MolAtom a = m0.getAtom(i);
        a.setRadical(0);
        a.setCharge(0);
        a.setFlags(0);
        a.setMassno(0);
        // 1-based so the original index survives the canonicalization below
        a.setAtomMap(i + 1);
    }

    for (MolBond b : m0.getBondArray()) {
        b.setStereo2Flags(b.getNode1(), b.getNode2(), 0);
        if (b.isQuery()) { //hack
            // NOTE(review): presumably forces a query bond down to a plain
            // single bond so it can be canonicalized - confirm against the
            // MolBond flag definitions.
            b.setFlags(1);
        }
    }

    Molecule mout = new Molecule();
    ChemUtil.canonicalSMILES(mout, m0, false);

    return mout;
}

/**
* Extended version of the hash key that includes the topology+label
* layer that sits between the first and second layers of previous
Expand Down Expand Up @@ -2771,30 +2796,27 @@ public static String[] hashKeyArray (Molecule input) {
int[] rank = new int[atno.length];
m0.getGrinv(rank);

int[] fallbackLookup = new int[atno.length];


for (int i = 0; i < atno.length; ++i) {
rank[i] *= atno[i]*1204; // update rank to resolve symmetry
// large number to allow small fiddling for tie breaking
try{
//set the tie-breaking priority based on the layer-3 information
Molecule stdLychi3Mol=getLayer3Equivalent(m1);
MolAtom[] matarr1=stdLychi3Mol.getAtomArray();

for (int i = 0; i < atno.length; ++i) {
fallbackLookup[matarr1[i].getAtomMap()-1]=i;
}
}catch(Exception e){
logger.log(Level.SEVERE,
"Can't produce simplified structure from molecule", e);
}


for (int i = 0; i < atno.length; ++i) {
for (int j = i+1; j < atno.length; ++j) {
if(rank[i] == rank[j]){
if(atno[i]!=atno[j]){
rank[i]+=atno[i]*5;
rank[j]+=atno[j]*5;
}
}
}
rank[i] = (rank[i]*atno[i]*2048); // update rank to resolve symmetry
rank[i] += fallbackLookup[i]; //tie breaking based on lychi-3 fallback order
}

for(int i=0;i< atno.length;++i){
rank[i] -= m1.getAtom(i).getImplicitHcount(); // break symmetry when it's based on bond order
}





for (AtomIterator ai = new AtomIterator (m0, rank);
Expand Down Expand Up @@ -2824,7 +2846,6 @@ public static String[] hashKeyArray (Molecule input) {
"2: "+level2 + "\n"+
"3: "+level3 + "\n");
}

return hashChain45 (level0, level1, level2, level3);
}

Expand Down
31 changes: 29 additions & 2 deletions src/test/java/lychi/LychiRegressionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import static org.junit.Assert.*;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;
Expand Down Expand Up @@ -104,8 +105,12 @@ public static void basicTest(Molecule m, String expected, boolean match, int lay
std.standardize(m);
String fullKey=LyChIStandardizer.hashKey(m);

String layer = fullKey.split("-")[layerMatch-1];
String expectedLayer = expected.split("-")[layerMatch-1];
String layer = Arrays.stream(fullKey.split("-"))
.limit(layerMatch)
.collect(Collectors.joining("-"));
String expectedLayer = Arrays.stream(expected.split("-"))
.limit(layerMatch)
.collect(Collectors.joining("-"));

if(match){
assertEquals(expectedLayer,layer);
Expand Down Expand Up @@ -135,6 +140,9 @@ public static Molecule shuffleMolecule(Molecule m, int[] map){
int ni2=rmap[oi2];
MolBond nmb=mb.cloneBond(nmas[ni1], nmas[ni2]);
m2.add(nmb);
if(nmb.isQuery()){
nmb.setFlags(mb.getFlags());
}
}

return m2;
Expand Down Expand Up @@ -193,9 +201,25 @@ public static List<Object[]> data(){
tests.add(LychiTestInstance.of("O=C(O[C@H]1C[C@H]2C[C@H]3C[C@@H](C1)N2CC3=O)C4=CNC5=C4C=CC=C5","38C4U16JU-UC5KDUPMVH-UHFJLJL661C-UHCRHDK74DXU").name("cage-like structure"));
tests.add(LychiTestInstance.of("C[C@@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5MFVTVS3J3").name("trans across ring"));
tests.add(LychiTestInstance.of("C[C@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5JH5RWXRLR").name("cis across ring"));

tests.add(LychiTestInstance.of("CN(C)CCOC(C1=CC=CC=C1)C2=CC=CC=C2","SG1MX4TJL-LRQMG7F9KY-LYVJD4DSRGU-LYU23YRCSQTR").name("test lychi change"));


tests.add(LychiTestInstance.equivalentLayer3("CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("layer 3 the same when only stereo changes"));
tests.add(LychiTestInstance.equivalentLayer3("CCCCCCCCCCCCCC.CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CCCCCCCCCCCCCC.CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("rare salt should be stripped, regardless of stereo"));

tests.add(LychiTestInstance.of("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "DCLRH149F-FFMPLZ16VC-FC35942KGAU-FCUDSDS2V1NT").name("round trip problem"));


tests.add(LychiTestInstance.of("[Na+].[Na+].[Na+].[Na+].[O-]P([O-])(=O)OP([O-])([O-])=O", "U42VPKYB8-83HRLLLGLV-8VMGB3AAA1L-8VLXF4WDFH73")
.name("legacy consistency test 1"));
tests.add(LychiTestInstance.of("CC(C)NCC(O)COC1=CC=C(CCOCC2CC2)C=C1", "19W74QJNW-WXMWMLXXWD-WDPBV6R9GFJ-WDJKLYLWS5JW")
.name("legacy consistency test 2"));
tests.add(LychiTestInstance.of("CC1=C(CC(O)=O)C2=C(C=CC(F)=C2)\\C1=C/C3=CC=C(C=C3)S(C)(=O)=O", "4D13QHCQ6-6CKUM1H2QX-6XM1AWY81DJ-6XJV527T8L5X")
.name("legacy consistency test 3"));



tests.add(LychiTestInstance.equivalent("\n" +
" Ketcher 12201304332D 1 1.00000 0.00000 0\n" +
"\n" +
Expand Down Expand Up @@ -352,6 +376,7 @@ public static List<Object[]> data(){
tests.add(LychiTestInstance.notEquivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should not be same as 1 center modified"));



//OC1[C@H](O)[C@H](O)C1O

//C[C@H]1OC(C)O[C@@H](C)O1
Expand Down Expand Up @@ -404,6 +429,8 @@ public static List<Object[]> data(){
.name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored"));
*/



return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList());
}
}

0 comments on commit 9558e17

Please sign in to comment.