diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index ebfd5f6..1cfd5be 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -1166,7 +1166,129 @@ else if (chiral != 0) { int i = m.indexOf(me.getKey()); m.setChirality(i, me.getValue()); } - + try{ + + Map nonChiralStereo = new LinkedHashMap<>(); + + for(int k=0;k rings = new HashSet(); + + int[][] sssr=m.getSSSR(); + for(MolAtom ma:nonChiralStereo.keySet()){ + //need to find all atoms in the ring + int im=m.indexOf(ma); + for(int[] ir:sssr){ + for(int i=0;i ratoms=Arrays.stream(rr) + .mapToObj(i->m.getAtom(i)) + .collect(Collectors.toSet()); + + MolBond[] bonds=ratoms.stream() + .filter(a->!chirality.containsKey(a)) + .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i))) + .filter(e->!ratoms.contains(e.getNode1()) || !ratoms.contains(e.getNode2())) + .map(b->(MolBond)b) + .filter(b->b.getType()==1) + .peek(b->{ + if(ratoms.contains(b.getAtom1()))b.swap(); + }) + .toArray(i->new MolBond[i]); + + BitSet bs = new BitSet(bonds.length*2); + for(int i=0;i allPossible = new HashSet(); + Set currentPossible = new HashSet(); + + for(int i=0;i>j&1)==1){ + onOff.set(j*2); + bonds[j].setFlags(MolBond.UP, MolBond.STEREO1_MASK); + }else{ + onOff.set(j*2+1); + bonds[j].setFlags(MolBond.DOWN, MolBond.STEREO1_MASK); + } + } + Molecule mclone=m.cloneMolecule(); + //(new LyChIStandardizer()).standardize(mclone); + String hash1=LyChIStandardizer.hashKey(mclone); + allPossible.add(hash1); + onOff.or(bs); + + if(onOff.cardinality() == bs.cardinality()){ + currentPossible.add(hash1); + } + } + if(allPossible.size()==currentPossible.size()){ + for(int j=0;j me : chirality.entrySet()) { diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 2882596..85b8b48 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -1,6 +1,7 @@ package lychi; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; import java.util.ArrayList; import java.util.Collections; @@ -26,6 +27,7 @@ public static class LychiTestInstance{ String name; String input; String expectedLychi; + boolean shouldMatch=true; public static LychiTestInstance of(String smi, String lychi){ @@ -36,6 +38,27 @@ public static LychiTestInstance of(String smi, String lychi){ return ltest; } + public static LychiTestInstance equivalent(String smi1, String smi2){ + try{ + MolHandler mh = new MolHandler(); + mh.setMolecule(smi2); + Molecule m= mh.getMolecule(); + LyChIStandardizer std = new LyChIStandardizer(); + std.standardize(m); + String fullKey=LyChIStandardizer.hashKey(m); + return of(smi1,fullKey); + }catch(Exception e){ + throw new RuntimeException(e); + } + } + public static LychiTestInstance notEquivalent(String smi1, String smi2){ + return equivalent(smi1,smi2).negate(); + } + public LychiTestInstance negate(){ + this.shouldMatch=!this.shouldMatch; + return this; + } + public LychiTestInstance name(String n){ this.name=n; return this; @@ -54,6 +77,8 @@ public Molecule getMolecule() throws MolFormatException{ } } + + private LychiTestInstance spec; @@ -61,11 +86,15 @@ public LychiRegressionTest(String ignored, LychiTestInstance spec){ this.spec = spec; } - public static void basicTest(Molecule m, String expected) throws Exception{ + public static void basicTest(Molecule m, String expected, boolean match) throws Exception{ LyChIStandardizer std = new LyChIStandardizer(); std.standardize(m); String fullKey=LyChIStandardizer.hashKey(m); - assertEquals(expected,fullKey); + if(match){ + assertEquals(expected,fullKey); + }else{ + assertNotEquals(expected,fullKey); + } } public static Molecule shuffleMolecule(Molecule m, int[] map){ @@ -97,7 +126,7 @@ public static Molecule shuffleMolecule(Molecule m, int[] map){ @Test public void correctLychiFirstTime() throws Exception{ - basicTest(spec.getMolecule(),spec.expectedLychi); + basicTest(spec.getMolecule(),spec.expectedLychi, spec.shouldMatch); } @Test @@ -119,11 +148,11 @@ public void correctLychiAfterRandomShuffle() throws Exception{ int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); Molecule s=shuffleMolecule(m,map); - basicTest(s,spec.expectedLychi); + basicTest(s,spec.expectedLychi,spec.shouldMatch); } } - @Test + //@Test public void daisyChainLychiAfter10Times() throws Exception{ Molecule m=spec.getMolecule(); m.clean(2, null); @@ -133,7 +162,7 @@ public void daisyChainLychiAfter10Times() throws Exception{ Collections.shuffle(iatoms); int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); Molecule s=shuffleMolecule(m,map); - basicTest(s,spec.expectedLychi); + basicTest(s,spec.expectedLychi,spec.shouldMatch); m=s; } } @@ -147,6 +176,189 @@ public static List data(){ tests.add(LychiTestInstance.of("O=C(O[C@H]1C[C@H]2C[C@H]3C[C@@H](C1)N2CC3=O)C4=CNC5=C4C=CC=C5","38C4U16JU-UC5KDUPMVH-UHFJLJL661C-UHCRHDK74DXU").name("cage-like structure")); tests.add(LychiTestInstance.of("C[C@@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5MFVTVS3J3").name("trans across ring")); tests.add(LychiTestInstance.of("C[C@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5JH5RWXRLR").name("cis across ring")); + tests.add(LychiTestInstance.of("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "DCLRH149F-FFMPLZ16VC-FC35942KGAU-FCUDSDS2V1NT").name("round trip problem")); + + + tests.add(LychiTestInstance.equivalent("\n" + + " Ketcher 12201304332D 1 1.00000 0.00000 0\n" + + "\n" + + " 59 67 0 1 0 999 V2000\n" + + " -2.2321 -1.8660 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 -0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 2.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -4.3301 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 3.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 4.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 2.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 2.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.4740 1.2647 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.9071 -0.4750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 1.0000 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 -1.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 -2.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8660 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8561 -2.3746 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.4488 -3.1947 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.5544 -3.0234 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.3132 -1.2000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.5741 -1.3179 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.8632 0.2250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.0294 0.9234 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.9488 1.1197 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8660 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8811 1.3246 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7321 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.5981 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.4244 1.4848 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.6097 2.0768 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.7419 1.9858 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7927 2.9165 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4641 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.9301 0.3000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4641 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.2072 -1.6691 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.8005 -2.5827 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.8060 -2.4781 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.5981 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7321 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.6506 -0.2222 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.4172 0.1894 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.4966 1.1232 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.8342 1.6954 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.0136 2.6792 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.2512 3.3264 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.4306 4.3102 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.3096 2.9897 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.5473 3.6370 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.7266 4.6207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.1303 2.0060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.8676 1.3838 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2 1 1 1 0 0\n" + + " 2 3 1 0 0 0\n" + + " 3 4 1 0 0 0\n" + + " 4 5 1 0 0 0\n" + + " 5 6 2 0 0 0\n" + + " 6 7 1 0 0 0\n" + + " 6 8 1 0 0 0\n" + + " 8 9 1 0 0 0\n" + + " 9 10 1 0 0 0\n" + + " 8 11 2 0 0 0\n" + + " 11 12 1 0 0 0\n" + + " 11 13 1 0 0 0\n" + + " 4 13 2 0 0 0\n" + + " 13 14 1 0 0 0\n" + + " 14 15 1 1 0 0\n" + + " 14 16 1 0 0 0\n" + + " 2 16 1 0 0 0\n" + + " 16 17 1 0 0 0\n" + + " 14 18 1 0 0 0\n" + + " 18 19 1 1 0 0\n" + + " 18 20 1 0 0 0\n" + + " 20 21 1 0 0 0\n" + + " 2 21 1 0 0 0\n" + + " 21 22 1 1 0 0\n" + + " 20 23 1 0 0 0\n" + + " 23 24 1 1 0 0\n" + + " 23 25 1 0 0 0\n" + + " 25 26 1 0 0 0\n" + + " 26 27 1 0 0 0\n" + + " 27 28 2 0 0 0\n" + + " 29 27 1 0 0 0\n" + + " 29 30 1 6 0 0\n" + + " 30 31 1 0 0 0\n" + + " 31 32 1 0 0 0\n" + + " 18 32 1 0 0 0\n" + + " 32 33 1 1 0 0\n" + + " 32 34 1 0 0 0\n" + + " 34 35 1 0 0 0\n" + + " 35 36 1 0 0 0\n" + + " 36 37 1 0 0 0\n" + + " 37 38 1 0 0 0\n" + + " 37 39 2 0 0 0\n" + + " 35 40 2 0 0 0\n" + + " 40 41 1 0 0 0\n" + + " 40 42 1 0 0 0\n" + + " 42 43 1 0 0 0\n" + + " 43 44 1 0 0 0\n" + + " 44 45 1 0 0 0\n" + + " 45 46 1 0 0 0\n" + + " 42 46 2 0 0 0\n" + + " 46 47 1 0 0 0\n" + + " 23 47 1 0 0 0\n" + + " 34 47 2 0 0 0\n" + + " 29 48 1 0 0 0\n" + + " 48 49 1 0 0 0\n" + + " 49 50 1 0 0 0\n" + + " 50 51 1 0 0 0\n" + + " 51 52 1 0 0 0\n" + + " 52 53 2 0 0 0\n" + + " 53 54 1 0 0 0\n" + + " 53 55 1 0 0 0\n" + + " 55 56 1 0 0 0\n" + + " 56 57 1 0 0 0\n" + + " 55 58 2 0 0 0\n" + + " 58 59 1 0 0 0\n" + + " 29 59 1 0 0 0\n" + + " 51 59 2 0 0 0\n" + + "M END", "[H][C@@]12CC3=C(C(O)=C(OC)C(C)=C3)[C@@]([H])(N1C)[C@@]4([H])N([C@H]2O)[C@@]5([H])COC(=O)[C@]8(CS[C@]4([H])C6=C5C7=C(OCO7)C(C)=C6OC(C)=O)NCCC9=C8C=C(OC)C(O)=C9").name("strereo parity issue 1")); + //C(C)1CCC(C)CC1 + tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","C(C)1CCC(C)CC1").name("meaningless streo on a ring shouldn't be honored")); + tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","[C@@H](C)1CCC(C)CC1")); + + tests.add(LychiTestInstance.equivalent("C[C@H]1CC[C@@H](C)CC1","C[C@@H]1CC[C@H](C)CC1").name("opposite form of cis/trans on ring should be the same")); + + tests.add(LychiTestInstance.notEquivalent("C[C@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@H](C)CC1").name("cis across ring is different from trans across ring")); + + + tests.add(LychiTestInstance.equivalent("C[C@H]1C[C@@H](C)CC(C)C1","C[C@@H]1C[C@H](C)CC(C)C1").name("symmetric half-defined stereo should be the same")); + + //O[C@H]1CC(O)CC(O)C1 + tests.add(LychiTestInstance.equivalent("O[C@H]1CC(O)CC(O)C1","O[C@@H]1CC(O)CC(O)C1").name("3-center, 1 specified meaningless center should be same as inverted")); + + tests.add(LychiTestInstance.equivalent("C[C@H]1OC(C)O[C@@H](C)O1","CC1OC(C)OC(C)O1").name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); + + //OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O + tests.add(LychiTestInstance.equivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@H](O)[C@@H]1O").name("semi-meaningful symmetric stereo honored")); + tests.add(LychiTestInstance.notEquivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@@H](O)[C@@H]1O").name("distinct semi-meaningful symmetric stereo honored")); + + tests.add(LychiTestInstance.equivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should be same as inverted")); + tests.add(LychiTestInstance.notEquivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should not be same as 1 center modified")); + + + //OC1[C@H](O)[C@H](O)C1O + + //C[C@H]1OC(C)O[C@@H](C)O1 + //[#6][C@H]1C[C@@H]([#6])CC([#6])C1.[#6][C@H]2CC([#6])C[C@@H]([#6])C2 + tests.add(LychiTestInstance.equivalent("\n" + + " MJ150420 \n" + + "\n" + + " 8 8 0 0 0 0 0 0 0 0999 V2000\n" + + " -2.2656 0.8138 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9801 0.4013 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9801 -0.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2656 -0.8361 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5511 -0.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5511 0.4013 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8366 0.8138 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8366 -0.0111 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1 2 1 0 0 0 0\n" + + " 1 6 1 0 0 0 0\n" + + " 2 3 1 0 0 0 0\n" + + " 3 4 1 0 0 0 0\n" + + " 4 5 1 0 0 0 0\n" + + " 5 6 1 0 0 0 0\n" + + " 6 7 1 1 0 0 0\n" + + " 6 8 1 6 0 0 0\n" + + "M END","C1CCCCC1").name("meaningless stereo 1")); return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList());