diff --git a/make.sh b/make.sh
new file mode 100644
index 0000000..c7bb97e
--- /dev/null
+++ b/make.sh
@@ -0,0 +1,3 @@
+bash mavenMake.sh
+mvn clean package
+
diff --git a/pom.xml b/pom.xml
index 46c5a43..378b828 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
gov.nih.ncats
lychi
jar
- 0.7.0
+ 0.7.1
Lychi
@@ -51,7 +51,7 @@
-
+
${project.artifactId}-${project.version}
${deploy.dir}
diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java
index da15147..aae0a16 100644
--- a/src/main/java/lychi/LyChIStandardizer.java
+++ b/src/main/java/lychi/LyChIStandardizer.java
@@ -5,6 +5,7 @@
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
+import java.math.BigInteger;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
@@ -14,6 +15,7 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -58,10 +60,19 @@ public class LyChIStandardizer {
/**
* This static version value must be updated if any changes is made
- * to this class that would be imcompatible with earlier results!!!
+ * to this class that would be incompatible with earlier results!!!
*/
public static final int VERSION = 0x10;
+
+
+ /**
+ * This flag, when true, checks for "deeper" symmetry by enumerating
+ * unspecified stereo forms and confirming that they
+ */
+ private static final boolean DEEP_SYMMETRY = true;
+
+
static final private boolean DEBUG;
static final private boolean UNMEX; // apply UNM extra rules
static {
@@ -1150,6 +1161,168 @@ else if (chiral != 0) {
}
}
+ if(DEEP_SYMMETRY){
+ try{
+
+ Map nonChiralStereo = new LinkedHashMap<>();
+
+ for(int k=0;k rings = new LinkedHashSet();
+
+ int maxRing=0;
+
+ for(MolAtom ma:nonChiralStereo.keySet()){
+ maxRing=Math.max(maxRing, ma.sringsize());
+ }
+
+
+
+ int[][] sssr=m.getSSSR();
+ for(MolAtom ma:nonChiralStereo.keySet()){
+ int mm =ma.sringsize();
+ //need to find all atoms in the ring
+ int im=m.indexOf(ma);
+ for(int[] ir:sssr){
+ if(ir.length==mm){
+ for(int i=0;i ratoms=Arrays.stream(rr)
+ .mapToObj(i->m.getAtom(i))
+ .collect(Collectors.toCollection(()->new LinkedHashSet()));
+
+ MolBond[] bonds=ratoms.stream()
+ .filter(a->!chirality.containsKey(a))
+ .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i)))
+ .filter(e->!ratoms.contains(e.getNode1()) || !ratoms.contains(e.getNode2()))
+ .map(b->(MolBond)b)
+ .filter(b->b.getType()==1)
+ .peek(b->{
+ if(ratoms.contains(b.getAtom2()))b.swap();
+ })
+ .toArray(i->new MolBond[i]);
+
+
+ MolBond[] bondsInRing=ratoms.stream()
+ .filter(a->!chirality.containsKey(a))
+ .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i)))
+ .filter(e->{
+ boolean inRing=ratoms.contains(e.getNode1()) && ratoms.contains(e.getNode2());
+ return inRing;
+ })
+ .map(b->(MolBond)b)
+ .filter(b->b.getType()==1)
+ .toArray(i->new MolBond[i]);
+
+
+ int[] oldPar = new int[bondsInRing.length];
+
+ for(int i=0;i allPossible = new HashSet();
+ Set currentPossible = new HashSet();
+
+ for(int i=0;i>j)&1)==1){
+ onOff.set(j*2);
+ bonds[j].setFlags(MolBond.UP, MolBond.STEREO1_MASK);
+ }else{
+ onOff.set(j*2+1);
+ bonds[j].setFlags(MolBond.DOWN, MolBond.STEREO1_MASK);
+ }
+ }
+ Molecule mclone=m.cloneMolecule();
+ //(new LyChIStandardizer()).standardize(mclone);
+ String hash1=LyChIStandardizer.hashKey(mclone);
+
+ allPossible.add(hash1);
+ onOff.or(bs);
+
+ //System.out.println(mclone.toFormat("mol"));
+
+ if(onOff.cardinality() == bs.cardinality()){
+ currentPossible.add(hash1);
+ }
+ }
+ if(allPossible.size()==currentPossible.size()){
+ for(int j=0;j me : chirality.entrySet()) {
int i = m.indexOf(me.getKey());
int oi=me.getValue();
@@ -2709,82 +2881,145 @@ public static String hashKey (Molecule mol, String sep) {
return keys[0]+sep+keys[1]+sep+keys[2]+sep+keys[3];
}
- /**
- * Extended version of the hash key that includes the topology+label
- * layer that sits between the first and second layers of previous
- * key.
- */
- public static String[] hashKeyArray (Molecule input) {
- if (input == null || input.getAtomCount() == 0) {
- return hashChain45 ("", "", "", "");
- }
- Molecule mol = null;
- String molstr = ChemUtil.canonicalSMILES(input);
- try {
- MolHandler mh = new MolHandler (molstr);
- mol = mh.getMolecule();
- }
- catch (Exception ex) {
- logger.log(Level.SEVERE,
- "Can't parse canonical SMILES: "+molstr, ex);
- mol = input.cloneMolecule();
- }
+ private static Molecule getLayer3Equivalent(Molecule m){
+ Molecule m0=m.cloneMolecule();
+
+ int[] atno = new int[m0.getAtomCount()];
+ for (int i = 0; i < atno.length; ++i) {
+ MolAtom a = m0.getAtom(i);
+ a.setRadical(0);
+ a.setCharge(0);
+ a.setFlags(0);
+ a.setMassno(0);
+ a.setAtomMap(i+1);
+ }
+ for (MolBond b : m0.getBondArray()) {
+ b.setStereo2Flags(b.getNode1(), b.getNode2(), 0);
+ if(b.isQuery()){ //hack
+ b.setFlags(1);
+ }
+ }
+ Molecule mout = new Molecule();
+ ChemUtil.canonicalSMILES(mout,m0,false);
+
+ return mout;
+ }
- Molecule m0 = mol.cloneMolecule();
- m0.expandSgroups();
- m0.hydrogenize(false);
- // make sure H isotopes are suppressed
- m0.implicitizeHydrogens(MolAtom.ALL_H);
+ private static Molecule getLayer2Equivalent(Molecule m){
+ Molecule m0=m.cloneMolecule();
- // remove any leftover H that didn't get removed
- // above
- for (MolAtom a : m0.getAtomArray()) {
- if (a.getAtno() == 1)
- m0.removeNode(a);
- }
-
- Molecule m1 = m0.cloneMolecule();
int[] atno = new int[m0.getAtomCount()];
for (int i = 0; i < atno.length; ++i) {
MolAtom a = m0.getAtom(i);
- atno[i] = a.getAtno();
- a.setAtno(6);
a.setRadical(0);
a.setCharge(0);
a.setFlags(0);
a.setMassno(0);
+ a.setAtomMap(i+1);
}
-
for (MolBond b : m0.getBondArray()) {
- b.setFlags(0);
- b.setType(1);
- }
+ b.setStereo2Flags(b.getNode1(), b.getNode2(), 0);
+ if(b.isQuery()){ //hack
+ b.setFlags(1);
+ }
+ b.setType(1); //force single
+ }
+ Molecule mout = new Molecule();
+ ChemUtil.canonicalSMILES(mout,m0,false);
+
+ return mout;
+ }
+
+ private static Molecule getLayer1Equivalent(Molecule m){
+ Molecule m0=m.cloneMolecule();
+
+ int[] atno = new int[m0.getAtomCount()];
+ for (int i = 0; i < atno.length; ++i) {
+ MolAtom a = m0.getAtom(i);
+ a.setRadical(0);
+ a.setCharge(0);
+ a.setFlags(0);
+ a.setMassno(0);
+ a.setAtno(6);
+ a.setAtomMap(i+1);
+ }
+ for (MolBond b : m0.getBondArray()) {
+ b.setStereo2Flags(b.getNode1(), b.getNode2(), 0);
+ if(b.isQuery()){ //hack
+ b.setFlags(1);
+ }
+ b.setType(1); //force single
+ }
+ Molecule mout = new Molecule();
+ ChemUtil.canonicalSMILES(mout,m0,false);
+
+ return mout;
+ }
+
- // level0: molecular skeleton...
- String level0 = ChemUtil.canonicalSMILES (m0, false);
- StringBuilder sb = new StringBuilder ();
- // level1: topology+atom label
+ /**
+ * Takes an int array and replaces each value with the position it would have in a sorted
+ * unique list.
+ *
+ * For example:
+ * [4,2,1,10,42234,1,42234]
+ *
+ * would become:
+ *
+ * [2,1,0,3,4,0,4]
+ *
+ *
+ * @param rank
+ */
+ public static void normalizeRanks(int[] rank){
+
+ int[] last=new int[]{-1, -1};
+ IntStream.range(0, rank.length)
+ .mapToObj(i->new int[]{i,rank[i]})
+ .sorted((a,b)->b[1]-a[1])
+ .forEach(ab->{
+ if(last[1]!=ab[1]){
+ last[0]++;
+ last[1]=ab[1];
+ }
+ rank[ab[0]]=last[0];
+ });
+ }
- int[] rank;
+ /**
+ * Simple morgan's algorithm for graph invariants. This requires k*N operations
+ * where k is a constant that is large enough to "absorb" the whole graph (13 here).
+ *
+ * @param m
+ * @return
+ */
+ public static long[] morgans(Molecule m){
+ int MAX_ROUND = 13;
+ int[] atno = new int[m.getAtomCount()];
+ for (int i = 0; i < atno.length; ++i) {
+ MolAtom a = m.getAtom(i);
+ atno[i]=a.getAtno();
+ }
+ long[] rank;
{
- int MAX_ROUND = 13;
- int[][] hash = new int[MAX_ROUND][atno.length];
+
+ long[][] hash = new long[MAX_ROUND][atno.length];
for (int i = 0; i < atno.length; ++i)
hash[0][i] = atno[i];
-
+
int round = 1;
for (; round < MAX_ROUND; ++round) {
int p = round - 1;
for (int i = 0; i < atno.length; ++i) {
- MolAtom a = m1.getAtom(i);
- int ha = hash[p][i];
+ MolAtom a = m.getAtom(i);
+ long ha = hash[p][i];
for (int j = 0; j < a.getBondCount(); ++j) {
MolAtom xa = a.getBond(j).getOtherAtom(a);
- int k = m1.indexOf(xa);
- ha += (a.getBond(j).getType() << xa.getImplicitHcount())
- + hash[p][k];
+ int k = m.indexOf(xa);
+ ha += ha += (a.getBond(j).getType() << xa.getImplicitHcount())
+ + hash[p][k];
}
if (ha < 0) {
if (DEBUG) {
@@ -2798,41 +3033,65 @@ public static String[] hashKeyArray (Molecule input) {
}
rank = hash[round-1];
}
- /*
- m0.getGrinv(rank);
- for (int i = 0; i < atno.length; ++i) {
- rank[i] *= atno[i]; // update rank to resolve symmetry
- }
- */
-
- for (AtomIterator ai = new AtomIterator (m0, rank);
- ai.hasNext(); ai.next()) {
- int index = ai.nextIndex();
- sb.append(MolAtom.symbolOf(atno[index]));
+ return rank;
+ }
+
+ /**
+ * Return the morgan's algorithm values as a sorted list, encoded
+ * as a string.
+ * @param m
+ * @return
+ */
+ public static String morgansAsString(Molecule m){
+ long[] order = Arrays.stream(morgans(m))
+ .sorted()
+ .toArray();
+
+ String s=new BigInteger(BitSet.valueOf(order).toByteArray()).toString(64);
+ return s;
+ }
+
+ /**
+ * Extended version of the hash key that includes the topology+label
+ * layer that sits between the first and second layers of previous
+ * key.
+ */
+ public static String[] hashKeyArray (Molecule input) {
+ if (input == null || input.getAtomCount() == 0) {
+ return hashChain45 ("", "", "", "");
}
- String level1 = sb.toString();
- // level2: topology+atom label+bond order
- sb = new StringBuilder ();
- for (AtomIterator ai =new AtomIterator (m1, rank); ai.hasNext(); ) {
- MolAtom a = ai.next();
- sb.append(a.getSymbol()+a.getImplicitHcount());
+ Molecule mol = null;
+ String molstr = ChemUtil.canonicalSMILES(input);
+ try {
+ MolHandler mh = new MolHandler (molstr);
+ mol = mh.getMolecule();
+ }
+ catch (Exception ex) {
+ logger.log(Level.SEVERE,
+ "Can't parse canonical SMILES: "+molstr, ex);
+ mol = input.cloneMolecule();
}
- String level2 = sb.toString();
- // level2: full canonical smiles with stereo/isotope/charge...
- String level3 = molstr;
+ Molecule m0 = mol.cloneMolecule();
+ m0.expandSgroups();
+ m0.hydrogenize(false);
+ // make sure H isotopes are suppressed
+ m0.implicitizeHydrogens(MolAtom.ALL_H);
- String[] hkeys = hashChain45 (level0, level1, level2, level3);
- if (DEBUG) {
- logger.info("hash layers:\n"+
- "0: "+level0 + "\n"+
- "1: "+level1 + "\n"+
- "2: "+level2 + "\n"+
- "3: "+level3 + "\n");
+ // remove any leftover H that didn't get removed
+ // above
+ for (MolAtom a : m0.getAtomArray()) {
+ if (a.getAtno() == 1)
+ m0.removeNode(a);
}
- return hkeys;
+ String molstr1=morgansAsString(getLayer1Equivalent(m0));
+ String molstr2=morgansAsString(getLayer2Equivalent(m0));
+ String molstr3=morgansAsString(getLayer3Equivalent(m0));
+
+
+ return hashChain45 (molstr1, molstr2, molstr3, molstr);
}
static String[] hashChain45 (String... strs) {
diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java
index 2882596..3d34814 100644
--- a/src/test/java/lychi/LychiRegressionTest.java
+++ b/src/test/java/lychi/LychiRegressionTest.java
@@ -1,8 +1,10 @@
package lychi;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;
@@ -21,134 +23,775 @@
@RunWith(Parameterized.class)
public class LychiRegressionTest {
-
- public static class LychiTestInstance{
- String name;
- String input;
- String expectedLychi;
-
-
- public static LychiTestInstance of(String smi, String lychi){
- LychiTestInstance ltest= new LychiTestInstance();
- ltest.input=smi;
- ltest.expectedLychi=lychi;
- ltest.name=smi;
- return ltest;
- }
-
- public LychiTestInstance name(String n){
- this.name=n;
- return this;
- }
-
-
- public Object[] asJunitInput(){
- return new Object[]{this.name, this};
- }
-
- public Molecule getMolecule() throws MolFormatException{
- MolHandler mh = new MolHandler();
- mh.setMolecule(input);
- return mh.getMolecule();
-
- }
- }
-
+
+ public static class LychiTestInstance{
+ String name;
+ String input;
+ String expectedLychi;
+ boolean shouldMatch=true;
+ int layer = 4;
+
+ boolean fail=false;
+
+
+
+ public static LychiTestInstance of(String smi, String lychi){
+ LychiTestInstance ltest= new LychiTestInstance();
+ ltest.input=smi;
+ ltest.expectedLychi=lychi;
+ ltest.name=smi;
+ return ltest;
+ }
+
+ public static LychiTestInstance equivalent(String smi1, String smi2){
+ try{
+ MolHandler mh = new MolHandler();
+ mh.setMolecule(smi2);
+ Molecule m= mh.getMolecule();
+ LyChIStandardizer std = new LyChIStandardizer();
+ std.standardize(m);
+ String fullKey=LyChIStandardizer.hashKey(m);
+ return of(smi1,fullKey);
+ }catch(Exception e){
+ throw new RuntimeException(e);
+ }
+ }
+
+ public LychiTestInstance layer(int layer){
+ this.layer=layer;
+ return this;
+ }
+
+ public static LychiTestInstance equivalentLayer3(String smi1, String smi2){
+ return equivalent(smi1,smi2).layer(3);
+ }
+
+ public static LychiTestInstance notEquivalent(String smi1, String smi2){
+ return equivalent(smi1,smi2).negate();
+ }
+ public LychiTestInstance negate(){
+ this.shouldMatch=!this.shouldMatch;
+ return this;
+ }
+
+ public LychiTestInstance name(String n){
+ this.name=n;
+ return this;
+ }
+
+
+ public Object[] asJunitInput(){
+ return new Object[]{this.name, this};
+ }
+
+ public Molecule getMolecule() throws MolFormatException{
+ MolHandler mh = new MolHandler();
+ mh.setMolecule(input);
+ return mh.getMolecule();
+
+ }
+
+ public LychiTestInstance markToFail(){
+ this.fail=true;
+ return this;
+ }
+
+ public boolean shouldFail(){
+ return this.fail;
+ }
+ }
+
+
+
- private LychiTestInstance spec;
+ private LychiTestInstance spec;
- public LychiRegressionTest(String ignored, LychiTestInstance spec){
- this.spec = spec;
- }
-
- public static void basicTest(Molecule m, String expected) throws Exception{
- LyChIStandardizer std = new LyChIStandardizer();
- std.standardize(m);
- String fullKey=LyChIStandardizer.hashKey(m);
- assertEquals(expected,fullKey);
- }
-
- public static Molecule shuffleMolecule(Molecule m, int[] map){
- MolAtom[] mas=m.getAtomArray();
- MolBond[] mbs=m.getBondArray();
-
- Molecule m2=new Molecule();
-
- int[] rmap = new int[map.length];
-
- for(int i=0;i iatoms=IntStream.range(0, m.getAtomCount()).mapToObj(i2->i2).collect(Collectors.toList());
- Collections.shuffle(iatoms, r);
- int[] map =iatoms.stream().mapToInt(i1->i1).toArray();
- Molecule s=shuffleMolecule(m,map);
-
- basicTest(s,spec.expectedLychi);
- }
- }
-
- @Test
- public void daisyChainLychiAfter10Times() throws Exception{
- Molecule m=spec.getMolecule();
- m.clean(2, null);
- for (int i=0;i<10;i++){
-
- List iatoms=IntStream.range(0, m.getAtomCount()).mapToObj(i2->i2).collect(Collectors.toList());
- Collections.shuffle(iatoms);
- int[] map =iatoms.stream().mapToInt(i1->i1).toArray();
- Molecule s=shuffleMolecule(m,map);
- basicTest(s,spec.expectedLychi);
- m=s;
- }
- }
-
-
- @Parameterized.Parameters(name = "{0}")
- public static List