diff --git a/pom.xml b/pom.xml index cd9111a..648c4bb 100644 --- a/pom.xml +++ b/pom.xml @@ -84,7 +84,7 @@ gov.nih.ncats molwitch - 0.6.6-SNAPSHOT + 0.6.6 org.apache.logging.log4j diff --git a/src/main/java/org/openscience/cdk/geometry/cip/CIPToolMod.java b/src/main/java/org/openscience/cdk/geometry/cip/CIPToolMod.java new file mode 100644 index 0000000..bc5ad12 --- /dev/null +++ b/src/main/java/org/openscience/cdk/geometry/cip/CIPToolMod.java @@ -0,0 +1,278 @@ +package org.openscience.cdk.geometry.cip; + +import java.util.List; + +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.geometry.cip.CIPTool.CIP_CHIRALITY; +import org.openscience.cdk.geometry.cip.rules.CIPLigandRule; +import org.openscience.cdk.geometry.cip.rules.CIPLigandRule2; +import org.openscience.cdk.geometry.cip.rules.ISequenceSubRule; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.interfaces.IDoubleBondStereochemistry; +import org.openscience.cdk.interfaces.IDoubleBondStereochemistry.Conformation; +import org.openscience.cdk.interfaces.IStereoElement; +import org.openscience.cdk.interfaces.ITetrahedralChirality; +import org.openscience.cdk.interfaces.ITetrahedralChirality.Stereo; + +/** + * This modified version of {@link CIPTool} is just present temporarily to correct + * a CIP bug in CDK. CDK's default CIP rule works well, but has a bug in the recursive + * part where, if there's a tie between 2 ligands based on atomic number/mass, it will + * then get all sub-ligands and compare them. The issue is that each set of sub-ligands + * is sorted by a mass+atomic number only priority, and each sub-ligand from the parent + * ligand is compared only to its matched counterpart, not to all potentially higher + * priority ligands. + * + *
+ * Consider this case below (4S)-2,3,4,5-tetramethyloctane:  
+ *  
+ *     3'    5'
+ *     M     C
+ *     |  4  | 
+ * M-C-C-[C]-C-C-C-M
+ *   | 3  :  5
+ *   M    M
+ *        4'
+ * 
+ * M = Methyl
+ * C = Carbon
+ * : = Dashed bond
+ * # = Locant 
+ * 
+ * + * The 4 postion [C] is a stereo center with S configuration (as demonstrated + * with the : dashed bond to the methyl group below). The carbon atoms at 3, 4 + * and 5 positions are tied for priority based on first pass CIP rules (atom + * numbner and mass). However, 3 and 5 are quickly seen as higher priority than + * 4' in the first tie-break as 4' has no sub-ligands. 3 and 5, however, both + * have 2 sub-ligands. 3 has [3',2] and 5 has [5',6] as sub-ligands. The next + * step CDK CIP rules do is to ORDER these sub-ligands and then compare them. + * In this case, we need to order [3',2] and [5',6] individually. For [3',2], + * this is done by comparing only the ATOMIC NUMBER+MASS between 3' and 2, + * and since they are both carbon atoms, this will result in a tie. This means + * that if the starting order of [3',2] was [3',2] it will remain that way. If + * the starting order is [2,3'] it will remain [2,3']. Since the input order + * of the ligands is based on read-order (order of bonds in the bond table in + * the CTAB, for example), this means the ordering of the sub-ligands is effectively + * non-deterministic. 2 is certainly higher priority than 3' in full CIP rules, + * and 6 is higher priorty than 5' as well. In the above cases there are 4 ways + * the ligand ordering can go + *
+ * Possibility 1: both sub-ligands in right order
+ *    3 vs 5 =>
+ *    [2,3'] vs [6,5'] => 
+ *    2 vs 6 => 2 is higher priority
+ *    therefore 3 is higher priority (correct)
+ * 
+ * Possibility 2: 3 sub-ligands wrong order, 5 right order
+ *    3 vs 5 =>
+ *    [3',2] vs [6,5'] => 
+ *    3' vs 6 => 6 is higher priority
+ *    therefore 5 is higher priority (incorrect)
+ * 
+ * Possibility 3: 3 sub-ligands RIGHT order, 5 wrong order
+ *    3 vs 5 =>
+ *    [2,3'] vs [5',6] => 
+ *    2 vs 5' => 2 is higher priority
+ *    therefore 3 is higher priority (correct)
+ * 
+ * Possibility 4: both sub-ligands in WRONG order
+ *    3 vs 5 =>
+ *    [3',2] vs [5',6] => 
+ *    3' vs 5' => TIE, go to next
+ *    2 vs 6 => 2 is higher priority
+ *    therefore 3 is higher priority (correct)
+ * 
+ *

+ * Notice here that 3 of the 4 possibilities above will give the correct + * CIP ordering. This is indeed typical. The bug only applies in circumstances + * where 2 or more "principal" ligands are tied for CIP priority based on + * atom number and atomic weight AND those 2 ligands both have sub-ligands + * (non-hydrogens) AND at least 2 sub-ligands of those ligands are tied for + * atomic weight and atomic mass. Even still, the majority of the time the + * criteria is met, CDK will still produce the correct CIP labeling. + *

+ *

+ * This class corrects the bug by switching how the ordering is done to instead + * call the full CIP ordering for sub-ligands. This is potentially computationally + * more expensive. The actual code here is largely just copy/pasted methods needed + * to perform the same methods as {@link CIPTool} and a reference to a modified {@link CIPLigandRule} + * (called {@link CIPLigandRule2}). + * + *

+ * + * + * + * @author tyler + * + */ +public class CIPToolMod { + + public static boolean USE_NEW_CENTRES=true; + + + private static ISequenceSubRule cipRule = new CIPLigandRule2(); + + /** + * GSRS-MODIFIED: Temporary bug fix for {@link CIPTool#label(IAtomContainer)} + * + * @param container structure to label + */ + public static void label(IAtomContainer container) { + //Experimental new labeller + if(USE_NEW_CENTRES) { + com.simolecule.centres.CdkLabeller.label(container); + return; + }else { + for (IStereoElement stereoElement : container.stereoElements()) { + if (stereoElement instanceof ITetrahedralChirality) { + ITetrahedralChirality tc = (ITetrahedralChirality) stereoElement; + tc.getChiralAtom().setProperty(CDKConstants.CIP_DESCRIPTOR, getCIPChirality(container, tc).toString()); + } else if (stereoElement instanceof IDoubleBondStereochemistry) { + IDoubleBondStereochemistry dbs = (IDoubleBondStereochemistry) stereoElement; + dbs.getStereoBond() + .setProperty(CDKConstants.CIP_DESCRIPTOR, getCIPChirality(container, dbs).toString()); + } + } + } + + } + + + /** + * GSRS-MODIFIED: Temporary bug fix for {@link CIPTool#getCIPChirality(IAtomContainer, ITetrahedralChirality)} + * + * @param container structure to label + */ + public static CIP_CHIRALITY getCIPChirality(IAtomContainer container, ITetrahedralChirality stereoCenter) { + + // the LigancyFourChirality is kind of redundant but we keep for an + // easy way to get the ILigands array + LigancyFourChirality tmp = new LigancyFourChirality(container, stereoCenter); + Stereo stereo = stereoCenter.getStereo(); + + int parity = permParity(tmp.getLigands()); + + if (parity == 0) return CIP_CHIRALITY.NONE; + if (parity < 0) stereo = stereo.invert(); + + if (stereo == Stereo.CLOCKWISE) return CIP_CHIRALITY.R; + if (stereo == Stereo.ANTI_CLOCKWISE) return CIP_CHIRALITY.S; + + return CIP_CHIRALITY.NONE; + } + + /** + * GSRS-MODIFIED: Temporary bug fix for {@link CIPTool#getCIPChirality(IAtomContainer, IDoubleBondStereochemistry)} + * + * @param container structure to label + */ + public static CIP_CHIRALITY getCIPChirality(IAtomContainer container, IDoubleBondStereochemistry stereoCenter) { + + IBond stereoBond = stereoCenter.getStereoBond(); + IBond leftBond = stereoCenter.getBonds()[0]; + IBond rightBond = stereoCenter.getBonds()[1]; + + // the following variables are usd to label the atoms - makes things + // a little more concise + // + // x y x + // \ / \ + // u = v or u = v + // \ + // y + // + IAtom u = stereoBond.getBegin(); + IAtom v = stereoBond.getEnd(); + IAtom x = leftBond.getOther(u); + IAtom y = rightBond.getOther(v); + + Conformation conformation = stereoCenter.getStereo(); + + ILigand[] leftLigands = getLigands(u, container, v); + ILigand[] rightLigands = getLigands(v, container, u); + + if (leftLigands.length > 2 || rightLigands.length > 2) return CIP_CHIRALITY.NONE; + + // invert if x/y aren't in the first position + if (!leftLigands[0].getLigandAtom().equals(x)) conformation = conformation.invert(); + if (!rightLigands[0].getLigandAtom().equals(y)) conformation = conformation.invert(); + + int p = permParity(leftLigands) * permParity(rightLigands); + + if (p == 0) return CIP_CHIRALITY.NONE; + + if (p < 0) conformation = conformation.invert(); + + if (conformation == Conformation.TOGETHER) return CIP_CHIRALITY.Z; + if (conformation == Conformation.OPPOSITE) return CIP_CHIRALITY.E; + + return CIP_CHIRALITY.NONE; + } + + + /** + * GSRS-MODIFIED: Temporary bug fix + * + * @param atom + * @param container + * @param exclude + * @return + */ + private static ILigand[] getLigands(IAtom atom, IAtomContainer container, IAtom exclude) { + + List neighbors = container.getConnectedAtomsList(atom); + + ILigand[] ligands = new ILigand[neighbors.size() - 1]; + + int i = 0; + for (IAtom neighbor : neighbors) { + if (!neighbor.equals(exclude)) ligands[i++] = new Ligand(container, new VisitedAtoms(), atom, neighbor); + } + + return ligands; + } + + + + + + + /** + * Obtain the permutation parity (-1,0,+1) to put the ligands in descending + * order (highest first). A parity of 0 indicates two or more ligands were + * equivalent. + * + * @param ligands the ligands to sort + * @return parity, odd (-1), even (+1) or none (0) + */ + private static int permParity(final ILigand[] ligands) { + + // count the number of swaps made by insertion sort - if duplicates + // are fount the parity is 0 + int swaps = 0; + + for (int j = 1, hi = ligands.length; j < hi; j++) { + ILigand ligand = ligands[j]; + int i = j - 1; + int cmp = 0; + while ((i >= 0) && (cmp = cipRule.compare(ligand, ligands[i])) > 0) { + ligands[i + 1] = ligands[i--]; + swaps++; + } + if (cmp == 0) // identical entries + return 0; + ligands[i + 1] = ligand; + } + + // odd (-1) or even (+1) + return (swaps & 0x1) == 0x1 ? -1 : +1; + + } + + + + +} diff --git a/src/main/java/org/openscience/cdk/geometry/cip/rules/CIPLigandRule2.java b/src/main/java/org/openscience/cdk/geometry/cip/rules/CIPLigandRule2.java new file mode 100644 index 0000000..8c0ba94 --- /dev/null +++ b/src/main/java/org/openscience/cdk/geometry/cip/rules/CIPLigandRule2.java @@ -0,0 +1,80 @@ +package org.openscience.cdk.geometry.cip.rules; + +import java.util.Arrays; + +import org.openscience.cdk.geometry.cip.CIPTool; +import org.openscience.cdk.geometry.cip.CIPToolMod; +import org.openscience.cdk.geometry.cip.ILigand; +/** + * This class does the same as {@link CIPLigandRule2} only it also orders + * sub-ligands based on its own rule rather than based on the {@link CombinedAtomicMassNumberRule} + * order. This fixes a CDK bug detailed in {@link CIPToolMod}. + * @author tyler + * + */ +public class CIPLigandRule2 implements ISequenceSubRule { + + CombinedAtomicMassNumberRule numberRule = new CombinedAtomicMassNumberRule(); + + /** {@inheritDoc} */ + @Override + public int compare(ILigand ligand1, ILigand ligand2) { + int numberComp = numberRule.compare(ligand1, ligand2); + if (numberComp != 0) return numberComp; + + // OK, now I need to recurse... + ILigand[] ligand1Ligands = CIPTool.getLigandLigands(ligand1); + ILigand[] ligand2Ligands = CIPTool.getLigandLigands(ligand2); + // if neither have ligands: + if (ligand1Ligands.length == 0 && ligand2Ligands.length == 0) return 0; + // else if one has no ligands + if (ligand1Ligands.length == 0) return -1; + if (ligand2Ligands.length == 0) return 1; + // ok, both have at least one ligand + int minLigandCount = Math.min(ligand1Ligands.length, ligand2Ligands.length); + if (ligand1Ligands.length > 1) ligand1Ligands = order(ligand1Ligands); + if (ligand2Ligands.length > 1) ligand2Ligands = order(ligand2Ligands); + // first do a basic number rule + for (int i = 0; i < minLigandCount; i++) { + int comparison = numberRule.compare(ligand1Ligands[i], ligand2Ligands[i]); + if (comparison != 0) return comparison; + } + if (ligand1Ligands.length == ligand2Ligands.length) { + // it that does not resolve it, do a full, recursive compare + for (int i = 0; i < minLigandCount; i++) { + int comparison = compare(ligand1Ligands[i], ligand2Ligands[i]); + if (comparison != 0) return comparison; + } + } + // OK, if we reached this point, then the ligands they 'share' are all equals, so the one + // with more ligands wins + if (ligand1Ligands.length > ligand2Ligands.length) + return 1; + else if (ligand1Ligands.length < ligand2Ligands.length) + return -1; + else + return 0; + } + + /** + * Order the ligands from high to low precedence according to atomic and mass numbers. + */ + private ILigand[] order(ILigand[] ligands) { + ILigand[] newLigands = new ILigand[ligands.length]; + System.arraycopy(ligands, 0, newLigands, 0, ligands.length); + + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + //THIS IS THE ONLY LINE OF CODE THAT NEEDS TO BE CHANGED + //FOR CDK CIP TO WORK + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + Arrays.sort(newLigands, this); + // this above list is from low to high precendence, so we need to revert the array + ILigand[] reverseLigands = new ILigand[newLigands.length]; + for (int i = 0; i < newLigands.length; i++) { + reverseLigands[(newLigands.length - 1) - i] = newLigands[i]; + } + return reverseLigands; + } + + } \ No newline at end of file