From fafb49ee7d6ae4374b0fb47d418dab47f99f2dc6 Mon Sep 17 00:00:00 2001 From: Mitch Miller Date: Fri, 30 Aug 2024 12:50:01 -0400 Subject: [PATCH 1/3] adding stereochemistryComments/'additional stereochemistry' to the def hash --- .../java/example/chem/DefHashCalcTest.java | 56 +++++++++++++++++++ .../test/resources/molfiles/M8LE2AF05P.mol | 23 ++++++++ .../test/resources/testJSON/R0NL28355M.json | 1 + ...micalSubstanceDefinitionalElementImpl.java | 17 ++++++ 4 files changed, 97 insertions(+) create mode 100644 gsrs-module-substance-example/src/test/resources/molfiles/M8LE2AF05P.mol create mode 100644 gsrs-module-substance-example/src/test/resources/testJSON/R0NL28355M.json diff --git a/gsrs-module-substance-example/src/test/java/example/chem/DefHashCalcTest.java b/gsrs-module-substance-example/src/test/java/example/chem/DefHashCalcTest.java index 848b67c35..a2c97d2b9 100644 --- a/gsrs-module-substance-example/src/test/java/example/chem/DefHashCalcTest.java +++ b/gsrs-module-substance-example/src/test/java/example/chem/DefHashCalcTest.java @@ -7,11 +7,15 @@ import ix.core.chem.StructureProcessor; import ix.core.models.Structure; import ix.ginas.modelBuilders.ChemicalSubstanceBuilder; +import ix.ginas.modelBuilders.SubstanceBuilder; import ix.ginas.models.v1.ChemicalSubstance; import ix.ginas.models.v1.GinasChemicalStructure; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.io.ClassPathResource; + +import java.io.File; import java.util.ArrayList; import java.util.List; @@ -85,4 +89,56 @@ public void testOpticalInDefinitionalHashCalcPos() throws Exception { definitionalElements.forEach(de-> System.out.printf("key: %s = %s\n", de.getKey(), de.getValue())); Assertions.assertTrue(definitionalElements.stream().anyMatch(de->de.getKey().equals(opticalActivityKey))); } + + + @Test + public void testStereoCommentsInDefinitionalHashCalcPos() throws 
Exception { + String additionalStereochemistryKey="structure.properties.stereoComments"; + String dataFileName ="R0NL28355M.json"; + File substanceFile = new ClassPathResource("testJSON/" + dataFileName).getFile(); + ChemicalSubstanceBuilder builder = SubstanceBuilder.from(substanceFile); + + ChemicalSubstance chem =builder.build(); // fixture structure has atropisomerism "Yes" and non-blank stereoComments, so the element is expected + System.out.printf("atropisomerism: %s\n", chem.getStructure().atropisomerism); + ChemicalSubstanceDefinitionalElementImpl defHashCalculator = new ChemicalSubstanceDefinitionalElementImpl(); + List<DefinitionalElement> definitionalElements = new ArrayList<>(); + defHashCalculator.computeDefinitionalElements(chem, definitionalElements::add); + definitionalElements.forEach(de-> System.out.printf("key: %s = %s\n", de.getKey(), de.getValue())); + Assertions.assertTrue(definitionalElements.stream().anyMatch(de->de.getKey().equals(additionalStereochemistryKey))); + } + + @Test + public void testStereoCommentsInDefinitionalHashCalcNeg() throws Exception { + String additionalStereochemistryKey="structure.properties.stereoComments"; + String dataFileName ="R0NL28355M.json"; + File substanceFile = new ClassPathResource("testJSON/" + dataFileName).getFile(); + ChemicalSubstanceBuilder builder = SubstanceBuilder.from(substanceFile); + + ChemicalSubstance chem =builder.build(); + chem.getStructure().atropisomerism= Structure.NYU.No; // atropisomerism switched off: stereoComments must stay out of the def hash + System.out.printf("atropisomerism: %s\n", chem.getStructure().atropisomerism); + ChemicalSubstanceDefinitionalElementImpl defHashCalculator = new ChemicalSubstanceDefinitionalElementImpl(); + List<DefinitionalElement> definitionalElements = new ArrayList<>(); + defHashCalculator.computeDefinitionalElements(chem, definitionalElements::add); + definitionalElements.forEach(de-> System.out.printf("key: %s = %s\n", de.getKey(), de.getValue())); + Assertions.assertFalse(definitionalElements.stream().anyMatch(de->de.getKey().equals(additionalStereochemistryKey))); + } + + @Test + public void testStereoCommentsInDefinitionalHashCalcNeg2() throws Exception { + String 
additionalStereochemistryKey="structure.properties.stereoComments"; + String dataFileName ="R0NL28355M.json"; + File substanceFile = new ClassPathResource("testJSON/" + dataFileName).getFile(); + ChemicalSubstanceBuilder builder = SubstanceBuilder.from(substanceFile); + + ChemicalSubstance chem =builder.build(); + chem.getStructure().stereoComments = null; // no comment text: element must be absent even though the fixture's atropisomerism is "Yes" + ChemicalSubstanceDefinitionalElementImpl defHashCalculator = new ChemicalSubstanceDefinitionalElementImpl(); + List<DefinitionalElement> definitionalElements = new ArrayList<>(); + defHashCalculator.computeDefinitionalElements(chem, definitionalElements::add); + definitionalElements.forEach(de-> System.out.printf("key: %s = %s\n", de.getKey(), de.getValue())); + Assertions.assertFalse(definitionalElements.stream().anyMatch(de->de.getKey().equals(additionalStereochemistryKey))); + } + + } diff --git a/gsrs-module-substance-example/src/test/resources/molfiles/M8LE2AF05P.mol b/gsrs-module-substance-example/src/test/resources/molfiles/M8LE2AF05P.mol new file mode 100644 index 000000000..3a9700b0d --- /dev/null +++ b/gsrs-module-substance-example/src/test/resources/molfiles/M8LE2AF05P.mol @@ -0,0 +1,23 @@ + + Marvin 01132111322D + + 9 9 0 0 0 0 999 V2000 + 7.3523 -4.5535 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 7.6081 -3.7619 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 + 6.9404 -3.2827 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.2700 -3.7619 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.5257 -4.5535 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 + 6.0411 -5.2212 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 8.3916 -3.5035 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.8370 -5.2212 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 7.5031 -5.9750 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 2 3 1 0 0 0 0 + 3 4 1 0 0 0 0 + 1 5 1 0 0 0 0 + 4 5 1 0 0 0 0 + 5 6 1 6 0 0 0 + 2 7 1 1 0 0 0 + 1 8 1 0 0 0 0 + 8 9 2 0 0 0 0 +M END \ No newline at end of file diff --git a/gsrs-module-substance-example/src/test/resources/testJSON/R0NL28355M.json b/gsrs-module-substance-example/src/test/resources/testJSON/R0NL28355M.json new file 
mode 100644 index 000000000..28053a0e8 --- /dev/null +++ b/gsrs-module-substance-example/src/test/resources/testJSON/R0NL28355M.json @@ -0,0 +1 @@ +{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"04187dcc-aec4-4b21-bf7a-8180ac72803e","definitionType":"PRIMARY","definitionLevel":"INCOMPLETE","substanceClass":"chemical","status":"approved","version":"4","approvedBy":"FDA_SRS","names":[{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716682404000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"e3a78e9c-708a-4a82-b34e-3e0a9e4bc36b","name":"(7,10'-BI-4H-NAPHTHO(2,3-B)PYRAN)-4,4'-DIONE, 2,2',3,3'-TETRAHYDRO-2,2',5,5',8-PENTAHYDROXY-6,6',8'-TRIMETHOXY-2,2'-DIMETHYL-","stdName":"(7,10'-BI-4H-NAPHTHO(2,3-B)PYRAN)-4,4'-DIONE, 2,2',3,3'-TETRAHYDRO-2,2',5,5',8-PENTAHYDROXY-6,6',8'-TRIMETHOXY-2,2'-DIMETHYL-","type":"sys","domains":[],"languages":["en"],"nameJurisdiction":[],"nameOrgs":[],"preferred":false,"displayName":false,"references":["cc7c71bf-ecf0-461f-89e4-e272e1ff7190"],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716680950000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"577b7e38-78fd-4ca3-9960-1e5bcc869551","name":"2,2',3,3'-TETRAHYDRO-2,2',5,5',8-PENTAHYDROXY-6,6',8'-TRIMETHOXY-2,2'-DIMETHYL(7,10'-BI-4H-NAPHTHO(2,3-B)PYRAN)-4,4'-DIONE, (S)-","stdName":"2,2',3,3'-TETRAHYDRO-2,2',5,5',8-PENTAHYDROXY-6,6',8'-TRIMETHOXY-2,2'-DIMETHYL(7,10'-BI-4H-NAPHTHO(2,3-B)PYRAN)-4,4'-DIONE, (S)-","type":"sys","domains":[],"languages":["en"],"nameJurisdiction":[],"nameOrgs":[],"preferred":false,"displayName":false,"references":["09c84a7b-2282-4e40-a773-4fe20a2a24eb"],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716691939000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"208fd7f6-7705-432f-9a48-541444d71967","name":"AURASPERONE C","stdName":"AURASPERONE 
C","type":"cn","domains":[],"languages":["en"],"nameJurisdiction":[],"nameOrgs":[],"preferred":false,"displayName":true,"references":["6830c204-b8a1-4b52-9136-701bbbfe789a"],"access":[]}],"codes":[{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"11b6e674-0487-4e00-8b7c-a2da18d06491","codeSystem":"FDA UNII","code":"R0NL28355M","type":"PRIMARY","_isClassification":false,"references":[],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"14d15c9d-92d7-d2ee-7dbc-36eb73c8c460","codeSystem":"EPA CompTox","code":"DTXSID80961903","type":"PRIMARY","url":"https://comptox.epa.gov/dashboard/chemical/details/DTXSID80961903","_isClassification":false,"references":[],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"c9e216eb-4674-4e40-9649-5f9ea44d1873","codeSystem":"PUBCHEM","code":"179521","type":"PRIMARY","url":"https://pubchem.ncbi.nlm.nih.gov/compound/179521","_isClassification":false,"references":["3af2d71e-2a88-40c7-9e85-d2b6b159fb68"],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"cd5caa86-e224-40ef-ac7c-49b9550cd69a","codeSystem":"CAS","code":"41689-66-1","type":"PRIMARY","url":"https://commonchemistry.cas.org/detail?cas_rn=41689-66-1","_isClassification":false,"references":["3af2d71e-2a88-40c7-9e85-d2b6b159fb68","746d40e9-e94c-4ceb-b823-8e272f89d6ea"],"access":[]}],"notes":[],"properties":[],"relationships":[],"references":[{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"09c84a7b-2282-4e40-a773-4fe20a2a24eb","citation":"FDA_SRS","docType":"SRS","publicDomain":true,"tags":["NOMEN"],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEdited
By":"ADMIN","deprecated":false,"uuid":"3af2d71e-2a88-40c7-9e85-d2b6b159fb68","citation":"SRS CODE IMPORT","docType":"SRS","documentDate":1493407997000,"publicDomain":true,"tags":["NOMEN"],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"5f74f4d0-006c-4b40-a63f-5c46db78ec1f","citation":"fda_gsrs_public_data_exported-2024-05-16__22.30.02.gsrs","docType":"BATCH_IMPORT","documentDate":1716598895000,"publicDomain":false,"tags":[],"id":"13f3ec4422ea2da09feb","access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"6830c204-b8a1-4b52-9136-701bbbfe789a","citation":"CFSAN","docType":"SRS","publicDomain":true,"tags":["NOMEN","PUBLIC_DOMAIN_RELEASE","AUTO_SELECTED"],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"746d40e9-e94c-4ceb-b823-8e272f89d6ea","citation":"STN","docType":"STN (SCIFINDER)","publicDomain":true,"tags":[],"access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"c37041d9-6e8e-47b1-85e1-ce86bce08919","citation":"SRS import [R0NL28355M]","docType":"SRS","documentDate":1493407997000,"publicDomain":true,"tags":["NOMEN"],"url":"http://fdasis.nlm.nih.gov/srs/srsdirect.jsp?regno=R0NL28355M","access":[]},{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"cc7c71bf-ecf0-461f-89e4-e272e1ff7190","citation":"STN","docType":"SRS","publicDomain":true,"tags":["NOMEN"],"access":[]}],"approvalID":"R0NL28355M","tags":[],"structure":{"id":"ce349b63-8cbc-4856-bcdf-7ef20c962371","created":1716598896000,"lastEdited":1719137278000,"deprecated":false,"digest":"b9d2eb97e879022c306e751604f201abe01c68e7","molfile":"\n JSDraw206252112202D\n\n 43 48 0 0 0 0 0 V2000\n 29.8039 -9.9873 0.0000 C 0 0 
0 0 0 0 0 0 0 0 0 0\n 29.8039 -8.4243 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 31.1539 -7.6427 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 32.5040 -8.4243 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 32.5040 -9.9873 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 31.1539 -10.7689 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 28.4540 -10.7689 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 28.4540 -7.6427 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 27.1040 -8.4243 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 25.7542 -7.6427 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 24.4043 -8.4243 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 24.4043 -9.9873 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 25.7542 -10.7689 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 27.1040 -9.9873 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 23.0543 -6.0797 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 21.6864 -5.3158 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 21.6864 -3.7527 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 23.0543 -2.9712 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 24.4043 -3.7527 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 24.4043 -5.3158 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 23.0543 -7.6427 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 20.3366 -6.0797 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 20.3366 -7.6427 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 18.9866 -8.4243 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 18.9866 -9.9873 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 20.3366 -10.7689 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 21.6864 -9.9873 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 21.6864 -8.4243 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 31.1539 -6.0797 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 20.3366 -2.9712 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 24.9371 -2.2784 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 34.0493 -9.7211 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 20.3366 -12.3321 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 18.9866 -13.1136 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 17.6367 -7.6427 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 16.2867 -8.4243 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 25.7542 -6.0797 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 27.1040 -5.3158 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 23.0543 -10.7689 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 18.9866 -5.3158 0.0000 O 0 0 0 0 
0 0 0 0 0 0 0 0\n 28.4540 -6.0797 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 25.9317 -4.0193 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 33.0546 -11.4617 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0 0 0 0\n 1 6 1 0 0 0 0\n 1 7 2 0 0 0 0\n 2 3 1 0 0 0 0\n 2 8 2 0 0 0 0\n 3 4 1 0 0 0 0\n 3 29 2 0 0 0 0\n 4 5 1 0 0 0 0\n 5 6 1 0 0 0 0\n 5 32 1 0 0 0 0\n 5 43 1 0 0 0 0\n 7 14 1 0 0 0 0\n 8 9 1 0 0 0 0\n 8 41 1 0 0 0 0\n 9 10 2 0 0 0 0\n 9 14 1 0 0 0 0\n 11 10 1 1 0 0 0\n 10 37 1 0 0 0 0\n 11 12 2 0 0 0 0\n 11 21 1 0 0 0 0\n 12 13 1 0 0 0 0\n 12 39 1 0 0 0 0\n 13 14 2 0 0 0 0\n 15 16 1 0 0 0 0\n 15 20 1 0 0 0 0\n 15 21 2 0 0 0 0\n 16 17 1 0 0 0 0\n 16 22 2 0 0 0 0\n 17 18 1 0 0 0 0\n 17 30 2 0 0 0 0\n 18 19 1 0 0 0 0\n 19 20 1 0 0 0 0\n 19 31 1 0 0 0 0\n 19 42 1 0 0 0 0\n 21 28 1 0 0 0 0\n 22 23 1 0 0 0 0\n 22 40 1 0 0 0 0\n 23 24 2 0 0 0 0\n 23 28 1 0 0 0 0\n 24 25 1 0 0 0 0\n 24 35 1 0 0 0 0\n 25 26 2 0 0 0 0\n 26 27 1 0 0 0 0\n 26 33 1 0 0 0 0\n 27 28 2 0 0 0 0\n 33 34 1 0 0 0 0\n 35 36 1 0 0 0 0\n 37 38 1 0 0 0 0\nM END","smiles":"CC1(CC(=O)c2c(cc3cc(c(-c4c5cc(cc(c5c(c6C(=O)CC(C)(O)Oc46)O)OC)OC)c(c3c2O)OC)O)O1)O","formula":"C31H28O12","opticalActivity":"UNSPECIFIED","atropisomerism":"Yes","stereoComments":"AXIAL, S 
","stereoCenters":4,"definedStereo":2,"ezCenters":0,"charge":0,"mwt":592.548,"properties":[{"id":2140064,"label":"InChI_Key","term":"BAIJEJFONPISHA-UHFFFAOYSA-N"},{"id":2140065,"label":"EXACT_HASH","term":"BAIJEJFONPISHA_UHFFFAOYSA_N"},{"id":2140066,"label":"STEREO_INSENSITIVE_HASH","term":"BAIJEJFONPISHA"}],"links":[],"count":1,"createdBy":"ADMIN","lastEditedBy":"ADMIN","stereochemistry":"ABSOLUTE","references":["6830c204-b8a1-4b52-9136-701bbbfe789a","c37041d9-6e8e-47b1-85e1-ce86bce08919"],"access":[],"hash":"BAIJEJFONPISHA_UHFFFAOYSA_N","_inchiKey":"BAIJEJFONPISHA-UHFFFAOYSA-N","_inchi":"InChI=1S/C31H28O12/c1-30(37)10-16(33)23-19(42-30)7-12-6-15(32)24(28(41-5)20(12)26(23)35)22-14-8-13(39-3)9-18(40-4)21(14)27(36)25-17(34)11-31(2,38)43-29(22)25/h6-9,32,35-38H,10-11H2,1-5H3"},"moieties":[{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1719133225000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"711de127-1d79-44d4-b9e1-294168679151","id":"711de127-1d79-44d4-b9e1-294168679151","digest":"40f5891dffbdf2e5ffb24eea314fce9f11355606","molfile":"\n Marvin 01132105522D \n\n 43 48 0 0 0 0 999 V2000\n 1.4279 -5.7304 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.1419 -5.3171 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 2.1419 -4.4904 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.4279 -4.0770 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.4279 -3.2504 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.7140 -2.8370 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0000 -3.2504 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.1419 -2.8370 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.1419 -2.0104 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.4279 -1.6064 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 2.8558 -1.6064 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.8558 -0.7797 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.1419 -0.3664 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 3.5792 -0.3664 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.2932 -0.7797 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n 4.5750 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.1010 -0.9207 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 4.2932 -1.6064 0.0000 O 0 0 0 0 
0 0 0 0 0 0 0 0\n 3.5792 -2.0104 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.5792 -2.8370 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.2932 -3.2504 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.2932 -4.0770 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.5792 -4.4904 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 5.0071 -4.4904 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.7210 -4.0770 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 6.4350 -4.4904 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.1489 -4.0770 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.8629 -4.4904 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 8.5769 -4.0770 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0\n 9.3942 -3.9362 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 8.8681 -4.8568 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 8.5769 -3.2504 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.8629 -2.8370 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.8629 -2.0104 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 7.1489 -3.2504 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 6.4350 -2.8370 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 6.4350 -2.0104 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 5.7210 -3.2504 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.0071 -2.8370 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.0071 -2.0104 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 5.7210 -1.6064 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.8558 -3.2504 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.8558 -4.0770 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2 1 1 0 0 0 0\n 3 2 1 0 0 0 0\n 4 3 1 0 0 0 0\n 3 43 2 0 0 0 0\n 5 4 2 0 0 0 0\n 5 6 1 0 0 0 0\n 8 5 1 0 0 0 0\n 6 7 1 0 0 0 0\n 9 8 1 0 0 0 0\n 8 42 2 0 0 0 0\n 9 10 1 0 0 0 0\n 11 9 2 0 0 0 0\n 11 12 1 0 0 0 0\n 19 11 1 0 0 0 0\n 12 13 2 0 0 0 0\n 12 14 1 0 0 0 0\n 14 15 1 0 0 0 0\n 15 16 1 0 0 0 0\n 15 17 1 0 0 0 0\n 15 18 1 0 0 0 0\n 19 18 1 0 0 0 0\n 19 20 2 0 0 0 0\n 21 20 1 0 0 0 0\n 20 42 1 0 0 0 0\n 21 22 1 0 0 0 0\n 39 21 2 0 0 0 0\n 22 23 1 0 0 0 0\n 22 24 2 0 0 0 0\n 24 25 1 0 0 0 0\n 26 25 1 0 0 0 0\n 38 25 2 0 0 0 0\n 27 26 2 0 0 0 0\n 27 28 1 0 0 0 0\n 27 35 1 0 0 0 0\n 29 28 1 0 0 0 0\n 29 30 1 0 0 0 0\n 29 31 1 0 0 0 0\n 32 29 1 0 0 0 0\n 33 32 1 0 0 0 0\n 33 34 2 0 0 0 0\n 35 33 1 0 0 0 0\n 35 36 2 0 0 0 
0\n 36 37 1 0 0 0 0\n 36 38 1 0 0 0 0\n 38 39 1 0 0 0 0\n 39 40 1 0 0 0 0\n 40 41 1 0 0 0 0\n 43 42 1 0 0 0 0\nM END","smiles":"CC1(CC(=O)c2c(cc3cc(c(-c4c5cc(cc(c5c(c6C(=O)CC(C)(O)Oc46)O)OC)OC)c(c3c2O)OC)O)O1)O","formula":"C31H28O12","opticalActivity":"UNSPECIFIED","atropisomerism":"No","stereoCenters":2,"definedStereo":0,"ezCenters":0,"charge":0,"mwt":592.548,"properties":[{"id":1863047,"label":"InChI_Key","term":"BAIJEJFONPISHA-UHFFFAOYSA-N"},{"id":1863048,"label":"EXACT_HASH","term":"BAIJEJFONPISHA_UHFFFAOYSA_N"},{"id":1863049,"label":"STEREO_INSENSITIVE_HASH","term":"BAIJEJFONPISHA"}],"links":[],"count":1,"stereochemistry":"MIXED","references":[],"access":[],"hash":"BAIJEJFONPISHA_UHFFFAOYSA_N","_inchiKey":"BAIJEJFONPISHA-UHFFFAOYSA-N","_inchi":"InChI=1S/C31H28O12/c1-30(37)10-16(33)23-19(42-30)7-12-6-15(32)24(28(41-5)20(12)26(23)35)22-14-8-13(39-3)9-18(40-4)21(14)27(36)25-17(34)11-31(2,38)43-29(22)25/h6-9,32,35-38H,10-11H2,1-5H3","countAmount":{"created":1716598896000,"createdBy":"ADMIN","lastEdited":1716598896000,"lastEditedBy":"ADMIN","deprecated":false,"uuid":"abfca7e0-3eb3-4a49-828a-dc573df09fc4","type":"MOL RATIO","average":1,"units":"MOL RATIO","references":[],"access":[]}}],"_name":"AURASPERONE C","_approvalIDDisplay":"R0NL28355M","access":[],"_self":"https://gsrs.ncats.nih.gov/api/v1/substances(04187dcc-aec4-4b21-bf7a-8180ac72803e)?view=full"} \ No newline at end of file diff --git a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java index e3f9afed6..fe677215a 100644 --- a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java +++ b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java @@ -62,6 +62,15 @@ public void 
computeDefinitionalElements(Object s, Consumer log.debug("structure.opticalActivity.toString(): " + structure.opticalActivity.toString()); } } + //include 'additional stereochemistry' (stereoComments) in the def hash only when atropisomerism is YES and the comment is non-blank (requested 30 August 2024) + if(structure.atropisomerism!= null + && "YES".equalsIgnoreCase(structure.atropisomerism.toString()) + && structure.stereoComments!=null + && structure.stereoComments.trim().length()>0){ + log.trace("additional/stereoComments stereochemistry to def hash for structure"); + consumer.accept(DefinitionalElement.of("structure.properties.stereoComments", + structure.stereoComments, 2)); + } } if( chemicalSubstance.moieties != null) { @@ -82,6 +91,14 @@ public void computeDefinitionalElements(Object s, Consumer m.structure.opticalActivity.toString(), 2)); log.debug("m.structure.opticalActivity.toString(): " + m.structure.opticalActivity.toString()); } + //same rule per moiety (requested 30 August 2024); stereoComments may be null, so it is null-checked before trim() exactly like the structure-level guard + if(m.structure.atropisomerism!= null + && "YES".equalsIgnoreCase(m.structure.atropisomerism.toString()) + && m.structure.stereoComments!=null && m.structure.stereoComments.trim().length()>0){ + log.trace("additional/stereoComments stereochemistry to def hash for moiety structure"); + consumer.accept(DefinitionalElement.of("moiety[" + mh + "].properties.stereoComments", + m.structure.stereoComments, 2)); + } consumer.accept(DefinitionalElement.of("moiety[" + mh + "].countAmount", m.getCountAmount().toString(), 2)); log.debug("m.getCountAmount().toString(): " + m.getCountAmount().toString()); From e4c180b6486daa34725ca607ad83a4deca385d74 Mon Sep 17 00:00:00 2001 From: Mitch Miller Date: Wed, 18 Sep 2024 10:38:57 -0400 Subject: [PATCH 2/3] added 'RACEMIC' to the types of stereochemistry that make opticalActivity part of the definitional hash --- .../definitional/ChemicalSubstanceDefinitionalElementImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java index fe677215a..5742678e8 100644 --- a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java +++ b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java @@ -18,7 +18,7 @@ public class ChemicalSubstanceDefinitionalElementImpl implements DefinitionalEle @Autowired private StructureProcessor structureProcessor; - private List stereoUsingOpticalActivities = Arrays.asList( "UNKNOWN", "MIXED", "EPIMERIC"); + private List stereoUsingOpticalActivities = Arrays.asList( "UNKNOWN", "MIXED", "EPIMERIC", "RACEMIC"); @Override public boolean supports(Object s) { From 808d62323b1b4c4a26d23d6ed7ab5afe8913e54d Mon Sep 17 00:00:00 2001 From: Mitch Miller Date: Tue, 29 Oct 2024 11:08:03 -0400 Subject: [PATCH 3/3] adjusted calculation of def hash based on stereochemistry: include optical activity when stereochemistry is 'UNKNOWN' based on request from colleague --- .../java/example/chem/DefHashCalcTest.java | 36 ++++++++++++++++++- ...micalSubstanceDefinitionalElementImpl.java | 4 ++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/gsrs-module-substance-example/src/test/java/example/chem/DefHashCalcTest.java b/gsrs-module-substance-example/src/test/java/example/chem/DefHashCalcTest.java index 0480f732b..3038db4c3 100644 --- a/gsrs-module-substance-example/src/test/java/example/chem/DefHashCalcTest.java +++ b/gsrs-module-substance-example/src/test/java/example/chem/DefHashCalcTest.java @@ -62,9 +62,43 @@ public void testOpticalInDefinitionalHashCalcNeg() throws Exception { List<DefinitionalElement> definitionalElements = new ArrayList<>(); defHashCalculator.computeDefinitionalElements(chem, 
definitionalElements::add); definitionalElements.forEach(de-> System.out.printf("key: %s = %s\n", de.getKey(), de.getValue())); + Assertions.assertTrue(definitionalElements.stream().noneMatch(de->de.getKey().equals(opticalActivityKey))); + } + + @Test + public void testOpticalInDefinitionalHashCalcNeg2() throws Exception { + String opticalActivityKey="structure.properties.opticalActivity"; + String structureJson = "{\n" + + " \"opticalActivity\": \"UNSPECIFIED\",\n" + + " \"molfile\": \"\\n ACCLDraw07282209012D\\n\\n 5 4 0 0 0 0 0 0 0 0999 V2000\\n 10.5000 -8.5938 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.5229 -8.0032 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0\\n 11.5229 -6.8217 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0\\n 12.5460 -8.5939 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 13.5692 -8.0032 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1 2 1 0 0 0 0\\n 2 3 1 0 0 0 0\\n 2 4 1 0 0 0 0\\n 4 5 1 0 0 0 0\\nM END\",\n" + + " \"stereoCenters\": 1,\n" + + " \"definedStereo\": 0,\n" + + " \"ezCenters\": 0,\n" + + " \"charge\": 0,\n" + + " \"mwt\": 92.56726,\n" + + " \"count\": 1,\n" + + " \"stereochemistry\": \"UNKNOWN\"\n" + + "}\n" + + ""; + ObjectMapper om = new ObjectMapper(); + Structure rawStructure = om.readValue(structureJson, Structure.class); + Structure instrumentedStructure =structureProcessor.instrument(rawStructure.toChemical(), true); + GinasChemicalStructure ginasChemicalStructure = new GinasChemicalStructure(instrumentedStructure); + ginasChemicalStructure.setStereoChemistry(Structure.Stereo.UNKNOWN); + + ChemicalSubstanceBuilder builder = new ChemicalSubstanceBuilder(); + ChemicalSubstance chem =builder + .setStructure(ginasChemicalStructure) + .addName("2-chlorobutane") + .build(); + ChemicalSubstanceDefinitionalElementImpl defHashCalculator = new ChemicalSubstanceDefinitionalElementImpl(); + List<DefinitionalElement> definitionalElements = new ArrayList<>(); + defHashCalculator.computeDefinitionalElements(chem, definitionalElements::add); + definitionalElements.forEach(de-> System.out.printf("key: %s = 
%s\n", de.getKey(), de.getValue())); Assertions.assertTrue(definitionalElements.stream().anyMatch(de->de.getKey().equals(opticalActivityKey))); } + @Test public void testOpticalInDefinitionalHashCalcPos() throws Exception { String opticalActivityKey="structure.properties.opticalActivity"; @@ -96,7 +130,7 @@ public void testOpticalInDefinitionalHashCalcPos() throws Exception { List<DefinitionalElement> definitionalElements = new ArrayList<>(); defHashCalculator.computeDefinitionalElements(chem, definitionalElements::add); definitionalElements.forEach(de-> System.out.printf("key: %s = %s\n", de.getKey(), de.getValue())); - Assertions.assertTrue(definitionalElements.stream().anyMatch(de->de.getKey().equals(opticalActivityKey))); + Assertions.assertTrue(definitionalElements.stream().noneMatch(de->de.getKey().equals(opticalActivityKey))); } @Test diff --git a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java index 3eca395fb..abf5ed71f 100644 --- a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java +++ b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/definitional/ChemicalSubstanceDefinitionalElementImpl.java @@ -10,6 +10,7 @@ import org.springframework.beans.factory.annotation.Autowired; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.function.Consumer; @Slf4j @@ -18,7 +19,8 @@ public class ChemicalSubstanceDefinitionalElementImpl implements DefinitionalEle @Autowired private StructureProcessor structureProcessor; - private List stereoUsingOpticalActivities = Arrays.asList( "UNKNOWN", "MIXED", "EPIMERIC", "RACEMIC"); + private List stereoUsingOpticalActivities = Collections.singletonList("UNKNOWN"); + //only UNKNOWN stereochemistry now makes opticalActivity part of the def hash (Slack conversation on 29 Oct 
2024); previous value: Arrays.asList( "UNKNOWN", "MIXED", "EPIMERIC", "RACEMIC"); @Override public boolean supports(Object s) {