diff --git a/extraJars/Featureize-Nitrosamines-0.0.2-SNAPSHOT.jar b/extraJars/Featureize-Nitrosamines-0.0.2-SNAPSHOT.jar new file mode 100644 index 000000000..eed7d1493 Binary files /dev/null and b/extraJars/Featureize-Nitrosamines-0.0.2-SNAPSHOT.jar differ diff --git a/gsrs-module-substance-example/src/test/java/example/chem/FeatureGenerationTest.java b/gsrs-module-substance-example/src/test/java/example/chem/FeatureGenerationTest.java new file mode 100644 index 000000000..4ee346066 --- /dev/null +++ b/gsrs-module-substance-example/src/test/java/example/chem/FeatureGenerationTest.java @@ -0,0 +1,107 @@
+package example.chem;
+
+import gov.fda.gsrs.ndsri.FeaturizeNitrosamine;
+import gov.nih.ncats.molwitch.Chemical;
+import gsrs.module.substance.utils.FeatureUtils;
+import ix.ginas.models.v1.GinasChemicalStructure;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Checks {@link FeatureUtils#calculateFeatures(Chemical)} against a handful of structures
+ * by asserting on selected entries of the returned feature maps.
+ */
+@Slf4j
+public class FeatureGenerationTest {
+
+    /** Classpath location of the molfile read by {@link #testOneMol()}. */
+    private static final String QUINOLINONE_MOLFILE = "/molfiles/1~{H}-quinolin-4-one.mol";
+
+    /**
+     * Calculates features with {@code DO_EXTENDED_FEATURES_TOO} switched on and restores the
+     * previous value afterwards, so the global setting cannot leak into other tests.
+     * NOTE(review): assumes the PIPERAZINE, COOH and ARYL_ALPHA entries are only guaranteed
+     * to be present with the flag on - confirm against the library defaults.
+     */
+    private static List<Map<String, String>> calculateWithExtendedFeatures(Chemical chemical) throws Exception {
+        boolean previous = FeaturizeNitrosamine.GLOBAL_SETTINGS.DO_EXTENDED_FEATURES_TOO;
+        FeaturizeNitrosamine.GLOBAL_SETTINGS.DO_EXTENDED_FEATURES_TOO = true;
+        try {
+            return FeatureUtils.calculateFeatures(chemical);
+        } finally {
+            FeaturizeNitrosamine.GLOBAL_SETTINGS.DO_EXTENDED_FEATURES_TOO = previous;
+        }
+    }
+
+    @Test
+    void testOneMol() throws Exception {
+        String molfileText;
+        // Close the resource stream and fail with a clear message when the fixture is missing.
+        try (InputStream molfileStream = getClass().getResourceAsStream(QUINOLINONE_MOLFILE)) {
+            Assertions.assertNotNull(molfileStream, "missing test resource " + QUINOLINONE_MOLFILE);
+            molfileText = IOUtils.toString(molfileStream, StandardCharsets.UTF_8);
+        }
+        GinasChemicalStructure structure = new GinasChemicalStructure();
+        structure.molfile = molfileText;
+        List<Map<String, String>> properties = FeatureUtils.calculateFeatures(structure.toChemical());
+        Assertions.assertEquals("5", properties.get(0).get("categoryScore"));
+    }
+
+    @Test
+    void testIVACAFTORType() throws Exception {
+        Chemical c1 = Chemical.parse("c1ccc2c(c1)c(=O)cc[nH]2");
+
+        List<Map<String, String>> properties = FeatureUtils.calculateFeatures(c1);
+        assertEquals(1, properties.size());
+        assertEquals("A. Secondary Amine", properties.get(0).get("type"));
+        assertEquals("0,1", properties.get(0).get("Alpha-Hydrogens"));
+    }
+
+    @Test
+    void testFostamatibib() throws Exception {
+        Chemical c1 = Chemical.parse("O.O.O.O.O.O.[Na+].[Na+].COC1=CC([NH:1]C2=NC=C(F)C(NC3=NC4=C(OC(C)(C)C(=O)N4COP([O-])([O-])=O)C=C3)=N2)=CC(OC)=C1OC");
+
+        List<Map<String, String>> properties = FeatureUtils.calculateFeatures(c1);
+
+        assertEquals(2, properties.size());
+        assertEquals("A. Multiple Secondary Amine", properties.get(0).get("type"));
+        assertEquals("0,0", properties.get(0).get("Alpha-Hydrogens"));
+    }
+
+    @Test
+    public void testIsPiperazine() throws Exception {
+        Chemical c1 = Chemical.parse("N1CCNCC1");
+        List<Map<String, String>> properties = calculateWithExtendedFeatures(c1);
+
+        assertEquals(2, properties.size());
+        assertEquals("A. Multiple Secondary Amine", properties.get(0).get("type"));
+        assertEquals("2,2", properties.get(0).get("Alpha-Hydrogens"));
+        assertEquals("YES", properties.get(0).get(FeaturizeNitrosamine.FeaturePairRegistry.PIPERAZINE.getFeatureName()));
+    }
+
+    @Test
+    public void testCarboxylicAcidOnSaltDoesNotCount() throws Exception {
+        Chemical c1 = Chemical.parse("O[C@H]([C@@H](O)C(O)=O)C(O)=O.COC1=CC=C(C[C@@H](C)[NH:20]C[C@H](O)C2=CC=C(O)C(NC=O)=C2)C=C1");
+        List<Map<String, String>> properties = calculateWithExtendedFeatures(c1);
+        assertEquals(1, properties.size());
+        assertEquals("A. Secondary Amine", properties.get(0).get("type"));
+        assertEquals("NO", properties.get(0).get(FeaturizeNitrosamine.FeaturePairRegistry.COOH.getFeatureName()));
+    }
+
+    @Test
+    public void testBenzylLikeFeatureShouldNotFindPsuedoAromaticity() throws Exception {
+        Chemical c1 = Chemical.parse("C[C@H]([NH:3]C1=C2N=CNC2=NC=N1)C3=CC4=C(C(Cl)=CC=C4)C(=O)N3C5=CC=CC=C5");
+        List<Map<String, String>> properties = calculateWithExtendedFeatures(c1);
+        assertEquals(1, properties.size());
+        assertEquals("A. Secondary Amine", properties.get(0).get("type"));
+        assertEquals("NO", properties.get(0).get(FeaturizeNitrosamine.FeaturePairRegistry.ARYL_ALPHA.getFeatureName()));
+    }
+}
diff --git a/gsrs-module-substance-example/src/test/resources/molfiles/1~{H}-quinolin-4-one.mol b/gsrs-module-substance-example/src/test/resources/molfiles/1~{H}-quinolin-4-one.mol new file mode 100644 index 000000000..44c623c13 --- /dev/null +++ b/gsrs-module-substance-example/src/test/resources/molfiles/1~{H}-quinolin-4-one.mol @@ -0,0 +1,28 @@ + + + + 11 12 0 0 0 0 0 0 0 0999 V2000 + 6.9665 -11.7372 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.9665 -12.9183 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9436 -13.5088 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.9207 -12.9183 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.9207 -11.7372 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9436 -11.1466 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9436 -9.9655 0.0000 N 0 0 3 0 0 0 0 0 0 0 0 0 + 6.9665 -9.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.9893 -9.9655 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.9893 -11.1466 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 9.0122 -11.7372 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1 6 1 0 0 0 0 + 1 10 1 0 0 0 0 + 2 1 2 0 0 0 0 + 3 2 1 0 0 0 0 + 4 3 2 0 0 0 0 + 5 4 1 0 0 0 0 + 6 5 2 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 8 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 2 0 0 0 0 +M END diff --git a/gsrs-module-substances-core/pom.xml b/gsrs-module-substances-core/pom.xml index ba45a791f..378d787f1 100644 --- a/gsrs-module-substances-core/pom.xml +++ b/gsrs-module-substances-core/pom.xml @@ -329,5 +329,10 @@ batik-svg-dom 1.17 + + gov.fda.gsrs + Featureize-Nitrosamines + 0.0.2-SNAPSHOT + diff --git a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/controllers/SubstanceController.java b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/controllers/SubstanceController.java index fc3b87205..9328b9066 100644 --- a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/controllers/SubstanceController.java +++ 
b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/controllers/SubstanceController.java @@ -28,6 +28,7 @@ import javax.servlet.http.HttpServletRequest; import javax.validation.constraints.NotBlank; +import gsrs.module.substance.utils.FeatureUtils; import gsrs.module.substance.utils.ChemicalUtils; import org.freehep.graphicsio.svg.SVGGraphics2D; import org.springframework.beans.factory.annotation.Autowired; @@ -38,6 +39,7 @@ import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; +import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.transaction.PlatformTransactionManager; import org.springframework.transaction.TransactionDefinition; import org.springframework.transaction.annotation.Transactional; @@ -980,6 +982,7 @@ public ResponseEntity interpretStructure(@NotBlank @RequestBody String m boolean isQuery="query".equalsIgnoreCase(mode); + boolean appendFeatures = queryParameters.containsKey("appendNNOFeatures") && queryParameters.get("appendNNOFeatures").equalsIgnoreCase("true"); try { String payload = ChemCleaner.getCleanMolfile(mol); @@ -1019,6 +1022,10 @@ public ResponseEntity interpretStructure(@NotBlank @RequestBody String m saveTempStructure(struc); node.put("structure", mapper.valueToTree(struc)); node.put("moieties", an); + if( appendFeatures) { + log.trace("going to append nitrosamine features"); + appendFeatureStuff(struc.toChemical(), node); + } } catch (Exception e) { e.printStackTrace(); } @@ -1034,6 +1041,7 @@ public ResponseEntity interpretStructure(@NotBlank @RequestBody String m e.printStackTrace(); log.error("Can't enumerate polymer", e); } + return new ResponseEntity<>(node, HttpStatus.OK); } catch (Exception ex) { @@ -1044,6 +1052,95 @@ public ResponseEntity interpretStructure(@NotBlank @RequestBody String m }
+    /**
+     * Interprets a posted structure and returns it together with its moieties and its
+     * nitrosamine feature list.
+     * <p>
+     * Query parameters read here: {@code standardize} (comma-separated preset names;
+     * {@code NONE} entries are skipped) and {@code mode} ({@code query}, compared
+     * case-insensitively, builds a query structure; the default is {@code basic}).
+     * <p>
+     * Processing is best-effort: when building the structure or calculating the features
+     * fails, the failure is logged and whatever was assembled so far is returned with 200.
+     *
+     * @param mol             structure text taken from the request body
+     * @param queryParameters query parameters of the request
+     * @return 200 with a JSON object ({@code structure}, {@code moieties},
+     *         {@code featureList}), or the configured error status with a plain-text
+     *         message when the payload cannot be processed at all
+     */
+    @PostGsrsRestApiMapping("/interpretFeatures")
+    @PreAuthorize("isAuthenticated()")
+    public ResponseEntity<Object> interpretStructureFeatures(@NotBlank @RequestBody String mol, @RequestParam Map<String, String> queryParameters){
+        String[] standardize = Optional.ofNullable(queryParameters.get("standardize"))
+                .orElse("NONE")
+                .split(",");
+        SimpleStandardizer simpStd=Arrays.stream(standardize)
+                .filter(s->!s.equals("NONE"))
+                .map(val->val.toUpperCase())
+                .map(val->StructureStandardizerPresets.value(val))
+                .filter(v->v.isPresent())
+                .map(v->v.get())
+                .map(std->std.getStandardizer())
+                .reduce(SimpleStandardizer::and).orElse(null);
+
+        String mode = Optional.ofNullable(queryParameters.get("mode"))
+                .orElse("basic");
+
+        boolean isQuery="query".equalsIgnoreCase(mode);
+
+        try {
+            String payload = ChemCleaner.getCleanMolfile(mol);
+            List<Structure> moieties = new ArrayList<>();
+            ObjectMapper mapper = EntityFactory.EntityMapper.FULL_ENTITY_MAPPER();
+            ObjectNode node = mapper.createObjectNode();
+            try {
+                Structure struc = structureProcessor.taskFor(payload)
+                        .components(moieties)
+                        .standardize(false)
+                        .query(isQuery)
+                        .build()
+                        .instrument()
+                        .getStructure();
+                // don't standardize!
+                // we should really use the PersistenceQueue to do this
+                // so that it doesn't block
+                // in fact, it probably shouldn't be saving this at all
+                // A V2000 molfile is terminated by "M  END" (two spaces).
+                if (payload.contains("\n") && payload.contains("M  END")) {
+                    struc.molfile = payload;
+                }
+
+                if(simpStd!=null) {
+                    struc.molfile=simpStd.standardize(struc.molfile);
+                }
+
+                ArrayNode an = mapper.createArrayNode();
+                for (Structure m : moieties) {
+                    saveTempStructure(m);
+                    ObjectNode on = mapper.valueToTree(m);
+                    Amount c1 = Moiety.intToAmount(m.count);
+                    JsonNode amt = mapper.valueToTree(c1);
+                    on.set("countAmount", amt);
+                    an.add(on);
+                }
+                //TODO: fill in calculation
+                // Only the moieties are saved here; the top-level structure is not.
+                node.set("structure", mapper.valueToTree(struc));
+                node.set("moieties", an);
+                appendFeatureStuff(struc.toChemical(), node);
+            } catch (Exception e) {
+                // Best-effort: log with the stack trace and still return what was assembled.
+                log.error("Can't interpret structure features", e);
+            }
+            return new ResponseEntity<>(node, HttpStatus.OK);
+        } catch (Exception ex) {
+            log.error("Can't process payload", ex);
+            return new ResponseEntity<>("Can't process mol payload",
+                    this.getGsrsControllerConfiguration().getHttpStatusFor(HttpStatus.INTERNAL_SERVER_ERROR, queryParameters));
+        }
+    }
+
public void saveTempStructure(Structure s) { if (s.id == null){ s.id = UUID.randomUUID(); @@ -1931,4 +2028,23 @@ public String makeFlexSearchMoietyClauses(Structure structure) { } }
+    /**
+     * Calculates the nitrosamine features of a chemical and stores them on the given
+     * node as an array named {@code featureList}, one JSON object per feature map.
+     *
+     * @param chemical     structure to featurize
+     * @param topLevelNode node that receives the {@code featureList} array
+     * @throws Exception propagated from {@code FeatureUtils.calculateFeatures}
+     */
+    private void appendFeatureStuff(Chemical chemical, ObjectNode topLevelNode ) throws Exception {
+        log.trace("in appendFeatureStuff");
+        // Calculate before touching the node so a failure leaves it unchanged.
+        List<Map<String, String>> featureList = FeatureUtils.calculateFeatures(chemical);
+        // putArray/addObject build the nodes in place, so no ObjectMapper is needed here.
+        ArrayNode featureArrayNode = topLevelNode.putArray("featureList");
+        for (Map<String, String> features : featureList) {
+            ObjectNode oneSet = featureArrayNode.addObject();
+            features.forEach((key, value) -> oneSet.put(key, value));
+        }
+    }
} diff --git a/gsrs-module-substances-core/src/main/java/gsrs/module/substance/utils/FeatureUtils.java 
b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/utils/FeatureUtils.java new file mode 100644 index 000000000..bb6a660b4 --- /dev/null +++ b/gsrs-module-substances-core/src/main/java/gsrs/module/substance/utils/FeatureUtils.java @@ -0,0 +1,52 @@
+package gsrs.module.substance.utils;
+
+import gov.nih.ncats.molwitch.Chemical;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import gov.fda.gsrs.ndsri.FeaturizeNitrosamine;
+import gov.fda.gsrs.ndsri.FeaturizeNitrosamine.FeatureJob;
+import gov.fda.gsrs.ndsri.FeaturizeNitrosamine.FeatureResponse;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Thin adapter around {@link FeaturizeNitrosamine} that exposes its results as plain
+ * string maps.
+ */
+@Slf4j
+public class FeatureUtils {
+    /**
+     * Calculates nitrosamine features for a chemical.
+     * <p>
+     * Each {@link FeatureResponse} becomes one map holding every entry of its feature
+     * set plus the keys {@code categoryScore}, {@code sumOfScores} and {@code type}.
+     *
+     * @param chemical structure to featurize
+     * @return one map per response, in the order the responses were returned
+     * @throws Exception propagated from the fallback {@code FeatureJob} constructor or from {@code fingerprintNitrosamine}
+     */
+    public static List<Map<String, String>> calculateFeatures(Chemical chemical) throws Exception{
+        FeatureJob fj;
+        try{
+            fj = FeatureJob.forOneNitrosamine(chemical);
+        } catch (Exception ex) {
+            // Deliberate best-effort fallback; pass the exception so its cause is not lost.
+            log.info("forOneNitrosamine failed; using regular constructor", ex);
+            fj = new FeatureJob(chemical);
+        }
+
+        List<FeatureResponse> resp = FeaturizeNitrosamine.fingerprintNitrosamine(fj);
+
+        List<Map<String, String>> maps = new ArrayList<>(resp.size());
+        for (FeatureResponse r : resp) {
+            Map<String, String> ret = new HashMap<>(r.getFeatureSet());
+            ret.put("categoryScore", Integer.toString(r.getCategoryScore()));
+            ret.put("sumOfScores", Integer.toString(r.getSumOfScores()));
+            ret.put("type", r.getType());
+            maps.add(ret);
+        }
+        return maps;
+    }
+}
diff --git a/installExtraJars.cmd b/installExtraJars.cmd index 7ce815ab1..cdc9524e4 100755 --- a/installExtraJars.cmd +++ b/installExtraJars.cmd @@ -1,4 +1,4 @@ mvnw.cmd install:install-file -Dfile=extraJars/applications-api-3.1-SNAPSHOT.jar mvnw.cmd install:install-file -Dfile=extraJars/products-api-3.1.1-SNAPSHOT.jar mvnw.cmd install:install-file -Dfile=extraJars/clinical-trials-api-3.1.1-SNAPSHOT.jar - +mvnw.cmd install:install-file -Dfile=extraJars/Featureize-Nitrosamines-0.0.2-SNAPSHOT.jar diff --git a/installExtraJars.sh b/installExtraJars.sh index 1e37edb79..7e95c4d93 
100755 --- a/installExtraJars.sh +++ b/installExtraJars.sh @@ -1,3 +1,4 @@ ./mvnw install:install-file -Dfile=extraJars/applications-api-3.1-SNAPSHOT.jar ./mvnw install:install-file -Dfile=extraJars/products-api-3.1.1-SNAPSHOT.jar ./mvnw install:install-file -Dfile=extraJars/clinical-trials-api-3.1.1-SNAPSHOT.jar +./mvnw install:install-file -Dfile=extraJars/Featureize-Nitrosamines-0.0.2-SNAPSHOT.jar