Skip to content

Commit

Permalink
Merge pull request #357 from ncats/feature_test
Browse files Browse the repository at this point in the history
Feature test
  • Loading branch information
tylerperyea authored Aug 6, 2024
2 parents 3af210c + 06a1d0f commit 47b7287
Show file tree
Hide file tree
Showing 8 changed files with 249 additions and 1 deletion.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package example.chem;

import gov.fda.gsrs.ndsri.FeaturizeNitrosamine;
import gov.nih.ncats.molwitch.Chemical;
import gsrs.module.substance.utils.FeatureUtils;
import ix.ginas.models.v1.GinasChemicalStructure;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.List;
import java.util.Map;

import static org.junit.Assert.assertEquals;

@Slf4j
public class FeatureGenerationTest {

@Test
void testOneMol() throws Exception{
String molfileText = IOUtils.toString(
this.getClass().getResourceAsStream("/molfiles/1~{H}-quinolin-4-one.mol"),
"UTF-8"
);
GinasChemicalStructure structure = new GinasChemicalStructure();
structure.molfile = molfileText;
List<Map<String, String>> properties = FeatureUtils.calculateFeatures( structure.toChemical());
//properties.get(0).entrySet().forEach(e-> System.out.printf("key: %s = value: %s\n", e.getKey(), e.getValue()));
Assertions.assertEquals("5", properties.get(0).get("categoryScore"));
}

@Test
void testIVACAFTORType() throws Exception {
Chemical c1= Chemical.parse("c1ccc2c(c1)c(=O)cc[nH]2");

List<Map<String, String>> properties = FeatureUtils.calculateFeatures( c1);
assertEquals(1, properties.size());
assertEquals("A. Secondary Amine" ,properties.get(0).get("type"));
assertEquals("0,1", properties.get(0).get("Alpha-Hydrogens"));
}

@Test
void testFostamatibib() throws Exception {
Chemical c1= Chemical.parse("O.O.O.O.O.O.[Na+].[Na+].COC1=CC([NH:1]C2=NC=C(F)C(NC3=NC4=C(OC(C)(C)C(=O)N4COP([O-])([O-])=O)C=C3)=N2)=CC(OC)=C1OC");

List<Map<String, String>> properties = FeatureUtils.calculateFeatures( c1);

assertEquals(2,properties.size());
assertEquals("A. Multiple Secondary Amine", properties.get(0).get("type"));
assertEquals("0,0", properties.get(0).get("Alpha-Hydrogens"));
}

@Test
public void testIsPiperazine() throws Exception {
FeaturizeNitrosamine.GLOBAL_SETTINGS.DO_EXTENDED_FEATURES_TOO=true;

Chemical c1= Chemical.parse("N1CCNCC1");
List<Map<String, String>> properties = FeatureUtils.calculateFeatures( c1);

assertEquals(2,properties.size());
assertEquals("A. Multiple Secondary Amine", properties.get(0).get("type"));
assertEquals("2,2", properties.get(0).get("Alpha-Hydrogens"));
assertEquals("YES", properties.get(0).get( FeaturizeNitrosamine.FeaturePairRegistry.PIPERAZINE.getFeatureName()));
}

@Test
public void testCarboxylicAcidOnSaltDoesNotCount() throws Exception {
Chemical c1= Chemical.parse("O[C@H]([C@@H](O)C(O)=O)C(O)=O.COC1=CC=C(C[C@@H](C)[NH:20]C[C@H](O)C2=CC=C(O)C(NC=O)=C2)C=C1");
List<Map<String, String>> properties = FeatureUtils.calculateFeatures( c1);
assertEquals(1, properties.size());
assertEquals("A. Secondary Amine", properties.get(0).get("type"));
assertEquals("NO" ,properties.get(0).get(FeaturizeNitrosamine.FeaturePairRegistry.COOH.getFeatureName()));
}

@Test
public void testBenzylLikeFeatureShouldNotFindPsuedoAromaticity() throws Exception {
Chemical c1= Chemical.parse("C[C@H]([NH:3]C1=C2N=CNC2=NC=N1)C3=CC4=C(C(Cl)=CC=C4)C(=O)N3C5=CC=CC=C5");
List<Map<String, String>> properties = FeatureUtils.calculateFeatures( c1);
assertEquals(1, properties.size());
assertEquals("A. Secondary Amine", properties.get(0).get("type"));
assertEquals("NO", properties.get(0).get(FeaturizeNitrosamine.FeaturePairRegistry.ARYL_ALPHA.getFeatureName()));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@



11 12 0 0 0 0 0 0 0 0999 V2000
6.9665 -11.7372 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.9665 -12.9183 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.9436 -13.5088 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.9207 -12.9183 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.9207 -11.7372 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.9436 -11.1466 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.9436 -9.9655 0.0000 N 0 0 3 0 0 0 0 0 0 0 0 0
6.9665 -9.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.9893 -9.9655 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7.9893 -11.1466 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9.0122 -11.7372 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1 6 1 0 0 0 0
1 10 1 0 0 0 0
2 1 2 0 0 0 0
3 2 1 0 0 0 0
4 3 2 0 0 0 0
5 4 1 0 0 0 0
6 5 2 0 0 0 0
7 6 1 0 0 0 0
8 7 1 0 0 0 0
9 8 2 0 0 0 0
10 9 1 0 0 0 0
11 10 2 0 0 0 0
M END
5 changes: 5 additions & 0 deletions gsrs-module-substances-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -329,5 +329,10 @@
<artifactId>batik-svg-dom</artifactId>
<version>1.17</version>
</dependency>
<dependency>
<groupId>gov.fda.gsrs</groupId>
<artifactId>Featureize-Nitrosamines</artifactId>
<version>0.0.2-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import javax.servlet.http.HttpServletRequest;
import javax.validation.constraints.NotBlank;

import gsrs.module.substance.utils.FeatureUtils;
import gsrs.module.substance.utils.ChemicalUtils;
import org.freehep.graphicsio.svg.SVGGraphics2D;
import org.springframework.beans.factory.annotation.Autowired;
Expand All @@ -38,6 +39,7 @@
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.transaction.PlatformTransactionManager;
import org.springframework.transaction.TransactionDefinition;
import org.springframework.transaction.annotation.Transactional;
Expand Down Expand Up @@ -980,6 +982,7 @@ public ResponseEntity<Object> interpretStructure(@NotBlank @RequestBody String m

boolean isQuery="query".equalsIgnoreCase(mode);

boolean appendFeatures = queryParameters.containsKey("appendNNOFeatures") && queryParameters.get("appendNNOFeatures").equalsIgnoreCase("true");

try {
String payload = ChemCleaner.getCleanMolfile(mol);
Expand Down Expand Up @@ -1019,6 +1022,10 @@ public ResponseEntity<Object> interpretStructure(@NotBlank @RequestBody String m
saveTempStructure(struc);
node.put("structure", mapper.valueToTree(struc));
node.put("moieties", an);
if( appendFeatures) {
log.trace("going to append nitrosamine features");
appendFeatureStuff(struc.toChemical(), node);
}
} catch (Exception e) {
e.printStackTrace();
}
Expand All @@ -1034,6 +1041,7 @@ public ResponseEntity<Object> interpretStructure(@NotBlank @RequestBody String m
e.printStackTrace();
log.error("Can't enumerate polymer", e);
}

return new ResponseEntity<>(node, HttpStatus.OK);

} catch (Exception ex) {
Expand All @@ -1044,6 +1052,79 @@ public ResponseEntity<Object> interpretStructure(@NotBlank @RequestBody String m

}

/**
 * Interprets a posted structure and returns it together with its moieties and
 * the calculated feature list.
 *
 * <p>Recognised query parameters:
 * <ul>
 *   <li>{@code standardize} - comma-separated standardizer preset names; defaults to
 *       {@code NONE}. Names that do not resolve to a preset are ignored.</li>
 *   <li>{@code mode} - {@code query} (case-insensitive) processes the payload as a
 *       query structure; any other value, or none, uses basic mode.</li>
 * </ul>
 *
 * <p>Moieties are stored via {@code saveTempStructure}; the main structure is not.
 *
 * @param mol             the structure payload sent in the request body
 * @param queryParameters the request's query parameters
 * @return 200 with a JSON object holding {@code structure}, {@code moieties} and
 *         {@code featureList}; if interpretation fails part-way the object holds
 *         whatever was added before the failure. If the payload cannot be cleaned
 *         or the response cannot be prepared, an error message with the status the
 *         controller configuration maps from 500.
 */
@PostGsrsRestApiMapping("/interpretFeatures")
@PreAuthorize("isAuthenticated()")
public ResponseEntity<Object> interpretStructureFeatures(@NotBlank @RequestBody String mol, @RequestParam Map<String, String> queryParameters){
    String[] standardize = Optional.ofNullable(queryParameters.get("standardize"))
            .orElse("NONE")
            .split(",");
    SimpleStandardizer simpStd = Arrays.stream(standardize)
            .filter(s -> !s.equals("NONE"))
            // Locale.ROOT keeps the preset lookup independent of the server's default locale
            .map(val -> val.toUpperCase(java.util.Locale.ROOT))
            .map(val -> StructureStandardizerPresets.value(val))
            .filter(v -> v.isPresent())
            .map(v -> v.get())
            .map(std -> std.getStandardizer())
            .reduce(SimpleStandardizer::and).orElse(null);

    String mode = Optional.ofNullable(queryParameters.get("mode"))
            .orElse("basic");

    boolean isQuery = "query".equalsIgnoreCase(mode);

    try {
        String payload = ChemCleaner.getCleanMolfile(mol);
        List<Structure> moieties = new ArrayList<>();
        ObjectMapper mapper = EntityFactory.EntityMapper.FULL_ENTITY_MAPPER();
        ObjectNode node = mapper.createObjectNode();
        try {
            Structure struc = structureProcessor.taskFor(payload)
                    .components(moieties)
                    .standardize(false)
                    .query(isQuery)
                    .build()
                    .instrument()
                    .getStructure();
            // don't standardize!
            // we should be really use the PersistenceQueue to do this
            // so that it doesn't block
            // in fact, it probably shouldn't be saving this at all
            if (payload.contains("\n") && payload.contains("M END")) {
                struc.molfile = payload;
            }

            if (simpStd != null) {
                struc.molfile = simpStd.standardize(struc.molfile);
            }

            ArrayNode an = mapper.createArrayNode();
            for (Structure m : moieties) {
                saveTempStructure(m);
                ObjectNode on = mapper.valueToTree(m);
                Amount c1 = Moiety.intToAmount(m.count);
                JsonNode amt = mapper.valueToTree(c1);
                on.set("countAmount", amt);
                an.add(on);
            }
            //TODO: fill in calculation
            //saveTempStructure(struc);
            node.set("structure", mapper.valueToTree(struc));
            node.set("moieties", an);
            appendFeatureStuff(struc.toChemical(), node);

        } catch (Exception e) {
            // Best effort: the response below is still sent with whatever was added to
            // the node before the failure, so record the cause through the logger.
            log.error("Can't interpret structure features", e);
        }
        return new ResponseEntity<>(node, HttpStatus.OK);

    } catch (Exception ex) {
        log.error("Can't process payload", ex);
        return new ResponseEntity<>("Can't process mol payload",
                this.getGsrsControllerConfiguration().getHttpStatusFor(HttpStatus.INTERNAL_SERVER_ERROR, queryParameters));
    }

}
public void saveTempStructure(Structure s) {
if (s.id == null){
s.id = UUID.randomUUID();
Expand Down Expand Up @@ -1931,4 +2012,16 @@ public String makeFlexSearchMoietyClauses(Structure structure) {
}
}

/**
 * Calculates the features of a chemical and attaches them to a JSON node.
 *
 * <p>Each feature map returned by {@link FeatureUtils#calculateFeatures(Chemical)}
 * becomes one JSON object of string fields; the objects are collected, in order,
 * into an array stored under the {@code featureList} field.
 *
 * @param chemical     the chemical to calculate features for
 * @param topLevelNode the node that receives the {@code featureList} array
 * @throws Exception if the feature calculation fails
 */
private void appendFeatureStuff(Chemical chemical, ObjectNode topLevelNode ) throws Exception {
    log.trace("in appendFeatureStuff");
    List<Map<String, String>> featureList = FeatureUtils.calculateFeatures(chemical);

    // Create nodes through the target node's own factory instead of building a
    // new ObjectMapper on every call.
    ArrayNode featureArrayNode = topLevelNode.arrayNode();
    for (Map<String, String> features : featureList) {
        ObjectNode oneSet = featureArrayNode.addObject();
        for (Map.Entry<String, String> feature : features.entrySet()) {
            oneSet.put(feature.getKey(), feature.getValue());
        }
    }
    topLevelNode.set("featureList", featureArrayNode);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package gsrs.module.substance.utils;

import gov.nih.ncats.molwitch.Chemical;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import gov.fda.gsrs.ndsri.FeaturizeNitrosamine;
import gov.fda.gsrs.ndsri.FeaturizeNitrosamine.FeatureJob;
import gov.fda.gsrs.ndsri.FeaturizeNitrosamine.FeatureResponse;
import lombok.extern.slf4j.Slf4j;

/**
 * Helper for running the nitrosamine featurizer on a chemical and exposing the
 * results as plain string maps.
 */
@Slf4j
public class FeatureUtils {

    /**
     * Calculates features for a chemical.
     *
     * <p>A {@link FeatureJob} is first created with
     * {@link FeatureJob#forOneNitrosamine(Chemical)}; if that throws, the regular
     * constructor is used instead. The job is passed to
     * {@link FeaturizeNitrosamine#fingerprintNitrosamine(FeatureJob)} and every
     * response is converted to a map.
     *
     * @param chemical the chemical to analyse
     * @return one map per response, in response order; each map holds the
     *         response's feature set plus the keys {@code categoryScore},
     *         {@code sumOfScores} and {@code type}
     * @throws Exception if the fallback job creation or the featurizer fails
     */
    public static List<Map<String, String>> calculateFeatures(Chemical chemical) throws Exception{
        FeatureJob fj;
        try {
            fj = FeatureJob.forOneNitrosamine(chemical);
        } catch (Exception ex) {
            // Keep the cause visible: a summary at info level and the full stack
            // trace at debug level.
            log.info("forOneNitrosamine failed; using regular constructor ({})", ex.toString());
            log.debug("forOneNitrosamine failure detail", ex);
            fj = new FeatureJob(chemical);
        }

        List<FeatureResponse> resp = FeaturizeNitrosamine.fingerprintNitrosamine(fj);

        List<Map<String, String>> maps = new ArrayList<>(resp.size());
        for (FeatureResponse r : resp) {
            Map<String, String> ret = new HashMap<>();
            ret.putAll(r.getFeatureSet());
            // The summary keys are written last, so they replace any feature of the same name.
            ret.put("categoryScore", Integer.toString(r.getCategoryScore()));
            ret.put("sumOfScores", Integer.toString(r.getSumOfScores()));
            ret.put("type", r.getType());
            maps.add(ret);
        }
        return maps;
    }
}
2 changes: 1 addition & 1 deletion installExtraJars.cmd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
REM Installs the jars bundled under extraJars\ with the Maven wrapper's
REM install:install-file goal.
REM Each wrapper invocation uses CALL: running another batch script without it
REM hands control over for good, so only the first jar would be installed.
call mvnw.cmd install:install-file -Dfile=extraJars/applications-api-3.1-SNAPSHOT.jar
call mvnw.cmd install:install-file -Dfile=extraJars/products-api-3.1.1-SNAPSHOT.jar
call mvnw.cmd install:install-file -Dfile=extraJars/clinical-trials-api-3.1.1-SNAPSHOT.jar

call mvnw.cmd install:install-file -Dfile=extraJars/Featureize-Nitrosamines-0.0.2-SNAPSHOT.jar
1 change: 1 addition & 0 deletions installExtraJars.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Installs the jars bundled under extraJars/ with the Maven wrapper's
# install:install-file goal, one invocation per jar.
# NOTE(review): nothing here stops the script when an invocation fails, so the
# remaining jars are presumably still attempted - confirm that is intended.
./mvnw install:install-file -Dfile=extraJars/applications-api-3.1-SNAPSHOT.jar
./mvnw install:install-file -Dfile=extraJars/products-api-3.1.1-SNAPSHOT.jar
./mvnw install:install-file -Dfile=extraJars/clinical-trials-api-3.1.1-SNAPSHOT.jar
./mvnw install:install-file -Dfile=extraJars/Featureize-Nitrosamines-0.0.2-SNAPSHOT.jar

0 comments on commit 47b7287

Please sign in to comment.