Skip to content

Commit

Permalink
Merge branch 'ncats:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
pabbathreddya2 authored Jul 5, 2024
2 parents b2e6f2a + 8c59bfc commit 3914dc3
Show file tree
Hide file tree
Showing 15 changed files with 210 additions and 38 deletions.
2 changes: 1 addition & 1 deletion gsrs-module-substance-example/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@
<dependency>
<groupId>gov.nih.ncats</groupId>
<artifactId>molwitch-cdk</artifactId>
<version>1.0.18</version>
<version>1.0.19</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,45 @@
import gov.nih.ncats.molwitch.Chemical;
import ix.core.chem.Chem;
import ix.ginas.models.v1.FragmentVocabularyTerm;
import ix.ginas.utils.validation.validators.CVFragmentStructureValidator;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.io.IOException;
import java.util.Optional;
import java.util.stream.Stream;

@Slf4j
public class VocabFragmentCleanupTest {

/**
 * Checks that {@link Chem#RemoveQueryFeaturesForPseudoInChI(Chemical)} handles a fragment with
 * three {@code [*]} atoms: the resulting InChIKey must be non-empty and equal to the key obtained
 * by textually replacing every {@code *} in the input SMILES with {@code 6He}, and the resulting
 * molfile must not contain any {@code *}.
 *
 * @throws IOException if parsing or serializing the structure fails
 */
@Test
void testQueryFeatures() throws IOException {
    //test like we have a vocabulary term here
    FragmentVocabularyTerm fragmentVocabularyTerm = new FragmentVocabularyTerm();
    fragmentVocabularyTerm.setFragmentStructure("[*]N[C@@H](CS[*])C([*])=O |$_R1;;;;;_R3;;_R2;$|"); //_R... - atom alias
    // Only the part before the first space (the plain SMILES) is parsed below.
    String inputStructure = fragmentVocabularyTerm.getFragmentStructure().split(" ")[0];
    //todo: include optional square brackets around *...
    // 2 replacements

    // Reference input: the same SMILES with each '*' swapped lexically for "6He".
    String lexicallyCleaned = inputStructure.replace("*", "6He");
    log.trace("lexicallyCleaned: {}", lexicallyCleaned);

    Chemical chem = Chemical.parse(inputStructure);
    chem = Chem.RemoveQueryFeaturesForPseudoInChI(chem);
    String processedSmiles = chem.toSmiles();
    log.trace("processedSmiles:{}", processedSmiles);
    String inChIKey = chem.toInchi().getKey();
    log.debug("Created InChIKey: {}", inChIKey);
    Assertions.assertTrue(inChIKey.length()>0);

    String expectedInChIKey = getInChiKey(lexicallyCleaned);// "OTIPWSTYBNONSP-CQOJXGFHSA-N";
    Assertions.assertEquals(expectedInChIKey, inChIKey);
    String molfile = chem.toMol();
    log.trace("molfile:\n {}", molfile);
    Assertions.assertFalse(molfile.contains("*"));
}

@Test
Expand All @@ -36,4 +56,109 @@ void testOutput() throws IOException {
Assertions.assertTrue(smiles.length()>0);
}


/**
 * Runs a single long-chain fragment with one {@code [*]} atom through
 * {@link Chem#RemoveQueryFeaturesForPseudoInChI(Chemical)} and compares the resulting InChIKey
 * with the key of the same SMILES after replacing {@code *} with {@code 6He}; also asserts that
 * the generated molfile contains no {@code *}.
 *
 * @throws IOException if parsing or serializing the structure fails
 */
@Test
void testSmilesParse() throws IOException {
    //test like we have a vocabulary term here
    FragmentVocabularyTerm term = new FragmentVocabularyTerm();
    term.setFragmentStructure("CCCCCCCC\\C=C/CCCCCCCCCCCCOCC(CO[*])OCCCCCCCCCCCC\\C=C/CCCCCCCC |$;;;;;;;;;;;;;;;;;;;;;;;;;;;_R92;;;;;;;;;;;;;;;;;;;;;;;$|");

    // Keep only the SMILES portion that precedes the first space.
    String smilesOnly = term.getFragmentStructure().split(" ")[0];
    String heliumSubstituted = smilesOnly.replace("*", "6He");
    log.trace("lexicallyCleaned: {}", heliumSubstituted);

    Chemical cleaned = Chem.RemoveQueryFeaturesForPseudoInChI(Chemical.parse(smilesOnly));
    log.trace("processedSmiles: {}", cleaned.toSmiles());
    String actualKey = cleaned.toInchi().getKey();

    String expectedKey = getInChiKey(heliumSubstituted);
    log.debug("Created InChIKey: {}", actualKey);
    Assertions.assertEquals(expectedKey, actualKey);

    String molfile = cleaned.toMol();
    log.trace("molfile:\n {}", molfile);
    Assertions.assertFalse(molfile.contains("*"));
}

/**
 * Supplies the inputs for {@code testSmilesParsing}: one argument per fragment structure.
 * Each string is a SMILES containing one or more {@code [*]} atoms, followed by a space and a
 * trailing {@code |...|} block that carries the {@code _R} atom aliases.
 *
 * @return a stream with one single-element {@link Arguments} per fragment string
 */
private static Stream<Arguments> fragmentSmiles() {
    Stream<String> rawFragments = Stream.of(
            "C[C@H](N[*])C([*])=O |$;;;_R1;;_R2;$|",
            "[*]N[C@@H](CS[*])C([*])=O |$_R1;;;;;_R3;;_R2;$|",
            "[*]N[C@@H](CC([*])=O)C([*])=O |$_R1;;;;;_R3;;;_R2;$|",
            "[*]N[C@@H](CCC([*])=O)C([*])=O |$_R1;;;;;;_R3;;;_R2;$|",
            "[*]N[C@@H](CC1=CC=CC=C1)C([*])=O |$_R1;;;;;;;;;;;_R2;$,c:6,8,t:4|",
            "[*]NCC([*])=O |$_R1;;;;_R2;$|",
            "[*]N[C@@H](CC1=CNC=N1)C([*])=O |$_R1;;;;;;;;;;_R2;$,c:7,t:4|",
            "CC[C@H](C)[C@H](N[*])C([*])=O |$;;;;;;_R1;;_R2;$|",
            "[*]N[C@@H](CCCCN[*])C([*])=O |$_R1;;;;;;;;_R3;;_R2;$|",
            "CC(C)C[C@H](N[*])C([*])=O |$;;;;;;_R1;;_R2;$|",
            "CSCC[C@H](N[*])C([*])=O |$;;;;;;_R1;;_R2;$|",
            "NC(=O)C[C@H](N[*])C([*])=O |$;;;;;;_R1;;_R2;$|",
            "[*]N1CCC[C@H]1C([*])=O |$_R1;;;;;;;_R2;$|",
            "NC(=O)CC[C@H](N[*])C([*])=O |$;;;;;;;_R1;;_R2;$|",
            "NC(=N)NCCC[C@H](N[*])C([*])=O |$;;;;;;;;;_R1;;_R2;$|",
            "OC[C@H](N[*])C([*])=O |$;;;;_R1;;_R2;$|",
            "C[C@@H](O)[C@H](N[*])C([*])=O |$;;;;;_R1;;_R2;$|",
            "CC(C)[C@H](N[*])C([*])=O |$;;;;;_R1;;_R2;$|",
            "[*]N[C@@H](CC1=CNC2=C1C=CC=C2)C([*])=O |$_R1;;;;;;;;;;;;;;_R2;$,c:7,10,12,t:4|",
            "OC1=CC=C(C[C@H](N[*])C([*])=O)C=C1 |$;;;;;;;;_R1;;_R2;;;$,c:12,t:1,3|",
            "CCCCCCCC\\C=C/CCCCCCCCCCCCOCC(CO[*])OCCCCCCCCCCCC\\C=C/CCCCCCCC |$;;;;;;;;;;;;;;;;;;;;;;;;;;;_R92;;;;;;;;;;;;;;;;;;;;;;;$|",
            "[H]C(=O)c1ccc(cc1)C(=O)NCCCCCCO[*] |$;;;;;;;;;;;;;;;;;;;_R92$|",
            "[*]OC[C@]12CO[C@H]([C@H]([*])O1)[C@H]2O[*] |$_R91;;;;;;;;_R90;;;;_R92$|",
            "OC[C@H]([*])O[C@H](CO[*])CO[*] |$;;;_R90;;;;;_R91;;;_R92$|",
            "FC(F)(F)C(OCCO[C@H]1[C@H]([*])O[C@H](CO[*])[C@H]1O[*])(C(F)(F)F)C(F)(F)F |$;;;;;;;;;;;_R90;;;;;_R91;;;_R92;;;;;;;;$|",
            "O[C@@H]1[C@@H](CO[*])O[C@@H]([*])[C@@H]1O[*] |$;;;;;_R91;;;_R90;;;_R92$|",
            "CO[C@H]1[C@H]([*])O[C@H](CO[*])[C@H]1O[*] |$;;;;_R90;;;;;_R91;;;_R92$|",
            "O[C@H]1[C@H]([*])O[C@H](CO[*])[C@H]1N[*] |$;;;_R90;;;;;_R91;;;_R92$|",
            "[*]OC[C@@]12CO[C@@H]([C@H]([*])O1)[C@@H]2O[*] |$_R91;;;;;;;;_R90;;;;_R92$|",
            "O[C@H]1[C@H]([*])O[C@H](CO[*])[C@H]1O[*] |$;;;_R90;;;;;_R91;;;_R92$|",
            "[*]OC[C@H]1O[C@@H]([*])C[C@@H]1O[*] |$_R91;;;;;;_R90;;;;_R92$|",
            "COCCO[C@H]1[C@H]([*])O[C@H](CO[*])[C@H]1O[*] |$;;;;;;;_R90;;;;;_R91;;;_R92$|",
            "[*]OC[C@@H]1CN([*])C[C@H]([*])O1 |$_R91;;;;;;_R92;;;_R90;$|",
            "Oc1ccc2c(Oc3cc(O)ccc3C22OC(=O)c3ccc(cc23)C(=O)NCCCCC(CO[*])O[*])c1 |$;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;_R91;;_R92;$|",
            "[*]N([*])CCCCCCO[*] |$_R91;;_R90;;;;;;;;_R92$|"
    );
    // Wrap every raw string as the sole argument of one parameterized invocation.
    return rawFragments.map(Arguments::of);
}

/**
 * Parameterized check over every entry supplied by {@code fragmentSmiles()}: after
 * {@link Chem#RemoveQueryFeaturesForPseudoInChI(Chemical)} the InChIKey must equal the key of the
 * same SMILES with each {@code *} replaced by {@code 6He}, and the molfile must contain no
 * {@code *}.
 *
 * @param rawInput fragment structure; only the text before the first space is parsed
 * @throws IOException if parsing or serializing the structure fails
 */
@ParameterizedTest
@MethodSource("fragmentSmiles")
void testSmilesParsing(String rawInput) throws IOException {
    String inputSmiles = rawInput.split(" ")[0];
    String lexicallyCleaned = inputSmiles.replace("*", "6He");
    // SLF4J substitutes "{}" only; a printf-style "%s" would be logged literally.
    log.debug("lexicallyCleaned: {}", lexicallyCleaned);

    Chemical parsedChemical = Chemical.parse(inputSmiles);
    Chemical cleanedChemical = Chem.RemoveQueryFeaturesForPseudoInChI(parsedChemical);
    String processedSmiles = "unknown";
    try {
        processedSmiles= cleanedChemical.toSmiles();
    }catch (NullPointerException npe) {
        // SMILES output is diagnostic only here, so a failure is logged (with its stack trace)
        // and the InChIKey/molfile assertions below still run.
        log.error("Error creating output SMILES from input {}", inputSmiles, npe);
    }
    log.debug("processedSmiles: {}", processedSmiles);
    String inChIKey = cleanedChemical.toInchi().getKey();

    String expectedInChIKey =getInChiKey(lexicallyCleaned);
    log.debug("Created InChIKey: {}", inChIKey);
    Assertions.assertEquals(expectedInChIKey, inChIKey);
    String molfile = cleanedChemical.toMol();
    log.trace("molfile: {}", molfile);
    Assertions.assertFalse(molfile.contains("*"));
}

/**
 * Asserts that {@code CVFragmentStructureValidator.getHash} returns a present value for a
 * vocabulary term whose fragment structure contains a {@code [*]} atom.
 *
 * @throws IOException declared for parity with the other tests in this class
 */
@Test
void testSmilesParse2() throws IOException {
    //test like we have a vocabulary term here
    FragmentVocabularyTerm term = new FragmentVocabularyTerm();
    term.setFragmentStructure("CCCCCCCC\\C=C/CCCCCCCCCCCCOCC(CO[*])OCCCCCCCCCCCC\\C=C/CCCCCCCC |$;;;;;;;;;;;;;;;;;;;;;;;;;;;_R92;;;;;;;;;;;;;;;;;;;;;;;$|");

    Optional<String> computedHash = CVFragmentStructureValidator.getHash(term);

    Assertions.assertTrue(computedHash.isPresent());
    // Safe to unwrap: the assertion above has already failed the test if the value is absent.
    log.trace("hash: {}", computedHash.get());
}

/**
 * Parses the given structure text and returns the key of its InChI result.
 *
 * @param smiles structure text passed to {@code Chemical.parse}
 * @return the InChIKey reported by {@code toInchi().getKey()}
 * @throws IOException if parsing or InChI generation fails
 */
private String getInChiKey(String smiles) throws IOException {
    return Chemical.parse(smiles).toInchi().getKey();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ logging.level.ix.core.EntityFetcher=OFF
#logging.level.gsrs.module.substance.scrubbers=trace
#logging.level.example.exports.scrubbers=trace

logging.level.ix.core.chem=TRACE;
logging.level.ix.core.chem=TRACE
logging.level.example.chem=TRACE

spring.jpa.database-platform=org.hibernate.dialect.H2Dialect
Expand Down
4 changes: 2 additions & 2 deletions gsrs-module-substances-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@
<dependency>
<groupId>gov.nih.ncats</groupId>
<artifactId>molwitch</artifactId>
<version>0.6.7</version>
<version>0.6.8</version>
</dependency>
<dependency>
<groupId>gov.nih.ncats</groupId>
<artifactId>molwitch-cdk</artifactId>
<version>1.0.18</version>
<version>1.0.19</version>
</dependency>
<dependency>
<groupId>gov.nih.ncats</groupId>
Expand Down
2 changes: 1 addition & 1 deletion gsrs-module-substances-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@
<dependency>
<groupId>gov.nih.ncats</groupId>
<artifactId>molwitch</artifactId>
<version>0.6.7</version>
<version>0.6.8</version>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -96,6 +98,16 @@ public ResponseEntity<Object> uploadPayload(
return payloadController.handleFileUpload(file, queryParameters);

}

// The doubled backslash is Java string escaping: the regex engine sees "\-", an escaped hyphen.
// At the end of a character class the escape is not required, but it is harmless and keeps the
// pattern textually identical to the one used in the @GetMapping path templates.
// Compiled once and shared; Pattern instances are immutable and thread-safe.
public static final Pattern ID_PATTERN = Pattern.compile("[a-f0-9\\-]+");

/**
 * Tells whether the given id consists solely of lowercase hex digits and hyphens.
 *
 * @param id the id taken from the request path; may be {@code null}
 * @return {@code true} only if {@code id} is non-null, non-empty and every character is in
 *         {@code [a-f0-9-]}
 */
private boolean checkId(String id) {
    if (id == null) {
        return false;
    }
    // matches() requires the WHOLE input to fit the pattern. find() would accept any string
    // that merely contains one allowed character, which defeats the purpose of the check.
    return ID_PATTERN.matcher(id).matches();
}

//GET /export/$id<[a-f0-9\-]+>.$format<(mol|sdf|smi|smiles|fas)>
// ix.ginas.controllers.GinasApp.structureExport(id: String, format: String, context: String ?= null)
@GetMapping({"export/{id:[a-f0-9\\-]+}.{format}","/ginas/app/export/{id:[a-f0-9\\-]+}.{format}"})
Expand All @@ -106,6 +118,10 @@ public Object exportStructure(@PathVariable String id, @PathVariable String form
@RequestParam(value = "stereo", required = false, defaultValue = "") Boolean stereo,
HttpServletRequest httpRequest, RedirectAttributes attributes,
@RequestParam Map<String, String> queryParameters){
if (!checkId(id)) {
// This is to satisfy Snyk security analysis, probably never gets here if annotation works.
return gsrsControllerConfiguration.handleBadRequest(400, "Badly formatted id in url placeholder", null);
}
if("mol".equalsIgnoreCase(format) || "sdf".equalsIgnoreCase(format) ||
"smi".equalsIgnoreCase(format) || "smiles".equalsIgnoreCase(format) ) {
//TODO: use cache where possible here
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.util.stream.Stream;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;

import gsrs.controller.AbstractLegacyTextSearchGsrsEntityController;
import gsrs.controller.hateoas.GsrsEntityToControllerMapper;
Expand Down Expand Up @@ -42,11 +41,13 @@ public void run(SchedulerPlugin.JobStats stats, SchedulerPlugin.TaskListener l)
log.info("DatabaseIndexSyncTask: No allowed entities defined for the database and index sync scheduler.");
return;
}

l.message("Getting entities synchronized");
for(String entity: syncEntities) {

String entityClassName = generateEntityClassName(entity);
log.info("DatabaseIndexSyncTask: Entity class: " + entityClassName);
//To improve: need a generalized way of dealing with this, there are several different versions of this
//need to do this in a utility class

l.message("Start indexing " + entity);
String entityClassName = generateEntityClassName(entity);

if(entityClassName.isEmpty()) {
log.error("Illegal entity class: " + entity + " in database and index sync scheduler.");
Expand All @@ -69,25 +70,38 @@ public void run(SchedulerPlugin.JobStats stats, SchedulerPlugin.TaskListener l)
if(!controllerOpt.isPresent()) {
continue;
}

AbstractLegacyTextSearchGsrsEntityController searchController = (AbstractLegacyTextSearchGsrsEntityController) StaticContextAccessor.getBean(controllerOpt.get());
try {
searchController.syncIndexesWithDatabase();
} catch (JsonProcessingException e) {
log.error("Error in database and index sync scheduler: " + entityClassName);

log.info("Starting DatabaseIndexSync job for Entity class: " + entity);

AbstractLegacyTextSearchGsrsEntityController.ReindexJobStatus stat = searchController.syncIndexesWithDatabaseWithStatus();

while(!stat.isDone()) {
l.message(entity + ": " + stat.getStatus());
stat = searchController.getJobStatus(stat.getStatusID());
Thread.sleep(2000);
}
l.message("Indexing " + entity + " is done");

log.info("Ending DatabaseIndexSync job for Entity class: " + entity);

} catch (Exception e) {
log.error("Error in database and index sync scheduler: " + entity);
e.printStackTrace();
continue;
}
}
}

/**
 * Builds a fully qualified class name by prefixing the given entity name with the
 * {@code ix.ginas.models.v1} package.
 *
 * @param entity simple entity name as configured for the sync task
 * @return {@code "ix.ginas.models.v1."} followed by {@code entity}
 */
private String generateEntityClassName(String entity) {
    final String modelPackagePrefix = "ix.ginas.models.v1.";
    return modelPackagePrefix + entity;
}

@Override
public String getDescription() {
return "Reindex entities in backup tables that are not in indexes";
return "Database and index sync: reindex entities in backup tables that are not in indexes";
}

}
19 changes: 11 additions & 8 deletions gsrs-module-substances-core/src/main/java/ix/core/chem/Chem.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import ix.core.util.LogUtil;
import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
Expand All @@ -31,10 +32,16 @@ public static void setFormula (Structure struc) {

public static Chemical RemoveQueryFeaturesForPseudoInChI(Chemical c) {
Chemical chemicalToUse = c;
if(c.hasQueryAtoms() || c.atoms().filter(at->"A".equals(at.getSymbol())).count()>0){
/*try {
log.trace("RemoveQueryFeaturesForPseudoInChI processing molfile c {}", c.toMol());
} catch (IOException e) {
log.error("Error generating mol from Chemical");
}*/
if(c.hasQueryAtoms() || c.atoms().filter(at->("A".equals(at.getSymbol()) || "*".equals(at.getSymbol()) || "R".equals(at.getSymbol()))).count()>0){
chemicalToUse = c.copy();
chemicalToUse.atoms()
.filter(at->at.isQueryAtom() || "A".equals(at.getSymbol()))
.filter(at-> at.getSymbol() == null || "A".equals(at.getSymbol()) || "*".equals(at.getSymbol())
|| "R".equals(at.getSymbol()))//isQueryAtom returns true
.forEach(a->{
a.setAtomicNumber(2);
//verify that this is setting a symbol as well
Expand All @@ -43,16 +50,12 @@ public static Chemical RemoveQueryFeaturesForPseudoInChI(Chemical c) {
});
}
Chemical processBonds = chemicalToUse.copy();
//temporary diagnostics
/*System.out.println("total bonds: " + processBonds.getBondCount());
processBonds.bonds().forEach(b->{
System.out.printf("bond: %s; atom 1: %s; atom 2: %s\n", b.getBondType(), b.getAtom1().getSymbol(), b.getAtom2().getSymbol() );});*/
try{
Chemical finalChem= Chemical.parse(ChemCleaner.removeSGroupsAndLegacyAtomLists(processBonds.toMol()));
finalChem.bonds().filter(b->b.getBondType() == null || b.getBondType().equals(Bond.BondType.SINGLE_OR_DOUBLE) || b.getBondType().equals(b))
finalChem.bonds().filter(b->b.getBondType() == null || b.getBondType().equals(Bond.BondType.SINGLE_OR_DOUBLE) || b.isQueryBond())
.forEach(b->{
log.trace("about to replace bond {}", b);
b.setBondType(Bond.BondType.SINGLE);

});
return finalChem;
}catch(Exception e){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import gov.nih.ncats.molwitch.Chemical;
import gov.nih.ncats.molwitch.inchi.InChiResult;
import ix.core.models.Structure;
import ix.ginas.utils.ChemUtils;

import java.io.IOException;
import java.util.function.BiConsumer;
Expand All @@ -17,7 +18,9 @@ public void hash(Chemical chem, String mol, BiConsumer<String, String> keyValueC
try{
String key=null;
if(chem.getAtomCount()>0) {
InChiResult result = chem.toInchi();
Chemical cleaned = chem.copy();
cleaned =Chem.RemoveQueryFeaturesForPseudoInChI(cleaned);
InChiResult result = cleaned.toInchi();
key = result.getKey();
}else {
key = "MOSFIJXAXDLOML-UHFFFAOYSA-N";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,13 @@ public void accept(String key, String value){


Chem.setFormula(struc);
struc.setMwt(mol.getMass());
try {
struc.setMwt(mol.getMass());
} catch (Exception ignore){
//todo: deal with getMass for query atoms
//exception swallowed based on discussion between T. Peryea and M. Miller 27 June 2024
}


if(!query){
try {
Expand Down
Loading

0 comments on commit 3914dc3

Please sign in to comment.