Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Please do not merge Enhanced flex search #326

Merged
merged 5 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions gsrs-module-substance-example/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
<!-- Uncomment this for maven public release deployment
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
package example.structureSearch;

import example.GsrsModuleSubstanceApplication;
import gsrs.legacy.structureIndexer.StructureIndexerService;
import gsrs.module.substance.controllers.SubstanceController;
import gsrs.module.substance.controllers.SubstanceLegacySearchService;
import gsrs.module.substance.indexers.SubstanceDefinitionalHashIndexer;
import gsrs.springUtils.AutowireHelper;
import gsrs.startertests.TestGsrsValidatorFactory;
import gsrs.startertests.TestIndexValueMakerFactory;
import gsrs.substances.tests.AbstractSubstanceJpaFullStackEntityTest;
import gsrs.validator.DefaultValidatorConfig;
import gsrs.validator.ValidatorConfig;
import ix.core.chem.StructureProcessor;
import ix.core.models.Structure;
import ix.core.search.SearchRequest;
import ix.core.search.SearchResult;
import ix.core.util.EntityUtils;
import ix.ginas.modelBuilders.ChemicalSubstanceBuilder;
import ix.ginas.models.v1.ChemicalSubstance;
import ix.ginas.models.v1.Substance;
import ix.ginas.utils.validation.validators.ChemicalValidator;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.core.io.ClassPathResource;
import org.springframework.security.test.context.support.WithMockUser;
import org.springframework.transaction.support.TransactionTemplate;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;
import java.util.UUID;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Full-stack tests for the FLEX structure search query produced by
 * {@link SubstanceController#makeFlexSearch(Structure)} and
 * {@link SubstanceController#makeFlexSearchMoietyClauses(Structure)}.
 *
 * <p>Before each test the substances in {@code testdumps/tartrate_set.gsrs} are loaded, then a
 * query structure is standardized, converted into a flex-search query string and run through
 * the {@link SubstanceLegacySearchService}. The expected hit counts below are tied to the
 * content of that data file.
 */
@SpringBootTest(classes = GsrsModuleSubstanceApplication.class)
@WithMockUser(username = "admin", roles = "Admin")
@Slf4j
public class FlexSearchTest extends AbstractSubstanceJpaFullStackEntityTest {

    /** Classpath location of the substance dump loaded before each test. */
    private static final String DATA_FILE = "testdumps/tartrate_set.gsrs";

    /** Classpath location of the molfile used as the sodium tartrate query. */
    private static final String SODIUM_TARTRATE_MOLFILE = "molfiles/sodium_tartrate.mol";

    /** SMILES used to build the tartaric acid query substance. */
    private static final String TARTARIC_ACID_SMILES = "C(C(C(=O)O)O)(C(=O)O)O";

    /** Index field that every generated moiety clause must reference. */
    private static final String MOIETY_HASH_FIELD = "root_moieties_properties_STEREO_INSENSITIVE_HASH";

    /** Number of substances in the data file expected to match the tartaric acid flex query. */
    private static final int EXPECTED_TARTARIC_ACID_HITS = 7;

    /** Number of substances in the data file expected to match the sodium tartrate flex query. */
    private static final int EXPECTED_SODIUM_TARTRATE_HITS = 3;

    @Autowired
    StructureProcessor structureProcessor;

    @Autowired
    private TestIndexValueMakerFactory testIndexValueMakerFactory;

    @Autowired
    private TestGsrsValidatorFactory factory;

    @Autowired
    private StructureIndexerService indexer;

    @Autowired
    private SubstanceLegacySearchService searchService;

    // NOTE(review): this is an instance field. Under JUnit 5's default per-method test-instance
    // lifecycle every test gets a fresh instance, so the flag would start out false each time and
    // the guard in clearIndexers() would never short-circuit. Confirm whether the base class
    // changes the lifecycle before relying on (or removing) this flag.
    private boolean loadedData = false;

    /**
     * Registers the definitional-hash indexer and the chemical validator, then loads the
     * substances from {@link #DATA_FILE}; skipped when {@code loadedData} is already set.
     *
     * @throws IOException if the data file cannot be resolved or read
     */
    @BeforeEach
    public void clearIndexers() throws IOException {
        if (loadedData) {
            return;
        }
        log.trace("starting to load data");

        SubstanceDefinitionalHashIndexer hashIndexer = new SubstanceDefinitionalHashIndexer();
        AutowireHelper.getInstance().autowire(hashIndexer);
        testIndexValueMakerFactory.addIndexValueMaker(hashIndexer);

        ValidatorConfig config = new DefaultValidatorConfig();
        config.setValidatorClass(ChemicalValidator.class);
        config.setNewObjClass(ChemicalSubstance.class);
        factory.addValidator("substances", config);

        File dataFile = new ClassPathResource(DATA_FILE).getFile();
        Assertions.assertTrue(dataFile.exists());
        loadGsrsFile(dataFile);
        loadedData = true;
        log.info("loaded data");
    }

    /**
     * Checks that the moiety clauses generated for tartaric acid reference the
     * stereo-insensitive moiety hash field.
     */
    @Test
    @WithMockUser(value = "admin", roles = "Admin")
    public void generateFlexSearchQuery() throws Exception {
        ChemicalSubstance substance = buildTartaricAcid();
        log.trace("created query substance");

        Structure structureStd = standardize(substance.getStructure().molfile);
        log.trace("created structureStd");

        SubstanceController controller = newController();
        log.trace("created and wired controller");

        String hash = controller.makeFlexSearchMoietyClauses(structureStd);
        log.trace("search hash: {}", hash);
        assertTrue(hash.contains(MOIETY_HASH_FIELD));
    }

    /** Runs a flex search for tartaric acid and checks the number of hits in the loaded data. */
    @Test
    @WithMockUser(value = "admin", roles = "Admin")
    public void runFlexSearchQueryTartaric() throws Exception {
        ChemicalSubstance substance = buildTartaricAcid();
        Structure structureStd = standardize(substance.getStructure().molfile);

        List<Substance> substances = runFlexSearch(structureStd);

        assertEquals(EXPECTED_TARTARIC_ACID_HITS, substances.size());
    }

    /**
     * Runs a flex search for the two-component sodium tartrate molfile and checks the number of
     * hits in the loaded data.
     */
    @Test
    @WithMockUser(value = "admin", roles = "Admin")
    public void runFlexSearchQuerySodiumTartrate() throws Exception {
        File molfile = new ClassPathResource(SODIUM_TARTRATE_MOLFILE).getFile();
        Structure structureStd = standardize(Files.readString(molfile.toPath()));

        List<Substance> substances = runFlexSearch(structureStd);

        assertEquals(EXPECTED_SODIUM_TARTRATE_HITS, substances.size());
    }

    /** Builds an unsaved tartaric acid substance with a random UUID to serve as the query. */
    private ChemicalSubstance buildTartaricAcid() {
        return new ChemicalSubstanceBuilder()
                .setStructureWithDefaultReference(TARTARIC_ACID_SMILES)
                .addName("Tartaric acid")
                .setUUID(UUID.randomUUID())
                .build();
    }

    /** Runs the given molfile through the structure processor with standardization enabled. */
    private Structure standardize(String molfile) throws Exception {
        return structureProcessor.taskFor(molfile)
                .standardize(true)
                .build()
                .instrument()
                .getStructure();
    }

    /** Creates a {@link SubstanceController} and wires its dependencies from the test context. */
    private SubstanceController newController() {
        SubstanceController controller = new SubstanceController();
        AutowireHelper.getInstance().autowireAndProxy(controller);
        return controller;
    }

    /**
     * Builds the flex-search query for {@code query}, executes it and returns the matching
     * substances, logging the query and every hit at trace level.
     */
    private List<Substance> runFlexSearch(Structure query) throws Exception {
        String hash = newController().makeFlexSearch(query);
        log.trace("search hash: {}", hash);

        SearchRequest request = new SearchRequest.Builder()
                .kind(Substance.class)
                .query(hash)
                .build();
        List<Substance> substances = getSearchList(request);

        log.trace("Flex search hits:");
        substances.forEach(s -> log.trace("ID {} - {}", s.uuid, s.getName()));
        return substances;
    }

    /**
     * Executes the search inside a transaction and returns a copy of the matches.
     *
     * <p>Each match is serialized to its internal JSON while the transaction is still open,
     * which forces its data to be fetched before the list is handed back to the caller.
     *
     * @param sr request whose query and options are passed to the search service
     * @return the matching substances
     * @throws RuntimeException wrapping any exception raised by the search
     */
    private List<Substance> getSearchList(SearchRequest sr) {
        TransactionTemplate transactionSearch = new TransactionTemplate(transactionManager);
        return transactionSearch.execute(ts -> {
            try {
                SearchResult sresult = searchService.search(sr.getQuery(), sr.getOptions());
                List<Substance> matches = sresult.getMatches();
                // force fetching
                matches.forEach(ss -> EntityUtils.EntityWrapper.of(ss).toInternalJson());
                return matches.stream().collect(Collectors.toList());
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ substance.renderer.configPath=substances-default-renderer.json

approval_id_code_system="UNII from FDA"
logging.level.root=error
logging.level.example.imports=trace
#logging.level.example.imports=trace
#logging.level.gsrs.imports=trace
#logging.level.example.substance.processor=trace
Expand Down Expand Up @@ -41,11 +40,13 @@ logging.level.ix.core.EntityFetcher=OFF

#for travis CI
#logging.level.root=OFF
#logging.level.gsrs.module.substance.scrubbers=trace
#logging.level.example.exports.scrubbers=trace
logging.level.gsrs.module.substance.scrubbers=trace
logging.level.example.exports.scrubbers=trace

logging.level.gsrs.module.substance.controllers=TRACE
logging.level.ix.core.chem=TRACE
logging.level.example.chem=TRACE
logging.level.example.structureSearch=TRACE

spring.jpa.database-platform=org.hibernate.dialect.H2Dialect
spring.jpa.defer-datasource-initialization=true
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

JSDraw203202416202D

11 9 0 0 0 0 0 V2000
19.1884 -8.0856 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
20.5396 -7.3060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
20.5396 -5.7460 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
21.8917 -8.0860 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
21.8917 -9.6460 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
23.2437 -7.3060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
23.2437 -5.7460 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
24.5956 -8.0860 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
24.5956 -9.6460 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
25.9468 -7.3064 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
28.5476 -7.6960 0.0000 Na 0 3 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
2 4 1 0 0 0 0
4 5 1 0 0 0 0
4 6 1 0 0 0 0
6 7 1 0 0 0 0
6 8 1 0 0 0 0
8 9 1 0 0 0 0
8 10 2 0 0 0 0
M CHG 2 9 -1 11 1
M END
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
import org.freehep.graphicsio.svg.SVGGraphics2D;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.DependsOn;
import org.springframework.core.env.ConfigurableEnvironment;
import org.springframework.core.env.Environment;
import org.springframework.core.io.ClassPathResource;
import org.springframework.hateoas.server.ExposesResourceFor;
import org.springframework.http.HttpHeaders;
Expand Down Expand Up @@ -742,8 +740,9 @@ public Object structureSearchGet(
hash = "root_structure_properties_EXACT_HASH:" + structure.getExactHash();
}else if(sanitizedRequest.getType() == SubstanceStructureSearchService.StructureSearchType.FLEX){
//note we purposefully don't have the lucene path so it finds moieties and polymers etc
String sins=structure.getStereoInsensitiveHash();
hash= "( root_structure_properties_STEREO_INSENSITIVE_HASH:" + sins + " OR " + "root_moieties_properties_STEREO_INSENSITIVE_HASH:" + sins + " )";
hash= makeFlexSearch(structure);
log.trace("search hash: {}", hash);
//"root_moieties_properties_STEREO_INSENSITIVE_HASH:" + sins + " )";
}

if(hash !=null){
Expand Down Expand Up @@ -1871,5 +1870,38 @@ public List<Text> getHardcodedConfigsBackup() throws JsonProcessingException {
return items;
}

}
/**
 * Builds the query string used for a FLEX structure search.
 *
 * <p>The query matches records whose structure has the same stereo-insensitive hash as
 * {@code structure}, OR records that satisfy the moiety clauses produced by
 * {@link #makeFlexSearchMoietyClauses(Structure)}.
 *
 * @param structure the query structure
 * @return a parenthesised query string
 * @throws RuntimeException if the moiety clauses cannot be generated
 */
public String makeFlexSearch(Structure structure) {
    String sins = structure.getStereoInsensitiveHash();
    String structureClause = "root_structure_properties_STEREO_INSENSITIVE_HASH:\"" + sins + "\"";
    String moietyClauses = makeFlexSearchMoietyClauses(structure);
    if (moietyClauses == null || moietyClauses.isEmpty()) {
        // No moiety clause was produced: emitting "... OR )" would leave a dangling operator
        // in the query, so fall back to the whole-structure clause alone.
        return "( " + structureClause + " )";
    }
    return "( " + structureClause + " OR " + moietyClauses + ")";
}

/**
 * Builds the moiety part of a FLEX search query for the given structure.
 *
 * <p>The structure's molfile is run through the structure processor (without standardization)
 * with a list supplied to collect its components. One clause on
 * {@code root_moieties_properties_STEREO_INSENSITIVE_HASH} is generated per collected component
 * and the clauses are joined with {@code AND}; when there is more than one clause the result is
 * wrapped in parentheses.
 *
 * @param structure the query structure; its {@code molfile} is what gets processed
 * @return the joined clauses, which is an empty string when no components were collected
 * @throws RuntimeException wrapping any exception raised while processing the structure
 */
public String makeFlexSearchMoietyClauses(Structure structure) {
    final List<Structure> components = new ArrayList<>();
    try {
        // The returned structure is not needed; the call is made to populate the component list.
        structureProcessor.taskFor(structure.molfile)
                .components(components)
                .standardize(false)
                .build()
                .instrument()
                .getStructure();
        log.trace(" created {} moieties", components.size());

        final String joinedClauses = components.stream()
                .map(component -> "root_moieties_properties_STEREO_INSENSITIVE_HASH:\""
                        + component.getStereoInsensitiveHash()
                        + "\"")
                .collect(Collectors.joining(" AND "));

        // Parentheses are only needed to group several AND-ed clauses.
        return components.size() > 1 ? "(" + joinedClauses + ")" : joinedClauses;
    } catch (Exception e) {
        log.error("Error constructing query: ", e);
        throw new RuntimeException(e);
    }
}
}