Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flex-plus searching now behaves like flex searching, with the addition of salt stripping of the query structure #358

Merged
merged 2 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions gsrs-module-substance-example/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
<!-- Uncomment this for maven public release deployment
Expand Down
84 changes: 84 additions & 0 deletions gsrs-module-substance-example/salt_data_public.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
InChI Key SMILES Occurrences
VEXZGXHMUGYJMC-UHFFFAOYSA-N Cl 7206
FKNQFGJONOIPTF-UHFFFAOYSA-N [Na+] 3488
XLYOFNOQVPJJNP-UHFFFAOYSA-N O 1608
NPYPAHLBTDXSSS-UHFFFAOYSA-N [K+] 811
XLYOFNOQVPJJNP-UHFFFAOYSA-M [OH-] 791
CPELXLSAUQHCOX-UHFFFAOYSA-N Br 645
BHPQYMZQTOCNFJ-UHFFFAOYSA-N [Ca+2] 526
QAOWNCQODCNURD-UHFFFAOYSA-N OS(=O)(=O)O 510
AFVFQIVMOAPDHO-UHFFFAOYSA-N CS(=O)(=O)O 350
XMBWDFGMSWQBCA-UHFFFAOYSA-N I 326
JLVVSXFLKOJNIY-UHFFFAOYSA-N [Mg+2] 316
QGZKDVFQNNGYKY-UHFFFAOYSA-N N 300
QTBSBXVTEAMEQO-UHFFFAOYSA-N CC(=O)O 280
GPRLSGONYQIRFK-UHFFFAOYSA-N [H+] 277
REDXJYDRNCIFBQ-UHFFFAOYSA-N [Al+3] 270
VZCYOOQTPOCHFL-UPHRSURJSA-N OC(=O)/C=C\C(O)=O 259
PTFCDOFLOPIGGS-UHFFFAOYSA-N [Zn+2] 241
VEXZGXHMUGYJMC-UHFFFAOYSA-M [Cl-] 240
FEWJPZIEWOKRBE-JCYAYHJZSA-N [C@@H]([C@H](C(=O)O)O)(C(=O)O)O 214
NBIIXXVUZAFLBC-UHFFFAOYSA-N OP(=O)(O)O 212
JPVYNHNXODAKFH-UHFFFAOYSA-N [Cu+2] 172
VZCYOOQTPOCHFL-OWOJBTEDSA-N OC(=O)/C=C/C(O)=O 164
KRKNYBCHXYNGOX-UHFFFAOYSA-N C(C(=O)O)C(CC(=O)O)(C(=O)O)O 162
VTLYFUHAOXGGBS-UHFFFAOYSA-N [Fe+3] 139
HBBGRARXTFLTSG-UHFFFAOYSA-N [Li+] 139
JZMJDSHXVKJFKW-UHFFFAOYSA-M COS(=O)(=O)[O-] 126
CWYNVVGOOAEACU-UHFFFAOYSA-N [Fe+2] 124
QAOWNCQODCNURD-UHFFFAOYSA-L O=S(=O)([O-])[O-] 120
JOXIMZWYDAKGHI-UHFFFAOYSA-N Cc1ccc(cc1)S(=O)(=O)O 116
QTBSBXVTEAMEQO-UHFFFAOYSA-M CC(=O)[O-] 110
KDYFGRWQOYBRFD-UHFFFAOYSA-N C(CC(=O)O)C(=O)O 99
MUBZPKHOEPUJKR-UHFFFAOYSA-N C(=O)(C(=O)O)O 98
NHNBFGGVMKEFGY-UHFFFAOYSA-N [N+](=O)([O-])[O-] 97
XDFCIPNJCBUZJN-UHFFFAOYSA-N [Ba+2] 95
KRHYYFGTRYWZRS-UHFFFAOYSA-N F 90
WAEMQWOKJMHJLA-UHFFFAOYSA-N [Mn+2] 85
GSEJCLTVZPLZKY-UHFFFAOYSA-N C(CO)N(CCO)CCO 80
GRYLNZFGIOXLOG-UHFFFAOYSA-N [N+](=O)(O)[O-] 77
AHKZTVQIVOEVFO-UHFFFAOYSA-N [O-2] 72
BVKZGUZCCUSVTD-UHFFFAOYSA-L C(=O)([O-])[O-] 71
LELOWRISYMNNSU-UHFFFAOYSA-N C#N 70
XMBWDFGMSWQBCA-UHFFFAOYSA-M [I-] 69
QGZKDVFQNNGYKY-UHFFFAOYSA-O [NH4+] 65
XLJKHNWPARRRJB-UHFFFAOYSA-N [Co+2] 64
FOIXSVOLVBLSDH-UHFFFAOYSA-N [Ag+] 63
CPELXLSAUQHCOX-UHFFFAOYSA-M [Br-] 63
KRKNYBCHXYNGOX-UHFFFAOYSA-K C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 62
JDIBGQFKXXXXPN-UHFFFAOYSA-N [Bi+3] 61
KDXKERNSBIXSRK-YFKPBYRVSA-N C(CCN)C[C@@H](C(=O)O)N 59
BFGKITSFLPAWGI-UHFFFAOYSA-N [Cr+3] 59
RVPVRDXYQKGNMQ-UHFFFAOYSA-N [Pb+2] 59
HRGDZIGMBDGFTC-UHFFFAOYSA-N [Pt+2] 59
PWYYWQHXAPXYMF-UHFFFAOYSA-N [Sr+2] 58
HZAXFHJVJLSVMW-UHFFFAOYSA-N C(CO)N 57
JVTAAEKCZFNVCJ-UHFFFAOYSA-N CC(C(=O)O)O 57
ZBCBWPMODOFKDW-UHFFFAOYSA-N C(CO)NCCO 56
ODKSFYDXXFIFQN-BYPYZUCNSA-N C(C[C@@H](C(=O)O)N)CNC(=N)N 54
MBBZMMPHUWSWHV-BDVNFPICSA-N CNC[C@@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O 52
GBNDTYKAOXLLID-UHFFFAOYSA-N [Zr+4] 51
YGSDEFSMJLZEOE-UHFFFAOYSA-N c1ccc(c(c1)C(=O)O)O 51
SRSXLGNVWSONIS-UHFFFAOYSA-N c1ccc(cc1)S(=O)(=O)O 49
QAOWNCQODCNURD-UHFFFAOYSA-M OS(=O)(=O)[O-] 46
WLJNZVDCPSBLRP-UHFFFAOYSA-N c1ccc2c(c1)cc(c(c2Cc3c4ccccc4cc(c3O)C(=O)O)O)C(=O)O 46
WPYMKLBDIGXBTP-UHFFFAOYSA-N c1ccc(cc1)C(=O)O 45
WLZRMCYVCSSEQC-UHFFFAOYSA-N [Cd+2] 43
LENZDBCJOHFCAS-UHFFFAOYSA-N C(C(CO)(CO)N)O 41
LCKIEQZJEYYRIY-UHFFFAOYSA-N [Ti+4] 39
BQPIGGFYSBELGY-UHFFFAOYSA-N [Hg+2] 39
NBIIXXVUZAFLBC-UHFFFAOYSA-K O=P([O-])([O-])[O-] 39
MUBZPKHOEPUJKR-UHFFFAOYSA-L C(=O)(C(=O)[O-])[O-] 39
GLUUGHFHXGJENI-UHFFFAOYSA-N C1CNCCN1 39
VEQPNABPJHWNSG-UHFFFAOYSA-N [Ni+2] 38
RJOJUSXNYCILHH-UHFFFAOYSA-N [Gd+3] 38
RWSOTUBLDIXVET-UHFFFAOYSA-M [SH-] 37
DTQVDTLACAAQTR-UHFFFAOYSA-N C(=O)(C(F)(F)F)O 37
JOXIMZWYDAKGHI-UHFFFAOYSA-M Cc1ccc(cc1)S(=O)(=O)[O-] 35
XQTIWNLDFPPCIU-UHFFFAOYSA-N [Ce+3] 34
CRBHXDCYXIISFC-UHFFFAOYSA-N C[N+](C)(C)CC[O-] 33
KIWBPDUYBMNFTB-UHFFFAOYSA-M CCOS(=O)(=O)[O-] 33
JAWGVVJVYSANRY-UHFFFAOYSA-N [Co+3] 32
ZMZDMBWJUHKJPS-UHFFFAOYSA-N C(#N)S 32
RGHNJXZEOKUKBD-SQOUGZDYSA-N C([C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O)O 31
CKLJMWTZIZZHCS-REOHCLBHSA-L C([C@@H](C(=O)[O-])N)C(=O)[O-] 31
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
InChI Key SMILES Occurrences
VEXZGXHMUGYJMC-UHFFFAOYSA-N Cl 7206
FKNQFGJONOIPTF-UHFFFAOYSA-N [Na+] 3488
XLYOFNOQVPJJNP-UHFFFAOYSA-N O 1608
NPYPAHLBTDXSSS-UHFFFAOYSA-N [K+] 811
XLYOFNOQVPJJNP-UHFFFAOYSA-M [OH-] 791
CPELXLSAUQHCOX-UHFFFAOYSA-N Br 645
BHPQYMZQTOCNFJ-UHFFFAOYSA-N [Ca+2] 526
QAOWNCQODCNURD-UHFFFAOYSA-N OS(=O)(=O)O 510
AFVFQIVMOAPDHO-UHFFFAOYSA-N CS(=O)(=O)O 350
XMBWDFGMSWQBCA-UHFFFAOYSA-N I 326
JLVVSXFLKOJNIY-UHFFFAOYSA-N [Mg+2] 316
QGZKDVFQNNGYKY-UHFFFAOYSA-N N 300
QTBSBXVTEAMEQO-UHFFFAOYSA-N CC(=O)O 280
GPRLSGONYQIRFK-UHFFFAOYSA-N [H+] 277
REDXJYDRNCIFBQ-UHFFFAOYSA-N [Al+3] 270
VZCYOOQTPOCHFL-UPHRSURJSA-N OC(=O)/C=C\C(O)=O 259
PTFCDOFLOPIGGS-UHFFFAOYSA-N [Zn+2] 241
VEXZGXHMUGYJMC-UHFFFAOYSA-M [Cl-] 240
FEWJPZIEWOKRBE-JCYAYHJZSA-N [C@@H]([C@H](C(=O)O)O)(C(=O)O)O 214
NBIIXXVUZAFLBC-UHFFFAOYSA-N OP(=O)(O)O 212
JPVYNHNXODAKFH-UHFFFAOYSA-N [Cu+2] 172
VZCYOOQTPOCHFL-OWOJBTEDSA-N OC(=O)/C=C/C(O)=O 164
KRKNYBCHXYNGOX-UHFFFAOYSA-N C(C(=O)O)C(CC(=O)O)(C(=O)O)O 162
VTLYFUHAOXGGBS-UHFFFAOYSA-N [Fe+3] 139
HBBGRARXTFLTSG-UHFFFAOYSA-N [Li+] 139
JZMJDSHXVKJFKW-UHFFFAOYSA-M COS(=O)(=O)[O-] 126
CWYNVVGOOAEACU-UHFFFAOYSA-N [Fe+2] 124
QAOWNCQODCNURD-UHFFFAOYSA-L O=S(=O)([O-])[O-] 120
JOXIMZWYDAKGHI-UHFFFAOYSA-N Cc1ccc(cc1)S(=O)(=O)O 116
QTBSBXVTEAMEQO-UHFFFAOYSA-M CC(=O)[O-] 110
KDYFGRWQOYBRFD-UHFFFAOYSA-N C(CC(=O)O)C(=O)O 99
MUBZPKHOEPUJKR-UHFFFAOYSA-N C(=O)(C(=O)O)O 98
NHNBFGGVMKEFGY-UHFFFAOYSA-N [N+](=O)([O-])[O-] 97
XDFCIPNJCBUZJN-UHFFFAOYSA-N [Ba+2] 95
KRHYYFGTRYWZRS-UHFFFAOYSA-N F 90
WAEMQWOKJMHJLA-UHFFFAOYSA-N [Mn+2] 85
GSEJCLTVZPLZKY-UHFFFAOYSA-N C(CO)N(CCO)CCO 80
GRYLNZFGIOXLOG-UHFFFAOYSA-N [N+](=O)(O)[O-] 77
AHKZTVQIVOEVFO-UHFFFAOYSA-N [O-2] 72
BVKZGUZCCUSVTD-UHFFFAOYSA-L C(=O)([O-])[O-] 71
LELOWRISYMNNSU-UHFFFAOYSA-N C#N 70
XMBWDFGMSWQBCA-UHFFFAOYSA-M [I-] 69
QGZKDVFQNNGYKY-UHFFFAOYSA-O [NH4+] 65
XLJKHNWPARRRJB-UHFFFAOYSA-N [Co+2] 64
FOIXSVOLVBLSDH-UHFFFAOYSA-N [Ag+] 63
CPELXLSAUQHCOX-UHFFFAOYSA-M [Br-] 63
KRKNYBCHXYNGOX-UHFFFAOYSA-K C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 62
JDIBGQFKXXXXPN-UHFFFAOYSA-N [Bi+3] 61
KDXKERNSBIXSRK-YFKPBYRVSA-N C(CCN)C[C@@H](C(=O)O)N 59
BFGKITSFLPAWGI-UHFFFAOYSA-N [Cr+3] 59
RVPVRDXYQKGNMQ-UHFFFAOYSA-N [Pb+2] 59
HRGDZIGMBDGFTC-UHFFFAOYSA-N [Pt+2] 59
PWYYWQHXAPXYMF-UHFFFAOYSA-N [Sr+2] 58
HZAXFHJVJLSVMW-UHFFFAOYSA-N C(CO)N 57
JVTAAEKCZFNVCJ-UHFFFAOYSA-N CC(C(=O)O)O 57
ZBCBWPMODOFKDW-UHFFFAOYSA-N C(CO)NCCO 56
ODKSFYDXXFIFQN-BYPYZUCNSA-N C(C[C@@H](C(=O)O)N)CNC(=N)N 54
MBBZMMPHUWSWHV-BDVNFPICSA-N CNC[C@@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O 52
GBNDTYKAOXLLID-UHFFFAOYSA-N [Zr+4] 51
YGSDEFSMJLZEOE-UHFFFAOYSA-N c1ccc(c(c1)C(=O)O)O 51
SRSXLGNVWSONIS-UHFFFAOYSA-N c1ccc(cc1)S(=O)(=O)O 49
QAOWNCQODCNURD-UHFFFAOYSA-M OS(=O)(=O)[O-] 46
WLJNZVDCPSBLRP-UHFFFAOYSA-N c1ccc2c(c1)cc(c(c2Cc3c4ccccc4cc(c3O)C(=O)O)O)C(=O)O 46
WPYMKLBDIGXBTP-UHFFFAOYSA-N c1ccc(cc1)C(=O)O 45
WLZRMCYVCSSEQC-UHFFFAOYSA-N [Cd+2] 43
LENZDBCJOHFCAS-UHFFFAOYSA-N C(C(CO)(CO)N)O 41
LCKIEQZJEYYRIY-UHFFFAOYSA-N [Ti+4] 39
BQPIGGFYSBELGY-UHFFFAOYSA-N [Hg+2] 39
NBIIXXVUZAFLBC-UHFFFAOYSA-K O=P([O-])([O-])[O-] 39
MUBZPKHOEPUJKR-UHFFFAOYSA-L C(=O)(C(=O)[O-])[O-] 39
GLUUGHFHXGJENI-UHFFFAOYSA-N C1CNCCN1 39
VEQPNABPJHWNSG-UHFFFAOYSA-N [Ni+2] 38
RJOJUSXNYCILHH-UHFFFAOYSA-N [Gd+3] 38
RWSOTUBLDIXVET-UHFFFAOYSA-M [SH-] 37
DTQVDTLACAAQTR-UHFFFAOYSA-N C(=O)(C(F)(F)F)O 37
JOXIMZWYDAKGHI-UHFFFAOYSA-M Cc1ccc(cc1)S(=O)(=O)[O-] 35
XQTIWNLDFPPCIU-UHFFFAOYSA-N [Ce+3] 34
CRBHXDCYXIISFC-UHFFFAOYSA-N C[N+](C)(C)CC[O-] 33
KIWBPDUYBMNFTB-UHFFFAOYSA-M CCOS(=O)(=O)[O-] 33
JAWGVVJVYSANRY-UHFFFAOYSA-N [Co+3] 32
ZMZDMBWJUHKJPS-UHFFFAOYSA-N C(#N)S 32
RGHNJXZEOKUKBD-SQOUGZDYSA-N C([C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O)O 31
CKLJMWTZIZZHCS-REOHCLBHSA-L C([C@@H](C(=O)[O-])N)C(=O)[O-] 31
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package example.chem;

import example.GsrsModuleSubstanceApplication;
import gov.nih.ncats.molwitch.Chemical;

import gsrs.module.substance.utils.ChemicalUtils;
import gsrs.substances.tests.AbstractSubstanceJpaFullStackEntityTest;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.security.test.context.support.WithMockUser;

import java.io.*;
import java.util.stream.Stream;

/**
 * Full-stack tests for {@link ChemicalUtils#stripSalts}: each test parses a SMILES string that
 * contains more than one disconnected fragment, strips it, and compares the result with the input.
 */
@Slf4j
@SpringBootTest(classes = GsrsModuleSubstanceApplication.class)
@WithMockUser(username = "admin", roles = "Admin")
class SaltRemovalTest extends AbstractSubstanceJpaFullStackEntityTest {

    /** Multi-fragment SMILES ending in two separate {@code Br} fragments; shared by both tests. */
    private static final String SMILES_WITH_TWO_BR =
            "C[C@H]1[C@@]2([H])CC[C@@]3([H])[C@]4([H])CC=C5C[C@H](CC[C@]5(C)[C@@]4([H])CC[C@]23CN1C)N(C)C.Br.Br";

    /** Template for the failure message attached to every assertion in this class. */
    private static final String FORMULA_MESSAGE = "formula before: %s; after: %s";

    @Autowired
    private ChemicalUtils chemicalUtils;

    /**
     * Checks that the structure returned by {@code stripSalts} has a lower mass than the input.
     *
     * @throws IOException declared because parsing/stripping may fail with an I/O error
     */
    @Test
    void testRemoveSalts() throws IOException {
        Chemical parsed = Chemical.parseMol(SMILES_WITH_TWO_BR);
        // Read the mass before stripping so the comparison does not depend on whether
        // stripSalts touches its argument (not visible from this test).
        double originalMass = parsed.getMass();

        Chemical desalted = chemicalUtils.stripSalts(parsed);
        double desaltedMass = desalted.getMass();

        log.warn("formula before: {}; after: {}", parsed.getFormula(), desalted.getFormula());
        boolean lostMass = originalMass > desaltedMass;
        Assertions.assertTrue(lostMass, describeFormulas(parsed, desalted));
    }

    /**
     * Checks that stripping removes exactly the expected number of atoms.
     *
     * @param smiles          input structure as SMILES
     * @param atomCountChange expected value of (input atom count - stripped atom count)
     * @throws IOException declared because parsing/stripping may fail with an I/O error
     */
    @ParameterizedTest
    @MethodSource("inputData")
    void testSaltRemoval(String smiles, int atomCountChange) throws IOException {
        Chemical parsed = Chemical.parseMol(smiles);
        Chemical desalted = chemicalUtils.stripSalts(parsed);

        // Both counts are read after stripping, exactly as the assertion has always done.
        int removedAtoms = parsed.getAtomCount() - desalted.getAtomCount();
        Assertions.assertEquals(atomCountChange, removedAtoms, describeFormulas(parsed, desalted));
    }

    /** Supplies (SMILES, expected atom-count difference) pairs for {@link #testSaltRemoval}. */
    private static Stream<Arguments> inputData() {
        return Stream.of(
                Arguments.of(SMILES_WITH_TWO_BR, 2),
                Arguments.of("CCC1C(Cc2cncn2C)COC1=O.Cl", 1),
                Arguments.of("c1cc(ccc1C(=C2C=CC(=O)C=C2)c3ccc(cc3)[O-])[O-].[Na+].[Na+]", 2),
                Arguments.of("[F-][Ti+4]([F-])([F-])([F-])([F-])[F-].[Li+].[Li+]", 2),
                Arguments.of(
                        "Cc1c(CCN2CCC(=C(c3ccc(cc3)F)c4ccc(cc4)F)CC2)c(=O)n5ccsc5n1.[C@@H]([C@H](C(=O)O)O)(C(=O)O)O",
                        10)
        );
    }

    /** Builds the assertion message showing the formula of the input and of the stripped result. */
    private static String describeFormulas(Chemical before, Chemical after) {
        return String.format(FORMULA_MESSAGE, before.getFormula(), after.getFormula());
    }
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package example.structureSearch;

import example.GsrsModuleSubstanceApplication;
import gov.nih.ncats.molwitch.Chemical;
import gsrs.legacy.structureIndexer.StructureIndexerService;
import gsrs.module.substance.controllers.SubstanceController;
import gsrs.module.substance.controllers.SubstanceLegacySearchService;
import gsrs.module.substance.indexers.SubstanceDefinitionalHashIndexer;
import gsrs.module.substance.utils.ChemicalUtils;
import gsrs.springUtils.AutowireHelper;
import gsrs.startertests.TestGsrsValidatorFactory;
import gsrs.startertests.TestIndexValueMakerFactory;
Expand Down Expand Up @@ -59,6 +61,9 @@ public class FlexSearchTest extends AbstractSubstanceJpaFullStackEntityTest {
@Autowired
private SubstanceLegacySearchService searchService;

@Autowired
private ChemicalUtils chemicalUtils;

private String fileName = "testdumps/tartrate_set.gsrs";

private boolean loadedData = false;
Expand Down Expand Up @@ -107,7 +112,7 @@ public void generateFlexSearchQuery() throws Exception {
AutowireHelper.getInstance().autowireAndProxy(controller);
log.trace("created and wired controller");

String hash= controller.makeFlexSearchMoietyClauses(structureStd, false) + ")";
String hash= controller.makeFlexSearchMoietyClauses(structureStd) + ")";
log.trace("search hash: {}", hash);
assertTrue(hash.contains("root_moieties_properties_STEREO_INSENSITIVE_HASH"));
}
Expand All @@ -134,9 +139,9 @@ public void generateFlexPlusSearchQuery() throws Exception {
AutowireHelper.getInstance().autowireAndProxy(controller);
log.trace("created and wired controller");

String hash= controller.makeFlexSearchMoietyClauses(structureStd, true) + ")";
String hash= controller.makeFlexSearchMoietyClauses(structureStd) + ")";
log.trace("search hash: {}", hash);
assertTrue(hash.contains("root_moieties_properties_EXACT_HASH"));
assertTrue(hash.contains("root_moieties_properties_STEREO_INSENSITIVE_HASH"));
}

@Test
Expand All @@ -155,11 +160,10 @@ public void runFlexSearchQueryTartaric() throws Exception {
.build()
.instrument()
.getStructure();
String sins= structureStd.getStereoInsensitiveHash();
SubstanceController controller = new SubstanceController();
AutowireHelper.getInstance().autowireAndProxy(controller);

String hash= controller.makeFlexSearch(structureStd, false);
String hash= controller.makeFlexSearch(structureStd);
log.trace("search hash: {}", hash);
SearchRequest request = new SearchRequest.Builder()
.kind(Substance.class)
Expand All @@ -177,7 +181,6 @@ public void runFlexSearchQueryTartaric() throws Exception {
@WithMockUser(value = "admin", roles = "Admin")
public void runFlexSearchQuerySodiumTartrate() throws Exception {
String molfileSource = "molfiles/sodium_tartrate.mol";

File molfile = new ClassPathResource(molfileSource).getFile();
Structure structureStd = structureProcessor.taskFor(Files.readString(molfile.toPath()))
.standardize(true)
Expand All @@ -187,7 +190,7 @@ public void runFlexSearchQuerySodiumTartrate() throws Exception {
SubstanceController controller = new SubstanceController();
AutowireHelper.getInstance().autowireAndProxy(controller);

String hash = controller.makeFlexSearch(structureStd, false);
String hash = controller.makeFlexSearch(structureStd);
log.trace("search hash: {}", hash);
SearchRequest request = new SearchRequest.Builder()
.kind(Substance.class)
Expand All @@ -210,10 +213,14 @@ public void runFlexPlusSearchQuerySodiumTartrate() throws Exception {
.build()
.instrument()
.getStructure();
Chemical cleanedChemical = chemicalUtils.stripSalts(structureStd.toChemical());
Structure cleanedStructure = new Structure();
cleanedStructure.molfile = cleanedChemical.toMol();

SubstanceController controller = new SubstanceController();
AutowireHelper.getInstance().autowireAndProxy(controller);

String hash = controller.makeFlexSearch(structureStd,true);
String hash = controller.makeFlexSearch(cleanedStructure);
log.trace("search hash: {}", hash);
SearchRequest request = new SearchRequest.Builder()
.kind(Substance.class)
Expand All @@ -223,7 +230,7 @@ public void runFlexPlusSearchQuerySodiumTartrate() throws Exception {
log.trace("search results: (total: {})", substances.size());
substances.forEach(s-> log.trace("ID {} - {}", s.uuid, s.getName()));

int expectedNumber = 1;
int expectedNumber = 7;
assertEquals(expectedNumber, substances.size());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -586,3 +586,4 @@ gsrs.importAdapterFactories.substances =
}
]

gsrs.substance.structures.saltFilePath=salt_data_public.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package gsrs.module.substance;

import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration bean bound to properties under the {@code gsrs.substance.structures}
 * prefix. Accessors and {@code equals}/{@code hashCode}/{@code toString} are generated by
 * Lombok's {@code @Data}.
 */
@Slf4j
@Data
@Configuration
@ConfigurationProperties(prefix = "gsrs.substance.structures")
public class StructureHandlingConfiguration {

    /**
     * Value of {@code gsrs.substance.structures.saltFilePath}.
     * NOTE(review): presumably the location of the salt data file; how the path is resolved
     * (classpath vs. file system) is decided by the consumer and is not visible here.
     */
    private String saltFilePath;
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import gsrs.module.substance.standardizer.NameStandardizerConfiguration;
import gsrs.module.substance.standardizer.StructureStandardizerConfiguration;
import gsrs.module.substance.standardizer.SubstanceSynchronizer;
import gsrs.module.substance.utils.ChemicalUtils;
import gsrs.module.substance.utils.MolWeightCalculatorProperties;
import gsrs.module.substance.utils.SubstanceMatchViewGenerator;
import gsrs.module.substance.utils.SubstanceResultListRecordGenerator;
Expand Down Expand Up @@ -61,7 +62,9 @@
ImportMetadataLegacySearchService.class,
SubstanceFieldNameDecoratorConfiguration.class,
SubstanceSynchronizer.class,
StructureProcessorConfiguration.class
StructureProcessorConfiguration.class,
StructureHandlingConfiguration.class,
ChemicalUtils.class
})
public class SubstanceCoreConfiguration {

Expand Down
Loading