Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhanced clean-up of vocabularyfragment structures #343

Merged
merged 2 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package example.chem;

import gov.nih.ncats.molwitch.Chemical;
import ix.core.chem.Chem;
import ix.ginas.models.v1.FragmentVocabularyTerm;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.io.IOException;

/**
 * Exercises {@code Chem.RemoveQueryFeaturesForPseudoInChI} on a fragment
 * structure that carries R-group placeholders, checking that an InChIKey and
 * a SMILES string can still be generated from the cleaned chemical.
 */
@Slf4j
public class VocabFragmentCleanupTest {

    /** Fragment structure (SMILES followed by an R-group annotation block) shared by both tests. */
    private static final String FRAGMENT_STRUCTURE = "[*]N[C@@H](CS[*])C([*])=O |$_R1;;;;;_R3;;_R2;$|";

    /**
     * Stores the shared structure on a vocabulary term, keeps only the text
     * before the first space, parses it and runs the query-feature clean-up.
     */
    private static Chemical parseAndCleanFragment() throws IOException {
        FragmentVocabularyTerm term = new FragmentVocabularyTerm();
        term.setFragmentStructure(FRAGMENT_STRUCTURE);
        String structureOnly = term.getFragmentStructure().split(" ")[0];
        Chemical parsed = Chemical.parse(structureOnly);
        return Chem.RemoveQueryFeaturesForPseudoInChI(parsed);
    }

    @Test
    void testQueryFeatures() throws IOException {
        Chemical cleaned = parseAndCleanFragment();
        String inchiKey = cleaned.toInchi().getKey();
        log.debug("Created InChIKey: {}", inchiKey);
        Assertions.assertTrue(inchiKey.length() > 0);
    }

    @Test
    void testOutput() throws IOException {
        Chemical cleaned = parseAndCleanFragment();
        String smiles = cleaned.toSmiles();
        log.debug("Created SMILES: {}", smiles);
        Assertions.assertTrue(smiles.length() > 0);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ logging.level.ix.core.EntityFetcher=OFF
#logging.level.gsrs.module.substance.scrubbers=trace
#logging.level.example.exports.scrubbers=trace

# Trace-level logging for the ix.core.chem and example.chem packages.
# A log level value must be a bare level name; a trailing ';' makes it invalid.
logging.level.ix.core.chem=TRACE
logging.level.example.chem=TRACE

spring.jpa.database-platform=org.hibernate.dialect.H2Dialect
spring.jpa.defer-datasource-initialization=true

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ public Map<Column, ColumnValueRecipe<Substance>> createColumnRecipes(Parameters

try {
Chemical chem = s.toChemical();
cell.writeString(Inchi.asStdInchi(Chem.RemoveQueryAtomsForPseudoInChI(chem))
cell.writeString(Inchi.asStdInchi(Chem.RemoveQueryFeaturesForPseudoInChI(chem))
.getKey()
.replace("InChIKey=", ""));
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,109 +159,109 @@ private void savingSubstance(final Substance s, boolean newInsert) {
// Tyler Oct 4 2021: It turns out that setting the propagation settings helps isolate the session/
// transactions okay. We may need to do basic things like this in the future:
// transactionTemplate.setPropagationBehavior(TransactionDefinition.PROPAGATION_REQUIRES_NEW);

Relationship r1=s.getPrimaryDefinitionRelationships().get();
boolean worthChecking = false;
if(newInsert || r1.isDirty() || r1.relatedSubstance.isDirty() || r1.lastEdited==null ||
(r1.lastEdited!=null && r1.lastEdited.getTime()>TimeUtil.getCurrentTimeMillis()-60000)) {
worthChecking=true;
}

if(worthChecking ) {


// List<Substance> realPrimarysubs= substanceRepository.findSubstancesWithAlternativeDefinition(s);
//Note: trying to isolate in a transaction with propagation settings
// DOES prevent transaction problems.
TransactionTemplate transactionTemplate = new TransactionTemplate(transactionManager);
transactionTemplate.setReadOnly(true);
transactionTemplate.setPropagationBehavior(TransactionDefinition.PROPAGATION_REQUIRES_NEW);
List<Substance> realPrimarysubs= transactionTemplate.execute(status->{
List<Substance> subs= substanceRepository.findSubstancesWithAlternativeDefinition(s);
return subs;
});
if( s.getPrimaryDefinitionRelationships().isPresent()) {
Relationship r1 = s.getPrimaryDefinitionRelationships().get();
boolean worthChecking = false;
if (newInsert || r1.isDirty() || r1.relatedSubstance.isDirty() || r1.lastEdited == null ||
(r1.lastEdited != null && r1.lastEdited.getTime() > TimeUtil.getCurrentTimeMillis() - 60000)) {
worthChecking = true;
}

if (worthChecking) {


// List<Substance> realPrimarysubs= substanceRepository.findSubstancesWithAlternativeDefinition(s);
//Note: trying to isolate in a transaction with propagation settings
// DOES prevent transaction problems.
TransactionTemplate transactionTemplate = new TransactionTemplate(transactionManager);
transactionTemplate.setReadOnly(true);
transactionTemplate.setPropagationBehavior(TransactionDefinition.PROPAGATION_REQUIRES_NEW);
List<Substance> realPrimarysubs = transactionTemplate.execute(status -> {
List<Substance> subs = substanceRepository.findSubstancesWithAlternativeDefinition(s);
return subs;
});


log.debug("Got some relationships:" + realPrimarysubs.size());
Set<String> oldprimary = new HashSet<String>();
for (Substance pri : realPrimarysubs) {
oldprimary.add(pri.getUuid().toString());
}


log.debug("Got some relationships:" + realPrimarysubs.size());
Set<String> oldprimary = new HashSet<String>();
for(Substance pri:realPrimarysubs){
oldprimary.add(pri.getUuid().toString());
}
SubstanceReference sr = s.getPrimaryDefinitionReference();
if (sr != null) {

log.debug("Enforcing bidirectional relationship");
//remove old references
for (final Substance oldPri : realPrimarysubs) {
if (oldPri == null) {
continue;
}
//no need to remove the same relationship
if (oldPri.getUuid().toString().equals(sr.refuuid)) {
skipSaving = true;
continue;
}
log.debug("Removing stale bidirectional relationships");

SubstanceReference sr = s.getPrimaryDefinitionReference();
if (sr != null) {

log.debug("Enforcing bidirectional relationship");
//remove old references
for(final Substance oldPri: realPrimarysubs){
if(oldPri ==null){
continue;
}
//no need to remove the same relationship
if(oldPri.getUuid().toString().equals(sr.refuuid)) {
skipSaving=true;
continue;
}
log.debug("Removing stale bidirectional relationships");
transactionTemplate2.executeWithoutResult(stat -> {
entityPersistAdapter.performChangeOn(oldPri, obj -> {
List<Relationship> related = obj.removeAlternativeSubstanceDefinitionRelationship(s);
for (Relationship r : related) {
relationshipRepository.delete(r);
}
obj.forceUpdate();
substanceRepository.saveAndFlush(obj);


transactionTemplate2.executeWithoutResult(stat->{
entityPersistAdapter.performChangeOn(oldPri, obj->{
List<Relationship> related=obj.removeAlternativeSubstanceDefinitionRelationship(s);
for(Relationship r:related){
relationshipRepository.delete(r);
}
obj.forceUpdate();
substanceRepository.saveAndFlush(obj);

return Optional.of(obj);
});
});

return Optional.of(obj);
});
});


}
if(!skipSaving) {
log.debug("Expanding reference");
Substance subPrimary=null;
try{
subPrimary= transactionTemplate.execute(status->{
return substanceRepository.findBySubstanceReference(sr);
});
}catch(Exception e){
e.printStackTrace();
}
if (!skipSaving) {
log.debug("Expanding reference");
Substance subPrimary = null;
try {
subPrimary = transactionTemplate.execute(status -> {
return substanceRepository.findBySubstanceReference(sr);
});
} catch (Exception e) {
e.printStackTrace();
}

if (subPrimary != null) {
log.debug("Got parent sub, which is:" + EntityWrapper.of(subPrimary).getKey());
if (SubstanceDefinitionType.PRIMARY.equals(subPrimary.definitionType)) {

log.debug("Going to save");
Substance pri=subPrimary;
transactionTemplate2.executeWithoutResult(stat->{
entityPersistAdapter.performChangeOn(pri, obj -> {
if (!obj.addAlternativeSubstanceDefinitionRelationship(s)) {
log.info("Saving alt definition, now has:"
+ obj.getAlternativeDefinitionReferences().size());
}
obj.forceUpdate();
substanceRepository.saveAndFlush(obj);
return Optional.of(obj);
if (subPrimary != null) {
log.debug("Got parent sub, which is:" + EntityWrapper.of(subPrimary).getKey());
if (SubstanceDefinitionType.PRIMARY.equals(subPrimary.definitionType)) {

log.debug("Going to save");
Substance pri = subPrimary;
transactionTemplate2.executeWithoutResult(stat -> {
entityPersistAdapter.performChangeOn(pri, obj -> {
if (!obj.addAlternativeSubstanceDefinitionRelationship(s)) {
log.info("Saving alt definition, now has:"
+ obj.getAlternativeDefinitionReferences().size());
}
obj.forceUpdate();
substanceRepository.saveAndFlush(obj);
return Optional.of(obj);
});
});
});



}
}
}
}

}else{
log.error("Persist error. Alternative definition has no primary relationship");
} else {
log.error("Persist error. Alternative definition has no primary relationship");
}
}
} else {
log.warn("primary definitional relationship is missing");
}
}

Expand Down
23 changes: 17 additions & 6 deletions gsrs-module-substances-core/src/main/java/ix/core/chem/Chem.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
package ix.core.chem;

import gov.nih.ncats.molwitch.Atom;
import gov.nih.ncats.molwitch.Bond;
import gov.nih.ncats.molwitch.Chemical;
import ix.core.models.Structure;
import ix.core.util.LogUtil;
import lombok.extern.slf4j.Slf4j;

import ix.core.chem.ChemCleaner;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
Expand All @@ -31,21 +29,34 @@ public static void setFormula (Structure struc) {
}
}

public static Chemical RemoveQueryAtomsForPseudoInChI(Chemical c) {
public static Chemical RemoveQueryFeaturesForPseudoInChI(Chemical c) {
Chemical chemicalToUse = c;
if(c.hasQueryAtoms() || c.atoms().filter(at->"A".equals(at.getSymbol())).count()>0){
chemicalToUse = c.copy();
chemicalToUse.atoms()
.filter(at->at.isQueryAtom() || "A".equals(at.getSymbol()))
.forEach(a->{
a.setAtomicNumber(2);
//verify that this is setting a symbol as well
a.setAlias("He");
a.setMassNumber(6);
});
}
Chemical processBonds = chemicalToUse.copy();
//temporary diagnostics
/*System.out.println("total bonds: " + processBonds.getBondCount());
processBonds.bonds().forEach(b->{
System.out.printf("bond: %s; atom 1: %s; atom 2: %s\n", b.getBondType(), b.getAtom1().getSymbol(), b.getAtom2().getSymbol() );});*/
try{
return Chemical.parse(ChemCleaner.removeSGroupsAndLegacyAtomLists(chemicalToUse.toMol()));
Chemical finalChem= Chemical.parse(ChemCleaner.removeSGroupsAndLegacyAtomLists(processBonds.toMol()));
finalChem.bonds().filter(b->b.getBondType() == null || b.getBondType().equals(Bond.BondType.SINGLE_OR_DOUBLE) || b.getBondType().equals(b))
.forEach(b->{
b.setBondType(Bond.BondType.SINGLE);

});
return finalChem;
}catch(Exception e){
return chemicalToUse;
return processBonds;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,7 @@ public String encode(Chemical c1) throws Exception {
*/

private static String encodePseudoInchiKey(Chemical c) throws IOException{
Chemical chemicalToUse = Chem.RemoveQueryAtomsForPseudoInChI(c);
Chemical chemicalToUse = Chem.RemoveQueryFeaturesForPseudoInChI(c);
return chemicalToUse.toInchi().getKey();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,12 @@
import ix.core.EntityMapperOptions;
import ix.core.chem.Chem;
import ix.core.chem.ChemCleaner;
import ix.core.util.EntityUtils.EntityWrapper;
import ix.core.validator.GinasProcessingMessage;
import ix.ginas.models.converters.StereoConverter;
import ix.utils.Util;
import lombok.extern.slf4j.Slf4j;
import org.hibernate.annotations.GenericGenerator;
import org.hibernate.annotations.Type;
import org.hibernate.annotations.TypeDef;
import org.springframework.data.annotation.CreatedDate;
import org.springframework.data.annotation.LastModifiedBy;
import org.springframework.data.annotation.LastModifiedDate;

import javax.persistence.*;
Expand Down Expand Up @@ -106,7 +102,7 @@ public static class Stereo {
public static final Stereo UNKNOWN = new Stereo("UNKNOWN");

private String stereoType;

/**
 * Creates a stereo value holding the given type label.
 *
 * @param stereo the label stored as this instance's stereo type
 */
public Stereo(String stereo) {
    this.stereoType = stereo;
}
Expand Down Expand Up @@ -463,15 +459,15 @@ public String getInChIKey() {
@JsonIgnore
@Transient
public String getInChIKeyAndThrow() throws Exception{
return Inchi.asStdInchi(Chem.RemoveQueryAtomsForPseudoInChI(toChemical()), true).getKey();
return Inchi.asStdInchi(Chem.RemoveQueryFeaturesForPseudoInChI(toChemical()), true).getKey();

}


@JsonIgnore
@Transient
public String getInChIAndThrow() throws Exception{
return Inchi.asStdInchi(Chem.RemoveQueryAtomsForPseudoInChI(toChemical()), true).getInchi();
return Inchi.asStdInchi(Chem.RemoveQueryFeaturesForPseudoInChI(toChemical()), true).getInchi();

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,10 @@ private Optional<String> getHash(FragmentVocabularyTerm term) {
try {
String inputStructure = term.getFragmentStructure().split(" ")[0];
Chemical chem = Chemical.parse(inputStructure);
chem = Chem.RemoveQueryAtomsForPseudoInChI(chem);
chem = Chem.RemoveQueryFeaturesForPseudoInChI(chem);
return Optional.of(chem.toInchi().getKey());
} catch (IOException e) {
} catch (Exception e) {
log.error("Error processing fragment structure {}", term.getFragmentStructure());
e.printStackTrace();
return Optional.empty();
}
Expand Down Expand Up @@ -108,11 +109,12 @@ private void chemicalValidation(FragmentVocabularyTerm term, Map<String,List<Str

String smiles;
try {
smiles = chem.toSmiles();
Chemical cleanChemical = Chem.RemoveQueryFeaturesForPseudoInChI(chem);
smiles = cleanChemical.toSmiles();
// todo: may need to add warning with applicable change
if(!Optional.ofNullable(term.getSimplifiedStructure()).isPresent())
term.setSimplifiedStructure(smiles);
} catch (IOException e) {
} catch (Exception e) {
callback.addMessage(GinasProcessingMessage.ERROR_MESSAGE(
"Illegal chemical structure format: %s", term.getFragmentStructure()));
return;
Expand Down