Skip to content

Commit

Permalink
Merge pull request #350 from ncats/smiles_force
Browse files Browse the repository at this point in the history
when generation of canonical SMILES fails, return generic SMILES
  • Loading branch information
blueSwordfish authored Jul 10, 2024
2 parents b1a472d + d436d41 commit 098666f
Show file tree
Hide file tree
Showing 6 changed files with 456 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package example.chem;

import ix.core.chem.InchiStandardizer;
import ix.ginas.models.v1.GinasChemicalStructure;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import static org.junit.jupiter.api.Assertions.assertNotNull;

/**
 * Smoke tests for SMILES generation through {@link InchiStandardizer}.
 *
 * <p>Each test loads a molfile from the test classpath, hands it to
 * {@code InchiStandardizer.canonicalSmiles} and only checks that a non-null
 * string comes back; the resulting SMILES is logged for manual inspection.
 */
@Slf4j
public class SmilesGenerationTest {

    @Test
    void generateSmiles1() throws IOException {
        String smiles = canonicalSmilesOf("/molfiles/TG72BS085Y.mol");
        log.warn("canonical SMILES within first test: {}", smiles);
        assertNotNull(smiles);
    }

    @Test
    void generateSmiles2() throws IOException {
        String smiles = canonicalSmilesOf("/molfiles/TG72BS085Y_cleaned2.mol");
        log.warn("canonical SMILES: {}", smiles);
        assertNotNull(smiles);
    }

    /*
    No Sgroups
    */
    @Test
    void generateSmiles3() throws IOException {
        String smiles = canonicalSmilesOf("/molfiles/6L522LAQ9U.mol");
        log.warn("canonical SMILES: {}", smiles);
        assertNotNull(smiles);
    }

    /**
     * Builds a {@link GinasChemicalStructure} from the given molfile resource and
     * runs it through a fresh {@link InchiStandardizer}.
     *
     * @param resourcePath absolute classpath location of the molfile
     * @return whatever {@code canonicalSmiles} returns for that structure
     * @throws IOException if the resource cannot be read
     */
    private String canonicalSmilesOf(String resourcePath) throws IOException {
        String molfileText = readResource(resourcePath);
        GinasChemicalStructure structure = new GinasChemicalStructure();
        structure.molfile = molfileText;
        InchiStandardizer standardizer = new InchiStandardizer();
        return standardizer.canonicalSmiles(structure, molfileText);
    }

    /**
     * Reads a classpath resource fully as UTF-8 text.
     *
     * @param resourcePath absolute classpath location of the resource
     * @return the resource content
     * @throws IOException if reading the stream fails
     */
    private String readResource(String resourcePath) throws IOException {
        // try-with-resources closes the stream; a null resource is skipped on close.
        try (InputStream in = getClass().getResourceAsStream(resourcePath)) {
            // Fail with the offending path rather than an opaque error further down.
            assertNotNull(in, "test resource not found on classpath: " + resourcePath);
            return IOUtils.toString(in, StandardCharsets.UTF_8);
        }
    }

}
10 changes: 9 additions & 1 deletion gsrs-module-substance-example/src/test/resources/logback.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,13 @@

<root level="ERROR">
<appender-ref ref="STDOUT" />
</root>
</root>

<!-- TRACE output for the example.chem tests. additivity="false" stops events
     from also propagating to the root logger, which writes to the same STDOUT
     appender and would otherwise print every line twice. -->
<logger name="example.chem" level="TRACE" additivity="false">
    <appender-ref ref="STDOUT" />
</logger>

<!-- TRACE output for ix.core.chem. additivity="false" stops events from also
     propagating to the root logger, which writes to the same STDOUT appender
     and would otherwise print every line twice. -->
<logger name="ix.core.chem" level="TRACE" additivity="false">
    <appender-ref ref="STDOUT" />
</logger>
</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

Marvin 01132103242D

27 26 0 0 0 0 999 V2000
15.8843 -4.6227 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
15.6260 -5.4107 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
15.2100 -4.1474 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
14.8148 -5.4107 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
14.5590 -4.6227 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
16.6698 -4.3359 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
14.3342 -6.0488 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
15.2100 -3.3180 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
18.8244 -4.3334 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0
18.8193 -3.4963 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
18.8193 -5.1756 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
17.9822 -4.3334 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
19.6641 -4.3334 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
12.8151 -5.6458 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
12.0297 -4.5736 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
13.3008 -5.0025 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12.8151 -4.3127 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12.0297 -5.3900 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
11.3115 -4.1732 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
11.3115 -5.7905 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
10.5984 -4.5736 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
10.5984 -5.3900 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12.7893 -2.1012 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
9.8751 -4.1474 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
13.0864 -3.4989 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12.5283 -2.8788 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
20.6872 -4.3334 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
12 9 1 0 0 0 0
13 9 1 0 0 0 0
5 3 1 0 0 0 0
6 1 2 0 0 0 0
7 4 2 0 0 0 0
8 3 1 0 0 0 0
5 4 1 0 0 0 0
2 1 1 0 0 0 0
3 1 1 0 0 0 0
4 2 1 0 0 0 0
10 9 2 0 0 0 0
15 18 2 0 0 0 0
16 14 1 0 0 0 0
17 16 2 0 0 0 0
18 14 1 0 0 0 0
19 15 1 0 0 0 0
20 18 1 0 0 0 0
21 22 1 0 0 0 0
22 20 2 0 0 0 0
23 26 1 0 0 0 0
24 21 1 0 0 0 0
25 17 1 0 0 0 0
26 25 1 0 0 0 0
17 15 1 0 0 0 0
21 19 2 0 0 0 0
11 9 2 0 0 0 0
M END
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@

Marvin 01132111442D

91 94 0 0 1 0 999 V2000
0.7638 -5.5871 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
0.7638 -6.4120 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2.1927 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.1927 -4.7621 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
2.1927 -3.9371 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -4.3495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -4.7621 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0
0.0493 -4.3495 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
0.0493 -3.5246 0.0000 N 0 0 2 0 0 0 0 0 0 0 0 0
-0.6651 -3.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -3.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.6651 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -4.3496 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.6651 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.0941 -5.5871 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-2.7261 -6.1174 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.4405 -5.7049 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-3.5268 -4.8844 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.3338 -4.7129 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.7463 -5.4274 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.1942 -6.0405 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.0493 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0493 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -3.9371 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -4.3495 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0
3.6217 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3362 -4.3495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0507 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0507 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3362 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.6217 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
4.2329 -6.8934 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.4376 -3.7176 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2.3769 -3.7176 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.9492 -10.2805 0.0000 N 0 3 0 0 0 0 0 0 0 0 0 0
-1.7019 -10.6592 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-0.9492 -9.5232 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.2607 -10.6936 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.8210 -9.9483 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.9492 -10.2805 0.0000 N 0 3 0 0 0 0 0 0 0 0 0 0
-1.7019 -10.6592 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-0.9492 -9.5232 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.2607 -10.6936 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.8210 -9.9483 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.8210 -9.9483 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -5.5871 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
0.7638 -6.4120 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2.1927 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.1927 -4.7621 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
2.1927 -3.9371 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -4.3495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -4.7621 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0
0.0493 -4.3495 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
0.0493 -3.5246 0.0000 N 0 0 2 0 0 0 0 0 0 0 0 0
-0.6651 -3.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -3.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.6651 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -4.3496 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.6651 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.0941 -5.5871 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-2.7261 -6.1174 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.4405 -5.7049 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-3.5268 -4.8844 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.3338 -4.7129 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.7463 -5.4274 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.1942 -6.0405 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.0493 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0493 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -3.9371 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -4.3495 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0
3.6217 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3362 -4.3495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0507 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0507 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3362 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.6217 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
4.2329 -6.8934 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.4376 -3.7176 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2.3769 -3.7176 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
41 44 1 0 0 0 0
1 2 1 1 0 0 0
1 3 1 0 0 0 0
3 4 1 0 0 0 0
5 3 2 0 0 0 0
5 6 1 0 0 0 0
6 7 1 1 0 0 0
6 8 1 0 0 0 0
9 8 1 0 0 0 0
9 10 1 0 0 0 0
10 11 1 1 0 0 0
11 12 1 0 0 0 0
11 13 1 0 0 0 0
14 10 1 0 0 0 0
14 15 1 0 0 0 0
16 14 2 0 0 0 0
16 17 1 0 0 0 0
17 18 1 0 0 0 0
18 19 1 0 0 0 0
20 19 1 0 0 0 0
20 21 1 0 0 0 0
21 22 1 0 0 0 0
23 22 1 0 0 0 0
24 23 1 0 0 0 0
20 24 1 0 0 0 0
17 25 2 0 0 0 0
16 26 1 0 0 0 0
26 27 2 0 0 0 0
1 26 1 0 0 0 0
9 28 1 1 0 0 0
1 9 1 0 0 0 0
6 29 1 0 0 0 0
29 30 1 0 0 0 0
30 31 2 0 0 0 0
31 32 1 0 0 0 0
32 33 2 0 0 0 0
34 33 1 0 0 0 0
35 34 2 0 0 0 0
35 30 1 0 0 0 0
36 35 1 0 0 0 0
36 37 2 0 0 0 0
5 36 1 0 0 0 0
34 38 1 0 0 0 0
29 39 1 6 0 0 0
29 40 1 0 0 0 0
41 42 1 0 0 0 0
41 43 2 0 0 0 0
46 49 1 0 0 0 0
46 47 1 0 0 0 0
46 48 2 0 0 0 0
52 53 1 1 0 0 0
52 54 1 0 0 0 0
52 77 1 0 0 0 0
52 60 1 0 0 0 0
54 55 1 0 0 0 0
56 54 2 0 0 0 0
56 57 1 0 0 0 0
56 87 1 0 0 0 0
57 58 1 1 0 0 0
57 59 1 0 0 0 0
57 80 1 0 0 0 0
60 59 1 0 0 0 0
60 61 1 0 0 0 0
60 79 1 1 0 0 0
61 62 1 1 0 0 0
65 61 1 0 0 0 0
62 63 1 0 0 0 0
62 64 1 0 0 0 0
65 66 1 0 0 0 0
67 65 2 0 0 0 0
67 68 1 0 0 0 0
67 77 1 0 0 0 0
68 69 1 0 0 0 0
68 76 2 0 0 0 0
69 70 1 0 0 0 0
71 70 1 0 0 0 0
71 72 1 0 0 0 0
71 75 1 0 0 0 0
72 73 1 0 0 0 0
74 73 1 0 0 0 0
75 74 1 0 0 0 0
77 78 2 0 0 0 0
80 81 1 0 0 0 0
80 90 1 6 0 0 0
80 91 1 0 0 0 0
81 82 2 0 0 0 0
86 81 1 0 0 0 0
82 83 1 0 0 0 0
83 84 2 0 0 0 0
85 84 1 0 0 0 0
86 85 2 0 0 0 0
85 89 1 0 0 0 0
87 86 1 0 0 0 0
87 88 2 0 0 0 0
M CHG 4 41 1 42 -1 46 1 47 -1
M STY 3 1 MUL 2 MUL 3 MUL
M SCN 1 1 HT
M SAL 1 8 41 42 43 44 46 47 48 49
M SPA 1 4 41 42 43 44
M SDI 1 4 -2.1219 -11.1136 -2.1219 -9.1032
M SDI 1 4 0.1593 -9.1032 0.1593 -11.1136
M SMT 1 2
M SCN 1 2 HT
M SAL 2 3 45 50 51
M SPA 2 1 45
M SDI 2 4 3.4010 -10.3683 3.4010 -9.5283
M SDI 2 4 4.2410 -9.5283 4.2410 -10.3683
M SMT 2 3
M SCN 1 3 HT
M SAL 3 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
M SAL 3 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
M SAL 3 15 31 32 33 34 35 36 37 38 39 40 52 53 54 55 56
M SAL 3 15 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
M SAL 3 15 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
M SAL 3 5 87 88 89 90 91
M SPA 3 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
M SPA 3 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
M SPA 3 10 31 32 33 34 35 36 37 38 39 40
M SDI 3 4 -5.1663 -7.3134 -5.1663 -2.6921
M SDI 3 4 5.4707 -2.6921 5.4707 -7.3134
M SMT 3 2
M END
Loading

0 comments on commit 098666f

Please sign in to comment.