Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

when generation of canonical SMILES fails, return generic SMILES #350

Merged
merged 1 commit into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package example.chem;

import ix.core.chem.InchiStandardizer;
import ix.ginas.models.v1.GinasChemicalStructure;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import static org.junit.jupiter.api.Assertions.assertNotNull;

/**
 * Smoke tests for {@link InchiStandardizer#canonicalSmiles}: each test loads a molfile
 * fixture from the test classpath, asks the standardizer for a SMILES string, logs it,
 * and asserts that a non-null value came back.
 */
@Slf4j
public class SmilesGenerationTest {

    @Test
    void generateSmiles1() throws IOException {
        String smiles = canonicalSmilesFor("/molfiles/TG72BS085Y.mol");
        log.warn("canonical SMILES within first test: {}", smiles);
        assertNotNull(smiles);
    }

    @Test
    void generateSmiles2() throws IOException {
        String smiles = canonicalSmilesFor("/molfiles/TG72BS085Y_cleaned2.mol");
        log.warn("canonical SMILES: {}", smiles);
        assertNotNull(smiles);
    }

    /*
    No Sgroups
     */
    @Test
    void generateSmiles3() throws IOException {
        String smiles = canonicalSmilesFor("/molfiles/6L522LAQ9U.mol");
        log.warn("canonical SMILES: {}", smiles);
        assertNotNull(smiles);
    }

    /**
     * Reads a molfile from the classpath and runs it through a fresh
     * {@link InchiStandardizer}.
     *
     * @param resourcePath absolute classpath location of the molfile (leading {@code /})
     * @return whatever {@code canonicalSmiles} returns for that molfile
     * @throws IOException if the resource cannot be read
     */
    private static String canonicalSmilesFor(String resourcePath) throws IOException {
        // try-with-resources: the classpath stream was previously left open.
        try (InputStream molfileStream =
                SmilesGenerationTest.class.getResourceAsStream(resourcePath)) {
            // getResourceAsStream returns null for a missing resource; fail with the
            // path in the message instead of an NPE from inside IOUtils.
            assertNotNull(molfileStream, "test resource not found on classpath: " + resourcePath);

            String molfileText = IOUtils.toString(molfileStream, StandardCharsets.UTF_8);

            GinasChemicalStructure structure = new GinasChemicalStructure();
            structure.molfile = molfileText;

            return new InchiStandardizer().canonicalSmiles(structure, molfileText);
        }
    }

}
10 changes: 9 additions & 1 deletion gsrs-module-substance-example/src/test/resources/logback.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,13 @@

<root level="ERROR">
<appender-ref ref="STDOUT" />
</root>
</root>

<!-- additivity="false": STDOUT is also attached to <root>, so with the default
     additivity every event from this logger would be printed twice. -->
<logger name="example.chem" level="TRACE" additivity="false">
<appender-ref ref="STDOUT" />
</logger>

<!-- additivity="false": STDOUT is also attached to <root>, so with the default
     additivity every event from this logger would be printed twice. -->
<logger name="ix.core.chem" level="TRACE" additivity="false">
<appender-ref ref="STDOUT" />
</logger>
</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

Marvin 01132103242D

27 26 0 0 0 0 999 V2000
15.8843 -4.6227 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
15.6260 -5.4107 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
15.2100 -4.1474 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
14.8148 -5.4107 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
14.5590 -4.6227 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
16.6698 -4.3359 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
14.3342 -6.0488 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
15.2100 -3.3180 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
18.8244 -4.3334 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0
18.8193 -3.4963 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
18.8193 -5.1756 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
17.9822 -4.3334 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
19.6641 -4.3334 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
12.8151 -5.6458 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
12.0297 -4.5736 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
13.3008 -5.0025 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12.8151 -4.3127 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12.0297 -5.3900 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
11.3115 -4.1732 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
11.3115 -5.7905 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
10.5984 -4.5736 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
10.5984 -5.3900 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12.7893 -2.1012 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
9.8751 -4.1474 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
13.0864 -3.4989 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12.5283 -2.8788 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
20.6872 -4.3334 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
12 9 1 0 0 0 0
13 9 1 0 0 0 0
5 3 1 0 0 0 0
6 1 2 0 0 0 0
7 4 2 0 0 0 0
8 3 1 0 0 0 0
5 4 1 0 0 0 0
2 1 1 0 0 0 0
3 1 1 0 0 0 0
4 2 1 0 0 0 0
10 9 2 0 0 0 0
15 18 2 0 0 0 0
16 14 1 0 0 0 0
17 16 2 0 0 0 0
18 14 1 0 0 0 0
19 15 1 0 0 0 0
20 18 1 0 0 0 0
21 22 1 0 0 0 0
22 20 2 0 0 0 0
23 26 1 0 0 0 0
24 21 1 0 0 0 0
25 17 1 0 0 0 0
26 25 1 0 0 0 0
17 15 1 0 0 0 0
21 19 2 0 0 0 0
11 9 2 0 0 0 0
M END
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@

Marvin 01132111442D

91 94 0 0 1 0 999 V2000
0.7638 -5.5871 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
0.7638 -6.4120 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2.1927 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.1927 -4.7621 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
2.1927 -3.9371 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -4.3495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -4.7621 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0
0.0493 -4.3495 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
0.0493 -3.5246 0.0000 N 0 0 2 0 0 0 0 0 0 0 0 0
-0.6651 -3.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -3.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.6651 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -4.3496 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.6651 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.0941 -5.5871 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-2.7261 -6.1174 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.4405 -5.7049 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-3.5268 -4.8844 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.3338 -4.7129 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.7463 -5.4274 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.1942 -6.0405 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.0493 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0493 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -3.9371 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -4.3495 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0
3.6217 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3362 -4.3495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0507 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0507 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3362 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.6217 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
4.2329 -6.8934 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.4376 -3.7176 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2.3769 -3.7176 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.9492 -10.2805 0.0000 N 0 3 0 0 0 0 0 0 0 0 0 0
-1.7019 -10.6592 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-0.9492 -9.5232 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.2607 -10.6936 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.8210 -9.9483 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.9492 -10.2805 0.0000 N 0 3 0 0 0 0 0 0 0 0 0 0
-1.7019 -10.6592 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0
-0.9492 -9.5232 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.2607 -10.6936 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.8210 -9.9483 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.8210 -9.9483 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -5.5871 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
0.7638 -6.4120 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2.1927 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.1927 -4.7621 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
2.1927 -3.9371 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1.4783 -4.3495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -4.7621 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0
0.0493 -4.3495 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0
0.0493 -3.5246 0.0000 N 0 0 2 0 0 0 0 0 0 0 0 0
-0.6651 -3.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -3.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.6651 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -4.3496 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.6651 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.0941 -5.5871 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-2.7261 -6.1174 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.4405 -5.7049 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-3.5268 -4.8844 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.3338 -4.7129 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.7463 -5.4274 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.1942 -6.0405 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.3797 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.0493 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0493 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
0.7638 -3.9371 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -4.3495 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0
3.6217 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3362 -4.3495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0507 -4.7621 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0507 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3362 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.6217 -5.5871 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -5.9996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9073 -6.8246 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
4.2329 -6.8934 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.4376 -3.7176 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
2.3769 -3.7176 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
41 44 1 0 0 0 0
1 2 1 1 0 0 0
1 3 1 0 0 0 0
3 4 1 0 0 0 0
5 3 2 0 0 0 0
5 6 1 0 0 0 0
6 7 1 1 0 0 0
6 8 1 0 0 0 0
9 8 1 0 0 0 0
9 10 1 0 0 0 0
10 11 1 1 0 0 0
11 12 1 0 0 0 0
11 13 1 0 0 0 0
14 10 1 0 0 0 0
14 15 1 0 0 0 0
16 14 2 0 0 0 0
16 17 1 0 0 0 0
17 18 1 0 0 0 0
18 19 1 0 0 0 0
20 19 1 0 0 0 0
20 21 1 0 0 0 0
21 22 1 0 0 0 0
23 22 1 0 0 0 0
24 23 1 0 0 0 0
20 24 1 0 0 0 0
17 25 2 0 0 0 0
16 26 1 0 0 0 0
26 27 2 0 0 0 0
1 26 1 0 0 0 0
9 28 1 1 0 0 0
1 9 1 0 0 0 0
6 29 1 0 0 0 0
29 30 1 0 0 0 0
30 31 2 0 0 0 0
31 32 1 0 0 0 0
32 33 2 0 0 0 0
34 33 1 0 0 0 0
35 34 2 0 0 0 0
35 30 1 0 0 0 0
36 35 1 0 0 0 0
36 37 2 0 0 0 0
5 36 1 0 0 0 0
34 38 1 0 0 0 0
29 39 1 6 0 0 0
29 40 1 0 0 0 0
41 42 1 0 0 0 0
41 43 2 0 0 0 0
46 49 1 0 0 0 0
46 47 1 0 0 0 0
46 48 2 0 0 0 0
52 53 1 1 0 0 0
52 54 1 0 0 0 0
52 77 1 0 0 0 0
52 60 1 0 0 0 0
54 55 1 0 0 0 0
56 54 2 0 0 0 0
56 57 1 0 0 0 0
56 87 1 0 0 0 0
57 58 1 1 0 0 0
57 59 1 0 0 0 0
57 80 1 0 0 0 0
60 59 1 0 0 0 0
60 61 1 0 0 0 0
60 79 1 1 0 0 0
61 62 1 1 0 0 0
65 61 1 0 0 0 0
62 63 1 0 0 0 0
62 64 1 0 0 0 0
65 66 1 0 0 0 0
67 65 2 0 0 0 0
67 68 1 0 0 0 0
67 77 1 0 0 0 0
68 69 1 0 0 0 0
68 76 2 0 0 0 0
69 70 1 0 0 0 0
71 70 1 0 0 0 0
71 72 1 0 0 0 0
71 75 1 0 0 0 0
72 73 1 0 0 0 0
74 73 1 0 0 0 0
75 74 1 0 0 0 0
77 78 2 0 0 0 0
80 81 1 0 0 0 0
80 90 1 6 0 0 0
80 91 1 0 0 0 0
81 82 2 0 0 0 0
86 81 1 0 0 0 0
82 83 1 0 0 0 0
83 84 2 0 0 0 0
85 84 1 0 0 0 0
86 85 2 0 0 0 0
85 89 1 0 0 0 0
87 86 1 0 0 0 0
87 88 2 0 0 0 0
M CHG 4 41 1 42 -1 46 1 47 -1
M STY 3 1 MUL 2 MUL 3 MUL
M SCN 1 1 HT
M SAL 1 8 41 42 43 44 46 47 48 49
M SPA 1 4 41 42 43 44
M SDI 1 4 -2.1219 -11.1136 -2.1219 -9.1032
M SDI 1 4 0.1593 -9.1032 0.1593 -11.1136
M SMT 1 2
M SCN 1 2 HT
M SAL 2 3 45 50 51
M SPA 2 1 45
M SDI 2 4 3.4010 -10.3683 3.4010 -9.5283
M SDI 2 4 4.2410 -9.5283 4.2410 -10.3683
M SMT 2 3
M SCN 1 3 HT
M SAL 3 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
M SAL 3 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
M SAL 3 15 31 32 33 34 35 36 37 38 39 40 52 53 54 55 56
M SAL 3 15 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
M SAL 3 15 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
M SAL 3 5 87 88 89 90 91
M SPA 3 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
M SPA 3 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
M SPA 3 10 31 32 33 34 35 36 37 38 39 40
M SDI 3 4 -5.1663 -7.3134 -5.1663 -2.6921
M SDI 3 4 5.4707 -2.6921 5.4707 -7.3134
M SMT 3 2
M END
Loading