-
Notifications
You must be signed in to change notification settings - Fork 1
/
GenTempl.py
672 lines (637 loc) · 26.5 KB
/
GenTempl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
# %load ./GenTempl.py
import copy
import itertools
from collections import Counter, OrderedDict
import modin.pandas as mpd
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, rdChemReactions
from AnalgCompds import findfrag
from MainFunctions import initray, molfromsmiles
# def replace_deuterated(smi):
# return re.sub('\[2H\]', r'[H]', smi)
def gentemplate(
analoguerxnsfinal, ncpus=16, restart=True, specificity="loose", processall=False
):
if ncpus > 1:
if restart:
initray(num_cpus=ncpus)
analoguerxnsfinaldis = mpd.DataFrame(analoguerxnsfinal)
else:
analoguerxnsfinaldis = analoguerxnsfinal
templts = analoguerxnsfinaldis.apply(
gen_template_row,
specificity=specificity,
processall=processall,
axis=1,
result_type="reduce",
)
templtser = pd.Series(
data=templts.values, index=templts.index
) # Optional convert modin back to pandas
templtdf = pd.DataFrame(
data=templtser.tolist(),
index=templtser.index,
columns=["template", "LHSdata", "RHSdata", "msg4", "farfg", "unusedprod"],
)
analoguerxnstempl = copy.deepcopy(analoguerxnsfinal)
analoguerxnstempl[
["template", "LHSdata", "RHSdata", "msg4", "farfg", "unusedprod"]
] = templtdf
return analoguerxnstempl
def gen_template_row(row, specificity="loose", processall=False):
LHSdata = copy.deepcopy(row.LHSdata)
RHSdata = copy.deepcopy(row.RHSdata)
specmap = copy.deepcopy(row.specmap)
rnbmap = copy.deepcopy(row.rnbmap)
outfrag = copy.deepcopy(row.outfrag)
unusedanalogue = row.unusedanalogue
return gen_template(
LHSdata,
RHSdata,
specmap,
rnbmap=rnbmap,
outfrag=outfrag,
unusedanalogue=unusedanalogue,
specificity=specificity,
processall=processall,
)
# def testrow(row):
# LHSdata=copy.deepcopy(row.LHSdata)
# hatomssuspect=set()
# for analoguecompd in LHSdata:
# LHSdata_=LHSdata[analoguecompd]
# if 'templatefragidx' not in LHSdata_ or 'templatefrag' not in LHSdata_:
# if 'reacfrag' not in LHSdata_ or not LHSdata_['reacfrag']:
# continue
# for inst,fraginf in LHSdata_['reacfrag'].items():
# reacfragidx={atomidx for frag,matchidx in fraginf.items() for match in matchidx for atomidx in list(LHSdata_['fragloc'][inst][frag]['corrmatches'])[match]}
# if type(inst)==tuple:
# reacmol=molfromsmiles(LHSdata_['mappedsmiles'][inst[0]][inst[1]])
# else:
# reacmol=molfromsmiles(LHSdata_['mappedsmiles'][inst])
# hatomssuspect.update(gen_template_fragment(reacfragidx,reacmol))
# return hatomssuspect
def gen_template_fragment(
fragidx,
mappedmol,
specificity="loose",
atomsymbols=OrderedDict({}),
funcgroupmapnum=set(),
):
# breakpoint()
if not atomsymbols:
# breakpoint()
mappedmol = Chem.RemoveHs(mappedmol) # Hydrogens become implicit
clear_isotope(mappedmol) # Redefining isotopes
hatomsidx = {
atom.GetIdx() for atom in mappedmol.GetAtoms() if atom.GetSymbol() == "H"
}
if fragidx.issubset(hatomsidx):
hatomsremaining = {}
else:
hatomsremaining = fragidx.intersection(hatomsidx)
if hatomsremaining: # Likely isotopes
affectedatomidx = {
nb.GetIdx()
for hatom in hatomsremaining
for nb in mappedmol.GetAtomWithIdx(hatom).GetNeighbors()
}
else:
affectedatomidx = {}
fragidx = {
idx
for idx in fragidx
if idx < len(mappedmol.GetAtoms())
if idx not in hatomsremaining
}
for atom in Chem.AddHs(
mappedmol
).GetAtoms(): # Functional group atoms described in detail, everything else has hydrogens stripped
# breakpoint()
numHs_ = None
degree_ = None
if atom.GetSymbol() == "H" and atom.GetIdx() not in hatomsidx:
continue
refatom = mappedmol.GetAtomWithIdx(atom.GetIdx())
if affectedatomidx and atom.GetIdx() in affectedatomidx:
Hslist = [nb.GetSymbol() for nb in atom.GetNeighbors()]
numHs_ = Hslist.count("H")
degree_ = atom.GetDegree() - numHs_
if specificity == "loose":
if (
funcgroupmapnum
and atom.HasProp("molAtomMapNumber")
and atom.GetAtomMapNum() in funcgroupmapnum
):
# if refatom.GetDegree()==1: #Terminal atom
symbol = get_strict_smarts_for_atom(
refatom, numHs_=numHs_, degree_=degree_
)
else:
symbol = atom.GetSmarts(
isomericSmiles=False
) # General SMARTS to yield a generalizable template
else:
symbol = get_strict_smarts_for_atom(
refatom, numHs_=numHs_, degree_=degree_
)
atomsymbols.update({atom.GetIdx(): symbol})
if specificity == "loose":
frag = Chem.MolFragmentToSmiles(
mappedmol, fragidx, atomSymbols=list(atomsymbols.values())
)
else: # Closer to rdchiral template
frag = Chem.MolFragmentToSmiles(
mappedmol,
fragidx,
atomSymbols=list(atomsymbols.values()),
allHsExplicit=True,
allBondsExplicit=True,
)
return frag, atomsymbols, fragidx
def resolvelargetemplate(fragidx, mol):
# breakpoint()
atomidxbound = set()
for atom in [mol.GetAtomWithIdx(idx) for idx in fragidx]:
if (
any([nb.GetIdx() not in fragidx for nb in atom.GetNeighbors()])
or atom.GetDegree() == 1
):
atomidxbound.add(atom.GetIdx())
if len(atomidxbound) > 2:
fragcurrlist = []
fragidxlist = []
fragidxold = copy.deepcopy(fragidx)
comblist = list(itertools.combinations(atomidxbound, 2))
for comb in comblist:
nblist = [nb.GetIdx() for nb in mol.GetAtomWithIdx(comb[0]).GetNeighbors()]
if comb[1] in nblist:
continue
fragidx = set(Chem.GetShortestPath(mol, comb[0], comb[1]))
fragidx = fragidx.union(fragidxold)
fragcurr = Chem.MolFragmentToSmiles(mol, fragidx)
fragidxlist += [fragidx]
fragcurrlist += [fragcurr]
if not len(fragcurr.split(".")) > 1:
return fragidx
numsplitfrags = [len(fragcurr.split(".")) for fragcurr in fragcurrlist]
minfragnum = min(numsplitfrags)
minidx = [idx for idx, val in enumerate(numsplitfrags) if val == minfragnum]
if len(minidx) > 1:
fragidxlist = [fragidxlist[idx] for idx in minidx]
fragcurrlist = [fragcurrlist[idx] for idx in minidx]
lenfrags = [len(fragcurr) for fragcurr in fragcurrlist]
minfraglen = min(lenfrags)
minidx = [idx for idx, val in enumerate(lenfrags) if val == minfraglen]
fragidx = fragidxlist[minidx[0]]
# breakpoint()
return resolvelargetemplate(fragidx, mol)
return fragidx
def gen_template(
LHSdata,
RHSdata,
specmap,
rnbmap={},
outfrag={},
unusedanalogue=[],
specificity="loose",
processall=False, # Will process all templates regardless of valid fragments reacting
): # Invalid reactions outside fragments may yield general templates
farfg = []
unusedprod = []
addstr = []
alloutfrag = {}
funcgroupmapnum = set()
combinedmap = {**specmap, **rnbmap}
for analoguecompd in LHSdata:
LHSdata_ = LHSdata[analoguecompd]
if "templatefragidx" not in LHSdata_ or "templatefrag" not in LHSdata_:
LHSdata_["templatefragidx"] = {}
LHSdata_["templatefrag"] = {}
# breakpoint()
if (
"reacfrag" not in LHSdata_ or not LHSdata_["reacfrag"]
): # Hydrogen will not be mapped and won't show up in reaction center
if (analoguecompd not in unusedanalogue) and (
analoguecompd not in outfrag
): # Hydrogen
for inst in LHSdata_["fragloc"]:
if type(inst) == tuple:
mappedmol = molfromsmiles(
LHSdata_["mappedsmiles"][inst[0]][inst[1]]
)
else:
mappedmol = molfromsmiles(LHSdata_["mappedsmiles"][inst])
fragidx = {atom.GetIdx() for atom in mappedmol.GetAtoms()}
fragcurr, ratomsymbols, fragidx = gen_template_fragment(
fragidx,
mappedmol,
specificity=specificity,
atomsymbols=OrderedDict({}),
)
LHSdata_["templatefragidx"].update({inst: fragidx})
LHSdata_["templatefrag"].update({inst: fragcurr})
elif (
analoguecompd in outfrag
): # Reaction center completely outside fragments
alloutfrag.update({analoguecompd: outfrag[analoguecompd]})
# alloutfrag += [analoguecompd]
if not processall:
continue
if "reacfrag" in LHSdata_:
instlist = list(LHSdata_["reacfrag"].keys())
else:
instlist = []
outfraginst = []
if outfrag and analoguecompd in outfrag:
outfraginst = list(outfrag[analoguecompd].keys())
for inst in set(instlist + outfraginst):
reacfragidx = set()
funcgroupids = set()
if inst in instlist:
fraginf = LHSdata_["reacfrag"][inst]
# for inst, fraginf in LHSdata_["reacfrag"].items():
reacfragidx = {
atomidx
for frag, matchidx in fraginf.items()
for match in matchidx
for atomidx in list(LHSdata_["fragloc"][inst][frag]["corrmatches"])[
match
]
}
try:
funcgroupids = {
atomidx
for frag, matchidx in fraginf.items()
for match in matchidx
for atomidx in list(
LHSdata_["fragloc"][inst][frag]["funcgroupids"]
)[match]
}
except IndexError:
pass
if inst in outfraginst:
if inst not in instlist:
alloutfrag.update({analoguecompd:{inst:outfrag[analoguecompd][inst]}})
reacfragidx = reacfragidx.union(
{combinedmap[mapnum][2] for mapnum in outfrag[analoguecompd][inst]}
)
# addstr.append(
# "Template expanded to include atoms in RC outside relevant fragments"
# )
if isinstance(inst, tuple):
reacmol = molfromsmiles(LHSdata_["mappedsmiles"][inst[0]][inst[1]])
else:
reacmol = molfromsmiles(LHSdata_["mappedsmiles"][inst])
if specificity == "loose":
funcgroupmapnum.update(
{
reacmol.GetAtomWithIdx(funcgroupid).GetAtomMapNum()
for funcgroupid in funcgroupids
if reacmol.GetAtomWithIdx(funcgroupid).HasProp(
"molAtomMapNumber"
)
}
)
reacfragcurr, ratomsymbols, reacfragidx = gen_template_fragment(
reacfragidx,
reacmol,
specificity=specificity,
atomsymbols=OrderedDict({}),
funcgroupmapnum=funcgroupmapnum,
)
if len(reacfragcurr.split(".")) > 1:
reacfragidx = resolvelargetemplate(reacfragidx, reacmol)
# reacfragidx=set(Chem.GetShortestPath(reacmol,min(reacfragidx),max(reacfragidx)))
reacfragcurr, ratomsymbols, reacfragidx = gen_template_fragment(
reacfragidx,
reacmol,
specificity=specificity,
atomsymbols=ratomsymbols,
funcgroupmapnum=funcgroupmapnum,
)
# breakpoint()
if (
len(reacfragcurr.split(".")) > 1 and analoguecompd not in farfg
): # Fragments involved in the reaction are too far away, and are decomposed into more than 1 species. Invalid template.
farfg += [analoguecompd]
LHSdata_["templatefragidx"].update({inst: reacfragidx})
LHSdata_["templatefrag"].update({inst: reacfragcurr})
for atomidx in reacfragidx:
# breakpoint()
RHSdata = updateRHSdata(RHSdata, analoguecompd, inst, atomidx, specmap)
for prodid in RHSdata:
# breakpoint()
RHSdata_ = RHSdata[prodid]
if "templatefragidx" not in RHSdata_ or "templatefrag" not in RHSdata_:
RHSdata_["templatefragidx"] = {}
RHSdata_["templatefrag"] = {}
if "rxnatomidx" not in RHSdata_:
if RHSdata_["formula"] == "H2": # Template may NOT be balanced!
for i, inst in enumerate(RHSdata_["mappedsmiles"]):
if type(inst) == tuple:
for j, inst1 in enumerate(inst):
mappedmol = molfromsmiles(inst1)
prodfragidx = {
atom.GetIdx() for atom in mappedmol.GetAtoms()
}
(
prodfragcurr,
patomsymbols,
prodfragidx,
) = gen_template_fragment(
prodfragidx,
mappedmol,
specificity=specificity,
atomsymbols=OrderedDict({}),
funcgroupmapnum=funcgroupmapnum,
)
RHSdata_["templatefragidx"].update({(i, j): prodfragidx})
RHSdata_["templatefrag"].update({(i, j): prodfragcurr})
else:
mappedmol = molfromsmiles(inst)
prodfragidx = {atom.GetIdx() for atom in mappedmol.GetAtoms()}
prodfragcurr, patomsymbols, prodfragidx = gen_template_fragment(
prodfragidx,
mappedmol,
specificity=specificity,
atomsymbols=OrderedDict({}),
funcgroupmapnum=funcgroupmapnum,
)
RHSdata_["templatefragidx"].update({i: prodfragidx})
RHSdata_["templatefrag"].update({i: prodfragcurr})
elif prodid not in unusedprod:
unusedprod += [prodid]
continue
for inst, prodfragidx in RHSdata_[
"rxnatomidx"
].items(): # Ideally need to keep looping
# breakpoint()
if type(inst) == tuple:
prodmol = molfromsmiles(RHSdata_["mappedsmiles"][inst[0]][inst[1]])
else:
prodmol = molfromsmiles(RHSdata_["mappedsmiles"][inst])
prodfragcurr, patomsymbols, prodfragidx = gen_template_fragment(
prodfragidx,
prodmol,
specificity=specificity,
atomsymbols=OrderedDict({}),
funcgroupmapnum=funcgroupmapnum,
)
# breakpoint()
if len(prodfragcurr.split(".")) > 1:
prodfragidx = resolvelargetemplate(prodfragidx, prodmol)
# prodfragidx=set(Chem.GetShortestPath(prodmol,min(prodfragidx),max(prodfragidx)))
prodfragcurr, patomsymbols, prodfragidx = gen_template_fragment(
prodfragidx,
prodmol,
specificity=specificity,
atomsymbols=patomsymbols,
funcgroupmapnum=funcgroupmapnum,
)
if (
len(prodfragcurr.split(".")) > 1 and prodid not in farfg
): # Product fragment is still split
farfg += [prodid]
prodfragmapidx = {
atom.GetAtomMapNum()
for frag in prodfragcurr.split(".")
for atom in Chem.MolFromSmarts(frag).GetAtoms()
}
reacfragmapidx = {
atom.GetAtomMapNum()
for analoguecompd in LHSdata
for reacfrag in LHSdata[analoguecompd]["templatefrag"].values()
for atom in Chem.MolFromSmarts(reacfrag).GetAtoms()
if atom.HasProp("molAtomMapNumber")
}
if prodfragmapidx != reacfragmapidx: # Extra atoms in product
# breakpoint()
missingmapidx = prodfragmapidx - reacfragmapidx
for missingmap in missingmapidx:
if missingmap not in specmap:
addstr.append(
"Mapped atom "
+ str(missingmap)
+ " in product "
+ str(prodid)
+ " cannot be traced back to reactant"
)
break
analoguecompd = specmap[missingmap][0]
inst = specmap[missingmap][1]
idx = specmap[missingmap][2]
LHSdata[analoguecompd]["templatefragidx"][inst].add(idx)
reacmol = molfromsmiles(
LHSdata[analoguecompd]["mappedsmiles"][inst]
)
reacfragcurr = gen_template_fragment(
LHSdata[analoguecompd]["templatefragidx"][inst],
reacmol,
specificity=specificity,
atomsymbols=OrderedDict({}),
funcgroupmapnum=funcgroupmapnum,
)[0]
LHSdata[analoguecompd]["templatefrag"][inst] = reacfragcurr
RHSdata_["templatefragidx"].update({inst: prodfragidx})
RHSdata_["templatefrag"].update({inst: prodfragcurr})
if farfg:
addstr.append(
"Reacting functional groups are too far away in "
+ "species "
+ ", ".join([str(rctid) for rctid in farfg])
)
if unusedprod:
addstr.append(
"Product "
+ ", ".join([str(ID) for ID in unusedprod])
+ " is not produced from reacting fragments"
)
if outfrag:
addstr.append(
"Template expanded to include atoms in RC outside relevant fragments"
)
if alloutfrag:
addstr.append(
"Species "
+ ", ".join([str(ID) for ID in alloutfrag])
+ " react completely outside relevant fragments"
)
if not processall:
return "Error", LHSdata, RHSdata, ", ".join(addstr), farfg, unusedprod
if not addstr:
msg4 = "Valid"
else:
msg4 = ", ".join(addstr)
reacfrag = [
reacfragcurr
for analoguecompd in LHSdata
for reacfragcurr in LHSdata[analoguecompd]["templatefrag"].values()
if reacfragcurr
]
prodfrag = [
prodfragcurr
for prodid in RHSdata
for prodfragcurr in RHSdata[prodid]["templatefrag"].values()
if prodfragcurr
]
return (
">>".join([".".join(reacfrag), ".".join(prodfrag)]),
LHSdata,
RHSdata,
msg4,
farfg,
unusedprod,
)
def updateRHSdata(RHSdata, analoguecompd, inst, atomidx, specmap):
for (
val
) in (
specmap.values()
): # Don't consider rnbmap as unmapped reactant atoms will not appear on RHS
if val[0] == analoguecompd and val[1] == inst and val[2] == atomidx:
# breakpoint()
prodid = val[3]
prodinst = val[4]
prodatomidx = val[5]
if "rxnatomidx" not in RHSdata[prodid].keys():
RHSdata[prodid]["rxnatomidx"] = {prodinst: {prodatomidx}}
break
elif prodinst in RHSdata[prodid]["rxnatomidx"]:
RHSdata[prodid]["rxnatomidx"][prodinst].add(prodatomidx)
break
else:
RHSdata[prodid]["rxnatomidx"].update({prodinst: {prodatomidx}})
break
return RHSdata
def matchtemplaterow(row, samplerxn):
LHSdata = copy.deepcopy(row.LHSdata)
template = copy.deepcopy(row.template)
return matchtemplate(samplerxn, LHSdata, template)
def matchtemplate(
samplerxn, LHSdata, template, exact=False
): # Pass in similarly sized fragments
# breakpoint()
samplerxn_ = rdChemReactions.ReactionFromSmarts(samplerxn, useSmiles=True)
samplercts = [
Chem.MolToSmiles(samplerct) for samplerct in samplerxn_.GetReactants()
]
samplerctscounter = Counter(samplercts)
sampleprods = [
Chem.MolToSmiles(sampleprod) for sampleprod in samplerxn_.GetProducts()
]
sampleprodscounter = Counter(sampleprods)
templaterxn = rdChemReactions.ReactionFromSmarts(template, useSmiles=True)
rdChemReactions.RemoveMappingNumbersFromReactions(templaterxn)
templprods = [Chem.MolToSmiles(prod) for prod in templaterxn.GetProducts()]
templprodscounter = Counter(templprods)
valid = True
for ID0 in LHSdata:
if LHSdata[ID0]["templatefrag"]:
if "reacfrag" not in LHSdata[ID0] or not LHSdata[ID0]["reacfrag"]:
fraglist = [
Chem.MolToSmiles(Chem.MolFromSmarts(frag))
for inst in LHSdata[ID0]["fragloc"]
for frag in LHSdata[ID0]["fragloc"][inst]
]
else:
fraglist = [
Chem.MolToSmiles(Chem.MolFromSmarts(frag))
for inst in LHSdata[ID0]["reacfrag"]
for frag in LHSdata[ID0]["reacfrag"][inst]
]
fraglistcounter = Counter(fraglist)
if set(fraglistcounter.keys()).issubset(set(samplerctscounter.keys())):
rem = Counter()
rem.update(samplerctscounter)
rem.subtract(fraglistcounter)
samplerctscounter = {i: rem[i] for i in rem if rem[i] > 0}
else:
valid = False
break
if samplerctscounter:
valid = False
if not valid:
return valid
rem = Counter()
rem.update(sampleprodscounter)
rem.subtract(templprodscounter)
sampleprodscounter2 = {i: rem[i] for i in rem if rem[i] > 0}
templprodscounter2 = {i: rem[i] for i in rem if rem[i] < 0}
if sampleprodscounter2:
if not exact:
for sampleprod in sampleprodscounter2:
for templprod in templprodscounter2:
match = findfrag(
templprod, sampleprod, fragment=True, returnindices=False
)
if match:
sampleprodscounter2[sampleprod] -= templprodscounter2[templprod]
if sampleprodscounter2[sampleprod] > 0:
valid = False
return valid
return valid
def get_strict_smarts_for_atom(atom, numHs_=None, degree_=None, charge_=None):
"""
For an RDkit atom object, generate a SMARTS pattern that
matches the atom as strictly as possible, taken from rdChiral
"""
USE_STEREOCHEMISTRY = True
symbol = atom.GetSmarts()
# if atom.GetSymbol() == 'H':
# symbol = '[#1]'
if numHs_ is not None:
numHs = numHs_
else:
numHs = atom.GetTotalNumHs()
if degree_ is not None:
degree = degree_
else:
degree = atom.GetDegree()
if charge_ is not None:
charge = charge_
else:
charge = atom.GetFormalCharge()
if "[" not in symbol:
symbol = "[" + symbol + "]"
# Explicit stereochemistry - *before* H
if USE_STEREOCHEMISTRY:
if atom.GetChiralTag() != Chem.rdchem.ChiralType.CHI_UNSPECIFIED:
if "@" not in symbol:
# Be explicit when there is a tetrahedral chiral tag
if atom.GetChiralTag() == Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW:
tag = "@"
elif atom.GetChiralTag() == Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW:
tag = "@@"
if ":" in symbol:
symbol = symbol.replace(":", ";{}:".format(tag))
else:
symbol = symbol.replace("]", ";{}]".format(tag))
if "H" not in symbol:
H_symbol = "H{}".format(numHs)
# Explicit number of hydrogens: include "H0" when no hydrogens present
if ":" in symbol: # stick H0 before label
symbol = symbol.replace(":", ";{}:".format(H_symbol))
else:
symbol = symbol.replace("]", ";{}]".format(H_symbol))
# Explicit degree
if ":" in symbol:
symbol = symbol.replace(":", ";D{}:".format(degree))
else:
symbol = symbol.replace("]", ";D{}]".format(degree))
# Explicit formal charge
if "+" not in symbol and "-" not in symbol:
charge_symbol = "+" if (charge >= 0) else "-"
charge_symbol += "{}".format(abs(charge))
if ":" in symbol:
symbol = symbol.replace(":", ";{}:".format(charge_symbol))
else:
symbol = symbol.replace("]", ";{}]".format(charge_symbol))
return symbol
def clear_isotope(mol):
[a.SetIsotope(0) for a in mol.GetAtoms()]
# analoguerxnsfinal = pd.read_pickle(
# "/home/aa2133/Impurity-Project/Input/Suzuki/Case6/DataProcessing/analoguerxnsfinal.pickle"
# )
# gen_template_row(analoguerxnsfinal.xs(45020110).iloc[0])