-
Notifications
You must be signed in to change notification settings - Fork 0
/
pubmed.py
131 lines (102 loc) · 4.49 KB
/
pubmed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python
## Imports
from eutils import Client
# https://eutils.readthedocs.io/en/stable/modules.html#main-classes
class PubMedArticle(object):
    """Format one PubMed record as a bibliography entry.

    Wraps an ``eutils._internal.xmlfacades.pubmedarticle.PubmedArticle``
    instance (or any object exposing the same attributes) and renders it
    either as a one-line citation using ``**``/``_`` emphasis markers
    (style ``'default'``) or as a BibTeX ``@article`` entry (style
    ``'bibtex'``).
    """

    def __init__(self, pma):
        """Copy the citation fields off *pma*.

        Args:
            pma: Object providing ``abstract``, ``authors``, ``year``,
                ``title``, ``jrnl``, ``volume``, ``issue`` and ``pages``.
        """
        self.pma = pma
        self.abstract = pma.abstract
        self.authors = pma.authors
        self.year = pma.year
        self.title = pma.title
        self.journal = pma.jrnl
        self.volume = pma.volume
        self.issue = pma.issue
        self.pages = pma.pages

    def bibliography(self, style='default', **kwarg):
        """Return this article rendered in the requested *style*.

        Args:
            style: ``'default'`` or ``'bibtex'``.
            **kwarg: Forwarded to the formatter; ``highlight_names`` and
                ``highlight_journal`` are accepted.

        Returns:
            The formatted entry as a string.

        Raises:
            ValueError: If *style* is not a known style.
        """
        return self._stylizer(style, **kwarg)

    @staticmethod
    def enclose(s, char):
        """Return *s* with *char* placed immediately before and after it."""
        return f'{char}{s}{char}'

    @staticmethod
    def initialize_name(name, join='. '):
        """Reduce a given name to its initials.

        Each whitespace-separated token contributes its first letter in
        upper case; hyphenated tokens keep the hyphen between initials, e.g.
        ``'Jean-Paul Marie'`` becomes ``'J.-P. M.'`` with the default *join*.

        Args:
            name: Given name(s) to abbreviate.
            join: Separator placed between initials; its right-stripped form
                is also appended after the last initial.

        Returns:
            The abbreviated name, or ``''`` when *name* holds no letters to
            abbreviate.
        """
        suffix = join.rstrip()
        initials = []
        for token in name.split():
            # Skip empty pieces so stray hyphens ("Jean-") cannot index into
            # an empty string.
            letters = [part[0].upper() for part in token.split('-') if part]
            if letters:
                initials.append((suffix + '-').join(letters))
        if not initials:
            # Avoid returning a lone "." for an empty name.
            return ''
        return join.join(initials) + suffix

    def _stylizer(self, style, highlight_names=False, highlight_journal=False):
        """Dispatch to the formatter for *style*.

        Args:
            style: ``'default'`` or ``'bibtex'``.
            highlight_names: Author names to wrap in ``**`` (default style
                only); falsy disables highlighting.
            highlight_journal: When truthy, wrap the journal name in ``**``
                (default style only).

        Returns:
            The formatted entry as a string.

        Raises:
            ValueError: If *style* is not a known style.
        """
        if style == 'default':
            return self._format_default(highlight_names, highlight_journal)
        if style == 'bibtex':
            return self._format_bibtex()
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError(f"Unknown bibliography style: {style!r}")

    def _format_default(self, highlight_names, highlight_journal):
        """Build the one-line ``authors (year) "title" _journal_ vol(issue):pages`` form."""
        authors = self.authors
        if highlight_names:
            if isinstance(highlight_names, str):
                # A bare string would otherwise be matched by substring.
                highlight_names = [highlight_names]
            authors = [
                self.enclose(author, '**') if author in highlight_names else author
                for author in authors
            ]
        journal = self.journal
        if highlight_journal:
            journal = self.enclose(journal, '**')
        elements = [
            ', '.join(authors),
            f"({self.year})",
            f'"{self.title}"',
            f"_{journal}_",
            f"{self.volume}({self.issue}):{self.pages}",
        ]
        return ' '.join(elements)

    def _bibtex_author(self, author):
        """Format one author for a BibTeX ``author`` field.

        ``'Family, Given Names'`` becomes ``'Family, G. N.'``. A name with no
        comma (or nothing after it) is passed through unchanged, because
        there is no reliable way to tell the family name from the rest.
        """
        family_name, comma, given_name = author.partition(',')
        family_name = family_name.strip()
        given_name = given_name.strip()
        if not comma or not given_name:
            return family_name
        return f"{family_name}, {self.initialize_name(given_name)}"

    def _format_bibtex(self):
        """Build a BibTeX ``@article`` entry; the citation key comes first."""
        authors = self.authors or []
        year = '' if self.year is None else str(self.year)
        if authors:
            # Citation keys must not contain whitespace, so squeeze it out of
            # the first author's leading (pre-comma) name part.
            key_stem = ''.join(authors[0].split(',')[0].split())
        else:
            # Records without authors still need some key.
            key_stem = 'anonymous'
        fields = [
            ('author', ' and '.join(self._bibtex_author(a) for a in authors)),
            ('title', self.title),
            ('year', year),
            ('journal', self.journal),
            ('volume', self.volume),
            ('issue', self.issue),
            ('pages', self.pages),
            ('abstract', self.abstract),
        ]
        elements = [key_stem + year]
        elements.extend(f"{name} = {{{value}}}" for name, value in fields)
        return "@article{" + ',\n '.join(elements) + "\n}\n"
def search_pubmed_by_author(client, author_name, affliations=None):
    """Run an ESearch against PubMed for an author, optionally by affiliation.

    One ``(<author>[Author] AND <affiliation>[Affiliation])`` clause is built
    per affiliation and the clauses are combined with ``OR``. With no
    affiliations the search is for the author alone.

    Args:
        client: Object with an ``esearch(db=..., term=...)`` method, such as
            an ``eutils.Client`` instance.
        author_name: Author name to search for.
        affliations: Iterable of affiliation strings, a single string, or
            ``None``. (The parameter name keeps its historical spelling so
            existing keyword callers continue to work.)

    Returns:
        Whatever ``client.esearch`` returns for the constructed term.
    """
    if affliations is None:
        affliations = []
    elif isinstance(affliations, str):
        # Iterating a bare string would yield one clause per character.
        affliations = [affliations]
    term_list = [f'({author_name}[Author] AND {aff}[Affiliation])' for aff in affliations]
    if term_list:
        search_term = ' OR '.join(term_list)
    else:
        search_term = f'{author_name}[Author]'
    return client.esearch(db='pubmed', term=search_term)
def main(author_name, affiliations=None, api_key=None, style='default', highlight_names=None, highlight_journal=True):
    """Search PubMed for an author's records and print each as a citation.

    Args:
        author_name: Author name handed to ``search_pubmed_by_author``.
        affiliations: Affiliations handed to ``search_pubmed_by_author``.
        api_key: Passed to ``Client`` as ``api_key``.
        style: Bibliography style passed to ``PubMedArticle.bibliography``.
        highlight_names: Author names to emphasise in the output.
        highlight_journal: Whether to emphasise the journal name.

    Returns:
        None. Each formatted entry is written with ``print``.
    """
    client = Client(api_key=api_key)
    search_result = search_pubmed_by_author(client, author_name, affiliations)

    # Fetch every record first, then wrap them all, then print.
    fetched = list(client.efetch(db='pubmed', id=search_result.ids))
    articles = list(map(PubMedArticle, fetched))

    for article in articles:
        entry = article.bibliography(
            style=style,
            highlight_names=highlight_names,
            highlight_journal=highlight_journal,
        )
        print(entry)
if __name__ == '__main__':
    # Script entry point: read the search settings from info.yaml and print
    # the matching bibliography.
    import yaml

    yamlpath = "info.yaml"
    # Use a context manager so the file handle is closed after parsing, and
    # read it as UTF-8 rather than the platform default encoding.
    # NOTE(review): yaml.load with FullLoader is kept as in the original; if
    # info.yaml can ever come from an untrusted source, switch to
    # yaml.safe_load.
    with open(yamlpath, encoding="utf-8") as stream:
        info = yaml.load(stream, Loader=yaml.FullLoader)

    api_key = None
    author_name = info['Name']
    affiliations = info['Affiliations']
    bibliography_style = info["Style"]
    main(author_name, affiliations, api_key, bibliography_style, ['Park J'])