-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaddpapers.py
67 lines (57 loc) · 1.95 KB
/
addpapers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import queryCiteFile
import librarybase
import pywikibot
from epmclib.getPMCID import getPMCID
from epmclib.exceptions import IDNotResolvedException
import queue
import threading
import time
def rununthreaded():
    """Process the PMC citations sequentially on the calling thread.

    Loads the citation file, selects the rows whose ID type is ``'pmc'``,
    skips the first 10513 of them and passes each remaining row to
    ``addpaper``.  The index handed to ``addpaper`` starts at 0 for the
    first row *after* the skipped ones, not at its position in the full
    list.
    """
    pmc_rows = queryCiteFile.CiteFile().findRowsWithIDType('pmc')
    # NOTE(review): 10513 looks like a hard-coded resume offset -- confirm.
    remaining = pmc_rows[10513:]
    for position, row in enumerate(remaining):
        addpaper(position, row)
def runthreaded():
    """Process the PMC citations concurrently with a pool of threads.

    Starts ten threads running ``worker``, then enqueues one
    ``(index, citation)`` pair on the module-level queue ``q`` for every
    row of ID type ``'pmc'`` after the first 10513.  Once the queue has
    been fully processed (``q.join()``), one ``None`` is enqueued per
    thread as a shutdown signal and the threads are joined.
    """
    worker_count = 10
    threads = []
    for _ in range(worker_count):
        # Pass the function object itself.  Writing ``worker()`` here would
        # run the consumer loop on this thread, blocking forever on the
        # still-empty queue before any thread is ever created.
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)

    citefile = queryCiteFile.CiteFile()
    citations = citefile.findRowsWithIDType('pmc')
    # Each queue item is the (index, citation) tuple produced by enumerate.
    for indexed_citation in enumerate(citations[10513:]):
        q.put(indexed_citation)

    # Block until every enqueued item has been marked done by a consumer.
    q.join()

    # One shutdown signal per started thread.
    for _ in threads:
        q.put(None)
    for t in threads:
        t.join()
def worker():
    """Consume ``(idx, citation)`` pairs from the module-level queue ``q``.

    Runs until a ``None`` item is received (``runthreaded`` enqueues one
    ``None`` per thread as a shutdown signal), passing every other item to
    ``addpaper``.  Each item taken from the queue is acknowledged with
    ``q.task_done()`` so that ``q.join()`` in the producer can return.
    """
    while True:
        item = q.get()
        if item is None:
            # The shutdown signal is a bare None, not a pair, so it has to
            # be recognised before unpacking; otherwise the unpack raises
            # TypeError and the loop never ends cleanly.
            q.task_done()
            break
        idx, citation = item
        try:
            addpaper(idx, citation)
        finally:
            # Acknowledge the item even if addpaper raised, so a failure
            # cannot leave q.join() waiting forever.  The exception itself
            # still propagates.
            q.task_done()
def addpaper( idx, citation ):
    """Create a Librarybase journal-article item for one citation if needed.

    Prints the citation, then returns immediately when it is ``None``.
    Otherwise element 5 of the citation is used as the PMC identifier:
    its metadata is fetched through ``getPMCID`` and, unless an article
    with the resulting ``'pmcid'`` already exists, the metadata is written
    to a new ``JournalArticlePage``.  Progress and elapsed times are
    printed along the way.

    Args:
        idx: Number printed in the progress message for this citation.
        citation: Indexable citation row, or ``None`` to do nothing.

    Returns:
        None in every case.
    """
    began = time.time()
    print(citation)
    if citation is None:
        return

    pmc_ref = citation[5]
    print('trying to add {} number {}'.format(pmc_ref, idx))

    # The site and page objects are created before the lookup, in the same
    # order as always, in case constructing them has side effects.
    wiki = pywikibot.Site("librarybase", "librarybase")
    article = librarybase.JournalArticlePage(wiki)

    resolver = getPMCID(pmc_ref)
    try:
        resolver.getBBasicMetadata()
    except IDNotResolvedException:
        print('Couldn\'t find in EPMC:' + pmc_ref)
        return

    metadata = resolver.metadata
    print("Got metadata in:" + str(time.time()-began))

    pmcid = metadata['pmcid']
    if article.articleAlreadyExists(pmcid):
        print("{} already exists. Doing nothing".format(pmcid))
        return

    print('Item doesn\'t seem to exist. Setting metadata for: ' + pmcid)
    article.setMetaData(metadata)
    print("set metadata in" + str(time.time()-began))
# Module-level work queue: runthreaded() puts items on it and worker() takes
# them off.  It must exist before either of those functions is called.
q=queue.Queue()
# Script entry point: runs the single-threaded path as soon as the module is
# executed (there is no __main__ guard, so this also runs on import).
rununthreaded()