Skip to content

Commit

Permalink
added doi citation lookup
Browse files Browse the repository at this point in the history
  • Loading branch information
OllyButters committed Jan 15, 2016
1 parent 35e7d66 commit 1609495
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 46 deletions.
17 changes: 1 addition & 16 deletions config/lat_longs.csv
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
name,latitude,longitude
canonical name,latitude,longitude
23andMe INc,37.418977,-122.077228
"Abramson Research Center, The Children's Hospital of Philadelphia",39.94834,-75.194181
Addenbrooke's Hospital ,52.174791,0.140394
Albert Einstein College of Medicine,40.850826,-73.845623
Albert Einstein College of Medicine,40.850826,-73.845623
Birkbeck College London,51.521951,-0.130204
Blackpool Royal Victoria Hospital,53.817505,-3.035675
Boston University,42.3505,-71.105399
Bristol Homeopathic Hospital,51.454513,-2.58791
Bristol Royal Hospital for Children,51.457974,-2.596796
Bristol Royal Hospital for Children,51.457974,-2.596796
Bristol Royal Hospital for Children,51.457974,-2.596796
Bristol Royal Hospital for Children,51.457974,-2.596796
Bristol Royal Infirmary,51.458356,-2.596634
Bristol Speech & Language Therapy Research Unit,51.454513,-2.58791
Brunel University,51.532848,-0.472855
Expand All @@ -38,8 +34,6 @@ Eunice Kennedy Shriver National Institute of Child Health and Human Development,
EURAC Research,46.494312,11.347001
Federal University of Pelotas,-31.781431,-52.323364
"German Research Centre for Environmental Health, Neuherberg",48.20117,11.6086405
"German Research Centre for Environmental Health, Neuherberg",48.20117,11.6086405
"German Research Centre for Environmental Health, Neuherberg",48.20117,11.6086405
Gillings School of Global Public Health,35.906015,-79.05392
"GlaxoSmithKline, Verona, Italy",45.438384,10.991622
"Goldsmiths College, London",51.474982,-0.037137
Expand All @@ -65,8 +59,6 @@ Johns Hopkins University,39.324928,-76.623354
Karolinska Institutet,59.348148,18.023658
"Karolinska Institutet, Stockholm",59.348148,18.023658
King's College London,51.503335,-0.08974
King's College London,51.503335,-0.08974
King's College London,51.503335,-0.08974
Kingston University,51.403516,-0.303725
"KP Agricultural University, Peshawar",34.019833,71.468525
Lancaster University,54.010394,-2.787729
Expand Down Expand Up @@ -102,7 +94,6 @@ Queensland University of Technology,-27.455068,153.0131
Regensburg University Medical Center,49.01343,12.101624
"Retina Foundation of the Southwest, Dallas",32.78014,-96.800451
"Royal Free and University College Medical School, London",51.553419,-0.164645
"Royal Free and University College Medical School, London",51.553419,-0.164645
"Royal United Hospital, Bath",51.391922,-2.38914
"Rua Marechal Deodoro, 1160-3rd floor, CEP 96020-220, Pelotas, RS, Brazil",-31.763053,-52.342493
"Saarland University Hospital, Homburg",49.329583,7.339843
Expand All @@ -115,27 +106,21 @@ St George's Hospital London,51.426694,-0.174015
Syracuse University,43.039153,-76.135116
Technical University Munich,48.266149,11.671103
The Wellcome Trust Sanger Institute,52.078972,0.187583
The Wellcome Trust Sanger Institute,52.078972,0.187583
"Turning Point Alcohol and Drug Centre, Melbourne",-37.814107,144.96328
United Bristol Healthcare Trust,51.464527,-2.570859
University College London,51.524559,-0.13404
University College London,51.524559,-0.13404
"University Dental Hospital, Heath Park, Cardiff",51.506638,-3.192046
University Medical Center Groningen,53.219383,6.566502
University Medical Centre Utrecht,52.085677,5.179356
University of Aberdeen,57.16476,-2.101526
University of Adelaide,-34.920603,138.606228
University of Adelaide,-34.920603,138.606228
University of Alabama,33.214023,-87.539142
University of Amsterdam,52.355818,4.955726
University of Bath,51.377743,-2.326378
University of Belgrade,44.805895,20.475833
University of Birmingham,52.450817,-1.930513
University of Brighton,50.859087,-0.086689
University of Bristol,51.458417,-2.602979
University of Bristol,51.458417,-2.602979
University of Bristol,51.458417,-2.602979
University of Bristol,51.458417,-2.602979
University of California,34.413963,-119.848947
University of Cambridge,52.204267,0.114908
University of Chicago,41.529546,-88.134814
Expand Down
108 changes: 78 additions & 30 deletions source/add/citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@


def citations(pmids, papers):

import urllib2
import json
import datetime
import time
import csv
import logging

api_key='8024d746590aade6be6856a22a734783'
field='citedby-count'
url='http://api.elsevier.com/content/search/scopus'
Expand All @@ -40,40 +40,90 @@ def citations(pmids, papers):
#read the cache
try:
papers[this_pmid]['Extras']['Citations'] = cached_citations[this_pmid]['citation_count']
logging.info(str(this_pmid)+' not in citation cache')
logging.info(str(this_pmid)+' in citation cache')
except:
#Stick in a small nap so we aren't hammering the API too much
time.sleep(1)
request_string=url+'?apiKey='+api_key+'&field=citedby-count&query=PMID('+this_pmid+')'
logging.info(request_string)
response = urllib2.urlopen(request_string).read()
t=json.loads(response)

#sometimes this returns multiple entries e.g. 22935244

#try querying with the DOI first - there might not be a DOI
try:
citations = t['search-results']['entry'][0]['citedby-count']
#print citations
papers[this_pmid]['Extras']['Citations']=citations
cached_citations[this_pmid] = {}
cached_citations[this_pmid]['citation_count'] = citations
cached_citations[this_pmid]['date_downloaded'] = datetime.datetime.now()
except:
#there wasn't a citation count returned, so see if we can catch this.
request_string=url+'?apiKey='+api_key+'&field=citedby-count&query=DOI('+papers[this_pmid]['doi'][0]+')'
logging.info(request_string)
response = urllib2.urlopen(request_string).read()
t=json.loads(response)

try:
error = t['search-results']['entry'][0]['error']
if error == 'Result set was empty':
citations = t['search-results']['entry'][0]['citedby-count']
#print citations
papers[this_pmid]['Extras']['Citations']=citations
cached_citations[this_pmid] = {}
cached_citations[this_pmid]['citation_count'] = citations
cached_citations[this_pmid]['date_downloaded'] = datetime.datetime.now()
logging.info('Citation added via DOI')
except:
#there wasn't a citation count returned, so see if we can catch this.
try:
error = t['search-results']['entry'][0]['error']
if error == 'Result set was empty':
logging.info('No citation results from scopus using DOI %s %s',str(papers[this_pmid]['doi']),str(this_pmid))
except:
#a different error happened!
#log this
logging.info('No citation results from scopus for '+str(this_pmid))
#print 'No citations'
logging.warn('An unexpected error happened getting the citations via DOI!')
logging.warn(t)
print 'An unexpected error happened getting the citations via DOI!'
print request_string
print t
print t['search-results']['entry'][0]['error']
except:
logging.info('No DOI for PMID= '+this_pmid)

#The above could have failed at a couple of points - no DOI, or nothing returned from a DOI query
try:
papers[this_pmid]['Extras']['Citations']
except:
try:
#Now try with a PMID
request_string=url+'?apiKey='+api_key+'&field=citedby-count&query=PMID('+this_pmid+')'
logging.info(request_string)
response = urllib2.urlopen(request_string).read()
t=json.loads(response)

#sometimes this returns multiple entries e.g. 22935244

try:
citations = t['search-results']['entry'][0]['citedby-count']
#print citations
papers[this_pmid]['Extras']['Citations']=citations
cached_citations[this_pmid] = {}
cached_citations[this_pmid]['citation_count'] = citations
cached_citations[this_pmid]['date_downloaded'] = datetime.datetime.now()
logging.info('Citation added via PMID')
except:
#there wasn't a citation count returned, so see if we can catch this.
try:
error = t['search-results']['entry'][0]['error']
if error == 'Result set was empty':
#log this
logging.info('No citation results from scopus for '+str(this_pmid))
#print 'No citations'
except:
#a different error happened!
#log this
logging.warn('An unexpected error happened getting the citations!')
logging.warn(t)
print 'An unexpected error happened getting the citations!'
print request_string
print t

except:
#a different error happened!
#log this
logging.warn('An unexpected error happened getting the citations!')
logging.warn(t)
print 'An unexpected error happened getting the citations!'
print request_string
print t
pass

try:
papers[this_pmid]['Extras']['Citations']
except:
#If we get here then there is no citation.
logging.warn('No citations found for %s.',str(this_pmid))

csvfile = open('../cache/citations.csv', 'wb')
citation_file =csv.writer(csvfile)
Expand All @@ -82,5 +132,3 @@ def citations(pmids, papers):
temp_citation_count = cached_citations[this_citation]['citation_count']
temp_date_downloaded = cached_citations[this_citation]['date_downloaded']
citation_file.writerow([this_citation, str(temp_citation_count), temp_date_downloaded])


0 comments on commit 1609495

Please sign in to comment.