forked from IanMulvany/bds_scraping
-
Notifications
You must be signed in to change notification settings - Fork 0
/
common_functions.py
25 lines (22 loc) · 990 Bytes
/
common_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from simple_settings import settings
from elasticsearch import Elasticsearch
# Module-level Elasticsearch client used by the helpers in this file.
# Host and port are read from simple_settings when the module is imported.
ES = Elasticsearch([{'host': settings.ES_HOST, 'port': settings.ES_PORT}])
def get_item_by_key(item, item_key, request_body):
    # type: (dict, str, dict) -> dict
    """Copy ``item[item_key]`` into ``request_body``, defaulting to ``None``.

    Data may be missing in the Crossref deposit, so when the value cannot be
    looked up the key is still written to ``request_body`` with ``None``
    (Python's null value).

    Args:
        item: Source record to read the value from.
        item_key: Key to look up in ``item``; the same key is written to
            ``request_body``.
        request_body: Dict being built up; it is mutated in place.

    Returns:
        The same ``request_body`` object, now containing ``item_key``.
    """
    try:
        item_value = item[item_key]
    except (LookupError, TypeError):
        # LookupError: the key (or index) is absent from ``item``.
        # TypeError: ``item`` itself is not subscriptable, e.g. it is None.
        # Anything else is a genuine error and is allowed to propagate.
        item_value = None
    request_body[item_key] = item_value
    return request_body
def index_populated(index):
    # type: (str) -> bool
    """Report whether the given Elasticsearch index holds any documents.

    Requests the "docs" metric from the index stats API through the
    module-level ``ES`` client and reads the total document count for
    ``index`` out of the response.

    Args:
        index: Name of the index to inspect. The response is looked up by
            this exact name, so a KeyError is raised if the response has no
            entry under it.
            NOTE(review): presumably this must be a concrete index name
            rather than an alias or wildcard - confirm against callers.

    Returns:
        True when the reported document count is non-zero, False when it
        is zero.
    """
    stats = ES.indices.stats(index=index, metric="docs")
    docs_section = stats["indices"][index]["total"]["docs"]
    # bool() maps a count of 0 to False and any non-zero count to True.
    return bool(docs_section["count"])