-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_data.py
69 lines (53 loc) · 2.05 KB
/
get_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
Task 1
Downloads the augmented GREC corpus JSON files into a specified directory
and copies ./data/cr.json alongside them.
"""
import argparse
import os
import pathlib
from shutil import copyfile
import requests
import config
from utils.file import directory_check
# Every corpus file lives under the same repository path and shares one
# file-name suffix, so the download list is assembled from the prefixes.
_GREC_BASE_URL = (
    "https://github.com/mjsumpter/"
    "google-relation-extraction-corpus-augmented/raw/master/"
)
_GREC_FILE_PREFIXES = ("dob", "education", "institution", "pob", "pod")

# URLs of the JSON files fetched by this script, in download order.
GREC_URLS = [
    f"{_GREC_BASE_URL}{prefix}_augment-200526.json"
    for prefix in _GREC_FILE_PREFIXES
]
def arg_parse(arg_list=None):
    """Parse the command-line options of the GREC download script.

    Args:
        arg_list: Optional sequence of argument strings to parse instead of
            the process command line. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``; an empty sequence is parsed as "no
            arguments given" and therefore yields the defaults.

    Returns:
        argparse.Namespace whose ``output_dir`` attribute holds the value of
        ``--output-directory`` / ``-out``, or ``config.JSON_DIR`` when the
        option is absent.
    """
    parser = argparse.ArgumentParser(description="Download Augmented GREC Corpus")
    # Save Directory
    parser.add_argument(
        "--output-directory",
        "-out",
        dest="output_dir",
        help=f"Output Directory Path, default {config.JSON_DIR}",
        type=str,
        default=config.JSON_DIR,
    )
    # parse_args(args=None) already falls back to sys.argv[1:], so arg_list is
    # forwarded as-is; a truthiness test would silently replace an explicit
    # empty list with the real command line.
    return parser.parse_args(args=arg_list)
def save_response_content(response, destination):
    """Write the body of ``response`` to the file at ``destination``.

    The file is opened in binary write mode and filled from
    ``response.iter_content``, requested with a chunk size of 32768.
    Empty chunks are skipped.
    """
    block_bytes = 32768
    with open(destination, "wb") as out_file:
        # Empty (keep-alive) chunks carry no data, so they are filtered out.
        out_file.writelines(
            piece for piece in response.iter_content(block_bytes) if piece
        )
def download_file(url, destination):
    """Download ``url`` and save it inside the ``destination`` directory.

    The saved file is named after the last path component of ``url``.

    Args:
        url: Address of the file to fetch.
        destination: Path of the directory the file is written into.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never stored under the data file's name.
    """
    target = os.path.join(destination, pathlib.Path(url).name)
    # The context managers close the session and release the connection even
    # when the request or the write fails.
    with requests.Session() as session:
        # stream=True defers the body so it is written chunk by chunk instead
        # of being loaded into memory first.
        with session.get(url, stream=True) as response:
            response.raise_for_status()
            save_response_content(response, target)
# Script body. NOTE(review): there is no ``__main__`` guard, so these
# statements also run when the module is imported.
args = arg_parse()
# Named ``output_dir`` rather than ``dir`` so the builtin dir() is not shadowed.
output_dir = args.output_dir
# Project helper from utils.file; presumably ensures the directory exists
# before anything is written into it - TODO confirm.
directory_check(output_dir)
for url in GREC_URLS:
    print(f"Downloading { pathlib.Path(url).name } ...")
    download_file(url, output_dir)
# The source path is relative to the current working directory.
print(f"Copying ./data/cr.json to {output_dir}")
copyfile('./data/cr.json', os.path.join(output_dir, 'cr.json'))