Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 494e938

Browse files
committed
add tables to sync, inc version
1 parent 841e65e commit 494e938

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
setup(
88
name='sysrev',
9-
version='1.3.1',
9+
version='1.3.2',
1010
description='get sysrev project data and use the sysrev api',
1111
long_description=long_description,
1212
long_description_content_type='text/markdown', # Specify the content type here

sysrev/client.py

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,13 +111,33 @@ def sync(self, client, project_id):
111111
article_data_df = pd.DataFrame(article_data)
112112
article_data_df['resolve'] = article_data_df['resolve'].apply(json.dumps)
113113

114+
article_info = []
115+
for article_id in tqdm.tqdm(article_data_df['article-id'], total=n_articles):
116+
article_info.append(client.get_article_info(project_id, article_id))
117+
118+
full_texts = pd.DataFrame([{**ft} for a in article_info for ft in a['article'].get('full-texts', []) ])
119+
full_texts.columns = [col.split('/')[-1] for col in full_texts.columns]
120+
121+
auto_labels = pd.DataFrame([
122+
{**{'article-id': a['article'].get('article-id'), 'label-id': label_id}, **details} for a in article_info
123+
for label_id, details in a['article'].get('auto-labels', {}).items() ])
124+
auto_labels['answer'] = auto_labels['answer'].apply(json.dumps)
125+
126+
csl_citations = pd.DataFrame([
127+
{**{k: json.dumps(v) if isinstance(v, (dict, list)) else v for k, v in item['itemData'].items()},
128+
'article-id': a['article'].get('article-id')}
129+
for a in article_info for item in a['article'].get('csl-citation', {}).get('citationItems', [])])
130+
114131
# write everything to .sr/sr.sqlite
115132
conn = sqlite3.connect('.sr/sr.sqlite')
116133

117134
# Writing data to tables
118135
labels_df.to_sql('labels', conn, if_exists='replace', index=False)
119136
article_label_df.to_sql('article_label', conn, if_exists='replace', index=False)
120137
article_data_df.to_sql('article_data', conn, if_exists='replace', index=False)
138+
full_texts.to_sql('full_texts', conn, if_exists='replace', index=False)
139+
auto_labels.to_sql('auto_labels', conn, if_exists='replace', index=False)
140+
csl_citations.to_sql('csl_citations', conn, if_exists='replace', index=False)
121141

122142
conn.close()
123143
class Client():
@@ -185,7 +205,8 @@ def get_article_info(self, project_id, article_id):
185205
endpoint = f"{self.base_url}/api-json/article-info/{article_id}"
186206
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
187207
body = {"project-id": project_id,}
188-
return requests.get(endpoint, headers=headers, json=body)
208+
response = requests.get(endpoint, headers=headers, json=body)
209+
return response.json()['result']
189210

190211
def upload_jsonlines(self, file_path, project_id):
191212
url = f"{self.base_url}/api-json/import-files/{project_id}"

0 commit comments

Comments
 (0)