@@ -111,13 +111,33 @@ def sync(self, client, project_id):
111
111
article_data_df = pd .DataFrame (article_data )
112
112
article_data_df ['resolve' ] = article_data_df ['resolve' ].apply (json .dumps )
113
113
114
+ article_info = []
115
+ for article_id in tqdm .tqdm (article_data_df ['article-id' ], total = n_articles ):
116
+ article_info .append (client .get_article_info (project_id , article_id ))
117
+
118
+ full_texts = pd .DataFrame ([{** ft } for a in article_info for ft in a ['article' ].get ('full-texts' , []) ])
119
+ full_texts .columns = [col .split ('/' )[- 1 ] for col in full_texts .columns ]
120
+
121
+ auto_labels = pd .DataFrame ([
122
+ {** {'article-id' : a ['article' ].get ('article-id' ), 'label-id' : label_id }, ** details } for a in article_info
123
+ for label_id , details in a ['article' ].get ('auto-labels' , {}).items () ])
124
+ auto_labels ['answer' ] = auto_labels ['answer' ].apply (json .dumps )
125
+
126
+ csl_citations = pd .DataFrame ([
127
+ {** {k : json .dumps (v ) if isinstance (v , (dict , list )) else v for k , v in item ['itemData' ].items ()},
128
+ 'article-id' : a ['article' ].get ('article-id' )}
129
+ for a in article_info for item in a ['article' ].get ('csl-citation' , {}).get ('citationItems' , [])])
130
+
114
131
# write everything to .sr/sr.sqlite
115
132
conn = sqlite3 .connect ('.sr/sr.sqlite' )
116
133
117
134
# Writing data to tables
118
135
labels_df .to_sql ('labels' , conn , if_exists = 'replace' , index = False )
119
136
article_label_df .to_sql ('article_label' , conn , if_exists = 'replace' , index = False )
120
137
article_data_df .to_sql ('article_data' , conn , if_exists = 'replace' , index = False )
138
+ full_texts .to_sql ('full_texts' , conn , if_exists = 'replace' , index = False )
139
+ auto_labels .to_sql ('auto_labels' , conn , if_exists = 'replace' , index = False )
140
+ csl_citations .to_sql ('csl_citations' , conn , if_exists = 'replace' , index = False )
121
141
122
142
conn .close ()
123
143
class Client ():
def get_article_info(self, project_id, article_id):
    """Fetch the article-info payload for one article of a project.

    Issues a GET request to ``{base_url}/api-json/article-info/{article_id}``
    authenticated with the client's API key as a bearer token, passing the
    project id in the JSON request body.

    Args:
        project_id: Identifier of the project, sent as ``"project-id"``.
        article_id: Identifier of the article, interpolated into the URL.

    Returns:
        The value stored under the ``'result'`` key of the decoded JSON
        response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the decoded response has no ``'result'`` key.
    """
    endpoint = f"{self.base_url}/api-json/article-info/{article_id}"
    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json",
    }
    body = {"project-id": project_id}
    response = requests.get(endpoint, headers=headers, json=body)
    # Fail with the HTTP status instead of an opaque JSON/KeyError when the
    # server rejects the request (bad token, unknown article, server error).
    response.raise_for_status()
    return response.json()['result']
189
210
190
211
def upload_jsonlines (self , file_path , project_id ):
191
212
url = f"{ self .base_url } /api-json/import-files/{ project_id } "
0 commit comments