@@ -106,6 +106,7 @@ def transform_to_csv_and_xlsx(json_path: str) -> tuple[Optional[str], Optional[str]]:
 def save_dataset_metadata(
     dataset_id: str,
     source_url: str,
+    publisher_country: str,
     json_data: dict[str, Any],
     json_url: Optional[str],
     csv_url: Optional[str],
@@ -122,6 +123,7 @@ def save_dataset_metadata(
         dataset_id=dataset_id,
         source_url=source_url,
         publisher_name=publisher_name,
+        publisher_country=publisher_country,
         license_url=license_url,
         license_title=license_title,
         license_title_short=license_title_short,
@@ -135,9 +137,9 @@ def save_dataset_metadata(
         raise ProcessDatasetError(f"Failed to update metadata for dataset: {e}")


-def process_dataset(dataset_id: str, source_url: str) -> None:
+def process_dataset(dataset_id: str, registry_metadata: dict[str, str]) -> None:
     logger.info(f"Processing dataset {dataset_id}")
-    json_data = download_json(dataset_id, source_url)
+    json_data = download_json(dataset_id, registry_metadata["source_url"])
     validate_json(dataset_id, json_data)
     json_path = write_json_to_file(
         file_name=f"data/{dataset_id}/{dataset_id}.json",
@@ -149,7 +151,8 @@ def process_dataset(dataset_id: str, source_url: str) -> None:
     )
     save_dataset_metadata(
         dataset_id=dataset_id,
-        source_url=source_url,
+        source_url=registry_metadata["source_url"],
+        publisher_country=registry_metadata["country"],
         json_data=json_data,
         json_url=json_public_url,
         csv_url=csv_public_url,
@@ -158,7 +161,7 @@ def process_dataset(dataset_id: str, source_url: str) -> None:
     logger.info(f"Processed dataset {dataset_id}")


-def process_deleted_datasets(registered_datasets: dict[str, str]) -> None:
+def process_deleted_datasets(registered_datasets: dict[str, dict[str, str]]) -> None:
     stored_datasets = get_dataset_ids()
     deleted_datasets = stored_datasets - registered_datasets.keys()
     for dataset_id in deleted_datasets:
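Taken together, these signature changes imply that `fetch_registered_datasets()` (consumed in the next hunk) now returns one metadata dict per dataset id instead of a bare URL, carrying at least the `source_url` and `country` keys that `process_dataset` and `save_dataset_metadata` read. A minimal sketch of the assumed shape; the example values are hypothetical, not taken from this PR:

```python
# Assumed post-change return shape of fetch_registered_datasets():
# dataset id -> registry metadata dict. "source_url" and "country" are
# the only keys this diff reads; anything else would be speculative.
def fetch_registered_datasets() -> dict[str, dict[str, str]]:
    # Hypothetical example values for illustration only.
    return {
        "example-dataset": {
            "source_url": "https://example.org/example-dataset.json",
            "country": "GB",
        }
    }
```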
@@ -171,13 +174,17 @@ def process_registry() -> None:
     registered_datasets = fetch_registered_datasets()
     process_deleted_datasets(registered_datasets)
     errors: list[dict[str, Any]] = []
-    for dataset_id, url in registered_datasets.items():
+    for dataset_id, registry_metadata in registered_datasets.items():
         try:
-            process_dataset(dataset_id, url)
+            process_dataset(dataset_id, registry_metadata)
         except Exception as e:
             logger.warning(f"Failed to process dataset {dataset_id} with error {e}")
             errors.append(
-                {"dataset_id": dataset_id, "source_url": url, "message": str(e)}
+                {
+                    "dataset_id": dataset_id,
+                    "source_url": registry_metadata["source_url"],
+                    "message": str(e),
+                }
             )
     if errors:
         logger.error(
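For any caller of `process_dataset` outside this file, the migration is mechanical: pass the dataset's registry metadata dict wherever a bare URL was passed before. A hedged sketch, assuming the keys shown in the diff:

```python
# Before this change:
#     process_dataset("example-dataset", "https://example.org/example-dataset.json")
# After this change (metadata keys assumed from the diff):
process_dataset(
    "example-dataset",
    {"source_url": "https://example.org/example-dataset.json", "country": "GB"},
)
```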