From 79582b93b80dafeeec305e50732bfd73a980a476 Mon Sep 17 00:00:00 2001 From: AlessioNar Date: Wed, 16 Oct 2024 15:15:11 +0200 Subject: [PATCH 1/5] restructured document and added docstrings --- op_cellar/documents.py | 195 ++++++++++++++++++++++++++++++----------- 1 file changed, 145 insertions(+), 50 deletions(-) diff --git a/op_cellar/documents.py b/op_cellar/documents.py index 339b50e..de39d98 100644 --- a/op_cellar/documents.py +++ b/op_cellar/documents.py @@ -11,33 +11,46 @@ BASE_URL = 'http://publications.europa.eu/resource/cellar/' LOG_DIR = 'logs/' -# Function to get the current timestamp -def get_current_timestamp(): - return datetime.now().strftime('%Y-%m-%d_%H-%M-%S') -# Function to print a list to a file -def print_list_to_file(filename, lst): - with open(filename, 'w+') as f: - for item in lst: - f.write(item + '\n') +# Function to send a GET request to download a zip file for the given id under the CELLAR URI +def rest_get_call(id: str) -> requests.Response: + """ + Send a GET request to download a zip file for the given id under the CELLAR URI. -# Function to download a zip file and extract it -def extract_zip(response: requests.Response, folder_path: str): - try: - z = zipfile.ZipFile(io.BytesIO(response.content)) - z.extractall(folder_path) - except Exception as e: - logging.error(f"Error downloading zip: {e}") + Parameters + ---------- + id : str + The id of the resource to be retrieved. -# Function to process a single file -def process_single_file(response: requests.Response, folder_path: str, id: str): - out_file = folder_path + '/' + id + '.html' - os.makedirs(os.path.dirname(out_file), exist_ok=True) - with open(out_file, 'w+', encoding="utf-8") as f: - f.write(response.text) + Returns + ------- + requests.Response + The response from the server. -# Function to send a GET request to download a zip file for the given id under the CELLAR URI -def rest_get_call(id: str) -> requests.Response: + Notes + ----- + The request is sent with the following headers: + - Accept: application/xhtml+xml + - Accept-Language: eng + - Content-Type: application/x-www-form-urlencoded + - Host: publications.europa.eu + + Raises + ------ + requests.RequestException + If there is an error sending the request. + + See Also + -------- + requests : The underlying library used for making HTTP requests. + + Examples + -------- + >>> import requests + >>> response = rest_get_call('some_id') + >>> if response is not None: + ... print(response.status_code) + """ try: url = BASE_URL + id headers = { @@ -55,21 +68,89 @@ def rest_get_call(id: str) -> requests.Response: # Function to create a list of CELLAR ids from the given cellar_results JSON dictionary and return the list def get_cellar_ids_from_json_results(cellar_results): + """ + Extract CELLAR ids from a JSON dictionary. + + Parameters + ---------- + cellar_results : dict + A dictionary containing the response of the CELLAR SPARQL query + + Returns + ------- + list + A list of CELLAR ids. + + Notes + ----- + The function assumes that the JSON dictionary has the following structure: + - The dictionary contains a key "results" that maps to another dictionary. + - The inner dictionary contains a key "bindings" that maps to a list of dictionaries. + - Each dictionary in the list contains a key "cellarURIs" that maps to a dictionary. + - The innermost dictionary contains a key "value" that maps to a string representing the CELLAR URI. + + The function extracts the CELLAR id by splitting the CELLAR URI at "cellar/" and taking the second part. + + Examples + -------- + >>> cellar_results = { + ... "results": { + ... "bindings": [ + ... {"cellarURIs": {"value": "https://example.com/cellar/some_id"}}, + ... {"cellarURIs": {"value": "https://example.com/cellar/another_id"}} + ... ] + ... } + ... } + >>> cellar_ids = get_cellar_ids_from_json_results(cellar_results) + >>> print(cellar_ids) + ['some_id', 'another_id'] + """ results_list = cellar_results["results"]["bindings"] cellar_ids_list = [results_list[i]["cellarURIs"]["value"].split("cellar/")[1] for i in range(len(results_list))] return cellar_ids_list -# Function to log downloaded files -def log_downloaded_files(downloaded_files: list, dir_to_check: str): - in_dir_name = LOG_DIR + 'in_dir_lists/' - os.makedirs(os.path.dirname(in_dir_name), exist_ok=True) - print_list_to_file(in_dir_name + 'in_dir_' + get_current_timestamp() + '.txt', downloaded_files) +def download_documents(results, download_dir, nthreads=1): + """ + Download Cellar documents in parallel using multiple threads. + + Sends a REST query to the Publications Office APIs and downloads the documents + corresponding to the given results. + + Parameters + ---------- + results : dict + A dictionary containing the JSON results from the Publications Office APIs. + download_dir : str + The directory where the downloaded documents will be saved. + nthreads : int + The number of threads to use to make the request + + Notes + ----- + The function uses a separate thread for each subset of Cellar ids. + The number of threads can be adjusted by modifying the `nthreads` parameter. + """ + cellar_ids = get_cellar_ids_from_json_results(results) + + if not os.path.exists(LOG_DIR): + os.makedirs(LOG_DIR) + + threads = [] + for i in range(nthreads): + sub_list = cellar_ids[i::nthreads] + t = threading.Thread(target=process_range, args=(sub_list, os.path.join(download_dir, str(sub_list)))) + threads.append(t) + [t.start() for t in threads] + [t.join() for t in threads] + + +# Function to process a single file +def process_single_file(response: requests.Response, folder_path: str, id: str): + out_file = folder_path + '/' + id + '.html' + os.makedirs(os.path.dirname(out_file), exist_ok=True) + with open(out_file, 'w+', encoding="utf-8") as f: + f.write(response.text) -# Function to log missing ids -def log_missing_ids(missing_ids: list): - new_ids_dir_name = LOG_DIR + 'cellar_ids/' - os.makedirs(os.path.dirname(new_ids_dir_name), exist_ok=True) - print_list_to_file(new_ids_dir_name + 'cellar_ids_' + get_current_timestamp() + '.txt', missing_ids) # Function to process a list of ids to download the corresponding zip files def process_range(ids: list, folder_path: str): @@ -107,23 +188,37 @@ def process_range(ids: list, folder_path: str): except Exception as e: logging.error(f"Error processing range: {e}") -# Function to download files in parallel using multiple threads -def download_documents(results, download_dir): - """ - Sends a REST query to the Publications Office APIs and downloads the Cellar documents - """ - cellar_ids = get_cellar_ids_from_json_results(results) - if not os.path.exists(LOG_DIR): - os.makedirs(LOG_DIR) - - nthreads = 1 - threads = [] - for i in range(nthreads): - sub_list = cellar_ids[i::nthreads] - t = threading.Thread(target=process_range, args=(sub_list, os.path.join(download_dir, str(sub_list)))) - threads.append(t) - [t.start() for t in threads] - [t.join() for t in threads] +# Function to log downloaded files +def log_downloaded_files(downloaded_files: list, dir_to_check: str): + in_dir_name = LOG_DIR + 'in_dir_lists/' + os.makedirs(os.path.dirname(in_dir_name), exist_ok=True) + print_list_to_file(in_dir_name + 'in_dir_' + get_current_timestamp() + '.txt', downloaded_files) + +# Function to log missing ids +def log_missing_ids(missing_ids: list): + new_ids_dir_name = LOG_DIR + 'cellar_ids/' + os.makedirs(os.path.dirname(new_ids_dir_name), exist_ok=True) + print_list_to_file(new_ids_dir_name + 'cellar_ids_' + get_current_timestamp() + '.txt', missing_ids) + + +# Function to get the current timestamp +def get_current_timestamp(): + return datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + +# Function to print a list to a file +def print_list_to_file(filename, lst): + with open(filename, 'w+') as f: + for item in lst: + f.write(item + '\n') + +# Function to download a zip file and extract it +def extract_zip(response: requests.Response, folder_path: str): + try: + z = zipfile.ZipFile(io.BytesIO(response.content)) + z.extractall(folder_path) + except Exception as e: + logging.error(f"Error downloading zip: {e}") + # Main function if __name__ == "__main__": From cf84d9d73ebbfd2135aeccca053c629a70f74019 Mon Sep 17 00:00:00 2001 From: AlessioNar Date: Wed, 16 Oct 2024 15:17:20 +0200 Subject: [PATCH 2/5] Reordered files --- op_cellar/documents.py | 141 ++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 72 deletions(-) diff --git a/op_cellar/documents.py b/op_cellar/documents.py index de39d98..277ad19 100644 --- a/op_cellar/documents.py +++ b/op_cellar/documents.py @@ -11,60 +11,39 @@ BASE_URL = 'http://publications.europa.eu/resource/cellar/' LOG_DIR = 'logs/' - -# Function to send a GET request to download a zip file for the given id under the CELLAR URI -def rest_get_call(id: str) -> requests.Response: +def download_documents(results, download_dir, nthreads=1): """ - Send a GET request to download a zip file for the given id under the CELLAR URI. + Download Cellar documents in parallel using multiple threads. + + Sends a REST query to the Publications Office APIs and downloads the documents + corresponding to the given results. Parameters ---------- - id : str - The id of the resource to be retrieved. - - Returns - ------- - requests.Response - The response from the server. + results : dict + A dictionary containing the JSON results from the Publications Office APIs. + download_dir : str + The directory where the downloaded documents will be saved. + nthreads : int + The number of threads to use to make the request Notes ----- - The request is sent with the following headers: - - Accept: application/xhtml+xml - - Accept-Language: eng - - Content-Type: application/x-www-form-urlencoded - - Host: publications.europa.eu - - Raises - ------ - requests.RequestException - If there is an error sending the request. - - See Also - -------- - requests : The underlying library used for making HTTP requests. - - Examples - -------- - >>> import requests - >>> response = rest_get_call('some_id') - >>> if response is not None: - ... print(response.status_code) + The function uses a separate thread for each subset of Cellar ids. + The number of threads can be adjusted by modifying the `nthreads` parameter. """ - try: - url = BASE_URL + id - headers = { - 'Accept': "application/xhtml+xml", - 'Accept-Language': "eng", - 'Content-Type': "application/x-www-form-urlencoded", - 'Host': "publications.europa.eu" - } - response = requests.request("GET", url, headers=headers) - response.raise_for_status() - return response - except requests.RequestException as e: - logging.error(f"Error sending GET request: {e}") - return None + cellar_ids = get_cellar_ids_from_json_results(results) + + if not os.path.exists(LOG_DIR): + os.makedirs(LOG_DIR) + + threads = [] + for i in range(nthreads): + sub_list = cellar_ids[i::nthreads] + t = threading.Thread(target=process_range, args=(sub_list, os.path.join(download_dir, str(sub_list)))) + threads.append(t) + [t.start() for t in threads] + [t.join() for t in threads] # Function to create a list of CELLAR ids from the given cellar_results JSON dictionary and return the list def get_cellar_ids_from_json_results(cellar_results): @@ -109,40 +88,59 @@ def get_cellar_ids_from_json_results(cellar_results): cellar_ids_list = [results_list[i]["cellarURIs"]["value"].split("cellar/")[1] for i in range(len(results_list))] return cellar_ids_list -def download_documents(results, download_dir, nthreads=1): +# Function to send a GET request to download a zip file for the given id under the CELLAR URI +def rest_get_call(id: str) -> requests.Response: """ - Download Cellar documents in parallel using multiple threads. - - Sends a REST query to the Publications Office APIs and downloads the documents - corresponding to the given results. + Send a GET request to download a zip file for the given id under the CELLAR URI. Parameters ---------- - results : dict - A dictionary containing the JSON results from the Publications Office APIs. - download_dir : str - The directory where the downloaded documents will be saved. - nthreads : int - The number of threads to use to make the request + id : str + The id of the resource to be retrieved. + + Returns + ------- + requests.Response + The response from the server. Notes ----- - The function uses a separate thread for each subset of Cellar ids. - The number of threads can be adjusted by modifying the `nthreads` parameter. - """ - cellar_ids = get_cellar_ids_from_json_results(results) + The request is sent with the following headers: + - Accept: application/xhtml+xml + - Accept-Language: eng + - Content-Type: application/x-www-form-urlencoded + - Host: publications.europa.eu - if not os.path.exists(LOG_DIR): - os.makedirs(LOG_DIR) - - threads = [] - for i in range(nthreads): - sub_list = cellar_ids[i::nthreads] - t = threading.Thread(target=process_range, args=(sub_list, os.path.join(download_dir, str(sub_list)))) - threads.append(t) - [t.start() for t in threads] - [t.join() for t in threads] + Raises + ------ + requests.RequestException + If there is an error sending the request. + See Also + -------- + requests : The underlying library used for making HTTP requests. + + Examples + -------- + >>> import requests + >>> response = rest_get_call('some_id') + >>> if response is not None: + ... print(response.status_code) + """ + try: + url = BASE_URL + id + headers = { + 'Accept': "application/xhtml+xml", + 'Accept-Language': "eng", + 'Content-Type': "application/x-www-form-urlencoded", + 'Host': "publications.europa.eu" + } + response = requests.request("GET", url, headers=headers) + response.raise_for_status() + return response + except requests.RequestException as e: + logging.error(f"Error sending GET request: {e}") + return None # Function to process a single file def process_single_file(response: requests.Response, folder_path: str, id: str): @@ -219,7 +217,6 @@ def extract_zip(response: requests.Response, folder_path: str): except Exception as e: logging.error(f"Error downloading zip: {e}") - # Main function if __name__ == "__main__": logging.basicConfig(level=logging.INFO) From 535746619cabac9eb1431c3b0533b143f1e31c00 Mon Sep 17 00:00:00 2001 From: AlessioNar Date: Wed, 16 Oct 2024 16:00:35 +0200 Subject: [PATCH 3/5] Added further docstrings --- op_cellar/documents.py | 90 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 13 deletions(-) diff --git a/op_cellar/documents.py b/op_cellar/documents.py index 277ad19..a56345c 100644 --- a/op_cellar/documents.py +++ b/op_cellar/documents.py @@ -45,7 +45,6 @@ def download_documents(results, download_dir, nthreads=1): [t.start() for t in threads] [t.join() for t in threads] -# Function to create a list of CELLAR ids from the given cellar_results JSON dictionary and return the list def get_cellar_ids_from_json_results(cellar_results): """ Extract CELLAR ids from a JSON dictionary. @@ -106,7 +105,7 @@ def rest_get_call(id: str) -> requests.Response: Notes ----- The request is sent with the following headers: - - Accept: application/xhtml+xml + - Accept: application/xhtml+xml @todo - cater for other kinds of requests too. - Accept-Language: eng - Content-Type: application/x-www-form-urlencoded - Host: publications.europa.eu @@ -144,6 +143,36 @@ def rest_get_call(id: str) -> requests.Response: # Function to process a single file def process_single_file(response: requests.Response, folder_path: str, id: str): + """ + Process a single file by saving its contents to a file. + + Parameters + ---------- + response : requests.Response + The HTTP response object containing the file contents. + folder_path : str + The path to the folder where the file will be saved. + id : str + The id of the file, used to construct the file name. + + Returns + ------- + None + + Notes + ----- + This function saves the contents of a single file from an HTTP response to a + file on disk. The file name is constructed by appending the id to the folder + path with an '.html' extension. The function ensures that the directory path + exists before attempting to write the file. + + Examples + -------- + >>> response = requests.get('http://example.com/file') + >>> folder_path = '/path/to/folder' + >>> id = 'file_id' + >>> process_single_file(response, folder_path, id) + """ out_file = folder_path + '/' + id + '.html' os.makedirs(os.path.dirname(out_file), exist_ok=True) with open(out_file, 'w+', encoding="utf-8") as f: @@ -152,6 +181,38 @@ def process_single_file(response: requests.Response, folder_path: str, id: str): # Function to process a list of ids to download the corresponding zip files def process_range(ids: list, folder_path: str): + """ + Process a list of ids to download the corresponding zip files. + + Parameters + ---------- + ids : list + List of ids to process. + folder_path : str + Path to the folder where the files will be downloaded. + + Returns + ------- + None + + Raises + ------ + Exception + If an error occurs during the processing. + + Notes + ----- + This function iterates over the list of ids, sends a GET request for each id, + and downloads the corresponding file. If the file is a zip file, it is extracted + to the specified folder. If the file is not a zip file, it is processed as a + single file. If the file cannot be downloaded, the id is logged to a file. + + Examples + -------- + >>> ids = ['id1', 'id2', 'id3'] + >>> folder_path = '/path/to/folder' + >>> process_range(ids, folder_path) + """ try: zip_files = [] single_files = [] @@ -186,6 +247,20 @@ def process_range(ids: list, folder_path: str): except Exception as e: logging.error(f"Error processing range: {e}") + +# Function to get the current timestamp +def get_current_timestamp(): + return datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + + +# Function to download a zip file and extract it +def extract_zip(response: requests.Response, folder_path: str): + try: + z = zipfile.ZipFile(io.BytesIO(response.content)) + z.extractall(folder_path) + except Exception as e: + logging.error(f"Error downloading zip: {e}") + # Function to log downloaded files def log_downloaded_files(downloaded_files: list, dir_to_check: str): in_dir_name = LOG_DIR + 'in_dir_lists/' @@ -199,23 +274,12 @@ def log_missing_ids(missing_ids: list): print_list_to_file(new_ids_dir_name + 'cellar_ids_' + get_current_timestamp() + '.txt', missing_ids) -# Function to get the current timestamp -def get_current_timestamp(): - return datetime.now().strftime('%Y-%m-%d_%H-%M-%S') - # Function to print a list to a file def print_list_to_file(filename, lst): with open(filename, 'w+') as f: for item in lst: f.write(item + '\n') -# Function to download a zip file and extract it -def extract_zip(response: requests.Response, folder_path: str): - try: - z = zipfile.ZipFile(io.BytesIO(response.content)) - z.extractall(folder_path) - except Exception as e: - logging.error(f"Error downloading zip: {e}") # Main function if __name__ == "__main__": From 564570eabf4d48a5b59407762a99d0c86f8b8852 Mon Sep 17 00:00:00 2001 From: AlessioNar Date: Wed, 16 Oct 2024 17:20:55 +0200 Subject: [PATCH 4/5] reorganised functions and bugfix when storing files --- op_cellar/documents.py | 154 ++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 78 deletions(-) diff --git a/op_cellar/documents.py b/op_cellar/documents.py index a56345c..0309827 100644 --- a/op_cellar/documents.py +++ b/op_cellar/documents.py @@ -36,11 +36,11 @@ def download_documents(results, download_dir, nthreads=1): if not os.path.exists(LOG_DIR): os.makedirs(LOG_DIR) - threads = [] for i in range(nthreads): - sub_list = cellar_ids[i::nthreads] - t = threading.Thread(target=process_range, args=(sub_list, os.path.join(download_dir, str(sub_list)))) + cellar_ids_subset = cellar_ids[i::nthreads] + print(cellar_ids_subset) + t = threading.Thread(target=process_range, args=(cellar_ids_subset, os.path.join(download_dir))) threads.append(t) [t.start() for t in threads] [t.join() for t in threads] @@ -87,6 +87,74 @@ def get_cellar_ids_from_json_results(cellar_results): cellar_ids_list = [results_list[i]["cellarURIs"]["value"].split("cellar/")[1] for i in range(len(results_list))] return cellar_ids_list +# Function to process a list of ids to download the corresponding zip files +def process_range(ids: list, folder_path: str): + """ + Process a list of ids to download the corresponding zip files. + + Parameters + ---------- + ids : list + List of ids to process. + folder_path : str + Path to the folder where the files will be downloaded. + + Returns + ------- + None + + Raises + ------ + Exception + If an error occurs during the processing. + + Notes + ----- + This function iterates over the list of ids, sends a GET request for each id, + and downloads the corresponding file. If the file is a zip file, it is extracted + to the specified folder. If the file is not a zip file, it is processed as a + single file. If the file cannot be downloaded, the id is logged to a file. + + Examples + -------- + >>> ids = ['id1', 'id2', 'id3'] + >>> folder_path = '/path/to/folder' + >>> process_range(ids, folder_path) + """ + try: + zip_files = [] + single_files = [] + other_downloads = [] + + for id in ids: + sub_folder_path = os.path.join(folder_path, id) + + response = rest_get_call(id.strip()) + if response is None: + continue + + if 'Content-Type' in response.headers: + if 'zip' in response.headers['Content-Type']: + zip_files.append(id) + extract_zip(response, sub_folder_path) + else: + single_files.append(id) + process_single_file(response, sub_folder_path) + else: + other_downloads.append(id) + + if len(other_downloads) != 0: + # Log results + id_logs_path = LOG_DIR + 'failed_' + get_current_timestamp() + '.txt' + os.makedirs(os.path.dirname(id_logs_path), exist_ok=True) + with open(id_logs_path, 'w+') as f: + f.write('Failed downloads ' + get_current_timestamp() + '\n' + str(other_downloads)) + + with open(LOG_DIR + get_current_timestamp() + '.txt', 'w+') as f: + f.write(f"Zip files: {len(zip_files)}, Single files: {len(single_files)}, Failed downloads: {len(other_downloads)}") + except Exception as e: + logging.error(f"Error processing range: {e}") + # Function to send a GET request to download a zip file for the given id under the CELLAR URI def rest_get_call(id: str) -> requests.Response: """ @@ -129,7 +197,7 @@ def rest_get_call(id: str) -> requests.Response: try: url = BASE_URL + id headers = { - 'Accept': "application/xhtml+xml", + 'Accept': "application/zip;mtype=fmx4, application/xml;mtype=fmx4, application/xhtml+xml, text/html, text/html;type=simplified, application/msword, text/plain, application/xml;notice=object", 'Accept-Language': "eng", 'Content-Type': "application/x-www-form-urlencoded", 'Host': "publications.europa.eu" @@ -141,8 +209,9 @@ def rest_get_call(id: str) -> requests.Response: logging.error(f"Error sending GET request: {e}") return None + # Function to process a single file -def process_single_file(response: requests.Response, folder_path: str, id: str): +def process_single_file(response: requests.Response, folder_path: str): """ Process a single file by saving its contents to a file. @@ -152,8 +221,6 @@ def process_single_file(response: requests.Response, folder_path: str, id: str): The HTTP response object containing the file contents. folder_path : str The path to the folder where the file will be saved. - id : str - The id of the file, used to construct the file name. Returns ------- @@ -170,83 +237,14 @@ def process_single_file(response: requests.Response, folder_path: str, id: str): -------- >>> response = requests.get('http://example.com/file') >>> folder_path = '/path/to/folder' - >>> id = 'file_id' - >>> process_single_file(response, folder_path, id) + >>> process_single_file(response, folder_path) """ - out_file = folder_path + '/' + id + '.html' + out_file = folder_path + '.html' os.makedirs(os.path.dirname(out_file), exist_ok=True) with open(out_file, 'w+', encoding="utf-8") as f: f.write(response.text) -# Function to process a list of ids to download the corresponding zip files -def process_range(ids: list, folder_path: str): - """ - Process a list of ids to download the corresponding zip files. - - Parameters - ---------- - ids : list - List of ids to process. - folder_path : str - Path to the folder where the files will be downloaded. - - Returns - ------- - None - - Raises - ------ - Exception - If an error occurs during the processing. - - Notes - ----- - This function iterates over the list of ids, sends a GET request for each id, - and downloads the corresponding file. If the file is a zip file, it is extracted - to the specified folder. If the file is not a zip file, it is processed as a - single file. If the file cannot be downloaded, the id is logged to a file. - - Examples - -------- - >>> ids = ['id1', 'id2', 'id3'] - >>> folder_path = '/path/to/folder' - >>> process_range(ids, folder_path) - """ - try: - zip_files = [] - single_files = [] - other_downloads = [] - - for id in ids: - sub_folder_path = folder_path - - response = rest_get_call(id.strip()) - if response is None: - continue - - if 'Content-Type' in response.headers: - if 'zip' in response.headers['Content-Type']: - zip_files.append(id) - extract_zip(response, sub_folder_path) - else: - single_files.append(id) - process_single_file(response, sub_folder_path, id) - else: - other_downloads.append(id) - - if len(other_downloads) != 0: - # Log results - id_logs_path = LOG_DIR + 'failed_' + get_current_timestamp() + '.txt' - os.makedirs(os.path.dirname(id_logs_path), exist_ok=True) - with open(id_logs_path, 'w+') as f: - f.write('Failed downloads ' + get_current_timestamp() + '\n' + str(other_downloads)) - - with open(LOG_DIR + get_current_timestamp() + '.txt', 'w+') as f: - f.write(f"Zip files: {len(zip_files)}, Single files: {len(single_files)}, Failed downloads: {len(other_downloads)}") - except Exception as e: - logging.error(f"Error processing range: {e}") - # Function to get the current timestamp def get_current_timestamp(): From fcb5524ad0cfd7c14a1a0a7c7f179294ec441df5 Mon Sep 17 00:00:00 2001 From: AlessioNar Date: Wed, 16 Oct 2024 17:21:18 +0200 Subject: [PATCH 5/5] Added gitignore and named poetry project --- .gitignore | 3 +++ pyproject.toml | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2b4ee8c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +build/* +dist/* +op_cellar.egg-info/* \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 36fb864..6637964 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,13 @@ +[project] +name = "op_cellar" +version = "0.0.2" +description = "A generic package to query and retrieve documents from Cellar, the common data repository of the Publications Office of the European Union." + [tool.poetry] name = "op_cellar" version = "0.0.2" description = "A generic package to query and retrieve documents from Cellar, the common data repository of the Publications Office of the European Union." -authors = ["AlessioNar ", "seljaseppala"] +authors = ["AlessioNar "] license = "EUPL 1.2" readme = "README.md" classifiers = [