diff --git a/.gitignore b/.gitignore index 1253b41..6844f38 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ ometha.egg-info/ misc/dist/ build/ Pipfile.lock +*.xml +*.json \ No newline at end of file diff --git a/ometha/cli.py b/ometha/cli.py index 58cce81..092c15e 100644 --- a/ometha/cli.py +++ b/ometha/cli.py @@ -183,10 +183,10 @@ def convert_common_args(args): PRM["conf_f"], ["baseurl", "set", "metadataPrefix", "datengeber", "timeout", "debug"], ) - PRM["n_procs"], PRM["out_p"] = read_yaml_file( - PRM["conf_f"], ["numberofprocesses", "outputfolder"], [16, os.getcwd()] - ) - # TODO b_url is not read from the config file + # outputfolder: if none is defined use the current working directory + PRM["out_f"] = read_yaml_file(PRM["conf_f"], ["outputfolder"], os.getcwd())[0] + # n_procs is not given in the config file, use default value + PRM["n_procs"] = 16 PRM["b_url"], PRM["pref"] = ( re.sub(r"/\s$", "", PRM["b_url"]), re.sub(r"\s$", "", PRM["pref"]), diff --git a/ometha/harvester.py b/ometha/harvester.py index 2523102..d157a74 100644 --- a/ometha/harvester.py +++ b/ometha/harvester.py @@ -110,7 +110,12 @@ def save_file(oai_id: str, folder: str, response, export_type): except Exception as e: print(f"Error converting XML to JSON: {e}") else: - print(f"Unsupported file type: {export_type}") + # print(f"Unsupported file type: {export_type}. Reverting to XML.") + # TODO this check should be done before the harvesting starts + with open( + os.path.join(folder, f"{filename}.xml"), "w", encoding="utf8" + ) as of: + of.write(response) def get_text(url, session, folder, export_type): oai_id = parse_qs(urlparse(url).query)["identifier"][0] @@ -193,12 +198,24 @@ def change_date(date: str, name: str, key: str): yaml.safe_dump(doc, f, default_flow_style=False, sort_keys=False) -def create_id_file(p, ids, folder, type=None): # type kann außerdem failed sein +def create_id_file(p, ids, folder, type=None): + """ + Create an ID file with the given parameters. 
+ + Args: + p (dict): The parameters dictionary. + ids (list): The list of IDs. + folder (str): The folder path where the file will be created. + type (str, optional): Label inserted into the file name ("_ometha_{type}_ids.yaml"), e.g. "failed". Defaults to None. + + Returns: + str: The path of the created file. + """ # TODO add date or some other kind of identifier to the file name? - file = os.path.join(folder, f"{type}_ids.yaml") + file = os.path.join(folder, f"_ometha_{type}_ids.yaml") with open(file, "w", encoding="utf-8") as f: f.write( - f"Information: Liste erzeugt mit Ometha {__version__}\ndate: {TIMESTR}\nurl: {p['b_url']}\nset: {p['sets']}\nmprefix: {p['pref']}\nids:\n" + f"Information: Liste erzeugt mit Ometha {__version__}\ndate: {TIMESTR}\nbaseurl: {p['b_url']}\nset: {p['sets']}\nmetadataPrefix: {p['pref']}\ndatengeber: {p['dat_geb']}\ntimeout: {p['timeout']}\ndebug: {p['debug']}\nfromdate: {p['f_date']}\nuntildate: {p['u_date']}\noutputfolder: {p['out_f']}\nids:\n" ) f.write("\n".join([f"- '{fid}'" for fid in ids])) return file @@ -217,7 +234,7 @@ def read_yaml_file(file_path: str, keys: list, default: any = None) -> list: KeyError: If a key is not found in the file. Returns: - The values for the given keys. + A list containing the values for the given keys. 
""" try: with open(file_path, "r", encoding="utf-8") as ymlfile: diff --git a/ometha/main.py b/ometha/main.py index de7b9ac..a776f9d 100644 --- a/ometha/main.py +++ b/ometha/main.py @@ -109,16 +109,15 @@ def start_process(): if not PRM: logger.critical("No parameters were passed to Ometha.") sys.exit() - # Ordner für Log, Configfile und Output anlegen - print(PRM) + # Ordner für Log, Configfile und Output anlegen im aktuellen Verzeichnis if PRM["out_f"] is None: - PRM["out_f"] = os.path.join(application_path, "output") + PRM["out_f"] = os.path.join(os.getcwd(), "output") folder = os.path.join(PRM["out_f"], PRM["dat_geb"], TIMESTR) os.makedirs(folder, exist_ok=True) # Logfile anlegen logger.remove() # Initalen Logger löschen, damit er nicht alles in stderr loggt: - log_file = os.path.join(folder, f"Ometha_{PRM['dat_geb']}.log") + log_file = os.path.join(folder, f"_ometha_{PRM['dat_geb']}.log") logger.level("PARAMETER", no=38, color="") logger.add( log_file,