From ba63f81014fcf7d15c3002cc65a1ffd79c0261b0 Mon Sep 17 00:00:00 2001 From: maipet Date: Thu, 30 Jan 2025 07:07:24 +0100 Subject: [PATCH] Add comments to script --- scripts/sort_ttl_terms.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/sort_ttl_terms.py b/scripts/sort_ttl_terms.py index 66ae05b..8deffe5 100644 --- a/scripts/sort_ttl_terms.py +++ b/scripts/sort_ttl_terms.py @@ -5,7 +5,8 @@ def format_list_items(unformatted_list): formatted_list = [i + "\n\n" for i in unformatted_list] return formatted_list - +# open, serialize and save the current hfs file +# to standardise the formatting of the old file if necessary with open("hochschulfaechersystematik.ttl", "r") as rdf_file: base = 'https://w3id.org/kim/hochschulfaechersystematik/' g = Graph(base=base) @@ -15,11 +16,16 @@ def format_list_items(unformatted_list): with open("hochschulfaechersystematik.ttl", "r") as f: data = f.read() + # define regex matches for concepts -> only these sections should be sorted sort_block = re.findall(r'^[<:]n.*? \.$', data, flags=re.MULTILINE | re.DOTALL) + # define regex matches for used vocabularies (prefixes) prefix_matches = re.findall(r'^@.+\.$\n', data, flags=re.MULTILINE) + # match skos:ConceptScheme (incl. dcterms) other_matches = re.findall(r'^(?!