From ba63f81014fcf7d15c3002cc65a1ffd79c0261b0 Mon Sep 17 00:00:00 2001
From: maipet <maier.pepsi@gmail.com>
Date: Thu, 30 Jan 2025 07:07:24 +0100
Subject: [PATCH] Add comments to script

---
 scripts/sort_ttl_terms.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/sort_ttl_terms.py b/scripts/sort_ttl_terms.py
index 66ae05b..8deffe5 100644
--- a/scripts/sort_ttl_terms.py
+++ b/scripts/sort_ttl_terms.py
@@ -5,7 +5,8 @@ def format_list_items(unformatted_list):
     formatted_list = [i + "\n\n" for i in unformatted_list]
     return formatted_list
 
-
+# open, serialize and save the current hfs file
+# to standardise the formatting of the old file if necessary
 with open("hochschulfaechersystematik.ttl", "r") as rdf_file:
     base = 'https://w3id.org/kim/hochschulfaechersystematik/'
     g = Graph(base=base)
@@ -15,11 +16,16 @@ def format_list_items(unformatted_list):
 
 with open("hochschulfaechersystematik.ttl", "r") as f:
     data = f.read()
+    # find all concept blocks via regex -> only these sections are sorted
     sort_block = re.findall(r'^[<:]n.*? \.$', data, flags=re.MULTILINE | re.DOTALL)
+    # find the lines starting with "@" (prefixes of the used vocabularies)
     prefix_matches = re.findall(r'^@.+\.$\n', data, flags=re.MULTILINE)
+    # match skos:ConceptScheme (incl. dcterms)
     other_matches = re.findall(r'^(?!<n|:n| |@|\n).*? \.$', data, flags=re.MULTILINE | re.DOTALL)
+    # sort concepts by length of their first token, then by block text
     list_sort = sorted(sort_block, key=lambda x: (len(x.split(' ')[0]), x))
     prefix_matches.insert(len(prefix_matches), "\n")
+    # define required order of the output
     result = prefix_matches + format_list_items(other_matches) + format_list_items(list_sort)
 
     with open("hochschulfaechersystematik.ttl", "w") as outfile: