From 476c887ea8b7f85751b3cdc0447931a583077aa0 Mon Sep 17 00:00:00 2001 From: eiglesias34 Date: Fri, 4 Oct 2024 12:23:22 +0200 Subject: [PATCH] Fixing issue with XML namespace --- README.md | 2 +- VERSION | 2 +- rdfizer/rdfizer/__init__.py | 1 - rdfizer/rdfizer/functions.py | 38 ++++++++++++++++++++++++++++++------ rdfizer/rdfizer/semantify.py | 1 - 5 files changed, 34 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a014546..4143a41 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ You can easily customize your own configurations from the set of features that S ## Version ``` -4.7.4.11 +4.7.4.12 ``` ## RML-Test Cases diff --git a/VERSION b/VERSION index e1f202d..6314e65 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.7.4.11 \ No newline at end of file +4.7.4.12 \ No newline at end of file diff --git a/rdfizer/rdfizer/__init__.py b/rdfizer/rdfizer/__init__.py index 3384b03..1ce765a 100755 --- a/rdfizer/rdfizer/__init__.py +++ b/rdfizer/rdfizer/__init__.py @@ -2333,7 +2333,6 @@ def semantify_xml(triples_map, triples_map_list, output_file_descriptor): for child in root.iterfind(level, namespace): create_subject = True global generated_subjects - if mapping_partitions == "yes": if "_" in triples_map.triples_map_id: componets = triples_map.triples_map_id.split("_")[:-1] diff --git a/rdfizer/rdfizer/functions.py b/rdfizer/rdfizer/functions.py index aa57b68..096b1b1 100644 --- a/rdfizer/rdfizer/functions.py +++ b/rdfizer/rdfizer/functions.py @@ -1212,6 +1212,24 @@ def string_substitution_xml(string, pattern, row, term, iterator, parent_map, na else: return None + elif namespace != {}: + if row.find("{"+namespace[""]+"}"+match) is not None: + if re.search("^[\s|\t]*$", row.find("{"+namespace[""]+"}"+match).text) is None: + new_string = new_string[:start + offset_current_substitution] + encode_char(row.find("{"+namespace[""]+"}"+match).text.strip()) + new_string[ end + offset_current_substitution:] + offset_current_substitution = offset_current_substitution + len(encode_char(row.find("{"+namespace[""]+"}"+match).text.strip())) - (end - start) + + else: + return None + else: + if match in row.attrib: + if row.attrib[match] is not None: + if re.search("^[\s|\t]*$", row.attrib[match]) is None: + new_string = new_string[:start + offset_current_substitution] + encode_char(row.attrib[match].strip()) + new_string[end + offset_current_substitution:] + offset_current_substitution = offset_current_substitution + len(encode_char(row.attrib[match].strip())) - (end - start) + else: + return None + else: + return None else: if match in row.attrib: if row.attrib[match] is not None: @@ -1272,12 +1290,20 @@ def string_substitution_xml(string, pattern, row, term, iterator, parent_map, na temp_list[i] = {"string":new_string,"offset_current_substitution":offset_current_substitution} i += 1 else: - for child in row.findall(match, namespace): - if re.search("^[\s|\t]*$", child.text) is None: - new_string = temp_list[i]["string"][:start + temp_list[i]["offset_current_substitution"]] + encode_char(child.text.strip()) + temp_list[i]["string"][ end + temp_list[i]["offset_current_substitution"]:] - offset_current_substitution = temp_list[i]["offset_current_substitution"] + len(encode_char(child.text.strip())) - (end - start) - temp_list[i] = {"string":new_string,"offset_current_substitution":offset_current_substitution} - i += 1 + if namespace != {}: + for child in row.findall("{"+namespace[""]+"}"+match, namespace): + if re.search("^[\s|\t]*$", child.text) is None: + new_string = temp_list[i]["string"][:start + temp_list[i]["offset_current_substitution"]] + encode_char(child.text.strip()) + temp_list[i]["string"][ end + temp_list[i]["offset_current_substitution"]:] + offset_current_substitution = temp_list[i]["offset_current_substitution"] + len(encode_char(child.text.strip())) - (end - start) + temp_list[i] = {"string":new_string,"offset_current_substitution":offset_current_substitution} + i += 1 + else: + for child in row.findall(match, namespace): + if re.search("^[\s|\t]*$", child.text) is None: + new_string = temp_list[i]["string"][:start + temp_list[i]["offset_current_substitution"]] + encode_char(child.text.strip()) + temp_list[i]["string"][ end + temp_list[i]["offset_current_substitution"]:] + offset_current_substitution = temp_list[i]["offset_current_substitution"] + len(encode_char(child.text.strip())) - (end - start) + temp_list[i] = {"string":new_string,"offset_current_substitution":offset_current_substitution} + i += 1 else: match = reference_match.group(1).split("[")[0] if "@" in match: diff --git a/rdfizer/rdfizer/semantify.py b/rdfizer/rdfizer/semantify.py index 1de27f8..8c569e9 100755 --- a/rdfizer/rdfizer/semantify.py +++ b/rdfizer/rdfizer/semantify.py @@ -2333,7 +2333,6 @@ def semantify_xml(triples_map, triples_map_list, output_file_descriptor): for child in root.iterfind(level, namespace): create_subject = True global generated_subjects - if mapping_partitions == "yes": if "_" in triples_map.triples_map_id: componets = triples_map.triples_map_id.split("_")[:-1]