Skip to content

Commit

Permalink
Fixing issue with XML namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
eiglesias34 committed Oct 4, 2024
1 parent 45632ca commit 476c887
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 10 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ You can easily customize your own configurations from the set of features that S

## Version
```
-4.7.4.11
+4.7.4.12
```

## RML-Test Cases
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-4.7.4.11
+4.7.4.12
1 change: 0 additions & 1 deletion rdfizer/rdfizer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2333,7 +2333,6 @@ def semantify_xml(triples_map, triples_map_list, output_file_descriptor):
for child in root.iterfind(level, namespace):
create_subject = True
global generated_subjects

if mapping_partitions == "yes":
if "_" in triples_map.triples_map_id:
componets = triples_map.triples_map_id.split("_")[:-1]
Expand Down
38 changes: 32 additions & 6 deletions rdfizer/rdfizer/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,24 @@ def string_substitution_xml(string, pattern, row, term, iterator, parent_map, na

else:
return None
elif namespace != {}:
if row.find("{"+namespace[""]+"}"+match) is not None:
if re.search("^[\s|\t]*$", row.find("{"+namespace[""]+"}"+match).text) is None:
new_string = new_string[:start + offset_current_substitution] + encode_char(row.find("{"+namespace[""]+"}"+match).text.strip()) + new_string[ end + offset_current_substitution:]
offset_current_substitution = offset_current_substitution + len(encode_char(row.find("{"+namespace[""]+"}"+match).text.strip())) - (end - start)

else:
return None
else:
if match in row.attrib:
if row.attrib[match] is not None:
if re.search("^[\s|\t]*$", row.attrib[match]) is None:
new_string = new_string[:start + offset_current_substitution] + encode_char(row.attrib[match].strip()) + new_string[end + offset_current_substitution:]
offset_current_substitution = offset_current_substitution + len(encode_char(row.attrib[match].strip())) - (end - start)
else:
return None
else:
return None
else:
if match in row.attrib:
if row.attrib[match] is not None:
Expand Down Expand Up @@ -1272,12 +1290,20 @@ def string_substitution_xml(string, pattern, row, term, iterator, parent_map, na
temp_list[i] = {"string":new_string,"offset_current_substitution":offset_current_substitution}
i += 1
else:
for child in row.findall(match, namespace):
if re.search("^[\s|\t]*$", child.text) is None:
new_string = temp_list[i]["string"][:start + temp_list[i]["offset_current_substitution"]] + encode_char(child.text.strip()) + temp_list[i]["string"][ end + temp_list[i]["offset_current_substitution"]:]
offset_current_substitution = temp_list[i]["offset_current_substitution"] + len(encode_char(child.text.strip())) - (end - start)
temp_list[i] = {"string":new_string,"offset_current_substitution":offset_current_substitution}
i += 1
if namespace != {}:
for child in row.findall("{"+namespace[""]+"}"+match, namespace):
if re.search("^[\s|\t]*$", child.text) is None:
new_string = temp_list[i]["string"][:start + temp_list[i]["offset_current_substitution"]] + encode_char(child.text.strip()) + temp_list[i]["string"][ end + temp_list[i]["offset_current_substitution"]:]
offset_current_substitution = temp_list[i]["offset_current_substitution"] + len(encode_char(child.text.strip())) - (end - start)
temp_list[i] = {"string":new_string,"offset_current_substitution":offset_current_substitution}
i += 1
else:
for child in row.findall(match, namespace):
if re.search("^[\s|\t]*$", child.text) is None:
new_string = temp_list[i]["string"][:start + temp_list[i]["offset_current_substitution"]] + encode_char(child.text.strip()) + temp_list[i]["string"][ end + temp_list[i]["offset_current_substitution"]:]
offset_current_substitution = temp_list[i]["offset_current_substitution"] + len(encode_char(child.text.strip())) - (end - start)
temp_list[i] = {"string":new_string,"offset_current_substitution":offset_current_substitution}
i += 1
else:
match = reference_match.group(1).split("[")[0]
if "@" in match:
Expand Down
1 change: 0 additions & 1 deletion rdfizer/rdfizer/semantify.py
Original file line number Diff line number Diff line change
Expand Up @@ -2333,7 +2333,6 @@ def semantify_xml(triples_map, triples_map_list, output_file_descriptor):
for child in root.iterfind(level, namespace):
create_subject = True
global generated_subjects

if mapping_partitions == "yes":
if "_" in triples_map.triples_map_id:
componets = triples_map.triples_map_id.split("_")[:-1]
Expand Down

0 comments on commit 476c887

Please sign in to comment.