Skip to content

Commit

Permalink
fix: illegal prefix generation error
Browse files Browse the repository at this point in the history
previous implementation allowed illegal prefixes to be generated, which
would cause errors parsing the data with n3.js
  • Loading branch information
lalewis1 committed Oct 18, 2024
1 parent 5230eb1 commit eae19a3
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions prez/services/curie_functions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
from urllib.parse import urlparse

from aiocache import caches
Expand Down Expand Up @@ -31,6 +32,20 @@ def namespace_registered(namespace):
return False


# Character-class contents (without the surrounding brackets) for the Turtle
# grammar productions, see https://www.w3.org/TR/turtle/#grammar-production-PN_PREFIX
_PN_CHARS_BASE = (
    r"A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D"
    r"\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF"
    r"\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF"
)
# PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
# where PN_CHARS_U ::= PN_CHARS_BASE | '_'
_PN_CHARS = _PN_CHARS_BASE + r"_0-9\u00B7\u0300-\u036F\u203F-\u2040\-"
# PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
# Inside a character class "." is a literal dot, not "any character".
_PN_PREFIX_RE = re.compile(
    rf"[{_PN_CHARS_BASE}](?:[{_PN_CHARS}.]*[{_PN_CHARS}])?"
)


def valid_prefix(prefix: str) -> bool:
    """Return whether *prefix* is a legal Turtle namespace prefix.

    The check follows the PN_PREFIX production of the Turtle grammar
    (https://www.w3.org/TR/turtle/#grammar-production-PN_PREFIX): the prefix
    must start with a PN_CHARS_BASE character, may contain PN_CHARS characters
    and literal dots in the middle, and must not end with a dot.

    Args:
        prefix: The candidate prefix, without the trailing colon.

    Returns:
        True if the whole string matches PN_PREFIX, False otherwise
        (including for the empty string).
    """
    # fullmatch, not match: the entire string has to be a PN_PREFIX. With
    # match() any string that merely *starts* with a valid character passes,
    # because everything after the first character is optional in the grammar.
    return _PN_PREFIX_RE.fullmatch(prefix) is not None


def generate_new_prefix(uri):
"""
Generates a new prefix for a uri
Expand All @@ -54,6 +69,9 @@ def generate_new_prefix(uri):
proposed_prefix = "".join(
[c for c in to_generate_prefix_from if c not in "aeiou!@#$%^&*()_+-=,."]
)
if not valid_prefix(proposed_prefix):
# if we still can't get a nice prefix. use an ugly but valid one using a hash of the IRI
proposed_prefix = f"ns{hash(to_generate_prefix_from)}"
if not prefix_registered(proposed_prefix):
prefix_graph.bind(proposed_prefix, ns)
return
Expand Down

0 comments on commit eae19a3

Please sign in to comment.