From 3d9950088e80d417f921f2e117c599c87cc8085c Mon Sep 17 00:00:00 2001 From: Marko Ristin Date: Thu, 22 Aug 2024 14:52:56 +0500 Subject: [PATCH] Make SHACL regex patterns follow JSON Schema ones We included the regex pattern as-is from the input, which caused problems with the regex engines as the patterns in the meta-model are written in a Python dialect (and assume that the regex engine works on UTF-32 characters). However, most regex engines in the wild operating on SHACL (*e.g.*, Java SHACL validators) use UTF-16 to represent the text and do not support some parts of the Python regex dialect. For example, in the input meta-model specification, we omit the minimum bound 0 (*e.g.*, ``{,4}``), which breaks with the Java regex engine beneath the SHACL validator. Instead, with this patch, we parse the pattern from the specification and re-render it into the form that we also use in JSON Schema. We pick the JSON Schema regex dialect as most SHACL validators in the wild can deal with it, in particular those based on Java as a platform. Hence, we decide to serve this user base with priority. Discovered in [aas-core-meta issue 342]. [aas-core-meta issue 342]: https://github.com/aas-core-works/aas-core-meta/issues/342 --- aas_core_codegen/jsonschema/main.py | 11 ++-- aas_core_codegen/rdf_shacl/shacl.py | 19 +++++- .../expected_output/shacl-schema.ttl | 64 +++++++++---------- tests/our_jsonschema/test_main.py | 20 +++--- 4 files changed, 67 insertions(+), 47 deletions(-) diff --git a/aas_core_codegen/jsonschema/main.py b/aas_core_codegen/jsonschema/main.py index ad3aebc95..e0d91979b 100644 --- a/aas_core_codegen/jsonschema/main.py +++ b/aas_core_codegen/jsonschema/main.py @@ -140,8 +140,11 @@ def _define_type( ) -def _fix_pattern_for_utf16(pattern: str) -> str: - """Fix the pattern for UTF-16-only regex engines.""" +# NOTE (mristin): +# This function is made public so that we can use it in other schema generators such +# as the SHACL generator. 
+def fix_pattern_for_utf16(pattern: str) -> str: + """Parse the pattern and re-render it for UTF-16-only regex engines.""" regex, error = parse_retree.parse([pattern]) if error is not None: raise ValueError( @@ -195,7 +198,7 @@ def _define_constraints_for_primitive_type( and len(pattern_constraints) > 0 ): if len(pattern_constraints) == 1: - definition["pattern"] = _fix_pattern_for_utf16( + definition["pattern"] = fix_pattern_for_utf16( pattern_constraints[0].pattern ) else: @@ -207,7 +210,7 @@ def _define_constraints_for_primitive_type( [ ( "pattern", - _fix_pattern_for_utf16(pattern_constraint.pattern), + fix_pattern_for_utf16(pattern_constraint.pattern), ) ] ) diff --git a/aas_core_codegen/rdf_shacl/shacl.py b/aas_core_codegen/rdf_shacl/shacl.py index f97fbe788..fc547d9e7 100644 --- a/aas_core_codegen/rdf_shacl/shacl.py +++ b/aas_core_codegen/rdf_shacl/shacl.py @@ -7,6 +7,7 @@ from aas_core_codegen import intermediate, specific_implementations, infer_for_schema from aas_core_codegen.common import Stripped, Error, assert_never, Identifier +from aas_core_codegen.jsonschema import main as jsonschema_main from aas_core_codegen.rdf_shacl import ( naming as rdf_shacl_naming, common as rdf_shacl_common, @@ -214,7 +215,23 @@ def _define_property_shape( # region Define patterns for pattern_constraint in pattern_constraints: - pattern_literal = rdf_shacl_common.string_literal(pattern_constraint.pattern) + # NOTE (mristin): + # We need to render the regular expression so that the pattern appears in + # the canonical form. The original pattern in the specification might be written + # in Python dialect, which does not translate directly to many Regex Engines. + # + # For example, repetition bounds can be given with 0 omitted (*e.g.*, ``{,4}``), + # while SHACL and Java need an explicit zero (``{0,4}``). Our standard renderer + # puts an explicit zero. 
+ # + # In addition, we render the pattern exactly as we do for JSON Schema since most + # SHACL validators in the wild run regex engines which understand the patterns + # for JSON Schema and work in UTF-16. + rendered_pattern = jsonschema_main.fix_pattern_for_utf16( + pattern_constraint.pattern + ) + + pattern_literal = rdf_shacl_common.string_literal(rendered_pattern) stmts.append(Stripped(f"sh:pattern {pattern_literal} ;")) diff --git a/test_data/rdf_shacl/test_main/expected/aas_core_meta.v3/expected_output/shacl-schema.ttl b/test_data/rdf_shacl/test_main/expected/aas_core_meta.v3/expected_output/shacl-schema.ttl index 4b734c5c0..579ec95d3 100644 --- a/test_data/rdf_shacl/test_main/expected/aas_core_meta.v3/expected_output/shacl-schema.ttl +++ b/test_data/rdf_shacl/test_main/expected/aas_core_meta.v3/expected_output/shacl-schema.ttl @@ -36,7 +36,7 @@ aas:AbstractLangStringShape a sh:NodeShape ; sh:datatype xs:string ; sh:minCount 1 ; sh:maxCount 1 ; - sh:pattern "^(([a-zA-Z]{2,3}(-[a-zA-Z]{3}(-[a-zA-Z]{3}){,2})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})(-[a-zA-Z]{4})?(-([a-zA-Z]{2}|[0-9]{3}))?(-(([a-zA-Z0-9]){5,8}|[0-9]([a-zA-Z0-9]){3}))*(-[0-9A-WY-Za-wy-z](-([a-zA-Z0-9]){2,8})+)*(-[xX](-([a-zA-Z0-9]){1,8})+)?|[xX](-([a-zA-Z0-9]){1,8})+|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" ; + sh:pattern "^(([a-zA-Z]{2,3}(-[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})(-[a-zA-Z]{4})?(-([a-zA-Z]{2}|[0-9]{3}))?(-(([a-zA-Z0-9]){5,8}|[0-9]([a-zA-Z0-9]){3}))*(-[0-9A-WY-Za-wy-z](-([a-zA-Z0-9]){2,8})+)*(-[xX](-([a-zA-Z0-9]){1,8})+)?|[xX](-([a-zA-Z0-9]){1,8})+|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" ; ] ; sh:property [ 
a sh:PropertyShape ; @@ -45,7 +45,7 @@ aas:AbstractLangStringShape a sh:NodeShape ; sh:minCount 1 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; . @@ -60,7 +60,7 @@ aas:AdministrativeInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 4 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; sh:pattern "^(0|[1-9][0-9]*)$" ; ] ; sh:property [ @@ -71,7 +71,7 @@ aas:AdministrativeInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 4 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; sh:pattern "^(0|[1-9][0-9]*)$" ; ] ; sh:property [ @@ -89,7 +89,7 @@ aas:AdministrativeInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; . 
@@ -147,7 +147,7 @@ aas:AssetInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -163,7 +163,7 @@ aas:AssetInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -206,7 +206,7 @@ aas:BasicEventElementShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 255 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -259,8 +259,8 @@ aas:BlobShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 100 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; - sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ ]*;[ ]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([ !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([ !-~]|[\\x80-\\xff]))*\"))*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; + sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ \\t]*;[ \\t]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([\\t !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([\\t !-~]|[\\x80-\\xff]))*\"))*$" ; ] 
; . @@ -336,7 +336,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:minCount 0 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -352,7 +352,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:minCount 0 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -361,7 +361,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:minCount 0 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -383,7 +383,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:minCount 0 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -400,7 +400,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a 
sh:PropertyShape ; @@ -453,7 +453,7 @@ aas:EntityShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -540,7 +540,7 @@ aas:EventPayloadShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 255 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -577,7 +577,7 @@ aas:ExtensionShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 128 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -612,7 +612,7 @@ aas:FileShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -622,8 +622,8 @@ aas:FileShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 100 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; - sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ ]*;[ ]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([ !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([ !-~]|[\\x80-\\xff]))*\"))*$" ; + 
sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; + sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ \\t]*;[ \\t]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([\\t !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([\\t !-~]|[\\x80-\\xff]))*\"))*$" ; ] ; . @@ -753,7 +753,7 @@ aas:IdentifiableShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; . @@ -774,7 +774,7 @@ aas:KeyShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; . 
@@ -993,7 +993,7 @@ aas:QualifierShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 128 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1067,7 +1067,7 @@ aas:ReferableShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 128 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1077,7 +1077,7 @@ aas:ReferableShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 128 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; sh:pattern "^[a-zA-Z][a-zA-Z0-9_]*$" ; ] ; sh:property [ @@ -1159,7 +1159,7 @@ aas:ResourceShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1169,8 +1169,8 @@ aas:ResourceShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 100 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; - sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ ]*;[ ]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([ !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([ 
!-~]|[\\x80-\\xff]))*\"))*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; + sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ \\t]*;[ \\t]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([\\t !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([\\t !-~]|[\\x80-\\xff]))*\"))*$" ; ] ; . @@ -1185,7 +1185,7 @@ aas:SpecificAssetIdShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 64 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1195,7 +1195,7 @@ aas:SpecificAssetIdShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1311,7 +1311,7 @@ aas:ValueReferencePairShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^([\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd]|\\ud800[\\udc00-\\udfff]|[\\ud801-\\udbfe][\\udc00-\\udfff]|\\udbff[\\udc00-\\udfff])*$" ; ] ; sh:property [ a sh:PropertyShape ; diff --git a/tests/our_jsonschema/test_main.py b/tests/our_jsonschema/test_main.py index 8b1662bdf..c315263b9 100644 --- a/tests/our_jsonschema/test_main.py +++ b/tests/our_jsonschema/test_main.py @@ -157,33 +157,33 @@ def test_on_examples(self) -> None: class Test_pattern_transpilation(unittest.TestCase): def test_unescaped_above_ascii_character_in_bmp(self) -> None: - 
pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16("\ud7ff") + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16("\ud7ff") self.assertEqual("\ud7ff", pattern) def test_escaped_above_ascii_character_in_bmp(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16("\\ud7ff") + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16("\\ud7ff") self.assertEqual("\\ud7ff", pattern) def test_unescaped_range_in_bmp(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16( + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16( "[\x20-\uD7FF]" ) self.assertEqual("[\x20-\uD7FF]", pattern) def test_escaped_range_in_bmp(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16( + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16( "[\\x20-\\ud7ff]" ) self.assertEqual("[\\x20-\\ud7ff]", pattern) def test_escaped_range_in_bmp_always_lowercase(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16( + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16( "[\\x20-\\uD7FF]" ) self.assertEqual("[\\x20-\\ud7ff]", pattern) def test_unescaped_above_bmp(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16( + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16( "[\U00010000-\U0010FFFF]" ) self.assertEqual( @@ -193,7 +193,7 @@ def test_unescaped_above_bmp(self) -> None: ) def test_escaped_above_bmp(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16( + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16( "[\\U00010000-\\U0010FFFF]" ) self.assertEqual( @@ -203,15 +203,15 @@ def test_escaped_above_bmp(self) -> None: ) def test_unescaped_special_ascii_characters(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16("[\t\n\r]") + pattern = 
aas_core_codegen.jsonschema.main.fix_pattern_for_utf16("[\t\n\r]") self.assertEqual("[\\t\\n\\r]", pattern) def test_escaped_special_ascii_characters(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16("[\\t\\n\\r]") + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16("[\\t\\n\\r]") self.assertEqual("[\\t\\n\\r]", pattern) def test_on_XML_string_pattern(self) -> None: - pattern = aas_core_codegen.jsonschema.main._fix_pattern_for_utf16( + pattern = aas_core_codegen.jsonschema.main.fix_pattern_for_utf16( r"^[\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]*$" ) self.assertEqual(