From 1fed64c4c3ab13ed22236d816aa816f34b6d7349 Mon Sep 17 00:00:00 2001
From: Sven Boeckelmann
Date: Tue, 31 Oct 2023 12:46:00 +0100
Subject: [PATCH] improve prescan for schemaVersion attribute

---
 .../convert/AttributePreScanUtil.java         | 62 +++++++++++++++++++
 .../openepcis/convert/VersionTransformer.java | 35 +++---------------
 2 files changed, 66 insertions(+), 31 deletions(-)
 create mode 100644 src/main/java/io/openepcis/convert/AttributePreScanUtil.java

diff --git a/src/main/java/io/openepcis/convert/AttributePreScanUtil.java b/src/main/java/io/openepcis/convert/AttributePreScanUtil.java
new file mode 100644
index 0000000..8023782
--- /dev/null
+++ b/src/main/java/io/openepcis/convert/AttributePreScanUtil.java
@@ -0,0 +1,62 @@
+package io.openepcis.convert;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Detects the {@code schemaVersion} attribute near the start of a document without consuming it.
+ */
+public class AttributePreScanUtil {
+
+  // Matches schemaVersion="x" / schemaVersion='x' (XML style) and "schemaVersion": "x" (JSON style).
+  // The closing quote (back-reference \1) is mandatory so that a value cut off at the end of the
+  // bytes read so far is never reported as a match.
+  private static final String SCHEMA_VERSION_REGEX =
+      "schemaVersion\"?'?\\s*[=:]\\s*([\"'])([^\"']*)\\1";
+  private static final Pattern SCHEMA_VERSION_PATTERN = Pattern.compile(SCHEMA_VERSION_REGEX);
+  // Upper bound on the number of bytes inspected; also used as the mark read-limit.
+  private static final int READ_LIMIT = 4096;
+  // Number of bytes requested per read while scanning.
+  private static final int CHUNK_SIZE = 64;
+
+  /**
+   * Scans at most {@code READ_LIMIT} bytes of {@code input} for a quoted {@code schemaVersion}
+   * value and resets the stream to its original position afterwards.
+   *
+   * @param input stream positioned at the start of the document
+   * @return the value between the quotes, or an empty string if no complete attribute was found
+   * @throws IOException if reading from or resetting the stream fails
+   */
+  public static final String scanSchemaVersion(final BufferedInputStream input) throws IOException {
+    input.mark(READ_LIMIT);
+    try {
+      final ByteArrayOutputStream scanned = new ByteArrayOutputStream(READ_LIMIT);
+      final byte[] buffer = new byte[CHUNK_SIZE];
+      int remaining = READ_LIMIT;
+      while (remaining > 0) {
+        // Never read past the mark limit, otherwise reset() is not guaranteed to succeed.
+        final int len = input.read(buffer, 0, Math.min(buffer.length, remaining));
+        if (len == -1) {
+          break;
+        }
+        scanned.write(buffer, 0, len);
+        remaining -= len;
+        // Decode everything read so far in one go, so a multi-byte character split across two
+        // reads is decoded correctly once its remaining bytes have arrived.
+        final String text = new String(scanned.toByteArray(), StandardCharsets.UTF_8);
+        final Matcher matcher = SCHEMA_VERSION_PATTERN.matcher(text);
+        if (matcher.find()) {
+          return matcher.group(2);
+        }
+      }
+      return "";
+    } finally {
+      input.reset();
+    }
+  }
+
+}
diff --git a/src/main/java/io/openepcis/convert/VersionTransformer.java 
b/src/main/java/io/openepcis/convert/VersionTransformer.java index 37b8b69..f795ec8 100644 --- a/src/main/java/io/openepcis/convert/VersionTransformer.java +++ b/src/main/java/io/openepcis/convert/VersionTransformer.java @@ -32,13 +32,11 @@ import io.openepcis.convert.xml.XmlToJsonConverter; import io.openepcis.convert.xml.XmlVersionTransformer; import io.openepcis.model.rest.ProblemResponseBody; -import io.openepcis.validation.xml.PreScanUtil; import jakarta.xml.bind.JAXBContext; import jakarta.xml.bind.JAXBException; import lombok.extern.slf4j.Slf4j; import java.io.*; -import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Optional; import java.util.concurrent.ExecutorService; @@ -181,40 +179,15 @@ public final EPCISVersion versionDetector(final BufferedInputStream epcisDocumen return conversion.fromVersion(); } - // TODO: optimize prescan with regex matcher - String preScanVersion = null; - if (EPCISFormat.XML.equals(conversion.fromMediaType())) { - preScanVersion = PreScanUtil.scanFirstTag(epcisDocument); - } else { - epcisDocument.mark(1024); - // pre scan 1024 bytes to detect version - final byte[] preScan = new byte[1024]; - epcisDocument.read(preScan, 0, preScan.length); - epcisDocument.reset(); - preScanVersion = new String(preScan, StandardCharsets.UTF_8); - } + final String preScanVersion = AttributePreScanUtil.scanSchemaVersion(epcisDocument); - if (!preScanVersion.contains(EPCIS.SCHEMA_VERSION)) { + if (preScanVersion.isEmpty()) { throw new FormatConverterException( "Unable to detect EPCIS schemaVersion for given document, please check the document again"); } - EPCISVersion fromVersion; - - if (preScanVersion.contains(EPCIS.SCHEMA_VERSION + "=\"1.2\"") - || preScanVersion.contains(EPCIS.SCHEMA_VERSION + "='1.2'") - || preScanVersion.replace(" ", "").contains("\"" + EPCIS.SCHEMA_VERSION + "\":\"1.2\"")) { - fromVersion = EPCISVersion.VERSION_1_2_0; - } else if (preScanVersion.contains(EPCIS.SCHEMA_VERSION + "=\"2.0\"") - || 
preScanVersion.contains(EPCIS.SCHEMA_VERSION + "='2.0'") - || preScanVersion.replace(" ", "").contains("\"" + EPCIS.SCHEMA_VERSION + "\":\"2.0\"")) { - fromVersion = EPCISVersion.VERSION_2_0_0; - } else { - throw new FormatConverterException( - "Provided document contains unsupported EPCIS document version"); - } - - return fromVersion; + return EPCISVersion.fromString(preScanVersion).orElseThrow(() -> new FormatConverterException( + String.format("Provided document contains unsupported EPCIS document version %s", preScanVersion))); } /**