From 64da476443b908bf835eaa72379c2cac75ad57bf Mon Sep 17 00:00:00 2001
From: Michael He
Date: Fri, 13 Dec 2024 00:38:36 +0000
Subject: [PATCH] handle escape chars and add unit tests in java v2

---
Notes (text between the "---" marker and the first "diff --git" line is
ignored when the patch is applied and is not part of the commit message):

  * BedrockJsonParser.readString() previously recognised only the \" escape.
    It now decodes \" \\ \/ \b \f \n \r \t and \uXXXX, and throws
    IllegalArgumentException for an unknown escape character, for a non-hex
    digit inside \uXXXX, and for input that ends inside an escape sequence.
  * Each \uXXXX escape is appended as one UTF-16 char, so a surrogate pair
    written as two consecutive \uXXXX escapes yields two chars in the result.
  * NOTE(review): the loop condition `currentChar() != '"'` is unchanged and
    the visible part of currentChar() is `json.charAt(position)`, so a string
    with no closing quote presumably still fails with an index-out-of-bounds
    exception instead of IllegalArgumentException - confirm against the full
    file and consider a follow-up.
  * BedrockJsonParserTest is a new JUnit 4 class (org.junit.Test, AssertJ).
    It covers successful parses only; none of the new error paths is tested.

 .../awssdk/v2_2/BedrockJsonParser.java        | 59 +++++++++--
 .../awssdk/v2_2/BedrockJsonParserTest.java    | 98 +++++++++++++++++++
 2 files changed, 151 insertions(+), 6 deletions(-)
 create mode 100644 instrumentation/aws-sdk/aws-sdk-2.2/library/src/test/java/io/opentelemetry/instrumentation/awssdk/v2_2/BedrockJsonParserTest.java

diff --git a/instrumentation/aws-sdk/aws-sdk-2.2/library/src/main/java/io/opentelemetry/instrumentation/awssdk/v2_2/BedrockJsonParser.java b/instrumentation/aws-sdk/aws-sdk-2.2/library/src/main/java/io/opentelemetry/instrumentation/awssdk/v2_2/BedrockJsonParser.java
index a2277f95e77e..b416dac97fe3 100644
--- a/instrumentation/aws-sdk/aws-sdk-2.2/library/src/main/java/io/opentelemetry/instrumentation/awssdk/v2_2/BedrockJsonParser.java
+++ b/instrumentation/aws-sdk/aws-sdk-2.2/library/src/main/java/io/opentelemetry/instrumentation/awssdk/v2_2/BedrockJsonParser.java
@@ -42,6 +42,10 @@ private char currentChar() {
       return json.charAt(position);
     }
 
+    private static boolean isHexDigit(char c) {
+      return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+    }
+
     private void expect(char c) {
       skipWhitespace();
       if (currentChar() != c) {
@@ -56,12 +60,55 @@ private String readString() {
       expect('"'); // Ensure the string starts with a quote
       StringBuilder result = new StringBuilder();
       while (currentChar() != '"') {
-        // Handle escaped quotes within the string
-        if (currentChar() == '\\'
-            && position + 1 < json.length()
-            && json.charAt(position + 1) == '"') {
-          result.append('"');
-          position += 2; // Skip the backslash and the escaped quote
+        // Handle escape sequences
+        if (currentChar() == '\\') {
+          position++; // Move past the backslash
+          if (position >= json.length()) {
+            throw new IllegalArgumentException("Unexpected end of input in string escape sequence");
+          }
+          char escapeChar = currentChar();
+          switch (escapeChar) {
+            case '"':
+            case '\\':
+            case '/':
+              result.append(escapeChar);
+              break;
+            case 'b':
+              result.append('\b');
+              break;
+            case 'f':
+              result.append('\f');
+              break;
+            case 'n':
+              result.append('\n');
+              break;
+            case 'r':
+              result.append('\r');
+              break;
+            case 't':
+              result.append('\t');
+              break;
+            case 'u': // Unicode escape sequence
+              if (position + 4 >= json.length()) {
+                throw new IllegalArgumentException("Invalid unicode escape sequence in string");
+              }
+              char[] hexChars = new char[4];
+              for (int i = 0; i < 4; i++) {
+                position++; // Move to the next character
+                char hexChar = json.charAt(position);
+                if (!isHexDigit(hexChar)) {
+                  throw new IllegalArgumentException(
+                      "Invalid hexadecimal digit in unicode escape sequence");
+                }
+                hexChars[i] = hexChar;
+              }
+              int unicodeValue = Integer.parseInt(new String(hexChars), 16);
+              result.append((char) unicodeValue);
+              break;
+            default:
+              throw new IllegalArgumentException("Invalid escape character: \\" + escapeChar);
+          }
+          position++;
         } else {
           result.append(currentChar());
           position++;
diff --git a/instrumentation/aws-sdk/aws-sdk-2.2/library/src/test/java/io/opentelemetry/instrumentation/awssdk/v2_2/BedrockJsonParserTest.java b/instrumentation/aws-sdk/aws-sdk-2.2/library/src/test/java/io/opentelemetry/instrumentation/awssdk/v2_2/BedrockJsonParserTest.java
new file mode 100644
index 000000000000..c13f6332191e
--- /dev/null
+++ b/instrumentation/aws-sdk/aws-sdk-2.2/library/src/test/java/io/opentelemetry/instrumentation/awssdk/v2_2/BedrockJsonParserTest.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright The OpenTelemetry Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package io.opentelemetry.instrumentation.awssdk.v2_2;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.util.Map;
+import org.junit.Test;
+
+public class BedrockJsonParserTest {
+
+  @Test
+  public void shouldParseSimpleObject() {
+    // given
+    String json = "{\"key\":\"value\",\"number\":123,\"boolean\":true}";
+
+    // when
+    LlmJson parsedJson = BedrockJsonParser.parse(json);
+
+    // then
+    assertThat(parsedJson.getJsonBody()).containsEntry("key", "value");
+    assertThat(parsedJson.getJsonBody()).containsEntry("number", 123);
+    assertThat(parsedJson.getJsonBody()).containsEntry("boolean", true);
+  }
+
+  @Test
+  public void shouldParseNestedObject() {
+    // given
+    String json = "{\"parent\":{\"child\":\"value\"}}";
+
+    // when
+    LlmJson parsedJson = BedrockJsonParser.parse(json);
+
+    // then
+    Object parentObj = parsedJson.getJsonBody().get("parent");
+    assertThat(parentObj).isInstanceOf(Map.class); // Ensure it's a Map
+
+    @SuppressWarnings("unchecked")
+    Map<String, Object> parent = (Map<String, Object>) parentObj;
+    assertThat(parent).containsEntry("child", "value");
+  }
+
+  @Test
+  public void shouldParseEscapeSequences() {
+    // given
+    String json =
+        "{\"escaped\":\"Line1\\nLine2\\tTabbed\\\"Quoted\\\"\\bBackspace\\fFormfeed\\rCarriageReturn\\\\Backslash\\/Slash\\u0041\"}";
+
+    // when
+    LlmJson parsedJson = BedrockJsonParser.parse(json);
+
+    // then
+    assertThat(parsedJson.getJsonBody())
+        .containsEntry(
+            "escaped",
+            "Line1\nLine2\tTabbed\"Quoted\"\bBackspace\fFormfeed\rCarriageReturn\\Backslash/SlashA");
+  }
+
+  @Test
+  public void shouldParseUnicodeEscapeSequences() {
+    // given
+    String json = "{\"unicode\":\"\\u0041\\u0042\\u0043\"}";
+
+    // when
+    LlmJson parsedJson = BedrockJsonParser.parse(json);
+
+    // then
+    assertThat(parsedJson.getJsonBody()).containsEntry("unicode", "ABC");
+  }
+
+  @Test
+  public void shouldHandleEmptyObject() {
+    // given
+    String json = "{}";
+
+    // when
+    LlmJson parsedJson = BedrockJsonParser.parse(json);
+
+    // then
+    assertThat(parsedJson.getJsonBody()).isEmpty();
+  }
+
+  @Test
+  public void shouldHandleEmptyArray() {
+    // given
+    String json = "{\"array\":[]}";
+
+    // when
+    LlmJson parsedJson = BedrockJsonParser.parse(json);
+
+    // then
+    assertThat(parsedJson.getJsonBody()).containsKey("array");
+    assertThat((Iterable<?>) parsedJson.getJsonBody().get("array")).isEmpty();
+  }
+}