Skip to content

Commit

Permalink
Resolve map(varchar, json) canonicalization bug
Browse files Browse the repository at this point in the history
The map function will not sort a json object by its keys, despite the
json_parse function sorting the same input.
If implemented, this will sort json objects.

Resolves prestodb#24207
  • Loading branch information
infvg committed Dec 10, 2024
1 parent 68b26f3 commit ef14d84
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 4 deletions.
23 changes: 19 additions & 4 deletions presto-main/src/main/java/com/facebook/presto/util/JsonUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.TreeNode;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.primitives.Shorts;
import com.google.common.primitives.SignedBytes;
import io.airlift.slice.Slice;
Expand All @@ -53,6 +56,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -86,6 +90,7 @@
import static com.fasterxml.jackson.core.JsonToken.FIELD_NAME;
import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
import static com.fasterxml.jackson.databind.SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static it.unimi.dsi.fastutil.HashCommon.arraySize;
Expand All @@ -102,10 +107,8 @@ public final class JsonUtil
{
public static final JsonFactory JSON_FACTORY = new JsonFactory().disable(CANONICALIZE_FIELD_NAMES);

// This object mapper is constructed without .configure(ORDER_MAP_ENTRIES_BY_KEYS, true) because
// `OBJECT_MAPPER.writeValueAsString(parser.readValueAsTree());` preserves input order.
// Be aware. Using it arbitrarily can produce invalid json (ordered by key is required in Presto).
private static final ObjectMapper OBJECT_MAPPED_UNORDERED = new ObjectMapper(JSON_FACTORY);
private static final ObjectMapper OBJECT_MAPPED_ORDERED = new ObjectMapper(JSON_FACTORY).configure(ORDER_MAP_ENTRIES_BY_KEYS, true);

private static final int MAX_JSON_LENGTH_IN_ERROR_MESSAGE = 10_000;

Expand Down Expand Up @@ -956,7 +959,18 @@ static BlockBuilderAppender createBlockBuilderAppender(Type type)
return new VarcharBlockBuilderAppender(type);
case StandardTypes.JSON:
return (parser, blockBuilder, sqlFunctionProperties) -> {
String json = OBJECT_MAPPED_UNORDERED.writeValueAsString(parser.readValueAsTree());
TreeNode obj = parser.readValueAsTree();
String json = "";
if (obj instanceof ObjectNode) {
Map<String, JsonNode> recreatedChildren = new LinkedHashMap<>();
((ObjectNode) obj).fields().forEachRemaining(entry -> {
recreatedChildren.put(entry.getKey(), entry.getValue());
});
json = OBJECT_MAPPED_ORDERED.writeValueAsString(recreatedChildren);
}
else {
json = OBJECT_MAPPED_ORDERED.writeValueAsString(obj);
}
JSON.writeSlice(blockBuilder, Slices.utf8Slice(json));
};
case StandardTypes.ARRAY:
Expand Down Expand Up @@ -1165,6 +1179,7 @@ public void append(JsonParser parser, BlockBuilder blockBuilder, SqlFunctionProp
throws IOException
{
Slice result = currentTokenAsVarchar(parser);
String s = result.toStringUtf8();
if (result == null) {
blockBuilder.appendNull();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package com.facebook.presto.operator;

import com.facebook.presto.Session;
import com.facebook.presto.testing.LocalQueryRunner;
import com.facebook.presto.testing.MaterializedResult;
import com.facebook.presto.testing.MaterializedRow;
import com.facebook.presto.testing.QueryRunner;
import com.google.common.collect.Iterables;
import org.intellij.lang.annotations.Language;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.util.HashMap;
import java.util.Map;

import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
import static java.lang.String.format;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

public class TestMapVarcharJsonOperator
{
private QueryRunner queryRunner;
@BeforeClass
public void setUp()
{
Session session = testSessionBuilder().build();
this.queryRunner = new LocalQueryRunner(session);
}

@AfterClass(alwaysRun = true)
public void tearDown()
{
queryRunner.close();
queryRunner = null;
}

@Test
public void testFunction()
{
Map<String, String> map = new HashMap<>();
map.put("m", "[\"rn\",\"w\",\"a\"]");
assertThatQueryReturnsValue("SELECT TRY(CAST(json_parse(c0) AS map(varchar, json))) from (values ('{\"m\": [\"rn\", \"w\", \"a\"]}')) t(c0)", map);
map.put("m", "{\"pl\":\"4\",\"rn\":\"w\"}");
assertThatQueryReturnsValue("SELECT TRY(CAST(json_parse(c0) AS map(varchar, json))) from (values ('{\"m\": {\"rn\": \"w\", \"pl\": \"4\"}}')) t(c0)", map);
}

private void assertThatQueryReturnsValue(@Language("SQL") String sql, Object expected)
{
MaterializedResult rows = queryRunner.execute(sql);
MaterializedRow materializedRow = Iterables.getOnlyElement(rows);
int fieldCount = materializedRow.getFieldCount();
assertTrue(fieldCount == 1, format("Expected only one column, but got '%d'", fieldCount));
Object value = materializedRow.getField(0);
assertEquals(value, expected);
assertTrue(Iterables.getOnlyElement(rows).getFieldCount() == 1);
}
}

0 comments on commit ef14d84

Please sign in to comment.