Skip to content

Commit

Permalink
Added integration tests for escaping backslashes and double quotes in…
Browse files Browse the repository at this point in the history
… JSON input values. Fixed bug that sometimes would cause Query.makeScriptFieldsClause() to create script fields when not needed.
  • Loading branch information
davemoore- committed May 7, 2024
1 parent b5c00ba commit b762e98
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 31 deletions.
27 changes: 20 additions & 7 deletions src/main/java/io/zentity/common/Json.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,38 @@ public class Json {
public static final ObjectMapper ORDERED_MAPPER = new ObjectMapper().configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true);
private static final JsonStringEncoder STRING_ENCODER = new JsonStringEncoder();

/**
* Escape a JSON value and wrap it in double quotes.
*
* @param value The value to be escaped and wrapped in double quotes.
* @return The escaped value wrapped in double quotes.
*/
public static String quoteString(String value) {
return jsonStringFormat(value);
return jsonStringQuote(jsonStringEscape(value));
}

/**
 * Escape a value so that it can be embedded inside a JSON string.
 * The result is not wrapped in double quotes.
 *
 * @param value The value to be escaped. May be null.
 * @return The escaped value, or the text "null" if the given value was null.
 */
public static String jsonStringEscape(String value) {
    // A null value is mapped to "null" up front because it would otherwise
    // cause a NullPointerException in STRING_ENCODER.quoteAsString().
    return value == null ? "null" : new String(STRING_ENCODER.quoteAsString(value));
}

private static String jsonStringQuote(String value) {
/**
 * Surround a value with double quotes. The value is used as given;
 * no characters are escaped by this method.
 *
 * @param value The value to be wrapped in quotes.
 * @return The value with one double quote prepended and one appended.
 */
public static String jsonStringQuote(String value) {
    StringBuilder quoted = new StringBuilder();
    quoted.append('"');
    // Appending a null String yields the text "null", same as string concatenation would.
    quoted.append(value);
    quoted.append('"');
    return quoted.toString();
}

private static String jsonStringFormat(String value) {
return jsonStringQuote(jsonStringEscape(value));
}


/**
* Converts an object {@link JsonNode JsonNode's} fields iterator to a {@link Map} of strings.
*
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/io/zentity/resolution/Job.java
Original file line number Diff line number Diff line change
Expand Up @@ -926,7 +926,7 @@ else if (!responseData.get("hits").has("hits"))
String attributeValueSerialized = new String(Base64.getDecoder().decode(_name[3]));
String attributeType = job.input().model().attributes().get(attributeName).type();
if (attributeType.equals("string") || attributeType.equals("date"))
attributeValueSerialized = "\"" + attributeValueSerialized + "\"";
attributeValueSerialized = Json.jsonStringQuote(attributeValueSerialized);
JsonNode attributeValueNode = Json.MAPPER.readTree("{\"attribute_value\":" + attributeValueSerialized + "}").get("attribute_value");
JsonNode matcherParamsNode;
if (job.input().attributes().containsKey(attributeName))
Expand Down
24 changes: 12 additions & 12 deletions src/main/java/io/zentity/resolution/Query.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import io.zentity.model.Model;
import io.zentity.model.ValidationException;
import io.zentity.resolution.input.Attribute;
import io.zentity.resolution.input.Input;
import io.zentity.resolution.input.Term;
import io.zentity.resolution.input.value.StringValue;
import io.zentity.resolution.input.value.Value;
Expand Down Expand Up @@ -69,20 +68,21 @@ public class Query {
* Builds the "script_fields" clause of an Elasticsearch query.
* This is required by some zentity attribute types such as the "date" type.
*
* @param input The input of the resolution job.
* @param indexName The index name of the query.
* @param attributes The input attributes of the resolution job.
* @param model The input model of the resolution job.
* @param indexName The index name of the query.
* @return A JSON-formatted string of the "script_fields" clause of an Elasticsearch query.
* @throws ValidationException
*/
public static String makeScriptFieldsClause(Input input, String indexName) throws ValidationException {
public static String makeScriptFieldsClause(Map<String, Attribute> attributes, Model model, String indexName) throws ValidationException {
List<String> scriptFieldClauses = new ArrayList<>();

// Find any index fields that need to be included in the "script_fields" clause.
// Currently this includes any index field that is associated with a "date" attribute,
// which requires the "_source" value to be reformatted to a normalized format.
Index index = input.model().indices().get(indexName);
for (String attributeName : index.attributeIndexFieldsMap().keySet()) {
switch (input.model().attributes().get(attributeName).type()) {
Index index = model.indices().get(indexName);
for (String attributeName :attributes.keySet()) {
switch (model.attributes().get(attributeName).type()) {
case "date":

// Required params
Expand All @@ -91,17 +91,17 @@ public static String makeScriptFieldsClause(Input input, String indexName) throw
// Make a "script" clause for each index field associated with this attribute.
for (String indexFieldName : index.attributeIndexFieldsMap().get(attributeName).keySet()) {
// Check if the required params are defined in the input attribute.
if (input.attributes().containsKey(attributeName) && input.attributes().get(attributeName).params().containsKey("format") && !input.attributes().get(attributeName).params().get("format").equals("null") && !Patterns.EMPTY_STRING.matcher(input.attributes().get(attributeName).params().get("format")).matches()) {
format = input.attributes().get(attributeName).params().get("format");
if (attributes.containsKey(attributeName) && attributes.get(attributeName).params().containsKey("format") && !attributes.get(attributeName).params().get("format").equals("null") && !Patterns.EMPTY_STRING.matcher(attributes.get(attributeName).params().get("format")).matches()) {
format = attributes.get(attributeName).params().get("format");
} else {
// Otherwise check if the required params are defined in the model attribute.
Map<String, String> params = input.model().attributes().get(attributeName).params();
Map<String, String> params = model.attributes().get(attributeName).params();
if (params.containsKey("format") && !params.get("format").equals("null") && !Patterns.EMPTY_STRING.matcher(params.get("format")).matches()) {
format = params.get("format");
} else {
// Otherwise check if the required params are defined in the matcher associated with the index field.
String matcherName = index.attributeIndexFieldsMap().get(attributeName).get(indexFieldName).matcher();
params = input.model().matchers().get(matcherName).params();
params = model.matchers().get(matcherName).params();
if (params.containsKey("format") && !params.get("format").equals("null") && !Patterns.EMPTY_STRING.matcher(params.get("format")).matches()) {
format = params.get("format");
} else {
Expand Down Expand Up @@ -746,7 +746,7 @@ else if (!resolversClause.isEmpty())
topLevelClauses.add(queryClause);

// Construct the "script_fields" clause.
String scriptFieldsClause = makeScriptFieldsClause(job.input(), indexName);
String scriptFieldsClause = makeScriptFieldsClause(job.attributes(), job.input().model(), indexName);
if (scriptFieldsClause != null)
topLevelClauses.add(scriptFieldsClause);

Expand Down
43 changes: 33 additions & 10 deletions src/test/java/io/zentity/resolution/JobTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,13 @@ public void testPopulateMatcherClauseIgnoreUnusedParams() throws Exception {
}

/**
* Sometimes the value extracted from the document contains quotations or backslashes that need to be escaped
* Populate the clause of a matcher by substituting the {{ field }} and {{ value }} variables.
* Ensure double quotes are properly escaped.
*
* @throws Exception
*/
@Test
public void testPopulateMatcherClauseQuoteJsonString() throws Exception {
public void testPopulateMatcherClauseEscapeQuotes() throws Exception {
String matcherJson = "{\n" +
" \"clause\": {\n" +
" \"match\": {\n" +
Expand All @@ -132,6 +133,28 @@ public void testPopulateMatcherClauseQuoteJsonString() throws Exception {
Assert.assertEquals(expected, matcherClause);
}

/**
 * Substitute the {{ field }} and {{ value }} variables of a matcher clause
 * using a value that contains backslashes, and verify that every backslash
 * is escaped in the populated clause.
 *
 * @throws Exception
 */
@Test
public void testPopulateMatcherClauseEscapeBackslashes() throws Exception {
    String clauseTemplate = "{\n" +
            " \"clause\": {\n" +
            " \"match\": {\n" +
            " \"{{ field }}\": \"{{ value }}\"\n" +
            " }" +
            " }\n" +
            "}";
    // Each backslash of the input value must appear doubled in the resulting JSON.
    String expectedClause = "{\"match\":{\"field_filepath\":\"C:\\\\Windows\\\\System32\"}}";
    Matcher filepathMatcher = new Matcher("matcher_filepath", clauseTemplate);
    TreeMap<String, String> noParams = new TreeMap<>();
    String actualClause = Query.populateMatcherClause(filepathMatcher, "field_filepath", "C:\\Windows\\System32", noParams);
    Assert.assertEquals(expectedClause, actualClause);
}

/**
* Populate the clause of a matcher by substituting the {{ field }} and {{ value }} variables,
* but don't include {{ field }} and expect an exception to be raised.
Expand Down Expand Up @@ -675,7 +698,7 @@ public void testMakeScriptFieldsClauseTypeDateFormatInputAttributeOnly() throws
" }\n" +
"}";
Input input = new Input(json, model);
String scriptFieldsClause = Query.makeScriptFieldsClause(input, "index");
String scriptFieldsClause = Query.makeScriptFieldsClause(input.attributes(), input.model(), "index");
String expected = "\"script_fields\":{\"field_timestamp\":{\"script\":{\"lang\":\"painless\",\"source\":\"DateFormat df = new SimpleDateFormat(params.format); df.setTimeZone(TimeZone.getTimeZone('UTC')); return df.format(doc[params.field].value.toInstant().toEpochMilli())\",\"params\":{\"field\":\"field_timestamp\",\"format\":\"yyyy-MM-dd\"}}}}";
Assert.assertEquals(scriptFieldsClause, expected);
}
Expand Down Expand Up @@ -741,7 +764,7 @@ public void testMakeScriptFieldsClauseTypeDateFormatMatcherOnly() throws Excepti
" }\n" +
"}";
Input input = new Input(json, model);
String scriptFieldsClause = Query.makeScriptFieldsClause(input, "index");
String scriptFieldsClause = Query.makeScriptFieldsClause(input.attributes(), input.model(), "index");
String expected = "\"script_fields\":{\"field_timestamp\":{\"script\":{\"lang\":\"painless\",\"source\":\"DateFormat df = new SimpleDateFormat(params.format); df.setTimeZone(TimeZone.getTimeZone('UTC')); return df.format(doc[params.field].value.toInstant().toEpochMilli())\",\"params\":{\"field\":\"field_timestamp\",\"format\":\"yyyy-MM-dd\"}}}}";
Assert.assertEquals(scriptFieldsClause, expected);
}
Expand Down Expand Up @@ -803,7 +826,7 @@ public void testMakeScriptFieldsClauseTypeDateFormatModelAttributeOnly() throws
" }\n" +
"}";
Input input = new Input(json, model);
String scriptFieldsClause = Query.makeScriptFieldsClause(input, "index");
String scriptFieldsClause = Query.makeScriptFieldsClause(input.attributes(), input.model(), "index");
String expected = "\"script_fields\":{\"field_timestamp\":{\"script\":{\"lang\":\"painless\",\"source\":\"DateFormat df = new SimpleDateFormat(params.format); df.setTimeZone(TimeZone.getTimeZone('UTC')); return df.format(doc[params.field].value.toInstant().toEpochMilli())\",\"params\":{\"field\":\"field_timestamp\",\"format\":\"yyyy-MM-dd\"}}}}";
Assert.assertEquals(scriptFieldsClause, expected);
}
Expand Down Expand Up @@ -868,7 +891,7 @@ public void testMakeScriptFieldsClauseTypeDateFormatModelAttributeOverridesMatch
" }\n" +
"}";
Input input = new Input(json, model);
String scriptFieldsClause = Query.makeScriptFieldsClause(input, "index");
String scriptFieldsClause = Query.makeScriptFieldsClause(input.attributes(), input.model(), "index");
String expected = "\"script_fields\":{\"field_timestamp\":{\"script\":{\"lang\":\"painless\",\"source\":\"DateFormat df = new SimpleDateFormat(params.format); df.setTimeZone(TimeZone.getTimeZone('UTC')); return df.format(doc[params.field].value.toInstant().toEpochMilli())\",\"params\":{\"field\":\"field_timestamp\",\"format\":\"yyyy-MM-dd'T'HH:mm:ss\"}}}}";
Assert.assertEquals(scriptFieldsClause, expected);
}
Expand Down Expand Up @@ -935,7 +958,7 @@ public void testMakeScriptFieldsClauseTypeDateFormatInputAttributeOverridesModel
" }\n" +
"}";
Input input = new Input(json, model);
String scriptFieldsClause = Query.makeScriptFieldsClause(input, "index");
String scriptFieldsClause = Query.makeScriptFieldsClause(input.attributes(), input.model(), "index");
String expected = "\"script_fields\":{\"field_timestamp\":{\"script\":{\"lang\":\"painless\",\"source\":\"DateFormat df = new SimpleDateFormat(params.format); df.setTimeZone(TimeZone.getTimeZone('UTC')); return df.format(doc[params.field].value.toInstant().toEpochMilli())\",\"params\":{\"field\":\"field_timestamp\",\"format\":\"yyyy-MM-dd'T'HH:mm:ss.SSS\"}}}}";
Assert.assertEquals(scriptFieldsClause, expected);
}
Expand Down Expand Up @@ -1002,7 +1025,7 @@ public void testMakeScriptFieldsClauseTypeDateFormatNullNotOverrides() throws Ex
" }\n" +
"}";
Input input = new Input(json, model);
String scriptFieldsClause = Query.makeScriptFieldsClause(input, "index");
String scriptFieldsClause = Query.makeScriptFieldsClause(input.attributes(), input.model(), "index");
String expected = "\"script_fields\":{\"field_timestamp\":{\"script\":{\"lang\":\"painless\",\"source\":\"DateFormat df = new SimpleDateFormat(params.format); df.setTimeZone(TimeZone.getTimeZone('UTC')); return df.format(doc[params.field].value.toInstant().toEpochMilli())\",\"params\":{\"field\":\"field_timestamp\",\"format\":\"yyyy-MM-dd'T'HH:mm:ss\"}}}}";
Assert.assertEquals(scriptFieldsClause, expected);
}
Expand Down Expand Up @@ -1064,7 +1087,7 @@ public void testMakeScriptFieldsClauseTypeDateFormatMissing() throws Exception {
" }\n" +
"}";
Input input = new Input(json, model);
Query.makeScriptFieldsClause(input, "index");
Query.makeScriptFieldsClause(input.attributes(), input.model(), "index");
}

/**
Expand Down Expand Up @@ -1124,7 +1147,7 @@ public void testMakeScriptFieldsClauseTypeDateFormatNull() throws Exception {
" }\n" +
"}";
Input input = new Input(json, model);
Query.makeScriptFieldsClause(input, "index");
Query.makeScriptFieldsClause(input.attributes(), input.model(), "index");
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.Set;
Expand Down Expand Up @@ -423,6 +424,38 @@ public class ResolutionActionIT extends AbstractIT {
" }\n" +
"}", ContentType.APPLICATION_JSON);

/**
* Request payload whose only input value, for "attribute_a", contains a backslash
* (escape\backslashes). The scope includes only the index "zentity_test_index_a"
* and the resolver "resolver_a".
*
* A value of foo\bar would be represented as foo\\bar in a JSON string value,
* and then represented as foo\\\\bar for Java string concatenation.
*/
public static final StringEntity TEST_PAYLOAD_JOB_ESCAPE_BACKSLASHES = new StringEntity("{\n" +
" \"attributes\": {\n" +
" \"attribute_a\": [ \"escape\\\\backslashes\" ]\n" +
" },\n" +
" \"scope\": {\n" +
" \"include\": {\n" +
" \"indices\": [ \"zentity_test_index_a\" ],\n" +
" \"resolvers\": [ \"resolver_a\" ]\n" +
" }\n" +
" }\n" +
"}", ContentType.APPLICATION_JSON);

/**
* Request payload whose only input value, for "attribute_a", contains double quotes
* (escape"doublequotes"). The scope includes only the index "zentity_test_index_a"
* and the resolver "resolver_a".
*
* A value of foo"bar" would be represented as foo\"bar\" in a JSON string value,
* and then represented as foo\\\"bar\\\" for Java string concatenation.
*/
public static final StringEntity TEST_PAYLOAD_JOB_ESCAPE_DOUBLE_QUOTES = new StringEntity("{\n" +
" \"attributes\": {\n" +
" \"attribute_a\": [ \"escape\\\"doublequotes\\\"\" ]\n" +
" },\n" +
" \"scope\": {\n" +
" \"include\": {\n" +
" \"indices\": [ \"zentity_test_index_a\" ],\n" +
" \"resolvers\": [ \"resolver_a\" ]\n" +
" }\n" +
" }\n" +
"}", ContentType.APPLICATION_JSON);

public static final StringEntity TEST_PAYLOAD_JOB_SCOPE_EXCLUDE_ATTRIBUTES = new StringEntity("{\n" +
" \"attributes\": {\n" +
" \"attribute_a\": [ \"a_00\" ]\n" +
Expand Down Expand Up @@ -1252,6 +1285,34 @@ public void testJobObject() throws Exception {
assertEquals(docsExpectedA, getActual(j1));
}

/**
 * Run a resolution job whose input value contains a backslash and expect exactly
 * one hit, which getActual() reports as "escapeBackslashes,0".
 *
 * @throws Exception
 */
@Test
public void testJobEscapeBackslashes() throws Exception {
    String endpoint = "_zentity/resolution/zentity_test_entity_a";
    Request postResolution = new Request("POST", endpoint);
    postResolution.setEntity(TEST_PAYLOAD_JOB_ESCAPE_BACKSLASHES);
    postResolution.addParameter("max_hops", "0");
    Response response = client().performRequest(postResolution);
    JsonNode json = Json.MAPPER.readTree(response.getEntity().getContent());
    // Expected value goes first so that a failure message labels "expected" and "actual" correctly.
    assertEquals(1, json.get("hits").get("total").asInt());
    Set<String> docsExpected = new TreeSet<>();
    // NOTE(review): the entry presumably has the form "<_id>,<hop>" — confirm against getActual().
    docsExpected.add("escapeBackslashes,0");
    assertEquals(docsExpected, getActual(json));
}

/**
 * Run a resolution job whose input value contains double quotes and expect exactly
 * one hit, which getActual() reports as "escapeDoubleQuotes,0".
 *
 * @throws Exception
 */
@Test
public void testJobEscapeDoubleQuotes() throws Exception {
    String endpoint = "_zentity/resolution/zentity_test_entity_a";
    Request postResolution = new Request("POST", endpoint);
    postResolution.setEntity(TEST_PAYLOAD_JOB_ESCAPE_DOUBLE_QUOTES);
    postResolution.addParameter("max_hops", "0");
    Response response = client().performRequest(postResolution);
    JsonNode json = Json.MAPPER.readTree(response.getEntity().getContent());
    // Expected value goes first so that a failure message labels "expected" and "actual" correctly.
    assertEquals(1, json.get("hits").get("total").asInt());
    Set<String> docsExpected = new TreeSet<>();
    // NOTE(review): the entry presumably has the form "<_id>,<hop>" — confirm against getActual().
    docsExpected.add("escapeDoubleQuotes,0");
    assertEquals(docsExpected, getActual(json));
}

@Test
public void testJobScopeExcludeAttributes() throws Exception {
String endpoint = "_zentity/resolution/zentity_test_entity_a";
Expand Down
4 changes: 4 additions & 0 deletions src/test/resources/TestData.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,7 @@
{"field_a": "a_11", "field_b": "b_11", "field_c": "c_10", "field_d": "d_03", "object": {"a": {"b": {"c": "a"}}}, "type_boolean": true, "type_date": "2020-01-01T00:00:00.000", "type_double": 3.141592653589793, "type_float": 1.0, "type_integer": 1, "type_long": 922337203685477, "type_string": "a", "type_string_null": null}
{ "index" : { "_index": "zentity_test_index_d", "_id": "d9" }}
{"field_a": "a_11", "field_b": "b_11", "field_c": "c_11", "field_d": "d_03", "object": {"a": {"b": {"c": "b"}}}, "type_boolean": false, "type_date": "2020-01-01T00:00:00.000", "type_double": -3.141592653589793, "type_float": -1.0, "type_integer": -1, "type_long": -922337203685477, "type_string": "b", "type_string_null": null}
{ "index" : { "_index": "zentity_test_index_a", "_id": "escapeBackslashes" }}
{"field_a": "escape\\backslashes"}
{ "index" : { "_index": "zentity_test_index_a", "_id": "escapeDoubleQuotes" }}
{"field_a": "escape\"doublequotes\""}
2 changes: 1 addition & 1 deletion src/test/resources/TestEntityModelA.json
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
}
},
"params" :{
"format": "yyyy-MM-dd'T'HH:mm:ss.0000",
"format": "yyyy-MM-dd'T'HH:mm:ss.000",
"window": "2s"
},
"quality": null
Expand Down

0 comments on commit b762e98

Please sign in to comment.