Skip to content

Commit

Permalink
Add ISP to structured metadata (#1247)
Browse files Browse the repository at this point in the history
  • Loading branch information
relud authored May 21, 2020
1 parent de9626f commit 89c4db0
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ public class AddMetadata {
private static final String GEO = "geo";
private static final String GEO_PREFIX = GEO + "_";

private static final String ISP = "isp";
private static final String ISP_PREFIX = ISP + "_";

private static final String USER_AGENT_PREFIX = Attribute.USER_AGENT + "_";

private static final String HEADER = "header";
Expand Down Expand Up @@ -123,6 +126,7 @@ public static ObjectNode attributesToMetadataPayload(Map<String, String> attribu
// are not specifically Map<String, String>.
ObjectNode metadata = Json.createObjectNode();
metadata.set(GEO, geoFromAttributes(attributes));
metadata.set(ISP, ispFromAttributes(attributes));
metadata.set(Attribute.USER_AGENT, userAgentFromAttributes(attributes));
metadata.set(HEADER, headersFromAttributes(attributes));
if (ParseUri.TELEMETRY.equals(namespace)) {
Expand Down Expand Up @@ -166,6 +170,7 @@ static void stripPayloadMetadataToAttributes(Map<String, String> attributes, Obj
.map(ObjectNode.class::cast) //
.ifPresent(metadata -> {
putGeoAttributes(attributes, metadata);
putIspAttributes(attributes, metadata);
putUserAgentAttributes(attributes, metadata);
putHeaderAttributes(attributes, metadata);
putUriAttributes(attributes, metadata);
Expand Down Expand Up @@ -197,6 +202,18 @@ static void putGeoAttributes(Map<String, String> attributes, ObjectNode metadata
putAttributes(attributes, metadata, GEO, GEO_PREFIX);
}

/**
 * Collects every attribute whose key starts with {@code ISP_PREFIX} into a newly created
 * JSON object, using the attribute name with that prefix removed as the field name.
 *
 * @param attributes flat attribute map to read; this method does not modify it
 * @return a newly created object node holding one field per matching attribute
 */
private static ObjectNode ispFromAttributes(Map<String, String> attributes) {
  ObjectNode isp = Json.createObjectNode();
  // Strip the prefix by its actual length instead of a hard-coded offset so the field
  // names stay correct if ISP_PREFIX is ever changed; iterate entries to avoid a second
  // map lookup per key.
  attributes.entrySet().stream() //
      .filter(entry -> entry.getKey().startsWith(ISP_PREFIX)) //
      .forEach(entry -> isp.put(entry.getKey().substring(ISP_PREFIX.length()),
          entry.getValue()));
  return isp;
}

/**
 * Delegates to {@code putAttributes} with the {@code ISP} metadata key and {@code ISP_PREFIX}.
 *
 * <p>NOTE(review): presumably this copies the fields under {@code metadata.isp} back into
 * {@code attributes} as {@code isp_*} entries, mirroring {@code putGeoAttributes} — confirm
 * against the {@code putAttributes} implementation.
 *
 * @param attributes attribute map passed through to {@code putAttributes}
 * @param metadata metadata object passed through to {@code putAttributes}
 */
static void putIspAttributes(Map<String, String> attributes, ObjectNode metadata) {
putAttributes(attributes, metadata, ISP, ISP_PREFIX);
}

static ObjectNode userAgentFromAttributes(Map<String, String> attributes) {
ObjectNode userAgent = Json.createObjectNode();
attributes.entrySet().stream() //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public class AddMetadataTest extends TestWithDeterministicJson {
public void testOutput() {
final List<String> input = Arrays.asList("{}", "{\"id\":null}", "[]", "{");
Map<String, String> attributes = ImmutableMap.<String, String>builder().put("sample_id", "18")
.put("geo_country", "CA").put("x_debug_id", "mysession")
.put("geo_country", "CA").put("isp_name", "service provider").put("x_debug_id", "mysession")
.put("normalized_channel", "release").build();
WithFailures.Result<PCollection<PubsubMessage>, PubsubMessage> output = pipeline //
.apply(Create.of(input)) //
Expand All @@ -48,11 +48,13 @@ public void testOutput() {

final List<String> expectedMain = ImmutableList.of(//
"{\"metadata\":{\"geo\":{\"country\":\"CA\"}" //
+ ",\"isp\":{\"name\":\"service provider\"}" //
+ ",\"user_agent\":{}" //
+ ",\"header\":{\"x_debug_id\":\"mysession\"}}" //
+ ",\"normalized_channel\":\"release\"" //
+ ",\"sample_id\":18}", //
"{\"metadata\":{\"geo\":{\"country\":\"CA\"}" //
+ ",\"isp\":{\"name\":\"service provider\"}" //
+ ",\"user_agent\":{}" //
+ ",\"header\":{\"x_debug_id\":\"mysession\"}}" //
+ ",\"normalized_channel\":\"release\"" //
Expand Down Expand Up @@ -173,6 +175,7 @@ public void testAttributesToMetadataPayload() throws Exception {
.put("app_name", "Firefox") //
.put("sample_id", "18") //
.put("geo_country", "CA") //
.put("isp_name", "my isp") //
.put("x_debug_id", "mysession") //
.put("normalized_channel", "release") //
.put("x_forwarded_for", "??") //
Expand All @@ -184,6 +187,7 @@ public void testAttributesToMetadataPayload() throws Exception {
.put("uri", ImmutableMap.of("app_name", "Firefox")) //
.put("header", ImmutableMap.of("x_debug_id", "mysession")) //
.put("geo", ImmutableMap.of("country", "CA")) //
.put("isp", ImmutableMap.of("name", "my isp")) //
.put("user_agent", ImmutableMap.of()) //
.build()) //
.put("normalized_channel", "release") //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,9 @@ public void testDecodedFormat() throws Exception {
.put("app_name", "Firefox").put("app_version", "58.0.2")
.put("app_update_channel", "release").put("app_build_id", "20180206200532")
.put("geo_country", "US").put("geo_subdivision1", "WA").put("geo_subdivision2", "Clark")
.put("geo_city", "Vancouver").put("submission_timestamp", "2018-03-12T21:02:18.123456Z")
.put("geo_city", "Vancouver").put("isp_db_version", "test db")
.put("isp_name", "test isp").put("isp_organization", "test org")
.put("submission_timestamp", "2018-03-12T21:02:18.123456Z")
.put("date", "Mon, 12 Mar 2018 21:02:18 GMT").put("dnt", "1")
.put("x_pingsender_version", "1.0").put("x_debug_id", "my_debug_session_1")
.put("user_agent_browser", "pingsender").put("user_agent_browser_version", "1.0")
Expand All @@ -351,24 +353,30 @@ public void testDecodedFormat() throws Exception {
String expected = Json.asString(ImmutableMap.<String, Object>builder()
.put("client_id", "5c49ec73-4350-45a0-9c8a-6c8f5aded0da")
.put("document_id", "6c49ec73-4350-45a0-9c8a-6c8f5aded0cf")
.put("metadata", ImmutableMap.<String, Object>builder()
.put("document_namespace", "telemetry").put("document_version", "4")
.put("document_type", "main")
.put("geo",
ImmutableMap.<String, String>builder().put("country", "US")
.put("subdivision1", "WA").put("subdivision2", "Clark").put("city", "Vancouver")
.build())
.put("header",
ImmutableMap.<String, String>builder().put("date", "Mon, 12 Mar 2018 21:02:18 GMT")
.put("dnt", "1").put("x_pingsender_version", "1.0")
.put("x_debug_id", "my_debug_session_1").build())
.put("uri",
ImmutableMap.<String, String>builder().put("app_name", "Firefox")
.put("app_version", "58.0.2").put("app_update_channel", "release")
.put("app_build_id", "20180206200532").build())
.put("user_agent", ImmutableMap.<String, String>builder().put("browser", "pingsender")
.put("browser_version", "1.0").put("os", "Windows").put("os_version", "10").build())
.build())
.put("metadata",
ImmutableMap.<String, Object>builder().put("document_namespace", "telemetry")
.put("document_version", "4").put("document_type", "main")
.put("geo",
ImmutableMap.<String, String>builder().put("country", "US")
.put("subdivision1", "WA").put("subdivision2", "Clark")
.put("city", "Vancouver").build())
.put("isp",
ImmutableMap.<String, String>builder().put("db_version", "test db")
.put("name", "test isp").put("organization", "test org").build())
.put("header",
ImmutableMap.<String, String>builder()
.put("date", "Mon, 12 Mar 2018 21:02:18 GMT").put("dnt", "1")
.put("x_pingsender_version", "1.0").put("x_debug_id", "my_debug_session_1")
.build())
.put("uri",
ImmutableMap.<String, String>builder().put("app_name", "Firefox")
.put("app_version", "58.0.2").put("app_update_channel", "release")
.put("app_build_id", "20180206200532").build())
.put("user_agent",
ImmutableMap.<String, String>builder().put("browser", "pingsender")
.put("browser_version", "1.0").put("os", "Windows").put("os_version", "10")
.build())
.build())
.put("submission_timestamp", "2018-03-12T21:02:18.123456Z")
.put("normalized_app_name", "Firefox").put("normalized_channel", "release")
.put("normalized_country_code", "US").put("normalized_os", "Windows")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{"attributeMap":{"error_message":"com.mozilla.telemetry.decoder.ParseUri$UnexpectedPathElementsException: Found -1 more path elements in the URI than expected for this endpoint","error_type":"ParseUri","exception_class":"com.mozilla.telemetry.decoder.ParseUri$UnexpectedPathElementsException","geo_country":"PH","geo_db_version":"2019-01-03T21:26:19Z","host":"test","isp_db_version":"2018-01-15T22:27:16Z","uri":"/submit/telemetry/main/Firefox/61.0a1/nightly/20180328030202"},"payload":""}
{"attributeMap":{"document_id":"2c3a0767-d84a-4d02-8a92-fa54a3376049","document_namespace":"eng-workflow","document_type":"hgpush","document_version":"1","error_message":"org.everit.json.schema.ValidationException: #: required key [changesetID] not found","error_type":"ParsePayload","exception_class":"org.everit.json.schema.ValidationException","geo_country":"PH","geo_db_version":"2019-01-03T21:26:19Z","host":"test","isp_db_version":"2018-01-15T22:27:16Z","uri":"/submit/eng-workflow/hgpush/1/2c3a0767-d84a-4d02-8a92-fa54a3376049"},"payload":"e30="}
{"attributeMap":{"document_id":"invalid_id","document_namespace":"test","document_type":"test","document_version":"1","error_message":"java.lang.IllegalArgumentException: Invalid UUID string: invalid_id","error_type":"Deduplicate.RemoveDuplicates","exception_class":"java.lang.IllegalArgumentException","geo_country":"PH","geo_db_version":"2019-01-03T21:26:19Z","host":"test","isp_db_version":"2018-01-15T22:27:16Z","normalized_country_code":"PH","uri":"/submit/test/test/1/invalid_id"},"payload":"eyJtZXRhZGF0YSI6eyJnZW8iOnsiY291bnRyeSI6IlBIIiwiZGJfdmVyc2lvbiI6IjIwMTktMDEtMDNUMjE6MjY6MTlaIn0sInVzZXJfYWdlbnQiOnt9LCJoZWFkZXIiOnt9LCJkb2N1bWVudF9uYW1lc3BhY2UiOiJ0ZXN0IiwiZG9jdW1lbnRfdHlwZSI6InRlc3QiLCJkb2N1bWVudF92ZXJzaW9uIjoiMSJ9LCJkb2N1bWVudF9pZCI6ImludmFsaWRfaWQiLCJub3JtYWxpemVkX2NvdW50cnlfY29kZSI6IlBIIn0="}
{"attributeMap":{"document_id":"invalid_id","document_namespace":"test","document_type":"test","document_version":"1","error_message":"java.lang.IllegalArgumentException: Invalid UUID string: invalid_id","error_type":"Deduplicate.RemoveDuplicates","exception_class":"java.lang.IllegalArgumentException","geo_country":"PH","geo_db_version":"2019-01-03T21:26:19Z","host":"test","isp_db_version":"2018-01-15T22:27:16Z","normalized_country_code":"PH","uri":"/submit/test/test/1/invalid_id"},"payload":"eyJtZXRhZGF0YSI6eyJnZW8iOnsiY291bnRyeSI6IlBIIiwiZGJfdmVyc2lvbiI6IjIwMTktMDEtMDNUMjE6MjY6MTlaIn0sImlzcCI6eyJkYl92ZXJzaW9uIjoiMjAxOC0wMS0xNVQyMjoyNzoxNloifSwidXNlcl9hZ2VudCI6e30sImhlYWRlciI6e30sImRvY3VtZW50X25hbWVzcGFjZSI6InRlc3QiLCJkb2N1bWVudF90eXBlIjoidGVzdCIsImRvY3VtZW50X3ZlcnNpb24iOiIxIn0sImRvY3VtZW50X2lkIjoiaW52YWxpZF9pZCIsIm5vcm1hbGl6ZWRfY291bnRyeV9jb2RlIjoiUEgifQ=="}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{"attributeMap":{"document_id":"2c3a0767-d84a-4d02-8a92-fa54a3376048","document_namespace":"test","document_type":"test","document_version":"1","geo_db_version":"2019-01-03T21:26:19Z","host":"test1","isp_db_version":"2018-01-15T22:27:16Z","uri":"/submit/test/test/1/2c3a0767-d84a-4d02-8a92-fa54a3376048"},"payload":"eyJtZXRhZGF0YSI6eyJnZW8iOnsiZGJfdmVyc2lvbiI6IjIwMTktMDEtMDNUMjE6MjY6MTlaIn0sInVzZXJfYWdlbnQiOnt9LCJoZWFkZXIiOnt9LCJkb2N1bWVudF9uYW1lc3BhY2UiOiJ0ZXN0IiwiZG9jdW1lbnRfdHlwZSI6InRlc3QiLCJkb2N1bWVudF92ZXJzaW9uIjoiMSJ9LCJkb2N1bWVudF9pZCI6IjJjM2EwNzY3LWQ4NGEtNGQwMi04YTkyLWZhNTRhMzM3NjA0OCJ9"}
{"attributeMap":{"document_id":"2c3a0767-d84a-4d02-8a92-fa54a3376049","document_namespace":"test","document_type":"test","document_version":"1","geo_country":"PH","geo_db_version":"2019-01-03T21:26:19Z","host":"test2","isp_db_version":"2018-01-15T22:27:16Z","normalized_country_code":"PH","uri":"/submit/test/test/1/2c3a0767-d84a-4d02-8a92-fa54a3376049"},"payload":"eyJtZXRhZGF0YSI6eyJnZW8iOnsiY291bnRyeSI6IlBIIiwiZGJfdmVyc2lvbiI6IjIwMTktMDEtMDNUMjE6MjY6MTlaIn0sInVzZXJfYWdlbnQiOnt9LCJoZWFkZXIiOnt9LCJkb2N1bWVudF9uYW1lc3BhY2UiOiJ0ZXN0IiwiZG9jdW1lbnRfdHlwZSI6InRlc3QiLCJkb2N1bWVudF92ZXJzaW9uIjoiMSJ9LCJkb2N1bWVudF9pZCI6IjJjM2EwNzY3LWQ4NGEtNGQwMi04YTkyLWZhNTRhMzM3NjA0OSIsIm5vcm1hbGl6ZWRfY291bnRyeV9jb2RlIjoiUEgifQ=="}
{"attributeMap":{"document_id":"3c3a0767-d84a-4d02-8a92-fa54a3376049","document_namespace":"test","document_type":"test","document_version":"1","geo_country":"PH","geo_db_version":"2019-01-03T21:26:19Z","host":"test3","isp_db_version":"2018-01-15T22:27:16Z","normalized_country_code":"PH","uri":"/submit/test/test/1/3c3a0767-d84a-4d02-8a92-fa54a3376049"},"payload":"eyJtZXRhZGF0YSI6eyJnZW8iOnsiY291bnRyeSI6IlBIIiwiZGJfdmVyc2lvbiI6IjIwMTktMDEtMDNUMjE6MjY6MTlaIn0sInVzZXJfYWdlbnQiOnt9LCJoZWFkZXIiOnt9LCJkb2N1bWVudF9uYW1lc3BhY2UiOiJ0ZXN0IiwiZG9jdW1lbnRfdHlwZSI6InRlc3QiLCJkb2N1bWVudF92ZXJzaW9uIjoiMSJ9LCJkb2N1bWVudF9pZCI6IjNjM2EwNzY3LWQ4NGEtNGQwMi04YTkyLWZhNTRhMzM3NjA0OSIsIm5vcm1hbGl6ZWRfY291bnRyeV9jb2RlIjoiUEgifQ=="}
{"attributeMap":{"document_id":"2c3a0767-d84a-4d02-8a92-fa54a3376048","document_namespace":"test","document_type":"test","document_version":"1","geo_db_version":"2019-01-03T21:26:19Z","host":"test1","isp_db_version":"2018-01-15T22:27:16Z","uri":"/submit/test/test/1/2c3a0767-d84a-4d02-8a92-fa54a3376048"},"payload":"eyJtZXRhZGF0YSI6eyJnZW8iOnsiZGJfdmVyc2lvbiI6IjIwMTktMDEtMDNUMjE6MjY6MTlaIn0sImlzcCI6eyJkYl92ZXJzaW9uIjoiMjAxOC0wMS0xNVQyMjoyNzoxNloifSwidXNlcl9hZ2VudCI6e30sImhlYWRlciI6e30sImRvY3VtZW50X25hbWVzcGFjZSI6InRlc3QiLCJkb2N1bWVudF90eXBlIjoidGVzdCIsImRvY3VtZW50X3ZlcnNpb24iOiIxIn0sImRvY3VtZW50X2lkIjoiMmMzYTA3NjctZDg0YS00ZDAyLThhOTItZmE1NGEzMzc2MDQ4In0="}
{"attributeMap":{"document_id":"2c3a0767-d84a-4d02-8a92-fa54a3376049","document_namespace":"test","document_type":"test","document_version":"1","geo_country":"PH","geo_db_version":"2019-01-03T21:26:19Z","host":"test2","isp_db_version":"2018-01-15T22:27:16Z","normalized_country_code":"PH","uri":"/submit/test/test/1/2c3a0767-d84a-4d02-8a92-fa54a3376049"},"payload":"eyJtZXRhZGF0YSI6eyJnZW8iOnsiY291bnRyeSI6IlBIIiwiZGJfdmVyc2lvbiI6IjIwMTktMDEtMDNUMjE6MjY6MTlaIn0sImlzcCI6eyJkYl92ZXJzaW9uIjoiMjAxOC0wMS0xNVQyMjoyNzoxNloifSwidXNlcl9hZ2VudCI6e30sImhlYWRlciI6e30sImRvY3VtZW50X25hbWVzcGFjZSI6InRlc3QiLCJkb2N1bWVudF90eXBlIjoidGVzdCIsImRvY3VtZW50X3ZlcnNpb24iOiIxIn0sImRvY3VtZW50X2lkIjoiMmMzYTA3NjctZDg0YS00ZDAyLThhOTItZmE1NGEzMzc2MDQ5Iiwibm9ybWFsaXplZF9jb3VudHJ5X2NvZGUiOiJQSCJ9"}
{"attributeMap":{"document_id":"3c3a0767-d84a-4d02-8a92-fa54a3376049","document_namespace":"test","document_type":"test","document_version":"1","geo_country":"PH","geo_db_version":"2019-01-03T21:26:19Z","host":"test3","isp_db_version":"2018-01-15T22:27:16Z","normalized_country_code":"PH","uri":"/submit/test/test/1/3c3a0767-d84a-4d02-8a92-fa54a3376049"},"payload":"eyJtZXRhZGF0YSI6eyJnZW8iOnsiY291bnRyeSI6IlBIIiwiZGJfdmVyc2lvbiI6IjIwMTktMDEtMDNUMjE6MjY6MTlaIn0sImlzcCI6eyJkYl92ZXJzaW9uIjoiMjAxOC0wMS0xNVQyMjoyNzoxNloifSwidXNlcl9hZ2VudCI6e30sImhlYWRlciI6e30sImRvY3VtZW50X25hbWVzcGFjZSI6InRlc3QiLCJkb2N1bWVudF90eXBlIjoidGVzdCIsImRvY3VtZW50X3ZlcnNpb24iOiIxIn0sImRvY3VtZW50X2lkIjoiM2MzYTA3NjctZDg0YS00ZDAyLThhOTItZmE1NGEzMzc2MDQ5Iiwibm9ybWFsaXplZF9jb3VudHJ5X2NvZGUiOiJQSCJ9"}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ private AddMetadata() {
private static final String GEO = "geo";
private static final String GEO_PREFIX = GEO + "_";

private static final String ISP = "isp";
private static final String ISP_PREFIX = ISP + "_";

private static final String USER_AGENT_PREFIX = Attribute.USER_AGENT + "_";

private static final String HEADER = "header";
Expand Down Expand Up @@ -62,6 +65,7 @@ public static Map<String, Object> attributesToMetadataPayload(Map<String, String
// are not specifically Map<String, String>.
Map<String, Object> metadata = new HashMap<>();
metadata.put(GEO, geoFromAttributes(attributes));
metadata.put(ISP, ispFromAttributes(attributes));
metadata.put(Attribute.USER_AGENT, userAgentFromAttributes(attributes));
metadata.put(HEADER, headersFromAttributes(attributes));
if ("telemetry".equals(namespace)) {
Expand Down Expand Up @@ -96,6 +100,14 @@ private static Map<String, Object> geoFromAttributes(Map<String, String> attribu
return geo;
}

/**
 * Collects every attribute whose key starts with {@code ISP_PREFIX} into a new map,
 * keyed by the attribute name with that prefix removed.
 *
 * @param attributes flat attribute map to read; this method does not modify it
 * @return a new mutable map, empty when no attribute key carries the prefix
 */
private static Map<String, Object> ispFromAttributes(Map<String, String> attributes) {
  HashMap<String, Object> isp = new HashMap<>();
  // Strip the prefix by its actual length instead of a hard-coded offset so the keys
  // stay correct if ISP_PREFIX is ever changed; iterate entries to avoid a second
  // map lookup per key.
  attributes.entrySet().stream() //
      .filter(entry -> entry.getKey().startsWith(ISP_PREFIX)) //
      .forEach(entry -> isp.put(entry.getKey().substring(ISP_PREFIX.length()),
          entry.getValue()));
  return isp;
}

private static Map<String, Object> userAgentFromAttributes(Map<String, String> attributes) {
HashMap<String, Object> userAgent = new HashMap<>();
attributes.keySet().stream() //
Expand Down
Loading

0 comments on commit 89c4db0

Please sign in to comment.