Rename Satellite outcome labels from the slash / "answer:" style to
dot-separated labels (dns.*, ip.*, tls.*, http.*, setup.*).

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Line breaks and leading indentation of context lines are a best guess; if the
context does not match the target file, try `git apply --ignore-whitespace`.
The "tls.baddomain" label uses a single U+FE0F after U+2757, like every other
"❗️" label in this file.

diff --git a/table/queries/merged_reduced_scans.sql b/table/queries/merged_reduced_scans.sql
index 2e9307e..017b8f1 100644
--- a/table/queries/merged_reduced_scans.sql
+++ b/table/queries/merged_reduced_scans.sql
@@ -32,62 +32,62 @@ CREATE TEMP FUNCTION AddHyperquackOutcomeEmoji(outcome STRING) AS (
 # https://www.iana.org/assignments/dns-parameters/dns-parameters.xhtml#dns-parameters-6
 CREATE TEMP FUNCTION ClassifySatelliteRCode(rcode INTEGER) AS (
   CASE
-    WHEN rcode = 1 THEN "❗️dns/rcode:FormErr"
-    WHEN rcode = 2 THEN "❓dns/rcode:ServFail"
-    WHEN rcode = 3 THEN "❗️dns/rcode:NXDomain"
-    WHEN rcode = 4 THEN "❗️dns/rcode:NotImp"
-    WHEN rcode = 5 THEN "❗️dns/rcode:Refused"
-    WHEN rcode = 6 THEN "❗️dns/rcode:YXDomain"
-    WHEN rcode = 7 THEN "❗️dns/rcode:YXRRSet"
-    WHEN rcode = 8 THEN "❗️dns/rcode:NXRRSet"
-    WHEN rcode = 9 THEN "❗️dns/rcode:NotAuth"
-    WHEN rcode = 10 THEN "❗️dns/rcode:NotZone"
-    WHEN rcode = 11 THEN "❗️dns/rcode:DSOTYPENI"
-    WHEN rcode = 12 THEN "❗️dns/rcode:Unassigned"
-    WHEN rcode = 13 THEN "❗️dns/rcode:Unassigned"
-    WHEN rcode = 14 THEN "❗️dns/rcode:Unassigned"
-    WHEN rcode = 15 THEN "❗️dns/rcode:Unassigned"
-    WHEN rcode = 16 THEN "❗️dns/rcode:BadVers"
-    WHEN rcode = 17 THEN "❗️dns/rcode:BadSig"
-    WHEN rcode = 18 THEN "❗️dns/rcode:BadKey"
-    WHEN rcode = 19 THEN "❗️dns/rcode:BadTime"
-    WHEN rcode = 20 THEN "❗️dns/rcode:BadMode"
-    WHEN rcode = 21 THEN "❗️dns/rcode:BadAlg"
-    WHEN rcode = 22 THEN "❗️dns/rcode:BadTrunc"
-    WHEN rcode = 23 THEN "❗️dns/rcode:BadCookie"
-    ELSE CONCAT("❗️dns/unknown_rcode:", rcode)
+    WHEN rcode = 1 THEN "❗️dns.error:FormErr"
+    WHEN rcode = 2 THEN "❓dns.error:ServFail"
+    WHEN rcode = 3 THEN "❗️dns.error:NXDomain"
+    WHEN rcode = 4 THEN "❗️dns.error:NotImp"
+    WHEN rcode = 5 THEN "❗️dns.error:Refused"
+    WHEN rcode = 6 THEN "❗️dns.error:YXDomain"
+    WHEN rcode = 7 THEN "❗️dns.error:YXRRSet"
+    WHEN rcode = 8 THEN "❗️dns.error:NXRRSet"
+    WHEN rcode = 9 THEN "❗️dns.error:NotAuth"
+    WHEN rcode = 10 THEN "❗️dns.error:NotZone"
+    WHEN rcode = 11 THEN "❗️dns.error:DSOTYPENI"
+    WHEN rcode = 12 THEN "❗️dns.error:Unassigned"
+    WHEN rcode = 13 THEN "❗️dns.error:Unassigned"
+    WHEN rcode = 14 THEN "❗️dns.error:Unassigned"
+    WHEN rcode = 15 THEN "❗️dns.error:Unassigned"
+    WHEN rcode = 16 THEN "❗️dns.error:BadVers"
+    WHEN rcode = 17 THEN "❗️dns.error:BadSig"
+    WHEN rcode = 18 THEN "❗️dns.error:BadKey"
+    WHEN rcode = 19 THEN "❗️dns.error:BadTime"
+    WHEN rcode = 20 THEN "❗️dns.error:BadMode"
+    WHEN rcode = 21 THEN "❗️dns.error:BadAlg"
+    WHEN rcode = 22 THEN "❗️dns.error:BadTrunc"
+    WHEN rcode = 23 THEN "❗️dns.error:BadCookie"
+    ELSE CONCAT("❗️dns.unknown_rcode:", rcode)
   END
 );
 
 CREATE TEMP FUNCTION ClassifySatelliteError(error STRING) AS (
   CASE
     # Satellite v1
-    WHEN REGEXP_CONTAINS(error, '"Err": {}') THEN "❓read/udp.timeout"
-    WHEN REGEXP_CONTAINS(error, '"Err": 90') THEN "❗️read/dns.msgsize"
-    WHEN REGEXP_CONTAINS(error, '"Err": 111') THEN "❗️read/udp.refused"
-    WHEN REGEXP_CONTAINS(error, '"Err": 113') THEN "❔read/ip.host_no_route"
-    WHEN REGEXP_CONTAINS(error, '"Err": 24') THEN "❔setup/system_failure" # Too many open files
-    WHEN error = "{}" THEN "❗️dns/unknown" # TODO figure out origin
-    WHEN error = "no_answer" THEN "❗️dns/answer:no_answer"
+    WHEN REGEXP_CONTAINS(error, '"Err": {}') THEN "❓dns.timedout"
+    WHEN REGEXP_CONTAINS(error, '"Err": 90') THEN "❗️dns.msgsize"
+    WHEN REGEXP_CONTAINS(error, '"Err": 111') THEN "❗️dns.connrefused"
+    WHEN REGEXP_CONTAINS(error, '"Err": 113') THEN "❔dns.hostunreach"
+    WHEN REGEXP_CONTAINS(error, '"Err": 24') THEN "❔setup.system_failure" # Too many open files
+    WHEN error = "{}" THEN "❗️dns.unknown" # TODO figure out origin
+    WHEN error = "no_answer" THEN "❗️dns.no_answer"
     #Satellite v2
-    WHEN ENDS_WITH(error, "i/o timeout") THEN "❓read/udp.timeout"
-    WHEN ENDS_WITH(error, "message too long") THEN "❗️read/dns.msgsize"
-    WHEN ENDS_WITH(error, "connection refused") THEN "❗️read/udp.refused"
-    WHEN ENDS_WITH(error, "no route to host") THEN "❔read/ip.host_no_route"
-    WHEN ENDS_WITH(error, "short read") THEN "❗️read/dns.msgsize"
-    WHEN ENDS_WITH(error, "read: protocol error") THEN "❗️read/protocol_error"
-    WHEN ENDS_WITH(error, "socket: too many open files") THEN "❔setup/system_failure"
-    ELSE CONCAT("❗️dns/unknown_error:", error)
+    WHEN ENDS_WITH(error, "i/o timeout") THEN "❓dns.timedout"
+    WHEN ENDS_WITH(error, "message too long") THEN "❗️dns.msgsize"
+    WHEN ENDS_WITH(error, "connection refused") THEN "❗️dns.connrefused"
+    WHEN ENDS_WITH(error, "no route to host") THEN "❔dns.hostunreach"
+    WHEN ENDS_WITH(error, "short read") THEN "❗️dns.msgsize"
+    WHEN ENDS_WITH(error, "read: protocol error") THEN "❗️dns.protocol_error"
+    WHEN ENDS_WITH(error, "socket: too many open files") THEN "❔setup.system_failure"
+    ELSE CONCAT("❗️unknown_error:", error)
   END
 );
 
 CREATE TEMP FUNCTION InvalidIpType(ip STRING) AS (
   CASE
-    WHEN STARTS_WITH(ip, "0.") THEN "❗️ip_invalid:zero"
-    WHEN STARTS_WITH(ip, "127.") THEN "❗️ip_invalid:local_host"
-    WHEN STARTS_WITH(ip, "10.") THEN "❗️ip_invalid:local_net"
-    WHEN NET.IP_TO_STRING(NET.IP_TRUNC(NET.SAFE_IP_FROM_STRING(ip), 12)) = "172.16.0.0" THEN "❗️ip_invalid:local_net"
-    WHEN STARTS_WITH(ip, "192.168.") THEN "❗️ip_invalid:local_net"
+    WHEN STARTS_WITH(ip, "0.") THEN "❗️ip.invalid:zero"
+    WHEN STARTS_WITH(ip, "127.") THEN "❗️ip.invalid:local_host"
+    WHEN STARTS_WITH(ip, "10.") THEN "❗️ip.invalid:local_net"
+    WHEN NET.IP_TO_STRING(NET.IP_TRUNC(NET.SAFE_IP_FROM_STRING(ip), 12)) = "172.16.0.0" THEN "❗️ip.invalid:local_net"
+    WHEN STARTS_WITH(ip, "192.168.") THEN "❗️ip.invalid:local_net"
     ELSE NULL
   END
 );
@@ -131,32 +131,32 @@ CREATE TEMP FUNCTION SatelliteOutcomeString(domain_name STRING,
              AND dns_error != "null"
              AND dns_error != "SERVFAIL") THEN ClassifySatelliteError(dns_error)
         # TODO fix -1 rcodes in v1 data in the pipeline
-        WHEN rcode = -1 THEN "❓read/udp.timeout"
+        WHEN rcode = -1 THEN "❓dns.timedout"
         WHEN rcode != 0 THEN ClassifySatelliteRCode(rcode)
-        WHEN ARRAY_LENGTH(answers) = 0 THEN "❗️answer:no_answer"
+        WHEN ARRAY_LENGTH(answers) = 0 THEN "❗️ip.empty"
         ELSE IFNULL(
             (SELECT InvalidIpType(answer.ip) FROM UNNEST(answers) answer LIMIT 1),
             CASE
                 WHEN (SELECT LOGICAL_OR(answer.matches_control.ip)
                       FROM UNNEST(answers) answer)
-                     THEN "✅answer:matches_ip"
+                     THEN "✅ip.matchip"
                 WHEN (SELECT LOGICAL_OR(a.https_tls_cert_matches_domain AND a.https_tls_cert_has_trusted_ca)
                       FROM UNNEST(answers) a)
-                     THEN "✅answer:valid_cert"
+                     THEN "✅tls.validcert"
                 WHEN (SELECT LOGICAL_OR(a.https_tls_cert_matches_domain AND NOT a.https_tls_cert_has_trusted_ca)
                       FROM UNNEST(answers) a)
-                     THEN CONCAT("❗️answer:invalid_ca_valid_domain:", answers[OFFSET(0)].https_tls_cert_issuer)
+                     THEN CONCAT("❗️tls.badca:", answers[OFFSET(0)].https_tls_cert_issuer)
                 WHEN (SELECT LOGICAL_AND(NOT a.https_tls_cert_matches_domain)
                       FROM UNNEST(answers) a)
-                     THEN CONCAT("❗️answer:cert_not_for_domain:", answers[OFFSET(0)].https_tls_cert_common_name)
+                     THEN CONCAT("❗️tls.baddomain:", answers[OFFSET(0)].https_tls_cert_common_name)
                 WHEN (SELECT LOGICAL_OR(answer.http_analysis_is_known_blockpage)
                       FROM UNNEST(answers) answer)
-                     THEN CONCAT("❗️page:http_blockpage:", answers[OFFSET(0)].http_analysis_page_signature)
+                     THEN CONCAT("❗️http.blockpage:", answers[OFFSET(0)].http_analysis_page_signature)
                 -- We check AS after cert/blockpage because we've seen (rare) cases of blockpages hosted on the ISP that also hosts Akamai servers.
                 WHEN (SELECT LOGICAL_OR(answer.matches_control.asn)
                       FROM UNNEST(answers) answer)
-                     THEN "✅answer:matches_asn"
-                ELSE CONCAT("❓answer:not_validated:", AnswersSignature(answers))
+                     THEN "✅ip.matchasn"
+                ELSE CONCAT("❓tls.connerror:", AnswersSignature(answers))
             END
         )
     END