From e0e239f4ddb8e3c6efe77b0f1d17e13744b5b7da Mon Sep 17 00:00:00 2001 From: Rendijs Smukulis Date: Tue, 17 Dec 2024 16:28:10 +0200 Subject: [PATCH] Added domain and url canonicalization reference tests for the malicious site implementation --- malicious-sites/canonicalization/tests.json | 177 ++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 malicious-sites/canonicalization/tests.json diff --git a/malicious-sites/canonicalization/tests.json b/malicious-sites/canonicalization/tests.json new file mode 100644 index 0000000..6c8a752 --- /dev/null +++ b/malicious-sites/canonicalization/tests.json @@ -0,0 +1,177 @@ +{ + "domains": { + "name": "Domain Canonicalization", + "desc": "Domain Canonicalization - tests provide input URI, with expected output", + "tests": [ + { + "name": "Simple domain - extract hostname portion from the URL", + "siteURL": "http://www.somesite.com", + "expectDomain": "www.somesite.com", + "exceptPlatforms": [] + }, + { + "name": "Simple domain with a port", + "siteURL": "http://www.somesite.com:8000/", + "expectDomain": "www.somesite.com", + "exceptPlatforms": [] + }, + { + "name": "Simple domain with a username", + "siteURL": "http://user:pass@www.somesite.com", + "expectDomain": "www.somesite.com", + "exceptPlatforms": [] + }, + { + "name": "Simple domain with a fragment", + "siteURL": "http://www.somesite.com#fragment", + "expectDomain": "www.somesite.com", + "exceptPlatforms": [] + }, + { + "name": "Simple domain with a query string", + "siteURL": "http://www.somesite.com?some=value", + "expectDomain": "www.somesite.com", + "exceptPlatforms": [] + }, + { + "name": "Decode any %XX escapes present in the hostname", + "siteURL": "http://www.%73ome%73ite.com", + "expectDomain": "www.somesite.com", + "exceptPlatforms": [] + }, + { + "name": "Discard any leading and/or trailing full-stops", + "siteURL": "http://.www.somesite.com.", + "expectDomain": "www.somesite.com", + "exceptPlatforms": [] + }, + { + "name": "Replace 
sequences of two or more full-stops with a single full-stop.", + "siteURL": "http://www..example...com", + "expectDomain": "www.example.com", + "exceptPlatforms": [] + }, + { + "name": "If the hostname is a numeric IPv4 address, reduce it to the canonical dotted quad form.", + "siteURL": "http://192.168.000.001:8000/", + "expectDomain": "192.168.0.1", + "exceptPlatforms": [] + }, + { + "name": "If there are more than six components in the resulting hostname, discard all but the rightmost six components.", + "siteURL": "http://a.b.c.d.e.f.g.h.i.j.example.com", + "expectDomain": "g.h.i.j.example.com", + "exceptPlatforms": [] + }, + { + "name": "If the hostname is a numeric IPv4 address, reduce it to the canonical dotted quad form.", + "siteURL": "http://192.168.001.001:8080/", + "expectDomain": "192.168.1.1", + "exceptPlatforms": [] + } + ] + }, + "urls": { + "name": "URL Canonicalization", + "desc": "URL Canonicalization - tests provide input URI, with expected output", + "tests": [ + { + "name": "Remove any fragments (#frag)", + "siteURL": "https://broken.third-party.site/path/to/resource#fragment", + "expectUrl": "https://broken.third-party.site/path/to/resource", + "exceptPlatforms": [] + }, + { + "name": "Remove all trailing slashes, but keep any single slash after the domain - with path", + "siteURL": "https://broken.third-party.site/path/to/resource/", + "expectUrl": "https://broken.third-party.site/path/to/resource", + "exceptPlatforms": [] + }, + { + "name": "Remove all trailing slashes, but keep any single slash after the domain - domain only", + "siteURL": "https://broken.third-party.site/", + "expectUrl": "https://broken.third-party.site/", + "exceptPlatforms": [] + }, + { + "name": "Add trailing slash after domain", + "siteURL": "https://broken.third-party.site", + "expectUrl": "https://broken.third-party.site/", + "exceptPlatforms": [] + }, + { + "name": "Path Canonicalization - Resolve all occurrences of '../' in the path", + "siteURL": 
"https://broken.third-party.site/path/to/../resource", + "expectUrl": "https://broken.third-party.site/path/resource", + "exceptPlatforms": [] + }, + { + "name": "Path Canonicalization - Replace all occurrences of more than one '/' with just one '/', excluding the protocol", + "siteURL": "https://broken.third-party.site//path//to//resource", + "expectUrl": "https://broken.third-party.site/path/to/resource", + "exceptPlatforms": [] + }, + { + "name": "Path Canonicalization - Remove all occurrences of './' in the path", + "siteURL": "https://broken.third-party.site/path/./to/./resource", + "expectUrl": "https://broken.third-party.site/path/to/resource", + "exceptPlatforms": [] + }, + { + "name": "Percent-escape all characters that are <= ASCII 32, >= 127, '#', or '%'. The escapes should use uppercase hex characters.", + "siteURL": "https://broken.third-party.site/path!resource", + "expectUrl": "https://broken.third-party.site/path%21resource", + "exceptPlatforms": [] + }, + { + "name": "Space canonicalization", + "siteURL": "https://broken.third-party.site/path/to/resource with%20some+spaces", + "expectUrl": "https://broken.third-party.site/path/to/resource%20with%20some%20spaces", + "exceptPlatforms": [] + }, + { + "name": "Slash canonicalization and removal", + "siteURL": "https://broken.third-party.site/path/to/%2F%2F%2F%2F%2F%2F%2F%2F%2F", + "expectUrl": "https://broken.third-party.site/path/to", + "exceptPlatforms": [] + }, + { + "name": "Period canonicalization and removal", + "siteURL": "https://broken.third-party.site/path/to/%2E%2E/%2E%2E/resource", + "expectUrl": "https://broken.third-party.site/resource", + "exceptPlatforms": [] + }, + { + "name": "Encoded fragment character", + "siteURL": "https://broken.third-party.site/path/to/resource%23encodedfragment", + "expectUrl": "https://broken.third-party.site/path/to/resource%23encodedfragment", + "exceptPlatforms": [] + }, + { + "name": "Resolve mixed occurrences of '../' and './' in the path", + "siteURL": 
"https://broken.third-party.site/path/to/./.././resource", + "expectUrl": "https://broken.third-party.site/path/resource", + "exceptPlatforms": [] + }, + { + "name": "The escapes should use uppercase hex characters", + "siteURL": "https://broken.third-party.site/path%3c", + "expectUrl": "https://broken.third-party.site/path%3C", + "exceptPlatforms": [] + }, + { + "name": "Do not apply path canonicalizations to query parameters", + "siteURL": "https://broken.third-party.site/path?q=a/../path/./", + "expectUrl": "https://broken.third-party.site/path?q=a/../path/./", + "exceptPlatforms": [] + }, + { + "name": "Escape ASCII >= 127", + "siteURL": "https://broken.third-party.site/path/žebērklis/to", + "expectUrl": "https://broken.third-party.site/path/%C5%BEeb%C4%93rklis/to", + "exceptPlatforms": [] + } + ] + } +} + \ No newline at end of file