From 4d7040ed229bc012154cb8be74a9b32818bd605d Mon Sep 17 00:00:00 2001 From: Kenny Niehage Date: Mon, 8 Apr 2024 11:27:38 +0200 Subject: [PATCH 1/4] fix crawler join Signed-off-by: Kenny Niehage --- sender_policy_flattener/crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sender_policy_flattener/crawler.py b/sender_policy_flattener/crawler.py index 8b37cc3..c4d37c1 100644 --- a/sender_policy_flattener/crawler.py +++ b/sender_policy_flattener/crawler.py @@ -30,7 +30,7 @@ def crawl(rrname, rrtype, domain, ns=default_resolvers): except Exception as err: print(repr(err), rrname, rrtype) else: - answer = " ".join([str(a) for a in answers]) + answer = "".join([str(a) for a in answers]) for pair in tokenize(answer): rname, rtype = pair if rtype is None: From afff6ffb1992da34588a72c55ddbc014a4c73fa1 Mon Sep 17 00:00:00 2001 From: Kenny Niehage Date: Wed, 10 Apr 2024 10:53:57 +0200 Subject: [PATCH 2/4] rework TXT record handling and tokenization Signed-off-by: Kenny Niehage --- sender_policy_flattener/crawler.py | 28 +++++++++++++-------------- sender_policy_flattener/mechanisms.py | 19 +++++++++++++++--- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/sender_policy_flattener/crawler.py b/sender_policy_flattener/crawler.py index c4d37c1..fa70975 100644 --- a/sender_policy_flattener/crawler.py +++ b/sender_policy_flattener/crawler.py @@ -30,17 +30,17 @@ def crawl(rrname, rrtype, domain, ns=default_resolvers): except Exception as err: print(repr(err), rrname, rrtype) else: - answer = "".join([str(a) for a in answers]) - for pair in tokenize(answer): - rname, rtype = pair - if rtype is None: - continue - if rtype == "txt": - for ip in crawl(rname, "txt", domain, ns): - yield ip - continue - try: - for ip in handler_mapping[rtype](rname, domain, ns): - yield ip - except (NXDOMAIN, NoAnswer) as e: - print(e) + for answer in answers: + for pair in tokenize(str(answer), rrtype): + rname, rtype = pair + if rtype is None: + continue + if 
rtype == "txt": + for ip in crawl(rname, rtype, domain, ns): + yield ip + continue + try: + for ip in handler_mapping[rtype](rname, domain, ns): + yield ip + except (NXDOMAIN, NoAnswer) as e: + print(e) diff --git a/sender_policy_flattener/mechanisms.py b/sender_policy_flattener/mechanisms.py index 48f6694..3078272 100644 --- a/sender_policy_flattener/mechanisms.py +++ b/sender_policy_flattener/mechanisms.py @@ -52,11 +52,24 @@ def ptr(token): exists = partial(process_alias, keyword="exists") -def tokenize(answer): +def tokenize(answer, rrtype): + # TXT records potentially contain multiple strings that + # must be concatenated first, they also contain other + # quotes which will screw up the tokens, see: + # https://datatracker.ietf.org/doc/html/rfc7208#section-3.3 + if rrtype == "txt": + answer = answer.replace("\" \"", "") + answer = answer.strip("\"") tokens = answer.split() + # TXT records have to begin with "v=spf1" or otherwise + # they should be discarded, see: + # https://datatracker.ietf.org/doc/html/rfc7208#section-4.5 + if rrtype == "txt": + if (len(tokens) > 0) and (tokens[0] == "v=spf1"): + tokens = tokens[1:] + else: + tokens = [] for token in tokens: - # TXT records often contain quotes and will screw with the token. 
- token = token.strip("\"' ") for pattern, fn in mechanism_mapping.items(): if re.match(pattern, token): yield fn(token) From 4e761c1c519ba6d9ed614dcd19d0104956ebe95e Mon Sep 17 00:00:00 2001 From: Kenny Niehage Date: Wed, 24 Apr 2024 17:03:12 +0200 Subject: [PATCH 3/4] cleaned up code --- sender_policy_flattener/crawler.py | 18 ++++------ sender_policy_flattener/handlers.py | 55 ++++++++++++++++------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/sender_policy_flattener/crawler.py b/sender_policy_flattener/crawler.py index fa70975..45a3bc6 100644 --- a/sender_policy_flattener/crawler.py +++ b/sender_policy_flattener/crawler.py @@ -26,21 +26,15 @@ def spf2ips(records, domain, resolvers=default_resolvers): def crawl(rrname, rrtype, domain, ns=default_resolvers): try: - answers = ns.query(from_text(rrname), rrtype) + answers = ns.resolve(from_text(rrname), rrtype) except Exception as err: print(repr(err), rrname, rrtype) else: for answer in answers: for pair in tokenize(str(answer), rrtype): rname, rtype = pair - if rtype is None: - continue - if rtype == "txt": - for ip in crawl(rname, rtype, domain, ns): - yield ip - continue - try: - for ip in handler_mapping[rtype](rname, domain, ns): - yield ip - except (NXDOMAIN, NoAnswer) as e: - print(e) + if rtype is not None: + try: + yield from handler_mapping[rtype](rname, domain, ns): + except (NXDOMAIN, NoAnswer) as e: + print(e) diff --git a/sender_policy_flattener/handlers.py b/sender_policy_flattener/handlers.py index 2dcaa8c..561137e 100644 --- a/sender_policy_flattener/handlers.py +++ b/sender_policy_flattener/handlers.py @@ -2,79 +2,85 @@ from netaddr import IPNetwork, IPAddress from dns.name import from_text +import sender_policy_flattener.crawler as crawler + def handle_ip(name, domain, ns): yield name -def handle_mx(name, domain, ns): - answers = ns.query(from_text(domain), "mx") +def handle_mx(_, domain, ns): + answers = ns.resolve(from_text(domain), "mx") for mailexchange in answers: 
- ips = ns.query(mailexchange.exchange, "a") + ips = ns.resolve(mailexchange.exchange, "a") for ip in ips: yield IPAddress(ip.address) -def handle_mx_domain(name, domain, ns): - answers = ns.query(from_text(name), "mx") +def handle_mx_domain(name, __, ns): + answers = ns.resolve(from_text(name), "mx") for mailexchange in answers: - ips = ns.query(mailexchange, "a") + ips = ns.resolve(mailexchange, "a") for ip in ips: yield IPAddress(ip.address) def handle_mx_prefix(name, domain, ns): - _name, prefix = name - answers = ns.query(from_text(domain), "mx") + _, prefix = name + answers = ns.resolve(from_text(domain), "mx") for mailexchange in answers: - ips = ns.query(mailexchange.exchange, "a") + ips = ns.resolve(mailexchange.exchange, "a") for ip in ips: yield IPNetwork("{0}/{1}".format(ip, prefix)) -def handle_mx_domain_prefix(name, domain, ns): - _name, prefix = name - answers = ns.query(from_text(_name), "mx") +def handle_mx_domain_prefix(name, __, ns): + _name, prefix = name + answers = ns.resolve(from_text(_name), "mx") for mailexchange in answers: - ips = ns.query(mailexchange, "a") + ips = ns.resolve(mailexchange, "a") for ip in ips: yield IPNetwork("{0}/{1}".format(ip, prefix)) -def handle_a(name, domain, ns): - answers = ns.query(from_text(domain), "a") +def handle_a(_, domain, ns): + answers = ns.resolve(from_text(domain), "a") for ip in answers: yield IPAddress(ip.address) -def handle_a_domain(name, domain, ns): - answers = ns.query(from_text(name), "a") +def handle_a_domain(name, __, ns): + answers = ns.resolve(from_text(name), "a") for ip in answers: yield IPAddress(ip.address) def handle_a_prefix(name, domain, ns): - _name, prefix = name - answers = ns.query(from_text(domain), "a") + _, prefix = name + answers = ns.resolve(from_text(domain), "a") for ip in answers: yield IPNetwork("{0}/{1}".format(ip, prefix)) -def handle_a_domain_prefix(name, domain, ns): - _name, prefix = name - answers = ns.query(from_text(_name), "a") +def handle_a_domain_prefix(name, __, 
ns): + _name, prefix = name + answers = ns.resolve(from_text(_name), "a") for ip in answers: yield IPNetwork("{0}/{1}".format(ip, prefix)) -def handle_ptr(name, domain, ns): +def handle_ptr(name, __, ___): yield "ptr:{0}".format(name) -def handle_exists(name, domain, ns): +def handle_exists(name, __, ___): yield "exists:{0}".format(name) +def handle_txt(name, domain, ns): + yield from crawler.crawl(name, "txt", domain, ns) + + handler_mapping = { "ip": handle_ip, "mx": handle_mx, @@ -87,4 +93,5 @@ def handle_exists(name, domain, ns): "a_domain_prefix": handle_a_domain_prefix, "ptr": handle_ptr, "exists": handle_exists, + "txt": handle_txt } From 62cfc2034fb9e86eb4d699a740e1b74136d47390 Mon Sep 17 00:00:00 2001 From: Kenny Niehage Date: Wed, 24 Apr 2024 17:12:45 +0200 Subject: [PATCH 4/4] fix typo --- sender_policy_flattener/crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sender_policy_flattener/crawler.py b/sender_policy_flattener/crawler.py index 45a3bc6..c0b570a 100644 --- a/sender_policy_flattener/crawler.py +++ b/sender_policy_flattener/crawler.py @@ -35,6 +35,6 @@ def crawl(rrname, rrtype, domain, ns=default_resolvers): rname, rtype = pair if rtype is not None: try: - yield from handler_mapping[rtype](rname, domain, ns): + yield from handler_mapping[rtype](rname, domain, ns) except (NXDOMAIN, NoAnswer) as e: print(e)