rework TXT record handling and tokenization #20

Open · wants to merge 7 commits into master
24 changes: 9 additions & 15 deletions sender_policy_flattener/crawler.py
@@ -26,21 +26,15 @@ def spf2ips(records, domain, resolvers=default_resolvers):

 def crawl(rrname, rrtype, domain, ns=default_resolvers):
     try:
-        answers = ns.query(from_text(rrname), rrtype)
+        answers = ns.resolve(from_text(rrname), rrtype)
     except Exception as err:
         print(repr(err), rrname, rrtype)
     else:
-        answer = " ".join([str(a) for a in answers])
-        for pair in tokenize(answer):
-            rname, rtype = pair
-            if rtype is None:
-                continue
-            if rtype == "txt":
-                for ip in crawl(rname, "txt", domain, ns):
-                    yield ip
-                continue
-            try:
-                for ip in handler_mapping[rtype](rname, domain, ns):
-                    yield ip
-            except (NXDOMAIN, NoAnswer) as e:
-                print(e)
+        for answer in answers:
+            for pair in tokenize(str(answer), rrtype):
+                rname, rtype = pair
+                if rtype is not None:
+                    try:
+                        yield from handler_mapping[rtype](rname, domain, ns)
+                    except (NXDOMAIN, NoAnswer) as e:
+                        print(e)
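
For context, here is a minimal usage sketch (not part of the diff) of how the reworked crawl() might be driven with dnspython 2.x, where Resolver.resolve() supersedes the deprecated Resolver.query(); the domain and nameserver values are placeholders.

import dns.resolver

from sender_policy_flattener.crawler import crawl

resolver = dns.resolver.Resolver()
resolver.nameservers = ["192.0.2.53"]  # placeholder nameserver

# crawl() is a generator: each TXT answer is tokenized along with its rrtype
# and dispatched through handler_mapping, yielding addresses/networks and
# literal strings such as "ptr:..." or "exists:...".
for result in crawl("example.com", "txt", "example.com", ns=resolver):
    print(result)
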
55 changes: 31 additions & 24 deletions sender_policy_flattener/handlers.py
@@ -2,79 +2,85 @@
 from netaddr import IPNetwork, IPAddress
 from dns.name import from_text
 
+import sender_policy_flattener.crawler as crawler
+
 
 def handle_ip(name, domain, ns):
     yield name
 
 
-def handle_mx(name, domain, ns):
-    answers = ns.query(from_text(domain), "mx")
+def handle_mx(_, domain, ns):
+    answers = ns.resolve(from_text(domain), "mx")
     for mailexchange in answers:
-        ips = ns.query(mailexchange.exchange, "a")
+        ips = ns.resolve(mailexchange.exchange, "a")
         for ip in ips:
             yield IPAddress(ip.address)
 
 
-def handle_mx_domain(name, domain, ns):
-    answers = ns.query(from_text(name), "mx")
+def handle_mx_domain(name, __, ns):
+    answers = ns.resolve(from_text(name), "mx")
     for mailexchange in answers:
-        ips = ns.query(mailexchange, "a")
+        ips = ns.resolve(mailexchange, "a")
         for ip in ips:
             yield IPAddress(ip.address)
 
 
 def handle_mx_prefix(name, domain, ns):
-    _name, prefix = name
-    answers = ns.query(from_text(domain), "mx")
+    _, prefix = name
+    answers = ns.resolve(from_text(domain), "mx")
     for mailexchange in answers:
-        ips = ns.query(mailexchange.exchange, "a")
+        ips = ns.resolve(mailexchange.exchange, "a")
         for ip in ips:
             yield IPNetwork("{0}/{1}".format(ip, prefix))


-def handle_mx_domain_prefix(name, domain, ns):
+def handle_mx_domain_prefix(name, __, ns):
     _name, prefix = name
-    answers = ns.query(from_text(_name), "mx")
+    answers = ns.resolve(from_text(_name), "mx")
     for mailexchange in answers:
-        ips = ns.query(mailexchange, "a")
+        ips = ns.resolve(mailexchange, "a")
         for ip in ips:
             yield IPNetwork("{0}/{1}".format(ip, prefix))


-def handle_a(name, domain, ns):
-    answers = ns.query(from_text(domain), "a")
+def handle_a(_, domain, ns):
+    answers = ns.resolve(from_text(domain), "a")
     for ip in answers:
         yield IPAddress(ip.address)
 
 
-def handle_a_domain(name, domain, ns):
-    answers = ns.query(from_text(name), "a")
+def handle_a_domain(name, __, ns):
+    answers = ns.resolve(from_text(name), "a")
     for ip in answers:
         yield IPAddress(ip.address)
 
 
 def handle_a_prefix(name, domain, ns):
-    _name, prefix = name
-    answers = ns.query(from_text(domain), "a")
+    _, prefix = name
+    answers = ns.resolve(from_text(domain), "a")
     for ip in answers:
         yield IPNetwork("{0}/{1}".format(ip, prefix))


-def handle_a_domain_prefix(name, domain, ns):
+def handle_a_domain_prefix(name, __, ns):
     _name, prefix = name
-    answers = ns.query(from_text(_name), "a")
+    answers = ns.resolve(from_text(_name), "a")
     for ip in answers:
         yield IPNetwork("{0}/{1}".format(ip, prefix))


-def handle_ptr(name, domain, ns):
+def handle_ptr(name, __, ___):
     yield "ptr:{0}".format(name)
 
 
-def handle_exists(name, domain, ns):
+def handle_exists(name, __, ___):
     yield "exists:{0}".format(name)
 
 
+def handle_txt(name, domain, ns):
+    yield from crawler.crawl(name, "txt", domain, ns)
+
+
 handler_mapping = {
     "ip": handle_ip,
     "mx": handle_mx,
@@ -87,4 +93,5 @@ def handle_exists(name, domain, ns):
"a_domain_prefix": handle_a_domain_prefix,
"ptr": handle_ptr,
"exists": handle_exists,
"txt": handle_txt
}
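
As an illustration (not part of the diff), the dispatch table can be exercised directly: every handler is a generator keyed by mechanism type, the ones shown below need no DNS at all, and the new "txt" entry delegates back into crawler.crawl() for nested TXT lookups. All argument values are made up.

# Importing the crawler module first sidesteps any import-order surprises from
# the handlers<->crawler cycle introduced by the new handle_txt handler.
import sender_policy_flattener.crawler  # noqa: F401
from sender_policy_flattener.handlers import handler_mapping

print(list(handler_mapping["ip"]("192.0.2.0/24", "example.com", None)))  # ['192.0.2.0/24']
print(list(handler_mapping["ptr"]("example.com", None, None)))           # ['ptr:example.com']
print(list(handler_mapping["exists"]("%{i}.example.com", None, None)))   # ['exists:%{i}.example.com']
# handler_mapping["txt"](name, domain, resolver) recurses via crawler.crawl(name, "txt", domain, resolver).
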
19 changes: 16 additions & 3 deletions sender_policy_flattener/mechanisms.py
@@ -52,11 +52,24 @@ def ptr(token):
 exists = partial(process_alias, keyword="exists")
 
 
-def tokenize(answer):
+def tokenize(answer, rrtype):
+    # TXT records potentially contain multiple strings that
+    # must be concatenated first, they also contain other
+    # quotes which will screw up the tokens, see:
+    # https://datatracker.ietf.org/doc/html/rfc7208#section-3.3
+    if rrtype == "txt":
+        answer = answer.replace("\" \"", "")
+        answer = answer.strip("\"")
     tokens = answer.split()
+    # TXT records have to begin with "v=spf1" or otherwise
+    # they should be discarded, see:
+    # https://datatracker.ietf.org/doc/html/rfc7208#section-4.5
+    if rrtype == "txt":
+        if (len(tokens) > 0) and (tokens[0] == "v=spf1"):
+            tokens = tokens[1:]
+        else:
+            tokens = []
     for token in tokens:
-        # TXT records often contain quotes and will screw with the token.
-        token = token.strip("\"' ")
         for pattern, fn in mechanism_mapping.items():
             if re.match(pattern, token):
                 yield fn(token)
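
To make the TXT pre-processing concrete, here is a small sketch (values invented) of what the replace/strip and the v=spf1 gate do to a multi-string TXT answer as dnspython typically renders it:

answer = '"v=spf1 ip4:192.0.2.0/24 include:_spf.examp" "le.com -all"'
# Join the character-strings (RFC 7208 section 3.3) and drop the outer quotes.
answer = answer.replace('" "', '').strip('"')
tokens = answer.split()
print(tokens)  # ['v=spf1', 'ip4:192.0.2.0/24', 'include:_spf.example.com', '-all']
# Keep only records that start with v=spf1 (RFC 7208 section 4.5).
tokens = tokens[1:] if tokens and tokens[0] == "v=spf1" else []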