Skip to content

Commit

Permalink
Merge pull request #124 from smsearcy/fix-poller-timeouts
Browse files Browse the repository at this point in the history
Updates to (hopefully) make the collector more resilient:

* Does not fail after five failed attempts to get topology data.
* Add timeout to reverse DNS lookup.
* Upgrade *aiohttp* library.
  • Loading branch information
smsearcy authored May 20, 2024
2 parents 7ec4d68 + 8940d7b commit 8da6aba
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 30 deletions.
22 changes: 2 additions & 20 deletions meshinfo/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,28 +91,14 @@ def main(
return

try:
asyncio.run(
service(
collection,
polling_period=config.period,
max_retries=config.max_retries,
)
)
except ServiceError as exc:
return str(exc)
asyncio.run(service(collection, polling_period=config.period))
except KeyboardInterrupt as exc:
logger.exception("Aborted!", exc=exc)
return str(exc)
return


class ServiceError(Exception):
"""Custom exception for known issues to report on the command line."""

pass


async def service(collect, *, polling_period: int, max_retries: int = 5):
async def service(collect, *, polling_period: int):
run_period_seconds = polling_period * 60
connection_failures = 0
while True:
Expand All @@ -123,10 +109,6 @@ async def service(collect, *, polling_period: int, max_retries: int = 5):
except ConnectionError as exc:
connection_failures += 1
logger.exception("Connection error", error=exc, tries=connection_failures)
if connection_failures >= max_retries:
raise ServiceError(
f"{exc!s} {connection_failures} times in a row. Aborting."
)
await asyncio.sleep(run_period_seconds)
continue
else:
Expand Down
1 change: 0 additions & 1 deletion meshinfo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ class Collector:
node_inactive: int = environ.var(default=7, converter=int)
link_inactive: int = environ.var(default=1, converter=int)
period: int = environ.var(default=5, converter=int)
max_retries: int = environ.var(default=5, converter=int)

@environ.config
class DB:
Expand Down
15 changes: 13 additions & 2 deletions meshinfo/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
import asyncio
import random
import struct
import sys

import attrs
import structlog
from structlog.contextvars import bound_contextvars

if sys.version_info >= (3, 11):
import asyncio as async_timeout
else:
import async_timeout

logger = structlog.get_logger()


Expand Down Expand Up @@ -108,15 +114,20 @@ async def reverse_dns_lookup(
"""
with bound_contextvars(ip_address=ip_address):
logger.debug("Reverse DNS lookup", dns_server=dns_server)
loop = asyncio.get_running_loop()
on_con_lost = loop.create_future()
transport, protocol = await loop.create_datagram_endpoint(
transport, _protocol = await loop.create_datagram_endpoint(
lambda: _DnsClientProtocol(ip_address, on_con_lost),
remote_addr=(dns_server, 53),
)

try:
response = await on_con_lost
# There were weird issues with the poller hanging, so I added a timeout here
# in case that was the issue. I think a simultaneous upgrade to aiohttp
# might have fixed the issue, but I'm leaving this in for good measure.
async with async_timeout.timeout(5):
response = await on_con_lost
except Exception as exc:
logger.exception("Error querying DNS server", error=exc)
return ""
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ classifiers = [
"Programming Language :: Python :: 3.11",
]
dependencies = [
"aiohttp ~= 3.8",
"aiohttp ~= 3.9.5",
"alembic ~= 1.8",
"async-timeout ; python_version < '3.11'",
"attrs ~= 23.1",
"environ-config ~= 23.2",
"gunicorn ~= 22.0",
Expand Down
12 changes: 6 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile --output-file=requirements.txt pyproject.toml
#
aiohttp==3.8.6
aiohttp==3.9.5
# via mesh-info (pyproject.toml)
aiosignal==1.3.1
# via aiohttp
alembic==1.12.1
# via mesh-info (pyproject.toml)
async-timeout==4.0.3
# via aiohttp
async-timeout==4.0.3 ; python_version < "3.11"
# via
# aiohttp
# mesh-info (pyproject.toml)
attrs==23.1.0
# via
# aiohttp
# environ-config
# mesh-info (pyproject.toml)
charset-normalizer==3.3.1
# via aiohttp
environ-config==23.2.0
# via mesh-info (pyproject.toml)
frozenlist==1.4.0
Expand Down

0 comments on commit 8da6aba

Please sign in to comment.