Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scheduling for config hash verficiation #203

Merged
merged 31 commits into from
Oct 29, 2020
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d886e49
Set config hash max retry times
pomodorox Oct 26, 2020
c4a9f20
fix assert
pomodorox Oct 26, 2020
2c9d8a9
add logging
pomodorox Oct 26, 2020
10504d9
changes
pomodorox Oct 27, 2020
8a84ca4
build proto
pomodorox Oct 27, 2020
905015e
add logging
pomodorox Oct 27, 2020
d32b7fc
fix testing
pomodorox Oct 27, 2020
459b33f
build proto
pomodorox Oct 27, 2020
71f0697
change config hash retry to cooling time
pomodorox Oct 27, 2020
829421d
build proto
pomodorox Oct 27, 2020
4df5dad
changes
pomodorox Oct 27, 2020
b0b8940
lint
pomodorox Oct 27, 2020
43c1b55
update logging
pomodorox Oct 27, 2020
670eb94
update naming
pomodorox Oct 27, 2020
8a0ec03
lint
pomodorox Oct 27, 2020
846a554
build proto
pomodorox Oct 27, 2020
d01f5fb
fix logging
pomodorox Oct 27, 2020
6e3d45e
add timer
pomodorox Oct 28, 2020
267a9f8
fix
pomodorox Oct 28, 2020
231b829
raise exception for config hash clash in main loop
pomodorox Oct 28, 2020
bdc75e1
increase timeout
pomodorox Oct 28, 2020
04aff69
raise exception in event client
pomodorox Oct 28, 2020
d35f482
compare hash when timeout
pomodorox Oct 28, 2020
7d557d4
lint
pomodorox Oct 28, 2020
09140c1
changes
pomodorox Oct 28, 2020
87c23ec
use existing heartbeat scheduler
pomodorox Oct 28, 2020
27c2570
lint
pomodorox Oct 28, 2020
70f69d1
change timer trigger to config writing
pomodorox Oct 28, 2020
e227df6
build proto
pomodorox Oct 28, 2020
eab9235
clear config writting time
pomodorox Oct 28, 2020
99633cf
Merge branch 'master' into confighash
pomodorox Oct 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion forch/faucet_event_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,11 @@ def _dispatch_faucet_event(self, target, target_event):
def _should_log_event(self, event):
return event and os.getenv('FAUCET_EVENT_DEBUG')

def next_event(self, blocking=False):
def next_event(self, get_config_hash_clashed, blocking=False):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not right -- the concept of a config hash shouldn't bleed down into the event client itself

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed

"""Return the next event from the queue"""
while self.event_socket_connected and self.has_event(blocking=blocking):
if get_config_hash_clashed():
raise Exception('Config hash clashed')
with self._buffer_lock:
line, remainder = self.buffer.split('\n', 1)
self.buffer = remainder
Expand Down
49 changes: 46 additions & 3 deletions forch/forchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
_FAUCET_PROM_PORT_DEFAULT = 9302
_GAUGE_PROM_HOST = '127.0.0.1'
_GAUGE_PROM_PORT_DEFAULT = 9303
_CONFIG_HASH_CLASH_TIMEOUT_SEC_DEFAULT = '60'

_TARGET_FAUCET_METRICS = (
'port_status',
Expand Down Expand Up @@ -113,6 +114,14 @@ def __init__(self, config):
self._metrics = None
self._varz_proxy = None

self._config_hash_clash_timer = None
self._config_hash_clashed = False
self._config_hash_clash_timeout_sec = (
self._config.event_client.config_hash_clash_timeout_sec or
int(os.getenv(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why env variable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed..

'_CONFIG_HASH_CLASH_TIMEOUT_SEC', _CONFIG_HASH_CLASH_TIMEOUT_SEC_DEFAULT))
)

self._lock = threading.Lock()

def initialize(self):
Expand Down Expand Up @@ -377,7 +386,7 @@ def _process_device_placement(self, eth_src, device_placement, static=False, exp
self._authenticator.process_device_placement(eth_src, device_placement)
else:
LOGGER.info(
'Ignored vlan expiration for device %s with expired vlan %d', eth_src, expired_vlan)
'Ignored vlan expiration for device %s with expired vlan %s', eth_src, expired_vlan)

def handle_auth_result(self, mac, access, segment, role):
"""Method passed as callback to authenticator to forward auth results"""
Expand Down Expand Up @@ -436,9 +445,43 @@ def _restore_states(self):
def _restore_faucet_config(self, timestamp, config_hash):
config_info, faucet_dps, _ = self._get_faucet_config()
self._update_config_warning_varz()
assert config_hash == config_info['hashes'], 'config hash info does not match'

if config_hash == config_info['hashes']:
self._attempt_cancel_config_hash_clash_timer()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in these cases I'd leave out the "attempt" -- it's not semantically meaningful

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not using a seprate timer now.

else:
LOGGER.warning('Config hash does not match')
self._attempt_start_config_hash_clash_timer()

self._faucet_collector.process_dataplane_config_change(timestamp, faucet_dps)

def _attempt_start_config_hash_clash_timer(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shouldn't be "start a timer" -- but rather just a mark of when the last config hash clash was

if self._config_hash_clash_timer and self._config_hash_clash_timer.is_alive():
return
self._config_hash_clash_timer = threading.Timer(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you should reuse one of the existing heartbeat schedulers for this -- creating lots of little threads ends up being very messy in the long run.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using _faucet_state_scheduler now..

interval=self._config_hash_clash_timeout_sec,
function=self._check_config_hash_clashed)
self._config_hash_clash_timer.start()
LOGGER.info(
'Config hash clash timer started with %s seconds', self._config_hash_clash_timeout_sec)

def _attempt_cancel_config_hash_clash_timer(self):
if not self._config_hash_clash_timer or not self._config_hash_clash_timer.is_alive():
return
self._config_hash_clash_timer.cancel()
LOGGER.info('Config hash clash timer cancelled')

def _check_config_hash_clashed(self):
_, varz_config_hashes = self._get_varz_config()
config_info, _, _ = self._get_faucet_config()

if varz_config_hashes != config_info['hashes']:
LOGGER.error(
'Config hash does not match after %s seconds', self._config_hash_clash_timeout_sec)
self._config_hash_clashed = True

def _get_config_hash_clashed(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

private getter is redundant -- just use the variable directly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed

return self._config_hash_clashed

def _process_config_change(self, event):
self._faucet_collector.process_dp_config_change(
event.timestamp, event.dp_name, event.restart_type, event.dp_id)
Expand All @@ -465,7 +508,7 @@ def main_loop(self):
self._faucet_events_connect()

try:
self._faucet_events.next_event(blocking=True)
self._faucet_events.next_event(self._get_config_hash_clashed, blocking=True)
except FaucetEventOrderError as e:
LOGGER.error("Faucet event order error: %s", e)
if self._metrics:
Expand Down
13 changes: 8 additions & 5 deletions forch/proto/acl_state_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions forch/proto/authentication_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading