From 047e37a101ca74ccf918a6e7541a63beba35eb1b Mon Sep 17 00:00:00 2001 From: Oleksandr Mazur Date: Wed, 11 Dec 2024 16:02:30 +0200 Subject: [PATCH 1/5] Connection processor: add a force TLS-accept timeout Whenever TLS accept (tungstenite::accept(tls_stream)) blocks for too long (>15 seconds), stop trying to accept the stream using tokio_selector. This is done to ensure we don't have a hang connection processor that might hang for a very long period of time waiting for a connection to be accepted. Also run cargo fmt to fix some import indentation. Signed-off-by: Oleksandr Mazur --- src/cgw_connection_processor.rs | 19 ++++++++++++++++++- src/cgw_ucentral_switch_parser.rs | 8 ++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/cgw_connection_processor.rs b/src/cgw_connection_processor.rs index 24e3a8a..8567734 100644 --- a/src/cgw_connection_processor.rs +++ b/src/cgw_connection_processor.rs @@ -105,7 +105,24 @@ impl CGWConnectionProcessor { client_cn: MacAddress, allow_mismatch: bool, ) -> Result<()> { - let ws_stream = tokio_tungstenite::accept_async(tls_stream).await?; + let ws_stream = tokio::select! { + _val = tokio_tungstenite::accept_async(tls_stream) => { + match _val { + Ok(s) => s, + Err(e) => { + error!("Failed to accept TLS stream from: {}! Reason: {}. Closing connection", + self.addr, e); + return Err(Error::ConnectionProcessor("Failed to accept TLS stream!")); + } + } + } + // TODO: configurable duration (upon server creation) + _val = sleep(Duration::from_millis(15000)) => { + error!("Failed to accept TLS stream from: {}! 
Closing connection", self.addr); + return Err(Error::ConnectionProcessor("Failed to accept TLS stream for too long")); + } + + }; let (sink, mut stream) = ws_stream.split(); diff --git a/src/cgw_ucentral_switch_parser.rs b/src/cgw_ucentral_switch_parser.rs index ece37cf..0c7d70a 100644 --- a/src/cgw_ucentral_switch_parser.rs +++ b/src/cgw_ucentral_switch_parser.rs @@ -5,10 +5,10 @@ use std::{collections::HashMap, str::FromStr}; use crate::cgw_errors::{Error, Result}; use crate::cgw_ucentral_parser::{ - CGWUCentralEvent, CGWUCentralEventLog, CGWUCentralEventState, CGWUCentralEventStateClients, - CGWUCentralEventStateClientsData, CGWUCentralEventStateClientsType, - CGWUCentralEventStateLLDPData, CGWUCentralEventStateLinks, CGWUCentralEventStatePort, - CGWUCentralEventType, CGWUCentralJRPCMessage, CGWUCentralEventReply + CGWUCentralEvent, CGWUCentralEventLog, CGWUCentralEventReply, CGWUCentralEventState, + CGWUCentralEventStateClients, CGWUCentralEventStateClientsData, + CGWUCentralEventStateClientsType, CGWUCentralEventStateLLDPData, CGWUCentralEventStateLinks, + CGWUCentralEventStatePort, CGWUCentralEventType, CGWUCentralJRPCMessage, }; fn parse_lldp_data( From 49555495e8bd170e049b28a4925e6732c40b3bc9 Mon Sep 17 00:00:00 2001 From: Oleksandr Mazur Date: Wed, 11 Dec 2024 16:06:48 +0200 Subject: [PATCH 2/5] Tests: core: add timeout values for common clients Force redis, PSQL, kafka clients to have a strict timeout value for any requests. It's done to ensure our tests do not hang for an unknown period of time in case if something goes wrong. 
Signed-off-by: Oleksandr Mazur --- tests/metrics.py | 8 +++++--- utils/client_simulator/src/simulation_runner.py | 4 +++- utils/kafka_producer/src/producer.py | 6 +++++- utils/redis_client/redis_client.py | 5 ++++- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/metrics.py b/tests/metrics.py index 0a277b7..c0eab0f 100644 --- a/tests/metrics.py +++ b/tests/metrics.py @@ -6,12 +6,14 @@ def cgw_metric_get(host: str = "localhost", port: int = 8080) -> str: metrics = "" try: - r = requests.get(f"http://{host}:{port}/metrics") - print("CGW metrics: " + str(r.status_code) + ', txt:' + r.text) + # Try to fetch metrics with 5 seconds timeout value + r = requests.get(f"http://{host}:{port}/metrics", timeout=5) + print("CGW metrics ret code: " + str(r.status_code)) assert r is not None and r.status_code == 200, \ f"CGW metrics is not available" metrics = r.text - except: + except Exception as e: + print("CGW metrics: raised exception when tried to fetch metrics:" + e) raise Exception('CGW metrics fetch failed (Not running?)') return metrics diff --git a/utils/client_simulator/src/simulation_runner.py b/utils/client_simulator/src/simulation_runner.py index dbcac5e..8c06d2e 100644 --- a/utils/client_simulator/src/simulation_runner.py +++ b/utils/client_simulator/src/simulation_runner.py @@ -126,7 +126,9 @@ def handle_reboot(self, socket: client.ClientConnection, msg: dict): def connect(self): if self._socket is None: - self._socket = client.connect(self.server_addr, ssl=self.ssl_context, open_timeout=7200) + # 20 seconds is more then enough to establish conne and exchange + # them handshakes. 
+ self._socket = client.connect(self.server_addr, ssl=self.ssl_context, open_timeout=20, close_timeout=20) return self._socket def disconnect(self): diff --git a/utils/kafka_producer/src/producer.py b/utils/kafka_producer/src/producer.py index 161f3ce..ff9c732 100644 --- a/utils/kafka_producer/src/producer.py +++ b/utils/kafka_producer/src/producer.py @@ -132,7 +132,11 @@ def __exit__(self, exc_type, exc_val, exc_tb): def connect(self) -> kafka.KafkaProducer: if self.is_connected() is False: - self.conn = kafka.KafkaProducer(bootstrap_servers=self.db, client_id="producer") + self.conn = kafka.KafkaProducer( + bootstrap_servers=self.db, + client_id="producer", + max_block_ms=12000, + request_timeout_ms=12000) logger.info("producer: connected to kafka") else: logger.info("producer: already connected to kafka") diff --git a/utils/redis_client/redis_client.py b/utils/redis_client/redis_client.py index f069d4e..7cad8e0 100644 --- a/utils/redis_client/redis_client.py +++ b/utils/redis_client/redis_client.py @@ -12,7 +12,10 @@ def connect(self): """Connect to the Redis database.""" try: # Establish connection to Redis server - self.connection = redis.StrictRedis(host=self.host, port=self.port, db=0, decode_responses=True) + self.connection = redis.StrictRedis( + host=self.host, port=self.port, + db=0, decode_responses=True, socket_timeout=5.0, + socket_connect_timeout=2.0) # Check if the connection is successful self.connection.ping() print(f"Connected to Redis server at {self.host}:{self.port}") From e79afca822f0efe303096e9276aae69de3fcf529 Mon Sep 17 00:00:00 2001 From: Oleksandr Mazur Date: Wed, 11 Dec 2024 16:10:50 +0200 Subject: [PATCH 3/5] Tests: execute tests in tmpfs context (folder) Whenever tests start, they might do some enviromental changes to local FS. Handle this by moving the 'context' of tests execution to a tmpfs folder, that can be easily purged after tests are done / new test run is being executed. 
NOTE: further changes in regard to test isolation should potentially utilize a separate docker container that serves as a fresh, clean and isolated environment for test runs. Signed-off-by: Oleksandr Mazur --- tests/requirements.txt | 1 + tests/run.sh | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 2d746a6..bc73e4c 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -4,3 +4,4 @@ pytest==8.3.3 randmac==0.1 psycopg2-binary==2.9.10 redis==5.2.0 +requests==2.32.3 diff --git a/tests/run.sh b/tests/run.sh index 4ecfdf6..088811e 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -2,6 +2,14 @@ # Separate exports for clearer visibility of _what exactly_ # we're putting in python path + +rm -rf /tmp/cgw_tests_runner; +mkdir /tmp/cgw_tests_runner && \ + cp -rf ../tests /tmp/cgw_tests_runner/ && \ + cp -rf ../utils /tmp/cgw_tests_runner/; + +cd /tmp/cgw_tests_runner/tests + export PYTHONPATH="$PYTHONPATH:$PWD" export PYTHONPATH="$PYTHONPATH:$PWD/../utils" From af4dfb427293c32eb90be1261f73fa83f90d26b6 Mon Sep 17 00:00:00 2001 From: Oleksandr Mazur Date: Wed, 11 Dec 2024 16:12:24 +0200 Subject: [PATCH 4/5] Build: use multi-cgw deployment target as a default one Replace script-based approach for CGW/services creation, in favor of multi-cgw templated generated compose file. Also, add a new run-tests target, that executes / starts test run, for the sake of convenience.
Signed-off-by: Oleksandr Mazur --- Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index cbfd4f8..6233a25 100644 --- a/Makefile +++ b/Makefile @@ -16,9 +16,9 @@ CGW_BUILD_ENV_IMG_TAG := $(shell cat Dockerfile | sha1sum | awk '{print substr($ CGW_BUILD_ENV_IMG_CONTAINER_NAME := "cgw_build_env" -.PHONY: all cgw-app cgw-build-env-img cgw-img stop clean run run_docker_services start-multi-cgw stop-multi-cgw +.PHONY: all cgw-app cgw-build-env-img cgw-img stop clean run run_docker_services start-multi-cgw stop-multi-cgw run-tests -all: cgw-build-env-img run_docker_services run +all: start-multi-cgw @echo "uCentral CGW build app (container) done" # Executed inside build-env @@ -48,7 +48,7 @@ cgw-img: stop cgw-build-env-img . @echo Docker build done; -stop: +stop: stop-multi-cgw @echo "Stopping / removing container ${CGW_IMG_CONTAINER_NAME}" @docker stop ${CGW_IMG_CONTAINER_NAME} > /dev/null 2>&1 || true; @docker container rm ${CGW_IMG_CONTAINER_NAME} > /dev/null 2>&1 || true; @@ -77,3 +77,6 @@ stop-multi-cgw: run_docker_services: @cd ./utils/docker/ && docker compose up -d + +run-tests: + @cd ./tests && ./run.sh From 4a20121b4bcb19a213120b98770f9585241887b5 Mon Sep 17 00:00:00 2001 From: Oleksandr Mazur Date: Wed, 11 Dec 2024 17:04:20 +0200 Subject: [PATCH 5/5] Update readme Signed-off-by: Oleksandr Mazur --- README.md | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index a6bd7b5..6cef86f 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,14 @@ while others are required to be running for the CGW to operate. 
**NOTE**: while runtime CGW depends on services like kafka, redis and PGSQL, the *make* / *make all* targets would build a complete out-of-the-box setup with default configs and container params: -- Kafka, Redis, PGSQL containers would be created and attached to default - automatically created - *docker_cgw_network* network; - All three (and one additional - *init-broker-container* - needed for kafka topics initialization) will be created as part of single - container project group. -- CGW will be created as separate standalone container, attached to same *docker_cgw_network* network; +- Kafka, Redis, PGSQL containers would be created and attached to default - automatically created - *docker_cgw_multi_instances_network* network; + All three (and one additional - *init-broker-container* - needed for kafka topics initialization) are part of a single docker compose file. +- CGW is also part of the same docker compose file, but its definition is partially generated. + The reason is that multiple CGW instances can be created within a single compose file, +and thus container details are being generated. + +More information about the compose generation can be found in the +'Automated multi-CGW instances start/stop with Docker Compose' topic. ## gRPC CGW utilizes gRPC to communicate with other CGW instances (referred to as Shards). This functionality does not depend on some external thirdparty services.
@@ -53,9 +57,11 @@ Two new docker images will be generated on host system: # Running The following script can be used to launch the CGW app ```console -$ make run +$ make ``` -Command creates and executed (starts) docker container name 'openlan_cgw' +Command creates and executes (starts) a docker container group consisting of CGW services +as well as the thirdparty services it depends on (redis, kafka, pgsql) + To stop the container from running (remove it) use the following cmd: ```console $ make stop @@ -170,9 +176,14 @@ Currently, tests should be run manually by changin PWD to *tests* and launching cd ./test ./run.sh ``` +or using the make target (added for convenience): +```console +make run-tests +``` *NOTE:* currently, tests are not running inside a container. -This means, that it's up to the caller make sure tests can communicate with whatever CGW's deployment as well as thirdparty services. -E.g. tests inside running *host* enviroment must be able to communicate with CGW, Redis, Kafka, PGSQL etc. +To make sure tests can communicate with the CGW environment, tests currently +reach it through ports exposed to the host system. +E.g. for WSS, tests try to reach 'wss://localhost:15002' by default, and so on. # Automated multi-CGW instances start/stop with Docker Compose Automated multi-CGW start/stop based on "docker-compose-template.yml.j2" file located inside the *utils/docker* directory.