diff --git a/llumnix/config/default.py b/llumnix/config/default.py index 358d9e1b..c5fba7a8 100644 --- a/llumnix/config/default.py +++ b/llumnix/config/default.py @@ -112,7 +112,7 @@ # Number of kv-cache layers to transfer in each round during migration _C.MANAGER.MIGRATION_NUM_LAYERS = 1 # Number of internal cache size in migration backend for sending and receiving -_C.MANAGER.MIGRATION_INTERNAL_BUFFER_NUM = 2 +_C.MANAGER.MIGRATION_INTERNAL_BUFFER_NUM = 1 # ----------------------------------------------------------------------------- # SCALING CONFIGURATION diff --git a/tests/e2e_test/utils.py b/tests/e2e_test/utils.py index 4783496d..146f6dfe 100644 --- a/tests/e2e_test/utils.py +++ b/tests/e2e_test/utils.py @@ -71,7 +71,7 @@ def generate_launch_command(result_filename: str = "", f"--request-migration-policy {request_migration_policy} " f"--migration-backend {migration_backend} " f"--migration-buffer-blocks 32 " - f"--migration-internal-buffer-num 2 " + f"--migration-internal-buffer-num 1 " f"--tensor-parallel-size 1 " f"--request-output-queue-port {1234+port} " f"{'--launch-ray-cluster ' if launch_ray_cluster else ''}"