diff --git a/changes/3199.fix.md b/changes/3199.fix.md new file mode 100644 index 0000000000..d7036015ab --- /dev/null +++ b/changes/3199.fix.md @@ -0,0 +1 @@ +Optimize the route selection in App Proxy using `random.choices()` based on the native C implementation in CPython diff --git a/src/ai/backend/wsproxy/proxy/backend/http.py b/src/ai/backend/wsproxy/proxy/backend/http.py index e02710af67..f4a18504cf 100644 --- a/src/ai/backend/wsproxy/proxy/backend/http.py +++ b/src/ai/backend/wsproxy/proxy/backend/http.py @@ -34,22 +34,8 @@ def selected_route(self) -> RouteInfo: if selected_route.traffic_ratio == 0: raise WorkerNotAvailable else: - routes = [ - r for r in sorted(self.routes, key=lambda r: r.traffic_ratio) if r.traffic_ratio > 0 - ] - ranges: list[float] = [] - ratio_sum = 0.0 - for route in routes: - ratio_sum += route.traffic_ratio - ranges.append(ratio_sum) - rand = random.random() * ranges[-1] - for i in range(len(ranges)): - ceiling = ranges[i] - if (i == 0 and rand < ceiling) or (ranges[i - 1] <= rand and rand < ceiling): - selected_route = routes[i] - break - else: - selected_route = routes[-1] + ratios: list[float] = [r.traffic_ratio for r in self.routes] + selected_route = random.choices(self.routes, weights=ratios, k=1)[0] return selected_route def get_x_forwarded_proto(self, request: web.Request) -> str: diff --git a/src/ai/backend/wsproxy/proxy/backend/tcp.py b/src/ai/backend/wsproxy/proxy/backend/tcp.py index d8f8ec95c2..701e9f3de7 100644 --- a/src/ai/backend/wsproxy/proxy/backend/tcp.py +++ b/src/ai/backend/wsproxy/proxy/backend/tcp.py @@ -31,22 +31,8 @@ def selected_route(self) -> RouteInfo: if selected_route.traffic_ratio == 0: raise WorkerNotAvailable else: - routes = [ - r for r in sorted(self.routes, key=lambda r: r.traffic_ratio) if r.traffic_ratio > 0 - ] - ranges: list[float] = [] - ratio_sum = 0.0 - for route in routes: - ratio_sum += route.traffic_ratio - ranges.append(ratio_sum) - rand = random.random() * ranges[-1] - for i in range(len(ranges)): - ceiling = ranges[i] - if (i == 0 and rand < ceiling) or (ranges[i - 1] <= rand and rand < ceiling): - selected_route = routes[i] - break - else: - selected_route = routes[-1] + ratios: list[float] = [r.traffic_ratio for r in self.routes] + selected_route = random.choices(self.routes, weights=ratios, k=1)[0] return selected_route async def bind(