From 7f6de0605bd74092d81f50cd9edbf37be6608b5a Mon Sep 17 00:00:00 2001 From: Scott Todd <scott.todd0@gmail.com> Date: Fri, 6 Dec 2024 15:32:44 -0800 Subject: [PATCH] Fix/skip trie_attention_cache_test on Windows. (#645) Context: https://github.com/nod-ai/shark-ai/pull/632#discussion_r1869981637 --- .../llm/components/kvcache/trie_attention_cache.py | 4 ++++ .../apps/llm/components/kvcache/trie_attention_cache_test.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py b/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py index fbb008005..3993e2444 100644 --- a/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py +++ b/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py @@ -90,6 +90,10 @@ def __eq__(self, other: object) -> bool: """Nodes are equal only if they are the same object.""" return self is other + def __lt__(self, other): + """Sort nodes by their memory address.""" + return id(self) < id(other) + class TriePagedAttentionCacheAllocation(PageAllocation): """Represents a page allocation in the trie-based cache. diff --git a/shortfin/tests/apps/llm/components/kvcache/trie_attention_cache_test.py b/shortfin/tests/apps/llm/components/kvcache/trie_attention_cache_test.py index 0f49efda8..a4e1f2284 100644 --- a/shortfin/tests/apps/llm/components/kvcache/trie_attention_cache_test.py +++ b/shortfin/tests/apps/llm/components/kvcache/trie_attention_cache_test.py @@ -3,6 +3,7 @@ import shortfin as sf import shortfin.array as sfnp from unittest.mock import Mock, MagicMock +import sys import threading import time from dataclasses import dataclass @@ -248,6 +249,10 @@ def filled_cache(trie_cache, published_sequence): return sequences +@pytest.mark.skipif( + sys.platform == "win32", + reason="sequence eviction is not working correctly on Windows", +) @pytest.mark.parametrize( "access_count", [1, TEST_POOL_CAPACITY // 2, TEST_POOL_CAPACITY - 1] )