diff --git a/models/all-minilm-L6-v2-q5_k_m.gguf b/models/all-minilm-L6-v2-q5_k_m.gguf
deleted file mode 100644
index 99b1ef37..00000000
--- a/models/all-minilm-L6-v2-q5_k_m.gguf
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:60c7e141495321c7d303ec5ccc79296cfeb044263af840c583fed695d423aee8
-size 21717952
diff --git a/models/qwen2.5-coder-1.5b-instruct-q5_k_m.gguf b/models/qwen2.5-coder-1.5b-instruct-q5_k_m.gguf
deleted file mode 100644
index b1c22cee..00000000
--- a/models/qwen2.5-coder-1.5b-instruct-q5_k_m.gguf
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f15d02e8e51a5c6f9448b972819acfa66aee4d8bb7d881a8b8ba3d90da08ef09
-size 1285494336
diff --git a/tests/conftest.py b/tests/conftest.py
index 23def424..afbb5956 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -11,7 +11,6 @@ import yaml
 
 from codegate.config import Config
-from codegate.inference import LlamaCppInferenceEngine
 
 
 @pytest.fixture
@@ -95,8 +94,3 @@ def parse_json_log(log_line: str) -> dict[str, Any]:
         return json.loads(log_line)
     except json.JSONDecodeError as e:
         pytest.fail(f"Invalid JSON log line: {e}")
-
-
-@pytest.fixture
-def inference_engine() -> LlamaCppInferenceEngine:
-    return LlamaCppInferenceEngine()
diff --git a/tests/test_inference.py b/tests/test_inference.py
deleted file mode 100644
index 9cfe5d14..00000000
--- a/tests/test_inference.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import pytest
-
-
-@pytest.mark.asyncio
-async def test_generate(inference_engine) -> None:
-    """Test code generation."""
-
-    completion_request = {
-        "model": "qwen2.5-coder-1.5b-instruct-q5_k_m",
-        "max_tokens": 4096,
-        "temperature": 0,
-        "stream": True,
-        "stop": [
-            "<|endoftext|>",
-            "<|fim_prefix|>",
-            "<|fim_middle|>",
-            "<|fim_suffix|>",
-            "<|fim_pad|>",
-            "<|repo_name|>",
-            "<|file_sep|>",
-            "<|im_start|>",
-            "<|im_end|>",
-            "/src/",
-            "#- coding: utf-8",
-            "```",
-        ],
-        "prompt": "<|fim_prefix|>\\n# codegate/test.py\\nimport requests\\n\\ndef call_api(url):\\n"
-        " <|fim_suffix|>\\n\\n\\n\\nresponse = call_api('http://localhost/test')"
-        "\\nprint(response)<|fim_middle|>",
-    }
-    model_path = f"./models/{completion_request['model']}.gguf"
-    response = await inference_engine.complete(model_path, **completion_request)
-
-    for chunk in response:
-        assert chunk["choices"][0]["text"] is not None
-
-
-@pytest.mark.asyncio
-async def test_chat(inference_engine) -> None:
-    """Test chat completion."""
-
-    chat_request = {
-        "messages": [{"role": "user", "content": "hello"}],
-        "model": "qwen2.5-coder-1.5b-instruct-q5_k_m",
-        "max_tokens": 4096,
-        "temperature": 0,
-        "stream": True,
-    }
-
-    model_path = f"./models/{chat_request['model']}.gguf"
-    response = await inference_engine.chat(model_path, **chat_request)
-
-    for chunk in response:
-        assert "delta" in chunk["choices"][0]
-
-
-@pytest.mark.asyncio
-async def test_embed(inference_engine) -> None:
-    """Test content embedding."""
-
-    content = "Can I use invokehttp package in my project?"
-    model_path = "./models/all-minilm-L6-v2-q5_k_m.gguf"
-    vector = await inference_engine.embed(model_path, content=content)
-    assert len(vector) == 384