Commit d13fd2b

ashwinb authored and mergify[bot] committed
fix: harden storage semantics (#4118)
Fixes issues in the storage system by guaranteeing immediate durability for responses and ensuring background writers stay alive. Three related fixes:

* Responses to the OpenAI-compatible API now write directly to Postgres/SQLite inside the request instead of detouring through an async queue that might never drain; this restores the expected read-after-write behavior and removes the "response not found" races reported by users.
* The access-control shim was stamping owner_principal/access_attributes as SQL NULL, which Postgres interprets as non-public rows; fixing it to use the empty-string/JSON-null pattern means conversations and responses stored without an authenticated user stay queryable (matching SQLite).
* The inference-store queue remains for batching, but its worker tasks now start lazily on the live event loop so server startup doesn't cancel them; writes keep flowing even when the stack is launched via llama stack run.

Closes #4115

### Test Plan

Added a matrix entry to test our "base" suite against Postgres as the store.

(cherry picked from commit 492f79c)

# Conflicts:
#	.github/workflows/integration-tests.yml
#	llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
#	llama_stack/distributions/starter-gpu/run.yaml
#	llama_stack/distributions/starter/run.yaml
#	llama_stack/distributions/starter/starter.py
#	llama_stack/providers/utils/inference/inference_store.py
#	llama_stack/providers/utils/responses/responses_store.py
#	tests/integration/ci_matrix.json
1 parent: a380b5f
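
To make the three fixes concrete, here is a minimal asyncio sketch of the pattern the commit describes. It is illustrative only: the class, the db handle, and the owner object are hypothetical stand-ins, not the actual ResponsesStore/InferenceStore code.

import asyncio
import json

class StorageSketch:
    """Hypothetical sketch; names are illustrative, not real llama-stack classes."""

    def __init__(self, db, num_writers: int = 4, max_write_queue_size: int = 10000):
        self.db = db  # assumed to expose: async def insert(table: str, row: dict)
        self.queue: asyncio.Queue = asyncio.Queue(maxsize=max_write_queue_size)
        self.num_writers = num_writers
        self.workers: list[asyncio.Task] = []

    async def save_response(self, response: dict, owner=None) -> None:
        # Fix 1: await the insert inside the request handler so a follow-up
        # read is guaranteed to see the row (read-after-write), instead of
        # enqueueing to a background writer that may never drain.
        # Fix 2: unauthenticated writes store empty-string / JSON-null
        # sentinels; a SQL NULL owner_principal reads as "not public" under
        # Postgres and the row disappears from queries.
        await self.db.insert(
            "responses",
            {
                **response,
                "owner_principal": owner.principal if owner else "",
                "access_attributes": json.dumps(owner.attributes if owner else None),
            },
        )

    async def enqueue_inference_write(self, record: dict) -> None:
        # Fix 3: spawn the batching workers lazily, from a coroutine that is
        # already running on the serving event loop, so teardown of a
        # temporary startup loop cannot cancel them.
        if not self.workers:
            self.workers = [
                asyncio.create_task(self._worker()) for _ in range(self.num_writers)
            ]
        await self.queue.put(record)

    async def _worker(self) -> None:
        while True:
            record = await self.queue.get()
            try:
                await self.db.insert("inference_store", record)
            finally:
                self.queue.task_done()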

File tree

12 files changed: +588, -69 lines


.github/actions/setup-test-environment/action.yml

Lines changed: 26 additions & 0 deletions
@@ -39,6 +39,32 @@ runs:
       if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
       uses: ./.github/actions/setup-vllm
 
+    - name: Start Postgres service
+      if: ${{ contains(inputs.setup, 'postgres') }}
+      shell: bash
+      run: |
+        sudo docker rm -f postgres-ci || true
+        sudo docker run -d --name postgres-ci \
+          -e POSTGRES_USER=llamastack \
+          -e POSTGRES_PASSWORD=llamastack \
+          -e POSTGRES_DB=llamastack \
+          -p 5432:5432 \
+          postgres:16
+
+        echo "Waiting for Postgres to become ready..."
+        for i in {1..30}; do
+          if sudo docker exec postgres-ci pg_isready -U llamastack -d llamastack >/dev/null 2>&1; then
+            echo "Postgres is ready"
+            break
+          fi
+          if [ "$i" -eq 30 ]; then
+            echo "Postgres failed to start in time"
+            sudo docker logs postgres-ci || true
+            exit 1
+          fi
+          sleep 2
+        done
+
     - name: Build Llama Stack
       shell: bash
       run: |
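
For local debugging, the same container can be probed outside the workflow. A quick connectivity check, hypothetical and not part of this commit (assumes psycopg2-binary is installed), could look like:

import psycopg2

# Sanity-check the postgres-ci container started above; the credentials
# mirror the POSTGRES_* values passed to docker run.
conn = psycopg2.connect(
    host="localhost",
    port=5432,
    user="llamastack",
    password="llamastack",
    dbname="llamastack",
)
with conn.cursor() as cur:
    cur.execute("SELECT version()")
    print(cur.fetchone()[0])  # e.g. "PostgreSQL 16.x ..."
conn.close()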

.github/workflows/integration-tests.yml

Lines changed: 9 additions & 3 deletions
@@ -46,12 +46,12 @@ jobs:
 
   run-replay-mode-tests:
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
 
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, server, docker]
+        client: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
@@ -73,6 +73,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
 
       - name: Setup test environment
+        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}
@@ -82,11 +83,16 @@ jobs:
           inference-mode: 'replay'
 
       - name: Run tests
+        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
        uses: ./.github/actions/run-and-record-tests
         env:
           OPENAI_API_KEY: dummy
         with:
-          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
+          stack-config: >-
+            ${{ matrix.config.stack_config
+            || (matrix.client == 'library' && 'ci-tests')
+            || (matrix.client == 'server' && 'server:ci-tests')
+            || 'docker:ci-tests' }}
           setup: ${{ matrix.config.setup }}
           inference-mode: 'replay'
           suite: ${{ matrix.config.suite }}
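
In plain Python, the gating and stack-config selection above amount to the following sketch. It assumes matrix config entries may carry optional allowed_clients and stack_config keys; the exact shape of tests/integration/ci_matrix.json is an assumption here.

def should_run(config: dict, client: str) -> bool:
    # Mirrors: allowed_clients == null || contains(allowed_clients, client)
    allowed = config.get("allowed_clients")
    return allowed is None or client in allowed

def select_stack_config(config: dict, client: str) -> str:
    # Mirrors: config.stack_config || (client == 'library' && 'ci-tests')
    #          || (client == 'server' && 'server:ci-tests') || 'docker:ci-tests'
    return (
        config.get("stack_config")
        or {"library": "ci-tests", "server": "server:ci-tests"}.get(client)
        or "docker:ci-tests"
    )

assert select_stack_config({}, "server") == "server:ci-tests"
assert should_run({"allowed_clients": ["library"]}, "docker") is False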
llama_stack/distributions/ci-tests/run-with-postgres-store.yaml

Lines changed: 293 additions & 0 deletions
@@ -0,0 +1,293 @@
+version: 2
+image_name: ci-tests
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+    config:
+      api_key: ${env.AWS_BEDROCK_API_KEY:=}
+      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
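
The run config leans on two env-substitution forms: ${env.VAR:=default} resolves to the variable's value or a default, while ${env.VAR:+value} yields the value only when the variable is set, which is how optional providers such as ollama or vllm are disabled. A rough emulation of both forms (an illustration, not llama-stack's actual resolver):

import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")

def substitute(text: str) -> str:
    def repl(match: re.Match) -> str:
        var, op, arg = match.groups()
        value = os.environ.get(var)
        if op == "=":                 # ${env.VAR:=default}
            return value if value is not None else arg
        return arg if value else ""   # ${env.VAR:+value}
    return _ENV_PATTERN.sub(repl, text)

os.environ.pop("OLLAMA_URL", None)
print(substitute("url: ${env.OLLAMA_URL:=http://localhost:11434}"))
# -> url: http://localhost:11434
print(substitute("provider_id: ${env.OLLAMA_URL:+ollama}"))
# -> provider_id:   (empty, so the provider entry is disabled when unset)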
