Skip to content

Commit fe04c8f

Browse files
committed
[BUG] schema: build default with config ef & knn_index, remove #document population in defaults
1 parent 36af0d1 commit fe04c8f

File tree

10 files changed

+721
-65
lines changed

10 files changed

+721
-65
lines changed

go/pkg/sysdb/coordinator/model/collection_configuration.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ type ValueTypes struct {
249249
SparseVector *SparseVectorValueType `json:"sparse_vector,omitempty"`
250250
Int *IntValueType `json:"int,omitempty"`
251251
Float *FloatValueType `json:"float,omitempty"`
252-
Boolean *BoolValueType `json:"boolean,omitempty"`
252+
Boolean *BoolValueType `json:"bool,omitempty"`
253253
}
254254

255255
type Schema struct {

go/pkg/sysdb/coordinator/model/collection_configuration_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ func TestUpdateSchemaFromConfig_HnswSuccess(t *testing.T) {
147147
"config": {}
148148
}
149149
},
150-
"boolean": {
150+
"bool": {
151151
"bool_inverted_index": {
152152
"enabled": true,
153153
"config": {}
@@ -312,7 +312,7 @@ func TestUpdateSchemaFromConfig_SpannSuccess(t *testing.T) {
312312
"config": {}
313313
}
314314
},
315-
"boolean": {
315+
"bool": {
316316
"bool_inverted_index": {
317317
"enabled": true,
318318
"config": {}
@@ -481,7 +481,7 @@ func TestUpdateSchemaFromConfig_EmbeddingFunction(t *testing.T) {
481481
"config": {}
482482
}
483483
},
484-
"boolean": {
484+
"bool": {
485485
"bool_inverted_index": {
486486
"enabled": true,
487487
"config": {}

go/pkg/sysdb/coordinator/table_catalog_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1555,7 +1555,7 @@ func TestUpdateCollection_WithSchema(t *testing.T) {
15551555
"config": {}
15561556
}
15571557
},
1558-
"boolean": {
1558+
"bool": {
15591559
"bool_inverted_index": {
15601560
"enabled": true,
15611561
"config": {}

rust/cli/src/commands/vacuum.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use chroma_segment::local_segment_manager::LocalSegmentManager;
1111
use chroma_sqlite::db::SqliteDb;
1212
use chroma_sysdb::SysDb;
1313
use chroma_system::System;
14-
use chroma_types::{CollectionUuid, KnnIndex, ListCollectionsRequest};
14+
use chroma_types::{CollectionUuid, KnnIndex, ListCollectionsRequest, Schema};
1515
use clap::Parser;
1616
use colored::Colorize;
1717
use dialoguer::Confirm;
@@ -108,10 +108,15 @@ async fn trigger_vector_segments_max_seq_id_migration(
108108
for collection_id in collection_ids {
109109
let mut collection = sysdb.get_collection_with_segments(collection_id).await?;
110110

111-
collection
112-
.collection
113-
.reconcile_schema_with_config(default_knn_index)
114-
.map_err(|e| Box::new(e) as Box<dyn Error>)?;
111+
if collection.collection.schema.is_none() {
112+
collection.collection.schema = Some(
113+
Schema::convert_collection_config_to_schema(
114+
&collection.collection.config,
115+
default_knn_index,
116+
)
117+
.map_err(|e| Box::new(e) as Box<dyn Error>)?,
118+
);
119+
}
115120

116121
// If collection is uninitialized, that means nothing has been written yet.
117122
let dim = match collection.collection.dimension {

rust/frontend/src/get_collection_with_segments_provider.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -185,14 +185,16 @@ impl CollectionsWithSegmentsProvider {
185185
.await?
186186
};
187187

188-
// reconcile schema and config
189-
let reconciled_schema = Schema::reconcile_schema_and_config(
190-
collection_and_segments_sysdb.collection.schema.as_ref(),
191-
Some(&collection_and_segments_sysdb.collection.config),
192-
knn_index,
193-
)
194-
.map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?;
195-
collection_and_segments_sysdb.collection.schema = Some(reconciled_schema);
188+
if collection_and_segments_sysdb.collection.schema.is_none() {
189+
collection_and_segments_sysdb.collection.schema = Some(
190+
Schema::convert_collection_config_to_schema(
191+
&collection_and_segments_sysdb.collection.config,
192+
knn_index,
193+
)
194+
.map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?,
195+
);
196+
}
197+
196198
self.set_collection_with_segments(collection_and_segments_sysdb.clone())
197199
.await;
198200
Ok(collection_and_segments_sysdb)

rust/frontend/src/impls/service_based_frontend.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ impl ServiceBasedFrontend {
381381
if self.enable_schema {
382382
for collection in collections.iter_mut() {
383383
collection
384-
.reconcile_schema_with_config(self.default_knn_index)
384+
.reconcile_schema_for_read(self.default_knn_index)
385385
.map_err(GetCollectionsError::InvalidSchema)?;
386386
}
387387
}
@@ -425,7 +425,7 @@ impl ServiceBasedFrontend {
425425
if self.enable_schema {
426426
for collection in &mut collections {
427427
collection
428-
.reconcile_schema_with_config(self.default_knn_index)
428+
.reconcile_schema_for_read(self.default_knn_index)
429429
.map_err(GetCollectionError::InvalidSchema)?;
430430
}
431431
}
@@ -450,7 +450,7 @@ impl ServiceBasedFrontend {
450450

451451
if self.enable_schema {
452452
collection
453-
.reconcile_schema_with_config(self.default_knn_index)
453+
.reconcile_schema_for_read(self.default_knn_index)
454454
.map_err(GetCollectionByCrnError::InvalidSchema)?;
455455
}
456456
Ok(collection)
@@ -630,9 +630,10 @@ impl ServiceBasedFrontend {
630630
// that was retrieved from sysdb, rather than the one that was passed in
631631
if self.enable_schema {
632632
collection
633-
.reconcile_schema_with_config(self.default_knn_index)
633+
.reconcile_schema_for_read(self.default_knn_index)
634634
.map_err(CreateCollectionError::InvalidSchema)?;
635635
}
636+
636637
Ok(collection)
637638
}
638639

@@ -735,7 +736,7 @@ impl ServiceBasedFrontend {
735736
.await?;
736737
collection_and_segments
737738
.collection
738-
.reconcile_schema_with_config(self.default_knn_index)
739+
.reconcile_schema_for_read(self.default_knn_index)
739740
.map_err(ForkCollectionError::InvalidSchema)?;
740741
let collection = collection_and_segments.collection.clone();
741742
let latest_collection_logical_size_bytes = collection_and_segments

rust/log/src/local_compaction_manager.rs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use chroma_sysdb::SysDb;
1515
use chroma_system::Handler;
1616
use chroma_system::{Component, ComponentContext};
1717
use chroma_types::{
18-
Chunk, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, LogRecord, SchemaError,
18+
Chunk, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, LogRecord, Schema, SchemaError,
1919
};
2020
use serde::{Deserialize, Serialize};
2121
use thiserror::Error;
@@ -141,9 +141,15 @@ impl Handler<BackfillMessage> for LocalCompactionManager {
141141
.get_collection_with_segments(message.collection_id)
142142
.await?;
143143
let schema_previously_persisted = collection_and_segments.collection.schema.is_some();
144-
collection_and_segments
145-
.collection
146-
.reconcile_schema_with_config(KnnIndex::Hnsw)?;
144+
if !schema_previously_persisted {
145+
collection_and_segments.collection.schema = Some(
146+
Schema::convert_collection_config_to_schema(
147+
&collection_and_segments.collection.config,
148+
KnnIndex::Hnsw,
149+
)
150+
.map_err(CompactionManagerError::SchemaReconcileError)?,
151+
);
152+
}
147153
// If collection is uninitialized, that means nothing has been written yet.
148154
let dim = match collection_and_segments.collection.dimension {
149155
Some(dim) => dim,
@@ -267,7 +273,12 @@ impl Handler<PurgeLogsMessage> for LocalCompactionManager {
267273
.get_collection_with_segments(message.collection_id)
268274
.await?;
269275
let mut collection = collection_segments.collection.clone();
270-
collection.reconcile_schema_with_config(KnnIndex::Hnsw)?;
276+
if collection.schema.is_none() {
277+
collection.schema = Some(
278+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Hnsw)
279+
.map_err(CompactionManagerError::SchemaReconcileError)?,
280+
);
281+
}
271282
// If dimension is None, that means nothing has been written yet.
272283
let dim = match collection.dimension {
273284
Some(dim) => dim,

rust/segment/src/distributed_spann.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,14 @@ impl SpannSegmentWriter {
112112
return Err(SpannSegmentWriterError::InvalidArgument);
113113
}
114114

115-
let reconciled_schema = Schema::reconcile_schema_and_config(
116-
collection.schema.as_ref(),
117-
Some(&collection.config),
118-
KnnIndex::Spann,
119-
)
120-
.map_err(SpannSegmentWriterError::InvalidSchema)?;
115+
let schema = if let Some(schema) = collection.schema.as_ref() {
116+
schema.clone()
117+
} else {
118+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
119+
.map_err(SpannSegmentWriterError::InvalidSchema)?
120+
};
121121

122-
let params = reconciled_schema
122+
let params = schema
123123
.get_internal_spann_config()
124124
.ok_or(SpannSegmentWriterError::MissingSpannConfiguration)?;
125125

@@ -690,8 +690,8 @@ mod test {
690690
..Default::default()
691691
};
692692
collection.schema = Some(
693-
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
694-
.expect("Error reconciling schema for test collection"),
693+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
694+
.expect("Error converting config to schema for test collection"),
695695
);
696696

697697
let pl_block_size = 5 * 1024 * 1024;
@@ -927,8 +927,8 @@ mod test {
927927
..Default::default()
928928
};
929929
collection.schema = Some(
930-
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
931-
.expect("Error reconciling schema for test collection"),
930+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
931+
.expect("Error converting config to schema for test collection"),
932932
);
933933

934934
let pl_block_size = 5 * 1024 * 1024;
@@ -1089,8 +1089,8 @@ mod test {
10891089
..Default::default()
10901090
};
10911091
collection.schema = Some(
1092-
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
1093-
.expect("Error reconciling schema for test collection"),
1092+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
1093+
.expect("Error converting config to schema for test collection"),
10941094
);
10951095

10961096
let segment_id = SegmentUuid::new();
@@ -1220,8 +1220,8 @@ mod test {
12201220
..Default::default()
12211221
};
12221222
collection.schema = Some(
1223-
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
1224-
.expect("Error reconciling schema for test collection"),
1223+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
1224+
.expect("Error converting config to schema for test collection"),
12251225
);
12261226

12271227
let pl_block_size = 5 * 1024 * 1024;

rust/types/src/collection.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,23 @@ impl Collection {
243243
Ok(())
244244
}
245245

246+
/// Reconcile the collection schema and configuration when serving read requests.
247+
///
248+
/// The read path needs to tolerate collections that only have a configuration persisted.
249+
/// This helper hydrates `schema` from the stored configuration when needed, or regenerates
250+
/// the configuration from the existing schema to keep both representations consistent.
251+
pub fn reconcile_schema_for_read(&mut self, knn_index: KnnIndex) -> Result<(), SchemaError> {
252+
if let Some(schema) = self.schema.as_ref() {
253+
self.config = InternalCollectionConfiguration::try_from(schema)
254+
.map_err(|reason| SchemaError::InvalidSchema { reason })?;
255+
} else {
256+
let schema = Schema::convert_collection_config_to_schema(&self.config, knn_index)?;
257+
self.schema = Some(schema);
258+
}
259+
260+
Ok(())
261+
}
262+
246263
pub fn test_collection(dim: i32) -> Self {
247264
Collection {
248265
name: "test_collection".to_string(),

0 commit comments

Comments
 (0)