Skip to content

Commit 564feed

Browse files
committed
[ENH] Add schema support to collection configuration
1 parent be76229 commit 564feed

File tree

8 files changed

+144
-3
lines changed

8 files changed

+144
-3
lines changed

chromadb/api/collection_configuration.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import TypedDict, Dict, Any, Optional, cast, get_args
1+
from typing import TypedDict, Dict, Any, Optional, cast, get_args, Literal
22
import json
33
import copy
44
from chromadb.api.types import (
@@ -15,6 +15,13 @@
1515
from multiprocessing import cpu_count
1616
import warnings
1717

18+
# Closed set of value-type names a schema entry may declare.
ValueType = Literal["int", "float", "string", "boolean"]


class CollectionSchema(TypedDict):
    """Schema entry stored under one string key of a collection's schema.

    The TypedDict is total (the default), so both keys are required.
    """

    # Declared value type; must be one of the ``ValueType`` literals.
    value_type: ValueType
    # NOTE(review): presumably toggles metadata indexing for this key —
    # confirm against the server-side consumer of this flag.
    metadata_index: bool
24+
1825

1926
class HNSWConfiguration(TypedDict, total=False):
2027
space: Space
@@ -44,6 +51,7 @@ class CollectionConfiguration(TypedDict, total=True):
4451
spann: Optional[SpannConfiguration]
4552
embedding_function: Optional[EmbeddingFunction] # type: ignore
4653
query_embedding_function: Optional[EmbeddingFunction] # type: ignore
54+
schema: Optional[Dict[str, CollectionSchema]]
4755

4856

4957
def load_collection_configuration_from_json_str(
@@ -126,6 +134,7 @@ def load_collection_configuration_from_json(
126134
spann=spann_config,
127135
embedding_function=ef, # type: ignore
128136
query_embedding_function=query_ef, # type: ignore
137+
schema=config_json_map.get("schema"),
129138
)
130139

131140

@@ -278,6 +287,7 @@ class CreateCollectionConfiguration(TypedDict, total=False):
278287
spann: Optional[CreateSpannConfiguration]
279288
embedding_function: Optional[EmbeddingFunction] # type: ignore
280289
query_config: Optional[QueryConfig]
290+
schema: Optional[Dict[str, CollectionSchema]]
281291

282292

283293
def create_collection_configuration_from_legacy_collection_metadata(
@@ -416,6 +426,7 @@ def create_collection_configuration_to_json(
416426
"spann": spann_config,
417427
"embedding_function": ef_config,
418428
"query_config": query_config,
429+
"schema": config.get("schema"),
419430
}
420431

421432

@@ -488,6 +499,7 @@ class UpdateCollectionConfiguration(TypedDict, total=False):
488499
spann: Optional[UpdateSpannConfiguration]
489500
embedding_function: Optional[EmbeddingFunction] # type: ignore
490501
query_config: Optional[QueryConfig]
502+
schema: Optional[Dict[str, CollectionSchema]]
491503

492504

493505
def update_collection_configuration_from_legacy_collection_metadata(
@@ -587,6 +599,7 @@ def update_collection_configuration_to_json(
587599
"spann": spann_config,
588600
"embedding_function": ef_config,
589601
"query_config": query_config,
602+
"schema": config.get("schema"),
590603
}
591604

592605

@@ -750,14 +763,34 @@ def overwrite_collection_configuration(
750763
ef_config[k] = v
751764
query_ef = updated_embedding_function.build_from_config(ef_config)
752765

766+
existing_schema = existing_config.get("schema")
767+
new_diff_schema = update_config.get("schema")
768+
updated_schema: Optional[Dict[str, CollectionSchema]] = None
769+
if existing_schema is not None:
770+
if new_diff_schema is not None:
771+
updated_schema = overwrite_schema(existing_schema, new_diff_schema)
772+
else:
773+
updated_schema = existing_schema
774+
else:
775+
updated_schema = new_diff_schema
776+
753777
return CollectionConfiguration(
754778
hnsw=updated_hnsw_config,
755779
spann=updated_spann_config,
756780
embedding_function=updated_embedding_function,
757781
query_embedding_function=query_ef,
782+
schema=updated_schema,
758783
)
759784

760785

786+
def overwrite_schema(
    existing_schema: Dict[str, CollectionSchema],
    new_diff_schema: Dict[str, CollectionSchema],
) -> Dict[str, CollectionSchema]:
    """Return ``existing_schema`` with the entries of ``new_diff_schema`` applied.

    Args:
        existing_schema: The current schema, keyed by string.
        new_diff_schema: Entries to add or replace, keyed by string.

    Returns:
        A new dict containing every key of both inputs. When a key appears
        in both, the entry from ``new_diff_schema`` replaces the existing
        entry as a whole (entries are not merged field by field). Neither
        input dict is mutated; the per-key entry objects themselves are
        shared with the inputs, not copied.
    """
    # The later unpacking wins on key collisions, so the diff overrides.
    return {**existing_schema, **new_diff_schema}
792+
793+
761794
def validate_embedding_function_conflict_on_create(
762795
embedding_function: Optional[EmbeddingFunction], # type: ignore
763796
configuration_ef: Optional[EmbeddingFunction], # type: ignore

go/pkg/sysdb/coordinator/model/collection_configuration.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,16 @@ type SpannConfiguration struct {
5353
MergeThreshold int `json:"merge_threshold"`
5454
}
5555

56+
// CollectionSchema is one schema entry of a collection configuration.
// It is (de)serialized as a JSON object with the keys "value_type" and
// "metadata_index"; both are always emitted (no omitempty).
type CollectionSchema struct {
57+
// ValueType is the declared value type, carried as a plain string.
ValueType string `json:"value_type"`
58+
// MetadataIndex is the boolean stored under "metadata_index".
// NOTE(review): presumably enables metadata indexing for the key — confirm.
MetadataIndex bool `json:"metadata_index"`
59+
}
60+
5661
// InternalCollectionConfiguration is the JSON-serializable form of a
// collection's configuration. "vector_index" is always written; the other
// fields carry omitempty and are left out of the JSON when unset.
type InternalCollectionConfiguration struct {
5762
// VectorIndex is serialized under "vector_index" (no omitempty).
VectorIndex *VectorIndexConfiguration `json:"vector_index"`
5863
// EmbeddingFunction is omitted from the JSON when nil.
EmbeddingFunction *EmbeddingFunctionConfiguration `json:"embedding_function,omitempty"`
5964
// QueryConfig is untyped (interface{}) and omitted from the JSON when nil.
QueryConfig interface{} `json:"query_config,omitempty"`
65+
// Schema maps a string key to its CollectionSchema entry. It is a pointer,
// so a nil value is dropped from the JSON by omitempty.
Schema *map[string]CollectionSchema `json:"schema,omitempty"`
6066
}
6167

6268
// DefaultHnswCollectionConfiguration returns a default configuration using HNSW
@@ -129,4 +135,5 @@ type InternalUpdateCollectionConfiguration struct {
129135
VectorIndex *UpdateVectorIndexConfiguration `json:"vector_index,omitempty"`
130136
EmbeddingFunction *EmbeddingFunctionConfiguration `json:"embedding_function,omitempty"`
131137
QueryConfig interface{} `json:"query_config,omitempty"`
138+
Schema *map[string]CollectionSchema `json:"schema,omitempty"`
132139
}

go/pkg/sysdb/coordinator/table_catalog.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,10 @@ func (tc *Catalog) updateCollectionConfiguration(
858858
existingConfig.QueryConfig = updateConfig.QueryConfig
859859
}
860860

861+
if updateConfig.Schema != nil {
862+
existingConfig.Schema = updateConfig.Schema
863+
}
864+
861865
// Serialize updated config back to JSON
862866
updatedConfigBytes, err := json.Marshal(existingConfig)
863867
if err != nil {

rust/python_bindings/src/bindings.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ impl Bindings {
281281
spann: None,
282282
embedding_function: None,
283283
query_config: None,
284+
schema: None,
284285
},
285286
self.frontend.get_default_knn_index(),
286287
)?),

rust/segment/src/distributed_hnsw.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ pub mod test {
435435
),
436436
embedding_function: None,
437437
query_config: None,
438+
schema: None,
438439
},
439440
..Default::default()
440441
};

rust/segment/src/distributed_spann.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,7 @@ mod test {
657657
vector_index: chroma_types::VectorIndexConfiguration::Spann(params),
658658
embedding_function: None,
659659
query_config: None,
660+
schema: None,
660661
},
661662
metadata: None,
662663
dimension: None,
@@ -887,6 +888,7 @@ mod test {
887888
vector_index: chroma_types::VectorIndexConfiguration::Spann(params),
888889
embedding_function: None,
889890
query_config: None,
891+
schema: None,
890892
},
891893
..Default::default()
892894
};

rust/sysdb/src/sqlite.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,9 @@ impl SqliteSysDb {
373373
let collections = collections.unwrap();
374374
let collection = collections.into_iter().next().unwrap();
375375
let mut existing_configuration = collection.config;
376-
existing_configuration.update(&configuration);
376+
existing_configuration
377+
.update(&configuration)
378+
.map_err(|e| UpdateCollectionError::Internal(e.boxed()))?;
377379
configuration_json_str = Some(
378380
serde_json::to_string(&existing_configuration)
379381
.map_err(UpdateCollectionError::Configuration)?,
@@ -1363,6 +1365,7 @@ mod tests {
13631365
spann: None,
13641366
embedding_function: None,
13651367
query_config: None,
1368+
schema: None,
13661369
}),
13671370
)
13681371
.await

0 commit comments

Comments
 (0)