Skip to content

Commit

Permalink
Add acorn option and nested objects from dataset.
Browse files Browse the repository at this point in the history
This commit adds support for enabling the ACORN filter strategy when
creating a collection, as well as when updating one.

It also includes a couple of new properties of type object, which
required adding the property type and improving the import_json function
to handle properties with camelCase naming.
  • Loading branch information
jfrancoa committed Nov 7, 2024
1 parent d0ecbeb commit 0cf0b9b
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 4 deletions.
11 changes: 10 additions & 1 deletion weaviate_cli/commands/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,16 @@ def create() -> None:
"--vector_index",
default="hnsw",
type=click.Choice(
["hnsw", "flat", "dynamic", "hnsw_pq", "hnsw_bq", "hnsw_sq", "flat_bq"]
[
"hnsw",
"flat",
"dynamic",
"hnsw_pq",
"hnsw_bq",
"hnsw_sq",
"hnsw_acorn",
"flat_bq",
]
),
help="Vector index type (default: 'hnsw').",
)
Expand Down
4 changes: 3 additions & 1 deletion weaviate_cli/commands/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def update() -> None:
@click.option(
"--vector_index",
default=None,
type=click.Choice(["hnsw", "flat", "hnsw_pq", "hnsw_bq", "hnsw_sq", "flat_bq"]),
type=click.Choice(
["hnsw", "flat", "hnsw_pq", "hnsw_bq", "hnsw_sq", "flat_bq", "hnsw_acorn"]
),
help='Vector index type (default: "None").',
)
@click.option(
Expand Down
24 changes: 24 additions & 0 deletions weaviate_cli/managers/collection_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from weaviate.client import WeaviateClient
from weaviate.collections import Collection
from weaviate.collections.classes.tenants import TenantActivityStatus
from weaviate.classes.config import VectorFilterStrategy
import weaviate.classes.config as wvc


Expand Down Expand Up @@ -86,6 +87,9 @@ def create_collection(
training_limit=training_limit
)
),
"hnsw_acorn": wvc.Configure.VectorIndex.hnsw(
filter_strategy=VectorFilterStrategy.ACORN
),
# Should fail at the moment as Flat and PQ are not compatible
"flat_pq": wvc.Configure.VectorIndex.flat(
quantizer=wvc.Configure.VectorIndex.Quantizer.pq()
Expand Down Expand Up @@ -117,6 +121,7 @@ def create_collection(
"length": wvc.Configure.inverted_index(index_property_length=True),
}

# Collection schema
properties: List[wvc.Property] = [
wvc.Property(name="title", data_type=wvc.DataType.TEXT),
wvc.Property(name="genres", data_type=wvc.DataType.TEXT),
Expand All @@ -126,6 +131,22 @@ def create_collection(
wvc.Property(name="runtime", data_type=wvc.DataType.TEXT),
wvc.Property(name="cast", data_type=wvc.DataType.TEXT),
wvc.Property(name="originalLanguage", data_type=wvc.DataType.TEXT),
wvc.Property(
name="productionCountries",
data_type=wvc.DataType.OBJECT_ARRAY,
nested_properties=[
wvc.Property(name="iso_3166_1", data_type=wvc.DataType.TEXT),
wvc.Property(name="name", data_type=wvc.DataType.TEXT),
],
),
wvc.Property(
name="spokenLanguages",
data_type=wvc.DataType.OBJECT_ARRAY,
nested_properties=[
wvc.Property(name="iso_639_1", data_type=wvc.DataType.TEXT),
wvc.Property(name="name", data_type=wvc.DataType.TEXT),
],
),
wvc.Property(name="tagline", data_type=wvc.DataType.TEXT),
wvc.Property(name="budget", data_type=wvc.DataType.NUMBER),
wvc.Property(name="releaseDate", data_type=wvc.DataType.DATE),
Expand Down Expand Up @@ -195,6 +216,9 @@ def update_collection(
"hnsw_bq": wvc.Reconfigure.VectorIndex.hnsw(
quantizer=wvc.Reconfigure.VectorIndex.Quantizer.bq()
),
"hnsw_acorn": wvc.Reconfigure.VectorIndex.hnsw(
filter_strategy=VectorFilterStrategy.ACORN
),
"flat_bq": wvc.Reconfigure.VectorIndex.flat(
quantizer=wvc.Reconfigure.VectorIndex.Quantizer.bq()
),
Expand Down
19 changes: 17 additions & 2 deletions weaviate_cli/managers/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@
import importlib.resources as resources
from pathlib import Path

PROPERTY_NAME_MAPPING = {"release_date": "releaseDate"}
PROPERTY_NAME_MAPPING = {
"releaseDate": "release_date",
"originalLanguage": "original_language",
"productionCountries": "production_countries",
"spokenLanguages": "spoken_languages",
}


class DataManager:
Expand Down Expand Up @@ -48,7 +53,7 @@ def __import_json(
for prop in properties:
prop_name = PROPERTY_NAME_MAPPING.get(prop.name, prop.name)
if prop_name in obj:
added_obj[prop_name] = self.__convert_property_value(
added_obj[prop.name] = self.__convert_property_value(
obj[prop_name], prop.data_type
)
batch.add_object(properties=added_obj)
Expand Down Expand Up @@ -95,6 +100,14 @@ def create_single_object() -> Dict:
date = datetime.strptime("1980-01-01", "%Y-%m-%d")
random_date = date + timedelta(days=random.randint(1, 15_000))
release_date = random_date.strftime("%Y-%m-%dT%H:%M:%SZ")
spoken_languages = [
{"iso_639_1": get_random_string(3), "name": get_random_string(3)}
for _ in range(random.randint(1, 3))
]
production_countries = [
{"iso_3166_1": get_random_string(3), "name": get_random_string(3)}
for _ in range(random.randint(1, 3))
]

prefix = "update-" if is_update else ""
return {
Expand All @@ -111,6 +124,8 @@ def create_single_object() -> Dict:
"releaseDate": release_date,
"revenue": random.randint(1_000_000, 10_000_0000_000),
"status": f"{prefix}status" + get_random_string(3),
"spokenLanguages": spoken_languages,
"productionCountries": production_countries,
}

if is_update:
Expand Down

0 comments on commit 0cf0b9b

Please sign in to comment.