From f11fcca39510ea713bc2897584df398d6dc3e730 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Thu, 7 Nov 2024 16:03:44 +0100 Subject: [PATCH] Improve the logic to generate vectors. Also adds some warning messages for ease of use. --- test/unittests/test_managers/test_data_manager.py | 2 +- weaviate_cli/commands/create.py | 6 ++++++ weaviate_cli/managers/data_manager.py | 14 +++++++++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/test/unittests/test_managers/test_data_manager.py b/test/unittests/test_managers/test_data_manager.py index 8f32fab..4ca359a 100644 --- a/test/unittests/test_managers/test_data_manager.py +++ b/test/unittests/test_managers/test_data_manager.py @@ -24,7 +24,7 @@ def test_ingest_data(mock_client): consistency_level="quorum", randomize=True, auto_tenants=0, - vector_dimensions=None, + vector_dimensions=1536, ) mock_client.collections.get.assert_called_once_with("TestCollection") diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index 1f919c2..8716af3 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -254,6 +254,12 @@ def create_data_cli( ): """Ingest data into a collection in Weaviate.""" + if vector_dimensions != 1536 and not randomize: + click.echo( + "Error: --vector_dimensions has no effect unless --randomize is enabled." 
+ ) + sys.exit(1) + client = None try: client = get_client_from_context(ctx) diff --git a/weaviate_cli/managers/data_manager.py b/weaviate_cli/managers/data_manager.py index f5b4f5b..a936bb9 100644 --- a/weaviate_cli/managers/data_manager.py +++ b/weaviate_cli/managers/data_manager.py @@ -132,14 +132,26 @@ def __ingest_data( cl_collection = collection.with_consistency_level(cl) vectorizer = cl_collection.config.get().vectorizer if vectorizer == "text2vec-contextionary": + ( + print("Warning: Using vector dimensions: 300") + if vector_dimensions != 1536 + else None + ) vector_dimensions = 300 elif vectorizer == "text2vec-transformers": + ( + print("Warning: Using vector dimensions: 768") + if vector_dimensions != 1536 + else None + ) vector_dimensions = 768 with cl_collection.batch.dynamic() as batch: for obj in data_objects: batch.add_object( properties=obj, - vector=np.random.rand(1, vector_dimensions)[0].tolist(), + vector=( + 2 * np.random.rand(1, vector_dimensions)[0] - 1 + ).tolist(), ) counter += 1