Skip to content

Commit

Permalink
Merge pull request #90 from weaviate/rodrigo/vector_dimensions
Browse files: browse the repository at this point in the history
ingest_data: Add vector dimensions as a parameter
  • Loading branch information
jfrancoa authored Nov 7, 2024
2 parents d0ecbeb + f11fcca commit ccaf06a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 5 deletions.
1 change: 1 addition & 0 deletions test/unittests/test_managers/test_data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def test_ingest_data(mock_client):
consistency_level="quorum",
randomize=True,
auto_tenants=0,
vector_dimensions=1536,
)

mock_client.collections.get.assert_called_once_with("TestCollection")
Expand Down
22 changes: 21 additions & 1 deletion weaviate_cli/commands/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,29 @@ def create_backup_cli(ctx, backend, backup_id, include, exclude, wait, cpu_for_b
default=0,
help="Number of tenants for which we will send data. NOTE: Requires class with --auto_tenant_creation (default: 0).",
)
@click.option(
"--vector_dimensions",
default=1536,
help="Number of vector dimensions to be used when the data is randomized.",
)
@click.pass_context
def create_data_cli(ctx, collection, limit, consistency_level, randomize, auto_tenants):
def create_data_cli(
ctx,
collection,
limit,
consistency_level,
randomize,
auto_tenants,
vector_dimensions,
):
"""Ingest data into a collection in Weaviate."""

if vector_dimensions != 1536 and not randomize:
click.echo(
"Error: --vector_dimensions has no effect unless --randomize is enabled."
)
sys.exit(1)

client = None
try:
client = get_client_from_context(ctx)
Expand All @@ -252,6 +271,7 @@ def create_data_cli(ctx, collection, limit, consistency_level, randomize, auto_t
consistency_level=consistency_level,
randomize=randomize,
auto_tenants=auto_tenants,
vector_dimensions=vector_dimensions,
)
except Exception as e:
click.echo(f"Error: {e}")
Expand Down
23 changes: 19 additions & 4 deletions weaviate_cli/managers/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,22 +124,34 @@ def __ingest_data(
num_objects: int,
cl: wvc.ConsistencyLevel,
randomize: bool,
vector_dimensions: Optional[int] = 1536,
) -> int:
if randomize:
counter = 0
data_objects = self.__generate_data_object(num_objects)
cl_collection = collection.with_consistency_level(cl)
vectorizer = cl_collection.config.get().vectorizer
dimensions = 1536
if vectorizer == "text2vec-contextionary":
dimensions = 300
(
print("Warning: Using vector dimensions: 300")
if vector_dimensions != 1536
else None
)
vector_dimensions = 300
elif vectorizer == "text2vec-transformers":
dimensions = 768
(
print("Warning: Using vector dimensions: 768")
if vector_dimensions != 1536
else None
)
vector_dimensions = 768
with cl_collection.batch.dynamic() as batch:
for obj in data_objects:
batch.add_object(
properties=obj,
vector=np.random.rand(1, dimensions)[0].tolist(),
vector=(
2 * np.random.rand(1, vector_dimensions)[0] - 1
).tolist(),
)
counter += 1

Expand Down Expand Up @@ -167,6 +179,7 @@ def ingest_data(
consistency_level: str,
randomize: bool,
auto_tenants: int,
vector_dimensions: Optional[int] = 1536,
) -> None:

if not self.client.collections.exists(collection):
Expand Down Expand Up @@ -218,6 +231,7 @@ def ingest_data(
limit,
cl_map[consistency_level],
randomize,
vector_dimensions,
)
else:
click.echo(f"Processing tenant '{tenant}'")
Expand All @@ -226,6 +240,7 @@ def ingest_data(
limit,
cl_map[consistency_level],
randomize,
vector_dimensions,
)

if ret == -1:
Expand Down

0 comments on commit ccaf06a

Please sign in to comment.