diff --git a/README.html b/README.html new file mode 100644 index 0000000..97ed03f --- /dev/null +++ b/README.html @@ -0,0 +1,504 @@ +

Vector IO

+

+ PyPI - Version + PyPI - Downloads + Discord +

+

+ + +

+

This library uses a universal format for vector datasets (VDF) so that data can easily be exported from and imported into any supported vector database.

+

Request support for a VectorDB by voting/commenting on this poll

+

See the Contributing section to add support for your favorite vector database.

+

Supported Vector Databases

+
Fully Supported

| Vector Database             | Import | Export |
|-----------------------------|--------|--------|
| Pinecone                    | ✅     | ✅     |
| Qdrant                      | ✅     | ✅     |
| Milvus                      | ✅     | ✅     |
| GCP Vertex AI Vector Search | ✅     | ✅     |
| KDB.AI                      | ✅     | ✅     |
| LanceDB                     | ✅     | ✅     |
| DataStax Astra DB           | ✅     | ✅     |
| Chroma                      | ✅     | ✅     |
| Turbopuffer                 | ✅     | ✅     |
+
+
+
Partial

| Vector Database | Import | Export |
|-----------------|--------|--------|
+
+ +
+
In Progress

| Vector Database  | Import | Export |
|------------------|--------|--------|
| pgvector         | ⏳     | ⏳     |
| Azure AI Search  | ⏳     | ⏳     |
| Weaviate         | ⏳     | ⏳     |
| MongoDB Atlas    | ⏳     | ⏳     |
| Apache Cassandra | ⏳     | ⏳     |
| txtai            | ⏳     | ⏳     |
| SQLite-VSS       | ⏳     | ⏳     |
+
+
+
Not Supported

| Vector Database      | Import | Export |
|----------------------|--------|--------|
| Vespa                | ❌     | ❌     |
| AWS Neptune          | ❌     | ❌     |
| Neo4j                | ❌     | ❌     |
| Marqo                | ❌     | ❌     |
| OpenSearch           | ❌     | ❌     |
| Elasticsearch        | ❌     | ❌     |
| Apache Solr          | ❌     | ❌     |
| Redis Search         | ❌     | ❌     |
| ClickHouse           | ❌     | ❌     |
| USearch              | ❌     | ❌     |
| Rockset              | ❌     | ❌     |
| Epsilla              | ❌     | ❌     |
| Activeloop Deep Lake | ❌     | ❌     |
| ApertureDB           | ❌     | ❌     |
| CrateDB              | ❌     | ❌     |
| Meilisearch          | ❌     | ❌     |
| MyScale              | ❌     | ❌     |
| Nuclia DB            | ❌     | ❌     |
| OramaSearch          | ❌     | ❌     |
| Typesense            | ❌     | ❌     |
| Anari AI             | ❌     | ❌     |
| Vald                 | ❌     | ❌     |
+
+

Installation

+

Using pip

+
+
pip install vdf-io
+
+

From source

+
+
git clone https://github.com/AI-Northstar-Tech/vector-io.git
+cd vector-io
+pip install -r requirements.txt
+
+

Universal Vector Dataset Format (VDF) specification

+
  1. VDF_META.json: a JSON file that follows the VDFMeta schema defined in src/vdf_io/meta_types.py (a made-up example instance is sketched after this list):
+
+
class NamespaceMeta(BaseModel):
+    namespace: str
+    index_name: str
+    total_vector_count: int
+    exported_vector_count: int
+    dimensions: int
+    model_name: str | None = None
+    vector_columns: List[str] = ["vector"]
+    data_path: str
+    metric: str | None = None
+    index_config: Optional[Dict[Any, Any]] = None
+    schema_dict: Optional[Dict[str, Any]] = None
+
+
+class VDFMeta(BaseModel):
+    version: str
+    file_structure: List[str]
+    author: str
+    exported_from: str
+    indexes: Dict[str, List[NamespaceMeta]]
+    exported_at: str
+    id_column: Optional[str] = None
+
+
  2. Parquet files/folders for metadata and vectors.
+
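To make the schema concrete, here is a minimal, made-up example of building the metadata for a small single-namespace Pinecone export using the models above. All field values are illustrative, and the serialization call at the end assumes pydantic v1.

from vdf_io.meta_types import NamespaceMeta, VDFMeta

meta = VDFMeta(
    version="v1",
    file_structure=["VDF_META.json", "my-index/"],
    author="your-username",
    exported_from="pinecone",
    indexes={
        "my-index": [
            NamespaceMeta(
                namespace="",
                index_name="my-index",
                total_vector_count=10_000,
                exported_vector_count=10_000,
                dimensions=384,
                model_name="sentence-transformers/all-MiniLM-L6-v2",
                vector_columns=["vector"],
                data_path="my-index",
                metric="Cosine",
            )
        ]
    },
    exported_at="2024-01-01T00:00:00+00:00",
)

# Write the metadata to the root of the dataset folder.
with open("VDF_META.json", "w") as f:
    f.write(meta.json(indent=2))  # pydantic v1; use meta.model_dump_json(indent=2) on v2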

Export Script

+
+
export_vdf --help
+usage: export_vdf [-h] [-m MODEL_NAME]
+                  [--max_file_size MAX_FILE_SIZE]
+                  [--push_to_hub | --no-push_to_hub]
+                  [--public | --no-public]
+                  {pinecone,qdrant,kdbai,milvus,vertexai_vectorsearch}
+                  ...
+
+Export data from various vector databases to the VDF format for vector datasets
+
+options:
+  -h, --help            show this help message and exit
+  -m MODEL_NAME, --model_name MODEL_NAME
+                        Name of model used
+  --max_file_size MAX_FILE_SIZE
+                        Maximum file size in MB (default:
+                        1024)
+  --push_to_hub, --no-push_to_hub
+                        Push to hub
+  --public, --no-public
+                        Make dataset public (default:
+                        False)
+
+Vector Databases:
+  Choose the vector database to export data from
+
+  {pinecone,qdrant,kdbai,milvus,vertexai_vectorsearch}
+    pinecone            Export data from Pinecone
+    qdrant              Export data from Qdrant
+    kdbai               Export data from KDB.AI
+    milvus              Export data from Milvus
+    vertexai_vectorsearch
+                        Export data from Vertex AI Vector
+                        Search
+
+

Import Script

+
+
import_vdf --help
+usage: import_vdf [-h] [-d DIR] [-s | --subset | --no-subset]
+                  [--create_new | --no-create_new]
+                  {milvus,pinecone,qdrant,vertexai_vectorsearch,kdbai}
+                  ...
+
+Import data from VDF to a vector database
+
+options:
+  -h, --help            show this help message and exit
+  -d DIR, --dir DIR     Directory to import
+  -s, --subset, --no-subset
+                        Import a subset of data (default: False)
+  --create_new, --no-create_new
+                        Create a new index (default: False)
+
+Vector Databases:
+  Choose the vector database to import data to
+
+  {milvus,pinecone,qdrant,vertexai_vectorsearch,kdbai}
+    milvus              Import data to Milvus
+    pinecone            Import data to Pinecone
+    qdrant              Import data to Qdrant
+    vertexai_vectorsearch
+                        Import data to Vertex AI Vector Search
+    kdbai               Import data to KDB.AI
+
+

Re-embed Script

+

This Python script re-embeds a vector dataset. It takes a directory containing a vector dataset in the VDF format and re-embeds it using a new model. The script also lets you specify the name of the column containing the text to be embedded.

+
+
reembed_vdf --help
+usage: reembed_vdf [-h] -d DIR [-m NEW_MODEL_NAME]
+                  [-t TEXT_COLUMN]
+
+Reembed a vector dataset
+
+options:
+  -h, --help            show this help message and exit
+  -d DIR, --dir DIR     Directory of vector dataset in
+                        the VDF format
+  -m NEW_MODEL_NAME, --new_model_name NEW_MODEL_NAME
+                        Name of new model to be used
+  -t TEXT_COLUMN, --text_column TEXT_COLUMN
+                        Name of the column containing
+                        text to be embedded
+
+

Examples

+
+
export_vdf -m hkunlp/instructor-xl --push_to_hub pinecone --environment gcp-starter
+
+import_vdf -d /path/to/vdf/dataset milvus
+
+reembed_vdf -d /path/to/vdf/dataset -m sentence-transformers/all-MiniLM-L6-v2 -t title
+
+

Follow the prompts to select the index and ID range to export.

+

Contributing

+

Adding a new vector database

+

If you wish to add an import or export implementation for a new vector database, you must also implement the other direction for the same database. Please fork the repo and send a PR containing both the import and export scripts.

+

Steps to add a new vector database (ABC):

+
  1. Add your database name in src/vdf_io/names.py in the DBNames enum class.
  2. Create new files src/vdf_io/export_vdf/export_abc.py and src/vdf_io/import_vdf/import_abc.py for the new DB.
+

Export:

+
  1. In your export file, define a class ExportABC which inherits from ExportVDF.
  2. Specify a DB_NAME_SLUG for the class.
  3. The class should implement:
     1. make_parser() function to add database-specific arguments to the export_vdf CLI.
     2. export_vdb() function to prompt the user for info not provided in the CLI. It should then call the get_data() function.
     3. get_data() function to download points (in a batched manner) with all the metadata from the specified index of the vector database. This data should be stored in a series of parquet files/folders, and the metadata should be stored in a JSON file with the schema above.
  4. Use the script to export data from an example index of the vector database and verify that the data is exported correctly. A minimal skeleton is sketched below.
+
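For orientation, the sketch below shows what such an export class could look like for a hypothetical database "abc". The class name, DB_NAME_SLUG, and the three methods follow the steps above; the base-class import path, method signatures, and bodies are assumptions and placeholders, not the actual vdf_io internals.

from vdf_io.names import DBNames
from vdf_io.export_vdf.vdb_export_cls import ExportVDF  # assumed base-class location


class ExportABC(ExportVDF):
    DB_NAME_SLUG = DBNames.ABC  # added to the DBNames enum in step 1

    @classmethod
    def make_parser(cls, subparsers):
        # Register abc-specific arguments on the export_vdf CLI.
        parser = subparsers.add_parser(cls.DB_NAME_SLUG, help="Export data from ABC")
        parser.add_argument("--url", help="URL of the ABC instance")

    @classmethod
    def export_vdb(cls, args):
        # Prompt for anything the user did not pass on the CLI, then export.
        if not args.get("url"):
            args["url"] = input("Enter the URL of the ABC instance: ")
        instance = cls(args)
        instance.get_data()
        return instance

    def get_data(self):
        # Download points from the index in batches, write them to parquet
        # files/folders, and record the metadata in VDF_META.json
        # (schema shown above).
        raise NotImplementedError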

Import:

+
  1. In your import file, define a class ImportABC which inherits from ImportVDF.
  2. Specify a DB_NAME_SLUG for the class.
  3. The class should implement:
     1. make_parser() function to add database-specific arguments to the import_vdf CLI, such as the URL of the database, any authentication tokens, etc.
     2. import_vdb() function to prompt the user for info not provided in the CLI. It should then call the upsert_data() function.
     3. upsert_data() function to upload points from a VDF dataset (in a batched manner) with all the metadata to the specified index of the vector database. All metadata about the dataset should be read from the VDF_META.json file in the VDF folder.
  4. Use the script to import data from the example VDF dataset exported in the previous step and verify that the data is imported correctly. A matching skeleton is sketched below.
+
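A matching sketch for the import side, under the same caveats (names from the steps above; the import path, signatures, and bodies are placeholders):

from vdf_io.names import DBNames
from vdf_io.import_vdf.vdf_import_cls import ImportVDF  # assumed base-class location


class ImportABC(ImportVDF):
    DB_NAME_SLUG = DBNames.ABC

    @classmethod
    def make_parser(cls, subparsers):
        # Register abc-specific arguments (URL, auth token, etc.) on the import_vdf CLI.
        parser = subparsers.add_parser(cls.DB_NAME_SLUG, help="Import data to ABC")
        parser.add_argument("--url", help="URL of the ABC instance")
        parser.add_argument("--token", help="Authentication token")

    @classmethod
    def import_vdb(cls, args):
        # Prompt for anything missing from the CLI, then upsert.
        if not args.get("url"):
            args["url"] = input("Enter the URL of the ABC instance: ")
        instance = cls(args)
        instance.upsert_data()
        return instance

    def upsert_data(self):
        # Read VDF_META.json from the dataset folder and upload points
        # (in batches) with their metadata to the target index.
        raise NotImplementedError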

Changing the VDF specification

+

If you wish to change the VDF specification, please open an issue to discuss the change before sending a PR.

+

Efficiency improvements

+

If you wish to improve the efficiency of the import/export scripts, please fork the repo and send a PR.

+

Telemetry

+

Running the scripts in the repo will send anonymous usage data to AI Northstar Tech to help improve the library.

+

You can opt out by setting the environment variable DISABLE_TELEMETRY_VECTORIO to 1 (e.g. export DISABLE_TELEMETRY_VECTORIO=1 in your shell).

+

Questions

+

If you have any questions, please open an issue on the repo or message Dhruv Anand on LinkedIn.

\ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 0000000..97ed03f --- /dev/null +++ b/index.html @@ -0,0 +1,504 @@ +

Vector IO

+

+ PyPI - Version + PyPI - Downloads + Discord +

+

+ + +

+

This library uses a universal format for vector datasets (VDF) so that data can easily be exported from and imported into any supported vector database.

+

Request support for a VectorDB by voting/commenting on this poll

+

See the Contributing section to add support for your favorite vector database.

+

Supported Vector Databases

+
Fully Supported

| Vector Database             | Import | Export |
|-----------------------------|--------|--------|
| Pinecone                    | ✅     | ✅     |
| Qdrant                      | ✅     | ✅     |
| Milvus                      | ✅     | ✅     |
| GCP Vertex AI Vector Search | ✅     | ✅     |
| KDB.AI                      | ✅     | ✅     |
| LanceDB                     | ✅     | ✅     |
| DataStax Astra DB           | ✅     | ✅     |
| Chroma                      | ✅     | ✅     |
| Turbopuffer                 | ✅     | ✅     |
+
+
+
Partial

| Vector Database | Import | Export |
|-----------------|--------|--------|
+
+ +
+
In Progress

| Vector Database  | Import | Export |
|------------------|--------|--------|
| pgvector         | ⏳     | ⏳     |
| Azure AI Search  | ⏳     | ⏳     |
| Weaviate         | ⏳     | ⏳     |
| MongoDB Atlas    | ⏳     | ⏳     |
| Apache Cassandra | ⏳     | ⏳     |
| txtai            | ⏳     | ⏳     |
| SQLite-VSS       | ⏳     | ⏳     |
+
+
+
Not Supported

| Vector Database      | Import | Export |
|----------------------|--------|--------|
| Vespa                | ❌     | ❌     |
| AWS Neptune          | ❌     | ❌     |
| Neo4j                | ❌     | ❌     |
| Marqo                | ❌     | ❌     |
| OpenSearch           | ❌     | ❌     |
| Elasticsearch        | ❌     | ❌     |
| Apache Solr          | ❌     | ❌     |
| Redis Search         | ❌     | ❌     |
| ClickHouse           | ❌     | ❌     |
| USearch              | ❌     | ❌     |
| Rockset              | ❌     | ❌     |
| Epsilla              | ❌     | ❌     |
| Activeloop Deep Lake | ❌     | ❌     |
| ApertureDB           | ❌     | ❌     |
| CrateDB              | ❌     | ❌     |
| Meilisearch          | ❌     | ❌     |
| MyScale              | ❌     | ❌     |
| Nuclia DB            | ❌     | ❌     |
| OramaSearch          | ❌     | ❌     |
| Typesense            | ❌     | ❌     |
| Anari AI             | ❌     | ❌     |
| Vald                 | ❌     | ❌     |
+
+

Installation

+

Using pip

+
+
pip install vdf-io
+
+

From source

+
+
git clone https://github.com/AI-Northstar-Tech/vector-io.git
+cd vector-io
+pip install -r requirements.txt
+
+

Universal Vector Dataset Format (VDF) specification

+
  1. VDF_META.json: a JSON file that follows the VDFMeta schema defined in src/vdf_io/meta_types.py (a made-up example instance is sketched after this list):
+
+
class NamespaceMeta(BaseModel):
+    namespace: str
+    index_name: str
+    total_vector_count: int
+    exported_vector_count: int
+    dimensions: int
+    model_name: str | None = None
+    vector_columns: List[str] = ["vector"]
+    data_path: str
+    metric: str | None = None
+    index_config: Optional[Dict[Any, Any]] = None
+    schema_dict: Optional[Dict[str, Any]] = None
+
+
+class VDFMeta(BaseModel):
+    version: str
+    file_structure: List[str]
+    author: str
+    exported_from: str
+    indexes: Dict[str, List[NamespaceMeta]]
+    exported_at: str
+    id_column: Optional[str] = None
+
+
  2. Parquet files/folders for metadata and vectors.
+
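To make the schema concrete, here is a minimal, made-up example of building the metadata for a small single-namespace Pinecone export using the models above. All field values are illustrative, and the serialization call at the end assumes pydantic v1.

from vdf_io.meta_types import NamespaceMeta, VDFMeta

meta = VDFMeta(
    version="v1",
    file_structure=["VDF_META.json", "my-index/"],
    author="your-username",
    exported_from="pinecone",
    indexes={
        "my-index": [
            NamespaceMeta(
                namespace="",
                index_name="my-index",
                total_vector_count=10_000,
                exported_vector_count=10_000,
                dimensions=384,
                model_name="sentence-transformers/all-MiniLM-L6-v2",
                vector_columns=["vector"],
                data_path="my-index",
                metric="Cosine",
            )
        ]
    },
    exported_at="2024-01-01T00:00:00+00:00",
)

# Write the metadata to the root of the dataset folder.
with open("VDF_META.json", "w") as f:
    f.write(meta.json(indent=2))  # pydantic v1; use meta.model_dump_json(indent=2) on v2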

Export Script

+
+
export_vdf --help
+usage: export_vdf [-h] [-m MODEL_NAME]
+                  [--max_file_size MAX_FILE_SIZE]
+                  [--push_to_hub | --no-push_to_hub]
+                  [--public | --no-public]
+                  {pinecone,qdrant,kdbai,milvus,vertexai_vectorsearch}
+                  ...
+
+Export data from various vector databases to the VDF format for vector datasets
+
+options:
+  -h, --help            show this help message and exit
+  -m MODEL_NAME, --model_name MODEL_NAME
+                        Name of model used
+  --max_file_size MAX_FILE_SIZE
+                        Maximum file size in MB (default:
+                        1024)
+  --push_to_hub, --no-push_to_hub
+                        Push to hub
+  --public, --no-public
+                        Make dataset public (default:
+                        False)
+
+Vector Databases:
+  Choose the vector database to export data from
+
+  {pinecone,qdrant,kdbai,milvus,vertexai_vectorsearch}
+    pinecone            Export data from Pinecone
+    qdrant              Export data from Qdrant
+    kdbai               Export data from KDB.AI
+    milvus              Export data from Milvus
+    vertexai_vectorsearch
+                        Export data from Vertex AI Vector
+                        Search
+
+

Import Script

+
+
import_vdf --help
+usage: import_vdf [-h] [-d DIR] [-s | --subset | --no-subset]
+                  [--create_new | --no-create_new]
+                  {milvus,pinecone,qdrant,vertexai_vectorsearch,kdbai}
+                  ...
+
+Import data from VDF to a vector database
+
+options:
+  -h, --help            show this help message and exit
+  -d DIR, --dir DIR     Directory to import
+  -s, --subset, --no-subset
+                        Import a subset of data (default: False)
+  --create_new, --no-create_new
+                        Create a new index (default: False)
+
+Vector Databases:
+  Choose the vector database to import data to
+
+  {milvus,pinecone,qdrant,vertexai_vectorsearch,kdbai}
+    milvus              Import data to Milvus
+    pinecone            Import data to Pinecone
+    qdrant              Import data to Qdrant
+    vertexai_vectorsearch
+                        Import data to Vertex AI Vector Search
+    kdbai               Import data to KDB.AI
+
+

Re-embed Script

+

This Python script re-embeds a vector dataset. It takes a directory containing a vector dataset in the VDF format and re-embeds it using a new model. The script also lets you specify the name of the column containing the text to be embedded.

+
+
reembed_vdf --help
+usage: reembed_vdf [-h] -d DIR [-m NEW_MODEL_NAME]
+                  [-t TEXT_COLUMN]
+
+Reembed a vector dataset
+
+options:
+  -h, --help            show this help message and exit
+  -d DIR, --dir DIR     Directory of vector dataset in
+                        the VDF format
+  -m NEW_MODEL_NAME, --new_model_name NEW_MODEL_NAME
+                        Name of new model to be used
+  -t TEXT_COLUMN, --text_column TEXT_COLUMN
+                        Name of the column containing
+                        text to be embedded
+
+

Examples

+
+
export_vdf -m hkunlp/instructor-xl --push_to_hub pinecone --environment gcp-starter
+
+import_vdf -d /path/to/vdf/dataset milvus
+
+reembed_vdf -d /path/to/vdf/dataset -m sentence-transformers/all-MiniLM-L6-v2 -t title
+
+

Follow the prompts to select the index and ID range to export.

+

Contributing

+

Adding a new vector database

+

If you wish to add an import or export implementation for a new vector database, you must also implement the other direction for the same database. Please fork the repo and send a PR containing both the import and export scripts.

+

Steps to add a new vector database (ABC):

+
  1. Add your database name in src/vdf_io/names.py in the DBNames enum class.
  2. Create new files src/vdf_io/export_vdf/export_abc.py and src/vdf_io/import_vdf/import_abc.py for the new DB.
+

Export:

+
  1. In your export file, define a class ExportABC which inherits from ExportVDF.
  2. Specify a DB_NAME_SLUG for the class.
  3. The class should implement:
     1. make_parser() function to add database-specific arguments to the export_vdf CLI.
     2. export_vdb() function to prompt the user for info not provided in the CLI. It should then call the get_data() function.
     3. get_data() function to download points (in a batched manner) with all the metadata from the specified index of the vector database. This data should be stored in a series of parquet files/folders, and the metadata should be stored in a JSON file with the schema above.
  4. Use the script to export data from an example index of the vector database and verify that the data is exported correctly. A minimal skeleton is sketched below.
+
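For orientation, the sketch below shows what such an export class could look like for a hypothetical database "abc". The class name, DB_NAME_SLUG, and the three methods follow the steps above; the base-class import path, method signatures, and bodies are assumptions and placeholders, not the actual vdf_io internals.

from vdf_io.names import DBNames
from vdf_io.export_vdf.vdb_export_cls import ExportVDF  # assumed base-class location


class ExportABC(ExportVDF):
    DB_NAME_SLUG = DBNames.ABC  # added to the DBNames enum in step 1

    @classmethod
    def make_parser(cls, subparsers):
        # Register abc-specific arguments on the export_vdf CLI.
        parser = subparsers.add_parser(cls.DB_NAME_SLUG, help="Export data from ABC")
        parser.add_argument("--url", help="URL of the ABC instance")

    @classmethod
    def export_vdb(cls, args):
        # Prompt for anything the user did not pass on the CLI, then export.
        if not args.get("url"):
            args["url"] = input("Enter the URL of the ABC instance: ")
        instance = cls(args)
        instance.get_data()
        return instance

    def get_data(self):
        # Download points from the index in batches, write them to parquet
        # files/folders, and record the metadata in VDF_META.json
        # (schema shown above).
        raise NotImplementedError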

Import:

+
  1. In your import file, define a class ImportABC which inherits from ImportVDF.
  2. Specify a DB_NAME_SLUG for the class.
  3. The class should implement:
     1. make_parser() function to add database-specific arguments to the import_vdf CLI, such as the URL of the database, any authentication tokens, etc.
     2. import_vdb() function to prompt the user for info not provided in the CLI. It should then call the upsert_data() function.
     3. upsert_data() function to upload points from a VDF dataset (in a batched manner) with all the metadata to the specified index of the vector database. All metadata about the dataset should be read from the VDF_META.json file in the VDF folder.
  4. Use the script to import data from the example VDF dataset exported in the previous step and verify that the data is imported correctly. A matching skeleton is sketched below.
+
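A matching sketch for the import side, under the same caveats (names from the steps above; the import path, signatures, and bodies are placeholders):

from vdf_io.names import DBNames
from vdf_io.import_vdf.vdf_import_cls import ImportVDF  # assumed base-class location


class ImportABC(ImportVDF):
    DB_NAME_SLUG = DBNames.ABC

    @classmethod
    def make_parser(cls, subparsers):
        # Register abc-specific arguments (URL, auth token, etc.) on the import_vdf CLI.
        parser = subparsers.add_parser(cls.DB_NAME_SLUG, help="Import data to ABC")
        parser.add_argument("--url", help="URL of the ABC instance")
        parser.add_argument("--token", help="Authentication token")

    @classmethod
    def import_vdb(cls, args):
        # Prompt for anything missing from the CLI, then upsert.
        if not args.get("url"):
            args["url"] = input("Enter the URL of the ABC instance: ")
        instance = cls(args)
        instance.upsert_data()
        return instance

    def upsert_data(self):
        # Read VDF_META.json from the dataset folder and upload points
        # (in batches) with their metadata to the target index.
        raise NotImplementedError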

Changing the VDF specification

+

If you wish to change the VDF specification, please open an issue to discuss the change before sending a PR.

+

Efficiency improvements

+

If you wish to improve the efficiency of the import/export scripts, please fork the repo and send a PR.

+

Telemetry

+

Running the scripts in the repo will send anonymous usage data to AI Northstar Tech to help improve the library.

+

You can opt out by setting the environment variable DISABLE_TELEMETRY_VECTORIO to 1 (e.g. export DISABLE_TELEMETRY_VECTORIO=1 in your shell).

+

Questions

+

If you have any questions, please open an issue on the repo or message Dhruv Anand on LinkedIn.

\ No newline at end of file diff --git a/src/vdf_io/notebooks/aerospike-qs.ipynb b/src/vdf_io/notebooks/aerospike-qs.ipynb new file mode 100644 index 0000000..e69de29 diff --git a/src/vdf_io/notebooks/upsert_pinecone.ipynb b/src/vdf_io/notebooks/upsert_pinecone.ipynb index d1341f2..0dd1d8b 100644 --- a/src/vdf_io/notebooks/upsert_pinecone.ipynb +++ b/src/vdf_io/notebooks/upsert_pinecone.ipynb @@ -1,5 +1,32 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import json\n", + "import os\n", + "from dotenv import load_dotenv, find_dotenv\n", + "from typing import List, Dict, Any\n", + "from rich import print as rprint\n", + "\n", + "load_dotenv(find_dotenv(), override=True)\n" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -1120,165 +1147,1209 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Collecting pinecone-datasets\n", - " Downloading pinecone_datasets-0.6.2-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: fsspec<2024.0.0,>=2023.1.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (2023.5.0)\n", - "Collecting gcsfs<2024.0.0,>=2023.1.0 (from pinecone-datasets)\n", - " Downloading gcsfs-2023.12.2.post1-py2.py3-none-any.whl.metadata (1.6 kB)\n", - "Requirement already satisfied: pandas<3.0.0,>=2.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (2.0.2)\n", - "Requirement already satisfied: pinecone-client<3.0.0,>=2.2.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (2.2.4)\n", - "Requirement already satisfied: pyarrow<12.0.0,>=11.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (11.0.0)\n", - "Collecting pydantic<2.0.0,>=1.10.5 (from pinecone-datasets)\n", - " Downloading pydantic-1.10.13-cp39-cp39-macosx_11_0_arm64.whl.metadata (149 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting s3fs<2024.0.0,>=2023.1.0 (from pinecone-datasets)\n", - " Downloading s3fs-2023.12.2-py3-none-any.whl.metadata (1.6 kB)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.65.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (4.65.0)\n", - "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (3.9.1)\n", - "Requirement already satisfied: decorator>4.1.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (5.1.1)\n", - "Collecting fsspec<2024.0.0,>=2023.1.0 (from pinecone-datasets)\n", - " Downloading fsspec-2023.12.2-py3-none-any.whl.metadata (6.8 kB)\n", - "Requirement already satisfied: google-auth>=1.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.17.3)\n", - "Requirement already satisfied: google-auth-oauthlib in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.0.0)\n", - "Requirement already satisfied: 
google-cloud-storage in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.9.0)\n", - "Requirement already satisfied: requests in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.31.0)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets) (2023.3)\n", - "Requirement already satisfied: tzdata>=2022.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets) (2023.3)\n", - "Requirement already satisfied: numpy>=1.20.3 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets) (1.23.5)\n", - "Requirement already satisfied: pyyaml>=5.4 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (6.0)\n", - "Requirement already satisfied: loguru>=0.5.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (0.7.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (4.7.1)\n", - "Requirement already satisfied: dnspython>=2.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (2.3.0)\n", - "Requirement already satisfied: urllib3>=1.21.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (1.26.15)\n", - "Collecting aiobotocore<3.0.0,>=2.5.4 (from s3fs<2024.0.0,>=2023.1.0->pinecone-datasets)\n", - " Downloading aiobotocore-2.9.0-py3-none-any.whl.metadata (20 kB)\n", - "Collecting botocore<1.33.14,>=1.33.2 (from aiobotocore<3.0.0,>=2.5.4->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets)\n", - " Downloading botocore-1.33.13-py3-none-any.whl.metadata (6.1 kB)\n", - "Requirement already satisfied: wrapt<2.0.0,>=1.10.10 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.14.1)\n", - "Requirement already satisfied: aioitertools<1.0.0,>=0.5.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets) (0.11.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (22.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (6.0.4)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.3.3)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from 
aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.3.1)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (4.0.2)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (5.3.0)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (0.3.0)\n", - "Requirement already satisfied: six>=1.9.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.16.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (4.9)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from requests->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.1.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from requests->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from requests->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2023.5.7)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth-oauthlib->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.3.1)\n", - "Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.11.0)\n", - "Requirement already satisfied: google-cloud-core<3.0dev,>=2.3.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.3.2)\n", - "Requirement already satisfied: google-resumable-media>=2.3.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.5.0)\n", - "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from botocore<1.33.14,>=1.33.2->aiobotocore<3.0.0,>=2.5.4->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.0.1)\n", - "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.59.0)\n", - "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (3.20.3)\n", - "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-resumable-media>=2.3.2->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.5.0)\n", - "Requirement 
already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (0.5.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (3.2.2)\n", - "Downloading pinecone_datasets-0.6.2-py3-none-any.whl (12 kB)\n", - "Downloading gcsfs-2023.12.2.post1-py2.py3-none-any.whl (34 kB)\n", - "Downloading fsspec-2023.12.2-py3-none-any.whl (168 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.0/169.0 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydantic-1.10.13-cp39-cp39-macosx_11_0_arm64.whl (2.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading s3fs-2023.12.2-py3-none-any.whl (28 kB)\n", - "Downloading aiobotocore-2.9.0-py3-none-any.whl (75 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.0/76.0 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading botocore-1.33.13-py3-none-any.whl (11.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25h\u001b[33mDEPRECATION: omegaconf 2.0.6 has a non-standard dependency specifier PyYAML>=5.1.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of omegaconf or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mDEPRECATION: torchsde 0.2.5 has a non-standard dependency specifier numpy>=1.19.*; python_version >= \"3.7\". pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of torchsde or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mDEPRECATION: voicefixer 0.1.2 has a non-standard dependency specifier streamlit>=1.12.0pyyaml. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of voicefixer or contact the author to suggest that they release a version with a conforming dependency specifiers. 
Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", - "\u001b[0mInstalling collected packages: pydantic, fsspec, botocore, aiobotocore, s3fs, gcsfs, pinecone-datasets\n", - " Attempting uninstall: pydantic\n", - " Found existing installation: pydantic 2.5.2\n", - " Uninstalling pydantic-2.5.2:\n", - " Successfully uninstalled pydantic-2.5.2\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2023.5.0\n", - " Uninstalling fsspec-2023.5.0:\n", - " Successfully uninstalled fsspec-2023.5.0\n", + "Collecting aiobotocore==2.12.3\n", + " Obtaining dependency information for aiobotocore==2.12.3 from https://files.pythonhosted.org/packages/71/86/bbe79b24d4603c65a67e405661092c2fe0fa9b14e78dc8270bc83777412e/aiobotocore-2.12.3-py3-none-any.whl.metadata\n", + " Downloading aiobotocore-2.12.3-py3-none-any.whl.metadata (21 kB)\n", + "Requirement already satisfied: aioitertools==0.11.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (0.11.0)\n", + "Collecting botocore==1.34.69\n", + " Obtaining dependency information for botocore==1.34.69 from https://files.pythonhosted.org/packages/c6/78/919e50b633035216dfb68627b1a4eac1235148b89b34a28f07fd99e8ac17/botocore-1.34.69-py3-none-any.whl.metadata\n", + " Downloading botocore-1.34.69-py3-none-any.whl.metadata (5.7 kB)\n", + "Requirement already satisfied: fsspec==2023.12.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2023.12.2)\n", + "Requirement already satisfied: gcsfs==2023.12.2.post1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2023.12.2.post1)\n", + "Collecting google-api-core==2.19.0\n", + " Obtaining dependency information for google-api-core==2.19.0 from https://files.pythonhosted.org/packages/2d/ed/e514e0c59cdf1a469b1a1ab21b77698d0692adaa7cbc920c3a0b287e8493/google_api_core-2.19.0-py3-none-any.whl.metadata\n", + " Downloading google_api_core-2.19.0-py3-none-any.whl.metadata (2.7 kB)\n", + "Requirement already satisfied: google-cloud-core==2.4.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2.4.1)\n", + "Collecting google-cloud-storage==2.16.0\n", + " Obtaining dependency information for google-cloud-storage==2.16.0 from https://files.pythonhosted.org/packages/cb/e5/7d045d188f4ef85d94b9e3ae1bf876170c6b9f4c9a950124978efc36f680/google_cloud_storage-2.16.0-py2.py3-none-any.whl.metadata\n", + " Downloading google_cloud_storage-2.16.0-py2.py3-none-any.whl.metadata (6.1 kB)\n", + "Requirement already satisfied: google-crc32c==1.5.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (1.5.0)\n", + "Requirement already satisfied: google-resumable-media==2.7.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2.7.0)\n", + "Collecting googleapis-common-protos==1.63.0\n", + " Obtaining dependency information for googleapis-common-protos==1.63.0 from https://files.pythonhosted.org/packages/dc/a6/12a0c976140511d8bc8a16ad15793b2aef29ac927baa0786ccb7ddbb6e1c/googleapis_common_protos-1.63.0-py2.py3-none-any.whl.metadata\n", + " Downloading googleapis_common_protos-1.63.0-py2.py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting pinecone-client==3.2.2\n", + " Obtaining dependency information for pinecone-client==3.2.2 from https://files.pythonhosted.org/packages/cb/bb/c51fa42d85f431b3b3ec4c35a13a8bb99cafc0671918139a48767421d354/pinecone_client-3.2.2-py3-none-any.whl.metadata\n", + " Using cached pinecone_client-3.2.2-py3-none-any.whl.metadata (16 kB)\n", + "Collecting pinecone-datasets==0.7.0\n", + " Obtaining dependency 
information for pinecone-datasets==0.7.0 from https://files.pythonhosted.org/packages/ba/6d/62d3a757c5c0806078895a0f2b23d33edd977cb51ae233d313580927ffcb/pinecone_datasets-0.7.0-py3-none-any.whl.metadata\n", + " Using cached pinecone_datasets-0.7.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: proto-plus==1.23.0 in /Users/dhruvanand/.local/lib/python3.10/site-packages (1.23.0)\n", + "Requirement already satisfied: pyarrow==11.0.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (11.0.0)\n", + "Requirement already satisfied: pydantic==1.10.15 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (1.10.15)\n", + "Requirement already satisfied: s3fs==2023.12.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2023.12.2)\n", + "Requirement already satisfied: wrapt==1.16.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (1.16.0)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.7.4.post0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiobotocore==2.12.3) (3.9.1)\n", + "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from botocore==1.34.69) (1.0.1)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from botocore==1.34.69) (2.8.2)\n", + "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from botocore==1.34.69) (2.0.7)\n", + "Requirement already satisfied: decorator>4.1.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from gcsfs==2023.12.2.post1) (5.1.1)\n", + "Requirement already satisfied: google-auth>=1.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from gcsfs==2023.12.2.post1) (2.26.1)\n", + "Requirement already satisfied: google-auth-oauthlib in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from gcsfs==2023.12.2.post1) (1.2.0)\n", + "Requirement already satisfied: requests in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from gcsfs==2023.12.2.post1) (2.31.0)\n", + "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-api-core==2.19.0) (4.25.3)\n", + "Requirement already satisfied: certifi>=2019.11.17 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pinecone-client==3.2.2) (2023.11.17)\n", + "Requirement already satisfied: tqdm>=4.64.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pinecone-client==3.2.2) (4.66.4)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pinecone-client==3.2.2) (4.9.0)\n", + "Requirement already satisfied: pandas<3.0.0,>=2.0.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pinecone-datasets==0.7.0) (2.1.4)\n", + "Requirement already satisfied: numpy>=1.16.6 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pyarrow==11.0.0) (1.26.4)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (23.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (6.0.4)\n", + "Requirement 
already satisfied: yarl<2.0,>=1.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (1.9.4)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (1.4.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (1.3.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (4.0.3)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-auth>=1.2->gcsfs==2023.12.2.post1) (5.3.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-auth>=1.2->gcsfs==2023.12.2.post1) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-auth>=1.2->gcsfs==2023.12.2.post1) (4.9)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.7.0) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.7.0) (2023.4)\n", + "Requirement already satisfied: six>=1.5 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from python-dateutil<3.0.0,>=2.1->botocore==1.34.69) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from requests->gcsfs==2023.12.2.post1) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from requests->gcsfs==2023.12.2.post1) (3.6)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-auth-oauthlib->gcsfs==2023.12.2.post1) (1.3.1)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.2->gcsfs==2023.12.2.post1) (0.5.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs==2023.12.2.post1) (3.2.2)\n", + "Downloading aiobotocore-2.12.3-py3-none-any.whl (76 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.5/76.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0meta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading botocore-1.34.69-py3-none-any.whl (12.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.0/12.0 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading google_api_core-2.19.0-py3-none-any.whl (139 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.0/139.0 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_cloud_storage-2.16.0-py2.py3-none-any.whl (125 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.6/125.6 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading googleapis_common_protos-1.63.0-py2.py3-none-any.whl (229 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m229.1/229.1 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hUsing cached pinecone_client-3.2.2-py3-none-any.whl (215 kB)\n", + "Using cached pinecone_datasets-0.7.0-py3-none-any.whl (13 kB)\n", + "Installing collected packages: pinecone-client, googleapis-common-protos, botocore, google-api-core, aiobotocore, google-cloud-storage, pinecone-datasets\n", + " Attempting uninstall: pinecone-client\n", + " Found existing installation: pinecone-client 2.2.4\n", + " Uninstalling pinecone-client-2.2.4:\n", + " Successfully uninstalled pinecone-client-2.2.4\n", + " Attempting uninstall: googleapis-common-protos\n", + " Found existing installation: googleapis-common-protos 1.62.0\n", + " Uninstalling googleapis-common-protos-1.62.0:\n", + " Successfully uninstalled googleapis-common-protos-1.62.0\n", " Attempting uninstall: botocore\n", - " Found existing installation: botocore 1.29.76\n", - " Uninstalling botocore-1.29.76:\n", - " Successfully uninstalled botocore-1.29.76\n", + " Found existing installation: botocore 1.34.28\n", + " Uninstalling botocore-1.34.28:\n", + " Successfully uninstalled botocore-1.34.28\n", + " Attempting uninstall: google-api-core\n", + " Found existing installation: google-api-core 2.15.0\n", + " Uninstalling google-api-core-2.15.0:\n", + " Successfully uninstalled google-api-core-2.15.0\n", " Attempting uninstall: aiobotocore\n", - " Found existing installation: aiobotocore 2.5.0\n", - " Uninstalling aiobotocore-2.5.0:\n", - " Successfully uninstalled aiobotocore-2.5.0\n", + " Found existing installation: aiobotocore 2.11.2\n", + " Uninstalling aiobotocore-2.11.2:\n", + " Successfully uninstalled aiobotocore-2.11.2\n", + " Attempting uninstall: google-cloud-storage\n", + " Found existing installation: google-cloud-storage 2.14.0\n", + " Uninstalling google-cloud-storage-2.14.0:\n", + " Successfully uninstalled google-cloud-storage-2.14.0\n", + " Attempting uninstall: pinecone-datasets\n", + " Found existing installation: pinecone-datasets 0.6.2\n", + " Uninstalling pinecone-datasets-0.6.2:\n", + " Successfully uninstalled pinecone-datasets-0.6.2\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "tts 0.13.3 requires inflect==5.6.0, but you have inflect 6.0.4 which is incompatible.\n", - "tts 0.13.3 requires librosa==0.10.0.*, but you have librosa 0.8.1 which is incompatible.\n", - "tts 0.13.3 requires numpy==1.21.6; python_version < \"3.10\", but you have numpy 1.23.5 which is incompatible.\n", - "tts 0.13.3 requires umap-learn==0.5.1, but you have umap-learn 0.5.3 which is incompatible.\n", - "aioboto3 11.1.0 requires aiobotocore[boto3]==2.5.0, but you have aiobotocore 2.9.0 which is incompatible.\n", - "argilla 1.3.2 requires pandas<2.0.0,>=1.0.0, but you have pandas 2.0.2 which is incompatible.\n", - "boto3 1.26.76 requires botocore<1.30.0,>=1.29.76, but you have botocore 1.33.13 which is incompatible.\n", - "farm-haystack 1.16.1 requires protobuf<=3.20.2, but you have protobuf 3.20.3 which is incompatible.\n", - "farm-haystack 1.16.1 requires transformers[torch]==4.25.1, but you have transformers 4.34.1 which is incompatible.\n", - "fennel-ai 0.14.0 requires grandalf<0.8,>=0.7, but you have grandalf 0.8 which is incompatible.\n", - "fennel-ai 0.14.0 requires pandas<2.0.0,>=1.5.0, but you have pandas 2.0.2 which is incompatible.\n", - "gradio 3.28.3 requires mdit-py-plugins<=0.3.3, but you have mdit-py-plugins 0.3.5 which is incompatible.\n", - "instructor 0.2.5 requires openai<0.28.0,>=0.27.8, but you have openai 1.1.1 which is incompatible.\n", - "instructor 0.2.5 requires pydantic<3.0.0,>=2.0.2, but you have pydantic 1.10.13 which is incompatible.\n", - "langchain-ibis 0.0.100 requires aleph-alpha-client<3.0.0,>=2.15.0, but you have aleph-alpha-client 3.1.0 which is incompatible.\n", - "langchain-ibis 0.0.100 requires SQLAlchemy<2,>=1, but you have sqlalchemy 2.0.23 which is incompatible.\n", - "langflow 0.0.68 requires huggingface-hub<0.14.0,>=0.13.3, but you have huggingface-hub 0.17.3 which is incompatible.\n", - "langflow 0.0.68 requires openai<0.28.0,>=0.27.2, but you have openai 1.1.1 which is incompatible.\n", - "langflow 0.0.68 requires pandas<2.0.0,>=1.5.3, but you have pandas 2.0.2 which is incompatible.\n", - "langflow 0.0.68 requires typer<0.8.0,>=0.7.0, but you have typer 0.4.2 which is incompatible.\n", - "langflow 0.0.68 requires websockets<12.0.0,>=11.0.2, but you have websockets 10.4 which is incompatible.\n", - "llama-index 0.6.18 requires typing-extensions==4.5.0, but you have typing-extensions 4.7.1 which is incompatible.\n", - "mistralai 0.0.1 requires pydantic<3.0.0,>=2.5.2, but you have pydantic 1.10.13 which is incompatible.\n", - "sagemaker 2.152.0 requires PyYAML==5.4.1, but you have pyyaml 6.0 which is incompatible.\n", - "shazamio 0.4.0.1 requires numpy<2.0.0,>=1.24.0, but you have numpy 1.23.5 which is incompatible.\n", - "steamship 2.16.6 requires aiohttp==3.8.3, but you have aiohttp 3.9.1 which is incompatible.\n", - "steamship 2.16.6 requires pydantic==1.10.2, but you have pydantic 1.10.13 which is incompatible.\n", - "steamship 2.16.6 requires requests==2.28.1, but you have requests 2.31.0 which is incompatible.\n", - "steamship 2.16.6 requires semver==2.13.0, but you have semver 3.0.0 which is incompatible.\n", - "steamship 2.16.6 requires tiktoken==0.2.0, but you have tiktoken 0.3.3 which is incompatible.\n", - "steamship-langchain 0.0.20 requires langchain==0.0.152, but you have langchain 0.0.162 which is incompatible.\n", - "tortoise 3.0.0 requires tokenizers<0.14.0,>=0.13.2, but you have tokenizers 0.14.1 which is incompatible.\n", - "tortoise 3.0.0 requires 
torchaudio<0.14.0,>=0.13.1, but you have torchaudio 2.0.2 which is incompatible.\n", - "trainer 0.0.20 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed aiobotocore-2.9.0 botocore-1.33.13 fsspec-2023.12.2 gcsfs-2023.12.2.post1 pinecone-datasets-0.6.2 pydantic-1.10.13 s3fs-2023.12.2\n", - "\u001b[33mWARNING: There was an error checking the latest version of pip.\u001b[0m\u001b[33m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" + "aioboto3 12.3.0 requires aiobotocore[boto3]==2.11.2, but you have aiobotocore 2.12.3 which is incompatible.\n", + "label-studio 1.12.0 requires bleach<5.1.0,>=5.0.0, but you have bleach 6.1.0 which is incompatible.\n", + "label-studio 1.12.0 requires jsonschema==3.2.0, but you have jsonschema 4.21.1 which is incompatible.\n", + "label-studio 1.12.0 requires python-json-logger==2.0.4, but you have python-json-logger 2.0.7 which is incompatible.\n", + "label-studio 1.12.0 requires pytz<2023.0,>=2022.1, but you have pytz 2023.4 which is incompatible.\n", + "label-studio 1.12.0 requires urllib3<2.0.0,>=1.26.18, but you have urllib3 2.0.7 which is incompatible.\n", + "vdf-io 0.1.246 requires pinecone-client~=4.0.0, but you have pinecone-client 3.2.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed aiobotocore-2.12.3 botocore-1.34.69 google-api-core-2.19.0 google-cloud-storage-2.16.0 googleapis-common-protos-1.63.0 pinecone-client-3.2.2 pinecone-datasets-0.7.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ - "%pip install pinecone-datasets" + "%pip install aiobotocore==2.12.3 aioitertools==0.11.0 botocore==1.34.69 fsspec==2023.12.2 gcsfs==2023.12.2.post1 google-api-core==2.19.0 google-cloud-core==2.4.1 google-cloud-storage==2.16.0 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 pinecone-client==3.2.2 pinecone-datasets==0.7.0 proto-plus==1.23.0 pyarrow==11.0.0 pydantic==1.10.15 s3fs==2023.12.2 wrapt==1.16.0" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pinecone_datasets import load_dataset" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pinecone_datasets\n", + "\n", + "ds = pinecone_datasets.load_dataset(\"ANN_Fashion-MNIST_d784_euclidean\")" + ] + }, { "cell_type": "code", "execution_count": 3, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecreated_atdocumentsqueriessourcelicensebuckettaskdense_modelsparse_modeldescriptiontagsargs
0ANN_DEEP1B_d96_angular2023-03-10 14:17:01.481785999000010000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_DEEP1B_d96_angularANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
1ANN_Fashion-MNIST_d784_euclidean2023-03-10 14:17:01.4817856000010000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_Fashion-MNIST_d...ANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
2ANN_GIST_d960_euclidean2023-03-10 14:17:01.48178510000001000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_GIST_d960_eucli...ANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
3ANN_GloVe_d100_angular2023-03-10 14:17:01.481785118351410000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_GloVe_d100_angularANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
4ANN_GloVe_d200_angular2023-03-10 14:17:01.481785118351410000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_GloVe_d200_angularANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
5ANN_GloVe_d25_angular2023-03-10 14:17:01.481785118351410000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_GloVe_d25_angularANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
6ANN_GloVe_d50_angular2023-03-10 14:17:01.481785118351410000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_GloVe_d50_angularANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
7ANN_LastFM_d64_angular2023-03-10 14:17:01.48178529238550000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_LastFM_d64_angularANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
8ANN_MNIST_d784_euclidean2023-03-10 14:17:01.4817856000010000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_MNIST_d784_eucl...ANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
9ANN_NYTimes_d256_angular2023-03-10 14:17:01.48178529000010000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_NYTimes_d256_an...ANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
10ANN_SIFT1M_d128_euclidean2023-03-10 14:17:01.481785100000010000https://github.com/erikbern/ann-benchmarksNonegs://pinecone-datasets-dev/ANN_SIFT1M_d128_euc...ANN{'name': 'ANN benchmark dense model', 'tokeniz...{'name': None, 'tokenizer': None}NoneNoneNone
11amazon_toys_quora_all-MiniLM-L6-bm25Jul 26, 2023 14:17:01.481785100000https://www.kaggle.com/datasets/PromptCloudHQ/...Nonegs://pinecone-datasets-dev/amazon_toys_quora_a...QA{'name': 'sentence-transformers/all-MiniLM-L6-...{'name': 'bm25', 'tokenizer': None}NoneNoneNone
12cohere-7682023-11-04 15:23:40100000001000https://huggingface.co/datasets/Cohere/wikiped...NoneNoneNone{'name': 'cohere', 'tokenizer': None, 'dimensi...None10M vectors from wikipedia-22-12 (english) emb...NoneNone
13it-threat-data-test2023-06-2810429650https://cse-cic-ids2018.s3.ca-central-1.amazon...NoneNoneNone{'name': 'it_threat_model.model', 'tokenizer':...NoneNoneNoneNone
14it-threat-data-train2023-06-2810428670https://cse-cic-ids2018.s3.ca-central-1.amazon...NoneNoneNone{'name': 'it_threat_model.model', 'tokenizer':...NoneNoneNoneNone
15langchain-python-docs-text-embedding-ada-0022023-06-2734760https://huggingface.co/datasets/jamescalam/lan...NoneNoneNone{'name': 'text-embedding-ada-002', 'tokenizer'...NoneNoneNoneNone
16mnist2023-11-04 15:23:406000010000https://huggingface.co/datasets/mnistNoneNoneNone{'name': 'mnist', 'tokenizer': None, 'dimensio...NoneThe MNIST dataset consists of 70,000 28x28 bla...NoneNone
17movielens-user-ratings2023-06-089705820https://huggingface.co/datasets/pinecone/movie...Nonepinecone-datasets-devclassification{'name': 'pinecone/movie-recommender-user-mode...NoneNoneNoneNone
18msmarco-v1-bm25-allMiniLML6V22023-08-03 12:42:2288418236980NoneNoneNoneNone{'name': 'all-minilm-l6-v2', 'tokenizer': None...{'name': 'bm25-k0.9-b0.4', 'tokenizer': None}NoneNoneNone
19nq-768-tasb2023-11-04 15:23:4026808933452https://huggingface.co/datasets/BeIR/nqNoneNoneNone{'name': 'nq-768-tasb', 'tokenizer': None, 'di...NoneNoneNone
20quora_all-MiniLM-L6-bm25-100K2023-06-25 10:00:00.00000010000015000https://quoradata.quora.com/First-Quora-Datase...Nonegs://pinecone-datasets-dev/quora_all-MiniLM-L6...similar questions{'name': 'sentence-transformers/msmarco-MiniLM...{'name': 'naver/splade-cocondenser-ensembledis...NoneNoneNone
21quora_all-MiniLM-L6-bm252023-02-17 14:17:01.48178552293115000https://quoradata.quora.com/First-Quora-Datase...Nonegs://pinecone-datasets-dev/quora_all-MiniLM-L6...similar questions{'name': 'sentence-transformers/msmarco-MiniLM...{'name': 'naver/splade-cocondenser-ensembledis...NoneNoneNone
22quora_all-MiniLM-L6-v2_Splade-100K2023-06-25 11:00:00.00000010000015000https://quoradata.quora.com/First-Quora-Datase...Nonegs://pinecone-datasets-dev/quora_all-MiniLM-L6...similar questions{'name': 'sentence-transformers/msmarco-MiniLM...{'name': 'naver/splade-cocondenser-ensembledis...NoneNoneNone
23quora_all-MiniLM-L6-v2_Splade2023-02-17 14:15:01.48344552293115000https://quoradata.quora.com/First-Quora-Datase...Nonegs://pinecone-datasets-dev/quora_all-MiniLM-L6...similar questions{'name': 'sentence-transformers/msmarco-MiniLM...{'name': 'naver/splade-cocondenser-ensembledis...NoneNoneNone
24squad-text-embedding-ada-0022023-06-29188910https://huggingface.co/datasets/squadNoneNoneNone{'name': 'text-embedding-ada-002', 'tokenizer'...NoneNoneNoneNone
25wikipedia-simple-text-embedding-ada-002-100K2023-06-25 12:00:00.0000001000000wikipediaNonepinecone-datasets-devmultiple{'name': 'text-embedding-ada-002', 'tokenizer'...NoneNoneNoneNone
26wikipedia-simple-text-embedding-ada-0022023-05-28 12:00:26.1704032839450wikipediaNonepinecone-datasets-devmultiple{'name': 'text-embedding-ada-002', 'tokenizer'...NoneNoneNoneNone
27yfcc-100K-filter-euclidean2023-09-04 14:15:39.96772710000000100000big-ann-challenge 2023NoneNoneNone{'name': 'yfcc', 'tokenizer': None, 'dimension...None100K slice of the dataset from the 2023 big an...NoneNone
28yfcc-10M-filter-euclidean2023-08-24 13:51:29.13675910000000100000big-ann-challenge 2023NoneNoneNone{'name': 'yfcc', 'tokenizer': None, 'dimension...NoneDataset from the 2023 big ann challenge - filt...NoneNone
29youtube-transcripts-text-embedding-ada-0022023-06-01 03-22-14.451204389500youtubeNonepinecone-datasets-devmultiple{'name': 'text-embedding-ada-002', 'tokenizer'...NoneNoneNoneNone
\n", + "
" + ], + "text/plain": [ + " name \\\n", + "0 ANN_DEEP1B_d96_angular \n", + "1 ANN_Fashion-MNIST_d784_euclidean \n", + "2 ANN_GIST_d960_euclidean \n", + "3 ANN_GloVe_d100_angular \n", + "4 ANN_GloVe_d200_angular \n", + "5 ANN_GloVe_d25_angular \n", + "6 ANN_GloVe_d50_angular \n", + "7 ANN_LastFM_d64_angular \n", + "8 ANN_MNIST_d784_euclidean \n", + "9 ANN_NYTimes_d256_angular \n", + "10 ANN_SIFT1M_d128_euclidean \n", + "11 amazon_toys_quora_all-MiniLM-L6-bm25 \n", + "12 cohere-768 \n", + "13 it-threat-data-test \n", + "14 it-threat-data-train \n", + "15 langchain-python-docs-text-embedding-ada-002 \n", + "16 mnist \n", + "17 movielens-user-ratings \n", + "18 msmarco-v1-bm25-allMiniLML6V2 \n", + "19 nq-768-tasb \n", + "20 quora_all-MiniLM-L6-bm25-100K \n", + "21 quora_all-MiniLM-L6-bm25 \n", + "22 quora_all-MiniLM-L6-v2_Splade-100K \n", + "23 quora_all-MiniLM-L6-v2_Splade \n", + "24 squad-text-embedding-ada-002 \n", + "25 wikipedia-simple-text-embedding-ada-002-100K \n", + "26 wikipedia-simple-text-embedding-ada-002 \n", + "27 yfcc-100K-filter-euclidean \n", + "28 yfcc-10M-filter-euclidean \n", + "29 youtube-transcripts-text-embedding-ada-002 \n", + "\n", + " created_at documents queries \\\n", + "0 2023-03-10 14:17:01.481785 9990000 10000 \n", + "1 2023-03-10 14:17:01.481785 60000 10000 \n", + "2 2023-03-10 14:17:01.481785 1000000 1000 \n", + "3 2023-03-10 14:17:01.481785 1183514 10000 \n", + "4 2023-03-10 14:17:01.481785 1183514 10000 \n", + "5 2023-03-10 14:17:01.481785 1183514 10000 \n", + "6 2023-03-10 14:17:01.481785 1183514 10000 \n", + "7 2023-03-10 14:17:01.481785 292385 50000 \n", + "8 2023-03-10 14:17:01.481785 60000 10000 \n", + "9 2023-03-10 14:17:01.481785 290000 10000 \n", + "10 2023-03-10 14:17:01.481785 1000000 10000 \n", + "11 Jul 26, 2023 14:17:01.481785 10000 0 \n", + "12 2023-11-04 15:23:40 10000000 1000 \n", + "13 2023-06-28 1042965 0 \n", + "14 2023-06-28 1042867 0 \n", + "15 2023-06-27 3476 0 \n", + "16 2023-11-04 15:23:40 60000 10000 \n", + "17 2023-06-08 970582 0 \n", + "18 2023-08-03 12:42:22 8841823 6980 \n", + "19 2023-11-04 15:23:40 2680893 3452 \n", + "20 2023-06-25 10:00:00.000000 100000 15000 \n", + "21 2023-02-17 14:17:01.481785 522931 15000 \n", + "22 2023-06-25 11:00:00.000000 100000 15000 \n", + "23 2023-02-17 14:15:01.483445 522931 15000 \n", + "24 2023-06-29 18891 0 \n", + "25 2023-06-25 12:00:00.000000 100000 0 \n", + "26 2023-05-28 12:00:26.170403 283945 0 \n", + "27 2023-09-04 14:15:39.967727 10000000 100000 \n", + "28 2023-08-24 13:51:29.136759 10000000 100000 \n", + "29 2023-06-01 03-22-14.451204 38950 0 \n", + "\n", + " source license \\\n", + "0 https://github.com/erikbern/ann-benchmarks None \n", + "1 https://github.com/erikbern/ann-benchmarks None \n", + "2 https://github.com/erikbern/ann-benchmarks None \n", + "3 https://github.com/erikbern/ann-benchmarks None \n", + "4 https://github.com/erikbern/ann-benchmarks None \n", + "5 https://github.com/erikbern/ann-benchmarks None \n", + "6 https://github.com/erikbern/ann-benchmarks None \n", + "7 https://github.com/erikbern/ann-benchmarks None \n", + "8 https://github.com/erikbern/ann-benchmarks None \n", + "9 https://github.com/erikbern/ann-benchmarks None \n", + "10 https://github.com/erikbern/ann-benchmarks None \n", + "11 https://www.kaggle.com/datasets/PromptCloudHQ/... None \n", + "12 https://huggingface.co/datasets/Cohere/wikiped... None \n", + "13 https://cse-cic-ids2018.s3.ca-central-1.amazon... None \n", + "14 https://cse-cic-ids2018.s3.ca-central-1.amazon... 
None \n", + "15 https://huggingface.co/datasets/jamescalam/lan... None \n", + "16 https://huggingface.co/datasets/mnist None \n", + "17 https://huggingface.co/datasets/pinecone/movie... None \n", + "18 None None \n", + "19 https://huggingface.co/datasets/BeIR/nq None \n", + "20 https://quoradata.quora.com/First-Quora-Datase... None \n", + "21 https://quoradata.quora.com/First-Quora-Datase... None \n", + "22 https://quoradata.quora.com/First-Quora-Datase... None \n", + "23 https://quoradata.quora.com/First-Quora-Datase... None \n", + "24 https://huggingface.co/datasets/squad None \n", + "25 wikipedia None \n", + "26 wikipedia None \n", + "27 big-ann-challenge 2023 None \n", + "28 big-ann-challenge 2023 None \n", + "29 youtube None \n", + "\n", + " bucket task \\\n", + "0 gs://pinecone-datasets-dev/ANN_DEEP1B_d96_angular ANN \n", + "1 gs://pinecone-datasets-dev/ANN_Fashion-MNIST_d... ANN \n", + "2 gs://pinecone-datasets-dev/ANN_GIST_d960_eucli... ANN \n", + "3 gs://pinecone-datasets-dev/ANN_GloVe_d100_angular ANN \n", + "4 gs://pinecone-datasets-dev/ANN_GloVe_d200_angular ANN \n", + "5 gs://pinecone-datasets-dev/ANN_GloVe_d25_angular ANN \n", + "6 gs://pinecone-datasets-dev/ANN_GloVe_d50_angular ANN \n", + "7 gs://pinecone-datasets-dev/ANN_LastFM_d64_angular ANN \n", + "8 gs://pinecone-datasets-dev/ANN_MNIST_d784_eucl... ANN \n", + "9 gs://pinecone-datasets-dev/ANN_NYTimes_d256_an... ANN \n", + "10 gs://pinecone-datasets-dev/ANN_SIFT1M_d128_euc... ANN \n", + "11 gs://pinecone-datasets-dev/amazon_toys_quora_a... QA \n", + "12 None None \n", + "13 None None \n", + "14 None None \n", + "15 None None \n", + "16 None None \n", + "17 pinecone-datasets-dev classification \n", + "18 None None \n", + "19 None None \n", + "20 gs://pinecone-datasets-dev/quora_all-MiniLM-L6... similar questions \n", + "21 gs://pinecone-datasets-dev/quora_all-MiniLM-L6... similar questions \n", + "22 gs://pinecone-datasets-dev/quora_all-MiniLM-L6... similar questions \n", + "23 gs://pinecone-datasets-dev/quora_all-MiniLM-L6... similar questions \n", + "24 None None \n", + "25 pinecone-datasets-dev multiple \n", + "26 pinecone-datasets-dev multiple \n", + "27 None None \n", + "28 None None \n", + "29 pinecone-datasets-dev multiple \n", + "\n", + " dense_model \\\n", + "0 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "1 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "2 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "3 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "4 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "5 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "6 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "7 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "8 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "9 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "10 {'name': 'ANN benchmark dense model', 'tokeniz... \n", + "11 {'name': 'sentence-transformers/all-MiniLM-L6-... \n", + "12 {'name': 'cohere', 'tokenizer': None, 'dimensi... \n", + "13 {'name': 'it_threat_model.model', 'tokenizer':... \n", + "14 {'name': 'it_threat_model.model', 'tokenizer':... \n", + "15 {'name': 'text-embedding-ada-002', 'tokenizer'... \n", + "16 {'name': 'mnist', 'tokenizer': None, 'dimensio... \n", + "17 {'name': 'pinecone/movie-recommender-user-mode... \n", + "18 {'name': 'all-minilm-l6-v2', 'tokenizer': None... \n", + "19 {'name': 'nq-768-tasb', 'tokenizer': None, 'di... \n", + "20 {'name': 'sentence-transformers/msmarco-MiniLM... 
\n", + "21 {'name': 'sentence-transformers/msmarco-MiniLM... \n", + "22 {'name': 'sentence-transformers/msmarco-MiniLM... \n", + "23 {'name': 'sentence-transformers/msmarco-MiniLM... \n", + "24 {'name': 'text-embedding-ada-002', 'tokenizer'... \n", + "25 {'name': 'text-embedding-ada-002', 'tokenizer'... \n", + "26 {'name': 'text-embedding-ada-002', 'tokenizer'... \n", + "27 {'name': 'yfcc', 'tokenizer': None, 'dimension... \n", + "28 {'name': 'yfcc', 'tokenizer': None, 'dimension... \n", + "29 {'name': 'text-embedding-ada-002', 'tokenizer'... \n", + "\n", + " sparse_model \\\n", + "0 {'name': None, 'tokenizer': None} \n", + "1 {'name': None, 'tokenizer': None} \n", + "2 {'name': None, 'tokenizer': None} \n", + "3 {'name': None, 'tokenizer': None} \n", + "4 {'name': None, 'tokenizer': None} \n", + "5 {'name': None, 'tokenizer': None} \n", + "6 {'name': None, 'tokenizer': None} \n", + "7 {'name': None, 'tokenizer': None} \n", + "8 {'name': None, 'tokenizer': None} \n", + "9 {'name': None, 'tokenizer': None} \n", + "10 {'name': None, 'tokenizer': None} \n", + "11 {'name': 'bm25', 'tokenizer': None} \n", + "12 None \n", + "13 None \n", + "14 None \n", + "15 None \n", + "16 None \n", + "17 None \n", + "18 {'name': 'bm25-k0.9-b0.4', 'tokenizer': None} \n", + "19 None \n", + "20 {'name': 'naver/splade-cocondenser-ensembledis... \n", + "21 {'name': 'naver/splade-cocondenser-ensembledis... \n", + "22 {'name': 'naver/splade-cocondenser-ensembledis... \n", + "23 {'name': 'naver/splade-cocondenser-ensembledis... \n", + "24 None \n", + "25 None \n", + "26 None \n", + "27 None \n", + "28 None \n", + "29 None \n", + "\n", + " description tags args \n", + "0 None None None \n", + "1 None None None \n", + "2 None None None \n", + "3 None None None \n", + "4 None None None \n", + "5 None None None \n", + "6 None None None \n", + "7 None None None \n", + "8 None None None \n", + "9 None None None \n", + "10 None None None \n", + "11 None None None \n", + "12 10M vectors from wikipedia-22-12 (english) emb... None None \n", + "13 None None None \n", + "14 None None None \n", + "15 None None None \n", + "16 The MNIST dataset consists of 70,000 28x28 bla... None None \n", + "17 None None None \n", + "18 None None None \n", + "19 None None \n", + "20 None None None \n", + "21 None None None \n", + "22 None None None \n", + "23 None None None \n", + "24 None None None \n", + "25 None None None \n", + "26 None None None \n", + "27 100K slice of the dataset from the 2023 big an... None None \n", + "28 Dataset from the 2023 big ann challenge - filt... 
None None \n", + "29 None None None " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_l = pinecone_datasets.list_datasets(as_df=True)\n", + "df_l" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'sentence-transformers/all-MiniLM-L6-v2',\n", + " 'tokenizer': None,\n", + " 'dimension': 384}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_l[df_l[\"name\"] == \"amazon_toys_quora_all-MiniLM-L6-bm25\"].iloc[0].dense_model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, "outputs": [], "source": [ - "dataset = load_dataset(\"ANN_Fashion-MNIST_d784_euclidean\")" + "amz_ds = pinecone_datasets.load_dataset(\"amazon_toys_quora_all-MiniLM-L6-bm25\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idvaluessparse_valuesmetadatablob
0eac7efa5dbd3d667f26eb3d3ab504464[0.0077547780238091946, -0.02774387039244175, ...{'indices': [2182291806, 4287202515, 148124445...{'amazon_category_and_sub_category': 'Hobbies ...{'text': 'Hornby 2014 Catalogue (Hornby): \n", + " Pr...
1b17540ef7e86e461d37f3ae58b7b72ac[0.002257382730022073, -0.03035414218902588, 0...{'indices': [2118423442, 2177509083, 224097760...{'amazon_category_and_sub_category': 'Hobbies ...{'text': 'FunkyBuys® Large Christmas Holiday E...
2348f344247b0c1a935b1223072ef9d8a[-0.003095218911767006, 0.016020774841308594, ...{'indices': [2349888478, 3814962844, 310417642...{'amazon_category_and_sub_category': 'Hobbies ...{'text': 'CLASSIC TOY TRAIN SET TRACK CARRIAGE...
3e12b92dbb8eaee78b22965d2a9bbbd9f[-0.024034591391682625, -0.048526741564273834,...{'indices': [2182291806, 719182917, 1942275469...{'amazon_category_and_sub_category': 'Hobbies ...{'text': 'HORNBY Coach R4410A BR Hawksworth Co...
4e33a9adeed5f36840ccc227db4682a36[-0.07078640908002853, 0.009733847342431545, 0...{'indices': [2182291806, 2415375917, 369727517...{'amazon_category_and_sub_category': 'Hobbies ...{'text': 'Hornby 00 Gauge 0-4-0 Gildenlow Salt...
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 eac7efa5dbd3d667f26eb3d3ab504464 \n", + "1 b17540ef7e86e461d37f3ae58b7b72ac \n", + "2 348f344247b0c1a935b1223072ef9d8a \n", + "3 e12b92dbb8eaee78b22965d2a9bbbd9f \n", + "4 e33a9adeed5f36840ccc227db4682a36 \n", + "\n", + " values \\\n", + "0 [0.0077547780238091946, -0.02774387039244175, ... \n", + "1 [0.002257382730022073, -0.03035414218902588, 0... \n", + "2 [-0.003095218911767006, 0.016020774841308594, ... \n", + "3 [-0.024034591391682625, -0.048526741564273834,... \n", + "4 [-0.07078640908002853, 0.009733847342431545, 0... \n", + "\n", + " sparse_values \\\n", + "0 {'indices': [2182291806, 4287202515, 148124445... \n", + "1 {'indices': [2118423442, 2177509083, 224097760... \n", + "2 {'indices': [2349888478, 3814962844, 310417642... \n", + "3 {'indices': [2182291806, 719182917, 1942275469... \n", + "4 {'indices': [2182291806, 2415375917, 369727517... \n", + "\n", + " metadata \\\n", + "0 {'amazon_category_and_sub_category': 'Hobbies ... \n", + "1 {'amazon_category_and_sub_category': 'Hobbies ... \n", + "2 {'amazon_category_and_sub_category': 'Hobbies ... \n", + "3 {'amazon_category_and_sub_category': 'Hobbies ... \n", + "4 {'amazon_category_and_sub_category': 'Hobbies ... \n", + "\n", + " blob \n", + "0 {'text': 'Hornby 2014 Catalogue (Hornby): \n", + " Pr... \n", + "1 {'text': 'FunkyBuys® Large Christmas Holiday E... \n", + "2 {'text': 'CLASSIC TOY TRAIN SET TRACK CARRIAGE... \n", + "3 {'text': 'HORNBY Coach R4410A BR Hawksworth Co... \n", + "4 {'text': 'Hornby 00 Gauge 0-4-0 Gildenlow Salt... " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "amz_ds.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "384" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(amz_ds.documents.iloc[0][\"values\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pinecone\n", + "\n", + "pc = pinecone.Pinecone(api_key=os.environ[\"PINECONE_API_KEY\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "from pinecone import ServerlessSpec\n", + "\n", + "pc.create_index(\n", + " name=\"amazon-toys-quora-all-minilm-l6-bm25\",\n", + " dimension=384,\n", + " metric=\"cosine\",\n", + " spec=ServerlessSpec(cloud=\"aws\", region=\"us-west-2\"),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(400)\n", + "Reason: Bad Request\n", + "HTTP response headers: HTTPHeaderDict({'Date': 'Wed, 15 May 2024 18:42:51 GMT', 'Content-Type': 'application/json', 'Content-Length': '92', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '178', 'x-pinecone-request-id': '1010414569894152647', 'x-envoy-upstream-service-time': '7', 'server': 'envoy'})\n", + "HTTP response body: {\"code\":3,\"message\":\"Sparse vector size 2211 exceeds the maximum size of 1000\",\"details\":[]}\n", + "\n" + ] + } + ], + "source": [ + "index = pc.Index(\"amazon-toys-quora-all-minilm-l6-bm25\")\n", + "\n", + "for batch in amz_ds.iter_documents(batch_size=100):\n", + " # remove null values from the batch\n", + " # break\n", + " # Metadata value must be a string, number, boolean or list of string\n", + " for doc in batch:\n", + " for key, value in 
doc[\"metadata\"].items():\n", + " if not isinstance(value, (str, int, float, bool, list)):\n", + " doc[\"metadata\"][key] = str(value)\n", + " try:\n", + " index.upsert(vectors=batch)\n", + " except Exception as e:\n", + " print(e)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "510a0234e9724179b3f9b584fdb9e620", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "sending upsert requests: 0%| | 0/10000 [00:00 1\u001b[0m \u001b[43mamz_ds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pinecone_index\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mamazon-toys-quora-all-minilm-l6-bm25\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mshould_create_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mserverless\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcloud\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43maws\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mregion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mus-west-2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone_datasets/dataset.py:569\u001b[0m, in \u001b[0;36mDataset.to_pinecone_index\u001b[0;34m(self, index_name, namespace, should_create_index, batch_size, show_progress, api_key, environment, region, cloud, serverless, **kwargs)\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_pinecone_index(api_key\u001b[38;5;241m=\u001b[39mapi_key, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 569\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upsert_to_index\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 570\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 571\u001b[0m \u001b[43m \u001b[49m\u001b[43mnamespace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnamespace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 572\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 573\u001b[0m \u001b[43m \u001b[49m\u001b[43mshow_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshow_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 574\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone_datasets/dataset.py:444\u001b[0m, in \u001b[0;36mDataset._upsert_to_index\u001b[0;34m(self, index_name, namespace, batch_size, 
show_progress)\u001b[0m\n\u001b[1;32m 439\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_upsert_to_index\u001b[39m(\n\u001b[1;32m 440\u001b[0m \u001b[38;5;28mself\u001b[39m, index_name: \u001b[38;5;28mstr\u001b[39m, namespace: \u001b[38;5;28mstr\u001b[39m, batch_size: \u001b[38;5;28mint\u001b[39m, show_progress: \u001b[38;5;28mbool\u001b[39m\n\u001b[1;32m 441\u001b[0m ):\n\u001b[1;32m 442\u001b[0m pinecone_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pinecone_client\u001b[38;5;241m.\u001b[39mIndex(index_name)\n\u001b[0;32m--> 444\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mpinecone_index\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupsert_from_dataframe\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 445\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdocuments\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSchema\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdocuments_select_columns\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 446\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mall\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 447\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 448\u001b[0m \u001b[43m \u001b[49m\u001b[43mnamespace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnamespace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 449\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 450\u001b[0m \u001b[43m \u001b[49m\u001b[43mshow_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshow_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 451\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupserted_count\u001b[39m\u001b[38;5;124m\"\u001b[39m: res\u001b[38;5;241m.\u001b[39mupserted_count}\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/data/index.py:229\u001b[0m, in \u001b[0;36mIndex.upsert_from_dataframe\u001b[0;34m(self, df, namespace, batch_size, show_progress)\u001b[0m\n\u001b[1;32m 227\u001b[0m results \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_iter_dataframe(df, batch_size\u001b[38;5;241m=\u001b[39mbatch_size):\n\u001b[0;32m--> 229\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupsert\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvectors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnamespace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnamespace\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 230\u001b[0m pbar\u001b[38;5;241m.\u001b[39mupdate(\u001b[38;5;28mlen\u001b[39m(chunk))\n\u001b[1;32m 231\u001b[0m 
results\u001b[38;5;241m.\u001b[39mappend(res)\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/utils/error_handling.py:10\u001b[0m, in \u001b[0;36mvalidate_and_convert_errors..inner_func\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner_func\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m MaxRetryError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e\u001b[38;5;241m.\u001b[39mreason, ProtocolError):\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/data/index.py:168\u001b[0m, in \u001b[0;36mIndex.upsert\u001b[0;34m(self, vectors, namespace, batch_size, show_progress, **kwargs)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masync_req is not supported when batch_size is provided.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTo upsert in parallel, please follow: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 165\u001b[0m )\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m batch_size \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 168\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upsert_batch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvectors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnamespace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_check_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 170\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(batch_size, \u001b[38;5;28mint\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m batch_size \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbatch_size must be a positive integer\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/data/index.py:189\u001b[0m, in \u001b[0;36mIndex._upsert_batch\u001b[0;34m(self, vectors, namespace, _check_type, **kwargs)\u001b[0m\n\u001b[1;32m 186\u001b[0m args_dict \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parse_non_empty_args([(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnamespace\u001b[39m\u001b[38;5;124m\"\u001b[39m, namespace)])\n\u001b[1;32m 187\u001b[0m vec_builder \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mlambda\u001b[39;00m v: VectorFactory\u001b[38;5;241m.\u001b[39mbuild(v, check_type\u001b[38;5;241m=\u001b[39m_check_type)\n\u001b[0;32m--> 189\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_vector_api\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupsert\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[43m \u001b[49m\u001b[43mUpsertRequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 191\u001b[0m \u001b[43m \u001b[49m\u001b[43mvectors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mmap\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvec_builder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvectors\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 192\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[43m_check_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_check_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_OPENAPI_ENDPOINT_PARAMS\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 196\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_OPENAPI_ENDPOINT_PARAMS\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/api_client.py:772\u001b[0m, in 
\u001b[0;36mEndpoint.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 761\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 762\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\" This method is invoked when endpoints are called\u001b[39;00m\n\u001b[1;32m 763\u001b[0m \u001b[38;5;124;03m Example:\u001b[39;00m\n\u001b[1;32m 764\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 770\u001b[0m \n\u001b[1;32m 771\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 772\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcallable\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/api/data_plane_api.py:1084\u001b[0m, in \u001b[0;36mDataPlaneApi.__init__..__upsert\u001b[0;34m(self, upsert_request, **kwargs)\u001b[0m\n\u001b[1;32m 1081\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_host_index\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_host_index\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 1082\u001b[0m kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mupsert_request\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \\\n\u001b[1;32m 1083\u001b[0m upsert_request\n\u001b[0;32m-> 1084\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall_with_http_info\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/api_client.py:834\u001b[0m, in \u001b[0;36mEndpoint.call_with_http_info\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 830\u001b[0m header_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapi_client\u001b[38;5;241m.\u001b[39mselect_header_content_type(\n\u001b[1;32m 831\u001b[0m content_type_headers_list)\n\u001b[1;32m 832\u001b[0m params[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mheader\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mContent-Type\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m header_list\n\u001b[0;32m--> 834\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall_api\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 835\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msettings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mendpoint_path\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msettings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mhttp_method\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 836\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpath\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 837\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mquery\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 838\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mheader\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 839\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbody\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 840\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mform\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 841\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 842\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msettings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mresponse_type\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 843\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth_settings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msettings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mauth\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 844\u001b[0m \u001b[43m \u001b[49m\u001b[43masync_req\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43masync_req\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 845\u001b[0m \u001b[43m \u001b[49m\u001b[43m_check_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m_check_return_type\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 846\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m_return_http_data_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m_return_http_data_only\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 847\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m_preload_content\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 848\u001b[0m \u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m_request_timeout\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 849\u001b[0m \u001b[43m \u001b[49m\u001b[43m_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 850\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_formats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcollection_format\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/api_client.py:409\u001b[0m, in \u001b[0;36mApiClient.call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, async_req, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)\u001b[0m\n\u001b[1;32m 355\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Makes the HTTP request (synchronous) and returns deserialized data.\u001b[39;00m\n\u001b[1;32m 356\u001b[0m \n\u001b[1;32m 357\u001b[0m \u001b[38;5;124;03mTo make an async_req request, set the async_req parameter.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 406\u001b[0m \u001b[38;5;124;03m then the method will return the response directly.\u001b[39;00m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m async_req:\n\u001b[0;32m--> 409\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__call_api\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheader_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauth_settings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 413\u001b[0m \u001b[43m \u001b[49m\u001b[43m_return_http_data_only\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mcollection_formats\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 414\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43m_check_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool\u001b[38;5;241m.\u001b[39mapply_async(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__call_api, (resource_path,\n\u001b[1;32m 418\u001b[0m method, path_params,\n\u001b[1;32m 419\u001b[0m query_params,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 427\u001b[0m _request_timeout,\n\u001b[1;32m 428\u001b[0m _host, _check_type))\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/api_client.py:203\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m PineconeApiException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 202\u001b[0m e\u001b[38;5;241m.\u001b[39mbody \u001b[38;5;241m=\u001b[39m e\u001b[38;5;241m.\u001b[39mbody\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 203\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlast_response \u001b[38;5;241m=\u001b[39m response_data\n\u001b[1;32m 207\u001b[0m return_data \u001b[38;5;241m=\u001b[39m response_data\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/api_client.py:196\u001b[0m, in \u001b[0;36mApiClient.__call_api\u001b[0;34m(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)\u001b[0m\n\u001b[1;32m 192\u001b[0m url \u001b[38;5;241m=\u001b[39m _host \u001b[38;5;241m+\u001b[39m resource_path\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 195\u001b[0m \u001b[38;5;66;03m# perform request and return response\u001b[39;00m\n\u001b[0;32m--> 196\u001b[0m response_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheader_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpost_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_preload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_request_timeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m PineconeApiException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 202\u001b[0m e\u001b[38;5;241m.\u001b[39mbody \u001b[38;5;241m=\u001b[39m e\u001b[38;5;241m.\u001b[39mbody\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/api_client.py:455\u001b[0m, in \u001b[0;36mApiClient.request\u001b[0;34m(self, method, url, query_params, headers, post_params, body, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrest_client\u001b[38;5;241m.\u001b[39mOPTIONS(url,\n\u001b[1;32m 448\u001b[0m query_params\u001b[38;5;241m=\u001b[39mquery_params,\n\u001b[1;32m 449\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m _request_timeout\u001b[38;5;241m=\u001b[39m_request_timeout,\n\u001b[1;32m 453\u001b[0m body\u001b[38;5;241m=\u001b[39mbody)\n\u001b[1;32m 454\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m method \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPOST\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 455\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrest_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPOST\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpost_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_preload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_request_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 461\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 462\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m method \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPUT\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrest_client\u001b[38;5;241m.\u001b[39mPUT(url,\n\u001b[1;32m 464\u001b[0m query_params\u001b[38;5;241m=\u001b[39mquery_params,\n\u001b[1;32m 465\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 468\u001b[0m _request_timeout\u001b[38;5;241m=\u001b[39m_request_timeout,\n\u001b[1;32m 
469\u001b[0m body\u001b[38;5;241m=\u001b[39mbody)\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/rest.py:302\u001b[0m, in \u001b[0;36mRESTClientObject.POST\u001b[0;34m(self, url, headers, query_params, post_params, body, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mPOST\u001b[39m(\u001b[38;5;28mself\u001b[39m, url, headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, query_params\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, post_params\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 301\u001b[0m body\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, _preload_content\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, _request_timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 302\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPOST\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m \u001b[49m\u001b[43mpost_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpost_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 306\u001b[0m \u001b[43m \u001b[49m\u001b[43m_preload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_preload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[43m \u001b[49m\u001b[43m_request_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_request_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/lib/python3.10/site-packages/pinecone/core/client/rest.py:261\u001b[0m, in \u001b[0;36mRESTClientObject.request\u001b[0;34m(self, method, url, query_params, headers, body, post_params, _preload_content, _request_timeout)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;241m500\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m r\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m599\u001b[39m:\n\u001b[1;32m 259\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ServiceException(http_resp\u001b[38;5;241m=\u001b[39mr)\n\u001b[0;32m--> 261\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PineconeApiException(http_resp\u001b[38;5;241m=\u001b[39mr)\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\n", + "\u001b[0;31mPineconeApiException\u001b[0m: (400)\nReason: Bad Request\nHTTP response headers: HTTPHeaderDict({'Date': 'Wed, 15 May 2024 19:16:52 GMT', 'Content-Type': 'application/json', 'Content-Length': '92', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '341', 'x-pinecone-request-id': '4313797377394377488', 'x-envoy-upstream-service-time': '6', 'server': 'envoy'})\nHTTP response body: 
{\"code\":3,\"message\":\"Sparse vector size 2211 exceeds the maximum size of 1000\",\"details\":[]}\n" + ] + } + ], + "source": [ + "amz_ds.to_pinecone_index(\n", + " index_name=\"amazon-toys-quora-all-minilm-l6-bm25\",\n", + " should_create_index=True,\n", + " serverless=True,\n", + " cloud=\"aws\",\n", + " region=\"us-west-2\",\n", + ")" ] }, {