diff --git a/README.html b/README.html new file mode 100644 index 0000000..97ed03f --- /dev/null +++ b/README.html @@ -0,0 +1,504 @@ +
+This library uses a universal format (VDF) for vector datasets to easily export and import data across all vector databases.
+Request support for a VectorDB by voting/commenting on this poll.
+See the Contributing section to add support for your favorite vector database.
Vector Database | Import | Export |
---|---|---|
Pinecone | ✅ | ✅ |
Qdrant | ✅ | ✅ |
Milvus | ✅ | ✅ |
GCP Vertex AI Vector Search | ✅ | ✅ |
KDB.AI | ✅ | ✅ |
LanceDB | ✅ | ✅ |
DataStax Astra DB | ✅ | ✅ |
Chroma | ✅ | ✅ |
Turbopuffer | ✅ | ✅ |

Vector Database | Import | Export |
---|---|---|
pgvector | ❌ | ❌ |
Azure AI Search | ❌ | ❌ |
Weaviate | ❌ | ❌ |
MongoDB Atlas | ❌ | ❌ |
Apache Cassandra | ❌ | ❌ |
txtai | ❌ | ❌ |
SQLite-VSS | ❌ | ❌ |

Vector Database | Import | Export |
---|---|---|
Vespa | ❌ | ❌ |
AWS Neptune | ❌ | ❌ |
Neo4j | ❌ | ❌ |
Marqo | ❌ | ❌ |
OpenSearch | ❌ | ❌ |
Elasticsearch | ❌ | ❌ |
Apache Solr | ❌ | ❌ |
Redis Search | ❌ | ❌ |
ClickHouse | ❌ | ❌ |
USearch | ❌ | ❌ |
Rockset | ❌ | ❌ |
Epsilla | ❌ | ❌ |
Activeloop Deep Lake | ❌ | ❌ |
ApertureDB | ❌ | ❌ |
CrateDB | ❌ | ❌ |
Meilisearch | ❌ | ❌ |
MyScale | ❌ | ❌ |
Nuclia DB | ❌ | ❌ |
OramaSearch | ❌ | ❌ |
Typesense | ❌ | ❌ |
Anari AI | ❌ | ❌ |
Vald | ❌ | ❌ |
pip install vdf-io
+git clone https://github.com/AI-Northstar-Tech/vector-io.git
+cd vector-io
+pip install -r requirements.txt
+class NamespaceMeta(BaseModel):
+    namespace: str
+    index_name: str
+    total_vector_count: int
+    exported_vector_count: int
+    dimensions: int
+    model_name: str | None = None
+    vector_columns: List[str] = ["vector"]
+    data_path: str
+    metric: str | None = None
+    index_config: Optional[Dict[Any, Any]] = None
+    schema_dict: Optional[Dict[str, Any]] = None
+
+class VDFMeta(BaseModel):
+    version: str
+    file_structure: List[str]
+    author: str
+    exported_from: str
+    indexes: Dict[str, List[NamespaceMeta]]
+    exported_at: str
+    id_column: Optional[str] = None
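+To make the schema concrete, here is a minimal sketch of building a VDF_META.json from these models. It assumes the NamespaceMeta/VDFMeta classes above (and their pydantic/typing imports) are in scope; every value below is a hypothetical example, not output from a real export.
+
+# Minimal sketch: all names, counts, and paths are hypothetical examples.
+meta = VDFMeta(
+    version="0.1",
+    file_structure=["VDF_META.json", "my-index/i1.parquet"],  # hypothetical layout
+    author="example-user",
+    exported_from="pinecone",
+    indexes={
+        "my-index": [
+            NamespaceMeta(
+                namespace="",
+                index_name="my-index",
+                total_vector_count=1000,
+                exported_vector_count=1000,
+                dimensions=384,
+                model_name="sentence-transformers/all-MiniLM-L6-v2",
+                vector_columns=["vector"],
+                data_path="my-index/i1.parquet",
+                metric="Cosine",
+            )
+        ]
+    },
+    exported_at="2024-01-01T00:00:00Z",
+)
+# Write the metadata file at the root of the VDF dataset directory.
+with open("VDF_META.json", "w") as f:
+    f.write(meta.json(indent=2))  # pydantic v1 API; use model_dump_json() on v2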
+export_vdf --help
+usage: export_vdf [-h] [-m MODEL_NAME]
+[--max_file_size MAX_FILE_SIZE]
+ [--push_to_hub | --no-push_to_hub]
+ [--public | --no-public]
+ {pinecone,qdrant,kdbai,milvus,vertexai_vectorsearch}
+ ...
+
+Export data from various vector databases to the VDF format for vector datasets
+
+options:
+-h, --help show this help message and exit
+ -m MODEL_NAME, --model_name MODEL_NAME
+ Name of model used
+ --max_file_size MAX_FILE_SIZE
+ Maximum file size in MB (default:
+ 1024)
+ --push_to_hub, --no-push_to_hub
+ Push to hub
+ --public, --no-public
+ Make dataset public (default:
+ False)
+
+Vector Databases:
+Choose the vector database to export data from
+
+{pinecone,qdrant,kdbai,milvus,vertexai_vectorsearch}
+ pinecone Export data from Pinecone
+ qdrant Export data from Qdrant
+ kdbai Export data from KDB.AI
+ milvus Export data from Milvus
+ vertexai_vectorsearch
+ Export data from Vertex AI Vector
+ Search
+import_vdf --help
+usage: import_vdf [-h] [-d DIR] [-s | --subset | --no-subset]
+[--create_new | --no-create_new]
+ {milvus,pinecone,qdrant,vertexai_vectorsearch,kdbai}
+ ...
+
+Import data from VDF to a vector database
+
+options:
+-h, --help show this help message and exit
+ -d DIR, --dir DIR Directory to import
+ -s, --subset, --no-subset
+ Import a subset of data (default: False)
+ --create_new, --no-create_new
+ Create a new index (default: False)
+
+Vector Databases:
+Choose the vector database to import data to
+
+{milvus,pinecone,qdrant,vertexai_vectorsearch,kdbai}
+ milvus Import data to Milvus
+ pinecone Import data to Pinecone
+ qdrant Import data to Qdrant
+ vertexai_vectorsearch
+ Import data to Vertex AI Vector Search
+ kdbai Import data to KDB.AI
+This Python script re-embeds a vector dataset. It takes a directory containing a vector dataset in the VDF format and re-embeds it using a new model. The script also allows you to specify the name of the column containing the text to be embedded.
+reembed_vdf --help
+usage: reembed_vdf [-h] -d DIR [-m NEW_MODEL_NAME]
+[-t TEXT_COLUMN]
+
+Reembed a vector dataset
+
+options:
+-h, --help show this help message and exit
+ -d DIR, --dir DIR Directory of vector dataset in
+ the VDF format
+ -m NEW_MODEL_NAME, --new_model_name NEW_MODEL_NAME
+ Name of new model to be used
+ -t TEXT_COLUMN, --text_column TEXT_COLUMN
+ Name of the column containing
+ text to be embedded
+export_vdf -m hkunlp/instructor-xl --push_to_hub pinecone --environment gcp-starter
+
+import_vdf -d /path/to/vdf/dataset milvus
+
+reembed_vdf -d /path/to/vdf/dataset -m sentence-transformers/all-MiniLM-L6-v2 -t title
+Follow the prompt to select the index and id range to export.
+If you wish to add an import/export implementation for a new vector database, you must also implement the other side of the import/export for the same database. Please fork the repo and send a PR for both the import and export scripts.
+Steps to add a new vector database (ABC):
+Create src/vdf_io/export_vdf/export_abc.py and src/vdf_io/import_vdf/import_abc.py for the new DB; a rough sketch of the pair follows below.
+Export:
+Import:
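+As a rough illustration, a new integration pairs an exporter with an importer. The class and method names below are hypothetical placeholders, not the actual vdf-io base-class API; consult export_abc.py/import_abc.py in the repo for the real hooks.
+
+from typing import Any, Dict
+
+class ExportABC:
+    """Hypothetical exporter for a new vector DB 'ABC' (names are placeholders)."""
+
+    DB_NAME_SLUG = "abc"  # would surface as the CLI subcommand, e.g. `export_vdf abc`
+
+    def __init__(self, args: Dict[str, Any]):
+        self.args = args  # connection details, index selection, etc.
+
+    def get_data(self) -> None:
+        # 1. Connect to ABC and list its indexes/namespaces.
+        # 2. Page through vectors + metadata and write them to Parquet files.
+        # 3. Build NamespaceMeta/VDFMeta (see above) and write VDF_META.json.
+        raise NotImplementedError
+
+class ImportABC:
+    """Hypothetical importer that replays a VDF dataset into 'ABC'."""
+
+    def __init__(self, args: Dict[str, Any]):
+        self.args = args
+
+    def upsert_data(self) -> None:
+        # 1. Read VDF_META.json; create the target index if --create_new was passed.
+        # 2. Stream each namespace's data_path Parquet and upsert vectors in batches.
+        raise NotImplementedError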
+If you wish to change the VDF specification, please open an issue to discuss the change before sending a PR.
+If you wish to improve the efficiency of the import/export scripts, please fork the repo and send a PR.
+Running the scripts in the repo will send anonymous usage data to AI Northstar Tech to help improve the library.
+You can opt out of this by setting the environment variable DISABLE_TELEMETRY_VECTORIO to 1.
+
If you have any questions, please open an issue on the repo or message Dhruv Anand on LinkedIn
\ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 0000000..97ed03f --- /dev/null +++ b/index.html @@ -0,0 +1,504 @@ +(index.html: verbatim copy of README.html above)
\ No newline at end of file diff --git a/src/vdf_io/notebooks/aerospike-qs.ipynb b/src/vdf_io/notebooks/aerospike-qs.ipynb new file mode 100644 index 0000000..e69de29 diff --git a/src/vdf_io/notebooks/upsert_pinecone.ipynb b/src/vdf_io/notebooks/upsert_pinecone.ipynb index d1341f2..0dd1d8b 100644 --- a/src/vdf_io/notebooks/upsert_pinecone.ipynb +++ b/src/vdf_io/notebooks/upsert_pinecone.ipynb @@ -1,5 +1,32 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import json\n", + "import os\n", + "from dotenv import load_dotenv, find_dotenv\n", + "from typing import List, Dict, Any\n", + "from rich import print as rprint\n", + "\n", + "load_dotenv(find_dotenv(), override=True)\n" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -1120,165 +1147,1209 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Collecting pinecone-datasets\n", - " Downloading pinecone_datasets-0.6.2-py3-none-any.whl.metadata (11 kB)\n", - "Requirement already satisfied: fsspec<2024.0.0,>=2023.1.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (2023.5.0)\n", - "Collecting gcsfs<2024.0.0,>=2023.1.0 (from pinecone-datasets)\n", - " Downloading gcsfs-2023.12.2.post1-py2.py3-none-any.whl.metadata (1.6 kB)\n", - "Requirement already satisfied: pandas<3.0.0,>=2.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (2.0.2)\n", - "Requirement already satisfied: pinecone-client<3.0.0,>=2.2.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (2.2.4)\n", - "Requirement already satisfied: pyarrow<12.0.0,>=11.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (11.0.0)\n", - "Collecting pydantic<2.0.0,>=1.10.5 (from pinecone-datasets)\n", - " Downloading pydantic-1.10.13-cp39-cp39-macosx_11_0_arm64.whl.metadata (149 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting s3fs<2024.0.0,>=2023.1.0 (from pinecone-datasets)\n", - " Downloading s3fs-2023.12.2-py3-none-any.whl.metadata (1.6 kB)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.65.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-datasets) (4.65.0)\n", - "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (3.9.1)\n", - "Requirement already satisfied: decorator>4.1.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (5.1.1)\n", - "Collecting fsspec<2024.0.0,>=2023.1.0 (from pinecone-datasets)\n", - " Downloading fsspec-2023.12.2-py3-none-any.whl.metadata (6.8 kB)\n", - "Requirement already satisfied: google-auth>=1.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.17.3)\n", - "Requirement already satisfied: google-auth-oauthlib in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.0.0)\n", - "Requirement already satisfied: 
google-cloud-storage in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.9.0)\n", - "Requirement already satisfied: requests in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.31.0)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets) (2023.3)\n", - "Requirement already satisfied: tzdata>=2022.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets) (2023.3)\n", - "Requirement already satisfied: numpy>=1.20.3 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets) (1.23.5)\n", - "Requirement already satisfied: pyyaml>=5.4 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (6.0)\n", - "Requirement already satisfied: loguru>=0.5.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (0.7.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (4.7.1)\n", - "Requirement already satisfied: dnspython>=2.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (2.3.0)\n", - "Requirement already satisfied: urllib3>=1.21.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pinecone-client<3.0.0,>=2.2.2->pinecone-datasets) (1.26.15)\n", - "Collecting aiobotocore<3.0.0,>=2.5.4 (from s3fs<2024.0.0,>=2023.1.0->pinecone-datasets)\n", - " Downloading aiobotocore-2.9.0-py3-none-any.whl.metadata (20 kB)\n", - "Collecting botocore<1.33.14,>=1.33.2 (from aiobotocore<3.0.0,>=2.5.4->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets)\n", - " Downloading botocore-1.33.13-py3-none-any.whl.metadata (6.1 kB)\n", - "Requirement already satisfied: wrapt<2.0.0,>=1.10.10 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.14.1)\n", - "Requirement already satisfied: aioitertools<1.0.0,>=0.5.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets) (0.11.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (22.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (6.0.4)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.3.3)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from 
aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.3.1)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (4.0.2)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (5.3.0)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (0.3.0)\n", - "Requirement already satisfied: six>=1.9.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.16.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (4.9)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from requests->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.1.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from requests->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from requests->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2023.5.7)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-auth-oauthlib->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.3.1)\n", - "Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.11.0)\n", - "Requirement already satisfied: google-cloud-core<3.0dev,>=2.3.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.3.2)\n", - "Requirement already satisfied: google-resumable-media>=2.3.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (2.5.0)\n", - "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from botocore<1.33.14,>=1.33.2->aiobotocore<3.0.0,>=2.5.4->s3fs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.0.1)\n", - "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.59.0)\n", - "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (3.20.3)\n", - "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from google-resumable-media>=2.3.2->google-cloud-storage->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (1.5.0)\n", - "Requirement 
already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.2->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (0.5.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /opt/homebrew/anaconda3/lib/python3.9/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs<2024.0.0,>=2023.1.0->pinecone-datasets) (3.2.2)\n", - "Downloading pinecone_datasets-0.6.2-py3-none-any.whl (12 kB)\n", - "Downloading gcsfs-2023.12.2.post1-py2.py3-none-any.whl (34 kB)\n", - "Downloading fsspec-2023.12.2-py3-none-any.whl (168 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m169.0/169.0 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading pydantic-1.10.13-cp39-cp39-macosx_11_0_arm64.whl (2.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading s3fs-2023.12.2-py3-none-any.whl (28 kB)\n", - "Downloading aiobotocore-2.9.0-py3-none-any.whl (75 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.0/76.0 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading botocore-1.33.13-py3-none-any.whl (11.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25h\u001b[33mDEPRECATION: omegaconf 2.0.6 has a non-standard dependency specifier PyYAML>=5.1.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of omegaconf or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mDEPRECATION: torchsde 0.2.5 has a non-standard dependency specifier numpy>=1.19.*; python_version >= \"3.7\". pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of torchsde or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mDEPRECATION: voicefixer 0.1.2 has a non-standard dependency specifier streamlit>=1.12.0pyyaml. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of voicefixer or contact the author to suggest that they release a version with a conforming dependency specifiers. 
Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", - "\u001b[0mInstalling collected packages: pydantic, fsspec, botocore, aiobotocore, s3fs, gcsfs, pinecone-datasets\n", - " Attempting uninstall: pydantic\n", - " Found existing installation: pydantic 2.5.2\n", - " Uninstalling pydantic-2.5.2:\n", - " Successfully uninstalled pydantic-2.5.2\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2023.5.0\n", - " Uninstalling fsspec-2023.5.0:\n", - " Successfully uninstalled fsspec-2023.5.0\n", + "Collecting aiobotocore==2.12.3\n", + " Obtaining dependency information for aiobotocore==2.12.3 from https://files.pythonhosted.org/packages/71/86/bbe79b24d4603c65a67e405661092c2fe0fa9b14e78dc8270bc83777412e/aiobotocore-2.12.3-py3-none-any.whl.metadata\n", + " Downloading aiobotocore-2.12.3-py3-none-any.whl.metadata (21 kB)\n", + "Requirement already satisfied: aioitertools==0.11.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (0.11.0)\n", + "Collecting botocore==1.34.69\n", + " Obtaining dependency information for botocore==1.34.69 from https://files.pythonhosted.org/packages/c6/78/919e50b633035216dfb68627b1a4eac1235148b89b34a28f07fd99e8ac17/botocore-1.34.69-py3-none-any.whl.metadata\n", + " Downloading botocore-1.34.69-py3-none-any.whl.metadata (5.7 kB)\n", + "Requirement already satisfied: fsspec==2023.12.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2023.12.2)\n", + "Requirement already satisfied: gcsfs==2023.12.2.post1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2023.12.2.post1)\n", + "Collecting google-api-core==2.19.0\n", + " Obtaining dependency information for google-api-core==2.19.0 from https://files.pythonhosted.org/packages/2d/ed/e514e0c59cdf1a469b1a1ab21b77698d0692adaa7cbc920c3a0b287e8493/google_api_core-2.19.0-py3-none-any.whl.metadata\n", + " Downloading google_api_core-2.19.0-py3-none-any.whl.metadata (2.7 kB)\n", + "Requirement already satisfied: google-cloud-core==2.4.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2.4.1)\n", + "Collecting google-cloud-storage==2.16.0\n", + " Obtaining dependency information for google-cloud-storage==2.16.0 from https://files.pythonhosted.org/packages/cb/e5/7d045d188f4ef85d94b9e3ae1bf876170c6b9f4c9a950124978efc36f680/google_cloud_storage-2.16.0-py2.py3-none-any.whl.metadata\n", + " Downloading google_cloud_storage-2.16.0-py2.py3-none-any.whl.metadata (6.1 kB)\n", + "Requirement already satisfied: google-crc32c==1.5.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (1.5.0)\n", + "Requirement already satisfied: google-resumable-media==2.7.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2.7.0)\n", + "Collecting googleapis-common-protos==1.63.0\n", + " Obtaining dependency information for googleapis-common-protos==1.63.0 from https://files.pythonhosted.org/packages/dc/a6/12a0c976140511d8bc8a16ad15793b2aef29ac927baa0786ccb7ddbb6e1c/googleapis_common_protos-1.63.0-py2.py3-none-any.whl.metadata\n", + " Downloading googleapis_common_protos-1.63.0-py2.py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting pinecone-client==3.2.2\n", + " Obtaining dependency information for pinecone-client==3.2.2 from https://files.pythonhosted.org/packages/cb/bb/c51fa42d85f431b3b3ec4c35a13a8bb99cafc0671918139a48767421d354/pinecone_client-3.2.2-py3-none-any.whl.metadata\n", + " Using cached pinecone_client-3.2.2-py3-none-any.whl.metadata (16 kB)\n", + "Collecting pinecone-datasets==0.7.0\n", + " Obtaining dependency 
information for pinecone-datasets==0.7.0 from https://files.pythonhosted.org/packages/ba/6d/62d3a757c5c0806078895a0f2b23d33edd977cb51ae233d313580927ffcb/pinecone_datasets-0.7.0-py3-none-any.whl.metadata\n", + " Using cached pinecone_datasets-0.7.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: proto-plus==1.23.0 in /Users/dhruvanand/.local/lib/python3.10/site-packages (1.23.0)\n", + "Requirement already satisfied: pyarrow==11.0.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (11.0.0)\n", + "Requirement already satisfied: pydantic==1.10.15 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (1.10.15)\n", + "Requirement already satisfied: s3fs==2023.12.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (2023.12.2)\n", + "Requirement already satisfied: wrapt==1.16.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (1.16.0)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.7.4.post0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiobotocore==2.12.3) (3.9.1)\n", + "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from botocore==1.34.69) (1.0.1)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from botocore==1.34.69) (2.8.2)\n", + "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from botocore==1.34.69) (2.0.7)\n", + "Requirement already satisfied: decorator>4.1.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from gcsfs==2023.12.2.post1) (5.1.1)\n", + "Requirement already satisfied: google-auth>=1.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from gcsfs==2023.12.2.post1) (2.26.1)\n", + "Requirement already satisfied: google-auth-oauthlib in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from gcsfs==2023.12.2.post1) (1.2.0)\n", + "Requirement already satisfied: requests in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from gcsfs==2023.12.2.post1) (2.31.0)\n", + "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-api-core==2.19.0) (4.25.3)\n", + "Requirement already satisfied: certifi>=2019.11.17 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pinecone-client==3.2.2) (2023.11.17)\n", + "Requirement already satisfied: tqdm>=4.64.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pinecone-client==3.2.2) (4.66.4)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pinecone-client==3.2.2) (4.9.0)\n", + "Requirement already satisfied: pandas<3.0.0,>=2.0.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pinecone-datasets==0.7.0) (2.1.4)\n", + "Requirement already satisfied: numpy>=1.16.6 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pyarrow==11.0.0) (1.26.4)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (23.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (6.0.4)\n", + "Requirement 
already satisfied: yarl<2.0,>=1.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (1.9.4)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (1.4.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (1.3.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.7.4.post0->aiobotocore==2.12.3) (4.0.3)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-auth>=1.2->gcsfs==2023.12.2.post1) (5.3.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-auth>=1.2->gcsfs==2023.12.2.post1) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-auth>=1.2->gcsfs==2023.12.2.post1) (4.9)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.7.0) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pandas<3.0.0,>=2.0.0->pinecone-datasets==0.7.0) (2023.4)\n", + "Requirement already satisfied: six>=1.5 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from python-dateutil<3.0.0,>=2.1->botocore==1.34.69) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from requests->gcsfs==2023.12.2.post1) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from requests->gcsfs==2023.12.2.post1) (3.6)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from google-auth-oauthlib->gcsfs==2023.12.2.post1) (1.3.1)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.2->gcsfs==2023.12.2.post1) (0.5.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /Users/dhruvanand/miniforge3/lib/python3.10/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->gcsfs==2023.12.2.post1) (3.2.2)\n", + "Downloading aiobotocore-2.12.3-py3-none-any.whl (76 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.5/76.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0meta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading botocore-1.34.69-py3-none-any.whl (12.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.0/12.0 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading google_api_core-2.19.0-py3-none-any.whl (139 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.0/139.0 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_cloud_storage-2.16.0-py2.py3-none-any.whl (125 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.6/125.6 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading googleapis_common_protos-1.63.0-py2.py3-none-any.whl (229 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m229.1/229.1 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hUsing cached pinecone_client-3.2.2-py3-none-any.whl (215 kB)\n", + "Using cached pinecone_datasets-0.7.0-py3-none-any.whl (13 kB)\n", + "Installing collected packages: pinecone-client, googleapis-common-protos, botocore, google-api-core, aiobotocore, google-cloud-storage, pinecone-datasets\n", + " Attempting uninstall: pinecone-client\n", + " Found existing installation: pinecone-client 2.2.4\n", + " Uninstalling pinecone-client-2.2.4:\n", + " Successfully uninstalled pinecone-client-2.2.4\n", + " Attempting uninstall: googleapis-common-protos\n", + " Found existing installation: googleapis-common-protos 1.62.0\n", + " Uninstalling googleapis-common-protos-1.62.0:\n", + " Successfully uninstalled googleapis-common-protos-1.62.0\n", " Attempting uninstall: botocore\n", - " Found existing installation: botocore 1.29.76\n", - " Uninstalling botocore-1.29.76:\n", - " Successfully uninstalled botocore-1.29.76\n", + " Found existing installation: botocore 1.34.28\n", + " Uninstalling botocore-1.34.28:\n", + " Successfully uninstalled botocore-1.34.28\n", + " Attempting uninstall: google-api-core\n", + " Found existing installation: google-api-core 2.15.0\n", + " Uninstalling google-api-core-2.15.0:\n", + " Successfully uninstalled google-api-core-2.15.0\n", " Attempting uninstall: aiobotocore\n", - " Found existing installation: aiobotocore 2.5.0\n", - " Uninstalling aiobotocore-2.5.0:\n", - " Successfully uninstalled aiobotocore-2.5.0\n", + " Found existing installation: aiobotocore 2.11.2\n", + " Uninstalling aiobotocore-2.11.2:\n", + " Successfully uninstalled aiobotocore-2.11.2\n", + " Attempting uninstall: google-cloud-storage\n", + " Found existing installation: google-cloud-storage 2.14.0\n", + " Uninstalling google-cloud-storage-2.14.0:\n", + " Successfully uninstalled google-cloud-storage-2.14.0\n", + " Attempting uninstall: pinecone-datasets\n", + " Found existing installation: pinecone-datasets 0.6.2\n", + " Uninstalling pinecone-datasets-0.6.2:\n", + " Successfully uninstalled pinecone-datasets-0.6.2\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "tts 0.13.3 requires inflect==5.6.0, but you have inflect 6.0.4 which is incompatible.\n", - "tts 0.13.3 requires librosa==0.10.0.*, but you have librosa 0.8.1 which is incompatible.\n", - "tts 0.13.3 requires numpy==1.21.6; python_version < \"3.10\", but you have numpy 1.23.5 which is incompatible.\n", - "tts 0.13.3 requires umap-learn==0.5.1, but you have umap-learn 0.5.3 which is incompatible.\n", - "aioboto3 11.1.0 requires aiobotocore[boto3]==2.5.0, but you have aiobotocore 2.9.0 which is incompatible.\n", - "argilla 1.3.2 requires pandas<2.0.0,>=1.0.0, but you have pandas 2.0.2 which is incompatible.\n", - "boto3 1.26.76 requires botocore<1.30.0,>=1.29.76, but you have botocore 1.33.13 which is incompatible.\n", - "farm-haystack 1.16.1 requires protobuf<=3.20.2, but you have protobuf 3.20.3 which is incompatible.\n", - "farm-haystack 1.16.1 requires transformers[torch]==4.25.1, but you have transformers 4.34.1 which is incompatible.\n", - "fennel-ai 0.14.0 requires grandalf<0.8,>=0.7, but you have grandalf 0.8 which is incompatible.\n", - "fennel-ai 0.14.0 requires pandas<2.0.0,>=1.5.0, but you have pandas 2.0.2 which is incompatible.\n", - "gradio 3.28.3 requires mdit-py-plugins<=0.3.3, but you have mdit-py-plugins 0.3.5 which is incompatible.\n", - "instructor 0.2.5 requires openai<0.28.0,>=0.27.8, but you have openai 1.1.1 which is incompatible.\n", - "instructor 0.2.5 requires pydantic<3.0.0,>=2.0.2, but you have pydantic 1.10.13 which is incompatible.\n", - "langchain-ibis 0.0.100 requires aleph-alpha-client<3.0.0,>=2.15.0, but you have aleph-alpha-client 3.1.0 which is incompatible.\n", - "langchain-ibis 0.0.100 requires SQLAlchemy<2,>=1, but you have sqlalchemy 2.0.23 which is incompatible.\n", - "langflow 0.0.68 requires huggingface-hub<0.14.0,>=0.13.3, but you have huggingface-hub 0.17.3 which is incompatible.\n", - "langflow 0.0.68 requires openai<0.28.0,>=0.27.2, but you have openai 1.1.1 which is incompatible.\n", - "langflow 0.0.68 requires pandas<2.0.0,>=1.5.3, but you have pandas 2.0.2 which is incompatible.\n", - "langflow 0.0.68 requires typer<0.8.0,>=0.7.0, but you have typer 0.4.2 which is incompatible.\n", - "langflow 0.0.68 requires websockets<12.0.0,>=11.0.2, but you have websockets 10.4 which is incompatible.\n", - "llama-index 0.6.18 requires typing-extensions==4.5.0, but you have typing-extensions 4.7.1 which is incompatible.\n", - "mistralai 0.0.1 requires pydantic<3.0.0,>=2.5.2, but you have pydantic 1.10.13 which is incompatible.\n", - "sagemaker 2.152.0 requires PyYAML==5.4.1, but you have pyyaml 6.0 which is incompatible.\n", - "shazamio 0.4.0.1 requires numpy<2.0.0,>=1.24.0, but you have numpy 1.23.5 which is incompatible.\n", - "steamship 2.16.6 requires aiohttp==3.8.3, but you have aiohttp 3.9.1 which is incompatible.\n", - "steamship 2.16.6 requires pydantic==1.10.2, but you have pydantic 1.10.13 which is incompatible.\n", - "steamship 2.16.6 requires requests==2.28.1, but you have requests 2.31.0 which is incompatible.\n", - "steamship 2.16.6 requires semver==2.13.0, but you have semver 3.0.0 which is incompatible.\n", - "steamship 2.16.6 requires tiktoken==0.2.0, but you have tiktoken 0.3.3 which is incompatible.\n", - "steamship-langchain 0.0.20 requires langchain==0.0.152, but you have langchain 0.0.162 which is incompatible.\n", - "tortoise 3.0.0 requires tokenizers<0.14.0,>=0.13.2, but you have tokenizers 0.14.1 which is incompatible.\n", - "tortoise 3.0.0 requires 
torchaudio<0.14.0,>=0.13.1, but you have torchaudio 2.0.2 which is incompatible.\n", - "trainer 0.0.20 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed aiobotocore-2.9.0 botocore-1.33.13 fsspec-2023.12.2 gcsfs-2023.12.2.post1 pinecone-datasets-0.6.2 pydantic-1.10.13 s3fs-2023.12.2\n", - "\u001b[33mWARNING: There was an error checking the latest version of pip.\u001b[0m\u001b[33m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" + "aioboto3 12.3.0 requires aiobotocore[boto3]==2.11.2, but you have aiobotocore 2.12.3 which is incompatible.\n", + "label-studio 1.12.0 requires bleach<5.1.0,>=5.0.0, but you have bleach 6.1.0 which is incompatible.\n", + "label-studio 1.12.0 requires jsonschema==3.2.0, but you have jsonschema 4.21.1 which is incompatible.\n", + "label-studio 1.12.0 requires python-json-logger==2.0.4, but you have python-json-logger 2.0.7 which is incompatible.\n", + "label-studio 1.12.0 requires pytz<2023.0,>=2022.1, but you have pytz 2023.4 which is incompatible.\n", + "label-studio 1.12.0 requires urllib3<2.0.0,>=1.26.18, but you have urllib3 2.0.7 which is incompatible.\n", + "vdf-io 0.1.246 requires pinecone-client~=4.0.0, but you have pinecone-client 3.2.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed aiobotocore-2.12.3 botocore-1.34.69 google-api-core-2.19.0 google-cloud-storage-2.16.0 googleapis-common-protos-1.63.0 pinecone-client-3.2.2 pinecone-datasets-0.7.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ - "%pip install pinecone-datasets" + "%pip install aiobotocore==2.12.3 aioitertools==0.11.0 botocore==1.34.69 fsspec==2023.12.2 gcsfs==2023.12.2.post1 google-api-core==2.19.0 google-cloud-core==2.4.1 google-cloud-storage==2.16.0 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 pinecone-client==3.2.2 pinecone-datasets==0.7.0 proto-plus==1.23.0 pyarrow==11.0.0 pydantic==1.10.15 s3fs==2023.12.2 wrapt==1.16.0" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pinecone_datasets import load_dataset" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pinecone_datasets\n", + "\n", + "ds = pinecone_datasets.load_dataset(\"ANN_Fashion-MNIST_d784_euclidean\")" + ] + }, { "cell_type": "code", "execution_count": 3, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " | name | \n", + "created_at | \n", + "documents | \n", + "queries | \n", + "source | \n", + "license | \n", + "bucket | \n", + "task | \n", + "dense_model | \n", + "sparse_model | \n", + "description | \n", + "tags | \n", + "args | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "ANN_DEEP1B_d96_angular | \n", + "2023-03-10 14:17:01.481785 | \n", + "9990000 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_DEEP1B_d96_angular | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
1 | \n", + "ANN_Fashion-MNIST_d784_euclidean | \n", + "2023-03-10 14:17:01.481785 | \n", + "60000 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_Fashion-MNIST_d... | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
2 | \n", + "ANN_GIST_d960_euclidean | \n", + "2023-03-10 14:17:01.481785 | \n", + "1000000 | \n", + "1000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_GIST_d960_eucli... | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
3 | \n", + "ANN_GloVe_d100_angular | \n", + "2023-03-10 14:17:01.481785 | \n", + "1183514 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_GloVe_d100_angular | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
4 | \n", + "ANN_GloVe_d200_angular | \n", + "2023-03-10 14:17:01.481785 | \n", + "1183514 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_GloVe_d200_angular | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
5 | \n", + "ANN_GloVe_d25_angular | \n", + "2023-03-10 14:17:01.481785 | \n", + "1183514 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_GloVe_d25_angular | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
6 | \n", + "ANN_GloVe_d50_angular | \n", + "2023-03-10 14:17:01.481785 | \n", + "1183514 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_GloVe_d50_angular | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
7 | \n", + "ANN_LastFM_d64_angular | \n", + "2023-03-10 14:17:01.481785 | \n", + "292385 | \n", + "50000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_LastFM_d64_angular | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
8 | \n", + "ANN_MNIST_d784_euclidean | \n", + "2023-03-10 14:17:01.481785 | \n", + "60000 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_MNIST_d784_eucl... | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
9 | \n", + "ANN_NYTimes_d256_angular | \n", + "2023-03-10 14:17:01.481785 | \n", + "290000 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_NYTimes_d256_an... | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
10 | \n", + "ANN_SIFT1M_d128_euclidean | \n", + "2023-03-10 14:17:01.481785 | \n", + "1000000 | \n", + "10000 | \n", + "https://github.com/erikbern/ann-benchmarks | \n", + "None | \n", + "gs://pinecone-datasets-dev/ANN_SIFT1M_d128_euc... | \n", + "ANN | \n", + "{'name': 'ANN benchmark dense model', 'tokeniz... | \n", + "{'name': None, 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
11 | \n", + "amazon_toys_quora_all-MiniLM-L6-bm25 | \n", + "Jul 26, 2023 14:17:01.481785 | \n", + "10000 | \n", + "0 | \n", + "https://www.kaggle.com/datasets/PromptCloudHQ/... | \n", + "None | \n", + "gs://pinecone-datasets-dev/amazon_toys_quora_a... | \n", + "QA | \n", + "{'name': 'sentence-transformers/all-MiniLM-L6-... | \n", + "{'name': 'bm25', 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
12 | \n", + "cohere-768 | \n", + "2023-11-04 15:23:40 | \n", + "10000000 | \n", + "1000 | \n", + "https://huggingface.co/datasets/Cohere/wikiped... | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'cohere', 'tokenizer': None, 'dimensi... | \n", + "None | \n", + "10M vectors from wikipedia-22-12 (english) emb... | \n", + "None | \n", + "None | \n", + "
13 | \n", + "it-threat-data-test | \n", + "2023-06-28 | \n", + "1042965 | \n", + "0 | \n", + "https://cse-cic-ids2018.s3.ca-central-1.amazon... | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'it_threat_model.model', 'tokenizer':... | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "
14 | \n", + "it-threat-data-train | \n", + "2023-06-28 | \n", + "1042867 | \n", + "0 | \n", + "https://cse-cic-ids2018.s3.ca-central-1.amazon... | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'it_threat_model.model', 'tokenizer':... | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "
15 | \n", + "langchain-python-docs-text-embedding-ada-002 | \n", + "2023-06-27 | \n", + "3476 | \n", + "0 | \n", + "https://huggingface.co/datasets/jamescalam/lan... | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'text-embedding-ada-002', 'tokenizer'... | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "
16 | \n", + "mnist | \n", + "2023-11-04 15:23:40 | \n", + "60000 | \n", + "10000 | \n", + "https://huggingface.co/datasets/mnist | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'mnist', 'tokenizer': None, 'dimensio... | \n", + "None | \n", + "The MNIST dataset consists of 70,000 28x28 bla... | \n", + "None | \n", + "None | \n", + "
17 | \n", + "movielens-user-ratings | \n", + "2023-06-08 | \n", + "970582 | \n", + "0 | \n", + "https://huggingface.co/datasets/pinecone/movie... | \n", + "None | \n", + "pinecone-datasets-dev | \n", + "classification | \n", + "{'name': 'pinecone/movie-recommender-user-mode... | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "
18 | \n", + "msmarco-v1-bm25-allMiniLML6V2 | \n", + "2023-08-03 12:42:22 | \n", + "8841823 | \n", + "6980 | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'all-minilm-l6-v2', 'tokenizer': None... | \n", + "{'name': 'bm25-k0.9-b0.4', 'tokenizer': None} | \n", + "None | \n", + "None | \n", + "None | \n", + "
19 | \n", + "nq-768-tasb | \n", + "2023-11-04 15:23:40 | \n", + "2680893 | \n", + "3452 | \n", + "https://huggingface.co/datasets/BeIR/nq | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'nq-768-tasb', 'tokenizer': None, 'di... | \n", + "None | \n", + "\n", + " | None | \n", + "None | \n", + "
20 | \n", + "quora_all-MiniLM-L6-bm25-100K | \n", + "2023-06-25 10:00:00.000000 | \n", + "100000 | \n", + "15000 | \n", + "https://quoradata.quora.com/First-Quora-Datase... | \n", + "None | \n", + "gs://pinecone-datasets-dev/quora_all-MiniLM-L6... | \n", + "similar questions | \n", + "{'name': 'sentence-transformers/msmarco-MiniLM... | \n", + "{'name': 'naver/splade-cocondenser-ensembledis... | \n", + "None | \n", + "None | \n", + "None | \n", + "
21 | \n", + "quora_all-MiniLM-L6-bm25 | \n", + "2023-02-17 14:17:01.481785 | \n", + "522931 | \n", + "15000 | \n", + "https://quoradata.quora.com/First-Quora-Datase... | \n", + "None | \n", + "gs://pinecone-datasets-dev/quora_all-MiniLM-L6... | \n", + "similar questions | \n", + "{'name': 'sentence-transformers/msmarco-MiniLM... | \n", + "{'name': 'naver/splade-cocondenser-ensembledis... | \n", + "None | \n", + "None | \n", + "None | \n", + "
22 | \n", + "quora_all-MiniLM-L6-v2_Splade-100K | \n", + "2023-06-25 11:00:00.000000 | \n", + "100000 | \n", + "15000 | \n", + "https://quoradata.quora.com/First-Quora-Datase... | \n", + "None | \n", + "gs://pinecone-datasets-dev/quora_all-MiniLM-L6... | \n", + "similar questions | \n", + "{'name': 'sentence-transformers/msmarco-MiniLM... | \n", + "{'name': 'naver/splade-cocondenser-ensembledis... | \n", + "None | \n", + "None | \n", + "None | \n", + "
23 | \n", + "quora_all-MiniLM-L6-v2_Splade | \n", + "2023-02-17 14:15:01.483445 | \n", + "522931 | \n", + "15000 | \n", + "https://quoradata.quora.com/First-Quora-Datase... | \n", + "None | \n", + "gs://pinecone-datasets-dev/quora_all-MiniLM-L6... | \n", + "similar questions | \n", + "{'name': 'sentence-transformers/msmarco-MiniLM... | \n", + "{'name': 'naver/splade-cocondenser-ensembledis... | \n", + "None | \n", + "None | \n", + "None | \n", + "
24 | \n", + "squad-text-embedding-ada-002 | \n", + "2023-06-29 | \n", + "18891 | \n", + "0 | \n", + "https://huggingface.co/datasets/squad | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'text-embedding-ada-002', 'tokenizer'... | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "
25 | \n", + "wikipedia-simple-text-embedding-ada-002-100K | \n", + "2023-06-25 12:00:00.000000 | \n", + "100000 | \n", + "0 | \n", + "wikipedia | \n", + "None | \n", + "pinecone-datasets-dev | \n", + "multiple | \n", + "{'name': 'text-embedding-ada-002', 'tokenizer'... | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "
26 | \n", + "wikipedia-simple-text-embedding-ada-002 | \n", + "2023-05-28 12:00:26.170403 | \n", + "283945 | \n", + "0 | \n", + "wikipedia | \n", + "None | \n", + "pinecone-datasets-dev | \n", + "multiple | \n", + "{'name': 'text-embedding-ada-002', 'tokenizer'... | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "
27 | \n", + "yfcc-100K-filter-euclidean | \n", + "2023-09-04 14:15:39.967727 | \n", + "10000000 | \n", + "100000 | \n", + "big-ann-challenge 2023 | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'yfcc', 'tokenizer': None, 'dimension... | \n", + "None | \n", + "100K slice of the dataset from the 2023 big an... | \n", + "None | \n", + "None | \n", + "
28 | \n", + "yfcc-10M-filter-euclidean | \n", + "2023-08-24 13:51:29.136759 | \n", + "10000000 | \n", + "100000 | \n", + "big-ann-challenge 2023 | \n", + "None | \n", + "None | \n", + "None | \n", + "{'name': 'yfcc', 'tokenizer': None, 'dimension... | \n", + "None | \n", + "Dataset from the 2023 big ann challenge - filt... | \n", + "None | \n", + "None | \n", + "
29 | \n", + "youtube-transcripts-text-embedding-ada-002 | \n", + "2023-06-01 03-22-14.451204 | \n", + "38950 | \n", + "0 | \n", + "youtube | \n", + "None | \n", + "pinecone-datasets-dev | \n", + "multiple | \n", + "{'name': 'text-embedding-ada-002', 'tokenizer'... | \n", + "None | \n", + "None | \n", + "None | \n", + "None | \n", + "
\n", + " | id | \n", + "values | \n", + "sparse_values | \n", + "metadata | \n", + "blob | \n", + "
---|---|---|---|---|---|
0 | \n", + "eac7efa5dbd3d667f26eb3d3ab504464 | \n", + "[0.0077547780238091946, -0.02774387039244175, ... | \n", + "{'indices': [2182291806, 4287202515, 148124445... | \n", + "{'amazon_category_and_sub_category': 'Hobbies ... | \n", + "{'text': 'Hornby 2014 Catalogue (Hornby): \n", + " Pr... | \n", + "
1 | \n", + "b17540ef7e86e461d37f3ae58b7b72ac | \n", + "[0.002257382730022073, -0.03035414218902588, 0... | \n", + "{'indices': [2118423442, 2177509083, 224097760... | \n", + "{'amazon_category_and_sub_category': 'Hobbies ... | \n", + "{'text': 'FunkyBuys® Large Christmas Holiday E... | \n", + "
2 | \n", + "348f344247b0c1a935b1223072ef9d8a | \n", + "[-0.003095218911767006, 0.016020774841308594, ... | \n", + "{'indices': [2349888478, 3814962844, 310417642... | \n", + "{'amazon_category_and_sub_category': 'Hobbies ... | \n", + "{'text': 'CLASSIC TOY TRAIN SET TRACK CARRIAGE... | \n", + "
3 | \n", + "e12b92dbb8eaee78b22965d2a9bbbd9f | \n", + "[-0.024034591391682625, -0.048526741564273834,... | \n", + "{'indices': [2182291806, 719182917, 1942275469... | \n", + "{'amazon_category_and_sub_category': 'Hobbies ... | \n", + "{'text': 'HORNBY Coach R4410A BR Hawksworth Co... | \n", + "
4 | \n", + "e33a9adeed5f36840ccc227db4682a36 | \n", + "[-0.07078640908002853, 0.009733847342431545, 0... | \n", + "{'indices': [2182291806, 2415375917, 369727517... | \n", + "{'amazon_category_and_sub_category': 'Hobbies ... | \n", + "{'text': 'Hornby 00 Gauge 0-4-0 Gildenlow Salt... | \n", + "