diff --git a/.github/workflows/json-bundle.yml b/.github/workflows/json-bundle.yml new file mode 100644 index 000000000..413b1d895 --- /dev/null +++ b/.github/workflows/json-bundle.yml @@ -0,0 +1,52 @@ +name: Bundle JSON files +on: + push: + branches: + - main + +jobs: + bundle: + runs-on: ubuntu-latest + permissions: + contents: 'read' + id-token: 'write' + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v3 + with: + python-version: '3.12' + + - uses: getsentry/action-setup-venv@v1.0.4 + id: venv + with: + python-version: '3.12' + cache-dependency-path: | + docs/tools/vdb_table/requirements.txt + install-cmd: pip install -r docs/tools/vdb_table/requirements.txt + + - name: Generate bundle + id: bundle + run: | + python docs/tools/vdb_table/data_utils.py json_to_bundle -dd "docs/tools/vdb_table/data/*" + + - id: 'auth' + uses: 'google-github-actions/auth@v2' + with: + workload_identity_provider: 'projects/903342166386/locations/global/workloadIdentityPools/github-pool/providers/github-provider' + service_account: 'github-actions-sa@superlinked-vectorhub.iam.gserviceaccount.com' + + - id: 'upload-bundle' + uses: 'google-github-actions/upload-cloud-storage@v2' + with: + path: './bundle.json' + destination: 'vectorhub-bundle/vectorhub-bundle' + + - id: 'upload-schema' + uses: 'google-github-actions/upload-cloud-storage@v2' + with: + path: 'docs/tools/vdb_table/vendor.schema.json' + destination: 'vectorhub-bundle/vendor.schema.json' \ No newline at end of file diff --git a/.github/workflows/json-validate.yml b/.github/workflows/json-validate.yml index 7a2fdb84c..2f98f4229 100644 --- a/.github/workflows/json-validate.yml +++ b/.github/workflows/json-validate.yml @@ -18,5 +18,5 @@ jobs: uses: GrantBirki/json-yaml-validate@v2.4.0 with: base_dir: docs/tools/vdb_table/data - json_schema: docs/tools/vdb_table/data/vendor.schema.json + json_schema: docs/tools/vdb_table/vendor.schema.json json_schema_version: 
"draft-2020-12" diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..d558a0306 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +venv/ +.env \ No newline at end of file diff --git a/docs/tools/vdb_table/data/activeloop.json b/docs/tools/vdb_table/data/activeloop.json new file mode 100644 index 000000000..4a1738365 --- /dev/null +++ b/docs/tools/vdb_table/data/activeloop.json @@ -0,0 +1,153 @@ +{ + "name": "Activeloop Deep Lake", + "links": { + "docs": "https://docs.activeloop.ai/", + "github": "https://github.com/activeloopai/deeplake", + "website": "https://www.activeloop.ai/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/97", + "poc_github": "https://github.com/davidbuniat", + "slug": "activeloop" + }, + "oss": { + "support": "full", + "source_url": "https://github.com/activeloopai/deeplake", + "comment": "https://github.com/activeloopai/deeplake" + }, + "license": { + "value": "MPL 2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "python", + "c++" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 7200, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "https://docs.activeloop.ai/performance-features/querying-datasets/query-syntax", + "comment": "While you can run embedding search + contains(text, 'keywoard') or multiple those (keyword search inside text tensor) since BM25 not available I wouldn't call a full hybrid search. 
https://docs.activeloop.ai/performance-features/querying-datasets/query-syntax" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "none", + "source_url": "", + "comment": "although doable with UDFs and location storage" + }, + "multi_vec": { + "support": "full", + "source_url": "https://docs.activeloop.ai/technical-details/data-layout", + "comment": "https://docs.activeloop.ai/technical-details/data-layout" + }, + "sparse_vectors": { + "support": "partial", + "source_url": "https://docs.deeplake.ai/en/latest/Htypes.html", + "comment": "no native sparse vector support, although it supports all numpy arrays hence can also store sparse numpy arrays" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "partial", + "source_url": "", + "comment": "you can search i.e. search keywords with TQL/SQL contains(...) function, but I assume you mean more than just text search, by full-text search engine" + }, + "embeddings_text": { + "support": "", + "source_url": "https://docs.activeloop.ai/quickstart", + "comment": "https://docs.activeloop.ai/quickstart#creating-your-first-vector-store" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "https://python.langchain.com/docs/integrations/vectorstores/activeloop_deeplake", + "comment": "https://python.langchain.com/docs/integrations/vectorstores/activeloop_deeplake" + }, + "llamaindex": { + "support": "full", + "source_url": "https://docs.llamaindex.ai/en/stable/examples/vector_stores/DeepLakeIndexDemo.html", + "comment": "https://docs.llamaindex.ai/en/stable/examples/vector_stores/DeepLakeIndexDemo.html" + 
}, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "https://www.activeloop.ai/pricing/ Free up to 200GB, then $100 per 30M embeddings (200GB) https://www.activeloop.ai/resources/deep-lake-hnsw-index-rapidly-query-35-m-vectors-save-80/", + "source_url": "https://www.activeloop.ai/pricing/", + "comment": "" + }, + "in_process": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "https://docs.deeplake.ai/en/latest/deeplake.html", + "comment": "create a dataset per tenant, similar to collections/namespaces" + }, + "disk_index": { + "support": "full", + "source_url": "https://docs.activeloop.ai/performance-features/index-for-ann-search", + "comment": "Custom Memory optimized HNSW that sits on top of an object storage (including FS)" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "partial", + "source_url": "https://docs.activeloop.ai/technical-details/data-layout", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/anariai.json b/docs/tools/vdb_table/data/anariai.json new file mode 100644 index 000000000..17a4bb4d3 --- /dev/null +++ b/docs/tools/vdb_table/data/anariai.json @@ -0,0 +1,152 @@ +{ + "name": "Anari AI", + "links": { + "docs": "", + "github": "https://github.com/Anari-AI", + "website": "https://anari.ai/vector-acceleration-engine/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/99", + "poc_github": "https://github.com/jovan-stojanovic", + "slug": "anariai" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "", + "comment": "" + }, + 
"dev_languages": { + "value": [ + "" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "none", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + 
"source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/apachecassandra.json b/docs/tools/vdb_table/data/apachecassandra.json new file mode 100644 index 000000000..4398e2592 --- /dev/null +++ b/docs/tools/vdb_table/data/apachecassandra.json @@ -0,0 +1,152 @@ +{ + "name": "Apache Cassandra", + "links": { + "docs": "https://cassandra.apache.org/doc/latest/cassandra/vector-search/overview.html", + "github": "https://github.com/apache/cassandra", + "website": "https://cassandra.apache.org/_/Apache-Cassandra-5.0-Moving-Toward-an-AI-Driven-Future.html", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/84", + "poc_github": "https://github.com/jbellis", + "slug": "apachecassandra" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "java" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 8400, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "via CassIO" + }, + "hybrid_search": { + "support": "full", + "source_url": "https://cassio.org/", + "comment": "via CassIO" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + 
"support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "via keyspaces and tables" + }, + "disk_index": { + "support": "full", + "source_url": "https://cassandra.apache.org/doc/latest/cassandra/developing/cql/indexing/sai/sai-faq.html", + "comment": "SAI index" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/apachesolr.json b/docs/tools/vdb_table/data/apachesolr.json new file mode 100644 index 000000000..4b3c94d44 --- /dev/null +++ b/docs/tools/vdb_table/data/apachesolr.json @@ -0,0 +1,152 @@ +{ + "name": "Apache Solr", + "links": { + "docs": "https://solr.apache.org/guide/solr/latest/query-guide/dense-vector-search.html", + "github": "https://github.com/apache/solr", + "website": "https://solr.apache.org/guide/solr/latest/query-guide/dense-vector-search.html#usage-as-re-ranking-query", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/78", + "poc_github": "https://github.com/alessandrobenedetti", + "slug": "apachesolr" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + 
"comment": "" + }, + "dev_languages": { + "value": [ + "java" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 905, + "vector_launch_year": 2022, + "metadata_filter": { + "support": "full", + "source_url": "https://solr.apache.org/guide/solr/latest/query-guide/dense-vector-search.html", + "comment": "https://solr.apache.org/guide/solr/latest/query-guide/dense-vector-search.html#usage-with-filter-queries" + }, + "hybrid_search": { + "support": "full", + "source_url": "https://sease.io/2023/10/apache-lucene-solr-ai-roadmap-do-you-want-to-make-it-happen.html", + "comment": "https://sease.io/2023/10/apache-lucene-solr-ai-roadmap-do-you-want-to-make-it-happen.html linear combination implemented" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "none", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "full", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "", + "source_url": "https://pypi.org/project/eurelis-langchain-solr-vectorstore/", + "comment": "https://pypi.org/project/eurelis-langchain-solr-vectorstore/" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "https://www.searchstax.com/lp/managed-solr-search", + "comment": "via 
https://www.searchstax.com/lp/managed-solr-search and https://bonsai.io" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "https://issues.apache.org/jira/browse/SOLR-16836", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/aperturedb.json b/docs/tools/vdb_table/data/aperturedb.json new file mode 100644 index 000000000..661591fe6 --- /dev/null +++ b/docs/tools/vdb_table/data/aperturedb.json @@ -0,0 +1,152 @@ +{ + "name": "ApertureDB", + "links": { + "docs": "https://docs.aperturedata.io/", + "github": "https://github.com/aperture-data", + "website": "https://www.aperturedata.io/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/95", + "poc_github": "https://github.com/luisremis", + "slug": "aperturedb" + }, + "oss": { + "support": "", + "source_url": "", + "comment": "" + }, + "license": { + "value": "", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 0, + "metadata_filter": { + "support": "", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + 
"source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/azureai.json b/docs/tools/vdb_table/data/azureai.json new file mode 100644 index 000000000..25f62f7cf --- /dev/null +++ b/docs/tools/vdb_table/data/azureai.json @@ -0,0 +1,152 @@ +{ + "name": "Azure AI Search", + "links": { + "docs": "https://learn.microsoft.com/en-us/azure/search/", + "github": "https://github.com/Azure", + "website": 
"https://azure.microsoft.com/en-us/products/ai-services/ai-search/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/88", + "poc_github": "https://github.com/farzad528", + "slug": "azureai" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "https://learn.microsoft.com/en-us/azure/search/hybrid-search-overview", + "comment": "https://learn.microsoft.com/en-us/azure/search/hybrid-search-overview" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "https://learn.microsoft.com/en-us/azure/search/search-query-odata-geo-spatial-functions", + "comment": "https://learn.microsoft.com/en-us/azure/search/search-query-odata-geo-spatial-functions" + }, + "multi_vec": { + "support": "full", + "source_url": "https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-query?tabs=query-2023-11-01%2Cfilter-2023-11-01", + "comment": "https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-query?tabs=query-2023-11-01%2Cfilter-2023-11-01#vector-query-request" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "https://learn.microsoft.com/en-us/azure/search/index-ranking-similarity", + "comment": "https://learn.microsoft.com/en-us/azure/search/index-ranking-similarity" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": 
"https://learn.microsoft.com/en-us/azure/search/cognitive-search-custom-skill-web-api", + "comment": "Custom Web API skill in skillsets - Azure AI Search | Microsoft Learn https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/announcing-the-public-preview-of-integrated-vectorization-in/ba-p/3960809" + }, + "embeddings_image": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "full", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "none", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "Pricing - Azure Cognitive Search", + "source_url": "https://azure.microsoft.com/en-us/pricing/details/search/", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "https://learn.microsoft.com/en-us/azure/search/search-modeling-multitenant-saas-applications", + "comment": "https://learn.microsoft.com/en-us/azure/search/search-modeling-multitenant-saas-applications" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "https://learn.microsoft.com/en-us/azure/search/search-capacity-planning", + "comment": "https://learn.microsoft.com/en-us/azure/search/search-capacity-planning#concepts-search-units-replicas-partitions-shards" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 2048, + "unlimited": false, + "source_url": "", + "comment": "" + 
} +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/chroma.json b/docs/tools/vdb_table/data/chroma.json new file mode 100644 index 000000000..e3c5ed05e --- /dev/null +++ b/docs/tools/vdb_table/data/chroma.json @@ -0,0 +1,152 @@ +{ + "name": "Chroma", + "links": { + "docs": "https://docs.trychroma.com/", + "github": "https://github.com/chroma-core/chroma", + "website": "https://www.trychroma.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/79", + "poc_github": "https://github.com/jeffchuber", + "slug": "chroma" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "python" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 9700, + "vector_launch_year": 2022, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { 
+ "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "none", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "none", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "none", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/clickhouse.json b/docs/tools/vdb_table/data/clickhouse.json new file mode 100644 index 000000000..f3c8158a9 --- /dev/null +++ b/docs/tools/vdb_table/data/clickhouse.json @@ -0,0 +1,152 @@ +{ + "name": "ClickHouse", + "links": { + "docs": "https://clickhouse.com/docs/knowledgebase/vector-search", + "github": "https://github.com/ClickHouse/ClickHouse", + "website": "https://clickhouse.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/100", + "poc_github": "https://github.com/cwurm", + "slug": "clickhouse" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "c++" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 32000, + "vector_launch_year": 2022, + "metadata_filter": { + "support": "full", + "source_url": "https://clickhouse.com/use-cases/machine-learning-and-data-science", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": 
"https://clickhouse.com/docs/knowledgebase/filtered-aggregates", + "comment": "You can filter on a distance and aggregate or yes you can segment using If combinators." + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "easily done by adding them as separate columns for each. e.g. CREATE TABLE titleVec Array(Float32), contentVec Array(Float32), title String, contentString ENGINE=MergeTree ORDER BY tuple()" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "partial", + "source_url": "https://clickhouse.com/docs/en/sql-reference/functions/udf", + "comment": "possible to load an embedding model as a UDF" + }, + "embeddings_image": { + "support": "partial", + "source_url": "", + "comment": "possible to load an embedding model as a UDF" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "none", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "none", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "Clickhouse Pricing", + "source_url": "https://clickhouse.com/pricing", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "https://github.com/chdb-io/chdb", + "comment": "possible via https://github.com/chdb-io/chdb" + }, + "multi_tenancy": { + "support": "full", + "source_url": "https://clickhouse.com/docs/en/operations/access-rights", + "comment": "with RBAC and row-level security" + 
}, + "disk_index": { + "support": "full", + "source_url": "https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes", + "comment": "Via Usearch" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "sharding applicable to both vector and index storage" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/cratedb.json b/docs/tools/vdb_table/data/cratedb.json new file mode 100644 index 000000000..2dad1fcee --- /dev/null +++ b/docs/tools/vdb_table/data/cratedb.json @@ -0,0 +1,152 @@ +{ + "name": "CrateDB", + "links": { + "docs": "https://cratedb.com/solutions/vector-database", + "github": "https://github.com/crate/crate", + "website": "http://www.cratedb.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/98", + "poc_github": "https://github.com/marijaselakovic", + "slug": "cratedb" + }, + "oss": { + "support": "full", + "source_url": "https://github.com/crate/crate", + "comment": "https://github.com/crate/crate" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "java" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 3800, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "https://cratedb.com/docs/crate/reference/en/5.5/general/builtins/scalar-functions.html", + "comment": "combine knn_match with other criteria" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": "https://cratedb.com/docs/crate/reference/en/5.5/general/builtins/aggregation.html", + "comment": "" + }, + "geo_search": { + "support": 
"full", + "source_url": "https://cratedb.com/docs/crate/reference/en/5.5/general/dql/geo.html", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "https://cratedb.com/docs/crate/reference/en/5.5/general/ddl/data-types.html", + "comment": "add more columns of type knn_vector" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "https://cratedb.com/docs/crate/reference/en/5.5/general/dql/fulltext.html", + "comment": "based on Apache Lucene" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "https://github.com/crate-workbench/langchain", + "comment": "" + }, + "llamaindex": { + "support": "", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "https://console.cratedb.cloud/", + "comment": "https://console.cratedb.cloud/" + }, + "pricing": { + "value": "CrateDB Pricing https://console.cratedb.cloud/", + "source_url": "https://cratedb.com/product/pricing", + "comment": "" + }, + "in_process": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "https://cratedb.com/docs/crate/reference/en/5.5/admin/privileges.html", + "comment": "via Schemas, Tables" + }, + "disk_index": { + "support": "full", + "source_url": "https://cratedb.com/blog/guide-to-write-operations-in-cratedb", + "comment": "all tables and columns in CrateDB are stored in lucene indexes/segments on disk. 
\nhttps://cratedb.com/blog/guide-to-write-operations-in-cratedb" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "https://cratedb.com/docs/crate/reference/en/5.5/general/ddl/sharding.html", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 2048, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/datastaxastra.json b/docs/tools/vdb_table/data/datastaxastra.json new file mode 100644 index 000000000..86cf9e1ef --- /dev/null +++ b/docs/tools/vdb_table/data/datastaxastra.json @@ -0,0 +1,152 @@ +{ + "name": "DataStax Astra DB", + "links": { + "docs": "https://docs.datastax.com/en/astra-serverless/docs/vector-search/overview.html", + "github": "https://github.com/datastax/", + "website": "https://www.datastax.com/products/datastax-astra", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/85", + "poc_github": "https://github.com/Erick%20Ramirez", + "slug": "datastaxastra" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "java" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": 
"", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "full", + "source_url": "https://www.datastax.com/products/ragstack", + "comment": "https://www.datastax.com/products/ragstack" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "?" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "https://www.datastax.com/products/datastax-astra", + "comment": "https://www.datastax.com/products/datastax-astra" + }, + "pricing": { + "value": "https://www.datastax.com/pricing/astra-db", + "source_url": "https://www.datastax.com/pricing/astra-db", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "full", + "source_url": "https://cassandra.apache.org/doc/latest/cassandra/developing/cql/indexing/sai/sai-faq.html", + "comment": "SAI index" + }, + "ephemeral": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 250000000, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 8192, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/elasticsearch.json b/docs/tools/vdb_table/data/elasticsearch.json new file mode 100644 index 000000000..457ecdf3f --- /dev/null +++ 
b/docs/tools/vdb_table/data/elasticsearch.json @@ -0,0 +1,152 @@ +{ + "name": "Elasticsearch", + "links": { + "docs": "https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html", + "github": "https://github.com/elastic/elasticsearch", + "website": "https://www.elastic.co/enterprise-search/vector-search", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/76", + "poc_github": "https://github.com/m-adams", + "slug": "elasticsearch" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Elastic License v2", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "java" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 65900, + "vector_launch_year": 2021, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "full", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + 
"managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "full", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 100000000, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 4096, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/epsilla.json b/docs/tools/vdb_table/data/epsilla.json new file mode 100644 index 000000000..d2bf8c925 --- /dev/null +++ b/docs/tools/vdb_table/data/epsilla.json @@ -0,0 +1,152 @@ +{ + "name": "Epsilla", + "links": { + "docs": "https://epsilla-inc.gitbook.io/epsilladb/", + "github": "https://github.com/epsilla-cloud/vectordb", + "website": "https://epsilla.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/89", + "poc_github": "https://github.com/richard-epsilla", + "slug": "epsilla" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "GPL-3.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "c++" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 797, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + 
"source_url": "https://epsilla-inc.gitbook.io/epsilladb/vector-database/create-a-new-table", + "comment": "https://epsilla-inc.gitbook.io/epsilladb/vector-database/create-a-new-table#embedding-fields" + }, + "sparse_vectors": { + "support": "full", + "source_url": "https://epsilla-inc.gitbook.io/epsilladb/vector-database/dense-vector-vs.-sparse-vector", + "comment": "Epsilla supports sparse vector retrieval: https://epsilla-inc.gitbook.io/epsilladb/vector-database/dense-vector-vs.-sparse-vector" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "https://epsilla-inc.gitbook.io/epsilladb/vector-database/embeddings", + "comment": "https://epsilla-inc.gitbook.io/epsilladb/vector-database/embeddings" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "Storage: $0.025 / GB / month\nCompute: $0.0125 / CU / hour\n1CU = 0.125vCPU 512MB RAM\nTraffic: $0.03 / GB https://epsilla.com/pricing", + "source_url": "https://epsilla.com/pricing", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "https://epsilla-inc.gitbook.io/epsilladb/vector-database/search-the-top-k-semantically-similar-records", + "comment": "via tables (Epsilla supports multi tenancy through tables 
https://epsilla-inc.gitbook.io/epsilladb/vector-database/create-a-new-table and metadata (fields) filtering https://epsilla-inc.gitbook.io/epsilladb/vector-database/search-the-top-k-semantically-similar-records#filter-expression)" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/gcpvertexai.json b/docs/tools/vdb_table/data/gcpvertexai.json new file mode 100644 index 000000000..9cf92c012 --- /dev/null +++ b/docs/tools/vdb_table/data/gcpvertexai.json @@ -0,0 +1,152 @@ +{ + "name": "GCP Vertex AI Vector Search", + "links": { + "docs": "https://cloud.google.com/vertex-ai/docs/vector-search/overview", + "github": "https://github.com/GoogleCloudPlatform", + "website": "https://cloud.google.com/blog/products/ai-machine-learning/vertex-matching-engine-blazing-fast-and-massively-scalable-nearest-neighbor-search", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/83", + "poc_github": "https://github.com/kazunori279", + "slug": "gcpvertexai" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2021, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + 
"comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "(Vertex embeddings only)" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/kdbai.json b/docs/tools/vdb_table/data/kdbai.json new file mode 100644 index 000000000..87af019ef --- /dev/null +++ b/docs/tools/vdb_table/data/kdbai.json @@ -0,0 +1,152 @@ +{ + "name": "KDB.AI", + "links": { + "docs": "https://code.kx.com/kdbai/", + "github": 
"https://github.com/KxSystems", + "website": "http://kdb.ai/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/103", + "poc_github": "https://github.com/sshanks-kx", + "slug": "kdbai" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "python" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "Cloud = Free, limited memory/storage\n\nOn-Prem = Contact Sales", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + 
"support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "full", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/lancedb.json b/docs/tools/vdb_table/data/lancedb.json new file mode 100644 index 000000000..f4ded1e3b --- /dev/null +++ b/docs/tools/vdb_table/data/lancedb.json @@ -0,0 +1,152 @@ +{ + "name": "LanceDB", + "links": { + "docs": "https://lancedb.github.io/lancedb/", + "github": "https://github.com/lancedb/lancedb", + "website": "https://lancedb.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/87", + "poc_github": "https://github.com/changhiskhan", + "slug": "lancedb" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "rust" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 1900, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + 
"support": "full", + "source_url": "https://lancedb.github.io/lancedb/embeddings/embedding_functions/", + "comment": "Embeddings from any provider can be run as a background process so that the user doesn't need to manually ingest them.\nhttps://lancedb.github.io/lancedb/embeddings/embedding_functions/" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "none", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "full", + "source_url": "https://github.com/lancedb/lance/blob/main/rust/lance/src/index/vector/diskann.rs", + "comment": "Experimental DiskANN" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 2048, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/marqo.json b/docs/tools/vdb_table/data/marqo.json new file mode 100644 index 000000000..d4d4942ca --- /dev/null +++ b/docs/tools/vdb_table/data/marqo.json @@ -0,0 +1,152 @@ +{ + "name": "Marqo", + "links": { + "docs": "https://docs.marqo.ai/", + "github": "https://github.com/marqo-ai/marqo", + "website": 
"http://marqo.ai/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/73", + "poc_github": "https://github.com/OwenPendrighElliott", + "slug": "marqo" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "python" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 3700, + "vector_launch_year": 2022, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": "via weights" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "full", + "source_url": "", + "comment": "supports CLIP, OpenCLIP and Multilingual CLIP (as well as BYO models for these APIs)" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "Pricing - Marqo Cloud", + "source_url": "https://www.marqo.ai/pricing", + "comment": "" + }, + "in_process": { + 
"support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/meilisearch.json b/docs/tools/vdb_table/data/meilisearch.json new file mode 100644 index 000000000..e27296675 --- /dev/null +++ b/docs/tools/vdb_table/data/meilisearch.json @@ -0,0 +1,152 @@ +{ + "name": "Meilisearch", + "links": { + "docs": "https://www.meilisearch.com/docs", + "github": "https://github.com/meilisearch/meilisearch", + "website": "https://www.meilisearch.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/93", + "poc_github": "https://github.com/gmourier", + "slug": "meilisearch" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "MIT", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "rust" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 40100, + "vector_launch_year": 0, + "metadata_filter": { + "support": "full", + "source_url": "https://solr.apache.org/guide/solr/latest/query-guide/dense-vector-search.html", + "comment": "Dense vector search" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch", + "comment": 
"https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "full", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "Meilisearch Pricing", + "source_url": "https://www.meilisearch.com/pricing", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/milvus.json b/docs/tools/vdb_table/data/milvus.json new file mode 100644 index 000000000..ae5b9fbc7 --- /dev/null +++ b/docs/tools/vdb_table/data/milvus.json @@ -0,0 +1,153 @@ +{ + "name": "Milvus", + 
"links": { + "docs": "https://milvus.io/docs", + "github": "https://github.com/milvus-io/milvus", + "website": "http://milvus.io/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/71", + "poc_github": "https://github.com/Enwei%20Jiao", + "slug": "milvus" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "go", + "c++" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 24300, + "vector_launch_year": 2019, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + 
"comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "https://milvus.io/docs/multi_tenancy.md", + "comment": "https://milvus.io/docs/multi_tenancy.md" + }, + "disk_index": { + "support": "full", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 32768, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/mongo.json b/docs/tools/vdb_table/data/mongo.json new file mode 100644 index 000000000..c3177393f --- /dev/null +++ b/docs/tools/vdb_table/data/mongo.json @@ -0,0 +1,153 @@ +{ + "name": "MongoDB Atlas", + "links": { + "docs": "https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/", + "github": "https://github.com/mongodb", + "website": "https://www.mongodb.com/products/platform/atlas-vector-search", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/72", + "poc_github": "https://github.com/dvsander", + "slug": "mongo" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "GNU AGPL v3.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "c++", + "java" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "https://gist.github.com/hweller1/d6dbd5036ae4366108b534a0f1662a20", + "comment": "https://gist.github.com/hweller1/d6dbd5036ae4366108b534a0f1662a20" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + 
"comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "MongoDB Pricing", + "source_url": "https://www.mongodb.com/pricing", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "via logical partitioning or metadata" + }, + "disk_index": { + "support": "full", + "source_url": "https://www.mongodb.com/docs/atlas/atlas-search/faq/", + "comment": "https://www.mongodb.com/docs/atlas/atlas-search/faq/#does-fts-store-my-entire-index-in-memory-" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 16000000, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/myscale.json b/docs/tools/vdb_table/data/myscale.json new file 
mode 100644 index 000000000..a671b1e39 --- /dev/null +++ b/docs/tools/vdb_table/data/myscale.json @@ -0,0 +1,152 @@ +{ + "name": "MyScale", + "links": { + "docs": "https://myscale.com/docs/en/vector-search/", + "github": "https://github.com/myscale", + "website": "https://myscale.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/101", + "poc_github": "https://github.com/lqhl", + "slug": "myscale" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "c++" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "https://myscale.com/docs/en/hybrid-search/", + "comment": "Hybrid Search" + }, + "facets": { + "support": "full", + "source_url": "https://myscale.com/docs/en/advanced-applications/object-detect/", + "comment": "via https://myscale.com/docs/en/advanced-applications/object-detect/#grouping-subqueries" + }, + "geo_search": { + "support": "full", + "source_url": "https://myscale.com/docs/en/functions/geo-functions/", + "comment": "via https://myscale.com/docs/en/functions/geospatial-functions/" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "full", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "none", + "source_url": "", + 
"comment": "" + }, + "recsys": { + "support": "none", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "MyScale Pricing", + "source_url": "https://myscale.com/pricing/", + "comment": "" + }, + "in_process": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "full", + "source_url": "https://myscale.com/blog/myscale-on-aws/", + "comment": "via https://myscale.com/blog/myscale-on-aws/ > MyScale\u2019s MSTG algorithm allows vector data to be cached on local NVMe SSD disks, providing users with high-performance vector searches while significantly saving on memory usage." + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/neo4j.json b/docs/tools/vdb_table/data/neo4j.json new file mode 100644 index 000000000..d762286c5 --- /dev/null +++ b/docs/tools/vdb_table/data/neo4j.json @@ -0,0 +1,152 @@ +{ + "name": "Neo4j", + "links": { + "docs": "https://neo4j.com/docs/cypher-manual/current/indexes-for-vector-search/", + "github": "https://github.com/neo4j", + "website": "https://neo4j.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/96", + "poc_github": "https://github.com/akollegger", + "slug": "neo4j" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "", + 
"source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 0, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "none", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "full", + "source_url": "", + "comment": "Java apps only" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "none", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": 
"", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/nucliadb.json b/docs/tools/vdb_table/data/nucliadb.json new file mode 100644 index 000000000..f870f10a1 --- /dev/null +++ b/docs/tools/vdb_table/data/nucliadb.json @@ -0,0 +1,153 @@ +{ + "name": "Nuclia DB", + "links": { + "docs": "https://docs.nuclia.dev/docs/", + "github": "https://github.com/nuclia/nucliadb", + "website": "https://nuclia.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/90", + "poc_github": "https://github.com/clayrisse", + "slug": "nucliadb" + }, + "oss": { + "support": "", + "source_url": "", + "comment": "" + }, + "license": { + "value": "AGPLv3", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "rust", + "python" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 497, + "vector_launch_year": 0, + "metadata_filter": { + "support": "", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + 
"langchain": { + "support": "", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/opensearch.json b/docs/tools/vdb_table/data/opensearch.json new file mode 100644 index 000000000..b719905ae --- /dev/null +++ b/docs/tools/vdb_table/data/opensearch.json @@ -0,0 +1,152 @@ +{ + "name": "OpenSearch", + "links": { + "docs": "https://opensearch.org/docs/latest/search-plugins/knn/index/", + "github": "https://github.com/opensearch-project/OpenSearch", + "website": "https://opensearch.org/platform/search/vector-database.html", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/77", + "poc_github": "https://github.com/dtaivpp", + "slug": "opensearch" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "java" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 7900, + "vector_launch_year": 2021, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": 
"Only linear combination" + }, + "facets": { + "support": "full", + "source_url": "https://opensearch.org/docs/latest/aggregations/metric/index/", + "comment": "https://opensearch.org/docs/latest/aggregations/metric/index/" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "full", + "source_url": "https://opensearch.org/docs/latest/search-plugins/neural-sparse-search/", + "comment": "https://opensearch.org/docs/latest/search-plugins/neural-sparse-search/" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "full", + "source_url": "https://opensearch.org/docs/2.11/ml-commons-plugin/custom-local-models/", + "comment": "https://opensearch.org/docs/2.11/ml-commons-plugin/custom-local-models/" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "full", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "full", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", 
+ "comment": "" + }, + "doc_size": { + "bytes": 100000000, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 10000, + "unlimited": false, + "source_url": "https://opensearch.org/docs/2.0/search-plugins/knn/approximate-knn/", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/oramasearch.json b/docs/tools/vdb_table/data/oramasearch.json new file mode 100644 index 000000000..c22ab01b4 --- /dev/null +++ b/docs/tools/vdb_table/data/oramasearch.json @@ -0,0 +1,152 @@ +{ + "name": "OramaSearch", + "links": { + "docs": "https://docs.oramasearch.com/open-source/", + "github": "https://github.com/oramasearch/orama", + "website": "http://oramasearch.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/82", + "poc_github": "https://github.com/MicheleRiva", + "slug": "oramasearch" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "typescript" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 6700, + "vector_launch_year": 2022, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": 
"" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "none", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "free plan / $4.99 per month (pro)", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "Cloud solution only" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/pgvector.json b/docs/tools/vdb_table/data/pgvector.json new file mode 100644 index 000000000..78b93fd72 --- /dev/null +++ b/docs/tools/vdb_table/data/pgvector.json @@ -0,0 +1,152 @@ +{ + "name": "pgvector", + "links": { + "docs": "", + "github": "https://github.com/pgvector/pgvector", + "website": "https://github.com/pgvector/pgvector", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/69", + "poc_github": "https://github.com/jkatz", + "slug": "pgvector" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "PostgreSQL Licence https://opensource.org/license/postgresql/", + "source_url": "https://opensource.org/license/postgresql/", + "comment": "" + }, + 
"dev_languages": { + "value": [ + "c" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 6900, + "vector_launch_year": 2021, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "https://github.com/pgvector/pgvector", + "comment": "https://github.com/pgvector/pgvector#hybrid-search" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "Postgis" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "https://www.postgresql.org/docs/current/textsearch.html", + "comment": "https://www.postgresql.org/docs/current/textsearch.html via GIST" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "(supabase)" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "none", + "source_url": 
"", + "comment": "" + }, + "sharding": { + "support": "none", + "source_url": "", + "comment": "While pgvector does not provide this natively, you can get this either from PostgreSQL functionality like \"postgres_fdw\" or from extensions. You can also choose to subdivide your index through partitioning." + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 2000, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/pinecone.json b/docs/tools/vdb_table/data/pinecone.json new file mode 100644 index 000000000..bbe06aa39 --- /dev/null +++ b/docs/tools/vdb_table/data/pinecone.json @@ -0,0 +1,152 @@ +{ + "name": "Pinecone", + "links": { + "docs": "https://docs.pinecone.io/", + "github": "https://github.com/pinecone-io", + "website": "https://www.pinecone.io/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/65", + "poc_github": "https://github.com/jamescalam", + "slug": "pinecone" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "rust" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2019, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "none", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "full", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + 
"comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "via namespaces" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 40000, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 20000, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/qdrant.json b/docs/tools/vdb_table/data/qdrant.json new file mode 100644 index 000000000..b221dfe92 --- /dev/null +++ b/docs/tools/vdb_table/data/qdrant.json @@ -0,0 +1,152 @@ +{ + "name": "Qdrant", + "links": { + "docs": "https://qdrant.tech/documentation/", + "github": "https://github.com/qdrant/qdrant", + "website": "https://qdrant.tech/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/66", + "poc_github": "https://github.com/atarora", + "slug": "qdrant" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": 
"Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "rust" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 14700, + "vector_launch_year": 2021, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "https://qdrant.tech/articles/sparse-vectors/", + "comment": "https://qdrant.tech/articles/sparse-vectors/#hybrid-search-combining-sparse-and-dense-vectors" + }, + "facets": { + "support": "none", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "full", + "source_url": "https://qdrant.tech/articles/sparse-vectors", + "comment": "https://qdrant.tech/articles/sparse-vectors" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "https://qdrant.tech/documentation/concepts/indexing/", + "comment": "https://qdrant.tech/documentation/concepts/indexing/#full-text-index" + }, + "embeddings_text": { + "support": "full", + "source_url": "https://qdrant.github.io/fastembed/examples/Supported_Models/", + "comment": "via FastEmbed (Supported models: https://qdrant.github.io/fastembed/examples/Supported_Models/)" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "full", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + 
"value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "via collections/metadata" + }, + "disk_index": { + "support": "full", + "source_url": "https://qdrant.tech/documentation/concepts/storage/", + "comment": "https://qdrant.tech/documentation/concepts/storage/#configuring-memmap-storage" + }, + "ephemeral": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "https://qdrant.tech/documentation/guides/distributed_deployment/", + "comment": "https://qdrant.tech/documentation/guides/distributed_deployment/#user-defined-sharding" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/redis.json b/docs/tools/vdb_table/data/redis.json new file mode 100644 index 000000000..d7d2cf9c3 --- /dev/null +++ b/docs/tools/vdb_table/data/redis.json @@ -0,0 +1,152 @@ +{ + "name": "Redis Search", + "links": { + "docs": "https://redis.io/docs/get-started/vector-database/", + "github": "https://github.com/RedisAI/VectorSimilarity", + "website": "https://redis.com/solutions/use-cases/vector-database/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/81", + "poc_github": "https://github.com/tylerhutcherson", + "slug": "redis" + }, + "oss": { + "support": "partial", + "source_url": "https://blog.opensource.org/the-sspl-is-not-an-open-source-license/", + "comment": "vector search component is under SSPL which is not fully OSS" + }, + "license": { + "value": "(i) Redis Source Available License 2.0 (RSALv2) or (ii) the Server Side Public License v1 (SSPLv1)", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "c" + ], + 
"source_url": "", + "comment": "" + }, + "github_stars": 62400, + "vector_launch_year": 2021, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "https://redis.io/docs/interact/search-and-query/advanced-concepts/vectors/", + "comment": "https://redis.io/docs/interact/search-and-query/advanced-concepts/vectors/#storing-vectors-in-json" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "https://redis.io/docs/interact/search-and-query/advanced-concepts/scoring/", + "comment": "https://redis.io/docs/interact/search-and-query/advanced-concepts/scoring/" + }, + "full_text": { + "support": "full", + "source_url": "https://redis.io/docs/interact/search-and-query/query/full-text/", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "https://github.com/RedisVentures/redisvl", + "comment": "Embedding generation support available via client like https://github.com/RedisVentures/redisvl" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + 
"source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "https://redis.com/blog/multi-tenancy-redis-enterprise/", + "comment": "https://redis.com/blog/multi-tenancy-redis-enterprise/" + }, + "disk_index": { + "support": "none", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/requirements.txt b/docs/tools/vdb_table/data/requirements.txt deleted file mode 100644 index 5cae28e80..000000000 --- a/docs/tools/vdb_table/data/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -attrs==23.1.0 -jsonschema==4.20.0 -jsonschema-specifications==2023.11.2 -referencing==0.32.0 -rpds-py==0.13.2 diff --git a/docs/tools/vdb_table/data/rockset.json b/docs/tools/vdb_table/data/rockset.json new file mode 100644 index 000000000..5343a91fc --- /dev/null +++ b/docs/tools/vdb_table/data/rockset.json @@ -0,0 +1,152 @@ +{ + "name": "Rockset", + "links": { + "docs": "https://docs.rockset.com/documentation/docs/vector-search", + "github": "https://github.com/rockset", + "website": "https://rockset.com/vector-search/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/102", + "poc_github": "https://github.com/brencon", + "slug": "rockset" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 0, + "metadata_filter": { + "support": "", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + 
"source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/turbopuffer.json b/docs/tools/vdb_table/data/turbopuffer.json new file mode 100644 index 000000000..e90a7a239 --- /dev/null +++ b/docs/tools/vdb_table/data/turbopuffer.json @@ 
-0,0 +1,152 @@ +{ + "name": "Turbopuffer", + "links": { + "docs": "https://turbopuffer.com/docs", + "github": "https://github.com/sirupsen", + "website": "http://turbopuffer.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/92", + "poc_github": "https://github.com/sirupsen", + "slug": "turbopuffer" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "rust" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 0, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "none", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "none", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "none", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "$1/million vecs, $4/million queries 
https://turbopuffer.com", + "source_url": "https://turbopuffer.com/", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 65536, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 1536, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/txtai.json b/docs/tools/vdb_table/data/txtai.json new file mode 100644 index 000000000..1bb927789 --- /dev/null +++ b/docs/tools/vdb_table/data/txtai.json @@ -0,0 +1,152 @@ +{ + "name": "txtai", + "links": { + "docs": "https://neuml.github.io/txtai/", + "github": "https://neuml.github.io/txtai/", + "website": "https://neuml.github.io/txtai/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/86", + "poc_github": "https://github.com/davidmezzetti", + "slug": "txtai" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "python" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 5500, + "vector_launch_year": 2020, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "https://neuml.hashnode.dev/whats-new-in-txtai-60", + "comment": 
"https://neuml.hashnode.dev/whats-new-in-txtai-60#heading-subindexes" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "full", + "source_url": "https://neuml.hashnode.dev/build-rag-pipelines-with-txtai", + "comment": "https://neuml.hashnode.dev/build-rag-pipelines-with-txtai" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/typesense.json b/docs/tools/vdb_table/data/typesense.json new file mode 100644 index 000000000..19ce3764a --- /dev/null +++ b/docs/tools/vdb_table/data/typesense.json @@ -0,0 +1,152 @@ +{ + "name": "Typesense", + "links": { + 
"docs": "https://typesense.org/docs/", + "github": "https://github.com/typesense/typesense", + "website": "https://typesense.org/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/94", + "poc_github": "https://github.com/jasonbosco", + "slug": "typesense" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "GPL-3.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "c++" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 16100, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "https://typesense.org/docs/0.25.1/api/vector-search.html", + "comment": "https://typesense.org/docs/0.25.1/api/vector-search.html#creating-an-auto-embedding-field" + }, + "embeddings_image": { + "support": "full", + "source_url": "https://typesense.org/docs/0.25.1/api/vector-search.html", + "comment": "https://typesense.org/docs/0.25.1/api/vector-search.html#use-cases:~:text=E%2D5-,CLIP,-OpenAI%27s%20Text%20Embeddings" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + 
"comment": "" + }, + "llamaindex": { + "support": "", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "Pricing | Typesense Cloud", + "source_url": "https://cloud.typesense.org/pricing", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "none", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/usearch.json b/docs/tools/vdb_table/data/usearch.json new file mode 100644 index 000000000..e4706d2bd --- /dev/null +++ b/docs/tools/vdb_table/data/usearch.json @@ -0,0 +1,152 @@ +{ + "name": "USearch", + "links": { + "docs": "https://unum-cloud.github.io/usearch/", + "github": "https://github.com/unum-cloud/usearch", + "website": "https://unum-cloud.github.io/usearch/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/91", + "poc_github": "https://github.com/ashvardanian", + "slug": "usearch" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "c++" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 1061, + "vector_launch_year": 2023, + "metadata_filter": { + "support": "", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", 
+ "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "https://unum-cloud.github.io/usearch/python/reference.html", + "comment": "using the multi param in index creation" + }, + "sparse_vectors": { + "support": "", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "none", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/vald.json b/docs/tools/vdb_table/data/vald.json new file mode 100644 index 000000000..3752d0edc --- /dev/null +++ b/docs/tools/vdb_table/data/vald.json 
@@ -0,0 +1,152 @@ +{ + "name": "Vald", + "links": { + "docs": "https://vald.vdaas.org/docs/", + "github": "https://github.com/vdaas/vald", + "website": "http://github.com/vdaas/vald", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/80", + "poc_github": "https://github.com/kpango", + "slug": "vald" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "go" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 1400, + "vector_launch_year": 2021, + "metadata_filter": { + "support": "", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "none", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "none", + 
"source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "", + "source_url": "", + "comment": "" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/validate.py b/docs/tools/vdb_table/data/validate.py deleted file mode 100644 index 4ae2b295a..000000000 --- a/docs/tools/vdb_table/data/validate.py +++ /dev/null @@ -1,8 +0,0 @@ -from jsonschema import validate -import json - -schema_obj = json.load(open("vendor.json.schema", "r")) -vendor_obj = json.load(open("vendorX.json", "r")) - -validate(instance=vendor_obj, schema=schema_obj) - diff --git a/docs/tools/vdb_table/data/vectara.json b/docs/tools/vdb_table/data/vectara.json new file mode 100644 index 000000000..ceade6eed --- /dev/null +++ b/docs/tools/vdb_table/data/vectara.json @@ -0,0 +1,152 @@ +{ + "name": "Vectara", + "links": { + "docs": "https://docs.vectara.com/docs/", + "github": "https://github.com/vectara", + "website": "http://vectara.com/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/74", + "poc_github": "https://github.com/ofermend", + "slug": "vectara" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "not known" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 0, + "vector_launch_year": 2021, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": "Only Linear 
combination" + }, + "facets": { + "support": "", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "none", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_text": { + "support": "none", + "source_url": "https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation", + "comment": "[Note Vectara has its own embedding model called \"Boomerang\" that is SOTA (see https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) and also works better than OpenAI or Cohere in terms of cross-lingual support (https://vectara.com/introducing-boomerang-vectaras-new-and-improved-retrieval-model-2/)]" + }, + "embeddings_image": { + "support": "", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "full", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "Pricing - Vectara", + "source_url": "https://vectara.com/pricing/", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "via corpora or metadata" + }, + "disk_index": { + "support": "full", + "source_url": "", + "comment": "" + }, + "ephemeral": { + 
"support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 50000000, + "unlimited": false, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/vendor.schema.json b/docs/tools/vdb_table/data/vendor.schema.json deleted file mode 100644 index d09bbf791..000000000 --- a/docs/tools/vdb_table/data/vendor.schema.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "vdb_table/schema/vendor", - "type": "object", - - "properties": { - "name": { "type": "string" }, - "website_url": { "type": "string" }, - "docs_url": { "type": "string" }, - "logo_url": { "type": "string" }, - "point_of_contact_email": { "type": "string", "format": "email" }, - - "oss": { "allOf": [{"$ref": "#/$defs/feature"}], "$comment": "Open source & free to self-host" }, - "managed_cloud": { "allOf": [{"$ref": "#/$defs/feature"}], "$comment": "Managed Cloud Offering" }, - "disk_index": { "allOf": [{"$ref": "#/$defs/feature"}], "$comment": "Disk-based Index" }, - - "doc_size_limit": { "allOf": [{"$ref": "#/$defs/integerWithUnits"}], "$comment": "Metadata/Doc size limit" }, - "vector_dimensions_max": { "allOf": [{"$ref": "#/$defs/integer"}], "$comment": "Maximum integer of vector dimensions" }, - - "license": { "allOf": [{"$ref": "#/$defs/string"}], "$comment": "OSS license for the core product" } - }, - - "$defs": { - "feature": { - "$id": "feature", - "type": "object", - "properties": { - "support": { "enum": ["none", "partial", "full"]}, - "source_level": {"enum": ["", "claimed", "documented"]}, - "source_url": {"type": "string"}, - "comment": {"type": "string"} - } - }, - "string": { - "$id": "string", - "type": "object", - "properties": { - "value": {"type": "string"}, - "source_level": {"enum": ["", 
"claimed", "documented"]}, - "source_url": {"type": "string"}, - "comment": {"type": "string"} - } - }, - "integer": { - "$id": "integer", - "type": "object", - "properties": { - "value": {"type": "integer"}, - "source_level": {"enum": ["", "claimed", "documented"]}, - "source_url": {"type": "string"}, - "comment": {"type":"string"} - } - }, - "integerWithUnits": { - "$id": "integerWithUnits", - "type": "object", - "properties": { - "value": {"type": "integer"}, - "unit": {"type": "string"}, - "source_level": {"enum": ["", "claimed", "documented"]}, - "source_url": {"type": "string"}, - "comment": {"type": "string"} - } - } - } -} diff --git a/docs/tools/vdb_table/data/vendorX.json b/docs/tools/vdb_table/data/vendorX.json deleted file mode 100644 index f32e06e25..000000000 --- a/docs/tools/vdb_table/data/vendorX.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "name": "vendorX", - "website_url": "https://vendorX.com", - "docs_url": "https://docs.vendorX.com", - "logo_url": "https://images.vendorX.com/logo.png", - "point_of_contact_email": "poc@vendorx.com", - "oss": { - "support": "full", - "source_level": "documented", - "source_url": "https://github.com/vendorx/vendorx/license.md" - }, - "managed_cloud": { - "support": "none" - }, - "disk_index": { - "support": "partial", - "source_level": "claimed", - "source_url": "", - "comment": "We support disk indices, but we do it under the hood to save cost and have no way to prove that to you." 
- }, - "doc_size_limit": { - "value": 50, - "unit": "MB", - "source_level": "documented", - "source_url": "https://docs.vendorX.com/limits.html" - }, - "vector_dimensions_max": { - "value": 10000, - "source_level": "documented", - "source_url": "https://docs.vendorX.com/limits.html", - "comment": "Can be configured to be more, but we don't have the docs for how to do that yet" - }, - "license": { - "value": "Apache-2.0", - "source_level": "documented", - "source_url": "https://github.com/vendorx/vendorx/license.md" - } -} diff --git a/docs/tools/vdb_table/data/vespa.json b/docs/tools/vdb_table/data/vespa.json new file mode 100644 index 000000000..44ab68370 --- /dev/null +++ b/docs/tools/vdb_table/data/vespa.json @@ -0,0 +1,153 @@ +{ + "name": "Vespa", + "links": { + "docs": "https://docs.vespa.ai/", + "github": "https://github.com/vespa-engine/vespa", + "website": "https://vespa.ai/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/70", + "poc_github": "https://github.com/oyving", + "slug": "vespa" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Apache-2.0", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "java", + "c++" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 5000, + "vector_launch_year": 2017, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "full", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + 
"embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "none", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "", + "source_url": "", + "comment": "" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "none", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": "https://cloud.vespa.ai/price-calculator", + "source_url": "https://cloud.vespa.ai/price-calculator", + "comment": "" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "full", + "source_url": "https://docs.vespa.ai/en/proton.html", + "comment": "https://docs.vespa.ai/en/proton.html#index" + }, + "ephemeral": { + "support": "", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "", + "source_url": "", + "comment": "" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 0, + "unlimited": true, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/data/weaviate.json b/docs/tools/vdb_table/data/weaviate.json new file mode 100644 index 000000000..e1dec633f --- /dev/null +++ b/docs/tools/vdb_table/data/weaviate.json @@ -0,0 +1,152 @@ +{ + "name": "Weaviate", + "links": { + "docs": "https://weaviate.io/developers/weaviate", + "github": "https://github.com/weaviate/weaviate", + "website": "http://weaviate.io/", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/67", + "poc_github": 
"https://github.com/StefanBogdan", + "slug": "weaviate" + }, + "oss": { + "support": "full", + "source_url": "", + "comment": "" + }, + "license": { + "value": "BSD", + "source_url": "", + "comment": "" + }, + "dev_languages": { + "value": [ + "go" + ], + "source_url": "", + "comment": "" + }, + "github_stars": 8300, + "vector_launch_year": 2019, + "metadata_filter": { + "support": "full", + "source_url": "", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "", + "comment": "RRF*+RSF*" + }, + "facets": { + "support": "full", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "full", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "full", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": "", + "comment": "" + }, + "full_text": { + "support": "full", + "source_url": "https://weaviate.io/developers/weaviate/concepts/indexing", + "comment": "https://weaviate.io/developers/weaviate/concepts/indexing#introduction" + }, + "embeddings_text": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_image": { + "support": "full", + "source_url": "", + "comment": "" + }, + "embeddings_structured": { + "support": "", + "source_url": "", + "comment": "" + }, + "rag": { + "support": "full", + "source_url": "https://weaviate.io/developers/weaviate/modules/reader-generator-modules/generative-openai", + "comment": "Generative Search - OpenAI | Weaviate - vector databaseweaviate/modules/reader-generator-modules/generative-openai" + }, + "recsys": { + "support": "", + "source_url": "", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "", + "comment": "" + }, + "pricing": { + "value": 
"", + "source_url": "", + "comment": "" + }, + "in_process": { + "support": "full", + "source_url": "https://weaviate.io/developers/weaviate/installation/embedded", + "comment": "https://weaviate.io/developers/weaviate/installation/embedded" + }, + "multi_tenancy": { + "support": "full", + "source_url": "", + "comment": "" + }, + "disk_index": { + "support": "full", + "source_url": "https://weaviate.io/developers/weaviate/concepts/vector-index", + "comment": "https://weaviate.io/developers/weaviate/concepts/vector-index#flat-index" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "https://weaviate.io/developers/weaviate/concepts/cluster", + "comment": "https://weaviate.io/developers/weaviate/concepts/cluster https://weaviate.io/developers/weaviate/concepts/replication-architecture" + }, + "doc_size": { + "bytes": 0, + "unlimited": true, + "source_url": "", + "comment": "" + }, + "vector_dims": { + "value": 65535, + "unlimited": false, + "source_url": "", + "comment": "" + } +} \ No newline at end of file diff --git a/docs/tools/vdb_table/dataTypeMapping.txt b/docs/tools/vdb_table/dataTypeMapping.txt deleted file mode 100644 index e94650194..000000000 --- a/docs/tools/vdb_table/dataTypeMapping.txt +++ /dev/null @@ -1,38 +0,0 @@ -Rich text - Open-source & free to self-host -Rich text - Managed Cloud Offering -Rich text - Disk-based Index -Rich text - Multi-tenancy Support -Rich text - In-built Text Embeddings creation(Bring-your-own-model) -Rich text - In-built Image Embedding creation -Rich text - Metadata Filtering -Rich text - Embeddable -Rich text - Multiple vectors per point -Rich text - Langchain integration -Rich text - Llama index integration -Rich text - Hybrid Search -Rich text - BM25 support -Rich text - Sparse Vectors Support -Rich text - Full-text Search Engine -Rich text - Facets (Aggregations with Count) -Rich text - GeoSearch Support -Multiple select - Metadata/Doc 
size limit -Single select - Max Dimensions -Rich text - Ephemeral Index support(without server) -Rich text - Sharding -Multiple select - License -Multiple select - Development Language -Number - Github Stars -Single select - First Release of Vector Search -Rich text - Pricing -Rich text - Calls LLM internally for RAG -Rich text - Recommendations API -Rich text - Personalization -Rich text - User events (clickstream) - - -Inputs required - -Rich text = string -Single select = string (string should be one of the already created ones, otherwise it'll create a new option) -Multiple select = list of strings (string should be one of the already created ones, otherwise it'll create a new option) -Number = integer diff --git a/docs/tools/vdb_table/data_utils.py b/docs/tools/vdb_table/data_utils.py new file mode 100644 index 000000000..2d9852bac --- /dev/null +++ b/docs/tools/vdb_table/data_utils.py @@ -0,0 +1,267 @@ +import os +import json +import argparse +import openpyxl +import re +import humanfriendly +import glob +from jsonschema import validate +from url_normalize import url_normalize +from collections import OrderedDict + +class JsonProp(): + def __init__(self, prop) -> None: + self.prop = prop + + def id(self): + return self.prop[0] + + def type(self): + # Parse the type - either just {type: value} or the $ref thing from jsonschema + if 'type' in self.prop[1]: + return self.prop[1]['type'] + return self.prop[1].get('allOf')[0]['$ref'].removeprefix('#/$defs/') + + def group(self): + # $comment is a reserved prop in jsonschema, we use it for display name and comment with | as separator + return self.prop[1].get('$comment','').split('|')[0].strip() + + def name(self): + # $comment is a reserved prop in jsonschema, we use it for display name and comment with | as separator + return self.prop[1].get('$comment','').split('|')[1].strip() + + def comment(self): + # $comment is a reserved prop in jsonschema, we use it for display name and comment with | as separator + 
name_and_comment = self.prop[1].get('$comment','').split('|') + if len(name_and_comment) > 1: + return name_and_comment[2].strip() + return '' + + def __str__(self): + return "%s, %s, %s, %s, %s"%(self.id(), self.type(), self.group(), self.name(), self.comment()) + +class JsonSchemaWrapper(): # Switch to XLSX + def __init__(self, schema) -> None: + self.schema = schema + + def props(self): + return [JsonProp(prop) for prop in self.schema['properties'].items()] + + def prop_by_id(self, id): + for prop in self.props(): + if prop.id() == id: + return prop + return None + +class JsonValueFactory(): + def backfillLinkFromValue(value, hyperlink): + URL_REGEX = r"\b((?:https?://)?(?:(?:www\.)?(?:[\da-z\.-]+)\.(?:[a-z]{2,6})|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])))(?::[0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])?(?:/[\w\.-]*)*/?)\b" + if hyperlink: + return hyperlink + parsed_url = re.search(URL_REGEX, value) + return parsed_url.group() if parsed_url else "" + + def convertValueToJson(schema, value, hyperlink): + return getattr(JsonValueFactory, schema.type())(value, hyperlink) + + def bytesWithSource(value, hyperlink): + try: + num_bytes = humanfriendly.parse_size(value) + except: 
+ num_bytes = 0 + return { + "bytes": num_bytes, + "unlimited": True if "unlimited" in value.lower() else False, + "source_url": hyperlink, + "comment": "" + } + + def featureWithSource(value, hyperlink): + support = "" + if "✅" in value: support = "full" + if "🟨" in value: support = "partial" + if "❌" in value: support = "none" + return { + "support": support, + "source_url": JsonValueFactory.backfillLinkFromValue(value, hyperlink), + "comment": "" if not value else value.translate({ord(x): '' for x in ["✅","❌","🟨"]}).strip() + } + + def integer(value, _hyperlink): + try: + return int(value) + except: + return 0 + + def integerWithSource(value, hyperlink): + try: + integer = int(value) + except: + integer = 0 + return { + "value": integer, + "unlimited": True if "unlimited" in value.lower() else False, + "source_url": hyperlink, + "comment": "" + } + + def links(value, _hyperlink): + link_list = value.split("|") + return { + "docs": url_normalize(link_list[0]), + "github": url_normalize(link_list[1]), + "website": url_normalize(link_list[2]), + "vendor_discussion": url_normalize(link_list[3]), + "poc_github": url_normalize("https://github.com/%s"%(link_list[4])), + "slug": link_list[5] + } + + def string(value, _hyperlink): + return value + + def stringListWithSource(value, hyperlink): + return { + "value": [val.strip().lower() for val in value.split(",")], + "source_url": hyperlink, + "comment": "" + } + + def stringWithSource(value, hyperlink): + return { + "value": value, + "source_url": JsonValueFactory.backfillLinkFromValue(value, hyperlink), + "comment": "" + } + +class XLSXWrapper(): + # Preparation steps for the raw CSV that have to be done manually: + # 1. Fill in missing headers based on the vendor.schema.json file. Currently there are 2: + # Links + # In-built Structured Data Embedding creation + # 2. Replace new-line characters in the file with spaces to make sure you have a valid CSV. 
+ HEADER_TO_SCHEMA_ID = { + "DB | Attributes": "name", + "Links": "links", + "License": "license", + "Development Language": "dev_languages", + "Github Stars": "github_stars", + "First Release of Vector Search": "vector_launch_year", + "Metadata Filtering": "metadata_filter", + "Hybrid Search": "hybrid_search", + "Facets (Aggregations with Count)": "facets", + "GeoSearch Support": "geo_search", + "Multiple vectors per point": "multi_vec", + "Sparse Vectors Support": "sparse_vectors", + "BM25 support": "bm25", + "Full-text Search Engine": "full_text", + "In-built Text Embeddings creation (Bring-your-own-model)": "embeddings_text", + "In-built Image Embedding creation": "embeddings_image", + "In-built Structured Data Embedding creation": "embeddings_structured", + "Calls LLM internally for RAG": "rag", + "Recommendations API": "recsys", + "Langchain integration": "langchain", + "Llama index integration": "llamaindex", + "Open-source & free to self-host": "oss", + "Managed Cloud Offering": "managed_cloud", + "Pricing": "pricing", + "Embeddable": "in_process", + "Multi-tenancy Support": "multi_tenancy", + "Disk-based Index": "disk_index", + "Ephemeral Index support (without server)": "ephemeral", + "Sharding": "sharding", + "Metadata/Doc size limit": "doc_size", + "Max Dimensions": "vector_dims" + } + + def map_header_to_schema_id(self, header): + return self.HEADER_TO_SCHEMA_ID.get(header.replace("\n",""), None) + + def __init__(self, xlsx_path, xlsx_sheet, table_schema) -> None: + self.xlsx_path = xlsx_path + self.table_schema = table_schema + self.xlsx_sheet = xlsx_sheet + + def value_to_json(self, value, schema): + val = value.value + if not val: + val = None + elif isinstance(val, float): + val = str(int(val)) + else: + val = str(val) + + return JsonValueFactory.convertValueToJson( + schema, "" if not val else val, + "" if not value.hyperlink else value.hyperlink.target) + + def row_to_json(self, header, row): + # Collect the properties from the row. 
+ data = {} + for i, value in enumerate(row): + key = self.HEADER_TO_SCHEMA_ID.get(header[i].value, None) + if key: + data[key] = self.value_to_json(value, self.table_schema.prop_by_id(key)) + # Order the properties according to the schema. + result = OrderedDict() + for prop in self.table_schema.props(): + result[prop.id()] = data[prop.id()] + + return result + + def to_json(self, output_dir): + workbook = openpyxl.load_workbook(self.xlsx_path, data_only=True) + sheet = workbook[self.xlsx_sheet] + header_row = [] + for i, row in enumerate(sheet.iter_rows()): + if not row[0].value: break + if i == 0: + header_row = row + else: + print("Processing", row[0].value) + output_obj = self.row_to_json(header_row, row) + with open(os.path.join(output_dir, output_obj["links"]["slug"]+".json"), "w") as output_file: + json.dump(output_obj, output_file, indent=2) + +class CLI(): + # Use XLSX instead of CSV to preserve the hyperlinks in the original data. + def xlsx_to_json(self, path_to_schema, output_dir, path_to_xlsx, xlsx_sheet): + table_schema = JsonSchemaWrapper(json.load(open(path_to_schema, "r"))) + xlsx = XLSXWrapper(path_to_xlsx, xlsx_sheet, table_schema) + xlsx.to_json(output_dir) + + def json_to_bundle(self, data_glob): + obj_list = [] + for name in glob.glob(data_glob): + obj_list.append(json.load(open(name, "r"), object_pairs_hook=OrderedDict)) + with open("bundle.json", "w") as output_file: + json.dump(obj_list, output_file, indent=2) + + def json_validate(self, path_to_schema, data_glob): + schema_obj = json.load(open(path_to_schema, "r")) + for name in glob.glob(data_glob): + vendor_obj = json.load(open(name, "r")) + validate(instance=vendor_obj, schema=schema_obj) + + +# Executes when your script is called from the command-line: +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='CLI for managing the VDB json files in /data') + parser.add_argument('command', help='The command you want to perform: xlsx_to_json') + 
parser.add_argument('-sp','--schema_path', help='Path to the schema.json file.') + parser.add_argument('-xp','--xlsx_path', help='Path to the legacy XSLX file.') + parser.add_argument('-xs','--xlsx_sheet', help='Sheet to use in the XLSX file.') + parser.add_argument('-od','--output_dir', help='Output directory for the vendor JSONs.') + parser.add_argument('-dd','--data_glob', help='Glob pattern for the vendor JSON data.') + + args = parser.parse_args() + + cli = CLI() + if args.command == 'xlsx_to_json': + cli.xlsx_to_json(args.schema_path, args.output_dir, args.xlsx_path, args.xlsx_sheet) + + if args.command == 'json_to_bundle': + cli.json_to_bundle(args.data_glob) + + if args.command == 'json_validate': + cli.json_validate(args.schema_path, args.data_glob) + diff --git a/docs/tools/vdb_table/raw_data_dec31.xlsx b/docs/tools/vdb_table/raw_data_dec31.xlsx new file mode 100644 index 000000000..0e65dfa70 Binary files /dev/null and b/docs/tools/vdb_table/raw_data_dec31.xlsx differ diff --git a/docs/tools/vdb_table/raw_data_jan9.xlsx b/docs/tools/vdb_table/raw_data_jan9.xlsx new file mode 100644 index 000000000..a5b20b324 Binary files /dev/null and b/docs/tools/vdb_table/raw_data_jan9.xlsx differ diff --git a/docs/tools/vdb_table/requirements.txt b/docs/tools/vdb_table/requirements.txt new file mode 100644 index 000000000..2fa2ee0af --- /dev/null +++ b/docs/tools/vdb_table/requirements.txt @@ -0,0 +1,22 @@ +annotated-types==0.6.0 +attrs==23.1.0 +certifi==2023.11.17 +charset-normalizer==3.3.2 +et-xmlfile==1.1.0 +humanfriendly==10.0 +idna==3.6 +inflection==0.5.1 +jsonschema==4.20.0 +jsonschema-specifications==2023.11.2 +openpyxl==3.1.2 +pyairtable==2.2.1 +pydantic==2.5.2 +pydantic_core==2.14.5 +python-dotenv==1.0.0 +referencing==0.32.0 +requests==2.31.0 +rpds-py==0.13.2 +six==1.16.0 +typing_extensions==4.9.0 +url-normalize==1.4.3 +urllib3==2.1.0 diff --git a/docs/tools/vdb_table/updateAirtable.py b/docs/tools/vdb_table/updateAirtable.py deleted file mode 100644 index 
ba2863ac9..000000000 --- a/docs/tools/vdb_table/updateAirtable.py +++ /dev/null @@ -1,121 +0,0 @@ -import os -import requests -from dotenv import load_dotenv -import json -from pyairtable import Api -from pyairtable.orm import Model, fields as F - -load_dotenv() -airtable_api_key = os.getenv("AIRTABLE_API_KEY") -BASE_ID = os.getenv("BASE_ID") -TABLE_ID = os.getenv("TABLE_ID") - - -api = Api(airtable_api_key) -base = api.base(BASE_ID) -table = api.table(BASE_ID, TABLE_ID) -# schema_obj = json.load(open("vendor.schema.json", "r")) - - -#TODO -def get_json_data(): - - return list_of_fields, records - - - -def update_table_fields(): - - #TODO - #code to check if there is a new field (column) added in the JSON, - #if yes then create that field in the table - - table_schema = table.schema() - return table_schema - - -def update_table_records(): - - list_of_fields = ["Database Name", - "Open-source & free to self-host", - "Managed Cloud Offering", - "Disk-based Index", - "Multi-tenancy Support", - "In-built Text Embeddings creation(Bring-your-own-model)", - "In-built Image Embedding creation", - "Metadata Filtering", - "Embeddable", - "Multiple vectors per point", - "Langchain integration", - "Llama index integration", - "Hybrid Search", - "BM25 support", - "Sparse Vectors Support", - "Full-text Search Engine", - "Facets (Aggregations with Count)", - "GeoSearch Support", - "Metadata/Doc size limit", - "Max Dimensions", - "Ephemeral Index support(without server)", - "Sharding", - "License", - "Development Language", - "Github Stars", - "First Release of Vector Search", - "Pricing", - "Calls LLM internally for RAG", - "Recommendations API", - "Personalization", - "User events (clickstream)" - ] - - class VDB(Model): - field_0 = F.TextField(list_of_fields[0]) - field_1 = F.RichTextField(list_of_fields[1]) - # field_2 = F.RichTextField(list_of_fields[2]) - # field_3 = F.RichTextField(list_of_fields[3]) - # field_4 = F.RichTextField(list_of_fields[4]) - # field_5 = 
F.RichTextField(list_of_fields[5]) - # field_6 = F.RichTextField(list_of_fields[6]) - # field_7 = F.RichTextField(list_of_fields[7]) - # field_8 = F.RichTextField(list_of_fields[8]) - # field_9 = F.RichTextField(list_of_fields[9]) - # field_10 = F.RichTextField(list_of_fields[10]) - # field_11 = F.RichTextField(list_of_fields[11]) - # field_12 = F.RichTextField(list_of_fields[12]) - # field_13 = F.RichTextField(list_of_fields[13]) - # field_14 = F.RichTextField(list_of_fields[14]) - # field_15 = F.RichTextField(list_of_fields[15]) - # field_16 = F.RichTextField(list_of_fields[16]) - # field_17 = F.RichTextField(list_of_fields[17]) - # field_18 = F.MultipleSelectField(list_of_fields[18]) - # field_19 = F.SelectField(list_of_fields[19]) - # field_20 = F.RichTextField(list_of_fields[20]) - # field_21 = F.RichTextField(list_of_fields[21]) - # field_22 = F.MultipleSelectField(list_of_fields[22]) - # field_23 = F.MultipleSelectField(list_of_fields[23]) - # field_24 = F.NumberField(list_of_fields[24]) - # field_25 = F.SelectField(list_of_fields[25]) - # field_26 = F.RichTextField(list_of_fields[26]) - # field_27 = F.RichTextField(list_of_fields[27]) - # field_28 = F.RichTextField(list_of_fields[28]) - # field_29 = F.RichTextField(list_of_fields[29]) - # field_30 = F.RichTextField(list_of_fields[30]) - - class Meta: - base_id = BASE_ID - table_name = TABLE_ID - api_key = airtable_api_key - - database_instance = VDB( - field_0="Pinecone", - field_1="✅", - ) - - assert database_instance.save() - print(database_instance.exists()) - print(database_instance.id) - - - -update_table_records() \ No newline at end of file diff --git a/docs/tools/vdb_table/vendor.schema.json b/docs/tools/vdb_table/vendor.schema.json new file mode 100644 index 000000000..f05474fa8 --- /dev/null +++ b/docs/tools/vdb_table/vendor.schema.json @@ -0,0 +1,107 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "vdb_table/schema/vendor", + "type": "object", + + "properties": { + 
"name": {"type": "string", "$comment": "About | Vendor | " }, + "links": {"allOf": [{"$ref": "#/$defs/links"}], "$comment": "About | Links | " }, + "oss": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "About | OSS | The code-base is open source and users can self-host it for free." }, + "license": {"allOf": [{"$ref": "#/$defs/stringWithSource"}], "$comment": "About | License | The license the source code is released under." }, + "dev_languages": {"allOf": [{"$ref": "#/$defs/stringListWithSource"}], "$comment": "About | Dev Lang | The language the database is developed in." }, + "github_stars": {"type": "integer", "$comment": "About | GitHub ⭐ | The number of stars for the core product repository." }, + "vector_launch_year": {"type": "integer", "$comment": "About | VSS Launch | The year of the first release for the vector search functionality." }, + "metadata_filter": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Search | Filters | Metadata filtering support within vector search - allowing users to refine results based on additional contextual informatio and enhancing precision in search queries. Not to be confused with filters/faceting in Lucene based keyword search." }, + "hybrid_search": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Search | Hybrid Search | Built-in blending of the 2 lists (keyword & vector search results) being done natively within the DB using something like Reciprocal Rank Fusion." }, + "facets": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Search | Facets | Built-in aggregations on facets and facet cardinality calculation (similar to Lucene)." }, + "geo_search": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Search | Geo Search | Built-in geospatial index for location queries." }, + "multi_vec": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Search | Multi-Vector | Ability to create and index multiple vectors for a given document." 
}, + "sparse_vectors": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Search | Sparse | Bring your own sparse vectors (e.g. TF-IDF) and have them be indexed and available for retrieval in the database." }, + "bm25": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Search | BM25 | Built-in tokenization, indexing, and scoring for BM25." }, + "full_text": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Search | Full-Text | Built-in inverted index for terms, capable of boolean queries (similar to Lucene)." }, + "embeddings_text": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Models | Text Model | Ability to plug in commonly-used embedding models for text vectorization (Sentence-transformers/HuggingFace feature extraction models)." }, + "embeddings_image": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Models | Image Model | Ability to plug in image embedding models for vectorization (e.g. CLIP)." }, + "embeddings_structured": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Models | Struct Model | Ability to plug in models used to embed structured data like user clicks and graphs." }, + "rag": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "APIs | RAG | Calls an LLM internally for a full-stack Retrieval Augmented Generation solution." }, + "recsys": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "APIs | RecSys | Built-in support for recommendation system functionality, e.g. a solution that refines the results based on behavioral data." }, + "langchain": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "APIs | LangChain | Built-in integration with the LangChain library." }, + "llamaindex": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "APIs | LlamaIndex | Built-in integration with the LlamaIndex library." 
}, + "managed_cloud": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Managed | A managed cloud offering is available." }, + "pricing": {"allOf": [{"$ref": "#/$defs/stringWithSource"}], "$comment": "Ops | Pricing | The pricing models vary widely, so this field is unstructured and ideally points to a pricing page." }, + "in_process": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | In-process | Ability to run embedded within the application process, which leads to a simpler deployment and management in situations when the whole workload fits into one machine." }, + "multi_tenancy": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Multi-Tenant | Multi-tenancy - the ability to store multiple isolated indexes within one database instance." }, + "disk_index": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Disk Index | Ability to store the vector index state on disk." }, + "ephemeral": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Ephemeral Index | Ephemeral index support without a server." }, + "sharding": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Sharding | The search index is sharded across multiple machines." }, + "doc_size": {"allOf": [{"$ref": "#/$defs/bytesWithSource"}], "$comment": "Ops | Document Size | Maximum size of a stored document." }, + "vector_dims": {"allOf": [{"$ref": "#/$defs/integerWithSource"}], "$comment": "Ops | Vector Dims | Maximum number of vector dimensions." 
} + + }, + + "$defs": { + "links": { + "$id": "links", + "type": "object", + "properties": { + "docs": {"type": "string"}, + "github": {"type": "string"}, + "website": {"type": "string"}, + "vendor_discussion": {"type": "string"}, + "poc_github": {"type": "string"}, + "slug": {"type": "string"} + } + }, + "featureWithSource": { + "$id": "featureWithSource", + "type": "object", + "properties": { + "support": {"enum": ["", "none", "partial", "full"]}, + "source_url": {"type": "string"}, + "comment": {"type": "string"} + } + }, + "stringWithSource": { + "$id": "stringWithSource", + "type": "object", + "properties": { + "value": {"type": "string"}, + "source_url": {"type": "string"}, + "comment": {"type": "string"} + } + }, + "stringListWithSource": { + "$id": "stringListWithSource", + "type": "object", + "properties": { + "value": { + "type": "array", + "items": { + "type": "string" + } + }, + "source_url": {"type": "string"}, + "comment": {"type": "string"} + } + }, + "integerWithSource": { + "$id": "integerWithSource", + "type": "object", + "properties": { + "value": {"type": "integer"}, + "unlimited": {"type": "boolean"}, + "source_url": {"type": "string"}, + "comment": {"type":"string"} + } + }, + "bytesWithSource": { + "$id": "bytesWithSource", + "type": "object", + "properties": { + "bytes": {"type": "integer"}, + "unlimited": {"type": "boolean"}, + "source_url": {"type": "string"}, + "comment": {"type":"string"} + } + } + } +}