-
Notifications
You must be signed in to change notification settings - Fork 107
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* VDB table data - json schema, import utility from the original sheet & the vendor json files. * Automatic json schema validation for all vendor json files in PRs. * Automatic bundling of vendor json files and push to GCS when PRs merge to main.
- Loading branch information
Showing
51 changed files
with
6,080 additions
and
277 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
name: Bundle JSON files | ||
on: | ||
push: | ||
branches: | ||
- main | ||
|
||
jobs: | ||
bundle: | ||
runs-on: ubuntu-latest | ||
permissions: | ||
contents: 'read' | ||
id-token: 'write' | ||
|
||
steps: | ||
- name: Checkout | ||
uses: actions/checkout@v4 | ||
|
||
- name: Set up Python 3.12 | ||
uses: actions/setup-python@v3 | ||
with: | ||
python-version: '3.12' | ||
|
||
- uses: getsentry/[email protected] | ||
id: venv | ||
with: | ||
python-version: '3.12' | ||
cache-dependency-path: | | ||
docs/tools/vdb_table/requirements.txt | ||
install-cmd: pip install -r docs/tools/vdb_table/requirements.txt | ||
|
||
- name: Generate bundle | ||
id: bundle | ||
run: | | ||
python docs/tools/vdb_table/data_utils.py json_to_bundle -dd "docs/tools/vdb_table/data/*" | ||
- id: 'auth' | ||
uses: 'google-github-actions/auth@v2' | ||
with: | ||
workload_identity_provider: 'projects/903342166386/locations/global/workloadIdentityPools/github-pool/providers/github-provider' | ||
service_account: '[email protected]' | ||
|
||
- id: 'upload-bundle' | ||
uses: 'google-github-actions/upload-cloud-storage@v2' | ||
with: | ||
path: './bundle.json' | ||
destination: 'vectorhub-bundle/vectorhub-bundle' | ||
|
||
- id: 'upload-schema' | ||
uses: 'google-github-actions/upload-cloud-storage@v2' | ||
with: | ||
path: 'docs/tools/vdb_table/vendor.schema.json' | ||
destination: 'vectorhub-bundle/vendor.schema.json' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,5 +18,5 @@ jobs: | |
uses: GrantBirki/[email protected] | ||
with: | ||
base_dir: docs/tools/vdb_table/data | ||
json_schema: docs/tools/vdb_table/data/vendor.schema.json | ||
json_schema: docs/tools/vdb_table/vendor.schema.json | ||
json_schema_version: "draft-2020-12" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
venv/ | ||
.env |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
{ | ||
"name": "Activeloop Deep Lake", | ||
"links": { | ||
"docs": "https://docs.activeloop.ai/", | ||
"github": "https://github.com/activeloopai/deeplake", | ||
"website": "https://www.activeloop.ai/", | ||
"vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/97", | ||
"poc_github": "https://github.com/davidbuniat", | ||
"slug": "activeloop" | ||
}, | ||
"oss": { | ||
"support": "full", | ||
"source_url": "https://github.com/activeloopai/deeplake", | ||
"comment": "https://github.com/activeloopai/deeplake" | ||
}, | ||
"license": { | ||
"value": "MPL 2.0", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"dev_languages": { | ||
"value": [ | ||
"python", | ||
"c++" | ||
], | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"github_stars": 7200, | ||
"vector_launch_year": 2023, | ||
"metadata_filter": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"hybrid_search": { | ||
"support": "", | ||
"source_url": "https://docs.activeloop.ai/performance-features/querying-datasets/query-syntax", | ||
"comment": "While you can run embedding search + contains(text, 'keyword') or multiple of those (keyword search inside a text tensor), since BM25 is not available I wouldn't call it a full hybrid search. https://docs.activeloop.ai/performance-features/querying-datasets/query-syntax" | ||
}, | ||
"facets": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"geo_search": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "although doable with UDFs and location storage" | ||
}, | ||
"multi_vec": { | ||
"support": "full", | ||
"source_url": "https://docs.activeloop.ai/technical-details/data-layout", | ||
"comment": "https://docs.activeloop.ai/technical-details/data-layout" | ||
}, | ||
"sparse_vectors": { | ||
"support": "partial", | ||
"source_url": "https://docs.deeplake.ai/en/latest/Htypes.html", | ||
"comment": "no native sparse vector support, although it supports all numpy arrays hence can also store sparse numpy arrays" | ||
}, | ||
"bm25": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"full_text": { | ||
"support": "partial", | ||
"source_url": "", | ||
"comment": "you can search, e.g. for keywords with the TQL/SQL contains(...) function, but I assume that by full-text search engine you mean more than just text search" | ||
}, | ||
"embeddings_text": { | ||
"support": "", | ||
"source_url": "https://docs.activeloop.ai/quickstart", | ||
"comment": "https://docs.activeloop.ai/quickstart#creating-your-first-vector-store" | ||
}, | ||
"embeddings_image": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"embeddings_structured": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"rag": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"recsys": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"langchain": { | ||
"support": "full", | ||
"source_url": "https://python.langchain.com/docs/integrations/vectorstores/activeloop_deeplake", | ||
"comment": "https://python.langchain.com/docs/integrations/vectorstores/activeloop_deeplake" | ||
}, | ||
"llamaindex": { | ||
"support": "full", | ||
"source_url": "https://docs.llamaindex.ai/en/stable/examples/vector_stores/DeepLakeIndexDemo.html", | ||
"comment": "https://docs.llamaindex.ai/en/stable/examples/vector_stores/DeepLakeIndexDemo.html" | ||
}, | ||
"managed_cloud": { | ||
"support": "full", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"pricing": { | ||
"value": "https://www.activeloop.ai/pricing/ Free up to 200GB, then $100 per 30M embeddings (200GB) https://www.activeloop.ai/resources/deep-lake-hnsw-index-rapidly-query-35-m-vectors-save-80/", | ||
"source_url": "https://www.activeloop.ai/pricing/", | ||
"comment": "" | ||
}, | ||
"in_process": { | ||
"support": "full", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"multi_tenancy": { | ||
"support": "full", | ||
"source_url": "https://docs.deeplake.ai/en/latest/deeplake.html", | ||
"comment": "create a dataset per tenant, similar to collections/namespaces" | ||
}, | ||
"disk_index": { | ||
"support": "full", | ||
"source_url": "https://docs.activeloop.ai/performance-features/index-for-ann-search", | ||
"comment": "Custom Memory optimized HNSW that sits on top of an object storage (including FS)" | ||
}, | ||
"ephemeral": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"sharding": { | ||
"support": "partial", | ||
"source_url": "https://docs.activeloop.ai/technical-details/data-layout", | ||
"comment": "" | ||
}, | ||
"doc_size": { | ||
"bytes": 0, | ||
"unlimited": true, | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"vector_dims": { | ||
"value": 0, | ||
"unlimited": true, | ||
"source_url": "", | ||
"comment": "" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
{ | ||
"name": "Anari AI", | ||
"links": { | ||
"docs": "", | ||
"github": "https://github.com/Anari-AI", | ||
"website": "https://anari.ai/vector-acceleration-engine/", | ||
"vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/99", | ||
"poc_github": "https://github.com/jovan-stojanovic", | ||
"slug": "anariai" | ||
}, | ||
"oss": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"license": { | ||
"value": "Proprietary", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"dev_languages": { | ||
"value": [ | ||
"" | ||
], | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"github_stars": 0, | ||
"vector_launch_year": 2023, | ||
"metadata_filter": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"hybrid_search": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"facets": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"geo_search": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"multi_vec": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"sparse_vectors": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"bm25": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"full_text": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"embeddings_text": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"embeddings_image": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"embeddings_structured": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"rag": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"recsys": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"langchain": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"llamaindex": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"managed_cloud": { | ||
"support": "full", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"pricing": { | ||
"value": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"in_process": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"multi_tenancy": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"disk_index": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"ephemeral": { | ||
"support": "none", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"sharding": { | ||
"support": "", | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"doc_size": { | ||
"bytes": 0, | ||
"unlimited": true, | ||
"source_url": "", | ||
"comment": "" | ||
}, | ||
"vector_dims": { | ||
"value": 0, | ||
"unlimited": false, | ||
"source_url": "", | ||
"comment": "" | ||
} | ||
} |
Oops, something went wrong.