From 6110af79dec87ca04853858a24701419a9481ed3 Mon Sep 17 00:00:00 2001 From: David Mezzetti <561939+davidmezzetti@users.noreply.github.com> Date: Fri, 12 Jan 2024 12:50:46 -0500 Subject: [PATCH 1/6] Update txtai.json --- docs/tools/vdb_table/data/txtai.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/tools/vdb_table/data/txtai.json b/docs/tools/vdb_table/data/txtai.json index 1bb927789..b515d903e 100644 --- a/docs/tools/vdb_table/data/txtai.json +++ b/docs/tools/vdb_table/data/txtai.json @@ -2,7 +2,7 @@ "name": "txtai", "links": { "docs": "https://neuml.github.io/txtai/", - "github": "https://neuml.github.io/txtai/", + "github": "https://github.com/neuml/txtai", "website": "https://neuml.github.io/txtai/", "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/86", "poc_github": "https://github.com/davidmezzetti", @@ -25,7 +25,7 @@ "source_url": "", "comment": "" }, - "github_stars": 5500, + "github_stars": 5972, "vector_launch_year": 2020, "metadata_filter": { "support": "full", @@ -113,9 +113,9 @@ "comment": "" }, "in_process": { - "support": "none", + "support": "full", "source_url": "", - "comment": "" + "comment": "txtai embeddings indexes can be loaded and run directly in a Python application" }, "multi_tenancy": { "support": "", @@ -123,9 +123,9 @@ "comment": "" }, "disk_index": { - "support": "", + "support": "full", "source_url": "", - "comment": "" + "comment": "txtai embeddings indexes can be saved to a directory, tar.gz file and/or to object storage such as S3" }, "ephemeral": { "support": "full", @@ -149,4 +149,4 @@ "source_url": "", "comment": "" } -} \ No newline at end of file +} From b212fde7425a3eb0e998bd5a7f69169cd6a97035 Mon Sep 17 00:00:00 2001 From: Dhruv Anand <105786647+dhruv-anand-aintech@users.noreply.github.com> Date: Mon, 15 Jan 2024 17:11:12 +0530 Subject: [PATCH 2/6] Add reference requirement in PR checklist and README --- .github/PULL_REQUEST_TEMPLATE/vdb-table_pr.md | 1 + 
docs/tools/vdb_table/README.md | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE/vdb-table_pr.md b/.github/PULL_REQUEST_TEMPLATE/vdb-table_pr.md index a0c9a5aa3..616f59010 100644 --- a/.github/PULL_REQUEST_TEMPLATE/vdb-table_pr.md +++ b/.github/PULL_REQUEST_TEMPLATE/vdb-table_pr.md @@ -8,3 +8,4 @@ labels: 'vdb comparison' ## Checklist before requesting a review - [ ] I have followed the [contribution guidelines](https://github.com/superlinked/VectorHub/tree/main/docs/tools/vdb_table) +- [ ] I have provided a reference for each attribute for which I'm adding a "support" claim. diff --git a/docs/tools/vdb_table/README.md b/docs/tools/vdb_table/README.md index 67f0fccd2..c52c8876a 100644 --- a/docs/tools/vdb_table/README.md +++ b/docs/tools/vdb_table/README.md @@ -70,7 +70,11 @@ tools/ Attributes inside vendorX.json has the following properties -- `support`: Whose values can be `[ "", "none", "partial", "full" ]` indicating on confidence levels, for that attribute support. +- `support`: Whose values can be `[ "", "none", "partial", "full" ]` indicating on confidence levels, for that attribute support. NOTE: Each change where a "support" claim is being added MUST include a reference to documentation or an example of the functionality being described. + - `""` means the cell will be blank. + - `"none"` means the cell will have a ❌. + - `"partial"` means the cell will have a 🟨. + - `"full"` means the cell will have a ✅. - `value`: `license` and `dev_languages` have this property to support values about license details and languages (as a list). - `source_url`: To provide documentation links, or evidence supporting the attribute values. It is shown as the 'external link' button in the cell. - `comment`: Any other useful information that will be shown on hover and with the info icon. 
From 7e9b5c6ee1e77ed618793f005f002a470e2a9235 Mon Sep 17 00:00:00 2001 From: Dhruv Anand <105786647+dhruv-anand-aintech@users.noreply.github.com> Date: Mon, 15 Jan 2024 19:33:04 +0530 Subject: [PATCH 3/6] Clarify disk_index definition --- docs/tools/vdb_table/vendor.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/vdb_table/vendor.schema.json b/docs/tools/vdb_table/vendor.schema.json index f05474fa8..3455344dc 100644 --- a/docs/tools/vdb_table/vendor.schema.json +++ b/docs/tools/vdb_table/vendor.schema.json @@ -30,7 +30,7 @@ "pricing": {"allOf": [{"$ref": "#/$defs/stringWithSource"}], "$comment": "Ops | Pricing | The pricing models very widely, so this field is unstructured and ideally points to a pricing page." }, "in_process": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | In-process | Ability to run embedded within the application process, which leads to a simpler deployment and management in situations when the whole workload fits into one machine." }, "multi_tenancy": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Multi-Tenant | Multi-tenancy - the ability to store multiple isolated indexes within one database instance." }, - "disk_index": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Disk Index | Ability to store the vector index state on disk." }, + "disk_index": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Disk Index | The vector search index can be held on disk while in use, as opposed to holding it in RAM, which makes the management of large indices more affordable" }, "ephemeral": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Ephemeral Index | Ephemeral index support without a server." }, "sharding": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Sharding | The search index is sharded across multiple machines." 
}, "doc_size": {"allOf": [{"$ref": "#/$defs/bytesWithSource"}], "$comment": "Ops | Document Size | Maximum size of a stored document." }, From 6c65657d026934a84defce0f46d19fb28f963862 Mon Sep 17 00:00:00 2001 From: Dhruv Anand <105786647+dhruv-anand-aintech@users.noreply.github.com> Date: Mon, 15 Jan 2024 19:49:15 +0530 Subject: [PATCH 4/6] nit --- docs/tools/vdb_table/vendor.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/vdb_table/vendor.schema.json b/docs/tools/vdb_table/vendor.schema.json index 3455344dc..fa286978f 100644 --- a/docs/tools/vdb_table/vendor.schema.json +++ b/docs/tools/vdb_table/vendor.schema.json @@ -30,7 +30,7 @@ "pricing": {"allOf": [{"$ref": "#/$defs/stringWithSource"}], "$comment": "Ops | Pricing | The pricing models very widely, so this field is unstructured and ideally points to a pricing page." }, "in_process": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | In-process | Ability to run embedded within the application process, which leads to a simpler deployment and management in situations when the whole workload fits into one machine." }, "multi_tenancy": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Multi-Tenant | Multi-tenancy - the ability to store multiple isolated indexes within one database instance." }, - "disk_index": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Disk Index | The vector search index can be held on disk while in use, as opposed to holding it in RAM, which makes the management of large indices more affordable" }, + "disk_index": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Disk Index | The vector search index can be held on disk while in use, as opposed to holding it in RAM, which makes the management of large indices more affordable." }, "ephemeral": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Ephemeral Index | Ephemeral index support without a server." 
}, "sharding": {"allOf": [{"$ref": "#/$defs/featureWithSource"}], "$comment": "Ops | Sharding | The search index is sharded across multiple machines." }, "doc_size": {"allOf": [{"$ref": "#/$defs/bytesWithSource"}], "$comment": "Ops | Document Size | Maximum size of a stored document." }, From dd154b9da1f84c26ddc601e0b6d36b1c57dc6fe8 Mon Sep 17 00:00:00 2001 From: David Mezzetti <561939+davidmezzetti@users.noreply.github.com> Date: Mon, 15 Jan 2024 09:21:09 -0500 Subject: [PATCH 5/6] Update txtai.json --- docs/tools/vdb_table/data/txtai.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tools/vdb_table/data/txtai.json b/docs/tools/vdb_table/data/txtai.json index b515d903e..89bbabd15 100644 --- a/docs/tools/vdb_table/data/txtai.json +++ b/docs/tools/vdb_table/data/txtai.json @@ -123,9 +123,9 @@ "comment": "" }, "disk_index": { - "support": "full", - "source_url": "", - "comment": "txtai embeddings indexes can be saved to a directory, tar.gz file and/or to object storage such as S3" + "support": "partial", + "source_url": "https://neuml.github.io/txtai/embeddings/configuration/ann/#faiss", + "comment": "txtai embeddings indexes support on disk operations via mmap" }, "ephemeral": { "support": "full", From c560834d7eaa2068208cb83501ef7fc2fd7002dc Mon Sep 17 00:00:00 2001 From: Dhruv Anand <105786647+dhruv-anand-aintech@users.noreply.github.com> Date: Mon, 15 Jan 2024 19:51:16 +0530 Subject: [PATCH 6/6] Add option to link to code directly --- docs/tools/vdb_table/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/vdb_table/README.md b/docs/tools/vdb_table/README.md index c52c8876a..bf8d1171a 100644 --- a/docs/tools/vdb_table/README.md +++ b/docs/tools/vdb_table/README.md @@ -70,7 +70,7 @@ tools/ Attributes inside vendorX.json has the following properties -- `support`: Whose values can be `[ "", "none", "partial", "full" ]` indicating on confidence levels, for that attribute support. 
NOTE: Each change where a "support" claim is being added MUST include a reference to documentation or an example of the functionality being described. +- `support`: Whose values can be `[ "", "none", "partial", "full" ]` indicating the confidence level of support for that attribute. NOTE: Each change where a "support" claim is being added MUST include either i) a reference to documentation, ii) an example of the functionality being described, or iii) a link to the actual code implementing the specific functionality. - `""` means the cell will be blank. - `"none"` means the cell will have a ❌. - `"partial"` means the cell will have a 🟨.