Feature/change api endpoint #101

Status: Open. Wants to merge 2 commits into base: master.
8 changes: 1 addition & 7 deletions utils/automate_api.py
@@ -2,23 +2,17 @@
import os
from openapi_to_sdk.sdk_automation import PythonSDKBuilder

url="https://vectorai-development-api.azurewebsites.net"
# url = "https://api.vctr.ai"
url = "https://vectorai-production-api.azurewebsites.net"
sdk = PythonSDKBuilder(
url=url,
# url="https://vectorai-development-api.azurewebsites.net",
# url='https://vecdb-aueast-api.azurewebsites.net',
inherited_properties=['username', 'api_key', 'url'],
decorators=[
'retry()',
"return_curl_or_response('json')"],
override_param_defaults=dict(
min_score=None,
cursor=None,
# url='https://vecdb-aueast-api.azurewebsites.net',
url=url,
# sort=False,
sort_by_created_at_date=False,
),
internal_functions=[
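One way to avoid the commented-out URL juggling above would be an environment-variable override. This is only a sketch, not part of this PR; the `VECTORAI_API_URL` variable name is hypothetical, though the script already imports `os`:

```python
import os

# Hypothetical override, not part of this PR: pick up the endpoint from
# the environment and fall back to the new production URL otherwise.
url = os.environ.get(
    "VECTORAI_API_URL",
    "https://vectorai-production-api.azurewebsites.net",
)
```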
141 changes: 60 additions & 81 deletions vectorai/api/api.py
@@ -1129,7 +1129,7 @@ def bulk_encode(self, encoders, documents, **kwargs):

@retry()
@return_curl_or_response('json')
def predict_knn_regression(self, collection_name, vector, search_field, target_field, impute_value, k=5, weighting=True, predict_operation="mean", **kwargs):
def predict_knn_regression(self, collection_name, vector, search_field, target_field, impute_value, k=5, weighting=True, predict_operation="mean", include_search_results=True, **kwargs):
"""Predict KNN regression.
Predict with KNN regression using normal search.

@@ -1145,6 +1145,7 @@ def predict_knn_regression(self, collection_name, vector, search_field, target_f
weighting: weighting
impute_value: What value to fill if target field is missing.
predict_operation: How to predict using the vectors.
include_search_results: If True, returns the results as well.

"""
return requests.post(
@@ -1160,6 +1161,7 @@ def predict_knn_regression(self, collection_name, vector, search_field, target_f
weighting=weighting,
impute_value=impute_value,
predict_operation=predict_operation,
include_search_results=include_search_results,
))
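A hedged usage sketch for the new `include_search_results` flag. It assumes the method is reachable from `ViClient` (defined in `vectorai/client.py` below) like the other API calls; the collection, vector, and field names are placeholders:

```python
from vectorai import ViClient

vi_client = ViClient(username="my_user", api_key="my_key")

# Predict the "price" field from the 5 nearest neighbours and also
# return the underlying search hits alongside the prediction.
response = vi_client.predict_knn_regression(
    collection_name="houses",
    vector=[0.1, 0.2, 0.3],
    search_field="listing_vector_",
    target_field="price",
    impute_value=0,
    k=5,
    weighting=True,
    predict_operation="mean",
    include_search_results=True,
)
```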

@retry()
@@ -1241,6 +1243,10 @@ def filters(self, collection_name, filters=[], page=1, page_size=20, asc=False,
These are the available conditions:

"==", "!=", ">=", ">", "<", "<="

To combine filters with OR logic instead of the default AND, add `{"strict": "must_or"}` to the filters query (see the sketch after this hunk).


Args
========
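A sketch of the OR behaviour described in the filters docstring above, reusing the `vi_client` from the earlier sketch. Only the `{"strict": "must_or"}` entry comes from the docstring; the per-filter keys (`field`, `condition`, `condition_value`) and their placement are assumptions for illustration:

```python
# Match documents where price <= 20 OR rating >= 4; without the
# {"strict": "must_or"} entry the filters would be ANDed together.
results = vi_client.filters(
    collection_name="products",
    filters=[
        {"field": "price", "condition": "<=", "condition_value": 20},
        {"field": "rating", "condition": ">=", "condition_value": 4},
        {"strict": "must_or"},
    ],
    page=1,
    page_size=20,
)
```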
@@ -2121,7 +2127,7 @@ def text_chunking(self, collection_name, text_field, chunk_field, insert_results

@retry()
@return_curl_or_response('json')
def text_chunking_encoder(self, collection_name, text_field, chunk_field, insert_results_to_seperate_collection_name, encoder_task="text", refresh=True, store_to_pipeline=True, **kwargs):
def text_chunking_encoder(self, collection_name, text_field, chunk_field, insert_results_to_seperate_collection_name, encoder_task="text", refresh=True, store_to_pipeline=True, alias="", **kwargs):
"""Chunk a text field and encode the chunks
Split text into separate sentences. Encode each sentence to create chunkvectors.
These are stored as \_chunkvector\_. The chunk field created is `field` + \_chunk\_.
@@ -2137,6 +2143,7 @@ def text_chunking_encoder(self, collection_name, text_field, chunk_field, insert
refresh: Whether to refresh the whole collection and re-encode all to vectors
insert_results_to_seperate_collection_name: If specified, the chunks will be inserted into a separate collection. Default is None, which means no separate collection is used.
store_to_pipeline: Whether to store the encoder to the chunking pipeline
alias: If an alias is given, the created vector field is renamed to field_{alias}_chunkvector_

"""
return requests.post(
@@ -2151,6 +2158,7 @@ def text_chunking_encoder(self, collection_name, text_field, chunk_field, insert
refresh=refresh,
insert_results_to_seperate_collection_name=insert_results_to_seperate_collection_name,
store_to_pipeline=store_to_pipeline,
alias=alias,
))
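A usage sketch for the new `alias` parameter, again via the assumed `vi_client`; the collection and field names are placeholders, and the resulting vector field name follows the field_{alias}_chunkvector_ pattern described in the docstring:

```python
# Chunk and encode "description"; with alias="minilm" the created
# vector field is renamed per the field_{alias}_chunkvector_ pattern.
vi_client.text_chunking_encoder(
    collection_name="articles",
    text_field="description",
    chunk_field="description_chunk_",
    insert_results_to_seperate_collection_name=None,
    alias="minilm",
)
```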

@retry()
@@ -2178,6 +2186,31 @@ def process_pdf(self, collection_name, file_url, filename, **kwargs):
filename=filename,
))

@retry()
@return_curl_or_response('json')
def bulk_process_pdf(self, collection_name, file_urls, filenames, **kwargs):
"""Process multiple PDFs
Insert multiple PDFs into Vector AI.

Args
========
username: Username
api_key: Api Key, you can request it from request_api_key
collection_name: What collection to insert the PDF into
file_urls: The URLs of the PDF file blobs
filenames: The names of the PDF files

"""
return requests.post(
url=self.url+'/collection/job/bulk_process_pdf',
json=dict(
username=self.username,
api_key=self.api_key,
collection_name=collection_name,
file_urls=file_urls,
filenames=filenames,
))
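A sketch of the new bulk endpoint; the URLs and filenames are placeholders, and `file_urls`/`filenames` are assumed to be parallel lists:

```python
# Queue two PDFs for processing in a single call.
vi_client.bulk_process_pdf(
    collection_name="reports",
    file_urls=[
        "https://example.com/q1_report.pdf",
        "https://example.com/q2_report.pdf",
    ],
    filenames=["q1_report.pdf", "q2_report.pdf"],
)
```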

@retry()
@return_curl_or_response('json')
def process_doc(self, collection_name, file_url, filename, **kwargs):
@@ -2203,6 +2236,31 @@ def process_doc(self, collection_name, file_url, filename, **kwargs):
filename=filename,
))

@retry()
@return_curl_or_response('json')
def bulk_process_doc(self, collection_name, file_urls, filenames, **kwargs):
"""Process multiple DOC or DOCX files
Insert multiple Word documents into Vector AI.

Args
========
username: Username
api_key: Api Key, you can request it from request_api_key
collection_name: What collection to insert the Word documents into
file_urls: The URLs of the DOC or DOCX file blobs
filenames: The names of the DOC or DOCX files

"""
return requests.post(
url=self.url+'/collection/job/bulk_process_doc',
json=dict(
username=self.username,
api_key=self.api_key,
collection_name=collection_name,
file_urls=file_urls,
filenames=filenames,
))
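The same pattern applies to the Word-document variant; names below are placeholders:

```python
# Bulk-insert Word documents, mirroring bulk_process_pdf above.
vi_client.bulk_process_doc(
    collection_name="contracts",
    file_urls=["https://example.com/nda.docx"],
    filenames=["nda.docx"],
)
```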

@retry()
@return_curl_or_response('json')
def copy_collection_from_another_user(self, collection_name, source_collection_name, source_username, source_api_key, **kwargs):
@@ -4123,82 +4181,3 @@ def tag_documents_from_hub(self, tag_collection_name, vector_field, hub_username
hub_api_key=hub_api_key,
))

@retry()
@return_curl_or_response('json')
def rank_comparator(self, ranked_list_1, ranked_list_2, **kwargs):
"""Compare ranks between two results lists.
Compare the ranks between two results lists in VecDB.

Args
========
username: Username
api_key: Api Key, you can request it from request_api_key
ranked_list_1: First ranked list
ranked_list_2: Second ranked list

"""
return requests.post(
url=self.url+'/experimentation/rank_comparator',
json=dict(
username=self.username,
api_key=self.api_key,
ranked_list_1=ranked_list_1,
ranked_list_2=ranked_list_2,
))

@retry()
@return_curl_or_response('json')
def bias_indicator(self, anchor_documents, documents, metadata_field, vector_field, **kwargs):
"""Compare bias of documents against anchor documents
Compare bias of documents against anchor documents

Args
========
username: Username
api_key: Api Key, you can request it from request_api_key
anchor_documents: Anchor documents to compare other documents against.
documents: Documents to compare against the anchor documents
metadata_field: Field from which the vector was derived
vector_field: Vector field to compare against

"""
return requests.post(
url=self.url+'/experimentation/bias_indicator',
json=dict(
username=self.username,
api_key=self.api_key,
anchor_documents=anchor_documents,
documents=documents,
metadata_field=metadata_field,
vector_field=vector_field,
))

@retry()
@return_curl_or_response('json')
def cluster_comparator(self, collection_name, cluster_field, cluster_value, vector_field, alias, **kwargs):
"""Compare clusters
Compare the clusters for the cluster comparator.

Args
========
username: Username
api_key: Api Key, you can request it from request_api_key
collection_name: the name of the collection
cluster_field: the cluster field
cluster_value: the cluster value to compare on
vector_field: The vector field that has been clustered
alias: The alias of the vector field

"""
return requests.post(
url=self.url+'/experimentation/cluster_comparator',
json=dict(
username=self.username,
api_key=self.api_key,
collection_name=collection_name,
cluster_field=cluster_field,
cluster_value=cluster_value,
vector_field=vector_field,
alias=alias,
))

4 changes: 1 addition & 3 deletions vectorai/client.py
@@ -31,9 +31,7 @@ class ViClient(ViWriteClient, ViAnalyticsClient):
def __init__(self,
username: str=None,
api_key: str=None,
# Old API URL: https://vecdb-aueast-api.azurewebsites.net
# url: str="https://vectorai-development-api-vectorai-test-api.azurewebsites.net/",
url: str="https://vectorai-development-api.azurewebsites.net",
url: str="https://vectorai-production-api.azurewebsites.net",
analytics_url="https://vector-analytics.vctr.ai", verbose: bool = True) -> None:
super().__init__(username, api_key, url)
if username is None:
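With this change, a plain instantiation now targets the production API. A sketch, with placeholder credentials:

```python
from vectorai import ViClient

# Defaults to the new production endpoint after this PR.
vi_client = ViClient(username="my_user", api_key="my_key")

# The development endpoint can still be selected explicitly.
dev_client = ViClient(
    username="my_user",
    api_key="my_key",
    url="https://vectorai-development-api.azurewebsites.net",
)
```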