From 5c5ca98cafdc9bce0dc09fdf20533ee0c481b89e Mon Sep 17 00:00:00 2001 From: hibajamal Date: Wed, 29 Nov 2023 12:53:46 +0100 Subject: [PATCH] rev --- .../examples/qdrant_zendesk/code/qdrant.py | 35 ++++++++++++------- .../qdrant_zendesk/code/qdrant-snippets.py | 1 + 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/docs/examples/docs/examples/qdrant_zendeskdocs/examples/qdrant_zendesk/code/qdrant.py b/docs/examples/docs/examples/qdrant_zendeskdocs/examples/qdrant_zendesk/code/qdrant.py index c60bbcb47a..c5ae642d05 100644 --- a/docs/examples/docs/examples/qdrant_zendeskdocs/examples/qdrant_zendesk/code/qdrant.py +++ b/docs/examples/docs/examples/qdrant_zendeskdocs/examples/qdrant_zendesk/code/qdrant.py @@ -1,25 +1,29 @@ -from typing import Iterator, Optional, Dict, Any, Tuple +from typing import Optional, Dict, Any, Tuple import dlt from dlt.common import pendulum from dlt.common.time import ensure_pendulum_datetime -from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime -from dlt.extract.source import DltResource +from dlt.common.typing import TAnyDateTime from dlt.sources.helpers.requests import client from dlt.destinations.qdrant import qdrant_adapter from qdrant_client import QdrantClient +from dlt.common.configuration.inject import with_config + +# helper function to fix the datetime format def _parse_date_or_none(value: Optional[str]) -> Optional[pendulum.DateTime]: if not value: return None return ensure_pendulum_datetime(value) +# modify dates to return datetime objects instead def _fix_date(ticket): ticket["updated_at"] = _parse_date_or_none(ticket["updated_at"]) ticket["created_at"] = _parse_date_or_none(ticket["created_at"]) ticket["due_at"] = _parse_date_or_none(ticket["due_at"]) return ticket +# function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk @dlt.source(max_table_nesting=2) def zendesk_support( credentials: Dict[str, str] = dlt.secrets.value, @@ -54,12 +58,12 @@ def zendesk_support( 
subdomain = credentials["subdomain"] url = f"https://{subdomain}.zendesk.com" - # we use `append` write disposition, because objects in ticket_events endpoint are never updated + # we use `append` write disposition, because objects in tickets_data endpoint are never updated # so we do not need to merge # we set primary_key so allow deduplication of events by the `incremental` below in the rare case # when two events have the same timestamp @dlt.resource(primary_key="id", write_disposition="append") - def ticket_events( + def tickets_data( updated_at: dlt.sources.incremental[ pendulum.DateTime ] = dlt.sources.incremental( @@ -70,7 +74,7 @@ def ticket_events( ) ): # URL For ticket events - # 'https://d3v-dlthub.zendesk.com/api/v2/incremental/ticket_events.json?start_time=946684800' + # 'https://d3v-dlthub.zendesk.com/api/v2/incremental/tickets.json?start_time=946684800' event_pages = get_pages( url=url, endpoint="/api/v2/incremental/tickets", @@ -80,15 +84,15 @@ def ticket_events( ) for page in event_pages: yield ([_fix_date(ticket) for ticket in page]) - # yield [ticket for ticket in page] # stop loading when using end_value and end is reached. 
# unfortunately, Zendesk API does not have the "end_time" parameter, so we stop iterating ourselves if updated_at.end_out_of_range: return - return ticket_events + return tickets_data +# function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk def get_pages( url: str, endpoint: str, @@ -153,15 +157,20 @@ def get_pages( # running the Qdrant client to connect to your Qdrant database - qdrant_client = QdrantClient( - url="https://5708cdff-94ce-4e2d-bc41-2dbf4d281244.europe-west3-0.gcp.cloud.qdrant.io", - api_key="UtTVT2g5yYVj5syiYeEqm41Z90dE0B2c6CQs-GOP4bTOnj2IUZkdog", - ) + @with_config(sections=("destination", "credentials")) + def get_qdrant_client(location=dlt.secrets.value, api_key=dlt.secrets.value): + return QdrantClient( + url=location, + api_key=api_key, + ) + + # running the Qdrant client to connect to your Qdrant database + qdrant_client = get_qdrant_client() # view Qdrant collections you'll find your dataset here: print(qdrant_client.get_collections()) - # query Qdrant with appropriate prompt + # query Qdrant with prompt: getting tickets info close to "cancellation" response = qdrant_client.query( "zendesk_data_content", # collection/dataset name with the 'content' suffix -> tickets content table query_text=["cancel", "cancel subscription"], # prompt to search diff --git a/docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py b/docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py index 34bb42283f..a444f772fd 100644 --- a/docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py +++ b/docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py @@ -166,6 +166,7 @@ def get_pages( # @@@DLT_SNIPPET_START declare_qdrant_client # running the Qdrant client to connect to your Qdrant database + @with_config(sections=("destination", "credentials")) def get_qdrant_client(location=dlt.secrets.value, api_key=dlt.secrets.value): return QdrantClient(