Skip to content

Commit

Permalink
fixed code and qdrant creds
Browse files Browse the repository at this point in the history
  • Loading branch information
hibajamal committed Nov 29, 2023
1 parent 038dd4f commit 9664169
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 51 deletions.
78 changes: 49 additions & 29 deletions docs/website/docs/examples/qdrant_zendesk/code/qdrant-snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,32 @@
def qdrant_snippet():
# @@@DLT_SNIPPET_START example
# @@@DLT_SNIPPET_START zendesk_conn
from typing import Iterator, Optional, Dict, Any, Tuple
from typing import Optional, Dict, Any, Tuple

import dlt
from dlt.common import pendulum
from dlt.common.time import ensure_pendulum_datetime
from dlt.common.typing import TAnyDateTime
from dlt.sources.helpers.requests import client
from dlt.destinations.qdrant import qdrant_adapter

from qdrant_client import QdrantClient

# function source: https://dlthub.com/docs/examples/incremental_loading/#loading-code
from dlt.common.configuration.inject import with_config

# helper function to fix the datetime format
def _parse_date_or_none(value: Optional[str]) -> Optional[pendulum.DateTime]:
if not value:
return None
return ensure_pendulum_datetime(value)

# modify dates to return datetime objects instead
def _fix_date(ticket):
ticket["updated_at"] = _parse_date_or_none(ticket["updated_at"])
ticket["created_at"] = _parse_date_or_none(ticket["created_at"])
ticket["due_at"] = _parse_date_or_none(ticket["due_at"])
return ticket

# function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk
@dlt.source(max_table_nesting=2)
def zendesk_support(
credentials: Dict[str, str] = dlt.secrets.value,
Expand Down Expand Up @@ -49,37 +63,41 @@ def zendesk_support(
subdomain = credentials["subdomain"]
url = f"https://{subdomain}.zendesk.com"

# we use `append` write disposition, because objects in ticket_events endpoint are never updated
# we use `append` write disposition, because objects in tickets_data endpoint are never updated
# so we do not need to merge
# we set primary_key so allow deduplication of events by the `incremental` below in the rare case
# when two events have the same timestamp
@dlt.resource(primary_key="id", write_disposition="append")
def ticket_events(
timestamp: dlt.sources.incremental[int] = dlt.sources.incremental(
"timestamp",
initial_value=start_date_ts,
end_value=end_date_ts,
def tickets_data(
updated_at: dlt.sources.incremental[
pendulum.DateTime
] = dlt.sources.incremental(
"updated_at",
initial_value=start_date_obj,
end_value=end_date_obj,
allow_external_schedulers=True,
),
)
):
# URL For ticket events
# 'https://d3v-dlthub.zendesk.com/api/v2/incremental/ticket_events.json?start_time=946684800'
# 'https://d3v-dlthub.zendesk.com/api/v2/incremental/tickets_data.json?start_time=946684800'
event_pages = get_pages(
url=url,
endpoint="/api/v2/incremental/ticket_events.json",
endpoint="/api/v2/incremental/tickets",
auth=auth,
data_point_name="ticket_events",
params={"start_time": timestamp.last_value},
data_point_name="tickets",
params={"start_time": updated_at.last_value.int_timestamp},
)
for page in event_pages:
yield page
yield ([_fix_date(ticket) for ticket in page])

# stop loading when using end_value and end is reached.
# unfortunately, Zendesk API does not have the "end_time" parameter, so we stop iterating ourselves
if timestamp.end_out_of_range:
if updated_at.end_out_of_range:
return

return ticket_events
return tickets_data

# function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk
def get_pages(
url: str,
endpoint: str,
Expand Down Expand Up @@ -128,7 +146,7 @@ def get_pages(

# create a pipeline with an appropriate name
pipeline = dlt.pipeline(
pipeline_name="qdrant_zendesk_pipeline_ATT",
pipeline_name="qdrant_zendesk_pipeline",
destination="qdrant",
dataset_name="zendesk_data",
)
Expand All @@ -148,29 +166,31 @@ def get_pages(

# @@@DLT_SNIPPET_START declare_qdrant_client
# running the Qdrant client to connect to your Qdrant database
qdrant_client = QdrantClient(
url="https://5708cdff-94ce-4e2d-bc41-2dbf4d281244.europe-west3-0.gcp.cloud.qdrant.io",
api_key="UtTVT2g5yYVj5syiYeEqm41Z90dE0B2c6CQs-GOP4bTOnj2IUZkdog",
)

@with_config(sections=("destination", "credentials"))
def get_qdrant_client(location=dlt.secrets.value, api_key=dlt.secrets.value):
return QdrantClient(
url=location,
api_key=api_key,
)

# running the Qdrant client to connect to your Qdrant database
qdrant_client = get_qdrant_client()

# view Qdrant collections you'll find your dataset here:
print(qdrant_client.get_collections())
# @@@DLT_SNIPPET_END declare_qdrant_client

# @@@DLT_SNIPPET_START get_response
# query Qdrant with appropriate prompt
# query Qdrant with prompt: getting tickets info close to "cancellation"
response = qdrant_client.query(
"zendesk_data_tickets", # collection/dataset name with the 'tickets' suffix -> tickets table
"zendesk_data_content", # collection/dataset name with the 'content' suffix -> tickets content table
query_text=["cancel", "cancel subscription"], # prompt to search
limit=3 # limit the number of results to the nearest 3 embeddings
)
# @@@DLT_SNIPPET_END get_response

print(response)

# @@@DLT_REMOVE
assert len(response) <= 3

# @@@DLT_SNIPPET_END example

qdrant_snippet()
# @@@DLT_SNIPPET_END example
74 changes: 52 additions & 22 deletions docs/website/docs/examples/qdrant_zendesk/index.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: Similarity Searching with dlt and Qdrant
title: Similarity Searching with Qdrant
description: Learn how to use the dlt source, Zendesk and dlt destination, Qdrant to conduct a similarity search on your tickets data.
keywords: [similarity search, example]
---
Expand Down Expand Up @@ -29,11 +29,32 @@ First, configure the destination credentials for [Qdrant](https://dlthub.com/doc

<!--@@@DLT_SNIPPET_START ./code/qdrant-snippets.py::zendesk_conn-->
```py
from typing import Optional, Dict, Any, Tuple

import dlt
from dlt.common import pendulum
from dlt.common.time import ensure_pendulum_datetime
from dlt.common.typing import TAnyDateTime
from dlt.sources.helpers.requests import client
from dlt.destinations.qdrant import qdrant_adapter
from qdrant_client import QdrantClient

# function source: https://dlthub.com/docs/examples/incremental_loading/#loading-code
from dlt.common.configuration.inject import with_config

# helper function to fix the datetime format
def _parse_date_or_none(value: Optional[str]) -> Optional[pendulum.DateTime]:
if not value:
return None
return ensure_pendulum_datetime(value)

# modify dates to return datetime objects instead
def _fix_date(ticket):
ticket["updated_at"] = _parse_date_or_none(ticket["updated_at"])
ticket["created_at"] = _parse_date_or_none(ticket["created_at"])
ticket["due_at"] = _parse_date_or_none(ticket["due_at"])
return ticket

# function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk
@dlt.source(max_table_nesting=2)
def zendesk_support(
credentials: Dict[str, str] = dlt.secrets.value,
Expand Down Expand Up @@ -68,37 +89,41 @@ def zendesk_support(
subdomain = credentials["subdomain"]
url = f"https://{subdomain}.zendesk.com"

# we use `append` write disposition, because objects in ticket_events endpoint are never updated
# we use `append` write disposition, because objects in tickets_data endpoint are never updated
# so we do not need to merge
# we set primary_key so allow deduplication of events by the `incremental` below in the rare case
# when two events have the same timestamp
@dlt.resource(primary_key="id", write_disposition="append")
def ticket_events(
timestamp: dlt.sources.incremental[int] = dlt.sources.incremental(
"timestamp",
initial_value=start_date_ts,
end_value=end_date_ts,
def tickets_data(
updated_at: dlt.sources.incremental[
pendulum.DateTime
] = dlt.sources.incremental(
"updated_at",
initial_value=start_date_obj,
end_value=end_date_obj,
allow_external_schedulers=True,
),
)
):
# URL For ticket events
# 'https://d3v-dlthub.zendesk.com/api/v2/incremental/ticket_events.json?start_time=946684800'
# 'https://d3v-dlthub.zendesk.com/api/v2/incremental/tickets_data.json?start_time=946684800'
event_pages = get_pages(
url=url,
endpoint="/api/v2/incremental/ticket_events.json",
endpoint="/api/v2/incremental/tickets",
auth=auth,
data_point_name="ticket_events",
params={"start_time": timestamp.last_value},
data_point_name="tickets",
params={"start_time": updated_at.last_value.int_timestamp},
)
for page in event_pages:
yield page
yield ([_fix_date(ticket) for ticket in page])

# stop loading when using end_value and end is reached.
# unfortunately, Zendesk API does not have the "end_time" parameter, so we stop iterating ourselves
if timestamp.end_out_of_range:
if updated_at.end_out_of_range:
return

return ticket_events
return tickets_data

# function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk
def get_pages(
url: str,
endpoint: str,
Expand Down Expand Up @@ -169,10 +194,15 @@ print(load_info)
<!--@@@DLT_SNIPPET_START ./code/qdrant-snippets.py::declare_qdrant_client-->
```py
# running the Qdrant client to connect to your Qdrant database
qdrant_client = QdrantClient(
url="https://your-qdrant-url",
api_key="your-qdrant-api-key",
)
@with_config(sections=("destination", "credentials"))
def get_qdrant_client(location=dlt.secrets.value, api_key=dlt.secrets.value):
return QdrantClient(
url=location,
api_key=api_key,
)

# running the Qdrant client to connect to your Qdrant database
qdrant_client = get_qdrant_client()

# view Qdrant collections you'll find your dataset here:
print(qdrant_client.get_collections())
Expand All @@ -181,9 +211,9 @@ print(qdrant_client.get_collections())

<!--@@@DLT_SNIPPET_START ./code/qdrant-snippets.py::get_response-->
```py
# query Qdrant with appropriate prompt
# query Qdrant with prompt: getting tickets info close to "cancellation"
response = qdrant_client.query(
"zendesk_data_tickets", # collection/dataset name with the 'tickets' suffix -> tickets table
"zendesk_data_content", # collection/dataset name with the 'content' suffix -> tickets content table
query_text=["cancel", "cancel subscription"], # prompt to search
limit=3 # limit the number of results to the nearest 3 embeddings
)
Expand Down

0 comments on commit 9664169

Please sign in to comment.