Skip to content

Commit

Permalink
added all code snippets and sidebars.js
Browse files Browse the repository at this point in the history
  • Loading branch information
hibajamal committed Nov 17, 2023
1 parent 96cb389 commit c86c955
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 15 deletions.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import dlt
from dlt.destinations.qdrant import qdrant_adapter
from qdrant_client import QdrantClient

from zendesk import zendesk_support

def main():
    """Load Zendesk ticket data into Qdrant and return the load info.

    Creates the ``qdrant_zendesk_pipeline`` pipeline (destination
    ``qdrant``, dataset ``zendesk_data_tickets``), takes the ``tickets``
    resource from the Zendesk source, marks the ``subject`` and
    ``description`` fields for embedding with ``qdrant_adapter`` and runs
    the pipeline on the result.

    Returns:
        The value returned by ``pipeline.run`` for this load.
    """
    # Create the pipeline first, before the source is initialized.
    qdrant_pipeline = dlt.pipeline(
        pipeline_name="qdrant_zendesk_pipeline",
        destination="qdrant",
        dataset_name="zendesk_data_tickets",
    )

    # Only the tickets resource of the Zendesk source is needed here.
    ticket_resource = zendesk_support(load_all=False).tickets

    # The adapter tells Qdrant which fields to embed.
    embedded_tickets = qdrant_adapter(
        ticket_resource,
        embed=["subject", "description"],
    )

    return qdrant_pipeline.run(embedded_tickets)

if __name__ == "__main__":
    # Run the load only when executed as a script, then show its load info.
    load_info = main()
    print(load_info)


# Connect to Qdrant; replace the placeholder URL and API key with your own.
qdrant_client = QdrantClient(
    url="https://your-qdrant-url",
    api_key="your-qdrant-api-key",
)

# List the collections available on the Qdrant instance.
print(qdrant_client.get_collections())

# Similarity search: fetch the 3 nearest embeddings to the search prompt.
# NOTE(review): QdrantClient.query documents `query_text` as a single str;
# confirm that a list is supported by the installed qdrant-client version.
# NOTE(review): the dlt Qdrant destination appears to name collections
# "<dataset_name>_<table_name>" - verify this collection name exists.
response = qdrant_client.query(
    collection_name="zendesk_data_tickets",  # collection/dataset name
    query_text=["cancel", "cancel subscription"],  # prompt to search
    limit=3,  # number of nearest embeddings to return
)

print(response)
21 changes: 21 additions & 0 deletions docs/website/docs/examples/_examples-qdrant-header.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import Admonition from "@theme/Admonition";
import CodeBlock from '@theme/CodeBlock';

<Admonition>
The source code for this example can be found in our repository at: <a href={"https://github.com/dlt-hub/dlt/tree/devel/docs/examples/" + props.slug}>{"https://github.com/dlt-hub/dlt/tree/devel/docs/examples/" + props.slug}</a>.
</Admonition>

## TLDR
<div>{props.intro}</div>

## Setup: Running this example on your machine
<CodeBlock language="sh">
{`# clone the dlt repository
git clone git@github.com:dlt-hub/dlt.git
# go to example directory
cd ./dlt/docs/examples/${props.slug}
# install dlt with qdrant
pip install "dlt[qdrant]"
# run the example script
python ${props.run_file}.py`}
</CodeBlock>
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,29 @@ def main():
load_info = main()
print(load_info)

# @@@DLT_SNIPPET_END main_code
# @@@DLT_SNIPPET_END main_code

# @@@DLT_SNIPPET_START declare_qdrant_client
# running the Qdrant client
qdrant_client = QdrantClient(
url="https://your-qdrant-url",
api_key="your-qdrant-api-key",
)
# @@@DLT_SNIPPET_END declare_qdrant_client

# @@@DLT_SNIPPET_START view_collections
# view Qdrant collections
print(qdrant_client.get_collections())
# @@@DLT_SNIPPET_END view_collections

# @@@DLT_SNIPPET_START get_response
response = qdrant_client.query(
"zendesk_data_tickets", # collection/dataset name
query_text=["cancel", "cancel subscription"], # prompt to search
limit=3 # limit the number of results to the nearest 3 embeddings
)
# @@@DLT_SNIPPET_END get_response

print(response)

# @@@DLT_SNIPPET_END example
73 changes: 59 additions & 14 deletions docs/website/docs/examples/qdrant_zendesk/index.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
---
title: Load Zendesk tickets data to Qdrant
title: Similarity Searching with dlt and Qdrant
description: Learn how to use the dlt Zendesk source and the dlt Qdrant destination to conduct a similarity search on your tickets data.
keywords: [similarity search, example]
---
import Header from '../_examples-qdrant-header.md';

<Header
intro="In this tutorial, you will learn how to use dlt to store your
vectorized Zendesk tickets data in the dlt destination; qdrant. You can
use qdrant's vectorization and similarity searching capabilities on your tickets data,
vectorized Zendesk tickets data in the dlt destination, Qdrant. You can
use Qdrant's vectorization and similarity searching capabilities on your tickets data,
while using dlt as a medium to automate your pipeline."
slug="qdrant_zendesk"
run_file="qdrant-snippets"
Expand Down Expand Up @@ -43,44 +44,88 @@ email = "..."
You are now set up to start writing a custom pipeline to connect your Zendesk ticket data to Qdrant. The following lines of code do exactly that:

<!--@@@DLT_SNIPPET_START ./code/qdrant-snippets.py::main_code-->
```py
import dlt
from dlt.destinations.qdrant import qdrant_adapter
from qdrant_client import QdrantClient

from zendesk import zendesk_support

def main():
# 1. Create a pipeline
pipeline = dlt.pipeline(
pipeline_name="qdrant_zendesk_pipeline",
destination="qdrant",
dataset_name="zendesk_data_tickets",
)

# 2. Initialize Zendesk source to get the ticket data
zendesk_source = zendesk_support(load_all=False)
tickets = zendesk_source.tickets

# 3. Run the dlt pipeline
info = pipeline.run(
# 4. Here we use a special function to tell Qdrant
# which fields to embed
qdrant_adapter(
tickets,
embed=["subject", "description"],
)
)

return info

if __name__ == "__main__":
load_info = main()
print(load_info)
```
<!--@@@DLT_SNIPPET_END ./code/qdrant-snippets.py::main_code-->

Overview of the code above:
1. We create a pipeline with the name `qdrant_zendesk_pipeline` and the destination Qdrant.
1. We create a pipeline with the name `qdrant_zendesk_pipeline` and the destination Qdrant. The name of the dataset here will be the same as the "collection" name on Qdrant!
2. Then, we initialize the Zendesk verified source. We only need to load the tickets data, so we get the `tickets` resource from the source through its `tickets` attribute.
3. pipeline.run() runs the pipeline and returns information about the load process.
4. Since Qdrant is a vector database, it specializes in conducting similarity searches within its embedded data. To make that possible, we use the special Qdrant adapter to embed (or vectorize) our data before loading it.
4. Qdrant, being a vector database, specializes in conducting similarity searches within its **embedded data**. To make that possible, we use the special Qdrant adapter to **embed** (or vectorize) our data before loading it.

## Querying the data

In the code above, we also imported the `QdrantClient`. That is what we will use to connect to Qdrant and see what data we have stored so far.
We connect to the Qdrant client with our credentials:

```python
<!--@@@DLT_SNIPPET_START ./code/qdrant-snippets.py::declare_qdrant_client-->
```py
# running the Qdrant client
qdrant_client = QdrantClient(
url="https://your-qdrant-url",
api_key="your-qdrant-api-key",
)
```
<!--@@@DLT_SNIPPET_END ./code/qdrant-snippets.py::declare_qdrant_client-->

If needed, you can list all your datasets or Qdrant "collections" with the following function call:

```python
qdrant_client.get_collections()
<!--@@@DLT_SNIPPET_START ./code/qdrant-snippets.py::view_collections-->
```py
# view Qdrant collections
print(qdrant_client.get_collections())
```
<!--@@@DLT_SNIPPET_END ./code/qdrant-snippets.py::view_collections-->

You should be able to see your own data there. For the purposes of this article, it would be the same name as the dataset you declared in the pipeline above, i.e. `zendesk_data_tickets`.

Next, we query Qdrant to conduct a similarity search using their "query" function. For example, we would like to see tickets that are similar to the ("subject", "description") pair: ("cancel", "cancel subscription"). It can be queried as follows:

```python
qdrant_client.query(
"zendesk_data_tickets", # collection/dataset name
query_text=["cancel", "cancel subscription"], # prompt to search
limit=3 # limit the number of results to the nearest 3 embeddings
<!--@@@DLT_SNIPPET_START ./code/qdrant-snippets.py::get_response-->
```py
response = qdrant_client.query(
"zendesk_data_tickets", # collection/dataset name
query_text=["cancel", "cancel subscription"], # prompt to search
limit=3 # limit the number of results to the nearest 3 embeddings
)
```
The query above gives the following response:
<!--@@@DLT_SNIPPET_END ./code/qdrant-snippets.py::get_response-->

The query above stores the following results in the `response` variable:
```json
[QueryResponse(id='6aeacd21-b3d0-5174-97ef-5aaa59486414', embedding=None, metadata={'_dlt_id': 'Nx3wBiL29xTgaQ', '_dlt_load_id': '1700130284.002391', 'allow_attachments': True, 'allow_channelback': False, 'assignee_id': 12765072569105, 'brand_id': 12765073054225, 'created_at': '2023-09-01T11:19:25+00:00', 'custom_status_id': 12765028278545, 'description': 'I have been trying to cancel my subscription but the system won’t let me do it. Can you please help?', 'from_messaging_channel': False, 'generated_timestamp': 1693567167, 'group_id': 12765036328465, 'has_incidents': False, 'id': 12, 'is_public': True, 'organization_id': 12765041119505, 'raw_subject': 'Unable to Cancel Subscription', 'requester_id': 12765072569105, 'status': 'open', 'subject': 'Unable to Cancel Subscription', 'submitter_id': 12765072569105, 'tags': ['test1'], 'test_field': 'test1', 'ticket_form_id': 12765054772497, 'updated_at': '2023-09-01T11:19:25+00:00', 'url': 'https://d3v-dlthub.zendesk.com/api/v2/tickets/12.json', 'via__channel': 'web'}, document='', score=0.89545774),
QueryResponse(id='a22189c1-70ab-5421-938b-1caae3e7d6d8', embedding=None, metadata={'_dlt_id': 'bc/xloksL89EUg', '_dlt_load_id': '1700130284.002391', 'allow_attachments': True, 'allow_channelback': False, 'assignee_id': 12765072569105, 'brand_id': 12765073054225, 'created_at': '2023-07-18T17:23:42+00:00', 'custom_status_id': 12765028278545, 'description': 'ABCDEF', 'from_messaging_channel': False, 'generated_timestamp': 1689701023, 'group_id': 12765036328465, 'has_incidents': False, 'id': 4, 'is_public': True, 'organization_id': 12765041119505, 'raw_subject': 'What is this ticket', 'requester_id': 12765072569105, 'status': 'open', 'subject': 'What is this ticket', 'submitter_id': 12765072569105, 'tags': ['test1'], 'test_field': 'test1', 'ticket_form_id': 12765054772497, 'updated_at': '2023-07-18T17:23:42+00:00', 'url': 'https://d3v-dlthub.zendesk.com/api/v2/tickets/4.json', 'via__channel': 'web'}, document='', score=0.8643349),
Expand Down
2 changes: 2 additions & 0 deletions docs/website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const sidebars = {
{
type: 'category',
label: 'Integrations',
label: 'Integrations',
link: {
type: 'generated-index',
title: 'Integrations',
Expand Down Expand Up @@ -240,6 +241,7 @@ const sidebars = {
'examples/incremental_loading/index',
'examples/connector_x_arrow/index',
'examples/chess_production/index',
'examples/qdrant_zendesk/index'
],
},
{
Expand Down

0 comments on commit c86c955

Please sign in to comment.