Skip to content

Commit

Permalink
Merge branch 'main' (which includes a large refactor by Michal) into …
Browse files Browse the repository at this point in the history
…jc/issue-54-request-context
  • Loading branch information
ds-jakub-cierocki committed Jul 3, 2024
2 parents d523bf7 + 6510bd8 commit efe212f
Show file tree
Hide file tree
Showing 71 changed files with 1,482 additions and 882 deletions.
1 change: 1 addition & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ omit =
exclude_lines =
pragma: no cover
if __name__ == .__main__.
\.\.\.
show_missing = True
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/01_feature_request.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: 🚀 Feature Request
description: Submit a proposal/request for a new db-ally feature.
title: "feat: "
labels: ["enhancement"]
labels: ["feature"]
body:
- type: markdown
attributes:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# <h1 align="center">db-ally</h1>
# <h1 align="center">🦮 db-ally</h1>

<p align="center">
<em>Efficient, consistent and secure library for querying structured data with natural language</em>
Expand Down
10 changes: 5 additions & 5 deletions benchmark/dbally_benchmark/e2e_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
import dbally
from dbally.collection import Collection
from dbally.collection.exceptions import NoViewFoundError
from dbally.iql_generator.iql_prompt_template import UnsupportedQueryError, default_iql_template
from dbally.iql_generator.prompt import IQL_GENERATION_TEMPLATE, UnsupportedQueryError
from dbally.llms.litellm import LiteLLM
from dbally.view_selection.view_selector_prompt_template import default_view_selector_template
from dbally.view_selection.prompt import VIEW_SELECTION_TEMPLATE


async def _run_dbally_for_single_example(example: BIRDExample, collection: Collection) -> Text2SQLResult:
Expand Down Expand Up @@ -126,9 +126,9 @@ async def evaluate(cfg: DictConfig) -> Any:
logger.info(f"db-ally predictions saved under directory: {output_dir}")

if run:
run["config/iql_prompt_template"] = stringify_unsupported(default_iql_template.chat)
run["config/view_selection_prompt_template"] = stringify_unsupported(default_view_selector_template.chat)
run["config/iql_prompt_template"] = stringify_unsupported(default_iql_template)
run["config/iql_prompt_template"] = stringify_unsupported(IQL_GENERATION_TEMPLATE.chat)
run["config/view_selection_prompt_template"] = stringify_unsupported(VIEW_SELECTION_TEMPLATE.chat)
run["config/iql_prompt_template"] = stringify_unsupported(IQL_GENERATION_TEMPLATE)
run[f"evaluation/{metrics_file_name}"].upload((output_dir / metrics_file_name).as_posix())
run[f"evaluation/{results_file_name}"].upload((output_dir / results_file_name).as_posix())
run["evaluation/metrics"] = stringify_unsupported(metrics)
Expand Down
12 changes: 7 additions & 5 deletions benchmark/dbally_benchmark/iql_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from dbally.audit.event_tracker import EventTracker
from dbally.iql_generator.iql_generator import IQLGenerator
from dbally.iql_generator.iql_prompt_template import UnsupportedQueryError, default_iql_template
from dbally.iql_generator.prompt import IQL_GENERATION_TEMPLATE, UnsupportedQueryError
from dbally.llms.litellm import LiteLLM
from dbally.views.structured import BaseStructuredView

Expand All @@ -33,13 +33,15 @@ async def _run_iql_for_single_example(
event_tracker = EventTracker()

try:
iql_filters, _ = await iql_generator.generate_iql(
question=example.question, filters=filter_list, event_tracker=event_tracker
iql_filters = await iql_generator.generate_iql(
question=example.question,
filters=filter_list,
event_tracker=event_tracker,
)
except UnsupportedQueryError:
return IQLResult(question=example.question, iql_filters="UNSUPPORTED_QUERY", exception_raised=True)

return IQLResult(question=example.question, iql_filters=iql_filters, exception_raised=False)
return IQLResult(question=example.question, iql_filters=str(iql_filters), exception_raised=False)


async def run_iql_for_dataset(
Expand Down Expand Up @@ -139,7 +141,7 @@ async def evaluate(cfg: DictConfig) -> Any:
logger.info(f"IQL predictions saved under directory: {output_dir}")

if run:
run["config/iql_prompt_template"] = stringify_unsupported(default_iql_template.chat)
run["config/iql_prompt_template"] = stringify_unsupported(IQL_GENERATION_TEMPLATE.chat)
run[f"evaluation/{metrics_file_name}"].upload((output_dir / metrics_file_name).as_posix())
run[f"evaluation/{results_file_name}"].upload((output_dir / results_file_name).as_posix())
run["evaluation/metrics"] = stringify_unsupported(metrics)
Expand Down
2 changes: 1 addition & 1 deletion benchmark/dbally_benchmark/text2sql/prompt_template.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dbally.prompts import PromptTemplate
from dbally.prompt import PromptTemplate

TEXT2SQL_PROMPT_TEMPLATE = PromptTemplate(
(
Expand Down
3 changes: 1 addition & 2 deletions docs/about/roadmap.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ Below you can find a list of planned features and integrations.
## Planned Features

- [ ] **Support analytical queries**: support for exposing operations beyond filtering.
- [ ] **Few-shot prompting configuration**: allow users to configure the few-shot prompting in View definition to
- [x] **Few-shot prompting configuration**: allow users to configure the few-shot prompting in View definition to
improve IQL generation accuracy.
- [ ] **Request contextualization**: allow to provide extra context for db-ally runs, such as user asking the question.
- [X] **OpenAI Assistants API adapter**: allow to embed db-ally into OpenAI's Assistants API to easily extend the
capabilities of the assistant.
- [ ] **Langchain adapter**: allow to embed db-ally into Langchain applications.


## Integrations

Being agnostic to the underlying technology is one of the main goals of db-ally.
Expand Down
Binary file added docs/assets/guide_dog_lg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/assets/guide_dog_sm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 1 addition & 2 deletions docs/how-to/create_custom_event_handler.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ In this guide we will implement a simple [Event Handler](../reference/event_hand
First, we need to create a new class that inherits from `EventHandler` and implements all the abstract methods.

```python
from dbally.audit import EventHandler
from dbally.data_models.audit import RequestStart, RequestEnd
from dbally.audit import EventHandler, RequestStart, RequestEnd

class FileEventHandler(EventHandler):

Expand Down
25 changes: 6 additions & 19 deletions docs/how-to/llms/custom.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,42 +44,29 @@ class MyLLMClient(LLMClient[LiteLLMOptions]):

async def call(
self,
prompt: ChatFormat,
response_format: Optional[Dict[str, str]],
conversation: ChatFormat,
options: LiteLLMOptions,
event: LLMEvent,
json_mode: bool = False,
) -> str:
# Your LLM API call
```

The [`call`](../../reference/llms/index.md#dbally.llms.clients.base.LLMClient.call) method is an abstract method that must be implemented in your subclass. This method should call the LLM inference API and return the response.
The [`call`](../../reference/llms/index.md#dbally.llms.clients.base.LLMClient.call) method is an abstract method that must be implemented in your subclass. This method should call the LLM inference API and return the response in string format.

### Step 3: Use tokenizer to count tokens

The [`count_tokens`](../../reference/llms/index.md#dbally.llms.base.LLM.count_tokens) method is used to count the number of tokens in the messages. You can override this method in your custom class to use the tokenizer and count tokens specifically for your model.
The [`count_tokens`](../../reference/llms/index.md#dbally.llms.base.LLM.count_tokens) method is used to count the number of tokens in the prompt. You can override this method in your custom class to use the tokenizer and count tokens specifically for your model.

```python
class MyLLM(LLM[LiteLLMOptions]):

def count_tokens(self, messages: ChatFormat, fmt: Dict[str, str]) -> int:
# Count tokens in the messages in a custom way
def count_tokens(self, prompt: PromptTemplate) -> int:
# Count tokens in the prompt in a custom way
```
!!!warning
Incorrect token counting can cause problems in the [`NLResponder`](../../reference/nl_responder.md#dbally.nl_responder.nl_responder.NLResponder) and force the use of an explanation prompt template that is more generic and does not include specific rows from the IQL response.

### Step 4: Define custom prompt formatting

The [`format_prompt`](../../reference/llms/index.md#dbally.llms.base.LLM.format_prompt) method is used to apply formatting to the prompt template. You can override this method in your custom class to change how the formatting is performed.

```python
class MyLLM(LLM[LiteLLMOptions]):

def format_prompt(self, template: PromptTemplate, fmt: Dict[str, str]) -> ChatFormat:
# Apply custom formatting to the prompt template
```
!!!note
In general, implementation of this method is not required unless the LLM API does not support [OpenAI conversation formatting](https://platform.openai.com/docs/api-reference/chat/create#chat-create-messages){:target="_blank"}. If your model API expects a different format, override this method to avoid issues with inference call.

## Customising LLM Options

[`LLMOptions`](../../reference/llms/index.md#dbally.llms.clients.base.LLMOptions) is a class that defines the options your LLM will use. To create a custom options, you need to create a subclass of [`LLMOptions`](../../reference/llms/index.md#dbally.llms.clients.base.LLMOptions) and define the required properties that will be passed to the [`LLMClient`](../../reference/llms/index.md#dbally.llms.clients.base.LLMClient).
Expand Down
18 changes: 18 additions & 0 deletions docs/how-to/llms/litellm.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,24 @@ Integrate db-ally with your LLM vendor.
llm=LiteLLM(model_name="anyscale/meta-llama/Llama-2-70b-chat-hf")
```

=== "Azure OpenAI"

```python
import os
from dbally.llms.litellm import LiteLLM

## set ENV variables
os.environ["AZURE_API_KEY"] = "your-api-key"
os.environ["AZURE_API_BASE"] = "your-api-base-url"
os.environ["AZURE_API_VERSION"] = "your-api-version"

# optional
os.environ["AZURE_AD_TOKEN"] = ""
os.environ["AZURE_API_TYPE"] = ""

llm = LiteLLM(model_name="azure/<your_deployment_name>")
```

Use LLM in your collection.

```python
Expand Down
97 changes: 97 additions & 0 deletions docs/how-to/views/few-shots.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# How-To: Define few shots

There are many ways to improve the accuracy of IQL generation - one of them is to use few-shot prompting. db-ally allows you to inject few-shot examples for any type of defined view, both structured and freeform.

Few-shot examples are defined in the [`list_few_shots`](../../reference/views/index.md#dbally.views.base.BaseView.list_few_shots) method. Each example should be an instance of the [`FewShotExample`](../../reference/prompt.md#dbally.prompt.elements.FewShotExample) class, which defines an example question and the expected LLM answer.

## Structured views

For structured views, both questions and answers for [`FewShotExample`](../../reference/prompt.md#dbally.prompt.elements.FewShotExample) can be defined as strings; in the case of answers, Python expressions are also allowed (please see the lambda function in the example below).

```python
from dbally.prompt.elements import FewShotExample
from dbally.views.sqlalchemy_base import SqlAlchemyBaseView

class RecruitmentView(SqlAlchemyBaseView):
"""
A view for retrieving candidates from the database.
"""

def list_few_shots(self) -> List[FewShotExample]:
return [
FewShotExample(
"Which candidates studied at University of Toronto?",
'studied_at("University of Toronto")',
),
FewShotExample(
"Do we have any soon available perfect fits for senior data scientist positions?",
lambda: (
self.is_available_within_months(1)
and self.data_scientist_position()
and self.has_seniority("senior")
),
),
...
]
```

## Freeform views

Currently, freeform views accept SQL query syntax as a raw string. Support for a larger variety of ways to pass parameters is planned for future db-ally releases.

```python
from dbally.prompt.elements import FewShotExample
from dbally.views.freeform.text2sql import BaseText2SQLView

class RecruitmentView(BaseText2SQLView):
"""
A view for retrieving candidates from the database.
"""

def list_few_shots(self) -> List[FewShotExample]:
return [
FewShotExample(
"Which candidates studied at University of Toronto?",
'SELECT name FROM candidates WHERE university = "University of Toronto"',
),
FewShotExample(
"Which clients are from NY?",
'SELECT name FROM clients WHERE city = "NY"',
),
...
]
```

## Prompt format

By default, each few-shot example is injected right after the system prompt message. The format is as follows:

```python
[
{
        "role": "user",
"content": "Question",
},
{
"role": "assistant",
"content": "Answer",
}
]
```

If you use the `examples` formatting tag in the content field of the system or user message, all examples will be injected inside that message instead of being added as extra conversation turns.

An example of a prompt utilizing the `examples` tag:

```python
[
{
        "role": "system",
        "content": "Here are example responses:\n {examples}",
},
]
```

!!!info
    There is no single best way to inject few-shot examples. Different models can behave differently depending on the few-shot formatting of choice.
    Generally, the first approach should yield the best results in most cases. Therefore, adding example tags in your custom prompts is not recommended.
2 changes: 0 additions & 2 deletions docs/reference/collection.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
!!! tip
To understand the general idea better, visit the [Collection concept page](../concepts/collections.md).

::: dbally.create_collection

::: dbally.collection.Collection

::: dbally.collection.results.ExecutionResult
Expand Down
16 changes: 11 additions & 5 deletions docs/reference/event_handlers/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ db-ally provides an `EventHandler` abstract class that can be used to log the ru
Each run of [dbally.Collection.ask][dbally.Collection.ask] will trigger all instances of EventHandler that were passed to the Collection's constructor (or the [dbally.create_collection][dbally.create_collection] function).


1. `EventHandler.request_start` is called with [RequestStart][dbally.data_models.audit.RequestStart], it can return a context object that will be passed to next calls.
1. `EventHandler.request_start` is called with [RequestStart][dbally.audit.events.RequestStart], it can return a context object that will be passed to next calls.
2. For each event that occurs during the run, `EventHandler.event_start` is called with the context object returned by `EventHandler.request_start` and an Event object. It can return context for the `EventHandler.event_end` method.
3. When the event ends `EventHandler.event_end` is called with the context object returned by `EventHandler.event_start` and an Event object.
4. On the end of the run `EventHandler.request_end` is called with the context object returned by `EventHandler.request_start` and the [RequestEnd][dbally.data_models.audit.RequestEnd].
4. On the end of the run `EventHandler.request_end` is called with the context object returned by `EventHandler.request_start` and the [RequestEnd][dbally.audit.events.RequestEnd].


``` mermaid
Expand Down Expand Up @@ -42,8 +42,14 @@ Currently handled events:

::: dbally.audit.EventHandler

::: dbally.data_models.audit.RequestStart
::: dbally.audit.events.RequestStart

::: dbally.data_models.audit.RequestEnd
::: dbally.audit.events.RequestEnd

::: dbally.data_models.audit.LLMEvent
::: dbally.audit.events.Event

::: dbally.audit.events.LLMEvent

::: dbally.audit.events.SimilarityEvent

::: dbally.audit.spans.EventSpan
1 change: 0 additions & 1 deletion docs/reference/index.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# dbally


::: dbally.create_collection
4 changes: 0 additions & 4 deletions docs/reference/iql/iql_generator.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# IQLGenerator

::: dbally.iql_generator.iql_generator.IQLGenerator

::: dbally.iql_generator.iql_prompt_template.IQLPromptTemplate

::: dbally.iql_generator.iql_prompt_template.default_iql_template
4 changes: 0 additions & 4 deletions docs/reference/nl_responder.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,3 @@ Otherwise, a response is generated using a `nl_responder_prompt_template`.
To understand general idea better, visit the [NL Responder concept page](../concepts/nl_responder.md).

::: dbally.nl_responder.nl_responder.NLResponder

::: dbally.nl_responder.query_explainer_prompt_template

::: dbally.nl_responder.nl_responder_prompt_template.default_nl_responder_template
7 changes: 7 additions & 0 deletions docs/reference/prompt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Prompt

::: dbally.prompt.template.PromptTemplate

::: dbally.prompt.template.PromptFormat

::: dbally.prompt.elements.FewShotExample
2 changes: 0 additions & 2 deletions docs/reference/view_selection/llm_view_selector.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# LLMViewSelector

::: dbally.view_selection.LLMViewSelector

::: dbally.view_selection.view_selector_prompt_template.default_view_selector_template
10 changes: 10 additions & 0 deletions docs/stylesheets/extra.css
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
:root {
--md-primary-fg-color: #00b0e0;
}

.md-header__button.md-logo {
margin: 0;
padding: 0;
}

.md-header__button.md-logo img, .md-header__button.md-logo svg {
height: 1.8rem;
width: 1.8rem;
}
Loading

0 comments on commit efe212f

Please sign in to comment.