Add shortcut to select_context() (#706)
* adjust docstring for select_context

* langchain select_context, update quickstarts

* undo app name change

* remove dev cell

* generalized langchain select_context (#711)

* generalized langchain select_context

* typo

* typo in string

* update langchain example to pass app in select_context

---------

Co-authored-by: Josh Reini <[email protected]>

* comments, clarity updates to quickstarts

* add lib-independent select_context

* update lc li quickstarts

---------

Co-authored-by: Piotr Mardziel <[email protected]>
joshreini1 and piotrm0 authored Dec 22, 2023
1 parent c26988c commit ab57de9
Showing 11 changed files with 272 additions and 102 deletions.
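
The headline change is a framework-independent `App.select_context()` shortcut: rather than hand-writing record selectors such as `Select.RecordCalls.first.invoke.rets.context`, the quickstarts now ask the wrapped app where its retrieved context lives and pass the returned lens straight into feedback functions. A minimal sketch of the intended usage, assuming a LangChain RAG chain named `rag_chain` as in the updated quickstart below:

```python
import numpy as np

from trulens_eval import Feedback
from trulens_eval.app import App
from trulens_eval.feedback.provider import OpenAI

# Ask the app for the location of its retrieved context; this works for both
# LangChain and LlamaIndex apps without importing either framework up front.
context = App.select_context(rag_chain)

openai = OpenAI()

# Context relevance between the input question and each retrieved chunk.
f_context_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)
```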
64 changes: 32 additions & 32 deletions trulens_eval/examples/quickstart/langchain_quickstart.ipynb
@@ -38,7 +38,7 @@
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
]
},
{
@@ -116,7 +116,8 @@
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
"splits = text_splitter.split_documents(docs)\n",
"\n",
"vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())"
"vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings(\n",
"))"
]
},
{
@@ -180,16 +181,21 @@
"outputs": [],
"source": [
"from trulens_eval.feedback.provider import OpenAI\n",
"from trulens_eval import Select\n",
"import numpy as np\n",
"\n",
"# Initialize provider class\n",
"openai = OpenAI()\n",
"\n",
"# select context to be used in feedback. the location of context is app specific.\n",
"from trulens_eval.app import App\n",
"context = App.select_context(rag_chain)\n",
"\n",
"from trulens_eval.feedback import Groundedness\n",
"grounded = Groundedness(groundedness_provider=OpenAI())\n",
"# Define a groundedness feedback function\n",
"f_groundedness = (\n",
" Feedback(grounded.groundedness_measure_with_cot_reasons)\n",
" .on(Select.RecordCalls.first.invoke.rets.context)\n",
" .on(context.collect()) # collect context chunks into a list\n",
" .on_output()\n",
" .aggregate(grounded.grounded_statements_aggregator)\n",
")\n",
@@ -199,10 +205,10 @@
"# Question/statement relevance between question and each context chunk.\n",
"f_context_relevance = (\n",
" Feedback(openai.qs_relevance)\n",
" .on(Select.RecordCalls.first.invoke.args.input)\n",
" .on(Select.RecordCalls.first.invoke.rets.context)\n",
" .on_input()\n",
" .on(context)\n",
" .aggregate(np.mean)\n",
")"
" )"
]
},
{
@@ -236,15 +242,6 @@
"display(llm_response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tru.run_dashboard()"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -288,11 +285,14 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Explore in a Dashboard"
"records, feedback = tru.get_records_and_feedback(app_ids=[\"Chain1_ChatApplication\"])\n",
"\n",
"records.head()"
]
},
{
@@ -301,42 +301,42 @@
"metadata": {},
"outputs": [],
"source": [
"tru.run_dashboard() # open a local streamlit app to explore\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
"tru.get_leaderboard(app_ids=[\"Chain1_ChatApplication\"])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
"## Explore in a Dashboard"
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
"tru.run_dashboard() # open a local streamlit app to explore\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Or view results directly in your notebook"
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
]
},
{
"cell_type": "code",
"execution_count": null,
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all"
"Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
]
}
],
@@ -356,7 +356,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.0"
},
"vscode": {
"interpreter": {
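
The recorder cell is outside the hunks above; for orientation, a minimal sketch of how the updated feedback functions would be attached to the chain. The `TruChain` wrapper is part of trulens_eval, while the exact feedback list and the example query are assumptions; the `Chain1_ChatApplication` app id is inferred from the `get_records_and_feedback` call in the diff.

```python
from trulens_eval import TruChain

# Sketch (not part of this diff): wrap the chain so the feedback functions
# defined above run on every recorded invocation.
tru_recorder = TruChain(
    rag_chain,
    app_id="Chain1_ChatApplication",
    feedbacks=[f_groundedness, f_context_relevance],
)

# Invoking the chain inside the recording context captures a record plus
# feedback results, which tru.get_records_and_feedback() can later retrieve.
with tru_recorder as recording:
    rag_chain.invoke("What is Task Decomposition?")  # example query (assumption)
```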
125 changes: 87 additions & 38 deletions trulens_eval/examples/quickstart/llama_index_quickstart.ipynb
@@ -50,36 +50,24 @@
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import from LlamaIndex and TruLens"
"### Import from TruLens"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"🦑 Tru initialized with db url sqlite:///default.sqlite .\n",
"🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.\n"
]
}
],
"outputs": [],
"source": [
"from trulens_eval import Feedback, Tru, TruLlama\n",
"from trulens_eval.feedback import Groundedness\n",
"from trulens_eval.feedback.provider.openai import OpenAI\n",
"\n",
"from trulens_eval import Tru\n",
"tru = Tru()"
]
},
@@ -145,23 +133,36 @@
"import numpy as np\n",
"\n",
"# Initialize provider class\n",
"from trulens_eval.feedback.provider.openai import OpenAI\n",
"openai = OpenAI()\n",
"\n",
"grounded = Groundedness(groundedness_provider=OpenAI())\n",
"# select context to be used in feedback. the location of context is app specific.\n",
"from trulens_eval.app import App\n",
"context = App.select_context(query_engine)\n",
"\n",
"# imports for feedback\n",
"from trulens_eval import Feedback\n",
"\n",
"# Define a groundedness feedback function\n",
"f_groundedness = Feedback(grounded.groundedness_measure_with_cot_reasons).on(\n",
" TruLlama.select_source_nodes().node.text.collect()\n",
" ).on_output(\n",
" ).aggregate(grounded.grounded_statements_aggregator)\n",
"from trulens_eval.feedback import Groundedness\n",
"grounded = Groundedness(groundedness_provider=OpenAI())\n",
"f_groundedness = (\n",
" Feedback(grounded.groundedness_measure_with_cot_reasons)\n",
" .on(context.collect()) # collect context chunks into a list\n",
" .on_output()\n",
" .aggregate(grounded.grounded_statements_aggregator)\n",
")\n",
"\n",
"# Question/answer relevance between overall question and answer.\n",
"f_qa_relevance = Feedback(openai.relevance).on_input_output()\n",
"\n",
"# Question/statement relevance between question and each context chunk.\n",
"f_qs_relevance = Feedback(openai.qs_relevance).on_input().on(\n",
" TruLlama.select_source_nodes().node.text\n",
" ).aggregate(np.mean)"
"f_qs_relevance = (\n",
" Feedback(openai.qs_relevance)\n",
" .on_input()\n",
" .on(context)\n",
" .aggregate(np.mean)\n",
")"
]
},
{
@@ -178,6 +179,7 @@
"metadata": {},
"outputs": [],
"source": [
"from trulens_eval import TruLlama\n",
"tru_query_engine_recorder = TruLlama(query_engine,\n",
" app_id='LlamaIndex_App1',\n",
" feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance])"
Expand All @@ -195,11 +197,10 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Explore in a Dashboard"
"## Retrieve records and feedback"
]
},
{
@@ -208,33 +209,63 @@
"metadata": {},
"outputs": [],
"source": [
"tru.run_dashboard() # open a local streamlit app to explore\n",
"# The record of the ap invocation can be retrieved from the `recording`:\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
"rec = recording.get() # use .get if only one record\n",
"# recs = recording.records # use .records if multiple\n",
"\n",
"display(rec)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
"# The results of the feedback functions can be rertireved from the record. These\n",
"# are `Future` instances (see `concurrent.futures`). You can use `as_completed`\n",
"# to wait until they have finished evaluating.\n",
"\n",
"from trulens_eval.schema import FeedbackResult\n",
"\n",
"from concurrent.futures import as_completed\n",
"\n",
"for feedback_future in as_completed(rec.feedback_results):\n",
" feedback, feedback_result = feedback_future.result()\n",
" \n",
" feedback: Feedback\n",
" feedbac_result: FeedbackResult\n",
"\n",
" display(feedback.name, feedback_result.result)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"records, feedback = tru.get_records_and_feedback(app_ids=[\"LlamaIndex_App1\"])\n",
"\n",
"records.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
"tru.get_leaderboard(app_ids=[\"LlamaIndex_App1\"])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Or view results directly in your notebook"
"## Explore in a Dashboard"
]
},
{
@@ -243,7 +274,25 @@
"metadata": {},
"outputs": [],
"source": [
"tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all"
"tru.run_dashboard() # open a local streamlit app to explore\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
]
}
],
@@ -263,7 +312,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.0"
},
"vscode": {
"interpreter": {
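
The invocation cell is likewise elided from the hunks above; a short sketch of where the `recording` object used in the new cells would come from (the example query is an assumption):

```python
# Sketch (not part of this diff): run the query engine inside the recorder so
# the feedback functions above are evaluated and a record is captured.
with tru_query_engine_recorder as recording:
    query_engine.query("What did the author do growing up?")  # example query (assumption)
```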
19 changes: 19 additions & 0 deletions trulens_eval/trulens_eval/app.py
@@ -458,6 +458,25 @@ def __init__(

        self.tru_post_init()

    @classmethod
    def select_context(
        cls,
        app: Optional[Any] = None
    ) -> Lens:
        if app is None:
            raise ValueError("Could not determine context selection without `app` argument.")

        # Checking by module name so we don't have to try to import either
        # langchain or llama_index beforehand.
        if type(app).__module__.startswith("langchain"):
            from trulens_eval.tru_chain import TruChain
            return TruChain.select_context(app)
        elif type(app).__module__.startswith("llama_index"):
            from trulens_eval.tru_llama import TruLlama
            return TruLlama.select_context(app)
        else:
            raise ValueError(f"Could not determine context from unrecognized `app` type {type(app)}.")

    def __hash__(self):
        return hash(id(self))

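
For reference, a short hedged sketch of the new classmethod at the call site; `rag_chain` and `query_engine` stand in for whatever LangChain or LlamaIndex app has been built:

```python
from trulens_eval.app import App

# LangChain app: dispatches to TruChain.select_context via the module-name check.
context_lens = App.select_context(rag_chain)

# LlamaIndex app: dispatches to TruLlama.select_context.
context_lens = App.select_context(query_engine)

# With no app, the context location cannot be inferred, so a ValueError is raised.
try:
    App.select_context()
except ValueError as err:
    print(err)
```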