Add shortcut to select_context() (#706)
* adjust docstring for select_context

* langchain select_context, update quickstarts

* undo app name change

* remove dev cell

* generalized langchain select_context (#711)

* generalized langchain select_context

* typo

* typo in string

* update langchain example to pass app in select_context

---------

Co-authored-by: Josh Reini <[email protected]>

* comments, clarity updates to quickstarts

* add lib-independent select_context

* update lc li quickstarts

---------

Co-authored-by: Piotr Mardziel <[email protected]>
joshreini1 and piotrm0 authored Dec 22, 2023
1 parent c26988c commit ab57de9
Showing 11 changed files with 272 additions and 102 deletions.
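
The headline change is a framework-independent `App.select_context()` shortcut: rather than hand-writing record selectors such as `Select.RecordCalls.first.invoke.rets.context`, the quickstarts now ask the wrapped app where its retrieved context lives and pass the returned lens straight into feedback functions. A minimal sketch of the intended usage, assuming a LangChain RAG chain named `rag_chain` as in the updated quickstart below:

```python
import numpy as np

from trulens_eval import Feedback
from trulens_eval.app import App
from trulens_eval.feedback.provider import OpenAI

# Ask the app for the location of its retrieved context; this works for both
# LangChain and LlamaIndex apps without importing either framework up front.
context = App.select_context(rag_chain)

openai = OpenAI()

# Context relevance between the input question and each retrieved chunk.
f_context_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)
```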
64 changes: 32 additions & 32 deletions trulens_eval/examples/quickstart/langchain_quickstart.ipynb
@@ -38,7 +38,7 @@
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
]
},
{
@@ -116,7 +116,8 @@
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
"splits = text_splitter.split_documents(docs)\n",
"\n",
"vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())"
"vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings(\n",
"))"
]
},
{
@@ -180,16 +181,21 @@
"outputs": [],
"source": [
"from trulens_eval.feedback.provider import OpenAI\n",
"from trulens_eval import Select\n",
"import numpy as np\n",
"\n",
"# Initialize provider class\n",
"openai = OpenAI()\n",
"\n",
"# select context to be used in feedback. the location of context is app specific.\n",
"from trulens_eval.app import App\n",
"context = App.select_context(rag_chain)\n",
"\n",
"from trulens_eval.feedback import Groundedness\n",
"grounded = Groundedness(groundedness_provider=OpenAI())\n",
"# Define a groundedness feedback function\n",
"f_groundedness = (\n",
" Feedback(grounded.groundedness_measure_with_cot_reasons)\n",
" .on(Select.RecordCalls.first.invoke.rets.context)\n",
" .on(context.collect()) # collect context chunks into a list\n",
" .on_output()\n",
" .aggregate(grounded.grounded_statements_aggregator)\n",
")\n",
@@ -199,10 +205,10 @@
"# Question/statement relevance between question and each context chunk.\n",
"f_context_relevance = (\n",
" Feedback(openai.qs_relevance)\n",
" .on(Select.RecordCalls.first.invoke.args.input)\n",
" .on(Select.RecordCalls.first.invoke.rets.context)\n",
" .on_input()\n",
" .on(context)\n",
" .aggregate(np.mean)\n",
")"
" )"
]
},
{
@@ -236,15 +242,6 @@
"display(llm_response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tru.run_dashboard()"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -288,11 +285,14 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Explore in a Dashboard"
"records, feedback = tru.get_records_and_feedback(app_ids=[\"Chain1_ChatApplication\"])\n",
"\n",
"records.head()"
]
},
{
@@ -301,42 +301,42 @@
"metadata": {},
"outputs": [],
"source": [
"tru.run_dashboard() # open a local streamlit app to explore\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
"tru.get_leaderboard(app_ids=[\"Chain1_ChatApplication\"])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
"## Explore in a Dashboard"
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
"tru.run_dashboard() # open a local streamlit app to explore\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Or view results directly in your notebook"
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
]
},
{
"cell_type": "code",
"execution_count": null,
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all"
"Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
]
}
],
@@ -356,7 +356,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.0"
},
"vscode": {
"interpreter": {
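
The recorder cell is outside the hunks above; for orientation, a minimal sketch of how the updated feedback functions would be attached to the chain. The `TruChain` wrapper is part of trulens_eval, while the exact feedback list and the example query are assumptions; the `Chain1_ChatApplication` app id is inferred from the `get_records_and_feedback` call in the diff.

```python
from trulens_eval import TruChain

# Sketch (not part of this diff): wrap the chain so the feedback functions
# defined above run on every recorded invocation.
tru_recorder = TruChain(
    rag_chain,
    app_id="Chain1_ChatApplication",
    feedbacks=[f_groundedness, f_context_relevance],
)

# Invoking the chain inside the recording context captures a record plus
# feedback results, which tru.get_records_and_feedback() can later retrieve.
with tru_recorder as recording:
    rag_chain.invoke("What is Task Decomposition?")  # example query (assumption)
```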
125 changes: 87 additions & 38 deletions trulens_eval/examples/quickstart/llama_index_quickstart.ipynb
@@ -50,36 +50,24 @@
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import from LlamaIndex and TruLens"
"### Import from TruLens"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"🦑 Tru initialized with db url sqlite:///default.sqlite .\n",
"🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.\n"
]
}
],
"outputs": [],
"source": [
"from trulens_eval import Feedback, Tru, TruLlama\n",
"from trulens_eval.feedback import Groundedness\n",
"from trulens_eval.feedback.provider.openai import OpenAI\n",
"\n",
"from trulens_eval import Tru\n",
"tru = Tru()"
]
},
@@ -145,23 +133,36 @@
"import numpy as np\n",
"\n",
"# Initialize provider class\n",
"from trulens_eval.feedback.provider.openai import OpenAI\n",
"openai = OpenAI()\n",
"\n",
"grounded = Groundedness(groundedness_provider=OpenAI())\n",
"# select context to be used in feedback. the location of context is app specific.\n",
"from trulens_eval.app import App\n",
"context = App.select_context(query_engine)\n",
"\n",
"# imports for feedback\n",
"from trulens_eval import Feedback\n",
"\n",
"# Define a groundedness feedback function\n",
"f_groundedness = Feedback(grounded.groundedness_measure_with_cot_reasons).on(\n",
" TruLlama.select_source_nodes().node.text.collect()\n",
" ).on_output(\n",
" ).aggregate(grounded.grounded_statements_aggregator)\n",
"from trulens_eval.feedback import Groundedness\n",
"grounded = Groundedness(groundedness_provider=OpenAI())\n",
"f_groundedness = (\n",
" Feedback(grounded.groundedness_measure_with_cot_reasons)\n",
" .on(context.collect()) # collect context chunks into a list\n",
" .on_output()\n",
" .aggregate(grounded.grounded_statements_aggregator)\n",
")\n",
"\n",
"# Question/answer relevance between overall question and answer.\n",
"f_qa_relevance = Feedback(openai.relevance).on_input_output()\n",
"\n",
"# Question/statement relevance between question and each context chunk.\n",
"f_qs_relevance = Feedback(openai.qs_relevance).on_input().on(\n",
" TruLlama.select_source_nodes().node.text\n",
" ).aggregate(np.mean)"
"f_qs_relevance = (\n",
" Feedback(openai.qs_relevance)\n",
" .on_input()\n",
" .on(context)\n",
" .aggregate(np.mean)\n",
")"
]
},
{
@@ -178,6 +179,7 @@
"metadata": {},
"outputs": [],
"source": [
"from trulens_eval import TruLlama\n",
"tru_query_engine_recorder = TruLlama(query_engine,\n",
" app_id='LlamaIndex_App1',\n",
" feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance])"
Expand All @@ -195,11 +197,10 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Explore in a Dashboard"
"## Retrieve records and feedback"
]
},
{
@@ -208,33 +209,63 @@
"metadata": {},
"outputs": [],
"source": [
"tru.run_dashboard() # open a local streamlit app to explore\n",
"# The record of the ap invocation can be retrieved from the `recording`:\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
"rec = recording.get() # use .get if only one record\n",
"# recs = recording.records # use .records if multiple\n",
"\n",
"display(rec)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
"# The results of the feedback functions can be rertireved from the record. These\n",
"# are `Future` instances (see `concurrent.futures`). You can use `as_completed`\n",
"# to wait until they have finished evaluating.\n",
"\n",
"from trulens_eval.schema import FeedbackResult\n",
"\n",
"from concurrent.futures import as_completed\n",
"\n",
"for feedback_future in as_completed(rec.feedback_results):\n",
" feedback, feedback_result = feedback_future.result()\n",
" \n",
" feedback: Feedback\n",
" feedbac_result: FeedbackResult\n",
"\n",
" display(feedback.name, feedback_result.result)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"records, feedback = tru.get_records_and_feedback(app_ids=[\"LlamaIndex_App1\"])\n",
"\n",
"records.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
"tru.get_leaderboard(app_ids=[\"LlamaIndex_App1\"])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Or view results directly in your notebook"
"## Explore in a Dashboard"
]
},
{
@@ -243,7 +274,25 @@
"metadata": {},
"outputs": [],
"source": [
"tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all"
"tru.run_dashboard() # open a local streamlit app to explore\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: Feedback functions evaluated in the deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
]
}
],
@@ -263,7 +312,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.0"
},
"vscode": {
"interpreter": {
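
The invocation cell is likewise elided from the hunks above; a short sketch of where the `recording` object used in the new cells would come from (the example query is an assumption):

```python
# Sketch (not part of this diff): run the query engine inside the recorder so
# the feedback functions above are evaluated and a record is captured.
with tru_query_engine_recorder as recording:
    query_engine.query("What did the author do growing up?")  # example query (assumption)
```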
19 changes: 19 additions & 0 deletions trulens_eval/trulens_eval/app.py
@@ -458,6 +458,25 @@ def __init__(

        self.tru_post_init()

    @classmethod
    def select_context(
        cls,
        app: Optional[Any] = None
    ) -> Lens:
        if app is None:
            raise ValueError("Could not determine context selection without `app` argument.")

        # Checking by module name so we don't have to try to import either
        # langchain or llama_index beforehand.
        if type(app).__module__.startswith("langchain"):
            from trulens_eval.tru_chain import TruChain
            return TruChain.select_context(app)
        elif type(app).__module__.startswith("llama_index"):
            from trulens_eval.tru_llama import TruLlama
            return TruLlama.select_context(app)
        else:
            raise ValueError(f"Could not determine context from unrecognized `app` type {type(app)}.")

    def __hash__(self):
        return hash(id(self))

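
For reference, a short hedged sketch of the new classmethod at the call site; `rag_chain` and `query_engine` stand in for whatever LangChain or LlamaIndex app has been built:

```python
from trulens_eval.app import App

# LangChain app: dispatches to TruChain.select_context via the module-name check.
context_lens = App.select_context(rag_chain)

# LlamaIndex app: dispatches to TruLlama.select_context.
context_lens = App.select_context(query_engine)

# With no app, the context location cannot be inferred, so a ValueError is raised.
try:
    App.select_context()
except ValueError as err:
    print(err)
```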