Skip to content

Commit

Permalink
Merge branch 'main' into debugging
Browse files Browse the repository at this point in the history
  • Loading branch information
petrus-jvrensburg committed Jun 25, 2024
2 parents 98d33b8 + 52df85c commit a3dbd1d
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 30 deletions.
9 changes: 5 additions & 4 deletions lib/instructor.ex
Original file line number Diff line number Diff line change
Expand Up @@ -560,19 +560,20 @@ defmodule Instructor do
"""
}

messages = [sys_message | messages]

case mode do
:md_json ->
messages ++
[sys_message | messages] ++
[
%{
role: "assistant",
content: "Here is the perfectly correctly formatted JSON\n```json"
}
]

_ ->
:json ->
[sys_message | messages]

:tools ->
messages
end
end)
Expand Down
13 changes: 8 additions & 5 deletions lib/instructor/adapters/llamacpp.ex
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ defmodule Instructor.Adapters.Llamacpp do
before_request.(req)
end

Req.post!(req)
Req.post(req)
send(pid, :done)
end)
end,
Expand Down Expand Up @@ -122,18 +122,21 @@ defmodule Instructor.Adapters.Llamacpp do
before_request.(req)
end

response = Req.post!(req)
response = Req.post(req)

if is_function(after_response) do
after_response.(response)
end

case response do
%{status: 200, body: %{"content" => params}} ->
{:ok, %{status: 200, body: %{"content" => params}}} ->
{:ok, to_openai_response(params)}

_ ->
nil
{:ok, %{status: status}} ->
{:error, "Unexpected HTTP response code: #{status}"}

{:error, reason} ->
{:error, reason}
end
end

Expand Down
11 changes: 6 additions & 5 deletions lib/instructor/adapters/openai.ex
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ defmodule Instructor.Adapters.OpenAI do
before_request.(req)
end

Req.post!(req)
Req.post(req)
send(pid, :done)
end)
end,
Expand Down Expand Up @@ -95,15 +95,16 @@ defmodule Instructor.Adapters.OpenAI do
before_request.(req)
end

response = Req.post!(req)
response = Req.post(req)

if is_function(after_response) do
after_response.(response)
end

case response.status do
200 -> {:ok, response.body}
_ -> {:error, response.body}
case response do
{:ok, %{status: 200, body: body}} -> {:ok, body}
{:ok, %{status: status}} -> {:error, "Unexpected HTTP response code: #{status}"}
{:error, reason} -> {:error, reason}
end
end

Expand Down
2 changes: 1 addition & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ defmodule Instructor.MixProject do
[
{:ecto, "~> 3.11"},
{:jason, "~> 1.4.0"},
{:req, "~> 0.4.0"},
{:req, "~> 0.5 or ~> 1.0"},
{:jaxon, "~> 2.0"},
{:ex_doc, "~> 0.31", only: :dev, runtime: false},
{:mox, "~> 1.1.0", only: :test}
Expand Down
14 changes: 7 additions & 7 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
%{
"castore": {:hex, :castore, "1.0.5", "9eeebb394cc9a0f3ae56b813459f990abb0a3dedee1be6b27fdb50301930502f", [:mix], [], "hexpm", "8d7c597c3e4a64c395980882d4bca3cebb8d74197c590dc272cfd3b6a6310578"},
"castore": {:hex, :castore, "1.0.7", "b651241514e5f6956028147fe6637f7ac13802537e895a724f90bf3e36ddd1dd", [:mix], [], "hexpm", "da7785a4b0d2a021cd1292a60875a784b6caef71e76bf4917bdee1f390455cf5"},
"decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"},
"earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"},
"ecto": {:hex, :ecto, "3.11.1", "4b4972b717e7ca83d30121b12998f5fcdc62ba0ed4f20fd390f16f3270d85c3e", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ebd3d3772cd0dfcd8d772659e41ed527c28b2a8bde4b00fe03e0463da0f1983b"},
"elixir_make": {:hex, :elixir_make, "0.7.7", "7128c60c2476019ed978210c245badf08b03dbec4f24d05790ef791da11aa17c", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}], "hexpm", "5bc19fff950fad52bbe5f211b12db9ec82c6b34a9647da0c2224b8b8464c7e6c"},
"ex_doc": {:hex, :ex_doc, "0.31.0", "06eb1dfd787445d9cab9a45088405593dd3bb7fe99e097eaa71f37ba80c7a676", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "5350cafa6b7f77bdd107aa2199fe277acf29d739aba5aee7e865fc680c62a110"},
"finch": {:hex, :finch, "0.16.0", "40733f02c89f94a112518071c0a91fe86069560f5dbdb39f9150042f44dcfb1a", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "f660174c4d519e5fec629016054d60edd822cdfe2b7270836739ac2f97735ec5"},
"hpax": {:hex, :hpax, "0.1.2", "09a75600d9d8bbd064cdd741f21fc06fc1f4cf3d0fcc335e5aa19be1a7235c84", [:mix], [], "hexpm", "2c87843d5a23f5f16748ebe77969880e29809580efdaccd615cd3bed628a8c13"},
"finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"},
"hpax": {:hex, :hpax, "0.2.0", "5a58219adcb75977b2edce5eb22051de9362f08236220c9e859a47111c194ff5", [:mix], [], "hexpm", "bea06558cdae85bed075e6c036993d43cd54d447f76d8190a8db0dc5893fa2f1"},
"jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"},
"jaxon": {:hex, :jaxon, "2.0.8", "00951a79d354260e28d7e36f956c3de94818124768a4b22e0fc55559d1b3bfe7", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "74532853b1126609615ea98f0ceb5009e70465ca98027afbbd8ed314d887e82d"},
"makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"},
"makeup_elixir": {:hex, :makeup_elixir, "0.16.1", "cc9e3ca312f1cfeccc572b37a09980287e243648108384b97ff2b76e505c3555", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "e127a341ad1b209bd80f7bd1620a15693a9908ed780c3b763bccf7d200c767c6"},
"makeup_erlang": {:hex, :makeup_erlang, "0.1.3", "d684f4bac8690e70b06eb52dad65d26de2eefa44cd19d64a8095e1417df7c8fd", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "b78dc853d2e670ff6390b605d807263bf606da3c82be37f9d7f68635bd886fc9"},
"mime": {:hex, :mime, "2.0.5", "dc34c8efd439abe6ae0343edbb8556f4d63f178594894720607772a041b04b02", [:mix], [], "hexpm", "da0d64a365c45bc9935cc5c8a7fc5e49a0e0f9932a761c55d6c52b142780a05c"},
"mint": {:hex, :mint, "1.5.2", "4805e059f96028948870d23d7783613b7e6b0e2fb4e98d720383852a760067fd", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "d77d9e9ce4eb35941907f1d3df38d8f750c357865353e21d335bdcdf6d892a02"},
"mint": {:hex, :mint, "1.6.0", "88a4f91cd690508a04ff1c3e28952f322528934be541844d54e0ceb765f01d5e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "3c5ae85d90a5aca0a49c0d8b67360bbe407f3b54f1030a111047ff988e8fefaa"},
"mox": {:hex, :mox, "1.1.0", "0f5e399649ce9ab7602f72e718305c0f9cdc351190f72844599545e4996af73c", [:mix], [], "hexpm", "d44474c50be02d5b72131070281a5d3895c0e7a95c780e90bc0cfe712f633a13"},
"nimble_options": {:hex, :nimble_options, "1.1.0", "3b31a57ede9cb1502071fade751ab0c7b8dbe75a9a4c2b5bbb0943a690b63172", [:mix], [], "hexpm", "8bbbb3941af3ca9acc7835f5655ea062111c9c27bcac53e004460dfd19008a99"},
"nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"},
"nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"},
"nimble_pool": {:hex, :nimble_pool, "1.0.0", "5eb82705d138f4dd4423f69ceb19ac667b3b492ae570c9f5c900bb3d2f50a847", [:mix], [], "hexpm", "80be3b882d2d351882256087078e1b1952a28bf98d0a287be87e4a24a710b67a"},
"req": {:hex, :req, "0.4.8", "2b754a3925ddbf4ad78c56f30208ced6aefe111a7ea07fb56c23dccc13eb87ae", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "7146e51d52593bb7f20d00b5308a5d7d17d663d6e85cd071452b613a8277100c"},
"nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
"req": {:hex, :req, "0.5.0", "6d8a77c25cfc03e06a439fb12ffb51beade53e3fe0e2c5e362899a18b50298b3", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "dda04878c1396eebbfdec6db6f3d4ca609e5c8846b7ee88cc56eb9891406f7a3"},
"telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"},
}
149 changes: 149 additions & 0 deletions pages/cookbook/pii-data-sanitization.livemd
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# PII Data Sanitization

## Section

```elixir
# Dependencies for this notebook.
deps = [{:instructor, "~> 0.0.0"}]

# Instructor is pointed at the OpenAI adapter; the API key is read from the
# LB_OPENAI_API_KEY environment variable and the cell fails if it is unset.
app_config = [
  instructor: [
    adapter: Instructor.Adapters.OpenAI,
    openai: [api_key: System.fetch_env!("LB_OPENAI_API_KEY")]
  ]
]

Mix.install(deps, config: app_config)
```

<!-- livebook:{"output":true} -->

```
:ok
```

### Overview

This example demonstrates the usage of OpenAI's ChatCompletion model for the extraction and scrubbing of Personally Identifiable Information (PII) from an input. The code defines an Ecto schema to manage the PII data and offers functions for both extraction and sanitization.

#### Defining the Structures

First, Ecto schemas are defined to represent the PII data and the overall structure for PII data extraction.

```elixir
defmodule PII do
  # Embedded schema and helpers for pulling personally identifiable information
  # (PII) out of free text with an LLM and replacing it with placeholders.
  use Ecto.Schema
  use Instructor.Validator

  # NOTE(review): this @doc is attached to the schema rather than to a function;
  # presumably Instructor forwards it to the model as the field descriptions,
  # so its text is left untouched - confirm before rewording it.
  @doc """
  ## Field Descriptions:
  - index: an auto incrementing integer starting at zero
  - type: the type of personal identifiable information
  - value: the PII value
  """
  @primary_key false
  embedded_schema do
    embeds_many :data, Datum, primary_key: false do
      field(:index, :integer)
      field(:type, :string)
      field(:value, :string)
    end
  end

  @doc """
  Replaces every extracted PII value found in `input` with a placeholder of the
  form `<{type}_{index}>`.

  The first argument is the result of `extract/1`:

    * `{:ok, pii}` - returns `input` with each `pii.data` value replaced.
      Entries whose `value` is not a non-empty string are skipped, and longer
      values are replaced before shorter ones.
    * `{:error, reason}` - prints `reason` with `dbg/1` and returns it.
  """
  def scrub({:ok, pii}, input) do
    pii.data
    # An empty pattern would make String.replace/3 insert the placeholder
    # between every grapheme, and a nil pattern would raise.
    |> Enum.filter(&replaceable?/1)
    # Longest first: replacing "John" before "John Smith" would leave
    # " Smith" behind because the longer value could no longer match.
    |> Enum.sort_by(&byte_size(&1.value), :desc)
    |> Enum.reduce(input, fn datum, acc ->
      String.replace(acc, datum.value, "<#{datum.type}_#{datum.index}>")
    end)
  end

  def scrub({:error, reason}, _input) do
    dbg(reason)
  end

  @doc """
  Asks the model to extract the PII contained in `input`.

  Returns the result of `Instructor.chat_completion/1` called with `PII` as the
  response model and up to three retries; pass that result to `scrub/2`.
  """
  def extract(input) do
    Instructor.chat_completion(
      model: "gpt-3.5-turbo",
      response_model: PII,
      max_retries: 3,
      messages: [
        %{
          role: "system",
          content:
            "You are a world class PII scrubbing model, Extract the PII data from the following document"
        },
        %{
          role: "system",
          content: """
          Examples of PII: names, addresses, phone numbers, email addresses, financial information
          """
        },
        %{
          role: "system",
          content: """
          Instructions:
          - any spaces in the type should be converted to underscores and all letters should be lower case
          - use abbreviations when choosing the type
          """
        },
        %{
          role: "user",
          content: input
        }
      ]
    )
  end

  # True when the datum carries a value that can safely be used as a
  # String.replace/3 pattern.
  defp replaceable?(%{value: value}), do: is_binary(value) and value != ""
end
```

<!-- livebook:{"output":true} -->

```
{:module, PII, <<70, 79, 82, 49, 0, 0, 26, ...>>, {:extract, 1}}
```

### Extracting PII Data

The OpenAI API is utilized to extract PII information from a given input.

```elixir
# Sample document containing several kinds of PII (names, an organization,
# a card number, an amount and a date).
input =
"Hello John Smith, I am Jill. Your GitBoat, LLC credit card account 1111-0000-1111-8765 has a minimum payment of $33.32 that is due by July 24th."

# Ask the model for the PII found in `input`; the result is kept for the
# scrubbing step below.
pii_data = PII.extract(input)
```

<!-- livebook:{"output":true} -->

```
{:ok,
%PII{
data: [
%PII.Datum{index: 0, type: "credit_card_number", value: "1111-0000-1111-8765"},
%PII.Datum{index: 1, type: "currency", value: "$33.32"},
%PII.Datum{index: 2, type: "date", value: "July 24th"},
%PII.Datum{index: 3, type: "person_name", value: "John Smith"},
%PII.Datum{index: 4, type: "person_name", value: "Jill"},
%PII.Datum{index: 5, type: "organization_name", value: "GitBoat, LLC"}
]
}}
```

### Scrubbing PII Data

After extracting the PII data, the `PII.scrub/2` function is used to sanitize the input.

```elixir
# Substitute each extracted value in `input` with its `<type_index>` placeholder.
PII.scrub(pii_data, input)
```

<!-- livebook:{"output":true} -->

```
"Hello <person_name_3>, I am <person_name_4>. Your <organization_name_5> credit card account <credit_card_number_0> has a minimum payment of <currency_1> that is due by <date_2>."
```

<!-- livebook:{"offset":3702,"stamp":{"token":"XCP.oD4NDrK1viUlbkhzfBBPaVNvLsQzDjGpvT-bw6xDfEVSeL5u-2lPnLtPOpO1Fc2UWX19NiV-IHEqhEHJp3gA85416Ru1b3zNXWQTuX4aIrGFyWLrkFUot_c","version":2}} -->
22 changes: 14 additions & 8 deletions pages/quickstart.livemd
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ defmodule Politician do
- first_name: Their first name
- last_name: Their last name
- offices_held:
- office: The branch and position in government they served in
- from_date: When they entered office or null
- until_date: The date they left office or null
- office: The name of the political office held by the politician (in lowercase)
- from_date: When they entered office (YYYY-MM-DD)
- to_date: The date they left office, if relevant (YYYY-MM-DD or null).
"""
@primary_key false
embedded_schema do
Expand Down Expand Up @@ -195,17 +195,20 @@ Instructor.chat_completion(
<!-- livebook:{"output":true} -->

```
09:27:49.015 [debug] Retrying LLM call for NumberSeries:
10:30:03.764 [debug] Retrying LLM call for NumberSeries:
"series - The sum of the series must be even\nseries - should have at least 10 item(s)"
10:30:04.794 [debug] Retrying LLM call for NumberSeries:
"series - The sum of the series must be even"
```

<!-- livebook:{"output":true} -->

```
{:ok, %NumberSeries{series: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}}
{:ok,
%NumberSeries{series: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}}
```

Here we demonstrated using regular Elixir code to validate the outputs of an LLM, but we don't have to stop there. We can actually use the LLM to validate the outputs of the LLM.
Expand Down Expand Up @@ -280,7 +283,7 @@ presidents_stream =
stream: true,
response_model: {:array, Politician},
messages: [
%{role: "user", content: "Who are the first 5 presidents of the United States?"}
%{role: "user", content: "Who were the first 5 presidents of the United States?"}
]
)
```
Expand All @@ -298,7 +301,10 @@ As you can see, instead of returning the result, we return a stream which can be

```elixir
presidents_stream
|> Stream.each(fn {:ok, politician} -> IO.inspect(politician) end)
|> Stream.each(fn
{:ok, politician} -> IO.inspect(politician)
{:error, changeset} -> IO.inspect(changeset)
end)
|> Stream.run()
```

Expand Down

0 comments on commit a3dbd1d

Please sign in to comment.