diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1896e77a4d66..5afb89ca97d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,14 +12,20 @@ repos: - id: ruff-format types_or: [python, pyi, jupyter] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.10.0' + rev: "v1.10.0" hooks: - id: mypy additional_dependencies: [types-pkg-resources==0.1.3, types-all, wandb>=0.15.5] # Note: You have to update pyproject.toml[tool.mypy] too! - args: ['--config-file=pyproject.toml'] + args: ["--config-file=pyproject.toml"] exclude: (.*pyi$)|(weave_query)|(tests)|(examples) + - repo: https://github.com/RobertCraigie/pyright-python + rev: v1.1.387 + hooks: + - id: pyright + additional_dependencies: [".[tests]"] + # This is legacy Weave when we were building a notebook product - should be removed - repo: local hooks: diff --git a/Makefile b/Makefile index a53cde0a026b..6c1ed017c0f9 100644 --- a/Makefile +++ b/Makefile @@ -14,4 +14,8 @@ docs: build: uv build -prepare-release: docs build \ No newline at end of file +prepare-release: docs build + +synchronize-base-object-schemas: + cd weave && make generate_base_object_schemas && \ + cd ../weave-js && yarn generate-schemas diff --git a/dev_docs/BaseObjectClasses.md b/dev_docs/BaseObjectClasses.md new file mode 100644 index 000000000000..a571af49755b --- /dev/null +++ b/dev_docs/BaseObjectClasses.md @@ -0,0 +1,218 @@ +# BaseObjectClasses + +## Refresher on Objects and object storage + +In Weave, we have a general-purpose data storage system for objects. +The payloads themselves are completely free-form - basically anything that can be JSON-serialized. +Users can "publish" runtime objects to weave using `weave.publish`. +For example: + +```python +config = {"model_name": "my_model", "model_version": "1.0"} +ref = weave.publish(config, name="my_model_config") +``` + +This will create a new object "version" in the collection called "my_model_config". +These can then be retrieved using `weave.ref().get()`: + +```python +config = weave.ref("my_model_config").get() +``` + +Sometimes users are working with standard structured classes like `dataclasses` or `pydantic.BaseModel`. +In such cases, we have special serialization and deserialization logic that allows for cleaner serialization patterns. +For example, let's say the user does: + +```python +class ModelConfig(weave.Object): + model_name: str + model_version: str +``` + +Then the user can publish an instance of `ModelConfig` as follows: + +```python +config = ModelConfig(model_name="my_model", model_version="1.0") +ref = weave.publish(config) +``` + +This will result in an on-disk payload that looks like: + +```json +{ + "model_name": "my_model", + "model_version": "1.0", + "_type": "ModelConfig", + "_class_name": "ModelConfig", + "_bases": ["Object", "BaseModel"] +} +``` + +And additionally, the user can query for all objects of the `ModelConfig` class using the `base_object_classes` filter in `objs_query` or `POST objs/query`. +Effectively, this is like creating a virtual table for that class. + +**Terminology**: We use the term "weave Object" (capital "O") to refer to instances of classes that subclass `weave.Object`. + +**Technical note**: the "base_object_class" is the first subtype of "Object", not the _class_name. +For example, let's say the class hierarchy is: +* `A -> Object -> BaseModel`, then the `base_object_class` filter will be "A". +* `B -> A -> Object -> BaseModel`, then the `base_object_class` filter will still be "A"! 
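+
+To make the note above concrete, here is a minimal sketch (the project name is hypothetical, and `A`/`B` mirror the hierarchy from the bullets):
+
+```python
+import weave
+
+class A(weave.Object):
+    a_field: int
+
+class B(A):
+    b_field: str
+
+weave.init("my-project")  # hypothetical project name
+
+# Both objects land in base_object_class "A", because A is the first
+# subclass of Object in each hierarchy.
+weave.publish(A(a_field=1), name="a_obj")
+weave.publish(B(a_field=1, b_field="x"), name="b_obj")
+```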
+
+Finally, the Weave library itself utilizes this mechanism for common objects like `Model`, `Dataset`, `Evaluation`, etc.
+This allows the user to subclass these objects to add additional metadata or functionality, while categorizing them in the same virtual table.
+
+## Validated Base Objects
+
+While many Weave Objects are free-form and user-defined, there is often a need for well-defined schemas for the configuration objects that Weave itself controls. The BaseObject system provides a way to define these schemas once and use them consistently across the entire stack.
+
+### Key Features
+
+1. **Single Source of Truth**: Define your schema once using Pydantic models
+2. **Full Stack Integration**: The schema is used for:
+   - Python SDK validation
+   - Server-side HTTP API validation
+   - Frontend UI validation with generated TypeScript types
+   - Future: OpenAPI schema generation
+   - Future: TypeScript SDK type generation
+
+### Usage Example
+
+Here's how to define and use a validated base object:
+
+1. **Define your schema** (in `weave/trace_server/interface/base_object_classes/your_schema.py`):
+
+```python
+from pydantic import BaseModel
+from weave.trace_server.interface.base_object_classes import base_object_def
+
+class NestedConfig(BaseModel):
+    setting_a: int
+
+class MyConfig(base_object_def.BaseObject):
+    name: str
+    nested: NestedConfig
+    reference: base_object_def.RefStr
+
+__all__ = ["MyConfig"]
+```
+
+2. **Use in Python**:
+```python
+# Publishing
+ref = weave.publish(MyConfig(...))
+
+# Fetching (maintains type)
+config = ref.get()
+assert isinstance(config, MyConfig)
+```
+
+3. **Use via HTTP API**:
+```bash
+# Creating
+curl -X POST 'https://trace.wandb.ai/obj/create' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "obj": {
+      "project_id": "user/project",
+      "object_id": "my_config",
+      "val": {...},
+      "set_base_object_class": "MyConfig"
+    }
+  }'
+
+# Querying
+curl -X POST 'https://trace.wandb.ai/objs/query' \
+  -d '{
+    "project_id": "user/project",
+    "filter": {
+      "base_object_classes": ["MyConfig"]
+    }
+  }'
+```
+
+4. **Use in React**:
+```typescript
+// Read with type safety
+const result = useBaseObjectInstances("MyConfig", ...);
+
+// Write with validation
+const createFn = useCreateBaseObjectInstance("MyConfig");
+createFn({...}); // TypeScript enforced schema
+```
+
+### Keeping Frontend Types in Sync
+
+Run `make synchronize-base-object-schemas` to ensure the frontend TypeScript types are up to date with your Pydantic schemas.
+
+### Implementation Notes
+
+- Base objects are pure data schemas (fields only)
+- The system is designed to work independently of the weave SDK to maintain clean separation of concerns
+- Server-side validation ensures data integrity
+- Client-side validation (both Python and TypeScript) provides early feedback
+- Generated TypeScript types ensure type safety in the frontend
+
+### Architecture Flow
+
+1. Define your schema in a Python file in the `weave/trace_server/interface/base_object_classes/` directory. See `weave/trace_server/interface/base_object_classes/test_only_example.py` as an example.
+2. Make sure to register your schemas in `weave/trace_server/interface/base_object_classes/base_object_registry.py` by calling `register_base_object`.
+3. Run `make synchronize-base-object-schemas` to generate the frontend types.
+   * The first step (`make generate_base_object_schemas`) will run `weave/scripts/generate_base_object_schemas.py` to generate a JSON schema in `weave/trace_server/interface/base_object_classes/generated/generated_base_object_class_schemas.json`.
+   * The second step (`yarn generate-schemas`) will read this file and use it to generate the frontend types located in `weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts`.
+4. Now, each use case relies on a different part of this pipeline:
+   1. `Python Writing`. Users can directly import these classes and use them as normal Pydantic models, which get published with `weave.publish`. The Python client correctly builds the requisite payload.
+   2. `Python Reading`. Users can call `weave.ref().get()` and the weave Python SDK will return the instance with the correct type. Note: we do some special handling such that the returned object is not a WeaveObject, but literally the exact Pydantic class.
+   3. `HTTP Writing`. In cases where the client/user does not want to add the special type information, users can publish base objects by setting the `set_base_object_class` setting on `POST obj/create` to the name of the class. The weave server will validate the object against the schema, update the metadata fields, and store the object.
+   4. `HTTP Reading`. When querying for objects, the server will return the object with the correct type if the `base_object_class` metadata field is set.
+   5. `Frontend`. The frontend will read the zod schema from `weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts` and use it to provide compile-time type safety when using `useBaseObjectInstances` and runtime type safety when using `useCreateBaseObjectInstance`.
+* Note: it is critical that all techniques produce the same digest for the same data - this is covered by tests. This way versions are not thrashed by different clients/users.
+
+```mermaid
+graph TD
+    subgraph Schema Definition
+        F["weave/trace_server/interface/<br>
base_object_classes/your_schema.py"] --> |defines| P[Pydantic BaseObject] + P --> |register_base_object| R["base_object_registry.py"] + end + + subgraph Schema Generation + M["make synchronize-base-object-schemas"] --> G["make generate_base_object_schemas"] + G --> |runs| S["weave/scripts/
generate_base_object_schemas.py"] + R --> |import registered classes| S + S --> |generates| J["generated_base_object_class_schemas.json"] + M --> |yarn generate-schemas| Z["generatedBaseObjectClasses.zod.ts"] + J --> Z + end + + subgraph "Trace Server" + subgraph "HTTP API" + R --> |validates using| HW["POST obj/create
set_base_object_class"] + HW --> DB[(Weave Object Store)] + HR["POST objs/query
base_object_classes"] --> |Filters base_object_class| DB + end + end + + subgraph "Python SDK" + PW[Client Code] --> |import & publish| W[weave.publish] + W --> |store| HW + R --> |validates using| W + PR["weave ref get()"] --> |queries| HR + R --> |deserializes using| PR + end + + subgraph "Frontend" + Z --> |import| UBI["useBaseObjectInstances"] + Z --> |import| UCI["useCreateBaseObjectInstance"] + UBI --> |Filters base_object_class| HR + UCI --> |set_base_object_class| HW + UI[React UI] --> UBI + UI --> UCI + end + + style F fill:#f9f,stroke:#333,stroke-width:2px + style P fill:#f9f,stroke:#333,stroke-width:2px + style R fill:#bbf,stroke:#333,stroke-width:2px + style DB fill:#dfd,stroke:#333,stroke-width:2px + style J fill:#ffd,stroke:#333,stroke-width:2px + style Z fill:#ffd,stroke:#333,stroke-width:2px + style M fill:#faa,stroke:#333,stroke-width:4px +``` diff --git a/docs/Makefile b/docs/Makefile index 55973959fac3..7f47ddde5006 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -11,6 +11,12 @@ generate_python_sdk_docs: mkdir -p ./docs/reference/python-sdk python scripts/generate_python_sdk_docs.py +generate_typescript_sdk_docs: + mkdir -p ./docs/reference/typescript-sdk + rm -rf ./docs/reference/typescript-sdk + mkdir -p ./docs/reference/typescript-sdk + bash scripts/generate_typescript_sdk_docs.sh + generate_notebooks_docs: mkdir -p ./docs/reference/gen_notebooks rm -rf ./docs/reference/gen_notebooks diff --git a/docs/docs/guides/core-types/datasets.md b/docs/docs/guides/core-types/datasets.md index 887c29ab643d..820178185408 100644 --- a/docs/docs/guides/core-types/datasets.md +++ b/docs/docs/guides/core-types/datasets.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Datasets `Dataset`s enable you to collect examples for evaluation and automatically track versions for accurate comparisons. Use this to download the latest version locally with a simple API. 
@@ -10,25 +13,58 @@ This guide will show you how to:
 
 ## Sample code
 
-```python
-import weave
-from weave import Dataset
-# Initialize Weave
-weave.init('intro-example')
+<Tabs>
+  <TabItem value="python" label="Python" default>
+
+  ```python
+  import weave
+  from weave import Dataset
+  # Initialize Weave
+  weave.init('intro-example')
+
+  # Create a dataset
+  dataset = Dataset(
+      name='grammar',
+      rows=[
+          {'id': '0', 'sentence': "He no likes ice cream.", 'correction': "He doesn't like ice cream."},
+          {'id': '1', 'sentence': "She goed to the store.", 'correction': "She went to the store."},
+          {'id': '2', 'sentence': "They plays video games all day.", 'correction': "They play video games all day."}
+      ]
+  )
+
+  # Publish the dataset
+  weave.publish(dataset)
+
+  # Retrieve the dataset
+  dataset_ref = weave.ref('grammar').get()
+
+  # Access a specific example
+  example_label = dataset_ref.rows[2]['sentence']
+  ```
+
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+
+  ```typescript
+  import * as weave from 'weave';
+
+  // Initialize Weave
+  await weave.init('intro-example');
-
-# Create a dataset
-dataset = Dataset(name='grammar', rows=[
-    {'id': '0', 'sentence': "He no likes ice cream.", 'correction': "He doesn't like ice cream."},
-    {'id': '1', 'sentence': "She goed to the store.", 'correction': "She went to the store."},
-    {'id': '2', 'sentence': "They plays video games all day.", 'correction': "They play video games all day."}
-])
+
+  // Create a dataset
+  const dataset = new weave.Dataset({
+    name: 'grammar',
+    rows: [
+      {id: '0', sentence: "He no likes ice cream.", correction: "He doesn't like ice cream."},
+      {id: '1', sentence: "She goed to the store.", correction: "She went to the store."},
+      {id: '2', sentence: "They plays video games all day.", correction: "They play video games all day."}
+    ]
+  });
-
-# Publish the dataset
-weave.publish(dataset)
+
+  // Publish the dataset
+  await dataset.save();
-
-# Retrieve the dataset
-dataset_ref = weave.ref('grammar').get()
+
+  // Access a specific example
+  const exampleLabel = dataset.getRow(2).sentence;
+  ```
-
-# Access a specific example
-example_label = dataset_ref.rows[2]['sentence']
-```
+
+  </TabItem>
+</Tabs>
diff --git a/docs/docs/guides/core-types/media.md b/docs/docs/guides/core-types/media.md
index 09a0ef66f49c..f097e3a0674f 100644
--- a/docs/docs/guides/core-types/media.md
+++ b/docs/docs/guides/core-types/media.md
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Logging media
 
 Weave supports logging and displaying multiple first class media types. Log images with `PIL.Image.Image` and audio with `wave.Wave_read` either directly with the object API, or as the inputs or output of an op.
 
@@ -6,34 +9,71 @@ Weave supports logging and displaying multiple first class media types. Log imag
 
 ## Images
 
 Logging type: `PIL.Image.Image`.
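+
+As a minimal sketch of the direct case before the API-driven example below (the local file name is hypothetical), returning a `PIL.Image.Image` from an op is enough for Weave to log it as an image:
+
+```python
+import weave
+from PIL import Image
+
+weave.init('image-example')
+
+@weave.op
+def load_image(path: str) -> Image.Image:
+    # Any PIL image returned from an op is logged as image media
+    return Image.open(path)
+
+load_image('cat.png')  # hypothetical local file
+```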
Here is an example of logging an image with the OpenAI DALL-E API: -```python -import weave -from openai import OpenAI -import requests -from PIL import Image - - -weave.init('image-example') -client = OpenAI() - -@weave.op -def generate_image(prompt: str) -> Image: - response = client.images.generate( - model="dall-e-3", - prompt=prompt, - size="1024x1024", - quality="standard", - n=1, - ) - image_url = response.data[0].url - image_response = requests.get(image_url, stream=True) - image = Image.open(image_response.raw) - - # return a PIL.Image.Image object to be logged as an image - return image - -generate_image("a cat with a pumpkin hat") -``` + + + + ```python + import weave + from openai import OpenAI + import requests + from PIL import Image + + weave.init('image-example') + client = OpenAI() + + @weave.op + def generate_image(prompt: str) -> Image: + response = client.images.generate( + model="dall-e-3", + prompt=prompt, + size="1024x1024", + quality="standard", + n=1, + ) + image_url = response.data[0].url + image_response = requests.get(image_url, stream=True) + image = Image.open(image_response.raw) + + # return a PIL.Image.Image object to be logged as an image + return image + + generate_image("a cat with a pumpkin hat") + ``` + + + + + ```typescript + import {OpenAI} from 'openai'; + import * as weave from 'weave'; + + async function main() { + const client = await weave.init('image-example'); + const openai = new OpenAI(); + + const generateImage = weave.op(async (prompt: string) => { + const response = await openai.images.generate({ + model: 'dall-e-3', + prompt: prompt, + size: '1024x1024', + quality: 'standard', + n: 1, + }); + const imageUrl = response.data[0].url; + const imgResponse = await fetch(imageUrl); + const data = Buffer.from(await imgResponse.arrayBuffer()); + + return weave.weaveImage({data}); + }); + + generateImage('a cat with a pumpkin hat'); + } + + main(); + ``` + + + This image will be logged to weave and automatically displayed in the UI. The following is the trace view for above. @@ -43,31 +83,68 @@ This image will be logged to weave and automatically displayed in the UI. The fo Logging type: `wave.Wave_read`. Here is an example of logging an audio file using openai's speech generation API. 
-```python -import weave -from openai import OpenAI -import wave - - -weave.init("audio-example") -client = OpenAI() - - -@weave.op -def make_audio_file_streaming(text: str) -> wave.Wave_read: - with client.audio.speech.with_streaming_response.create( - model="tts-1", - voice="alloy", - input=text, - response_format="wav", - ) as res: - res.stream_to_file("output.wav") - - # return a wave.Wave_read object to be logged as audio - return wave.open("output.wav") - -make_audio_file_streaming("Hello, how are you?") -``` + + + + ```python + import weave + from openai import OpenAI + import wave + + weave.init("audio-example") + client = OpenAI() + + + @weave.op + def make_audio_file_streaming(text: str) -> wave.Wave_read: + with client.audio.speech.with_streaming_response.create( + model="tts-1", + voice="alloy", + input=text, + response_format="wav", + ) as res: + res.stream_to_file("output.wav") + + # return a wave.Wave_read object to be logged as audio + return wave.open("output.wav") + + make_audio_file_streaming("Hello, how are you?") + ``` + + + + + ```typescript + import {OpenAI} from 'openai'; + import * as weave from 'weave'; + + async function main() { + await weave.init('audio-example'); + const openai = new OpenAI(); + + const makeAudioFileStreaming = weave.op(async function audio(text: string) { + const response = await openai.audio.speech.create({ + model: 'tts-1', + voice: 'alloy', + input: text, + response_format: 'wav', + }); + + const chunks: Uint8Array[] = []; + for await (const chunk of response.body) { + chunks.push(chunk); + } + return weave.weaveAudio({data: Buffer.concat(chunks)}); + }); + + await makeAudioFileStreaming('Hello, how are you?'); + } + + main(); + ``` + + + This audio will be logged to weave and automatically displayed in the UI, with an audio player. The player can be expanded to view the raw audio waveform, in addition to a download button. diff --git a/docs/docs/guides/core-types/models.md b/docs/docs/guides/core-types/models.md index b35ea4e3d16e..83c16aa19a2c 100644 --- a/docs/docs/guides/core-types/models.md +++ b/docs/docs/guides/core-types/models.md @@ -1,72 +1,85 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Models -A `Model` is a combination of data (which can include configuration, trained model weights, or other information) and code that defines how the model operates. By structuring your code to be compatible with this API, you benefit from a structured way to version your application so you can more systematically keep track of your experiments. + + + A `Model` is a combination of data (which can include configuration, trained model weights, or other information) and code that defines how the model operates. By structuring your code to be compatible with this API, you benefit from a structured way to version your application so you can more systematically keep track of your experiments. 
-To create a model in Weave, you need the following: + To create a model in Weave, you need the following: -- a class that inherits from `weave.Model` -- type definitions on all attributes -- a typed `predict` function with `@weave.op()` decorator + - a class that inherits from `weave.Model` + - type definitions on all attributes + - a typed `predict` function with `@weave.op()` decorator -```python -from weave import Model -import weave + ```python + from weave import Model + import weave -class YourModel(Model): - attribute1: str - attribute2: int + class YourModel(Model): + attribute1: str + attribute2: int - @weave.op() - def predict(self, input_data: str) -> dict: - # Model logic goes here - prediction = self.attribute1 + ' ' + input_data - return {'pred': prediction} -``` + @weave.op() + def predict(self, input_data: str) -> dict: + # Model logic goes here + prediction = self.attribute1 + ' ' + input_data + return {'pred': prediction} + ``` -You can call the model as usual with: + You can call the model as usual with: -```python -import weave -weave.init('intro-example') + ```python + import weave + weave.init('intro-example') -model = YourModel(attribute1='hello', attribute2=5) -model.predict('world') -``` + model = YourModel(attribute1='hello', attribute2=5) + model.predict('world') + ``` -This will track the model settings along with the inputs and outputs anytime you call `predict`. + This will track the model settings along with the inputs and outputs anytime you call `predict`. -## Automatic versioning of models + ## Automatic versioning of models -When you change the attributes or the code that defines your model, these changes will be logged and the version will be updated. -This ensures that you can compare the predictions across different versions of your model. Use this to iterate on prompts or to try the latest LLM and compare predictions across different settings. + When you change the attributes or the code that defines your model, these changes will be logged and the version will be updated. + This ensures that you can compare the predictions across different versions of your model. Use this to iterate on prompts or to try the latest LLM and compare predictions across different settings. -For example, here we create a new model: + For example, here we create a new model: -```python -import weave -weave.init('intro-example') + ```python + import weave + weave.init('intro-example') -model = YourModel(attribute1='howdy', attribute2=10) -model.predict('world') -``` + model = YourModel(attribute1='howdy', attribute2=10) + model.predict('world') + ``` -After calling this, you will see that you now have two versions of this Model in the UI, each with different tracked calls. + After calling this, you will see that you now have two versions of this Model in the UI, each with different tracked calls. -## Serve models + ## Serve models -To serve a model, you can easily spin up a FastAPI server by calling: + To serve a model, you can easily spin up a FastAPI server by calling: -```bash -weave serve -``` + ```bash + weave serve + ``` -For additional instructions, see [serve](/guides/tools/serve). + For additional instructions, see [serve](/guides/tools/serve). -## Track production calls + ## Track production calls -To separate production calls, you can add an additional attribute to the predictions for easy filtering in the UI or API. + To separate production calls, you can add an additional attribute to the predictions for easy filtering in the UI or API. 
-```python -with weave.attributes({'env': 'production'}): - model.predict('world') -``` + ```python + with weave.attributes({'env': 'production'}): + model.predict('world') + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + diff --git a/docs/docs/guides/evaluation/scorers.md b/docs/docs/guides/evaluation/scorers.md index ce7ea3b86c15..e313dcb55821 100644 --- a/docs/docs/guides/evaluation/scorers.md +++ b/docs/docs/guides/evaluation/scorers.md @@ -1,670 +1,795 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Evaluation Metrics ## Evaluations in Weave + In Weave, Scorers are used to evaluate AI outputs and return evaluation metrics. They take the AI's output, analyze it, and return a dictionary of results. Scorers can use your input data as reference if needed and can also output extra information, such as explanations or reasonings from the evaluation. -Scorers are passed to a `weave.Evaluation` object during evaluation. There are two types of Scorers in weave: + + + Scorers are passed to a `weave.Evaluation` object during evaluation. There are two types of Scorers in weave: + + 1. **Function-based Scorers:** Simple Python functions decorated with `@weave.op`. + 2. **Class-based Scorers:** Python classes that inherit from `weave.Scorer` for more complex evaluations. -1. **Function-based Scorers:** Simple Python functions decorated with `@weave.op`. -2. **Class-based Scorers:** Python classes that inherit from `weave.Scorer` for more complex evaluations. + Scorers must return a dictionary and can return multiple metrics, nested metrics and non-numeric values such as text returned from a LLM-evaluator about its reasoning. -Scorers must return a dictionary and can return multiple metrics, nested metrics and non-numeric values such as text returned from a LLM-evaluator about its reasoning. + + + Scorers are special ops passed to a `weave.Evaluation` object during evaluation. + + ## Create your own Scorers + ### Function-based Scorers -These are functions decorated with `@weave.op` that return a dictionary. They're great for simple evaluations like: -```python -import weave + + + These are functions decorated with `@weave.op` that return a dictionary. They're great for simple evaluations like: + + ```python + import weave + + @weave.op + def evaluate_uppercase(text: str) -> dict: + return {"text_is_uppercase": text.isupper()} + + my_eval = weave.Evaluation( + dataset=[{"text": "HELLO WORLD"}], + scorers=[evaluate_uppercase] + ) + ``` + + When the evaluation is run, `evaluate_uppercase` checks if the text is all uppercase. + + + + These are functions wrapped with `weave.op` that accept an object with `modelOutput` and optionally `datasetRow`. They're great for simple evaluations like: + ```typescript + import * as weave from 'weave' + + const evaluateUppercase = weave.op( + ({modelOutput}) => modelOutput.toUpperCase() === modelOutput, + {name: 'textIsUppercase'} + ); -@weave.op -def evaluate_uppercase(text: str) -> dict: # Added return type hint - return {"text_is_uppercase": text.isupper()} -my_eval = weave.Evaluation( - dataset=[{"text": "HELLO WORLD"}], - scorers=[evaluate_uppercase] -) -``` + const myEval = new weave.Evaluation({ + dataset: [{text: 'HELLO WORLD'}], + scorers: [evaluateUppercase], + }) + ``` -When the evaluation is run, `evaluate_uppercase` checks if the text is all uppercase. 
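+
+To actually run the evaluation, pass a model to `Evaluation.evaluate`; here is a minimal sketch using `my_eval` from the example above (the trivial `model` op and the project name are hypothetical):
+
+```python
+import asyncio
+import weave
+
+weave.init("scorer-example")  # hypothetical project name
+
+@weave.op
+def model(text: str) -> str:
+    # Hypothetical model: echo the dataset's "text" column unchanged
+    return text
+
+# Dataset columns are matched to the model's parameter names
+asyncio.run(my_eval.evaluate(model))
+```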
### Class-based Scorers
-For more advanced evaluations, especially when you need to keep track of additional scorer metadata, try different prompts for your LLM-evaluators, or make multiple function calls, you can use the `Scorer` class.
-**Requirements:**
-1. Inherit from `weave.Scorer`.
-2. Define a `score` method decorated with `@weave.op`.
-3. The `score` method must return a dictionary.
+
+<Tabs>
+  <TabItem value="python" label="Python" default>
+  For more advanced evaluations, especially when you need to keep track of additional scorer metadata, try different prompts for your LLM-evaluators, or make multiple function calls, you can use the `Scorer` class.
+
+  **Requirements:**
+
+  1. Inherit from `weave.Scorer`.
+  2. Define a `score` method decorated with `@weave.op`.
+  3. The `score` method must return a dictionary.
+
+  Example:
+
+  ```python
+  import weave
+  from openai import OpenAI
+  from weave import Scorer
+
+  llm_client = OpenAI()
+
+  #highlight-next-line
+  class SummarizationScorer(Scorer):
+      model_id: str = "gpt-4o"
+      system_prompt: str = "Evaluate whether the summary is good."
+
+      @weave.op
+      def some_complicated_preprocessing(self, text: str) -> str:
+          processed_text = "Original text: \n" + text + "\n"
+          return processed_text
+
+      @weave.op
+      def call_llm(self, summary: str, processed_text: str) -> dict:
+          res = llm_client.chat.completions.create(
+              model=self.model_id,
+              messages=[
+                  {"role": "system", "content": self.system_prompt},
+                  {"role": "user", "content": (
+                      f"Analyse how good the summary is compared to the original text. "
+                      f"Summary: {summary}\n{processed_text}"
+                  )}])
+          return {"summary_quality": res}
+
+      @weave.op
+      def score(self, output: str, text: str) -> dict:
+          """Score the summary quality.
+
+          Args:
+              output: The summary generated by an AI system
+              text: The original text being summarized
+          """
+          processed_text = self.some_complicated_preprocessing(text)
+          eval_result = self.call_llm(summary=output, processed_text=processed_text)
+          return {"summary_quality": eval_result}
+
+  summarization_scorer = SummarizationScorer()
+  evaluation = weave.Evaluation(
+      dataset=[{"text": "The quick brown fox jumps over the lazy dog."}],
+      scorers=[summarization_scorer])
+  ```
+
+  This class evaluates how good a summary is by comparing it to the original text.
+
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+  ```plaintext
+  This feature is not available in TypeScript yet. Stay tuned!
+  ```
+  </TabItem>
+</Tabs>
+
+## How Scorers Work
+### Scorer Keyword Arguments
+
+<Tabs>
+  <TabItem value="python" label="Python" default>
+  Scorers can access both the output from your AI system and the input data from the dataset row.
+
+  - **Input:** If you would like your scorer to use data from your dataset row, such as a "label" or "target" column, then you can easily make this available to the scorer by adding a `label` or `target` keyword argument to your scorer definition.
+
+  For example, if you wanted to use a column called "label" from your dataset, then your scorer function (or `score` class method) would have a parameter list like this:
-```python
-@weave.op
-def my_custom_scorer(output: str, label: int) -> dict: # Added return type hint
-    ...
-```
+
+  ```python
+  @weave.op
+  def my_custom_scorer(output: str, label: int) -> dict:
+      ...
+ ``` - @weave.op - def call_llm(self, summary: str, processed_text: str) -> dict: - res = llm_client.chat.completions.create( - messages=[ - {"role": "system", "content": self.system_prompt}, - {"role": "user", "content": ( - f"Analyse how good the summary is compared to the original text." - f"Summary: {summary}\n{processed_text}" - )}]) - return {"summary_quality": res} + When a weave `Evaluation` is run, the output of the AI system is passed to the `output` parameter. The `Evaluation` also automatically tries to match any additional scorer argument names to your dataset columns. If customizing your scorer arguments or dataset columns is not feasible, you can use column mapping - see below for more. - @weave.op - def score(self, output: str, text: str) -> dict: - """Score the summary quality. + - **Output:** Include an `output` parameter in your scorer function's signature to access the AI system's output. - Args: - output: The summary generated by an AI system - text: The original text being summarized - """ - processed_text = self.some_complicated_preprocessing(text) - eval_result = self.call_llm(summary=output, processed_text=processed_text) - return {"summary_quality": eval_result} + ### Mapping Column Names with `column_map` -evaluation = weave.Evaluation( - dataset=[{"text": "The quick brown fox jumps over the lazy dog."}], - scorers=[summarization_scorer]) -``` -This class evaluates how good a summary is by comparing it to the original text. + Sometimes, the `score` methods' argument names don't match the column names in your dataset. You can fix this using a `column_map`. -## How Scorers Work -### Scorer Keyword Arguments -Scorers can access both the output from your AI system and the input data from the dataset row. + If you're using a class-based scorer, pass a dictionary to the `column_map` attribute of `Scorer` when you initialise your scorer class. This dictionary maps your `score` method's argument names to the dataset's column names, in the order: `{scorer_keyword_argument: dataset_column_name}`. -- **Input:** If you would like your scorer to use data from your dataset row, such as a "label" or "target" column then you can easily make this available to the scorer by adding a `label` or `target` keyword argument to your scorer definition. + Example: -For example if you wanted to use a column called "label" from your dataset then your scorer function (or `score` class method) would have a parameter list like this: + ```python + import weave + from weave import Scorer -```python -@weave.op -def my_custom_scorer(output: str, label: int) -> dict: # Added return type hint - ... -``` + # A dataset with news articles to be summarised + dataset = [ + {"news_article": "The news today was great...", "date": "2030-04-20", "source": "Bright Sky Network"}, + ... + ] -When a weave `Evaluation` is run, the output of the AI system is passed to the `output` parameter. The `Evaluation` also automatically tries to match any additional scorer argument names to your dataset columns. If customizing your scorer arguments or dataset columns is not feasible, you can use column mapping - see below for more. + # Scorer class + class SummarizationScorer(Scorer): -- **Output:** Include an `output` parameter in your scorer function's signature to access the AI system's output. + @weave.op + def score(output, text) -> dict: + """ + output: output summary from a LLM summarization system + text: the text being summarised + """ + ... 
# evaluate the quality of the summary + # create a scorer with a column mapping the `text` argument to the `news_article` data column + scorer = SummarizationScorer(column_map={"text" : "news_article"}) + ``` -### Mapping Column Names with column_map -Sometimes, the `score` methods' argument names don't match the column names in your dataset. You can fix this using a `column_map`. + Now, the `text` argument in the `score` method will receive data from the `news_article` dataset column. -If you're using a class-based scorer, pass a dictionary to the `column_map` attribute of `Scorer` when you initialise your scorer class. This dictionary maps your `score` method's argument names to the dataset's column names, in the order: `{scorer_keyword_argument: dataset_column_name}`. + **Notes:** -Example: + - Another equivalent option to map your columns is to subclass the `Scorer` and overload the `score` method mapping the columns explicitly. -```python -import weave -from weave import Scorer + ```python + import weave + from weave import Scorer -# A dataset with news articles to be summarised -dataset = [ - {"news_article": "The news today was great...", "date": "2030-04-20", "source": "Bright Sky Network"}, - ... -] + class MySummarizationScorer(SummarizationScorer): -# Scorer class -class SummarizationScorer(Scorer): - - @weave.op - def score(output, text) -> dict: - """ - output: output summary from a LLM summarization system - text: the text being summarised - """ - ... # evaluate the quality of the summary + @weave.op + def score(self, output: str, news_article: str) -> dict: # Added type hints + # overload the score method and map columns manually + return super().score(output=output, text=news_article) + ``` -# create a scorer with a column mapping the `text` argument to the `news_article` data column -scorer = SummarizationScorer(column_map={"text" : "news_article"}) -``` + + + Scorers can access both the output from your AI system and the contents of the dataset row. -Now, the `text` argument in the `score` method will receive data from the `news_article` dataset column. + You can easily access relevant columns from the dataset row by adding a `datasetRow` keyword argument to your scorer definition. -**Notes:** -- Another equivalent option to map your columns is to subclass the `Scorer` and overload the `score` method mapping the columns explicitly. + ```typescript + const myScorer = weave.op( + ({modelOutput, datasetRow}) => { + return modelOutput * 2 === datasetRow.expectedOutputTimesTwo; + }, + {name: 'myScorer'} + ); + ``` -```python -import weave -from weave import Scorer + ### Mapping Column Names with `columnMapping` + :::warning -class MySummarizationScorer(SummarizationScorer): - - @weave.op - def score(self, output: str, news_article: str) -> dict: # Added type hints - # overload the score method and map columns manually - return super().score(output=output, text=news_article) -``` + In TypeScript, this feature is currently on the `Evaluation` object, not individual scorers! + + ::: + + Sometimes your `datasetRow` keys will not exactly match the scorer's naming scheme, but they are semantically similar. You can map the columns using the `Evaluation`'s `columnMapping` option. + + The mapping is always from the scorer's perspective, i.e. `{scorer_key: dataset_column_name}`. 
+
+  Example:
+
+  ```typescript
+  const myScorer = weave.op(
+    ({modelOutput, datasetRow}) => {
+      return modelOutput * 2 === datasetRow.expectedOutputTimesTwo;
+    },
+    {name: 'myScorer'}
+  );
+
+  const myEval = new weave.Evaluation({
+    dataset: [{expected: 2}],
+    scorers: [myScorer],
+    columnMapping: {expectedOutputTimesTwo: 'expected'}
+  });
+  ```
+
+  </TabItem>
+</Tabs>
+
### Final summarization of the scorer
-During evaluation, the scorer will be computed for each row of your dataset. To provide a final score for the evaluation we provide an `auto_summarize` depending on the returning type of the output.
- - average will be computed for numerical columns
- - count and fraction for boolean cols
- - other col types are ignored
+
+<Tabs>
+  <TabItem value="python" label="Python" default>
+  During evaluation, the scorer will be computed for each row of your dataset. To provide a final score for the evaluation, we provide an `auto_summarize` that aggregates depending on the return type of the output.
+  - Averages are computed for numerical columns
+  - Count and fraction for boolean columns
+  - Other column types are ignored
+
+  You can override the `summarize` method on the `Scorer` class and provide your own way of computing the final scores. The `summarize` function expects:
+
+  - A single parameter `score_rows`: This is a list of dictionaries, where each dictionary contains the scores returned by the `score` method for a single row of your dataset.
+  - It should return a dictionary containing the summarized scores.
+
+  **Why is this useful?**
+
+  When you need to score all rows before deciding on the final value of the score for the dataset.
+
+  ```python
+  class MyBinaryScorer(Scorer):
+      """
+      Returns True if the full output matches the target, False if not
+      """
+
+      @weave.op
+      def score(self, output, target):
+          return {"match": output == target}
+
+      def summarize(self, score_rows: list) -> dict:
+          full_match = all(row["match"] for row in score_rows)
+          return {"full_match": full_match}
+  ```
+
+  > In this example, the default `auto_summarize` would have returned the count and proportion of True.
- def summarize(self, score_rows: list) -> dict:
-     full_match = all(row["match"] for row in score_rows)
-     return {"full_match": full_match}
-```
-> In this example, the default `auto_summarize` would have returned the count and proportion of True.
+
+  If you want to learn more, check the implementation of [CorrectnessLLMJudge](/tutorial-rag#optional-defining-a-scorer-class).
+
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+  During evaluation, the scorer will be computed for each row of your dataset. To provide a final score, we use an internal `summarizeResults` function that aggregates depending on the output type.
+ - Averages are computed for numerical columns + - Count and fraction for boolean columns + - Other column types are ignored + + We don't currently support custom summarization. + + + ## Predefined Scorers -**Installation** + + + **Installation** + + To use Weave's predefined scorers you need to install some additional dependencies: -To use Weave's predefined scorers you need to install some additional dependencies: + ```bash + pip install weave[scorers] + ``` -```bash -pip install weave[scorers] -``` + **LLM-evaluators** -**LLM-evaluators** + The pre-defined scorers that use LLMs support the OpenAI, Anthropic, Google GenerativeAI and MistralAI clients. They also use `weave`'s `InstructorLLMScorer` class, so you'll need to install the [`instructor`](https://github.com/instructor-ai/instructor) Python package to be able to use them. You can get all necessary dependencies with `pip install "weave[scorers]"` -The pre-defined scorers that use LLMs support the OpenAI, Anthropic, Google GenerativeAI and MistralAI clients. They also use `weave`'s `InstructorLLMScorer` class, so you'll need to install the [`instructor`](https://github.com/instructor-ai/instructor) Python package to be able to use them. You can get all necessary dependencies with `pip install "weave[scorers]"` + ### `HallucinationFreeScorer` -### `HallucinationFreeScorer` + This scorer checks if your AI system's output includes any hallucinations based on the input data. -This scorer checks if your AI system's output includes any hallucinations based on the input data. + ```python + from weave.scorers import HallucinationFreeScorer -```python -from weave.scorers import HallucinationFreeScorer + llm_client = ... # initialize your LLM client here + + scorer = HallucinationFreeScorer( + client=llm_client, + model_id="gpt-4o" + ) + ``` -llm_client = ... # initialize your LLM client here + **Customization:** -scorer = HallucinationFreeScorer( - client=llm_client, - model_id="gpt4o" -) -``` + - Customize the `system_prompt` and `user_prompt` attributes of the scorer to define what "hallucination" means for you. -**Customization:** -- Customize the `system_prompt` and `user_prompt` attributes of the scorer to define what "hallucination" means for you. + **Notes:** -**Notes:** -- The `score` method expects an input column named `context`. If your dataset uses a different name, use the `column_map` attribute to map `context` to the dataset column. + - The `score` method expects an input column named `context`. If your dataset uses a different name, use the `column_map` attribute to map `context` to the dataset column. + + Here you have an example in the context of an evaluation: + + ```python + import asyncio + from openai import OpenAI + import weave + from weave.scorers import HallucinationFreeScorer + + # Initialize clients and scorers + llm_client = OpenAI() + hallucination_scorer = HallucinationFreeScorer( + client=llm_client, + model_id="gpt-4o", + column_map={"context": "input", "output": "other_col"} + ) -Here you have an example in the context of an evaluation: + # Create dataset + dataset = [ + {"input": "John likes various types of cheese."}, + {"input": "Pepe likes various types of cheese."}, + ] -```python -import asyncio -from openai import OpenAI -import weave -from weave.scorers import HallucinationFreeScorer + @weave.op + def model(input: str) -> str: + return "The person's favorite cheese is cheddar." 
-# Initialize clients and scorers -llm_client = OpenAI() -hallucination_scorer = HallucinationFreeScorer( - client=llm_client, - model_id="gpt-4o", - column_map={"context": "input", "output": "other_col"} -) + # Run evaluation + evaluation = weave.Evaluation( + dataset=dataset, + scorers=[hallucination_scorer], + ) + result = asyncio.run(evaluation.evaluate(model)) + print(result) + # {'HallucinationFreeScorer': {'has_hallucination': {'true_count': 2, 'true_fraction': 1.0}}, 'model_latency': {'mean': 1.4395725727081299}} + ``` -# Create dataset -dataset = [ - {"input": "John likes various types of cheese."}, - {"input": "Pepe likes various types of cheese."}, -] + --- -@weave.op -def model(input: str) -> str: - return "The person's favorite cheese is cheddar." + ### `SummarizationScorer` -# Run evaluation -evaluation = weave.Evaluation( - dataset=dataset, - scorers=[hallucination_scorer], -) -result = asyncio.run(evaluation.evaluate(model)) -print(result) -# {'HallucinationFreeScorer': {'has_hallucination': {'true_count': 2, 'true_fraction': 1.0}}, 'model_latency': {'mean': 1.4395725727081299}} -``` ---- + Use an LLM to compare a summary to the original text and evaluate the quality of the summary. -### `SummarizationScorer` + ```python + from weave.scorers import SummarizationScorer -Use an LLM to compare a summary to the original text and evaluate the quality of the summary. + llm_client = ... # initialize your LLM client here -```python -from weave.scorers import SummarizationScorer + scorer = SummarizationScorer( + client=llm_client, + model_id="gpt-4o" + ) + ``` -llm_client = ... # initialize your LLM client here + **How It Works:** -scorer = SummarizationScorer( - client=llm_client, - model_id="gpt4o" -) -``` + This scorer evaluates summaries in two ways: -**How It Works:** + 1. **Entity Density:** Checks the ratio of unique entities (like names, places, or things) mentioned in the summary to the total word count in the summary in order to estimate the "information density" of the summary. Uses an LLM to extract the entities. Similar to how entity density is used in the Chain of Density paper, https://arxiv.org/abs/2309.04269 -This scorer evaluates summaries in two ways: + 2. **Quality Grading:** Uses an LLM-evaluator to grade the summary as `poor`, `ok`, or `excellent`. These grades are converted to scores (0.0 for poor, 0.5 for ok, and 1.0 for excellent) so you can calculate averages. -1. **Entity Density:** Checks the ratio of unique entities (like names, places, or things) mentioned in the summary to the total word count in the summary in order to estimate the "information density" of the summary. Uses an LLM to extract the entities. Similar to how entity density is used in the Chain of Density paper, https://arxiv.org/abs/2309.04269 + **Customization:** -2. **Quality Grading:** Uses an LLM-evaluator to grade the summary as `poor`, `ok`, or `excellent`. These grades are converted to scores (0.0 for poor, 0.5 for ok, and 1.0 for excellent) so you can calculate averages. + - Adjust `summarization_evaluation_system_prompt` and `summarization_evaluation_prompt` to define what makes a good summary. -**Customization:** -- Adjust `summarization_evaluation_system_prompt` and `summarization_evaluation_prompt` to define what makes a good summary. + **Notes:** -**Notes:** -- This scorer uses the `InstructorLLMScorer` class. -- The `score` method expects the original text that was summarized to be present in the `input` column of the dataset. 
Use the `column_map` class attribute to map `input` to the correct dataset column if needed. + - This scorer uses the `InstructorLLMScorer` class. + - The `score` method expects the original text that was summarized to be present in the `input` column of the dataset. Use the `column_map` class attribute to map `input` to the correct dataset column if needed. + Here you have an example usage of the `SummarizationScorer` in the context of an evaluation: -Here you have an example usage of the `SummarizationScorer` in the context of an evaluation: + ```python + import asyncio + from openai import OpenAI + import weave + from weave.scorers import SummarizationScorer -```python -import asyncio -from openai import OpenAI -import weave -from weave.scorers import SummarizationScorer + class SummarizationModel(weave.Model): + @weave.op() + async def predict(self, input: str) -> str: + return "This is a summary of the input text." -class SummarizationModel(weave.Model): - @weave.op() - async def predict(self, input: str) -> str: - return "This is a summary of the input text." + # Initialize clients and scorers + llm_client = OpenAI() + model = SummarizationModel() + summarization_scorer = SummarizationScorer( + client=llm_client, + model_id="gpt-4o", + ) + # Create dataset + dataset = [ + {"input": "The quick brown fox jumps over the lazy dog."}, + {"input": "Artificial Intelligence is revolutionizing various industries."} + ] -# Initialize clients and scorers -llm_client = OpenAI() -model = SummarizationModel() -summarization_scorer = SummarizationScorer( - client=llm_client, - model_id="gpt-4o", -) -# Create dataset -dataset = [ - {"input": "The quick brown fox jumps over the lazy dog."}, - {"input": "Artificial Intelligence is revolutionizing various industries."} -] + # Run evaluation + evaluation = weave.Evaluation(dataset=dataset, scorers=[summarization_scorer]) + results = asyncio.run(evaluation.evaluate(model)) + print(results) + # {'SummarizationScorer': {'is_entity_dense': {'true_count': 0, 'true_fraction': 0.0}, 'summarization_eval_score': {'mean': 0.0}, 'entity_density': {'mean': 0.0}}, 'model_latency': {'mean': 6.210803985595703e-05}} + ``` -# Run evaluation -evaluation = weave.Evaluation(dataset=dataset, scorers=[summarization_scorer]) -results = asyncio.run(evaluation.evaluate(model)) -print(results) -# {'SummarizationScorer': {'is_entity_dense': {'true_count': 0, 'true_fraction': 0.0}, 'summarization_eval_score': {'mean': 0.0}, 'entity_density': {'mean': 0.0}}, 'model_latency': {'mean': 6.210803985595703e-05}} -``` + --- ---- + ### `OpenAIModerationScorer` -### `OpenAIModerationScorer` + The `OpenAIModerationScorer` uses OpenAI's Moderation API to check if the AI system's output contains disallowed content, such as hate speech or explicit material. -The `OpenAIModerationScorer` uses OpenAI's Moderation API to check if the AI system's output contains disallowed content, such as hate speech or explicit material. + ```python + from weave.scorers import OpenAIModerationScorer + from openai import OpenAI -```python -from weave.scorers import OpenAIModerationScorer -from openai import OpenAI + oai_client = OpenAI(api_key=...) # initialize your LLM client here -oai_client = OpenAI(api_key=...) 
# initialize your LLM client here
+
+  scorer = OpenAIModerationScorer(
+      client=oai_client,
+      model_id="text-moderation-latest"
+  )
+  ```
+
+  **How It Works:**
+
+  - Sends the AI's output to the OpenAI Moderation endpoint and returns a dictionary indicating whether the content is flagged and details about the categories involved.
+
+  **Notes:**
+
+  - Requires the `openai` Python package.
+  - The client must be an instance of OpenAI's `OpenAI` or `AsyncOpenAI` client.
+
+  Here is an example in the context of an evaluation:
+
+  ```python
+  import asyncio
+  from openai import OpenAI
+  import weave
+  from weave.scorers import OpenAIModerationScorer
+
+  class MyModel(weave.Model):
+      @weave.op
+      async def predict(self, input: str) -> str:
+          return input
+
+  # Initialize clients and scorers
+  client = OpenAI()
+  model = MyModel()
+  moderation_scorer = OpenAIModerationScorer(client=client)
+
+  # Create dataset
+  dataset = [
+      {"input": "I love puppies and kittens!"},
+      {"input": "I hate everyone and want to hurt them."}
+  ]
+
+  # Run evaluation
+  evaluation = weave.Evaluation(dataset=dataset, scorers=[moderation_scorer])
+  results = asyncio.run(evaluation.evaluate(model))
+  print(results)
+  # {'OpenAIModerationScorer': {'flagged': {'true_count': 1, 'true_fraction': 0.5}, 'categories': {'violence': {'true_count': 1, 'true_fraction': 1.0}}}, 'model_latency': {'mean': 9.500980377197266e-05}}
+  ```
+
+  ---
+
+  ### `EmbeddingSimilarityScorer`
+
+  The `EmbeddingSimilarityScorer` computes the cosine similarity between the embeddings of the AI system's output and a target text from your dataset. It's useful for measuring how similar the AI's output is to a reference text.
+
+  ```python
+  from weave.scorers import EmbeddingSimilarityScorer
+
+  llm_client = ...  # initialise your LLM client
+  similarity_scorer = EmbeddingSimilarityScorer(
+      client=llm_client,
+      target_column="reference_text",  # the dataset column to compare the output against
+      threshold=0.4  # the cosine similarity threshold to use
+  )
+  ```
+
+  **Parameters:**
+
+  - `target`: This scorer expects a `target` column in your dataset; it will calculate the cosine similarity of the embeddings of the `target` column to the AI system output. If your dataset doesn't contain a column called `target` you can use the scorer's `column_map` attribute to map `target` to the appropriate column name in your dataset. See the Column Mapping section for more.
+  - `threshold` (float): The minimum cosine similarity score between the embedding of the AI system output and the embedding of the `target`, above which the 2 samples are considered "similar" (defaults to `0.5`). `threshold` can be in a range from -1 to 1:
+    - 1 indicates identical direction.
+    - 0 indicates orthogonal vectors.
+    - -1 indicates opposite direction.
+
+  The correct cosine similarity threshold to set can fluctuate quite a lot depending on your use case; we advise exploring different thresholds.
+
+  Here is an example usage of the `EmbeddingSimilarityScorer` in the context of an evaluation:
+
+  ```python
+  import asyncio
+  from openai import OpenAI
+  import weave
+  from weave.scorers import EmbeddingSimilarityScorer
+
+  # Initialize clients and scorers
+  client = OpenAI()
+  similarity_scorer = EmbeddingSimilarityScorer(
+      client=client,
+      threshold=0.7,
+      column_map={"target": "reference"}
+  )
+
+  # Create dataset
+  dataset = [
+      {
+          "input": "He's name is John",
+          "reference": "John likes various types of cheese.",
+      },
+      {
+          "input": "He's name is Pepe.",
+          "reference": "Pepe likes various types of cheese.",
+      },
+  ]
+
+  # Define model
+  @weave.op
+  def model(input: str) -> str:
+      return "John likes various types of cheese."
-# Create dataset -dataset = [ - { - "input": "He's name is John", - "reference": "John likes various types of cheese.", - }, - { - "input": "He's name is Pepe.", - "reference": "Pepe likes various types of cheese.", - }, -] + # Run evaluation + evaluation = weave.Evaluation( + dataset=dataset, + scorers=[similarity_scorer], + ) + result = asyncio.run(evaluation.evaluate(model)) + print(result) + # {'EmbeddingSimilarityScorer': {'is_similar': {'true_count': 1, 'true_fraction': 0.5}, 'similarity_score': {'mean': 0.8448514031462045}}, 'model_latency': {'mean': 0.45862746238708496}} + ``` -# Define model -@weave.op -def model(input: str) -> str: - return "John likes various types of cheese." + --- -# Run evaluation -evaluation = weave.Evaluation( - dataset=dataset, - scorers=[similarity_scorer], -) -result = asyncio.run(evaluation.evaluate(model)) -print(result) -# {'EmbeddingSimilarityScorer': {'is_similar': {'true_count': 1, 'true_fraction': 0.5}, 'similarity_score': {'mean': 0.8448514031462045}}, 'model_latency': {'mean': 0.45862746238708496}} -``` + ### `ValidJSONScorer` ---- + The ValidJSONScorer checks whether the AI system's output is valid JSON. This scorer is useful when you expect the output to be in JSON format and need to verify its validity. -### `ValidJSONScorer` + ```python + from weave.scorers import ValidJSONScorer -The ValidJSONScorer checks whether the AI system's output is valid JSON. This scorer is useful when you expect the output to be in JSON format and need to verify its validity. + json_scorer = ValidJSONScorer() + ``` -```python -from weave.scorers import ValidJSONScorer + Here you have an example usage of the `ValidJSONScorer` in the context of an evaluation: -json_scorer = ValidJSONScorer() -``` + ```python + import asyncio + import weave + from weave.scorers import ValidJSONScorer -Here you have an example usage of the `ValidJSONScorer` in the context of an evaluation: + class JSONModel(weave.Model): + @weave.op() + async def predict(self, input: str) -> str: + # This is a placeholder. + # In a real scenario, this would generate JSON. + return '{"key": "value"}' -```python -import asyncio -import weave -from weave.scorers import ValidJSONScorer + model = JSONModel() + json_scorer = ValidJSONScorer() -class JSONModel(weave.Model): - @weave.op() - async def predict(self, input: str) -> str: - # This is a placeholder. - # In a real scenario, this would generate JSON. - return '{"key": "value"}' + dataset = [ + {"input": "Generate a JSON object with a key and value"}, + {"input": "Create an invalid JSON"} + ] -model = JSONModel() -json_scorer = ValidJSONScorer() + evaluation = weave.Evaluation(dataset=dataset, scorers=[json_scorer]) + results = asyncio.run(evaluation.evaluate(model)) + print(results) + # {'ValidJSONScorer': {'json_valid': {'true_count': 2, 'true_fraction': 1.0}}, 'model_latency': {'mean': 8.58306884765625e-05}} + ``` -dataset = [ - {"input": "Generate a JSON object with a key and value"}, - {"input": "Create an invalid JSON"} -] + --- -evaluation = weave.Evaluation(dataset=dataset, scorers=[json_scorer]) -results = asyncio.run(evaluation.evaluate(model)) -print(results) -# {'ValidJSONScorer': {'json_valid': {'true_count': 2, 'true_fraction': 1.0}}, 'model_latency': {'mean': 8.58306884765625e-05}} -``` + ### `ValidXMLScorer` + The `ValidXMLScorer` checks whether the AI system's output is valid XML. This is useful when expecting XML-formatted outputs. 
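+
+    For intuition, the check is equivalent to attempting to parse the output with a standard XML parser. A minimal sketch of that idea, using Python's built-in `xml.etree.ElementTree` (an illustration only, not the scorer's actual implementation):
+
+    ```python
+    import xml.etree.ElementTree as ET
+
+    def looks_like_valid_xml(text: str) -> bool:
+        # Parsing succeeds only for well-formed XML documents.
+        try:
+            ET.fromstring(text)
+            return True
+        except ET.ParseError:
+            return False
+    ```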
----
+    ```python
+    from weave.scorers import ValidXMLScorer

-### `ValidXMLScorer`
+    xml_scorer = ValidXMLScorer()
+    ```

-The `ValidXMLScorer` checks whether the AI system's output is valid XML. This is useful when expecting XML-formatted outputs.
+    Here is an example usage of the `ValidXMLScorer` in the context of an evaluation:

-```python
-from weave.scorers import ValidXMLScorer
+    ```python
+    import asyncio
+    import weave
+    from weave.scorers import ValidXMLScorer

-xml_scorer = ValidXMLScorer()
-```
+    class XMLModel(weave.Model):
+        @weave.op()
+        async def predict(self, input: str) -> str:
+            # This is a placeholder. In a real scenario, this would generate XML.
+            return '<root>value</root>'
+
+    model = XMLModel()
+    xml_scorer = ValidXMLScorer()

-Here you have an example usage of the `ValidXMLScorer` in the context of an evaluation:
+    dataset = [
+        {"input": "Generate a valid XML with a root element"},
+        {"input": "Create an invalid XML"}
+    ]

-```python
-import asyncio
-import weave
-from weave.scorers import ValidXMLScorer
+    evaluation = weave.Evaluation(dataset=dataset, scorers=[xml_scorer])
+    results = asyncio.run(evaluation.evaluate(model))
+    print(results)
+    # {'ValidXMLScorer': {'xml_valid': {'true_count': 2, 'true_fraction': 1.0}}, 'model_latency': {'mean': 8.20159912109375e-05}}
+    ```

-class XMLModel(weave.Model):
-    @weave.op()
-    async def predict(self, input: str) -> str:
-        # This is a placeholder. In a real scenario, this would generate XML.
-        return '<root>value</root>'
+    ---

-model = XMLModel()
-xml_scorer = ValidXMLScorer()
+    ### `PydanticScorer`

-dataset = [
-    {"input": "Generate a valid XML with a root element"},
-    {"input": "Create an invalid XML"}
-]
+    The `PydanticScorer` validates the AI system's output against a Pydantic model to ensure it adheres to a specified schema or data structure.

-evaluation = weave.Evaluation(dataset=dataset, scorers=[xml_scorer])
-results = asyncio.run(evaluation.evaluate(model))
-print(results)
-# {'ValidXMLScorer': {'xml_valid': {'true_count': 2, 'true_fraction': 1.0}}, 'model_latency': {'mean': 8.20159912109375e-05}}
-```
+    ```python
+    from weave.scorers import PydanticScorer
+    from pydantic import BaseModel

----
+    class FinancialReport(BaseModel):
+        revenue: int
+        year: str

-### `PydanticScorer`
+    pydantic_scorer = PydanticScorer(model=FinancialReport)
+    ```

-The `PydanticScorer` validates the AI system's output against a Pydantic model to ensure it adheres to a specified schema or data structure.
+    ---

-```python
-from weave.scorers import PydanticScorer
-from pydantic import BaseModel
+    ### RAGAS - `ContextEntityRecallScorer`

-class FinancialReport(BaseModel):
-    revenue: int
-    year: str
+    The `ContextEntityRecallScorer` estimates context recall by extracting entities from both the AI system's output and the provided context, then computing the recall score. Based on the [RAGAS](https://github.com/explodinggradients/ragas) evaluation library.

-pydantic_scorer = PydanticScorer(model=FinancialReport)
-```
+    ```python
+    from weave.scorers import ContextEntityRecallScorer

----
+    llm_client = ... # initialize your LLM client

-### RAGAS - `ContextEntityRecallScorer`
+    entity_recall_scorer = ContextEntityRecallScorer(
+        client=llm_client,
+        model_id="your-model-id"
+    )
+    ```

-The `ContextEntityRecallScorer` estimates context recall by extracting entities from both the AI system's output and the provided context, then computing the recall score. Based on the [RAGAS](https://github.com/explodinggradients/ragas) evaluation library
+    **How It Works:**

-```python
-from weave.scorers import ContextEntityRecallScorer
+    - Uses an LLM to extract unique entities from the output and context and calculates recall.
+    - **Recall** indicates the proportion of important entities from the context that are captured in the output, helping to assess the model's effectiveness in retrieving relevant information.
+    - Returns a dictionary with the recall score.

-llm_client = ... # initialise your LlM client
+    **Notes:**

-entity_recall_scorer = ContextEntityRecallScorer(
-    client=llm_client
-    model_id="your-model-id"
-)
-```
+    - Expects a `context` column in your dataset; use `column_map` to map `context` to another dataset column if needed.

-**How It Works:**
+    ---

-- Uses an LLM to extract unique entities from the output and context and calculates recall.
-- **Recall** indicates the proportion of important entities from the context that are captured in the output, helping to assess the model's effectiveness in retrieving relevant information.
-- Returns a dictionary with the recall score.
+    ### RAGAS - `ContextRelevancyScorer`

-**Notes:**
+    The `ContextRelevancyScorer` evaluates the relevancy of the provided context to the AI system's output. It helps determine if the context used is appropriate for generating the output. Based on the [RAGAS](https://github.com/explodinggradients/ragas) evaluation library.

-- Expects a `context` column in your dataset, use `column_map` to map `context` to another dataset column if needed.
+    ```python
+    from weave.scorers import ContextRelevancyScorer

----
+    llm_client = ... # initialize your LLM client

-### RAGAS - `ContextRelevancyScorer`
+    relevancy_scorer = ContextRelevancyScorer(
+        client=llm_client,
+        model_id="your-model-id"
+    )
+    ```

-The `ContextRelevancyScorer` evaluates the relevancy of the provided context to the AI system's output. It helps determine if the context used is appropriate for generating the output. Based on the [RAGAS](https://github.com/explodinggradients/ragas) evaluation library.
+    **How It Works:**

-```python
-from weave.scorers import ContextRelevancyScorer
+    - Uses an LLM to rate the relevancy of the context to the output on a scale from 0 to 1.
+    - Returns a dictionary with the `relevancy_score`.

-llm_client = ... # initialise your LlM client
+    **Notes:**

-relevancy_scorer = ContextRelevancyScorer(
-    llm_client = ... # initialise your LlM client
-    model_id="your-model-id"
+    - Expects a `context` column in your dataset; use `column_map` to map `context` to another dataset column if needed.
+    - Customize the `relevancy_prompt` to define how relevancy is assessed.
+
+    Here is an example usage of `ContextEntityRecallScorer` and `ContextRelevancyScorer` in the context of an evaluation:
+
+    ```python
+    import asyncio
+    from textwrap import dedent
+    from openai import OpenAI
+    import weave
+    from weave.scorers import ContextEntityRecallScorer, ContextRelevancyScorer
+
+    class RAGModel(weave.Model):
+        @weave.op()
+        async def predict(self, question: str) -> str:
+            "Retrieve relevant context"
+            return "Paris is the capital of France."
+
+
+    model = RAGModel()
+
+    # Define prompts
+    relevancy_prompt: str = dedent("""
+        Given the following question and context, rate the relevancy of the context to the question on a scale from 0 to 1.
+
+        Question: {question}
+        Context: {context}
+        Relevancy Score (0-1):
+        """)
+
+    # Initialize clients and scorers
+    llm_client = OpenAI()
+    entity_recall_scorer = ContextEntityRecallScorer(
+        client=llm_client,
+        model_id="gpt-4o",
+    )
+
+    relevancy_scorer = ContextRelevancyScorer(
+        client=llm_client,
+        model_id="gpt-4o",
+        relevancy_prompt=relevancy_prompt
+    )
-```
-
-**How It Works:**
-
-- Uses an LLM to rate the relevancy of the context to the output on a scale from 0 to 1.
-- Returns a dictionary with the `relevancy_score`.
-
-**Notes:**
-
-- Expects a `context` column in your dataset, use `column_map` to map `context` to another dataset column if needed.
-- Customize the `relevancy_prompt` to define how relevancy is assessed.
-
-
-Here you have an example usage of `ContextEntityRecallScorer` and `ContextRelevancyScorer` in the context of an evaluation:
-
-```python
-import asyncio
-from textwrap import dedent
-from openai import OpenAI
-import weave
-from weave.scorers import ContextEntityRecallScorer, ContextRelevancyScorer
-
-class RAGModel(weave.Model):
-    @weave.op()
-    async def predict(self, question: str) -> str:
-        "Retrieve relevant context"
-        return "Paris is the capital of France."
-
-
-model = RAGModel()
-
-# Define prompts
-relevancy_prompt: str = dedent("""
-    Given the following question and context, rate the relevancy of the context to the question on a scale from 0 to 1.
-
-    Question: {question}
-    Context: {context}
-    Relevancy Score (0-1):
-    """)
-
-# Initialize clients and scorers
-llm_client = OpenAI()
-entity_recall_scorer = ContextEntityRecallScorer(
-    client=client,
-    model_id="gpt-4o",
-)
-
-relevancy_scorer = ContextRelevancyScorer(
-    client=llm_client,
-    model_id="gpt-4o",
-    relevancy_prompt=relevancy_prompt
-)
-
-# Create dataset
-dataset = [
-    {
-        "question": "What is the capital of France?",
-        "context": "Paris is the capital city of France."
-    },
-    {
-        "question": "Who wrote Romeo and Juliet?",
-        "context": "William Shakespeare wrote many famous plays."
-    }
-]
-
-# Run evaluation
-evaluation = weave.Evaluation(
-    dataset=dataset,
-    scorers=[entity_recall_scorer, relevancy_scorer]
-)
-results = asyncio.run(evaluation.evaluate(model))
-print(results)
-# {'ContextEntityRecallScorer': {'recall': {'mean': 0.3333333333333333}}, 'ContextRelevancyScorer': {'relevancy_score': {'mean': 0.5}}, 'model_latency': {'mean': 9.393692016601562e-05}}
-```
+    # Create dataset
+    dataset = [
+        {
+            "question": "What is the capital of France?",
+            "context": "Paris is the capital city of France."
+        },
+        {
+            "question": "Who wrote Romeo and Juliet?",
+            "context": "William Shakespeare wrote many famous plays."
+        }
+    ]
+
+    # Run evaluation
+    evaluation = weave.Evaluation(
+        dataset=dataset,
+        scorers=[entity_recall_scorer, relevancy_scorer]
+    )
+    results = asyncio.run(evaluation.evaluate(model))
+    print(results)
+    # {'ContextEntityRecallScorer': {'recall': {'mean': 0.3333333333333333}}, 'ContextRelevancyScorer': {'relevancy_score': {'mean': 0.5}}, 'model_latency': {'mean': 9.393692016601562e-05}}
+    ```
+
+
+
+    ```plaintext
+    This feature is not available in TypeScript yet. Stay tuned!
+ ``` + + diff --git a/docs/docs/guides/integrations/openai.md b/docs/docs/guides/integrations/openai.md index e428fd5d66fc..541732f5060f 100644 --- a/docs/docs/guides/integrations/openai.md +++ b/docs/docs/guides/integrations/openai.md @@ -1,156 +1,308 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # OpenAI ## Tracing It’s important to store traces of LLM applications in a central database, both during development and in production. You’ll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application. -Weave can automatically capture traces for the [openai python library](https://platform.openai.com/docs/reference/python-sdk?lang=python). - -Start capturing by calling `weave.init()` with a project name your choice. - -```python -from openai import OpenAI -import weave -client = OpenAI() -# highlight-next-line -weave.init('emoji-bot') - -response = client.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "system", - "content": "You are AGI. You will be provided with a message, and your task is to respond using emojis only." - }, - { - "role": "user", - "content": "How are you?" - } - ], - temperature=0.8, - max_tokens=64, - top_p=1 -) -``` + + + Weave can automatically capture traces for the [openai python library](https://platform.openai.com/docs/libraries/python-library). + + Start capturing by calling `weave.init()` with a project name your choice. + + ```python + from openai import OpenAI + import weave + client = OpenAI() + # highlight-next-line + weave.init('emoji-bot') + + response = client.chat.completions.create( + model="gpt-4", + messages=[ + { + "role": "system", + "content": "You are AGI. You will be provided with a message, and your task is to respond using emojis only." + }, + { + "role": "user", + "content": "How are you?" + } + ], + temperature=0.8, + max_tokens=64, + top_p=1 + ) + ``` + + + + Weave can automatically capture traces for the [openai typescript library](https://platform.openai.com/docs/libraries/node-js-library). + + Start capturing by calling `await weave.init()` with a project name your choice, and then wrapping your OpenAI client with `weave.wrapOpenAI`. + + ```typescript + import {OpenAI} from 'openai'; + import * as weave from 'weave'; + + // highlight-next-line + const client = await weave.init('emoji-bot'); + // highlight-next-line + const openai = weave.wrapOpenAI(new OpenAI()); + + const response = await openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { + role: 'system', + content: + 'You are AGI. You will be provided with a message, and your task is to respond using emojis only.', + }, + { + role: 'user', + content: 'How are you?', + }, + ], + temperature: 0.8, + max_tokens: 64, + top_p: 1, + }); + ``` + + + [![openai.png](imgs/openai.png)](https://wandb.ai/_scott/emoji-bot/weave/calls) ## Track your own ops + + Wrapping a function with `@weave.op` starts capturing inputs, outputs and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git. Simply create a function decorated with [`@weave.op`](/guides/tracking/ops) that calls into [openai python library](https://platform.openai.com/docs/reference/python-sdk?lang=python). In the example below, we have 2 functions wrapped with op. 
This helps us see how intermediate steps, like the retrieval step in a RAG app, are affecting how our app behaves. -```python -# highlight-next-line -import weave -from openai import OpenAI -import requests, random -PROMPT="""Emulate the Pokedex from early Pokémon episodes. State the name of the Pokemon and then describe it. - Your tone is informative yet sassy, blending factual details with a touch of dry humor. Be concise, no more than 3 sentences. """ -POKEMON = ['pikachu', 'charmander', 'squirtle', 'bulbasaur', 'jigglypuff', 'meowth', 'eevee'] -client = OpenAI() - -# highlight-next-line -@weave.op -def get_pokemon_data(pokemon_name): + ```python # highlight-next-line - # This is a step within your application, like the retrieval step within a RAG app - url = f"https://pokeapi.co/api/v2/pokemon/{pokemon_name}" - response = requests.get(url) - if response.status_code == 200: - data = response.json() - name = data["name"] - types = [t["type"]["name"] for t in data["types"]] - species_url = data["species"]["url"] - species_response = requests.get(species_url) - evolved_from = "Unknown" - if species_response.status_code == 200: - species_data = species_response.json() - if species_data["evolves_from_species"]: - evolved_from = species_data["evolves_from_species"]["name"] - return {"name": name, "types": types, "evolved_from": evolved_from} - else: - return None - -# highlight-next-line -@weave.op -def pokedex(name: str, prompt: str) -> str: + import weave + from openai import OpenAI + import requests, random + PROMPT="""Emulate the Pokedex from early Pokémon episodes. State the name of the Pokemon and then describe it. + Your tone is informative yet sassy, blending factual details with a touch of dry humor. Be concise, no more than 3 sentences. """ + POKEMON = ['pikachu', 'charmander', 'squirtle', 'bulbasaur', 'jigglypuff', 'meowth', 'eevee'] + client = OpenAI() + # highlight-next-line - # This is your root op that calls out to other ops + @weave.op + def get_pokemon_data(pokemon_name): + # highlight-next-line + # This is a step within your application, like the retrieval step within a RAG app + url = f"https://pokeapi.co/api/v2/pokemon/{pokemon_name}" + response = requests.get(url) + if response.status_code == 200: + data = response.json() + name = data["name"] + types = [t["type"]["name"] for t in data["types"]] + species_url = data["species"]["url"] + species_response = requests.get(species_url) + evolved_from = "Unknown" + if species_response.status_code == 200: + species_data = species_response.json() + if species_data["evolves_from_species"]: + evolved_from = species_data["evolves_from_species"]["name"] + return {"name": name, "types": types, "evolved_from": evolved_from} + else: + return None + # highlight-next-line - data = get_pokemon_data(name) - if not data: return "Error: Unable to fetch data" - response = client.chat.completions.create( - model="gpt-3.5-turbo", - messages=[ - {"role": "system","content": prompt}, - {"role": "user", "content": str(data)} - ], - temperature=0.7, - max_tokens=100, - top_p=1 - ) - return response.choices[0].message.content + @weave.op + def pokedex(name: str, prompt: str) -> str: + # highlight-next-line + # This is your root op that calls out to other ops + # highlight-next-line + data = get_pokemon_data(name) + if not data: return "Error: Unable to fetch data" + response = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system","content": prompt}, + {"role": "user", "content": str(data)} + ], + temperature=0.7, + 
max_tokens=100, + top_p=1 + ) + return response.choices[0].message.content -# highlight-next-line -weave.init('pokedex-openai') -# Get data for a specific Pokémon -pokemon_data = pokedex(random.choice(POKEMON), PROMPT) -``` + # highlight-next-line + weave.init('pokedex-openai') + # Get data for a specific Pokémon + pokemon_data = pokedex(random.choice(POKEMON), PROMPT) + ``` Navigate to Weave and you can click `get_pokemon_data` in the UI to see the inputs & outputs of that step. + + +Wrapping a function with `weave.op` starts capturing inputs, outputs and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git. + + Simply create a function wrapped with [`weave.op`](/guides/tracking/ops) that calls into [openai typescript library](https://platform.openai.com/docs/libraries/node-js-library). + + In the example below, we have 2 functions wrapped with op. This helps us see how intermediate steps, like the retrieval step within a RAG app, are affecting how our app behaves. + + ```typescript + import OpenAI from 'openai'; + // highlight-next-line + import * as weave from 'weave'; + + const PROMPT = `Emulate the Pokedex from early Pokémon episodes. State the name of the Pokemon and then describe it. + Your tone is informative yet sassy, blending factual details with a touch of dry humor. Be concise, no more than 3 sentences.`; + const POKEMON = [ + 'pikachu', + 'charmander', + 'squirtle', + 'bulbasaur', + 'jigglypuff', + 'meowth', + 'eevee', + ]; + + const openai = weave.wrapOpenAI(new OpenAI()); + + interface PokemonData { + name: string; + types: string[]; + evolved_from: string; + } -[![openai-pokedex.png](imgs/openai-pokedex.png)](https://wandb.ai/_scott/pokedex-openai/weave) - -## Create a `Model` for easier experimentation - -Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app like your system prompt or the model you're using. This helps organize and compare different iterations of your app. - -In addition to versioning code and capturing inputs/outputs, [`Model`](/guides/core-types/models)s capture structured parameters that control your application’s behavior, making it easy to find what parameters worked best. You can also use Weave Models with `serve`, and [`Evaluation`](/guides/core-types/evaluations)s. - -In the example below, you can experiment with `model` and `system_message`. Every time you change one of these, you'll get a new _version_ of `GrammarCorrectorModel`. 
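+
+        // If the species lookup fails, evolved_from simply stays 'Unknown'.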
- -```python -import weave -from openai import OpenAI + // highlight-next-line + const getPokemonData = weave.op(async function getPokemonData( + pokemonName: string + ): Promise { + try { + const url = `https://pokeapi.co/api/v2/pokemon/${pokemonName}`; + const response = await fetch(url); + + if (response.ok) { + const data = await response.json(); + const name = data.name; + const types = data.types.map((t: any) => t.type.name); + + const speciesResponse = await fetch(data.species.url); + let evolved_from = 'Unknown'; + + if (speciesResponse.ok) { + const speciesData = await speciesResponse.json(); + if (speciesData.evolves_from_species) { + evolved_from = speciesData.evolves_from_species.name; + } + } -weave.init('grammar-openai') + return {name, types, evolved_from}; + } + return null; + } catch (error) { + return null; + } + }); + + // highlight-next-line + const pokedex = weave.op(async function pokedex( + name: string, + prompt: string + ): Promise { + const data = await getPokemonData(name); + if (!data) return 'Error: Unable to fetch data'; + + const response = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo', + messages: [ + {role: 'system', content: prompt}, + {role: 'user', content: JSON.stringify(data)}, + ], + temperature: 0.7, + max_tokens: 100, + top_p: 1, + }); + + return response.choices[0].message.content || ''; + }); + + async function main() { + await weave.init('pokedex-openai'); + const randomPokemon = POKEMON[Math.floor(Math.random() * POKEMON.length)]; + const pokemonData = await pokedex(randomPokemon, PROMPT); + console.log(pokemonData); + } -class GrammarCorrectorModel(weave.Model): # Change to `weave.Model` - model: str - system_message: str + main(); + ``` - @weave.op() - def predict(self, user_input): # Change to `predict` - client = OpenAI() - response = client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": self.system_message - }, - { - "role": "user", - "content": user_input - } - ], - temperature=0, - ) - return response.choices[0].message.content + + +[![openai-pokedex.png](imgs/openai-pokedex.png)](https://wandb.ai/_scott/pokedex-openai/weave) -corrector = GrammarCorrectorModel( - model="gpt-3.5-turbo-1106", - system_message = "You are a grammar checker, correct the following user input.") -result = corrector.predict("That was so easy, it was a piece of pie!") -print(result) -``` +## Create a `Model` for easier experimentation -[![openai-model.png](imgs/openai-model.png)](https://wandb.ai/_scott/grammar-openai/weave/calls) + + + Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app like your system prompt or the model you're using. This helps organize and compare different iterations of your app. + + In addition to versioning code and capturing inputs/outputs, [`Model`](/guides/core-types/models)s capture structured parameters that control your application’s behavior, making it easy to find what parameters worked best. You can also use Weave Models with `serve`, and [`Evaluation`](/guides/core-types/evaluations)s. + + In the example below, you can experiment with `model` and `system_message`. Every time you change one of these, you'll get a new _version_ of `GrammarCorrectorModel`. 
+
+    ```python
+    import weave
+    from openai import OpenAI
+
+    weave.init('grammar-openai')
+
+    class GrammarCorrectorModel(weave.Model): # Change to `weave.Model`
+        model: str
+        system_message: str
+
+        @weave.op()
+        def predict(self, user_input): # Change to `predict`
+            client = OpenAI()
+            response = client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": self.system_message
+                    },
+                    {
+                        "role": "user",
+                        "content": user_input
+                    }
+                ],
+                temperature=0,
+            )
+            return response.choices[0].message.content
+
+
+    corrector = GrammarCorrectorModel(
+        model="gpt-3.5-turbo-1106",
+        system_message = "You are a grammar checker, correct the following user input.")
+    result = corrector.predict("That was so easy, it was a piece of pie!")
+    print(result)
+    ```
+
+    [![openai-model.png](imgs/openai-model.png)](https://wandb.ai/_scott/grammar-openai/weave/calls)
+
+
+
+    ```plaintext
+    This feature is not available in TypeScript yet. Stay tuned!
+    ```
+
+

## Usage Info

@@ -162,4 +314,4 @@ If you explicitly set `stream=True` and `stream_options={"include_usage": True}`

## Support for deprecated function calling

-OpenAI deprecated the `functions` argument in favor of `tool_calls`. Since frameworks like Langchain, LlamaIndex, etc., still support this argument our OpenAI weave integration will trace if you pass list of function schemas to `functions` argument.
+OpenAI deprecated the `functions` argument in favor of `tool_calls`. Since frameworks like Langchain, LlamaIndex, etc., still support this argument, our OpenAI weave integration will trace if you pass a list of function schemas to the `functions` argument.
diff --git a/docs/docs/guides/tracking/costs.md b/docs/docs/guides/tracking/costs.md
index bedca15aa173..cf130eabea2a 100644
--- a/docs/docs/guides/tracking/costs.md
+++ b/docs/docs/guides/tracking/costs.md
@@ -1,99 +1,146 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Costs

 ## Adding a custom cost

-You can add a custom cost by using the [`add_cost`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-add_cost) method.
-The three required fields are `llm_id`, `prompt_token_cost`, and `completion_token_cost`.
-`llm_id` is the name of the LLM (e.g. `gpt-4o`). `prompt_token_cost` and `completion_token_cost` are cost per token for the LLM (if the LLM prices were specified inper million tokens, make sure to convert the value).
-You can also set `effective_date` to a datetime, to make the cost effective at a specific date, this defaults to the current date.
+
+
+    You can add a custom cost by using the [`add_cost`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-add_cost) method.
+    The three required fields are `llm_id`, `prompt_token_cost`, and `completion_token_cost`.
+    `llm_id` is the name of the LLM (e.g. `gpt-4o`). `prompt_token_cost` and `completion_token_cost` are cost per token for the LLM (if the LLM prices were specified per million tokens, make sure to convert the value).
+    You can also set `effective_date` to a datetime to make the cost effective at a specific date; this defaults to the current date.
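+
+    For example, a price quoted per million tokens converts to a per-token cost by dividing by 1,000,000. A quick sketch (the dollar prices here are made up purely for illustration):
+
+    ```python
+    # $5.00 per 1M prompt tokens -> cost per prompt token
+    prompt_token_cost = 5.00 / 1_000_000       # 0.000005
+    # $15.00 per 1M completion tokens -> cost per completion token
+    completion_token_cost = 15.00 / 1_000_000  # 0.000015
+    ```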
+ + ```python + import weave + from datetime import datetime + + client = weave.init("my_custom_cost_model") -```python -import weave -from datetime import datetime + client.add_cost( + llm_id="your_model_name", + prompt_token_cost=0.01, + completion_token_cost=0.02 + ) + + client.add_costs( + llm_id="your_model_name", + prompt_token_cost=10, + completion_token_cost=20, + # If for example I want to raise the price of the model after a certain date + effective_date=datetime(2025, 4, 22), + ) + ``` -client = weave.init("my_custom_cost_model") + + -client.add_cost( - llm_id="your_model_name", - prompt_token_cost=0.01, - completion_token_cost=0.02 -) + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` -client.add_costs({ - llm_id="your_model_name", - prompt_token_cost=10, - completion_token_cost=20, - # If for example I want to raise the price of the model after a certain date - effective_date=datetime(2025, 4, 22), -) -``` + + ## Querying for costs -You can query for costs by using the [`query_costs`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-query_costs) method. -There are a few ways to query for costs, you can pass in a singular cost id, or a list of LLM model names. + + + You can query for costs by using the [`query_costs`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-query_costs) method. + There are a few ways to query for costs, you can pass in a singular cost id, or a list of LLM model names. -```python -import weave + ```python + import weave -client = weave.init("my_custom_cost_model") + client = weave.init("my_custom_cost_model") -costs = client.query_costs(llm_ids=["your_model_name"]) + costs = client.query_costs(llm_ids=["your_model_name"]) -cost = client.query_costs(costs[0].id) -``` + cost = client.query_costs(costs[0].id) + ``` -## Purging a custom cost + + -You can purge a custom cost by using the [`purge_costs`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-purge_costs) method. You pass in a list of cost ids, and the costs with those ids are purged. + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` -```python -import weave + + -client = weave.init("my_custom_cost_model") +## Purging a custom cost -costs = client.query_costs(llm_ids=["your_model_name"]) -client.purge_costs([cost.id for cost in costs]) -``` + + + You can purge a custom cost by using the [`purge_costs`](/reference/python-sdk/weave/trace/weave.trace.weave_client#method-purge_costs) method. You pass in a list of cost ids, and the costs with those ids are purged. -## Calculating costs for a Project + ```python + import weave -You can calculate costs for a project by using our `calls_query` and adding `include_costs=True` with a little bit of setup. + client = weave.init("my_custom_cost_model") -```python -import weave + costs = client.query_costs(llm_ids=["your_model_name"]) + client.purge_costs([cost.id for cost in costs]) + ``` -weave.init("project_costs") -@weave.op() -def get_costs_for_project(project_name: str): - total_cost = 0 - requests = 0 + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! 
+ ``` + + - client = weave.init(project_name) - # Fetch all the calls in the project - calls = list( - client.get_calls(filter={"trace_roots_only": True}, include_costs=True) - ) +## Calculating costs for a Project - for call in calls: - # If the call has costs, we add them to the total cost - if call.summary["weave"] is not None and call.summary["weave"].get("costs", None) is not None: - for k, cost in call.summary["weave"]["costs"].items(): - requests += cost["requests"] - total_cost += cost["prompt_tokens_total_cost"] - total_cost += cost["completion_tokens_total_cost"] - - # We return the total cost, requests, and calls - return { - "total_cost": total_cost, - "requests": requests, - "calls": len(calls), - } - -# Since we decorated our function with @weave.op(), -# our totals are stored in weave for historic cost total calculations -get_costs_for_project("my_custom_cost_model") -``` + + + You can calculate costs for a project by using our `calls_query` and adding `include_costs=True` with a little bit of setup. + + ```python + import weave + + weave.init("project_costs") + @weave.op() + def get_costs_for_project(project_name: str): + total_cost = 0 + requests = 0 + + client = weave.init(project_name) + # Fetch all the calls in the project + calls = list( + client.get_calls(filter={"trace_roots_only": True}, include_costs=True) + ) + + for call in calls: + # If the call has costs, we add them to the total cost + if call.summary["weave"] is not None and call.summary["weave"].get("costs", None) is not None: + for k, cost in call.summary["weave"]["costs"].items(): + requests += cost["requests"] + total_cost += cost["prompt_tokens_total_cost"] + total_cost += cost["completion_tokens_total_cost"] + + # We return the total cost, requests, and calls + return { + "total_cost": total_cost, + "requests": requests, + "calls": len(calls), + } + + # Since we decorated our function with @weave.op(), + # our totals are stored in weave for historic cost total calculations + get_costs_for_project("my_custom_cost_model") + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + ## Setting up a custom model with custom costs diff --git a/docs/docs/guides/tracking/feedback.md b/docs/docs/guides/tracking/feedback.md index cd4ccd98d35c..67875db77a55 100644 --- a/docs/docs/guides/tracking/feedback.md +++ b/docs/docs/guides/tracking/feedback.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Feedback Evaluating LLM applications automatically is challenging. Teams often rely on direct user feedback, particularly from domain experts, who assess the content quality using simple indicators such as thumbs up or down. Developers also actively identify and resolve content issues. @@ -24,98 +27,156 @@ Access copy-and-paste examples on the "Use" tab of the call details page to mani ## SDK -Use the Weave Python SDK to programmatically add, remove, and query feedback on calls. +Use the Weave SDK to programmatically add, remove, and query feedback on calls. ### Querying a project's feedback -```python -import weave -client = weave.init('intro-example') - -# Get all feedback in a project -all_feedback = client.get_feedback() - -# Fetch a specific feedback object by id. -# Note that the API still returns a collection, which is expected -# to contain zero or one item(s). -one_feedback = client.get_feedback("")[0] - -# Find all feedback objects with a specific reaction. You can specify offset and limit. 
-thumbs_up = client.get_feedback(reaction="👍", limit=10) - -# After retrieval you can view the details of individual feedback objects. -for f in client.get_feedback(): - print(f.id) - print(f.created_at) - print(f.feedback_type) - print(f.payload) -``` + + + ```python + import weave + client = weave.init('intro-example') + + # Get all feedback in a project + all_feedback = client.get_feedback() + + # Fetch a specific feedback object by id. + # Note that the API still returns a collection, which is expected + # to contain zero or one item(s). + one_feedback = client.get_feedback("")[0] + + # Find all feedback objects with a specific reaction. You can specify offset and limit. + thumbs_up = client.get_feedback(reaction="👍", limit=10) + + # After retrieval you can view the details of individual feedback objects. + for f in client.get_feedback(): + print(f.id) + print(f.created_at) + print(f.feedback_type) + print(f.payload) + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + ### Adding feedback to a call -```python -import weave -client = weave.init('intro-example') + + + ```python + import weave + client = weave.init('intro-example') -call = client.get_call("") + call = client.get_call("") -# Adding an emoji reaction -call.feedback.add_reaction("👍") + # Adding an emoji reaction + call.feedback.add_reaction("👍") -# Adding a note -call.feedback.add_note("this is a note") + # Adding a note + call.feedback.add_note("this is a note") -# Adding custom key/value pairs. -# The first argument is a user-defined "type" string. -# Feedback must be JSON serializable and less than 1kb when serialized. -call.feedback.add("correctness", { "value": 5 }) -``` + # Adding custom key/value pairs. + # The first argument is a user-defined "type" string. + # Feedback must be JSON serializable and less than 1kb when serialized. + call.feedback.add("correctness", { "value": 5 }) + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + ### Retrieving the Call UUID For scenarios where you need to add feedback immediately after a call, you can retrieve the call UUID programmatically during or after the call execution. Here is how to get the UUID of the call from within the operation: -```python - -import weave -weave.init("uuid") - -@weave.op() -def simple_operation(input_value): - # Perform some simple operation - output = f"Processed {input_value}" - # Get the current call ID - current_call = weave.require_current_call() - call_id = current_call.id - return output, call_id -``` + + + ```python + + import weave + weave.init("uuid") + + @weave.op() + def simple_operation(input_value): + # Perform some simple operation + output = f"Processed {input_value}" + # Get the current call ID + current_call = weave.require_current_call() + call_id = current_call.id + return output, call_id + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! 
+ ``` + + Additionally, you can use call() method to execute the operation and retrieve the call ID after execution of the function: -```python -import weave -weave.init("uuid") - -@weave.op() -def simple_operation(input_value): - return f"Processed {input_value}" - -# Execute the operation and retrieve the result and call ID -result, call = simple_operation.call("example input") -call_id = call.id -``` + + + ```python + import weave + weave.init("uuid") + + @weave.op() + def simple_operation(input_value): + return f"Processed {input_value}" + + # Execute the operation and retrieve the result and call ID + result, call = simple_operation.call("example input") + call_id = call.id + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + ### Querying feedback on a call -```python -for f in call.feedback: - print(f.id) - print(f.feedback_type) - print(f.payload) -``` + + + ```python + for f in call.feedback: + print(f.id) + print(f.feedback_type) + print(f.payload) + ``` + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + ### Deleting feedback from a call -```python -call.feedback.purge("") -``` + + + ```python + call.feedback.purge("") + ``` + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + diff --git a/docs/docs/guides/tracking/objects.md b/docs/docs/guides/tracking/objects.md index 9241fc2484d7..9c6c8012e7a9 100644 --- a/docs/docs/guides/tracking/objects.md +++ b/docs/docs/guides/tracking/objects.md @@ -1,29 +1,62 @@ -# Objects +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; -Weave's serialization layer saves and versions Python objects. +# Objects ## Publishing an object -```python -import weave -# Initialize tracking to the project 'intro-example' -weave.init('intro-example') -# Save a list, giving it the name 'cat-names' -weave.publish(['felix', 'jimbo', 'billie'], 'cat-names') -``` +Weave's serialization layer saves and versions objects. + + + + + ```python + import weave + # Initialize tracking to the project 'intro-example' + weave.init('intro-example') + # Save a list, giving it the name 'cat-names' + weave.publish(['felix', 'jimbo', 'billie'], 'cat-names') + ``` + + + + Publishing in TypeScript is still early, so not all objects are fully supported yet. + + ```typescript + import * as weave from 'weave' + + // Initialize tracking to the project 'intro-example' + const client = await weave.init('intro-example') + + // Save an array, giving it the name 'cat-names' + client.publish(['felix', 'jimbo', 'billie'], 'cat-names') + ``` + + + Saving an object with a name will create the first version of that object if it doesn't exist. ## Getting an object back -`weave.publish` returns a Ref. You can call `.get()` on any Ref to get the object back. + + + `weave.publish` returns a Ref. You can call `.get()` on any Ref to get the object back. -You can construct a ref and then fetch the object back. + You can construct a ref and then fetch the object back. -```python -weave.init('intro-example') -cat_names = weave.ref('cat-names').get() -``` + ```python + weave.init('intro-example') + cat_names = weave.ref('cat-names').get() + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! 
+ ``` + + ## Ref styles diff --git a/docs/docs/guides/tracking/ops.md b/docs/docs/guides/tracking/ops.md index 48ac1e5ff2a6..52b58a0033cc 100644 --- a/docs/docs/guides/tracking/ops.md +++ b/docs/docs/guides/tracking/ops.md @@ -1,69 +1,117 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Ops A Weave op is a versioned function that automatically logs all calls. -To create an op, decorate a python function with `weave.op()` + + + To create an op, decorate a python function with `weave.op()` + + ```python showLineNumbers + import weave + + @weave.op() + def track_me(v): + return v + 5 + + weave.init('intro-example') + track_me(15) + ``` + + Calling an op will create a new op version if the code has changed from the last call, and log the inputs and outputs of the function. -```python -import weave + :::note + Functions decorated with `@weave.op()` will behave normally (without code versioning and tracking), if you don't call `weave.init('your-project-name')` before calling them. + ::: -@weave.op() -def track_me(v): - return v + 5 + Ops can be [served](/guides/tools/serve) or [deployed](/guides/tools/deploy) using the Weave toolbelt. -weave.init('intro-example') -track_me(15) -``` + + + To create an op, wrap a typescript function with `weave.op` -Calling an op will create a new op version if the code has changed from the last call, and log the inputs and outputs of the function. + ```typescript showLineNumbers + import * as weave from 'weave' -:::note -Functions decorated with `@weave.op()` will behave normally (without code versioning and tracking), if you don't call `weave.init('your-project-name')` before calling them. -::: + function trackMe(v: number) { + return v + 5 + } -Ops can be [served](/guides/tools/serve) or [deployed](/guides/tools/deploy) using the Weave toolbelt. + const trackMeOp = weave.op(trackMe) + trackMeOp(15) + + + // You can also do this inline, which may be more convenient + const trackMeInline = weave.op((v: number) => v + 5) + trackMeInline(15) + ``` + + + ## Customize display names -You can customize the op's display name by setting the `name` parameter in the `@weave.op` decorator: + + + You can customize the op's display name by setting the `name` parameter in the `@weave.op` decorator: -```python -@weave.op(name="custom_name") -def func(): - ... -``` + ```python + @weave.op(name="custom_name") + def func(): + ... + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + ## Customize logged inputs and outputs -If you want to change the data that is logged to weave without modifying the original function (e.g. to hide sensitive data), you can pass `postprocess_inputs` and `postprocess_output` to the op decorator. + + + If you want to change the data that is logged to weave without modifying the original function (e.g. to hide sensitive data), you can pass `postprocess_inputs` and `postprocess_output` to the op decorator. + + `postprocess_inputs` takes in a dict where the keys are the argument names and the values are the argument values, and returns a dict with the transformed inputs. -`postprocess_inputs` takes in a dict where the keys are the argument names and the values are the argument values, and returns a dict with the transformed inputs. + `postprocess_output` takes in any value which would normally be returned by the function and returns the transformed output. -`postprocess_output` takes in any value which would normally be returned by the function and returns the transformed output. 
+ ```py + from dataclasses import dataclass + from typing import Any + import weave -```py -from dataclasses import dataclass -from typing import Any -import weave + @dataclass + class CustomObject: + x: int + secret_password: str -@dataclass -class CustomObject: - x: int - secret_password: str + def postprocess_inputs(inputs: dict[str, Any]) -> dict[str, Any]: + return {k:v for k,v in inputs.items() if k != "hide_me"} -def postprocess_inputs(inputs: dict[str, Any]) -> dict[str, Any]: - return {k:v for k,v in inputs.items() if k != "hide_me"} + def postprocess_output(output: CustomObject) -> CustomObject: + return CustomObject(x=output.x, secret_password="REDACTED") -def postprocess_output(output: CustomObject) -> CustomObject: - return CustomObject(x=output.x, secret_password="REDACTED") + @weave.op( + postprocess_inputs=postprocess_inputs, + postprocess_output=postprocess_output, + ) + def func(a: int, hide_me: str) -> CustomObject: + return CustomObject(x=a, secret_password=hide_me) -@weave.op( - postprocess_inputs=postprocess_inputs, - postprocess_output=postprocess_output, -) -def func(a: int, hide_me: str) -> CustomObject: - return CustomObject(x=a, secret_password=hide_me) + weave.init('hide-data-example') # 🐝 + func(a=1, hide_me="password123") + ``` -weave.init('hide-data-example') # 🐝 -func(a=1, hide_me="password123") -``` + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + diff --git a/docs/docs/guides/tracking/tracing.mdx b/docs/docs/guides/tracking/tracing.mdx index 2a2b81e45761..57d7ef426af8 100644 --- a/docs/docs/guides/tracking/tracing.mdx +++ b/docs/docs/guides/tracking/tracing.mdx @@ -34,237 +34,358 @@ There are three main ways to create Calls in Weave: ### 1. Automatic Tracking of LLM Libraries -Weave automatically tracks [calls to common LLM libraries](../integrations/index.md) like `openai`, `anthropic`, `cohere`, and `mistral`. Simply call [`weave.init('project_name')`](../../reference/python-sdk/weave/index.md#function-init) at the start of your program: -```python showLineNumbers -import weave + + + Weave automatically tracks [calls to common LLM libraries](../integrations/index.md) like `openai`, `anthropic`, `cohere`, and `mistral`. Simply call [`weave.init('project_name')`](../../reference/python-sdk/weave/index.md#function-init) at the start of your program: -from openai import OpenAI -client = OpenAI() + ```python showLineNumbers + import weave -# Initialize Weave Tracing -weave.init('intro-example') + from openai import OpenAI + client = OpenAI() -response = client.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "How are you?" - } - ], - temperature=0.8, - max_tokens=64, - top_p=1 -) -``` + # Initialize Weave Tracing + weave.init('intro-example') -### 2. Decorating Functions + response = client.chat.completions.create( + model="gpt-4", + messages=[ + { + "role": "user", + "content": "How are you?" + } + ], + temperature=0.8, + max_tokens=64, + top_p=1, + ) + ``` -However, often LLM applications have additional logic (such as pre/post processing, prompts, etc.) that you want to track. -Weave allows you to manually track these calls using the [`@weave.op`](../../reference/python-sdk/weave/index.md#function-op) decorator. For example: + + + Weave automatically tracks [calls to common LLM libraries](../integrations/index.md) like `openai`. 
Simply call [`await weave.init('project_name')`](../../reference/typescript-sdk/weave/functions/init.md) and wrap your OpenAI client with [`weave.wrapOpenAI`](../../reference/typescript-sdk/weave/functions/wrapOpenAI.md) at the start of your program: -```python showLineNumbers -import weave + ```typescript showLineNumbers + import OpenAI from 'openai' + import * as weave from 'weave' -# Initialize Weave Tracing -weave.init('intro-example') + const client = weave.wrapOpenAI(new OpenAI()) -# Decorate your function -@weave.op -def my_function(name: str): - return f"Hello, {name}!" + // Initialize Weave Tracing + await weave.init('intro-example') -# Call your function -- Weave will automatically track inputs and outputs -print(my_function("World")) -``` + const response = await client.chat.completions.create({ + model: 'gpt-4', + messages: [ + { + role: 'user', + content: 'How are you?', + }, + ], + temperature: 0.8, + max_tokens: 64, + top_p: 1, + }); + ``` + + + -This works for both functions as well as methods on classes: -```python showLineNumbers -import weave +### 2. Decorating/Wrapping Functions -# Initialize Weave Tracing -weave.init("intro-example") +However, often LLM applications have additional logic (such as pre/post processing, prompts, etc.) that you want to track. + + + + Weave allows you to manually track these calls using the [`@weave.op`](../../reference/python-sdk/weave/index.md#function-op) decorator. For example: -class MyClass: - # Decorate your method + ```python showLineNumbers + import weave + + # Initialize Weave Tracing + weave.init('intro-example') + + # Decorate your function @weave.op - def my_method(self, name: str): + def my_function(name: str): return f"Hello, {name}!" -instance = MyClass() + # Call your function -- Weave will automatically track inputs and outputs + print(my_function("World")) + ``` -# Call your method -- Weave will automatically track inputs and outputs -print(instance.my_method("World")) -``` + This works for both functions as well as methods on classes: -Sometimes it is useful to get a handle to the `Call` object itself. You can do this by calling the `op.call` method, which returns both the result and the `Call` object. For example: -```python showLineNumbers -result, call = my_function.call("World") -``` + ```python showLineNumbers + import weave -Then, `call` can be used to set / update / fetch additional properties (most commonly used to get the ID of the call to be used for feedback). + # Initialize Weave Tracing + weave.init("intro-example") -:::note -If your op is a method on a class, you need to pass the instance as the first argument to the op (see example below). -::: + class MyClass: + # Decorate your method + @weave.op + def my_method(self, name: str): + return f"Hello, {name}!" -```python showLineNumbers -# Notice that we pass the `instance` as the first argument. -# highlight-next-line -print(instance.my_method.call(instance, "World")) -``` + instance = MyClass() + # Call your method -- Weave will automatically track inputs and outputs + print(instance.my_method("World")) + ``` + + + Weave allows you to manually track these calls by wrapping your function with [`weave.op`](../../reference/typescript-sdk/weave/functions/op.md). 
For example: -```python showLineNumbers -import weave + ```typescript showLineNumbers + import * as weave from 'weave' -# Initialize Weave Tracing -weave.init("intro-example") + await weave.init('intro-example') -class MyClass: - # Decorate your method - @weave.op - def my_method(self, name: str): - return f"Hello, {name}!" + function myFunction(name: string) { + return `Hello, ${name}!` + } + + const myFunctionOp = weave.op(myFunction) + ``` + + You can also define the wrapping inline: + + ```typescript + const myFunctionOp = weave.op((name: string) => `Hello, ${name}!`) + ``` + + This works for both functions as well as methods on classes: + + ```typescript + class MyClass { + constructor() { + this.myMethod = weave.op(this.myMethod) + } + + myMethod(name: string) { + return `Hello, ${name}!` + } + } + ``` + + + + +#### Getting a handle to the Call object during execution + + + + Sometimes it is useful to get a handle to the `Call` object itself. You can do this by calling the `op.call` method, which returns both the result and the `Call` object. For example: + + ```python showLineNumbers + result, call = my_function.call("World") + ``` + + Then, `call` can be used to set / update / fetch additional properties (most commonly used to get the ID of the call to be used for feedback). + + :::note + If your op is a method on a class, you need to pass the instance as the first argument to the op (see example below). + ::: + + ```python showLineNumbers + # Notice that we pass the `instance` as the first argument. + # highlight-next-line + print(instance.my_method.call(instance, "World")) + ``` + + + ```python showLineNumbers + import weave + + # Initialize Weave Tracing + weave.init("intro-example") + + class MyClass: + # Decorate your method + @weave.op + def my_method(self, name: str): + return f"Hello, {name}!" + + instance = MyClass() + + # Call your method -- Weave will automatically track inputs and outputs + instance.my_method.call(instance, "World") + ``` + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + -instance = MyClass() -# Call your method -- Weave will automatically track inputs and outputs -instance.my_method.call(instance, "World") -``` #### Call Display Name -Sometimes you may want to override the display name of a call. You can achieve this in one of four ways: + + + Sometimes you may want to override the display name of a call. You can achieve this in one of four ways: -1. Change the display name at the time of calling the op: + 1. Change the display name at the time of calling the op: -```python showLineNumbers -result = my_function("World", __weave={"display_name": "My Custom Display Name"}) -``` + ```python showLineNumbers + result = my_function("World", __weave={"display_name": "My Custom Display Name"}) + ``` -:::note + :::note -Using the `__weave` dictionary sets the call display name which will take precedence over the Op display name. + Using the `__weave` dictionary sets the call display name which will take precedence over the Op display name. -::: + ::: -2. Change the display name on a per-call basis. This uses the [`Op.call`](../../reference/python-sdk/weave/trace/weave.trace.op.md#function-call) method to return a `Call` object, which you can then use to set the display name using [`Call.set_display_name`](../../reference/python-sdk/weave/trace/weave.trace.weave_client.md#method-set_display_name). 
-```python showLineNumbers -result, call = my_function.call("World") -call.set_display_name("My Custom Display Name") -``` + 2. Change the display name on a per-call basis. This uses the [`Op.call`](../../reference/python-sdk/weave/trace/weave.trace.op.md#function-call) method to return a `Call` object, which you can then use to set the display name using [`Call.set_display_name`](../../reference/python-sdk/weave/trace/weave.trace.weave_client.md#method-set_display_name). + ```python showLineNumbers + result, call = my_function.call("World") + call.set_display_name("My Custom Display Name") + ``` -3. Change the display name for all Calls of a given Op: + 3. Change the display name for all Calls of a given Op: -```python showLineNumbers -@weave.op(call_display_name="My Custom Display Name") -def my_function(name: str): - return f"Hello, {name}!" -``` + ```python showLineNumbers + @weave.op(call_display_name="My Custom Display Name") + def my_function(name: str): + return f"Hello, {name}!" + ``` -4. The `call_display_name` can also be a function that takes in a `Call` object and returns a string. The `Call` object will be passed automatically when the function is called, so you can use it to dynamically generate names based on the function's name, call inputs, attributes, etc. + 4. The `call_display_name` can also be a function that takes in a `Call` object and returns a string. The `Call` object will be passed automatically when the function is called, so you can use it to dynamically generate names based on the function's name, call inputs, attributes, etc. - 1. One common use case is just appending a timestamp to the function's name. + 1. One common use case is just appending a timestamp to the function's name. - ```py - from datetime import datetime + ```py + from datetime import datetime - @weave.op(call_display_name=lambda call: f"{call.func_name}__{datetime.now()}") - def func(): - return ... - ``` + @weave.op(call_display_name=lambda call: f"{call.func_name}__{datetime.now()}") + def func(): + return ... + ``` - 2. You can also get creative with custom attributes + 2. You can also get creative with custom attributes - ```py - def custom_attribute_name(call): - model = call.attributes["model"] - revision = call.attributes["revision"] - now = call.attributes["date"] + ```py + def custom_attribute_name(call): + model = call.attributes["model"] + revision = call.attributes["revision"] + now = call.attributes["date"] - return f"{model}__{revision}__{now}" + return f"{model}__{revision}__{now}" - @weave.op(call_display_name=custom_attribute_name) - def func(): - return ... + @weave.op(call_display_name=custom_attribute_name) + def func(): + return ... 
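+
+        # The attribute values read inside custom_attribute_name come from the
+        # weave.attributes() context manager used below.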
-    with weave.attributes(
-        {
-            "model": "finetuned-llama-3.1-8b",
-            "revision": "v0.1.2",
-            "date": "2024-08-01",
-        }
-    ):
-        func()  # the display name will be "finetuned-llama-3.1-8b__v0.1.2__2024-08-01"
+       with weave.attributes(
+           {
+               "model": "finetuned-llama-3.1-8b",
+               "revision": "v0.1.2",
+               "date": "2024-08-01",
+           }
+       ):
+           func()  # the display name will be "finetuned-llama-3.1-8b__v0.1.2__2024-08-01"

-    with weave.attributes(
-        {
-            "model": "finetuned-gpt-4o",
-            "revision": "v0.1.3",
-            "date": "2024-08-02",
-        }
-    ):
-        func()  # the display name will be "finetuned-gpt-4o__v0.1.3__2024-08-02"
-    ```
+       with weave.attributes(
+           {
+               "model": "finetuned-gpt-4o",
+               "revision": "v0.1.3",
+               "date": "2024-08-02",
+           }
+       ):
+           func()  # the display name will be "finetuned-gpt-4o__v0.1.3__2024-08-02"
+       ```

-**Technical Note:** "Calls" are produced by "Ops". An Op is a function or method that is decorated with `@weave.op`.
-By default, the Op's name is the function name, and the associated calls will have the same display name. The above example shows how to override the display name for all Calls of a given Op. Sometimes, users wish to override the name of the Op itself. This can be achieved in one of two ways:
+    **Technical Note:** "Calls" are produced by "Ops". An Op is a function or method that is decorated with `@weave.op`.
+    By default, the Op's name is the function name, and the associated calls will have the same display name. The above example shows how to override the display name for all Calls of a given Op. Sometimes, users wish to override the name of the Op itself. This can be achieved in one of two ways:
+
+    1. Set the `name` property of the Op before any calls are logged
+    ```python showLineNumbers
+    my_function.name = "My Custom Op Name"
+    ```
+
+    2. Set the `name` option on the op decorator
+    ```python showLineNumbers
+    @weave.op(name="My Custom Op Name")
+    ```
+
+
+    ```plaintext
+    This feature is not available in TypeScript yet. Stay tuned!
+    ```
+
+

-1. Set the `name` property of the Op before any calls are logged
-```python showLineNumbers
-my_function.name = "My Custom Op Name"
-```
-2. Set the `name` option on the op decorator
-```python showLineNumbers
-@weave.op(name="My Custom Op Name)
-```

#### Attributes

-When calling tracked functions, you can add additional metadata to the call by using the [`weave.attributes`](../../reference/python-sdk/weave/index.md#function-attributes) context manager. In the example below, we add an `env` attribute to the call specified as `'production'`.
-```python showLineNumbers
-# ... continued from above ...
+
+
+    When calling tracked functions, you can add additional metadata to the call by using the [`weave.attributes`](../../reference/python-sdk/weave/index.md#function-attributes) context manager. In the example below, we add an `env` attribute to the call specified as `'production'`.

-# Add additional "attributes" to the call
-with weave.attributes({'env': 'production'}):
-    print(my_function.call("World"))
-```
+    ```python showLineNumbers
+    # ... continued from above ...
+
+    # Add additional "attributes" to the call
+    with weave.attributes({'env': 'production'}):
+        print(my_function.call("World"))
+    ```
+
+
+    ```plaintext
+    This feature is not available in TypeScript yet. Stay tuned!
+    ```
+
+

### 3. Manual Call Tracking

You can also manually create Calls using the API directly.
-
-
-    ```python showLineNumbers
-    import weave
+
+

-    # Initialize Weave Tracing
-    weave.init('intro-example')
+    ```python showLineNumbers
+    import weave

-    def my_function(name: str):
-        # Start a call
-        call = weave.create_call(op="my_function", inputs={"name": name})
+    # Initialize Weave Tracing
+    weave.init('intro-example')

-    # ... your function code ...
+    def my_function(name: str):
+        # Start a call
+        call = weave.create_call(op="my_function", inputs={"name": name})

-    # End a call
-    weave.finish_call(call, output="Hello, World!")
+        # ... your function code ...

-    # Call your function
-    print(my_function("World"))
+        # End a call
+        weave.finish_call(call, output="Hello, World!")
+
+        # Call your function
+        print(my_function("World"))
+    ```
+
+
+
+
+    ```plaintext
+    This feature is not available in TypeScript yet. Stay tuned!
    ```
+
+

* Start a call: [POST `/call/start`](../../reference/service-api/call-start-call-start-post.api.mdx)
* End a call: [POST `/call/end`](../../reference/service-api/call-end-call-end-post.api.mdx)
@@ -294,7 +415,7 @@ You can also manually create Calls using the API directly.

## Viewing Calls

-
+

To view a call in the web app:

1. Navigate to your project's "Traces" tab
@@ -321,6 +442,20 @@ You can also manually create Calls using the API directly.

    ```

+
+    ```typescript showLineNumbers
+    import * as weave from 'weave'
+
+    // Initialize the client
+    const client = await weave.init('intro-example')
+
+    // Get a specific call by its ID
+    const call = await client.getCall('call-uuid-here')
+
+    console.log(call)
+    ```
+
+
    To view a call using the Service API, you can make a request to the [`/call/read`](../../reference/service-api/call-read-call-read-post.api.mdx) endpoint.

@@ -367,6 +502,11 @@ All of these mutations can be performed from the UI by navigating to the call de

    call.set_display_name("My Custom Display Name")
    ```

+
+    ```plaintext
+    This feature is not available in TypeScript yet. Stay tuned!
+    ```
+
    To set the display name of a call using the Service API, you can make a request to the [`/call/update`](../../reference/service-api/call-update-call-update-post.api.mdx) endpoint.

@@ -406,6 +546,11 @@ Please see the [Feedback Documentation](./feedback.md) for more details.

    call.delete()
    ```

+
+
+    ```plaintext
+    This feature is not available in TypeScript yet. Stay tuned!
+    ```

    To delete a call using the Service API, you can make a request to the [`/calls/delete`](../../reference/service-api/calls-delete-calls-delete-post.api.mdx) endpoint.

@@ -456,7 +601,7 @@ The easiest way to get started is to construct a view in the UI, then learn more

    client = weave.init("your-project-name")

    # Fetch calls
-    calls = client.calls(filter=...)
+    calls = client.get_calls(filter=...)
    ```

:::info[Notice: Evolving APIs]
@@ -478,6 +623,18 @@ The easiest way to get started is to construct a view in the UI, then learn more

    ```

:::
+
+    To fetch calls using the TypeScript API, you can use the [`client.getCalls`](../../reference/typescript-sdk/weave/classes/WeaveClient#getcalls) method.
+    ```typescript
+    import * as weave from 'weave'
+
+    // Initialize the client
+    const client = await weave.init('intro-example')
+
+    // Fetch calls, passing your CallsFilter object
+    const calls = await client.getCalls(filter)
+    ```
+
    The most powerful query layer is at the Service API. To fetch calls using the Service API, you can make a request to the [`/calls/stream_query`](../../reference/service-api/calls-query-stream-calls-stream-query-post.api.mdx) endpoint.
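+
+For larger projects, buffering every matching call with `getCalls` can be expensive. The client also exposes a streaming variant, [`client.getCallsIterator`](../../reference/typescript-sdk/weave/classes/WeaveClient#getcallsiterator), with the same `(filter, includeCosts, limit)` parameters. A minimal sketch — the filter contents and limit below are illustrative assumptions, not required values:
+
+```typescript
+import * as weave from 'weave'
+
+const client = await weave.init('intro-example')
+
+// Stream matching calls one at a time instead of materializing the full list.
+// `trace_roots_only` restricts results to top-level calls (see CallsFilter).
+for await (const call of client.getCallsIterator(
+  {trace_roots_only: true},
+  false, // includeCosts
+  100 // limit
+)) {
+  console.log(call.op_name, call.started_at)
+}
+```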
diff --git a/docs/docs/quickstart.md b/docs/docs/quickstart.md index 24702739ab5a..59ac16ef2360 100644 --- a/docs/docs/quickstart.md +++ b/docs/docs/quickstart.md @@ -1,23 +1,34 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Track LLM inputs & outputs - Follow these steps to track your first call or Open In Colab ## 1. Install Weave and create an API Key **Install weave** -First install the weave python library: - -```python -pip install weave -``` +First install the weave library: + + + + ```bash + pip install weave + ``` + + + ```bash + pnpm install weave + ``` + + **Get your API key** -Then, create a Weights & Biases (W&B) account here https://wandb.ai/site and copy your API key from https://wandb.ai/authorize +Then, create a Weights & Biases (W&B) account at https://wandb.ai and copy your API key from https://wandb.ai/authorize ## 2. Log a trace to a new project @@ -25,68 +36,109 @@ To get started with tracking your first project with Weave: - Import the `weave` library - Call `weave.init('project-name')` to start tracking - - You will be prompted to log in with your API key if you are not yet logged in on your machine. - - To log to a specific W&B Team name, replace `project-name` with `team-name/project-name` - - **NOTE:** In automated environments, you can define the environment variable `WANDB_API_KEY` with your API key to login without prompting. + - You will be prompted to log in with your API key if you are not yet logged in on your machine. + - To log to a specific W&B Team name, replace `project-name` with `team-name/project-name` + - **NOTE:** In automated environments, you can define the environment variable `WANDB_API_KEY` with your API key to login without prompting. - Add the `@weave.op()` decorator to the python functions you want to track -*In this example, we're using openai so you will need to add an OpenAI [API key](https://platform.openai.com/docs/quickstart/step-2-setup-your-api-key).* - -```python -# highlight-next-line -import weave -from openai import OpenAI - -client = OpenAI(api_key="...") - -# Weave will track the inputs, outputs and code of this function -# highlight-next-line -@weave.op() -def extract_dinos(sentence: str) -> dict: - response = client.chat.completions.create( - model="gpt-4o", - messages=[ - { - "role": "system", - "content": """In JSON format extract a list of `dinosaurs`, with their `name`, -their `common_name`, and whether its `diet` is a herbivore or carnivore""" - }, - { - "role": "user", - "content": sentence - } - ], - response_format={ "type": "json_object" } - ) - return response.choices[0].message.content - - -# Initialise the weave project -# highlight-next-line -weave.init('jurassic-park') - -sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \ -both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \ -Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below.""" - -result = extract_dinos(sentence) -print(result) -``` - -When you call the `extract_dinos` function Weave will output a link to view your trace. 
+_In this example, we're using openai so you will need to add an OpenAI [API key](https://platform.openai.com/docs/quickstart/step-2-setup-your-api-key)._ + + + + ```python + # highlight-next-line + import weave + from openai import OpenAI + + client = OpenAI(api_key="...") + + # Weave will track the inputs, outputs and code of this function + # highlight-next-line + @weave.op() + def extract_dinos(sentence: str) -> dict: + response = client.chat.completions.create( + model="gpt-4o", + messages=[ + { + "role": "system", + "content": """In JSON format extract a list of `dinosaurs`, with their `name`, + their `common_name`, and whether its `diet` is a herbivore or carnivore""" + }, + { + "role": "user", + "content": sentence + } + ], + response_format={ "type": "json_object" } + ) + return response.choices[0].message.content + + + # Initialise the weave project + # highlight-next-line + weave.init('jurassic-park') + + sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \ + both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \ + Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below.""" + + result = extract_dinos(sentence) + print(result) + ``` + When you call the `extract_dinos` function Weave will output a link to view your trace. + + + + ```typescript + import OpenAI from 'openai'; + // highlight-next-line + import * as weave from 'weave'; + + // highlight-next-line + const openai = weave.wrapOpenAI(new OpenAI()); + + async function extractDinos(input: string) { + const response = await openai.chat.completions.create({ + model: 'gpt-4o', + messages: [ + { + role: 'user', + content: `In JSON format extract a list of 'dinosaurs', with their 'name', their 'common_name', and whether its 'diet' is a herbivore or carnivore: ${input}`, + }, + ], + }); + return response.choices[0].message.content; + } + // highlight-next-line + const extractDinosOp = weave.op(extractDinos); + + async function main() { + // highlight-next-line + await weave.init('examples'); + const result = await extractDinosOp( + 'I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below.' + ); + console.log(result); + } + + main(); + + ``` + When you call the `extractDinos` function Weave will output a link to view your trace. + + + ## 3. Automated LLM library logging Calls made to OpenAI, Anthropic and [many more LLM libraries](guides/integrations/) are automatically tracked with Weave, with **LLM metadata**, **token usage** and **cost** being logged automatically. If your LLM library isn't currently one of our integrations you can track calls to other LLMs libraries or frameworks easily by wrapping them with `@weave.op()`. - ## 4. See traces of your application in your project 🎉 Congrats! Now, every time you call this function, weave will automatically capture the input & output data and log any changes made to the code. ![Weave Trace Outputs 1](../static/img/tutorial_trace_1.png) - ## What's next? - Follow the [Tracking flows and app metadata](/tutorial-tracing_2) to start tracking and the data flowing through your app. 
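+
+Expanding on step 3 above: if your LLM library has no built-in integration, wrapping the raw call is usually enough. Below is a minimal TypeScript sketch of that pattern — `callMyLlm` is a hypothetical stand-in for your provider's SDK, not a real Weave integration:
+
+```typescript
+import * as weave from 'weave';
+
+// Hypothetical, non-integrated LLM client call.
+async function callMyLlm(prompt: string): Promise<string> {
+  // ... call your provider's SDK here ...
+  return 'stub response';
+}
+
+// weave.op is the TypeScript analog of the @weave.op() decorator:
+// inputs and outputs of each call are tracked automatically.
+const trackedLlm = weave.op(callMyLlm);
+
+async function main() {
+  await weave.init('jurassic-park');
+  console.log(await trackedLlm('Name three dinosaurs.'));
+}
+
+main();
+```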
diff --git a/docs/docs/reference/gen_notebooks/leaderboard_quickstart.md b/docs/docs/reference/gen_notebooks/leaderboard_quickstart.md new file mode 100644 index 000000000000..1830209847f1 --- /dev/null +++ b/docs/docs/reference/gen_notebooks/leaderboard_quickstart.md @@ -0,0 +1,343 @@ +--- +title: Leaderboard Quickstart +--- + + +:::tip[This is a notebook] + +
Open In Colab
Open In Colab
+ +
View in GitHub
View in GitHub
+
+:::
+
+
+
+
+
+# Leaderboard Quickstart
+
+In this notebook we will learn to use Weave's Leaderboard to compare model performance across different datasets and scoring functions. Specifically, we will:
+
+1. Generate a dataset of fake zip code data.
+2. Author some scoring functions and evaluate a baseline model.
+3. Use these techniques to evaluate a matrix of models vs evaluations.
+4. Review the leaderboard in the Weave UI.
+
+## Step 1: Generate a dataset of fake zip code data
+
+First we will create a function `generate_dataset_rows` that generates a list of fake zip code data.
+
+
+```python
+import json
+
+from openai import OpenAI
+from pydantic import BaseModel
+
+
+class Row(BaseModel):
+    zip_code: str
+    city: str
+    state: str
+    avg_temp_f: float
+    population: int
+    median_income: int
+    known_for: str
+
+
+class Rows(BaseModel):
+    rows: list[Row]
+
+
+def generate_dataset_rows(
+    location: str = "United States", count: int = 5, year: int = 2022
+):
+    client = OpenAI()
+
+    completion = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": f"Please generate {count} rows of data for random zip codes in {location} for the year {year}.",
+            },
+        ],
+        response_format={
+            "type": "json_schema",
+            "json_schema": {
+                "name": "response_format",
+                "schema": Rows.model_json_schema(),
+            },
+        },
+    )
+
+    return json.loads(completion.choices[0].message.content)["rows"]
+```
+
+
+```python
+import weave
+
+weave.init("leaderboard-demo")
+```
+
+## Step 2: Author scoring functions
+
+Next we will author 3 scoring functions:
+
+1. `check_concrete_fields`: Checks if the model output matches the expected city and state.
+2. `check_value_fields`: Checks if the model output is within 10% of the expected population and median income.
+3. `check_subjective_fields`: Uses an LLM to check if the model output matches the expected "known for" field.
+
+
+
+```python
+@weave.op
+def check_concrete_fields(city: str, state: str, output: dict):
+    return {
+        "city_match": city == output["city"],
+        "state_match": state == output["state"],
+    }
+
+
+@weave.op
+def check_value_fields(
+    avg_temp_f: float, population: int, median_income: int, output: dict
+):
+    return {
+        "avg_temp_f_err": abs(avg_temp_f - output["avg_temp_f"]) / avg_temp_f,
+        "population_err": abs(population - output["population"]) / population,
+        "median_income_err": abs(median_income - output["median_income"])
+        / median_income,
+    }
+
+
+@weave.op
+def check_subjective_fields(zip_code: str, known_for: str, output: dict):
+    client = OpenAI()
+
+    class Response(BaseModel):
+        correct_known_for: bool
+
+    completion = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": f"My student was asked what the zip code {zip_code} is best known for. The right answer is '{known_for}', and they said '{output['known_for']}'. Is their answer correct?",
+            },
+        ],
+        response_format={
+            "type": "json_schema",
+            "json_schema": {
+                "name": "response_format",
+                "schema": Response.model_json_schema(),
+            },
+        },
+    )
+
+    return json.loads(completion.choices[0].message.content)
+```
+
+## Step 3: Create a simple Evaluation
+
+Next we define a simple evaluation using our fake data and scoring functions.
+ + + +```python +rows = generate_dataset_rows() +evaluation = weave.Evaluation( + name="United States - 2022", + dataset=rows, + scorers=[ + check_concrete_fields, + check_value_fields, + check_subjective_fields, + ], +) +``` + +## Step 4: Evaluate a baseline model + +Now we will evaluate a baseline model which returns a static response. + + + +```python +@weave.op +def baseline_model(zip_code: str): + return { + "city": "New York", + "state": "NY", + "avg_temp_f": 50.0, + "population": 1000000, + "median_income": 100000, + "known_for": "The Big Apple", + } + + +await evaluation.evaluate(baseline_model) +``` + +## Step 5: Create more Models + +Now we will create 2 more models to compare against the baseline. + + +```python +@weave.op +def gpt_4o_mini_no_context(zip_code: str): + client = OpenAI() + + completion = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": f"""Zip code {zip_code}"""}], + response_format={ + "type": "json_schema", + "json_schema": { + "name": "response_format", + "schema": Row.model_json_schema(), + }, + }, + ) + + return json.loads(completion.choices[0].message.content) + + +await evaluation.evaluate(gpt_4o_mini_no_context) +``` + + +```python +@weave.op +def gpt_4o_mini_with_context(zip_code: str): + client = OpenAI() + + completion = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + { + "role": "user", + "content": f"""Please answer the following questions about the zip code {zip_code}: + 1. What is the city? + 2. What is the state? + 3. What is the average temperature in Fahrenheit? + 4. What is the population? + 5. What is the median income? + 6. What is the most well known thing about this zip code? + """, + } + ], + response_format={ + "type": "json_schema", + "json_schema": { + "name": "response_format", + "schema": Row.model_json_schema(), + }, + }, + ) + + return json.loads(completion.choices[0].message.content) + + +await evaluation.evaluate(gpt_4o_mini_with_context) +``` + +## Step 6: Create more Evaluations + +Now we will evaluate a matrix of models vs evaluations. + + + +```python +scorers = [ + check_concrete_fields, + check_value_fields, + check_subjective_fields, +] +evaluations = [ + weave.Evaluation( + name="United States - 2022", + dataset=weave.Dataset( + name="United States - 2022", + rows=generate_dataset_rows("United States", 5, 2022), + ), + scorers=scorers, + ), + weave.Evaluation( + name="California - 2022", + dataset=weave.Dataset( + name="California - 2022", rows=generate_dataset_rows("California", 5, 2022) + ), + scorers=scorers, + ), + weave.Evaluation( + name="United States - 2000", + dataset=weave.Dataset( + name="United States - 2000", + rows=generate_dataset_rows("United States", 5, 2000), + ), + scorers=scorers, + ), +] +models = [ + baseline_model, + gpt_4o_mini_no_context, + gpt_4o_mini_with_context, +] + +for evaluation in evaluations: + for model in models: + await evaluation.evaluate( + model, __weave={"display_name": evaluation.name + ":" + model.__name__} + ) +``` + +## Step 7: Review the Leaderboard + +You can create a new leaderboard by navigating to the leaderboard tab in the UI and clicking "Create Leaderboard". + +We can also generate a leaderboard directly from Python: + + +```python +from weave.flow import leaderboard +from weave.trace.weave_client import get_ref + +spec = leaderboard.Leaderboard( + name="Zip Code World Knowledge", + description=""" +This leaderboard compares the performance of models in terms of world knowledge about zip codes. 
+ +### Columns + +1. **State Match against `United States - 2022`**: The fraction of zip codes that the model correctly identified the state for. +2. **Avg Temp F Error against `California - 2022`**: The mean absolute error of the model's average temperature prediction. +3. **Correct Known For against `United States - 2000`**: The fraction of zip codes that the model correctly identified the most well known thing about the zip code. +""", + columns=[ + leaderboard.LeaderboardColumn( + evaluation_object_ref=get_ref(evaluations[0]).uri(), + scorer_name="check_concrete_fields", + summary_metric_path="state_match.true_fraction", + ), + leaderboard.LeaderboardColumn( + evaluation_object_ref=get_ref(evaluations[1]).uri(), + scorer_name="check_value_fields", + should_minimize=True, + summary_metric_path="avg_temp_f_err.mean", + ), + leaderboard.LeaderboardColumn( + evaluation_object_ref=get_ref(evaluations[2]).uri(), + scorer_name="check_subjective_fields", + summary_metric_path="correct_known_for.true_fraction", + ), + ], +) + +ref = weave.publish(spec) +``` diff --git a/docs/docs/reference/typescript-sdk/weave/README.md b/docs/docs/reference/typescript-sdk/weave/README.md new file mode 100644 index 000000000000..5cbed111fbde --- /dev/null +++ b/docs/docs/reference/typescript-sdk/weave/README.md @@ -0,0 +1,32 @@ +**weave** • **Docs** + +*** + +# weave + +## Classes + +- [Dataset](classes/Dataset.md) +- [Evaluation](classes/Evaluation.md) +- [WeaveClient](classes/WeaveClient.md) +- [WeaveObject](classes/WeaveObject.md) + +## Interfaces + +- [CallSchema](interfaces/CallSchema.md) +- [CallsFilter](interfaces/CallsFilter.md) + +## Type Aliases + +- [Op](type-aliases/Op.md) + +## Functions + +- [init](functions/init.md) +- [login](functions/login.md) +- [op](functions/op.md) +- [requireCurrentCallStackEntry](functions/requireCurrentCallStackEntry.md) +- [requireCurrentChildSummary](functions/requireCurrentChildSummary.md) +- [weaveAudio](functions/weaveAudio.md) +- [weaveImage](functions/weaveImage.md) +- [wrapOpenAI](functions/wrapOpenAI.md) diff --git a/docs/docs/reference/typescript-sdk/weave/classes/Dataset.md b/docs/docs/reference/typescript-sdk/weave/classes/Dataset.md new file mode 100644 index 000000000000..cb0ac50e9727 --- /dev/null +++ b/docs/docs/reference/typescript-sdk/weave/classes/Dataset.md @@ -0,0 +1,229 @@ +[**weave**](../README.md) • **Docs** + +*** + +[weave](../README.md) / Dataset + +# Class: Dataset\ + +Dataset object with easy saving and automatic versioning + +## Example + +```ts +// Create a dataset +const dataset = new Dataset({ + id: 'grammar-dataset', + rows: [ + { id: '0', sentence: "He no likes ice cream.", correction: "He doesn't like ice cream." }, + { id: '1', sentence: "She goed to the store.", correction: "She went to the store." }, + { id: '2', sentence: "They plays video games all day.", correction: "They play video games all day." 
} + ] +}) + +// Access a specific example +const exampleLabel = dataset.getRow(2).sentence; + +// Save the dataset +const ref = await dataset.save() +``` + +## Extends + +- [`WeaveObject`](WeaveObject.md) + +## Type Parameters + +• **R** *extends* `DatasetRow` + +## Constructors + +### new Dataset() + +> **new Dataset**\<`R`\>(`parameters`): [`Dataset`](Dataset.md)\<`R`\> + +#### Parameters + +• **parameters**: `DatasetParameters`\<`R`\> + +#### Returns + +[`Dataset`](Dataset.md)\<`R`\> + +#### Overrides + +[`WeaveObject`](WeaveObject.md).[`constructor`](WeaveObject.md#constructors) + +#### Defined in + +[dataset.ts:51](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/dataset.ts#L51) + +## Properties + +### \_\_savedRef? + +> `optional` **\_\_savedRef**: `ObjectRef` \| `Promise`\<`ObjectRef`\> + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`__savedRef`](WeaveObject.md#__savedref) + +#### Defined in + +[weaveObject.ts:49](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L49) + +*** + +### \_baseParameters + +> `protected` **\_baseParameters**: `WeaveObjectParameters` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`_baseParameters`](WeaveObject.md#_baseparameters) + +#### Defined in + +[weaveObject.ts:51](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L51) + +*** + +### rows + +> **rows**: `Table`\<`R`\> + +#### Defined in + +[dataset.ts:49](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/dataset.ts#L49) + +## Accessors + +### description + +> `get` **description**(): `undefined` \| `string` + +#### Returns + +`undefined` \| `string` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`description`](WeaveObject.md#description) + +#### Defined in + +[weaveObject.ts:89](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L89) + +*** + +### id + +> `get` **id**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`id`](WeaveObject.md#id) + +#### Defined in + +[weaveObject.ts:85](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L85) + +*** + +### length + +> `get` **length**(): `number` + +#### Returns + +`number` + +#### Defined in + +[dataset.ts:64](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/dataset.ts#L64) + +## Methods + +### \[asyncIterator\]() + +> **\[asyncIterator\]**(): `AsyncIterator`\<`any`, `any`, `undefined`\> + +#### Returns + +`AsyncIterator`\<`any`, `any`, `undefined`\> + +#### Defined in + +[dataset.ts:68](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/dataset.ts#L68) + +*** + +### className() + +> **className**(): `any` + +#### Returns + +`any` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`className`](WeaveObject.md#classname) + +#### Defined in + +[weaveObject.ts:53](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L53) + +*** + +### getRow() + +> **getRow**(`index`): `R` + +#### Parameters + +• **index**: `number` + +#### Returns + +`R` + +#### Defined in + +[dataset.ts:74](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/dataset.ts#L74) + +*** + +### save() + +> **save**(): 
`Promise`\<`ObjectRef`\>
+
+#### Returns
+
+`Promise`\<`ObjectRef`\>
+
+#### Defined in
+
+[dataset.ts:60](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/dataset.ts#L60)
+
+***
+
+### saveAttrs()
+
+> **saveAttrs**(): `object`
+
+#### Returns
+
+`object`
+
+#### Inherited from
+
+[`WeaveObject`](WeaveObject.md).[`saveAttrs`](WeaveObject.md#saveattrs)
+
+#### Defined in
+
+[weaveObject.ts:57](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L57)
diff --git a/docs/docs/reference/typescript-sdk/weave/classes/Evaluation.md b/docs/docs/reference/typescript-sdk/weave/classes/Evaluation.md
new file mode 100644
index 000000000000..d1cb3b962a60
--- /dev/null
+++ b/docs/docs/reference/typescript-sdk/weave/classes/Evaluation.md
@@ -0,0 +1,249 @@
+[**weave**](../README.md) • **Docs**
+
+***
+
+[weave](../README.md) / Evaluation
+
+# Class: Evaluation\<R, E, M\>
+
+Sets up an evaluation which includes a set of scorers and a dataset.
+
+Calling evaluation.evaluate(model) will pass in rows from a dataset into a model, matching
+the names of the columns of the dataset to the argument names in model.predict.
+
+Then it will call all of the scorers and save the results in weave.
+
+## Example
+
+```ts
+// Collect your examples into a dataset
+const dataset = new weave.Dataset({
+  id: 'my-dataset',
+  rows: [
+    { question: 'What is the capital of France?', expected: 'Paris' },
+    { question: 'Who wrote "To Kill a Mockingbird"?', expected: 'Harper Lee' },
+    { question: 'What is the square root of 64?', expected: '8' },
+  ],
+});
+
+// Define any custom scoring function
+const scoringFunction = weave.op(function isEqual({ modelOutput, datasetRow }) {
+  return modelOutput == datasetRow.expected;
+});
+
+// Define the function to evaluate
+const model = weave.op(async function alwaysParisModel({ question }) {
+  return 'Paris';
+});
+
+// Start evaluating
+const evaluation = new weave.Evaluation({
+  id: 'my-evaluation',
+  dataset: dataset,
+  scorers: [scoringFunction],
+});
+
+const results = await evaluation.evaluate({ model });
+```
+
+## Extends
+
+- [`WeaveObject`](WeaveObject.md)
+
+## Type Parameters
+
+• **R** *extends* `DatasetRow`
+
+• **E** *extends* `DatasetRow`
+
+• **M**
+
+## Constructors
+
+### new Evaluation()
+
+> **new Evaluation**\<`R`, `E`, `M`\>(`parameters`): [`Evaluation`](Evaluation.md)\<`R`, `E`, `M`\>
+
+#### Parameters
+
+• **parameters**: `EvaluationParameters`\<`R`, `E`, `M`\>
+
+#### Returns
+
+[`Evaluation`](Evaluation.md)\<`R`, `E`, `M`\>
+
+#### Overrides
+
+[`WeaveObject`](WeaveObject.md).[`constructor`](WeaveObject.md#constructors)
+
+#### Defined in
+
+[evaluation.ts:148](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/evaluation.ts#L148)
+
+## Properties
+
+### \_\_savedRef?
+ +> `optional` **\_\_savedRef**: `ObjectRef` \| `Promise`\<`ObjectRef`\> + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`__savedRef`](WeaveObject.md#__savedref) + +#### Defined in + +[weaveObject.ts:49](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L49) + +*** + +### \_baseParameters + +> `protected` **\_baseParameters**: `WeaveObjectParameters` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`_baseParameters`](WeaveObject.md#_baseparameters) + +#### Defined in + +[weaveObject.ts:51](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L51) + +## Accessors + +### description + +> `get` **description**(): `undefined` \| `string` + +#### Returns + +`undefined` \| `string` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`description`](WeaveObject.md#description) + +#### Defined in + +[weaveObject.ts:89](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L89) + +*** + +### id + +> `get` **id**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`id`](WeaveObject.md#id) + +#### Defined in + +[weaveObject.ts:85](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L85) + +## Methods + +### className() + +> **className**(): `any` + +#### Returns + +`any` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`className`](WeaveObject.md#classname) + +#### Defined in + +[weaveObject.ts:53](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L53) + +*** + +### evaluate() + +> **evaluate**(`__namedParameters`): `Promise`\<`Record`\<`string`, `any`\>\> + +#### Parameters + +• **\_\_namedParameters** + +• **\_\_namedParameters.maxConcurrency?**: `number` = `5` + +• **\_\_namedParameters.model**: `WeaveCallable`\<(...`args`) => `Promise`\<`M`\>\> + +• **\_\_namedParameters.nTrials?**: `number` = `1` + +#### Returns + +`Promise`\<`Record`\<`string`, `any`\>\> + +#### Defined in + +[evaluation.ts:163](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/evaluation.ts#L163) + +*** + +### predictAndScore() + +> **predictAndScore**(`__namedParameters`): `Promise`\<`object`\> + +#### Parameters + +• **\_\_namedParameters** + +• **\_\_namedParameters.columnMapping?**: `ColumnMapping`\<`R`, `E`\> + +• **\_\_namedParameters.example**: `R` + +• **\_\_namedParameters.model**: `WeaveCallable`\<(...`args`) => `Promise`\<`M`\>\> + +#### Returns + +`Promise`\<`object`\> + +##### model\_latency + +> **model\_latency**: `number` = `modelLatency` + +##### model\_output + +> **model\_output**: `any` = `modelOutput` + +##### model\_success + +> **model\_success**: `boolean` = `!modelError` + +##### scores + +> **scores**: `object` + +###### Index Signature + + \[`key`: `string`\]: `any` + +#### Defined in + +[evaluation.ts:232](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/evaluation.ts#L232) + +*** + +### saveAttrs() + +> **saveAttrs**(): `object` + +#### Returns + +`object` + +#### Inherited from + +[`WeaveObject`](WeaveObject.md).[`saveAttrs`](WeaveObject.md#saveattrs) + +#### Defined in + +[weaveObject.ts:57](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L57) diff --git 
a/docs/docs/reference/typescript-sdk/weave/classes/WeaveClient.md b/docs/docs/reference/typescript-sdk/weave/classes/WeaveClient.md new file mode 100644 index 000000000000..75bd4360afca --- /dev/null +++ b/docs/docs/reference/typescript-sdk/weave/classes/WeaveClient.md @@ -0,0 +1,327 @@ +[**weave**](../README.md) • **Docs** + +*** + +[weave](../README.md) / WeaveClient + +# Class: WeaveClient + +## Constructors + +### new WeaveClient() + +> **new WeaveClient**(`traceServerApi`, `wandbServerApi`, `projectId`, `settings`): [`WeaveClient`](WeaveClient.md) + +#### Parameters + +• **traceServerApi**: `Api`\<`any`\> + +• **wandbServerApi**: `WandbServerApi` + +• **projectId**: `string` + +• **settings**: `Settings` = `...` + +#### Returns + +[`WeaveClient`](WeaveClient.md) + +#### Defined in + +[weaveClient.ts:82](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L82) + +## Properties + +### projectId + +> **projectId**: `string` + +#### Defined in + +[weaveClient.ts:85](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L85) + +*** + +### settings + +> **settings**: `Settings` + +#### Defined in + +[weaveClient.ts:86](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L86) + +*** + +### traceServerApi + +> **traceServerApi**: `Api`\<`any`\> + +#### Defined in + +[weaveClient.ts:83](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L83) + +## Methods + +### createCall() + +> **createCall**(`opRef`, `params`, `parameterNames`, `thisArg`, `currentCall`, `parentCall`, `startTime`, `displayName`?): `Promise`\<`void`\> + +#### Parameters + +• **opRef**: `any` + +• **params**: `any`[] + +• **parameterNames**: `ParameterNamesOption` + +• **thisArg**: `any` + +• **currentCall**: `CallStackEntry` + +• **parentCall**: `undefined` \| `CallStackEntry` + +• **startTime**: `Date` + +• **displayName?**: `string` + +#### Returns + +`Promise`\<`void`\> + +#### Defined in + +[weaveClient.ts:610](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L610) + +*** + +### finishCall() + +> **finishCall**(`result`, `currentCall`, `parentCall`, `summarize`, `endTime`, `startCallPromise`): `Promise`\<`void`\> + +#### Parameters + +• **result**: `any` + +• **currentCall**: `CallStackEntry` + +• **parentCall**: `undefined` \| `CallStackEntry` + +• **summarize**: `undefined` \| (`result`) => `Record`\<`string`, `any`\> + +• **endTime**: `Date` + +• **startCallPromise**: `Promise`\<`void`\> + +#### Returns + +`Promise`\<`void`\> + +#### Defined in + +[weaveClient.ts:648](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L648) + +*** + +### finishCallWithException() + +> **finishCallWithException**(`error`, `currentCall`, `parentCall`, `endTime`, `startCallPromise`): `Promise`\<`void`\> + +#### Parameters + +• **error**: `any` + +• **currentCall**: `CallStackEntry` + +• **parentCall**: `undefined` \| `CallStackEntry` + +• **endTime**: `Date` + +• **startCallPromise**: `Promise`\<`void`\> + +#### Returns + +`Promise`\<`void`\> + +#### Defined in + +[weaveClient.ts:677](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L677) + +*** + +### get() + +> **get**(`ref`): `Promise`\<`any`\> + +#### Parameters + +• **ref**: `ObjectRef` + +#### Returns + 
+`Promise`\<`any`\> + +#### Defined in + +[weaveClient.ts:229](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L229) + +*** + +### getCalls() + +> **getCalls**(`filter`, `includeCosts`, `limit`): `Promise`\<[`CallSchema`](../interfaces/CallSchema.md)[]\> + +#### Parameters + +• **filter**: [`CallsFilter`](../interfaces/CallsFilter.md) = `{}` + +• **includeCosts**: `boolean` = `false` + +• **limit**: `number` = `1000` + +#### Returns + +`Promise`\<[`CallSchema`](../interfaces/CallSchema.md)[]\> + +#### Defined in + +[weaveClient.ts:172](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L172) + +*** + +### getCallsIterator() + +> **getCallsIterator**(`filter`, `includeCosts`, `limit`): `AsyncIterableIterator`\<[`CallSchema`](../interfaces/CallSchema.md)\> + +#### Parameters + +• **filter**: [`CallsFilter`](../interfaces/CallsFilter.md) = `{}` + +• **includeCosts**: `boolean` = `false` + +• **limit**: `number` = `1000` + +#### Returns + +`AsyncIterableIterator`\<[`CallSchema`](../interfaces/CallSchema.md)\> + +#### Defined in + +[weaveClient.ts:184](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L184) + +*** + +### getCallStack() + +> **getCallStack**(): `CallStack` + +#### Returns + +`CallStack` + +#### Defined in + +[weaveClient.ts:537](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L537) + +*** + +### publish() + +> **publish**(`obj`, `objId`?): `Promise`\<`ObjectRef`\> + +#### Parameters + +• **obj**: `any` + +• **objId?**: `string` + +#### Returns + +`Promise`\<`ObjectRef`\> + +#### Defined in + +[weaveClient.ts:160](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L160) + +*** + +### pushNewCall() + +> **pushNewCall**(): `object` + +#### Returns + +`object` + +##### currentCall + +> **currentCall**: `CallStackEntry` + +##### newStack + +> **newStack**: `CallStack` + +##### parentCall? 
+ +> `optional` **parentCall**: `CallStackEntry` + +#### Defined in + +[weaveClient.ts:541](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L541) + +*** + +### runWithCallStack() + +> **runWithCallStack**\<`T`\>(`callStack`, `fn`): `T` + +#### Type Parameters + +• **T** + +#### Parameters + +• **callStack**: `CallStack` + +• **fn** + +#### Returns + +`T` + +#### Defined in + +[weaveClient.ts:545](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L545) + +*** + +### saveOp() + +> **saveOp**(`op`, `objId`?): `Promise`\<`any`\> + +#### Parameters + +• **op**: [`Op`](../type-aliases/Op.md)\<(...`args`) => `any`\> + +• **objId?**: `string` + +#### Returns + +`Promise`\<`any`\> + +#### Defined in + +[weaveClient.ts:575](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L575) + +*** + +### waitForBatchProcessing() + +> **waitForBatchProcessing**(): `Promise`\<`void`\> + +#### Returns + +`Promise`\<`void`\> + +#### Defined in + +[weaveClient.ts:103](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveClient.ts#L103) diff --git a/docs/docs/reference/typescript-sdk/weave/classes/WeaveObject.md b/docs/docs/reference/typescript-sdk/weave/classes/WeaveObject.md new file mode 100644 index 000000000000..a2c37ad21c36 --- /dev/null +++ b/docs/docs/reference/typescript-sdk/weave/classes/WeaveObject.md @@ -0,0 +1,106 @@ +[**weave**](../README.md) • **Docs** + +*** + +[weave](../README.md) / WeaveObject + +# Class: WeaveObject + +## Extended by + +- [`Dataset`](Dataset.md) +- [`Evaluation`](Evaluation.md) + +## Constructors + +### new WeaveObject() + +> **new WeaveObject**(`_baseParameters`): [`WeaveObject`](WeaveObject.md) + +#### Parameters + +• **\_baseParameters**: `WeaveObjectParameters` + +#### Returns + +[`WeaveObject`](WeaveObject.md) + +#### Defined in + +[weaveObject.ts:51](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L51) + +## Properties + +### \_\_savedRef? 
+ +> `optional` **\_\_savedRef**: `ObjectRef` \| `Promise`\<`ObjectRef`\> + +#### Defined in + +[weaveObject.ts:49](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L49) + +*** + +### \_baseParameters + +> `protected` **\_baseParameters**: `WeaveObjectParameters` + +#### Defined in + +[weaveObject.ts:51](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L51) + +## Accessors + +### description + +> `get` **description**(): `undefined` \| `string` + +#### Returns + +`undefined` \| `string` + +#### Defined in + +[weaveObject.ts:89](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L89) + +*** + +### id + +> `get` **id**(): `string` + +#### Returns + +`string` + +#### Defined in + +[weaveObject.ts:85](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L85) + +## Methods + +### className() + +> **className**(): `any` + +#### Returns + +`any` + +#### Defined in + +[weaveObject.ts:53](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L53) + +*** + +### saveAttrs() + +> **saveAttrs**(): `object` + +#### Returns + +`object` + +#### Defined in + +[weaveObject.ts:57](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/weaveObject.ts#L57) diff --git a/docs/docs/reference/typescript-sdk/weave/functions/init.md b/docs/docs/reference/typescript-sdk/weave/functions/init.md new file mode 100644 index 000000000000..17c70ff9cd51 --- /dev/null +++ b/docs/docs/reference/typescript-sdk/weave/functions/init.md @@ -0,0 +1,35 @@ +[**weave**](../README.md) • **Docs** + +*** + +[weave](../README.md) / init + +# Function: init() + +> **init**(`project`, `settings`?): `Promise`\<[`WeaveClient`](../classes/WeaveClient.md)\> + +Initialize the Weave client, which is required for weave tracing to work. + +## Parameters + +• **project**: `string` + +The W&B project name (can be project or entity/project). + +• **settings?**: `Settings` + +(Optional) Weave tracing settings + +## Returns + +`Promise`\<[`WeaveClient`](../classes/WeaveClient.md)\> + +A promise that resolves to the initialized Weave client. + +## Throws + +If the initialization fails + +## Defined in + +[clientApi.ts:57](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/clientApi.ts#L57) diff --git a/docs/docs/reference/typescript-sdk/weave/functions/login.md b/docs/docs/reference/typescript-sdk/weave/functions/login.md new file mode 100644 index 000000000000..1136c3d18abd --- /dev/null +++ b/docs/docs/reference/typescript-sdk/weave/functions/login.md @@ -0,0 +1,34 @@ +[**weave**](../README.md) • **Docs** + +*** + +[weave](../README.md) / login + +# Function: login() + +> **login**(`apiKey`, `host`?): `Promise`\<`void`\> + +Log in to Weights & Biases (W&B) using the provided API key. +This function saves the credentials to your netrc file for future use. + +## Parameters + +• **apiKey**: `string` + +Your W&B API key. + +• **host?**: `string` = `defaultHost` + +(Optional) The host name (usually only needed if you're using a custom W&B server). + +## Returns + +`Promise`\<`void`\> + +## Throws + +If the API key is not specified or if the connection to the weave trace server cannot be verified. 
+
+## Defined in
+
+[clientApi.ts:22](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/clientApi.ts#L22)
diff --git a/docs/docs/reference/typescript-sdk/weave/functions/op.md b/docs/docs/reference/typescript-sdk/weave/functions/op.md
new file mode 100644
index 000000000000..e00b6aefd59c
--- /dev/null
+++ b/docs/docs/reference/typescript-sdk/weave/functions/op.md
@@ -0,0 +1,108 @@
+[**weave**](../README.md) • **Docs**
+
+***
+
+[weave](../README.md) / op
+
+# Function: op()
+
+## op(fn, options)
+
+> **op**\<`T`\>(`fn`, `options`?): [`Op`](../type-aliases/Op.md)\<(...`args`) => `Promise`\<`Awaited`\<`ReturnType`\<`T`\>\>\>\>
+
+A wrapper to weave op-ify a function or method that works on sync and async functions.
+
+Wrapped functions:
+ 1. Take the same inputs and return the same outputs as the original function.
+ 2. Will automatically track calls in the Weave UI.
+
+If you don't call `weave.init` then the function will behave as if it were not wrapped.
+
+### Type Parameters
+
+• **T** *extends* (...`args`) => `any`
+
+### Parameters
+
+• **fn**: `T`
+
+The function to wrap
+
+• **options?**: `OpOptions`\<`T`\>
+
+Optional configs like call and param naming
+
+### Returns
+
+[`Op`](../type-aliases/Op.md)\<(...`args`) => `Promise`\<`Awaited`\<`ReturnType`\<`T`\>\>\>\>
+
+The wrapped function
+
+### Example
+
+```ts
+// Basic usage
+import OpenAI from 'openai';
+import * as weave from 'weave';
+
+const client = await weave.init('my-project');
+const oaiClient = weave.wrapOpenAI(new OpenAI());
+
+const extract = weave.op(async function extract() {
+  return await oaiClient.chat.completions.create({
+    model: 'gpt-4-turbo',
+    messages: [{ role: 'user', content: 'Create a user as JSON' }],
+  });
+});
+
+await extract();
+
+// You can also wrap methods by passing the object as the first argument.
+// This will bind the method to the object and wrap it with op.
+class MyModel {
+  private oaiClient: OpenAI;
+
+  constructor() {
+    this.oaiClient = weave.wrapOpenAI(new OpenAI());
+    this.invoke = weave.op(this, this.invoke);
+  }
+
+  async invoke() {
+    return await this.oaiClient.chat.completions.create({
+      model: 'gpt-4-turbo',
+      messages: [{ role: 'user', content: 'Create a user as JSON' }],
+    });
+  }
+}
+
+const model = new MyModel();
+const res = await model.invoke();
+```
+
+### Defined in
+
+[op.ts:58](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/op.ts#L58)
+
+## op(thisArg, fn, options)
+
+> **op**\<`T`\>(`thisArg`, `fn`, `options`?): [`Op`](../type-aliases/Op.md)\<(...`args`) => `Promise`\<`Awaited`\<`ReturnType`\<`T`\>\>\>\>
+
+### Type Parameters
+
+• **T** *extends* (...`args`) => `any`
+
+### Parameters
+
+• **thisArg**: `any`
+
+• **fn**: `T`
+
+• **options?**: `OpOptions`\<`T`\>
+
+### Returns
+
+[`Op`](../type-aliases/Op.md)\<(...`args`) => `Promise`\<`Awaited`\<`ReturnType`\<`T`\>\>\>\>
+
+### Defined in
+
+[op.ts:62](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/op.ts#L62)
diff --git a/docs/docs/reference/typescript-sdk/weave/functions/requireCurrentCallStackEntry.md b/docs/docs/reference/typescript-sdk/weave/functions/requireCurrentCallStackEntry.md
new file mode 100644
index 000000000000..dcb978aec7f8
--- /dev/null
+++ b/docs/docs/reference/typescript-sdk/weave/functions/requireCurrentCallStackEntry.md
@@ -0,0 +1,17 @@
+[**weave**](../README.md) • **Docs**
+
+***
+
+[weave](../README.md) / requireCurrentCallStackEntry
+
+# Function: requireCurrentCallStackEntry()
+
+> **requireCurrentCallStackEntry**(): `CallStackEntry`
+
+## Returns
+
+`CallStackEntry`
+
+## Defined in
+
+[clientApi.ts:119](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/clientApi.ts#L119)
diff --git a/docs/docs/reference/typescript-sdk/weave/functions/requireCurrentChildSummary.md b/docs/docs/reference/typescript-sdk/weave/functions/requireCurrentChildSummary.md
new file mode 100644
index 000000000000..2afe6adc25d7
--- /dev/null
+++ b/docs/docs/reference/typescript-sdk/weave/functions/requireCurrentChildSummary.md
@@ -0,0 +1,17 @@
+[**weave**](../README.md) • **Docs**
+
+***
+
+[weave](../README.md) / requireCurrentChildSummary
+
+# Function: requireCurrentChildSummary()
+
+> **requireCurrentChildSummary**(): `object`
+
+## Returns
+
+`object`
+
+## Defined in
+
+[clientApi.ts:131](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/clientApi.ts#L131)
diff --git a/docs/docs/reference/typescript-sdk/weave/functions/weaveAudio.md b/docs/docs/reference/typescript-sdk/weave/functions/weaveAudio.md
new file mode 100644
index 000000000000..380927432e09
--- /dev/null
+++ b/docs/docs/reference/typescript-sdk/weave/functions/weaveAudio.md
@@ -0,0 +1,34 @@
+[**weave**](../README.md) • **Docs**
+
+***
+
+[weave](../README.md) / weaveAudio
+
+# Function: weaveAudio()
+
+> **weaveAudio**(`options`): `WeaveAudio`
+
+Create a new WeaveAudio object
+
+## Parameters
+
+• **options**: `WeaveAudioInput`
+
+The options for this media type
+ - data: The raw audio data as a Buffer
+ - audioType: (Optional) The type of audio file, currently only 'wav' is supported
+
+## Returns
+
+`WeaveAudio`
+
+## Example
+
+```ts
+const audioBuffer = fs.readFileSync('path/to/audio.wav');
+const audio = weaveAudio({ data: audioBuffer });
+```
+
+## Defined in
+
+[media.ts:62](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/media.ts#L62)
diff --git a/docs/docs/reference/typescript-sdk/weave/functions/weaveImage.md b/docs/docs/reference/typescript-sdk/weave/functions/weaveImage.md
new file mode 100644
index 000000000000..a588cd44c015
--- /dev/null
+++ b/docs/docs/reference/typescript-sdk/weave/functions/weaveImage.md
@@ -0,0 +1,34 @@
+[**weave**](../README.md) • **Docs**
+
+***
+
+[weave](../README.md) / weaveImage
+
+# Function: weaveImage()
+
+> **weaveImage**(`options`): `WeaveImage`
+
+Create a new WeaveImage object
+
+## Parameters
+
+• **options**: `WeaveImageInput`
+
+The options for this media type
+ - data: The raw image data as a Buffer
+ - imageType: (Optional) The type of image file, currently only 'png' is supported
+
+## Returns
+
+`WeaveImage`
+
+## Example
+
+```ts
+const imageBuffer = fs.readFileSync('path/to/image.png');
+const image = weaveImage({ data: imageBuffer });
+```
+
+## Defined in
+
+[media.ts:28](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/media.ts#L28)
diff --git a/docs/docs/reference/typescript-sdk/weave/functions/wrapOpenAI.md b/docs/docs/reference/typescript-sdk/weave/functions/wrapOpenAI.md
new file mode 100644
index 000000000000..f4c3adee5b5f
--- /dev/null
+++ b/docs/docs/reference/typescript-sdk/weave/functions/wrapOpenAI.md
@@ -0,0 +1,37 @@
+[**weave**](../README.md) • **Docs**
+
+***
+
+[weave](../README.md) / wrapOpenAI
+
+# Function: wrapOpenAI()
+
+> **wrapOpenAI**\<`T`\>(`openai`): `T`
+
+Wraps the OpenAI API to enable function tracing for OpenAI calls.
+
+## Type Parameters
+
+• **T** *extends* `OpenAIAPI`
+
+## Parameters
+
+• **openai**: `T`
+
+## Returns
+
+`T`
+
+## Example
+
+```ts
+const openai = wrapOpenAI(new OpenAI());
+const result = await openai.chat.completions.create({
+  model: 'gpt-3.5-turbo',
+  messages: [{ role: 'user', content: 'Hello, world!' }]
+});
+```
+
+## Defined in
+
+[integrations/openai.ts:159](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/integrations/openai.ts#L159)
diff --git a/docs/docs/reference/typescript-sdk/weave/interfaces/CallSchema.md b/docs/docs/reference/typescript-sdk/weave/interfaces/CallSchema.md
new file mode 100644
index 000000000000..8254666f3c9c
--- /dev/null
+++ b/docs/docs/reference/typescript-sdk/weave/interfaces/CallSchema.md
@@ -0,0 +1,203 @@
+[**weave**](../README.md) • **Docs**
+
+***
+
+[weave](../README.md) / CallSchema
+
+# Interface: CallSchema
+
+CallSchema
+
+## Properties
+
+### attributes
+
+> **attributes**: `object`
+
+Attributes
+
+#### Defined in
+
+[generated/traceServerApi.ts:119](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L119)
+
+***
+
+### deleted\_at?
+
+> `optional` **deleted\_at**: `null` \| `string`
+
+Deleted At
+
+#### Defined in
+
+[generated/traceServerApi.ts:134](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L134)
+
+***
+
+### display\_name?
+
+> `optional` **display\_name**: `null` \| `string`
+
+Display Name
+
+#### Defined in
+
+[generated/traceServerApi.ts:108](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L108)
+
+***
+
+### ended\_at?
+ +> `optional` **ended\_at**: `null` \| `string` + +Ended At + +#### Defined in + +[generated/traceServerApi.ts:123](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L123) + +*** + +### exception? + +> `optional` **exception**: `null` \| `string` + +Exception + +#### Defined in + +[generated/traceServerApi.ts:125](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L125) + +*** + +### id + +> **id**: `string` + +Id + +#### Defined in + +[generated/traceServerApi.ts:102](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L102) + +*** + +### inputs + +> **inputs**: `object` + +Inputs + +#### Defined in + +[generated/traceServerApi.ts:121](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L121) + +*** + +### op\_name + +> **op\_name**: `string` + +Op Name + +#### Defined in + +[generated/traceServerApi.ts:106](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L106) + +*** + +### output? + +> `optional` **output**: `null` + +Output + +#### Defined in + +[generated/traceServerApi.ts:127](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L127) + +*** + +### parent\_id? + +> `optional` **parent\_id**: `null` \| `string` + +Parent Id + +#### Defined in + +[generated/traceServerApi.ts:112](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L112) + +*** + +### project\_id + +> **project\_id**: `string` + +Project Id + +#### Defined in + +[generated/traceServerApi.ts:104](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L104) + +*** + +### started\_at + +> **started\_at**: `string` + +Started At + +#### Format + +date-time + +#### Defined in + +[generated/traceServerApi.ts:117](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L117) + +*** + +### summary? + +> `optional` **summary**: `object` + +#### Defined in + +[generated/traceServerApi.ts:128](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L128) + +*** + +### trace\_id + +> **trace\_id**: `string` + +Trace Id + +#### Defined in + +[generated/traceServerApi.ts:110](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L110) + +*** + +### wb\_run\_id? + +> `optional` **wb\_run\_id**: `null` \| `string` + +Wb Run Id + +#### Defined in + +[generated/traceServerApi.ts:132](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L132) + +*** + +### wb\_user\_id? 
+ +> `optional` **wb\_user\_id**: `null` \| `string` + +Wb User Id + +#### Defined in + +[generated/traceServerApi.ts:130](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L130) diff --git a/docs/docs/reference/typescript-sdk/weave/interfaces/CallsFilter.md b/docs/docs/reference/typescript-sdk/weave/interfaces/CallsFilter.md new file mode 100644 index 000000000000..4b12386c2e69 --- /dev/null +++ b/docs/docs/reference/typescript-sdk/weave/interfaces/CallsFilter.md @@ -0,0 +1,117 @@ +[**weave**](../README.md) • **Docs** + +*** + +[weave](../README.md) / CallsFilter + +# Interface: CallsFilter + +CallsFilter + +## Properties + +### call\_ids? + +> `optional` **call\_ids**: `null` \| `string`[] + +Call Ids + +#### Defined in + +[generated/traceServerApi.ts:197](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L197) + +*** + +### input\_refs? + +> `optional` **input\_refs**: `null` \| `string`[] + +Input Refs + +#### Defined in + +[generated/traceServerApi.ts:189](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L189) + +*** + +### op\_names? + +> `optional` **op\_names**: `null` \| `string`[] + +Op Names + +#### Defined in + +[generated/traceServerApi.ts:187](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L187) + +*** + +### output\_refs? + +> `optional` **output\_refs**: `null` \| `string`[] + +Output Refs + +#### Defined in + +[generated/traceServerApi.ts:191](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L191) + +*** + +### parent\_ids? + +> `optional` **parent\_ids**: `null` \| `string`[] + +Parent Ids + +#### Defined in + +[generated/traceServerApi.ts:193](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L193) + +*** + +### trace\_ids? + +> `optional` **trace\_ids**: `null` \| `string`[] + +Trace Ids + +#### Defined in + +[generated/traceServerApi.ts:195](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L195) + +*** + +### trace\_roots\_only? + +> `optional` **trace\_roots\_only**: `null` \| `boolean` + +Trace Roots Only + +#### Defined in + +[generated/traceServerApi.ts:199](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L199) + +*** + +### wb\_run\_ids? + +> `optional` **wb\_run\_ids**: `null` \| `string`[] + +Wb Run Ids + +#### Defined in + +[generated/traceServerApi.ts:203](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L203) + +*** + +### wb\_user\_ids? 
+ +> `optional` **wb\_user\_ids**: `null` \| `string`[] + +Wb User Ids + +#### Defined in + +[generated/traceServerApi.ts:201](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/generated/traceServerApi.ts#L201) diff --git a/docs/docs/reference/typescript-sdk/weave/type-aliases/Op.md b/docs/docs/reference/typescript-sdk/weave/type-aliases/Op.md new file mode 100644 index 000000000000..8cb4a8772200 --- /dev/null +++ b/docs/docs/reference/typescript-sdk/weave/type-aliases/Op.md @@ -0,0 +1,39 @@ +[**weave**](../README.md) • **Docs** + +*** + +[weave](../README.md) / Op + +# Type Alias: Op\ + +> **Op**\<`T`\>: `object` & `T` + +## Type declaration + +### \_\_boundThis? + +> `optional` **\_\_boundThis**: [`WeaveObject`](../classes/WeaveObject.md) + +### \_\_isOp + +> **\_\_isOp**: `true` + +### \_\_name + +> **\_\_name**: `string` + +### \_\_savedRef? + +> `optional` **\_\_savedRef**: `OpRef` \| `Promise`\<`OpRef`\> + +### \_\_wrappedFunction + +> **\_\_wrappedFunction**: `T` + +## Type Parameters + +• **T** *extends* (...`args`) => `any` + +## Defined in + +[opType.ts:6](https://github.com/wandb/weave/blob/e2313369cb35bc1b6f97c70539926dd951ead21e/sdks/node/src/opType.ts#L6) diff --git a/docs/docs/tutorial-eval.md b/docs/docs/tutorial-eval.md index 44ccdfa5a9dc..7d4756dfd693 100644 --- a/docs/docs/tutorial-eval.md +++ b/docs/docs/tutorial-eval.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Tutorial: Build an Evaluation pipeline To iterate on an application, we need a way to evaluate if it's improving. To do so, a common practice is to test it against the same set of examples when there is a change. Weave has a first-class way to track evaluations with `Model` & `Evaluation` classes. We have built the APIs to make minimal assumptions to allow for the flexibility to support a wide array of use-cases. @@ -6,6 +9,9 @@ To iterate on an application, we need a way to evaluate if it's improving. To do ## 1. Build a `Model` + + + `Model`s store and version information about your system, such as prompts, temperatures, and more. Weave automatically captures when they are used and updates the version when there are changes. @@ -17,192 +23,356 @@ Weave automatically captures when they are used and updates the version when the ::: -```python -import json -import openai -import weave + ```python + import json + import openai + import weave -# highlight-next-line -class ExtractFruitsModel(weave.Model): - model_name: str - prompt_template: str - - # highlight-next-line - @weave.op() # highlight-next-line - async def predict(self, sentence: str) -> dict: - client = openai.AsyncClient() - - response = await client.chat.completions.create( - model=self.model_name, - messages=[ - {"role": "user", "content": self.prompt_template.format(sentence=sentence)} - ], - ) - result = response.choices[0].message.content - if result is None: - raise ValueError("No response from model") - parsed = json.loads(result) - return parsed -``` - -You can instantiate `Model` objects as normal like this: - -```python -import asyncio -import weave - -weave.init('intro-example') - -model = ExtractFruitsModel(model_name='gpt-3.5-turbo-1106', - prompt_template='Extract fields ("fruit": , "color": , "flavor": ) from the following text, as json: {sentence}') -sentence = "There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy." 
-print(asyncio.run(model.predict(sentence)))
-# if you're in a Jupyter Notebook, run:
-# await model.predict(sentence)
-```
+  class ExtractFruitsModel(weave.Model):
+      model_name: str
+      prompt_template: str
+
+      # highlight-next-line
+      @weave.op()
+      # highlight-next-line
+      async def predict(self, sentence: str) -> dict:
+          client = openai.AsyncClient()
+
+          response = await client.chat.completions.create(
+              model=self.model_name,
+              messages=[
+                  {"role": "user", "content": self.prompt_template.format(sentence=sentence)}
+              ],
+          )
+          result = response.choices[0].message.content
+          if result is None:
+              raise ValueError("No response from model")
+          parsed = json.loads(result)
+          return parsed
+  ```
+
+  You can instantiate `Model` objects as normal like this:
+
+  ```python
+  import asyncio
+  import weave
+
+  weave.init('intro-example')
+
+  model = ExtractFruitsModel(model_name='gpt-3.5-turbo-1106',
+                             prompt_template='Extract fields ("fruit": , "color": , "flavor": ) from the following text, as json: {sentence}')
+  sentence = "There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy."
+  print(asyncio.run(model.predict(sentence)))
+  # if you're in a Jupyter Notebook, run:
+  # await model.predict(sentence)
+  ```

 :::note
 Check out the [Models](/guides/core-types/models) guide to learn more.
 :::
+
+
+
+  `weave.Model` is not supported in TypeScript yet. Instead, you can just wrap your model-like function with `weave.op`.
+
+  ```typescript
+  // highlight-next-line
+  const model = weave.op(async function myModel({datasetRow}) {
+    const prompt = `Extract fields ("fruit": , "color": , "flavor") from the following text, as json: ${datasetRow.sentence}`;
+    const response = await openaiClient.chat.completions.create({
+      model: 'gpt-3.5-turbo',
+      messages: [{role: 'user', content: prompt}],
+      response_format: {type: 'json_object'},
+    });
+    const result = response?.choices?.[0]?.message?.content;
+    if (result == null) {
+      throw new Error('No response from model');
+    }
+    return JSON.parse(result);
+  });
+  ```
+
+
+

 ## 2. Collect some examples

+
+
+
+  ```python
+  sentences = [
+      "There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy.",
+      "Pounits are a bright green color and are more savory than sweet.",
+      "Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them."
+ ] + labels = [ + {'fruit': 'neoskizzles', 'color': 'purple', 'flavor': 'candy'}, + {'fruit': 'pounits', 'color': 'bright green', 'flavor': 'savory'}, + {'fruit': 'glowls', 'color': 'pale orange', 'flavor': 'sour and bitter'} + ] + examples = [ + {'id': '0', 'sentence': sentences[0], 'target': labels[0]}, + {'id': '1', 'sentence': sentences[1], 'target': labels[1]}, + {'id': '2', 'sentence': sentences[2], 'target': labels[2]} + ] + ``` + + + + + ```typescript + const sentences = [ + 'There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy.', + 'Pounits are a bright green color and are more savory than sweet.', + 'Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them.', + ]; + const labels = [ + {fruit: 'neoskizzles', color: 'purple', flavor: 'candy'}, + {fruit: 'pounits', color: 'bright green', flavor: 'savory'}, + {fruit: 'glowls', color: 'pale orange', flavor: 'sour and bitter'}, + ]; + const examples = [ + {id: '0', sentence: sentences[0], target: labels[0]}, + {id: '1', sentence: sentences[1], target: labels[1]}, + {id: '2', sentence: sentences[2], target: labels[2]}, + ]; + const dataset = new weave.Dataset({ + id: 'Fruit Dataset', + rows: examples, + }); + ``` + + ## 3. Evaluate a `Model` -`Evaluation`s assess a `Model`s performance on a set of examples using a list of specified scoring functions or `weave.flow.scorer.Scorer` classes. + + + +`Evaluation`s assess a `Model`s performance on a set of examples using a list of specified scoring functions or `weave.scorer.Scorer` classes. Here, we'll use a default scoring class `MultiTaskBinaryClassificationF1` and we'll also define our own `fruit_name_score` scoring function. Here `sentence` is passed to the model's predict function, and `target` is used in the scoring function, these are inferred based on the argument names of the `predict` and scoring functions. The `fruit` key needs to be outputted by the model's predict function and must also be existing as a column in the dataset (or outputted by the `preprocess_model_input` function if defined). 
-```python
-import weave
-from weave.scorers import MultiTaskBinaryClassificationF1
+  ```python
+  import weave
+  from weave.scorers import MultiTaskBinaryClassificationF1

-weave.init('intro-example')
+  weave.init('intro-example')

-@weave.op()
-def fruit_name_score(target: dict, model_output: dict) -> dict:
-    return {'correct': target['fruit'] == model_output['fruit']}
+  @weave.op()
+  def fruit_name_score(target: dict, output: dict) -> dict:
+      return {'correct': target['fruit'] == output['fruit']}

-# highlight-next-line
-evaluation = weave.Evaluation(
-    # highlight-next-line
-    dataset=examples, # highlight-next-line
-    scorers=[
+  evaluation = weave.Evaluation(
 # highlight-next-line
-        MultiTaskBinaryClassificationF1(class_names=["fruit", "color", "flavor"]),
+      dataset=examples,
 # highlight-next-line
-        fruit_name_score
+      scorers=[
+          # highlight-next-line
+          MultiTaskBinaryClassificationF1(class_names=["fruit", "color", "flavor"]),
+          # highlight-next-line
+          fruit_name_score
+          # highlight-next-line
+      ],
+      # highlight-next-line
+  )
 # highlight-next-line
-    ],
-# highlight-next-line
-)
-# highlight-next-line
-print(asyncio.run(evaluation.evaluate(model)))
-# if you're in a Jupyter Notebook, run:
-# await evaluation.evaluate(model)
-```
+  print(asyncio.run(evaluation.evaluate(model)))
+  # if you're in a Jupyter Notebook, run:
+  # await evaluation.evaluate(model)
+  ```
+
+
+
+`Evaluation`s assess a model's performance on a set of examples using a list of specified scoring functions.
+
+For this example, we'll define a simple scoring function.
+
+Here, each example's `datasetRow` is passed to the model, and the scorer compares the model output against `datasetRow.target`; both are wired up for you by the `Evaluation`.

+  ```typescript
+  import * as weave from 'weave';
+  import {OpenAI} from 'openai';
+
+  const client = await weave.init('intro-example');
+  const openaiClient = weave.wrapOpenAI(new OpenAI());
+
+  const fruitNameScorer = weave.op(
+    ({modelOutput, datasetRow}) => datasetRow.target.fruit == modelOutput.fruit,
+    {name: 'fruitNameScore'}
+  );
+
+  const evaluation = new weave.Evaluation({
+    dataset,
+    scorers: [fruitNameScorer],
+  });
+
+  const results = await evaluation.evaluate({model});
+  console.log(JSON.stringify(results, null, 2));
+  ```
+
+

 In some applications we want to create custom `Scorer` classes - where for example a standardized `LLMJudge` class should be created with specific parameters (e.g. chat model, prompt), specific scoring of each row, and specific calculation of an aggregate score. See the tutorial on defining a `Scorer` class in the next chapter on [Model-Based Evaluation of RAG applications](/tutorial-rag#optional-defining-a-scorer-class) for more information.

 ## 4. Pulling it all together

+
+
+
+  ```python
+  import json
+  import asyncio
+  # highlight-next-line
+  import weave
+  # highlight-next-line
+  from weave.scorers import MultiTaskBinaryClassificationF1
+  import openai

-# We create a model class with one predict function.
-# All inputs, predictions and parameters are automatically captured for easy inspection.
+  # We create a model class with one predict function.
+  # All inputs, predictions and parameters are automatically captured for easy inspection.
# highlight-next-line + class ExtractFruitsModel(weave.Model): + model_name: str + prompt_template: str + + # highlight-next-line + @weave.op() + # highlight-next-line + async def predict(self, sentence: str) -> dict: + client = openai.AsyncClient() + + response = await client.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "user", "content": self.prompt_template.format(sentence=sentence)} + ], + response_format={ "type": "json_object" } + ) + result = response.choices[0].message.content + if result is None: + raise ValueError("No response from model") + parsed = json.loads(result) + return parsed + + # We call init to begin capturing data in the project, intro-example. + weave.init('intro-example') + + # We create our model with our system prompt. + model = ExtractFruitsModel(name='gpt4', + model_name='gpt-4-0125-preview', + prompt_template='Extract fields ("fruit": , "color": , "flavor") from the following text, as json: {sentence}') + sentences = ["There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy.", + "Pounits are a bright green color and are more savory than sweet.", + "Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them."] + labels = [ + {'fruit': 'neoskizzles', 'color': 'purple', 'flavor': 'candy'}, + {'fruit': 'pounits', 'color': 'bright green', 'flavor': 'savory'}, + {'fruit': 'glowls', 'color': 'pale orange', 'flavor': 'sour and bitter'} + ] + examples = [ + {'id': '0', 'sentence': sentences[0], 'target': labels[0]}, + {'id': '1', 'sentence': sentences[1], 'target': labels[1]}, + {'id': '2', 'sentence': sentences[2], 'target': labels[2]} + ] + # If you have already published the Dataset, you can run: + # dataset = weave.ref('example_labels').get() + + # We define a scoring function to compare our model predictions with a ground truth label. @weave.op() + def fruit_name_score(target: dict, output: dict) -> dict: + return {'correct': target['fruit'] == output['fruit']} + + # Finally, we run an evaluation of this model. + # This will generate a prediction for each input example, and then score it with each scoring function. # highlight-next-line - async def predict(self, sentence: str) -> dict: - client = openai.AsyncClient() - - response = await client.chat.completions.create( - model=self.model_name, - messages=[ - {"role": "user", "content": self.prompt_template.format(sentence=sentence)} - ], - response_format={ "type": "json_object" } - ) - result = response.choices[0].message.content - if result is None: - raise ValueError("No response from model") - parsed = json.loads(result) - return parsed - -# We call init to begin capturing data in the project, intro-example. -weave.init('intro-example') - -# We create our model with our system prompt. -model = ExtractFruitsModel(name='gpt4', - model_name='gpt-4-0125-preview', - prompt_template='Extract fields ("fruit": , "color": , "flavor") from the following text, as json: {sentence}') -sentences = ["There are many fruits that were found on the recently discovered planet Goocrux. 
There are neoskizzles that grow there, which are purple and taste like candy.",
-"Pounits are a bright green color and are more savory than sweet.",
-"Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them."]
-labels = [
-    {'fruit': 'neoskizzles', 'color': 'purple', 'flavor': 'candy'},
-    {'fruit': 'pounits', 'color': 'bright green', 'flavor': 'savory'},
-    {'fruit': 'glowls', 'color': 'pale orange', 'flavor': 'sour and bitter'}
-]
-examples = [
-    {'id': '0', 'sentence': sentences[0], 'target': labels[0]},
-    {'id': '1', 'sentence': sentences[1], 'target': labels[1]},
-    {'id': '2', 'sentence': sentences[2], 'target': labels[2]}
-]
-# If you have already published the Dataset, you can run:
-# dataset = weave.ref('example_labels').get()
-
-# We define a scoring function to compare our model predictions with a ground truth label.
-@weave.op()
-def fruit_name_score(target: dict, model_output: dict) -> dict:
-    return {'correct': target['fruit'] == model_output['fruit']}
-
-# Finally, we run an evaluation of this model.
-# This will generate a prediction for each input example, and then score it with each scoring function.
-# highlight-next-line
-evaluation = weave.Evaluation(
-    name='fruit_eval',
+  evaluation = weave.Evaluation(
+      # highlight-next-line
+      name='fruit_eval',
+      # highlight-next-line
+      dataset=examples, scorers=[MultiTaskBinaryClassificationF1(class_names=["fruit", "color", "flavor"]), fruit_name_score],
 # highlight-next-line
-    dataset=examples, scorers=[MultiTaskBinaryClassificationF1(class_names=["fruit", "color", "flavor"]), fruit_name_score],
-# highlight-next-line
-)
-print(asyncio.run(evaluation.evaluate(model)))
-# if you're in a Jupyter Notebook, run:
-# await evaluation.evaluate(model)
-```
+  )
+  print(asyncio.run(evaluation.evaluate(model)))
+  # if you're in a Jupyter Notebook, run:
+  # await evaluation.evaluate(model)
+  ```
+
+
+
+  ```typescript
+  import {OpenAI} from 'openai';
+  import 'source-map-support/register';
+  import * as weave from 'weave';
+
+  const sentences = [
+    'There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy.',
+    'Pounits are a bright green color and are more savory than sweet.',
+    'Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them.',
+  ];
+  const labels = [
+    {fruit: 'neoskizzles', color: 'purple', flavor: 'candy'},
+    {fruit: 'pounits', color: 'bright green', flavor: 'savory'},
+    {fruit: 'glowls', color: 'pale orange', flavor: 'sour and bitter'},
+  ];
+  const examples = [
+    {id: '0', sentence: sentences[0], target: labels[0]},
+    {id: '1', sentence: sentences[1], target: labels[1]},
+    {id: '2', sentence: sentences[2], target: labels[2]},
+  ];
+  const dataset = new weave.Dataset({
+    id: 'Fruit Dataset',
+    rows: examples,
+  });
+
+  const openaiClient = weave.wrapOpenAI(new OpenAI());
+
+  const model = weave.op(async function myModel({datasetRow}) {
+    const prompt = `Extract fields ("fruit": , "color": , "flavor") from the following text, as json: ${datasetRow.sentence}`;
+    const response = await openaiClient.chat.completions.create({
+      model: 'gpt-3.5-turbo',
+      messages: [{role: 'user', content: prompt}],
+      response_format: {type: 'json_object'},
+    });
+    const result = response?.choices?.[0]?.message?.content;
+    if (result == null) {
+      throw new Error('No response from model');
+    }
+    return JSON.parse(result);
+  });
+
+  const fruitNameScorer = weave.op(
+    ({modelOutput, datasetRow}) => datasetRow.target.fruit == modelOutput.fruit,
+    {name: 'fruitNameScore'}
+  );
+
+  async function main() {
+    await weave.init('examples');
+    const evaluation = new weave.Evaluation({
+      dataset,
+      scorers: [fruitNameScorer],
+    });
+
+    const results = await evaluation.evaluate({model});
+    console.log(JSON.stringify(results, null, 2));
+  }
+
+  main();
+
+  ```
+
+

 ## What's next?
diff --git a/docs/docs/tutorial-rag.md b/docs/docs/tutorial-rag.md
index e88e27e38bcb..d9fb47ef5282 100644
--- a/docs/docs/tutorial-rag.md
+++ b/docs/docs/tutorial-rag.md
@@ -1,6 +1,15 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Tutorial: Model-Based Evaluation of RAG applications

+:::warning
+
+This tutorial is currently only available for Python!
+
+:::
+
-Retrieval Augmented Generation (RAG) is a common way of building Generative AI applications that have access to custom knowledge bases.
+Retrieval Augmented Generation (RAG) is a common way of building Generative AI applications that have access to custom knowledge bases.

 In this example, we'll build an app that has a retrieval step to get documents. By tracking this, you can debug your app and see what documents were pulled into the LLM context. We'll also show how to evaluate it using an LLM judge.

@@ -13,251 +22,302 @@ Check out the [RAG++ course](https://www.wandb.courses/courses/rag-in-production

 First, we compute the embeddings for our articles. You would typically do this once with your articles and put the embeddings & metadata in a database, but here we're doing it every time we run our script for simplicity.

-```python
-from openai import OpenAI
-import weave
-from weave import Model
-import numpy as np
-import json
-import asyncio
-
-articles = [
-    "Novo Nordisk and Eli Lilly rival soars 32 percent after promising weight loss drug results Shares of Denmarks Zealand Pharma shot 32 percent higher in morning trade, after results showed success in its liver disease treatment survodutide, which is also on trial as a drug to treat obesity. The trial “tells us that the 6mg dose is safe, which is the top dose used in the ongoing [Phase 3] obesity trial too,” one analyst said in a note.
The results come amid feverish investor interest in drugs that can be used for weight loss.", - "Berkshire shares jump after big profit gain as Buffetts conglomerate nears $1 trillion valuation Berkshire Hathaway shares rose on Monday after Warren Buffetts conglomerate posted strong earnings for the fourth quarter over the weekend. Berkshires Class A and B shares jumped more than 1.5%, each. Class A shares are higher by more than 17% this year, while Class B has gained more than 18%. Berkshire was last valued at $930.1 billion, up from $905.5 billion where it closed on Friday, according to FactSet. Berkshire on Saturday posted fourth-quarter operating earnings of $8.481 billion, about 28 percent higher than the $6.625 billion from the year-ago period, driven by big gains in its insurance business. Operating earnings refers to profits from businesses across insurance, railroads and utilities. Meanwhile, Berkshires cash levels also swelled to record levels. The conglomerate held $167.6 billion in cash in the fourth quarter, surpassing the $157.2 billion record the conglomerate held in the prior quarter.", - "Highmark Health says its combining tech from Google and Epic to give doctors easier access to information Highmark Health announced it is integrating technology from Google Cloud and the health-care software company Epic Systems. The integration aims to make it easier for both payers and providers to access key information they need, even if its stored across multiple points and formats, the company said. Highmark is the parent company of a health plan with 7 million members, a provider network of 14 hospitals and other entities", - "Rivian and Lucid shares plunge after weak EV earnings reports Shares of electric vehicle makers Rivian and Lucid fell Thursday after the companies reported stagnant production in their fourth-quarter earnings after the bell Wednesday. Rivian shares sank about 25 percent, and Lucids stock dropped around 17 percent. Rivian forecast it will make 57,000 vehicles in 2024, slightly less than the 57,232 vehicles it produced in 2023. Lucid said it expects to make 9,000 vehicles in 2024, more than the 8,428 vehicles it made in 2023.", - "Mauritius blocks Norwegian cruise ship over fears of a potential cholera outbreak Local authorities on Sunday denied permission for the Norwegian Dawn ship, which has 2,184 passengers and 1,026 crew on board, to access the Mauritius capital of Port Louis, citing “potential health risks.” The Mauritius Ports Authority said Sunday that samples were taken from at least 15 passengers on board the cruise ship. A spokesperson for the U.S.-headquartered Norwegian Cruise Line Holdings said Sunday that 'a small number of guests experienced mild symptoms of a stomach-related illness' during Norwegian Dawns South Africa voyage.", - "Intuitive Machines lands on the moon in historic first for a U.S. company Intuitive Machines Nova-C cargo lander, named Odysseus after the mythological Greek hero, is the first U.S. spacecraft to soft land on the lunar surface since 1972. Intuitive Machines is the first company to pull off a moon landing — government agencies have carried out all previously successful missions. The company's stock surged in extended trading Thursday, after falling 11 percent in regular trading.", - "Lunar landing photos: Intuitive Machines Odysseus sends back first images from the moon Intuitive Machines cargo moon lander Odysseus returned its first images from the surface. 
Company executives believe the lander caught its landing gear sideways on the moon's surface while touching down and tipped over. Despite resting on its side, the company's historic IM-1 mission is still operating on the moon.", -] - -def docs_to_embeddings(docs: list) -> list: - openai = OpenAI() - document_embeddings = [] - for doc in docs: - response = ( - openai.embeddings.create(input=doc, model="text-embedding-3-small") - .data[0] - .embedding - ) - document_embeddings.append(response) - return document_embeddings + + + ```python + from openai import OpenAI + import weave + from weave import Model + import numpy as np + import json + import asyncio + + articles = [ + "Novo Nordisk and Eli Lilly rival soars 32 percent after promising weight loss drug results Shares of Denmarks Zealand Pharma shot 32 percent higher in morning trade, after results showed success in its liver disease treatment survodutide, which is also on trial as a drug to treat obesity. The trial “tells us that the 6mg dose is safe, which is the top dose used in the ongoing [Phase 3] obesity trial too,” one analyst said in a note. The results come amid feverish investor interest in drugs that can be used for weight loss.", + "Berkshire shares jump after big profit gain as Buffetts conglomerate nears $1 trillion valuation Berkshire Hathaway shares rose on Monday after Warren Buffetts conglomerate posted strong earnings for the fourth quarter over the weekend. Berkshires Class A and B shares jumped more than 1.5%, each. Class A shares are higher by more than 17% this year, while Class B has gained more than 18%. Berkshire was last valued at $930.1 billion, up from $905.5 billion where it closed on Friday, according to FactSet. Berkshire on Saturday posted fourth-quarter operating earnings of $8.481 billion, about 28 percent higher than the $6.625 billion from the year-ago period, driven by big gains in its insurance business. Operating earnings refers to profits from businesses across insurance, railroads and utilities. Meanwhile, Berkshires cash levels also swelled to record levels. The conglomerate held $167.6 billion in cash in the fourth quarter, surpassing the $157.2 billion record the conglomerate held in the prior quarter.", + "Highmark Health says its combining tech from Google and Epic to give doctors easier access to information Highmark Health announced it is integrating technology from Google Cloud and the health-care software company Epic Systems. The integration aims to make it easier for both payers and providers to access key information they need, even if its stored across multiple points and formats, the company said. Highmark is the parent company of a health plan with 7 million members, a provider network of 14 hospitals and other entities", + "Rivian and Lucid shares plunge after weak EV earnings reports Shares of electric vehicle makers Rivian and Lucid fell Thursday after the companies reported stagnant production in their fourth-quarter earnings after the bell Wednesday. Rivian shares sank about 25 percent, and Lucids stock dropped around 17 percent. Rivian forecast it will make 57,000 vehicles in 2024, slightly less than the 57,232 vehicles it produced in 2023. 
Lucid said it expects to make 9,000 vehicles in 2024, more than the 8,428 vehicles it made in 2023.", + "Mauritius blocks Norwegian cruise ship over fears of a potential cholera outbreak Local authorities on Sunday denied permission for the Norwegian Dawn ship, which has 2,184 passengers and 1,026 crew on board, to access the Mauritius capital of Port Louis, citing “potential health risks.” The Mauritius Ports Authority said Sunday that samples were taken from at least 15 passengers on board the cruise ship. A spokesperson for the U.S.-headquartered Norwegian Cruise Line Holdings said Sunday that 'a small number of guests experienced mild symptoms of a stomach-related illness' during Norwegian Dawns South Africa voyage.", + "Intuitive Machines lands on the moon in historic first for a U.S. company Intuitive Machines Nova-C cargo lander, named Odysseus after the mythological Greek hero, is the first U.S. spacecraft to soft land on the lunar surface since 1972. Intuitive Machines is the first company to pull off a moon landing — government agencies have carried out all previously successful missions. The company's stock surged in extended trading Thursday, after falling 11 percent in regular trading.", + "Lunar landing photos: Intuitive Machines Odysseus sends back first images from the moon Intuitive Machines cargo moon lander Odysseus returned its first images from the surface. Company executives believe the lander caught its landing gear sideways on the moon's surface while touching down and tipped over. Despite resting on its side, the company's historic IM-1 mission is still operating on the moon.", + ] -article_embeddings = docs_to_embeddings(articles) # Note: you would typically do this once with your articles and put the embeddings & metadata in a database -``` + def docs_to_embeddings(docs: list) -> list: + openai = OpenAI() + document_embeddings = [] + for doc in docs: + response = ( + openai.embeddings.create(input=doc, model="text-embedding-3-small") + .data[0] + .embedding + ) + document_embeddings.append(response) + return document_embeddings + + article_embeddings = docs_to_embeddings(articles) # Note: you would typically do this once with your articles and put the embeddings & metadata in a database + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + ## 2. Create a RAG app Next, we wrap our retrieval function `get_most_relevant_document` with a `weave.op()` decorator and we create our `Model` class. We call `weave.init('rag-qa')` to begin tracking all the inputs and outputs of our functions for later inspection. 
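+Because `get_most_relevant_document` (defined below) is wrapped in `weave.op()`, you can also call it on its own to sanity-check retrieval before wiring it into the model; each call is traced in Weave. A minimal usage sketch, assuming the op and `article_embeddings` from the snippets in this section:
+
+```python
+# Sanity-check the retrieval step in isolation (sketch; assumes the definitions below).
+weave.init('rag-qa')
+doc = get_most_relevant_document("Which company achieved the first U.S. moon landing since 1972?")
+print(doc[:120])  # preview of the retrieved article; the call also shows up as a trace
+```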
-```python -from openai import OpenAI -import weave -from weave import Model -import numpy as np -import asyncio - -# highlight-next-line -@weave.op() -def get_most_relevant_document(query): - openai = OpenAI() - query_embedding = ( - openai.embeddings.create(input=query, model="text-embedding-3-small") - .data[0] - .embedding - ) - similarities = [ - np.dot(query_embedding, doc_emb) - / (np.linalg.norm(query_embedding) * np.linalg.norm(doc_emb)) - for doc_emb in article_embeddings - ] - # Get the index of the most similar document - most_relevant_doc_index = np.argmax(similarities) - return articles[most_relevant_doc_index] + + + ```python + from openai import OpenAI + import weave + from weave import Model + import numpy as np + import asyncio -# highlight-next-line -class RAGModel(Model): - system_message: str - model_name: str = "gpt-3.5-turbo-1106" - -# highlight-next-line + # highlight-next-line @weave.op() - def predict(self, question: str) -> dict: # note: `question` will be used later to select data from our evaluation rows - from openai import OpenAI - context = get_most_relevant_document(question) - client = OpenAI() - query = f"""Use the following information to answer the subsequent question. If the answer cannot be found, write "I don't know." - Context: - \"\"\" - {context} - \"\"\" - Question: {question}""" - response = client.chat.completions.create( - model=self.model_name, - messages=[ - {"role": "system", "content": self.system_message}, - {"role": "user", "content": query}, - ], - temperature=0.0, - response_format={"type": "text"}, + def get_most_relevant_document(query): + openai = OpenAI() + query_embedding = ( + openai.embeddings.create(input=query, model="text-embedding-3-small") + .data[0] + .embedding ) - answer = response.choices[0].message.content - return {'answer': answer, 'context': context} - -# highlight-next-line -weave.init('rag-qa') -model = RAGModel( - system_message="You are an expert in finance and answer questions related to finance, financial services, and financial markets. When responding based on provided information, be sure to cite the source." -) -model.predict("What significant result was reported about Zealand Pharma's obesity trial?") -``` + similarities = [ + np.dot(query_embedding, doc_emb) + / (np.linalg.norm(query_embedding) * np.linalg.norm(doc_emb)) + for doc_emb in article_embeddings + ] + # Get the index of the most similar document + most_relevant_doc_index = np.argmax(similarities) + return articles[most_relevant_doc_index] + + # highlight-next-line + class RAGModel(Model): + system_message: str + model_name: str = "gpt-3.5-turbo-1106" + + # highlight-next-line + @weave.op() + def predict(self, question: str) -> dict: # note: `question` will be used later to select data from our evaluation rows + from openai import OpenAI + context = get_most_relevant_document(question) + client = OpenAI() + query = f"""Use the following information to answer the subsequent question. If the answer cannot be found, write "I don't know." 
+ Context: + \"\"\" + {context} + \"\"\" + Question: {question}""" + response = client.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": self.system_message}, + {"role": "user", "content": query}, + ], + temperature=0.0, + response_format={"type": "text"}, + ) + answer = response.choices[0].message.content + return {'answer': answer, 'context': context} + + # highlight-next-line + weave.init('rag-qa') + model = RAGModel( + system_message="You are an expert in finance and answer questions related to finance, financial services, and financial markets. When responding based on provided information, be sure to cite the source." + ) + model.predict("What significant result was reported about Zealand Pharma's obesity trial?") + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + ## 3. Evaluating with an LLM Judge -When there aren't simple ways to evaluate your application, one approach is to use an LLM to evaluate aspects of it. Here is an example of using an LLM judge to try to measure the context precision by prompting it to verify if the context was useful in arriving at the given answer. This prompt was augmented from the popular [RAGAS framework](https://docs.ragas.io/). +When there aren't simple ways to evaluate your application, one approach is to use an LLM to evaluate aspects of it. Here is an example of using an LLM judge to try to measure the context precision by prompting it to verify if the context was useful in arriving at the given answer. This prompt was augmented from the popular [RAGAS framework](https://docs.ragas.io/). ### Defining a scoring function -As we did in the [Build an Evaluation pipeline tutorial](/tutorial-eval), we'll define a set of example rows to test our app against and a scoring function. Our scoring function will take one row and evaluate it. The input arguments should match with the corresponding keys in our row, so `question` here will be taken from the row dictionary. `model_output` is the output of the model. The input to the model will be taken from the example based on its input argument, so `question` here too. We're using `async` functions so they run fast in parallel. If you need a quick introduction to async, you can find one [here](https://docs.python.org/3/library/asyncio.html). - -```python -from openai import OpenAI -import weave -import asyncio - -# highlight-next-line -@weave.op() -async def context_precision_score(question, model_output): - context_precision_prompt = """Given question, answer and context verify if the context was useful in arriving at the given answer. Give verdict as "1" if useful and "0" if not with json output. - Output in only valid JSON format. - - question: {question} - context: {context} - answer: {answer} - verdict: """ - client = OpenAI() - - prompt = context_precision_prompt.format( - question=question, - context=model_output['context'], - answer=model_output['answer'], - ) +As we did in the [Build an Evaluation pipeline tutorial](/tutorial-eval), we'll define a set of example rows to test our app against and a scoring function. Our scoring function will take one row and evaluate it. The input arguments should match with the corresponding keys in our row, so `question` here will be taken from the row dictionary. `output` is the output of the model. The input to the model will be taken from the example based on its input argument, so `question` here too. We're using `async` functions so they run fast in parallel. 
If you need a quick introduction to async, you can find one [here](https://docs.python.org/3/library/asyncio.html). - response = client.chat.completions.create( - model="gpt-4-turbo-preview", - messages=[{"role": "user", "content": prompt}], - response_format={ "type": "json_object" } - ) - response_message = response.choices[0].message - response = json.loads(response_message.content) - return { - "verdict": int(response["verdict"]) == 1, - } - -questions = [ - {"question": "What significant result was reported about Zealand Pharma's obesity trial?"}, - {"question": "How much did Berkshire Hathaway's cash levels increase in the fourth quarter?"}, - {"question": "What is the goal of Highmark Health's integration of Google Cloud and Epic Systems technology?"}, - {"question": "What were Rivian and Lucid's vehicle production forecasts for 2024?"}, - {"question": "Why was the Norwegian Dawn cruise ship denied access to Mauritius?"}, - {"question": "Which company achieved the first U.S. moon landing since 1972?"}, - {"question": "What issue did Intuitive Machines' lunar lander encounter upon landing on the moon?"} -] -# highlight-next-line -evaluation = weave.Evaluation(dataset=questions, scorers=[context_precision_score]) -# highlight-next-line -asyncio.run(evaluation.evaluate(model)) # note: you'll need to define a model to evaluate -``` + + + ```python + from openai import OpenAI + import weave + import asyncio -### Optional: Defining a `Scorer` class -In some applications we want to create custom evaluation classes - where for example a standardized `LLMJudge` class should be created with specific parameters (e.g. chat model, prompt), specific scoring of each row, and specific calculation of an aggregate score. In order to do that Weave defines a list of ready-to-use `Scorer` classes and also makes it easy to create a custom `Scorer` - in the following we'll see how to create a custom `class CorrectnessLLMJudge(Scorer)`. + # highlight-next-line + @weave.op() + async def context_precision_score(question, output): + context_precision_prompt = """Given question, answer and context verify if the context was useful in arriving at the given answer. Give verdict as "1" if useful and "0" if not with json output. + Output in only valid JSON format. + + question: {question} + context: {context} + answer: {answer} + verdict: """ + client = OpenAI() -On a high-level the steps to create custom Scorer are quite simple: -1. Define a custom class that inherits from `weave.flow.scorer.Scorer` -2. Overwrite the `score` function and add a `@weave.op()` if you want to track each call of the function - - this function has to define a `model_output` argument where the prediction of the model will be passed to. We define it as type `Optional[dict]` in case the mode might return "None". - - the rest of the arguments can either be a general `Any` or `dict` or can select specific columns from the dataset that is used to evaluate the model using the `weave.Evaluate` class - they have to have the exact same names as the column names or keys of a single row after being passed to `preprocess_model_input` if that is used. -3. *Optional:* Overwrite the `summarize` function to customize the calculation of the aggregate score. By default Weave uses the `weave.flow.scorer.auto_summarize` function if you don't define a custom function. - - this function has to have a `@weave.op()` decorator. 
+ prompt = context_precision_prompt.format( + question=question, + context=output['context'], + answer=output['answer'], + ) + response = client.chat.completions.create( + model="gpt-4-turbo-preview", + messages=[{"role": "user", "content": prompt}], + response_format={ "type": "json_object" } + ) + response_message = response.choices[0].message + response = json.loads(response_message.content) + return { + "verdict": int(response["verdict"]) == 1, + } -```python -from weave.scorers import Scorer -from weave import WeaveList + questions = [ + {"question": "What significant result was reported about Zealand Pharma's obesity trial?"}, + {"question": "How much did Berkshire Hathaway's cash levels increase in the fourth quarter?"}, + {"question": "What is the goal of Highmark Health's integration of Google Cloud and Epic Systems technology?"}, + {"question": "What were Rivian and Lucid's vehicle production forecasts for 2024?"}, + {"question": "Why was the Norwegian Dawn cruise ship denied access to Mauritius?"}, + {"question": "Which company achieved the first U.S. moon landing since 1972?"}, + {"question": "What issue did Intuitive Machines' lunar lander encounter upon landing on the moon?"} + ] + # highlight-next-line + evaluation = weave.Evaluation(dataset=questions, scorers=[context_precision_score]) + # highlight-next-line + asyncio.run(evaluation.evaluate(model)) # note: you'll need to define a model to evaluate + ``` + + + + ```plaintext + This feature is not available in TypeScript yet. Stay tuned! + ``` + + -class CorrectnessLLMJudge(Scorer): - prompt: str - model_name: str - device: str +### Optional: Defining a `Scorer` class - @weave.op() - async def score(self, model_output: Optional[dict], query: str, answer: str) -> Any: - """Score the correctness of the predictions by comparing the pred, query, target. - Args: - - model_output: the dict that will be provided by the model that is evaluated - - query: the question asked - as defined in the dataset - - answer: the target answer - as defined in the dataset - Returns: - - single dict {metric name: single evaluation value}""" - - # get_model is defined as general model getter based on provided params (OpenAI,HF...) - eval_model = get_model( - model_name = self.model_name, - prompt = self.prompt - device = self.device, - ) - # async evaluation to speed up evaluation - this doesn't have to be async - grade = await eval_model.async_predict( - { - "query": query, - "answer": answer, - "result": model_output.get("result"), - } - ) - # output parsing - could be done more reobustly with pydantic - evaluation = "incorrect" not in grade["text"].strip().lower() +In some applications we want to create custom evaluation classes - where for example a standardized `LLMJudge` class should be created with specific parameters (e.g. chat model, prompt), specific scoring of each row, and specific calculation of an aggregate score. In order to do that Weave defines a list of ready-to-use `Scorer` classes and also makes it easy to create a custom `Scorer` - in the following we'll see how to create a custom `class CorrectnessLLMJudge(Scorer)`. - # the column name displayed in Weave - return {"correct": evaluation} +On a high-level the steps to create custom Scorer are quite simple: - @weave.op() - def summarize(self, score_rows: WeaveList) -> Optional[dict]: - """Aggregate all the scores that are calculated for each row by the scoring function. 
- Args:
-            - score_rows: a WeaveList object, nested dict of metrics and scores
-        Returns:
-            - nested dict with the same structure as the input"""
-
-        # if nothing is provided the weave.flow.scorer.auto_summarize function is used
-        # return auto_summarize(score_rows)
-
-        valid_data = [x.get("correct") for x in score_rows if x.get("correct") is not None]
-        count_true = list(valid_data).count(True)
-        int_data = [int(x) for x in valid_data]
-
-        sample_mean = np.mean(int_data) if int_data else 0
-        sample_variance = np.var(int_data) if int_data else 0
-        sample_error = np.sqrt(sample_variance / len(int_data)) if int_data else 0
-
-        # the extra "correct" layer is not necessary but adds structure in the UI
-        return {
-            "correct": {
-                "true_count": count_true,
-                "true_fraction": sample_mean,
-                "stderr": sample_error,
-            }
-        }
-```

+1. Define a custom class that inherits from `weave.flow.scorer.Scorer`
+2. Overwrite the `score` function and add a `@weave.op()` if you want to track each call of the function
+   - this function has to define an `output` argument, to which the prediction of the model will be passed. We define it as type `Optional[dict]` in case the model might return "None".
+   - the rest of the arguments can either be a general `Any` or `dict` or can select specific columns from the dataset that is used to evaluate the model using the `weave.Evaluate` class - they have to have the exact same names as the column names or keys of a single row after being passed to `preprocess_model_input` if that is used.
+3. _Optional:_ Overwrite the `summarize` function to customize the calculation of the aggregate score. By default Weave uses the `weave.flow.scorer.auto_summarize` function if you don't define a custom function.
+   - this function has to have a `@weave.op()` decorator.

+
+
+  ```python
+  from weave.scorers import Scorer
+  from weave import WeaveList
+
+  class CorrectnessLLMJudge(Scorer):
+      prompt: str
+      model_name: str
+      device: str
+
+      @weave.op()
+      async def score(self, output: Optional[dict], query: str, answer: str) -> Any:
+          """Score the correctness of the predictions by comparing the pred, query, target.
+          Args:
+              - output: the dict that will be provided by the model that is evaluated
+              - query: the question asked - as defined in the dataset
+              - answer: the target answer - as defined in the dataset
+          Returns:
+              - single dict {metric name: single evaluation value}"""
+
+          # get_model is defined as general model getter based on provided params (OpenAI,HF...)
+          eval_model = get_model(
+              model_name = self.model_name,
+              prompt = self.prompt,
+              device = self.device,
+          )
+          # async evaluation to speed up evaluation - this doesn't have to be async
+          grade = await eval_model.async_predict(
+              {
+                  "query": query,
+                  "answer": answer,
+                  "result": output.get("result"),
+              }
+          )
+          # output parsing - could be done more robustly with pydantic
+          evaluation = "incorrect" not in grade["text"].strip().lower()
+
+          # the column name displayed in Weave
+          return {"correct": evaluation}
+
+      @weave.op()
+      def summarize(self, score_rows: WeaveList) -> Optional[dict]:
+          """Aggregate all the scores that are calculated for each row by the scoring function.
+ Args:
+              - score_rows: a WeaveList object, nested dict of metrics and scores
+          Returns:
+              - nested dict with the same structure as the input"""
+
+          # if nothing is provided the weave.flow.scorer.auto_summarize function is used
+          # return auto_summarize(score_rows)
+
+          valid_data = [x.get("correct") for x in score_rows if x.get("correct") is not None]
+          count_true = list(valid_data).count(True)
+          int_data = [int(x) for x in valid_data]
+
+          sample_mean = np.mean(int_data) if int_data else 0
+          sample_variance = np.var(int_data) if int_data else 0
+          sample_error = np.sqrt(sample_variance / len(int_data)) if int_data else 0
+
+          # the extra "correct" layer is not necessary but adds structure in the UI
+          return {
+              "correct": {
+                  "true_count": count_true,
+                  "true_fraction": sample_mean,
+                  "stderr": sample_error,
+              }
+          }
+  ```
+
+
+
+  ```plaintext
+  This feature is not available in TypeScript yet.  Stay tuned!
+  ```
+
+

 To use this as a scorer, you would initialize it and pass it to the `scorers` argument in your `Evaluation` like this:

+
+
+  ```python
+  evaluation = weave.Evaluation(dataset=questions, scorers=[CorrectnessLLMJudge()])
+  ```
+
+
+  ```plaintext
+  This feature is not available in TypeScript yet.  Stay tuned!
+  ```
+
+

 ## 4. Pulling it all together

 To get the same result for your RAG apps:
+
 - Wrap LLM calls & retrieval step functions with `weave.op()`
 - (optional) Create a `Model` subclass with `predict` function and app details
 - Collect examples to evaluate
@@ -268,146 +328,159 @@ To get the same result for your RAG apps:

 Here, we show the code in its entirety.

-```python
-from openai import OpenAI
-import weave
-from weave import Model
-import numpy as np
-import json
-import asyncio
-
-# Examples we've gathered that we want to use for evaluations
-articles = [
-    "Novo Nordisk and Eli Lilly rival soars 32 percent after promising weight loss drug results Shares of Denmarks Zealand Pharma shot 32 percent higher in morning trade, after results showed success in its liver disease treatment survodutide, which is also on trial as a drug to treat obesity. The trial “tells us that the 6mg dose is safe, which is the top dose used in the ongoing [Phase 3] obesity trial too,” one analyst said in a note. The results come amid feverish investor interest in drugs that can be used for weight loss.",
-    "Berkshire shares jump after big profit gain as Buffetts conglomerate nears $1 trillion valuation Berkshire Hathaway shares rose on Monday after Warren Buffetts conglomerate posted strong earnings for the fourth quarter over the weekend. Berkshires Class A and B shares jumped more than 1.5%, each. Class A shares are higher by more than 17% this year, while Class B has gained more than 18%. Berkshire was last valued at $930.1 billion, up from $905.5 billion where it closed on Friday, according to FactSet. Berkshire on Saturday posted fourth-quarter operating earnings of $8.481 billion, about 28 percent higher than the $6.625 billion from the year-ago period, driven by big gains in its insurance business. Operating earnings refers to profits from businesses across insurance, railroads and utilities. Meanwhile, Berkshires cash levels also swelled to record levels.
The conglomerate held $167.6 billion in cash in the fourth quarter, surpassing the $157.2 billion record the conglomerate held in the prior quarter.", - "Highmark Health says its combining tech from Google and Epic to give doctors easier access to information Highmark Health announced it is integrating technology from Google Cloud and the health-care software company Epic Systems. The integration aims to make it easier for both payers and providers to access key information they need, even if it's stored across multiple points and formats, the company said. Highmark is the parent company of a health plan with 7 million members, a provider network of 14 hospitals and other entities", - "Rivian and Lucid shares plunge after weak EV earnings reports Shares of electric vehicle makers Rivian and Lucid fell Thursday after the companies reported stagnant production in their fourth-quarter earnings after the bell Wednesday. Rivian shares sank about 25 percent, and Lucids stock dropped around 17 percent. Rivian forecast it will make 57,000 vehicles in 2024, slightly less than the 57,232 vehicles it produced in 2023. Lucid said it expects to make 9,000 vehicles in 2024, more than the 8,428 vehicles it made in 2023.", - "Mauritius blocks Norwegian cruise ship over fears of a potential cholera outbreak Local authorities on Sunday denied permission for the Norwegian Dawn ship, which has 2,184 passengers and 1,026 crew on board, to access the Mauritius capital of Port Louis, citing “potential health risks.” The Mauritius Ports Authority said Sunday that samples were taken from at least 15 passengers on board the cruise ship. A spokesperson for the U.S.-headquartered Norwegian Cruise Line Holdings said Sunday that 'a small number of guests experienced mild symptoms of a stomach-related illness' during Norwegian Dawns South Africa voyage.", - "Intuitive Machines lands on the moon in historic first for a U.S. company Intuitive Machines Nova-C cargo lander, named Odysseus after the mythological Greek hero, is the first U.S. spacecraft to soft land on the lunar surface since 1972. Intuitive Machines is the first company to pull off a moon landing — government agencies have carried out all previously successful missions. The company's stock surged in extended trading Thursday, after falling 11 percent in regular trading.", - "Lunar landing photos: Intuitive Machines Odysseus sends back first images from the moon Intuitive Machines cargo moon lander Odysseus returned its first images from the surface. Company executives believe the lander caught its landing gear sideways on the surface of the moon while touching down and tipped over. Despite resting on its side, the company's historic IM-1 mission is still operating on the moon.", -] - -def docs_to_embeddings(docs: list) -> list: - openai = OpenAI() - document_embeddings = [] - for doc in docs: - response = ( - openai.embeddings.create(input=doc, model="text-embedding-3-small") + + + ```python + from openai import OpenAI + import weave + from weave import Model + import numpy as np + import json + import asyncio + + # Examples we've gathered that we want to use for evaluations + articles = [ + "Novo Nordisk and Eli Lilly rival soars 32 percent after promising weight loss drug results Shares of Denmarks Zealand Pharma shot 32 percent higher in morning trade, after results showed success in its liver disease treatment survodutide, which is also on trial as a drug to treat obesity. 
The trial “tells us that the 6mg dose is safe, which is the top dose used in the ongoing [Phase 3] obesity trial too,” one analyst said in a note. The results come amid feverish investor interest in drugs that can be used for weight loss.", + "Berkshire shares jump after big profit gain as Buffetts conglomerate nears $1 trillion valuation Berkshire Hathaway shares rose on Monday after Warren Buffetts conglomerate posted strong earnings for the fourth quarter over the weekend. Berkshires Class A and B shares jumped more than 1.5%, each. Class A shares are higher by more than 17% this year, while Class B has gained more than 18%. Berkshire was last valued at $930.1 billion, up from $905.5 billion where it closed on Friday, according to FactSet. Berkshire on Saturday posted fourth-quarter operating earnings of $8.481 billion, about 28 percent higher than the $6.625 billion from the year-ago period, driven by big gains in its insurance business. Operating earnings refers to profits from businesses across insurance, railroads and utilities. Meanwhile, Berkshires cash levels also swelled to record levels. The conglomerate held $167.6 billion in cash in the fourth quarter, surpassing the $157.2 billion record the conglomerate held in the prior quarter.", + "Highmark Health says its combining tech from Google and Epic to give doctors easier access to information Highmark Health announced it is integrating technology from Google Cloud and the health-care software company Epic Systems. The integration aims to make it easier for both payers and providers to access key information they need, even if it's stored across multiple points and formats, the company said. Highmark is the parent company of a health plan with 7 million members, a provider network of 14 hospitals and other entities", + "Rivian and Lucid shares plunge after weak EV earnings reports Shares of electric vehicle makers Rivian and Lucid fell Thursday after the companies reported stagnant production in their fourth-quarter earnings after the bell Wednesday. Rivian shares sank about 25 percent, and Lucids stock dropped around 17 percent. Rivian forecast it will make 57,000 vehicles in 2024, slightly less than the 57,232 vehicles it produced in 2023. Lucid said it expects to make 9,000 vehicles in 2024, more than the 8,428 vehicles it made in 2023.", + "Mauritius blocks Norwegian cruise ship over fears of a potential cholera outbreak Local authorities on Sunday denied permission for the Norwegian Dawn ship, which has 2,184 passengers and 1,026 crew on board, to access the Mauritius capital of Port Louis, citing “potential health risks.” The Mauritius Ports Authority said Sunday that samples were taken from at least 15 passengers on board the cruise ship. A spokesperson for the U.S.-headquartered Norwegian Cruise Line Holdings said Sunday that 'a small number of guests experienced mild symptoms of a stomach-related illness' during Norwegian Dawns South Africa voyage.", + "Intuitive Machines lands on the moon in historic first for a U.S. company Intuitive Machines Nova-C cargo lander, named Odysseus after the mythological Greek hero, is the first U.S. spacecraft to soft land on the lunar surface since 1972. Intuitive Machines is the first company to pull off a moon landing — government agencies have carried out all previously successful missions. 
The company's stock surged in extended trading Thursday, after falling 11 percent in regular trading.", + "Lunar landing photos: Intuitive Machines Odysseus sends back first images from the moon Intuitive Machines cargo moon lander Odysseus returned its first images from the surface. Company executives believe the lander caught its landing gear sideways on the surface of the moon while touching down and tipped over. Despite resting on its side, the company's historic IM-1 mission is still operating on the moon.", + ] + + def docs_to_embeddings(docs: list) -> list: + openai = OpenAI() + document_embeddings = [] + for doc in docs: + response = ( + openai.embeddings.create(input=doc, model="text-embedding-3-small") + .data[0] + .embedding + ) + document_embeddings.append(response) + return document_embeddings + + article_embeddings = docs_to_embeddings(articles) # Note: you would typically do this once with your articles and put the embeddings & metadata in a database + + # We've added a decorator to our retrieval step + # highlight-next-line + @weave.op() + def get_most_relevant_document(query): + openai = OpenAI() + query_embedding = ( + openai.embeddings.create(input=query, model="text-embedding-3-small") .data[0] .embedding ) - document_embeddings.append(response) - return document_embeddings - -article_embeddings = docs_to_embeddings(articles) # Note: you would typically do this once with your articles and put the embeddings & metadata in a database - -# We've added a decorator to our retrieval step -# highlight-next-line -@weave.op() -def get_most_relevant_document(query): - openai = OpenAI() - query_embedding = ( - openai.embeddings.create(input=query, model="text-embedding-3-small") - .data[0] - .embedding + similarities = [ + np.dot(query_embedding, doc_emb) + / (np.linalg.norm(query_embedding) * np.linalg.norm(doc_emb)) + for doc_emb in article_embeddings + ] + # Get the index of the most similar document + most_relevant_doc_index = np.argmax(similarities) + return articles[most_relevant_doc_index] + + # We create a Model subclass with some details about our app, along with a predict function that produces a response + # highlight-next-line + class RAGModel(Model): + system_message: str + model_name: str = "gpt-3.5-turbo-1106" + + # highlight-next-line + @weave.op() + # highlight-next-line + def predict(self, question: str) -> dict: # note: `question` will be used later to select data from our evaluation rows + from openai import OpenAI + context = get_most_relevant_document(question) + client = OpenAI() + query = f"""Use the following information to answer the subsequent question. If the answer cannot be found, write "I don't know." + Context: + \"\"\" + {context} + \"\"\" + Question: {question}""" + response = client.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": self.system_message}, + {"role": "user", "content": query}, + ], + temperature=0.0, + response_format={"type": "text"}, + ) + answer = response.choices[0].message.content + return {'answer': answer, 'context': context} + + # highlight-next-line + weave.init('rag-qa') + # highlight-next-line + model = RAGModel( + system_message="You are an expert in finance and answer questions related to finance, financial services, and financial markets. When responding based on provided information, be sure to cite the source." 
 )
-    similarities = [
-        np.dot(query_embedding, doc_emb)
-        / (np.linalg.norm(query_embedding) * np.linalg.norm(doc_emb))
-        for doc_emb in article_embeddings
-    ]
-    # Get the index of the most similar document
-    most_relevant_doc_index = np.argmax(similarities)
-    return articles[most_relevant_doc_index]
-# We create a Model subclass with some details about our app, along with a predict function that produces a response
-# highlight-next-line
-class RAGModel(Model):
-    system_message: str
-    model_name: str = "gpt-3.5-turbo-1106"
-
-# highlight-next-line
+    # Here our scoring function uses the question and output to produce a score
+    # highlight-next-line
 @weave.op()
-# highlight-next-line
-    def predict(self, question: str) -> dict: # note: `question` will be used later to select data from our evaluation rows
-        from openai import OpenAI
-        context = get_most_relevant_document(question)
+    # highlight-next-line
+    async def context_precision_score(question, output):
+        context_precision_prompt = """Given question, answer and context verify if the context was useful in arriving at the given answer. Give verdict as "1" if useful and "0" if not with json output.
+        Output in only valid JSON format.
+
+        question: {question}
+        context: {context}
+        answer: {answer}
+        verdict: """
        client = OpenAI()
-        query = f"""Use the following information to answer the subsequent question. If the answer cannot be found, write "I don't know."
-        Context:
-        \"\"\"
-        {context}
-        \"\"\"
-        Question: {question}"""
+
+        prompt = context_precision_prompt.format(
+            question=question,
+            context=output['context'],
+            answer=output['answer'],
+        )
+
        response = client.chat.completions.create(
-            model=self.model_name,
-            messages=[
-                {"role": "system", "content": self.system_message},
-                {"role": "user", "content": query},
-            ],
-            temperature=0.0,
-            response_format={"type": "text"},
+            model="gpt-4-turbo-preview",
+            messages=[{"role": "user", "content": prompt}],
+            response_format={ "type": "json_object" }
        )
-        answer = response.choices[0].message.content
-        return {'answer': answer, 'context': context}
-
-# highlight-next-line
-weave.init('rag-qa')
-# highlight-next-line
-model = RAGModel(
-    system_message="You are an expert in finance and answer questions related to finance, financial services, and financial markets. When responding based on provided information, be sure to cite the source."
-)
-
-# Here is our scoring function uses our question and model_output to product a score
-# highlight-next-line
-@weave.op()
-# highlight-next-line
-async def context_precision_score(question, model_output):
-    context_precision_prompt = """Given question, answer and context verify if the context was useful in arriving at the given answer. Give verdict as "1" if useful and "0" if not with json output.
-    Output in only valid JSON format. 
-
-    question: {question}
-    context: {context}
-    answer: {answer}
-    verdict: """
-    client = OpenAI()
-
-    prompt = context_precision_prompt.format(
-        question=question,
-        context=model_output['context'],
-        answer=model_output['answer'],
-    )
+        response_message = response.choices[0].message
+        response = json.loads(response_message.content)
+        return {
+            "verdict": int(response["verdict"]) == 1,
+        }
-    response = client.chat.completions.create(
-        model="gpt-4-turbo-preview",
-        messages=[{"role": "user", "content": prompt}],
-        response_format={ "type": "json_object" }
-    )
-    response_message = response.choices[0].message
-    response = json.loads(response_message.content)
-    return {
-        "verdict": int(response["verdict"]) == 1,
-    }
-
-questions = [
-    {"question": "What significant result was reported about Zealand Pharma's obesity trial?"},
-    {"question": "How much did Berkshire Hathaway's cash levels increase in the fourth quarter?"},
-    {"question": "What is the goal of Highmark Health's integration of Google Cloud and Epic Systems technology?"},
-    {"question": "What were Rivian and Lucid's vehicle production forecasts for 2024?"},
-    {"question": "Why was the Norwegian Dawn cruise ship denied access to Mauritius?"},
-    {"question": "Which company achieved the first U.S. moon landing since 1972?"},
-    {"question": "What issue did Intuitive Machines' lunar lander encounter upon landing on the moon?"}
-]
-
-# We define an Evaluation object and pass our example questions along with scoring functions
-# highlight-next-line
-evaluation = weave.Evaluation(dataset=questions, scorers=[context_precision_score])
-# highlight-next-line
-asyncio.run(evaluation.evaluate(model))
-```
+    questions = [
+        {"question": "What significant result was reported about Zealand Pharma's obesity trial?"},
+        {"question": "How much did Berkshire Hathaway's cash levels increase in the fourth quarter?"},
+        {"question": "What is the goal of Highmark Health's integration of Google Cloud and Epic Systems technology?"},
+        {"question": "What were Rivian and Lucid's vehicle production forecasts for 2024?"},
+        {"question": "Why was the Norwegian Dawn cruise ship denied access to Mauritius?"},
+        {"question": "Which company achieved the first U.S. moon landing since 1972?"},
+        {"question": "What issue did Intuitive Machines' lunar lander encounter upon landing on the moon?"}
+    ]
+
+    # We define an Evaluation object and pass our example questions along with scoring functions
+    # highlight-next-line
+    evaluation = weave.Evaluation(dataset=questions, scorers=[context_precision_score])
+    # highlight-next-line
+    asyncio.run(evaluation.evaluate(model))
+    ```
+
+
+
+    ```plaintext
+    This feature is not available in TypeScript yet. Stay tuned!
+    ```
+
+
 ## Conclusion

 We've learned how to build observability into different steps of our applications, like the retrieval step in this example. We've also learned how to build more complex scoring functions, like an LLM judge, for doing automatic evaluation of application responses.
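A note on the scorer signature used above: scorers are plain ops that receive dataset columns by name (e.g. `question`) and the model's result through the `output` parameter that this diff migrates to. Scorers don't have to call an LLM; a deterministic check plugs into the same `Evaluation`. A minimal sketch, assuming the `questions`, `model`, and `context_precision_score` definitions from the tutorial above are in scope (`has_answer` is illustrative and not part of the PR):

```python
import asyncio

import weave

# A deterministic scorer: dataset columns are matched by name, and `output`
# receives the dict returned by RAGModel.predict ({'answer': ..., 'context': ...}).
@weave.op()
def has_answer(output: dict) -> dict:
    # Flag rows where the RAG model gave up.
    return {"has_answer": output["answer"].strip() != "I don't know."}

# Multiple scorers can run in the same evaluation.
evaluation = weave.Evaluation(
    dataset=questions, scorers=[context_precision_score, has_answer]
)
asyncio.run(evaluation.evaluate(model))
```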
diff --git a/docs/docs/tutorial-tracing_2.md b/docs/docs/tutorial-tracing_2.md
index 461132be8b75..719ee99e0128 100644
--- a/docs/docs/tutorial-tracing_2.md
+++ b/docs/docs/tutorial-tracing_2.md
@@ -1,100 +1,176 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Track data flows and app metadata

 In the [Track LLM inputs & outputs](/quickstart) tutorial, the basics of tracking the inputs and outputs of your LLMs were covered.
 In this tutorial you will learn how to:
+
 - **Track data** as it flows through your application
 - **Track metadata** at call time

 ## Tracking nested function calls

-LLM-powered applications can contain multiple LLMs calls and additional data processing and validation logic that is important to monitor. Even deep nested call structures common in many apps, Weave will keep track of the parent-child relationships in nested functions as long as `weave.op()` is added to every function you'd like to track.
+LLM-powered applications can contain multiple LLM calls and additional data processing and validation logic that is important to monitor. Even with deeply nested call structures, common in many apps, Weave will keep track of the parent-child relationships in nested functions as long as `weave.op()` is added to every function you'd like to track.

 Building on our [basic tracing example](/quickstart), we will now add additional logic to count the returned items from our LLM and wrap them all in a higher level function. We'll then add `weave.op()` to trace every function, its call order and its parent-child relationship:

-```python
-import weave
-import json
-from openai import OpenAI
-
-client = OpenAI(api_key="...")
-
-# highlight-next-line
-@weave.op()
-def extract_dinos(sentence: str) -> dict:
-    response = client.chat.completions.create(
-        model="gpt-4o",
-        messages=[
-            {
-                "role": "system",
-                "content": """Extract any dinosaur `name`, their `common_name`, \
-names and whether its `diet` is a herbivore or carnivore, in JSON format."""
-            },
-            {
-                "role": "user",
-                "content": sentence
-            }
-        ],
-        response_format={ "type": "json_object" }
-    )
-    return response.choices[0].message.content
-
-# highlight-next-line
-@weave.op()
-def count_dinos(dino_data: dict) -> int:
-    # count the number of items in the returned list
-    k = list(dino_data.keys())[0]
-    return len(dino_data[k])
-
-# highlight-next-line
-@weave.op()
-def dino_tracker(sentence: str) -> dict:
-    # extract dinosaurs using a LLM
-    dino_data = extract_dinos(sentence)
-
-    # count the number of dinosaurs returned
-    dino_data = json.loads(dino_data)
-    n_dinos = count_dinos(dino_data)
-    return {"n_dinosaurs": n_dinos, "dinosaurs": dino_data}
-
-# highlight-next-line
-weave.init('jurassic-park')
-
-sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
-both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \
-Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""
-
-result = dino_tracker(sentence)
-print(result)
-```
-
-**Nested functions**
-
-When you run the above code you will see the the inputs and outputs from the two nested functions (`extract_dinos` and `count_dinos`), as well as the automatically-logged OpenAI trace. 
-
-![Nested Weave Trace](../static/img/tutorial_tracing_2_nested_dinos.png)
+
+
+
+  ```python
+  import weave
+  import json
+  from openai import OpenAI
+
+  client = OpenAI(api_key="...")
+
+  # highlight-next-line
+  @weave.op()
+  def extract_dinos(sentence: str) -> dict:
+      response = client.chat.completions.create(
+          model="gpt-4o",
+          messages=[
+              {
+                  "role": "system",
+                  "content": """Extract any dinosaur `name`, their `common_name`, \
+  names and whether its `diet` is a herbivore or carnivore, in JSON format."""
+              },
+              {
+                  "role": "user",
+                  "content": sentence
+              }
+          ],
+          response_format={ "type": "json_object" }
+      )
+      return response.choices[0].message.content
+
+  # highlight-next-line
+  @weave.op()
+  def count_dinos(dino_data: dict) -> int:
+      # count the number of items in the returned list
+      k = list(dino_data.keys())[0]
+      return len(dino_data[k])
+
+  # highlight-next-line
+  @weave.op()
+  def dino_tracker(sentence: str) -> dict:
+      # extract dinosaurs using a LLM
+      dino_data = extract_dinos(sentence)
+
+      # count the number of dinosaurs returned
+      dino_data = json.loads(dino_data)
+      n_dinos = count_dinos(dino_data)
+      return {"n_dinosaurs": n_dinos, "dinosaurs": dino_data}
+
+  # highlight-next-line
+  weave.init('jurassic-park')
+
+  sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
+  both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \
+  Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""
+  result = dino_tracker(sentence)
+  print(result)
+  ```
+
+  **Nested functions**
-## Tracking metadata
+  When you run the above code you will see the inputs and outputs from the two nested functions (`extract_dinos` and `count_dinos`), as well as the automatically-logged OpenAI trace.
-Tracking metadata can be done easily by using the `weave.attributes` context manager and passing it a dictionary of the metadata to track at call time.
+  ![Nested Weave Trace](../static/img/tutorial_tracing_2_nested_dinos.png)
-Continuing our example from above:
+
+
-```python
-import weave
+  ```typescript
+  import OpenAI from 'openai';
+  import * as weave from 'weave';
-weave.init('jurassic-park')
+  const openai = weave.wrapOpenAI(new OpenAI());
-sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
-both carnivore and herbivore locked in an ancient dance. 
Meanwhile, a gentle giant \
-Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""
+  const extractDinos = weave.op(async (sentence: string) => {
+    const response = await openai.chat.completions.create({
+      model: 'gpt-4o',
+      messages: [
+        {
+          role: 'system',
+          content:
+            'Extract any dinosaur `name`, their `common_name`, names and whether its `diet` is a herbivore or carnivore, in JSON format.',
+        },
+        {role: 'user', content: sentence},
+      ],
+      response_format: {type: 'json_object'},
+    });
+    return response.choices[0].message.content;
+  });
-# track metadata alongside our previously defined function
-# highlight-next-line
-with weave.attributes({'user_id': 'lukas', 'env': 'production'}):
-    result = dino_tracker(sentence)
-```
+  const countDinos = weave.op(async (dinoData: string) => {
+    const parsed = JSON.parse(dinoData);
+    return Object.keys(parsed).length;
+  });
+
+  const dinoTracker = weave.op(async (sentence: string) => {
+    const dinoData = await extractDinos(sentence);
+    const nDinos = await countDinos(dinoData);
+    return {nDinos, dinoData};
+  });
+
+  async function main() {
+    await weave.init('jurassic-park');
+
+    const sentence = `I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike),
+    both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant
+    Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below.`;
+
+    const result = await dinoTracker(sentence);
+    console.log(result);
+  }
+
+  main();
+
+  ```
+
+  **Nested functions**
+
+  When you run the above code you will see the inputs and outputs from the two nested functions (`extractDinos` and `countDinos`), as well as the automatically-logged OpenAI trace.
+
+
+  ![Nested Weave Trace](../static/img/tutorial_tracing_2_nested_dinos.png)
+
+
+
+
 ## Tracking metadata
+
 Tracking metadata can be done easily by using the `weave.attributes` context manager and passing it a dictionary of the metadata to track at call time.

 Continuing our example from above:
+
+
+  ```python
+  import weave
+
+  weave.init('jurassic-park')
+
+  sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
+  both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \
+  Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""
+
+  # track metadata alongside our previously defined function
+  # highlight-next-line
+  with weave.attributes({'user_id': 'lukas', 'env': 'production'}):
+      result = dino_tracker(sentence)
+  ```
+
+
+
+  ```plaintext
+  This feature is not available in TypeScript yet. Stay tuned!
+  ```
+
+
 :::note
 It's recommended to use metadata tracking for run-time metadata, e.g. user ids, or whether the call is part of the development process or is in production.
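The note above generalizes: because `weave.attributes` takes an ordinary dictionary at call time, the values can be computed per request. A minimal sketch, assuming the `dino_tracker` op and `sentence` from this tutorial are in scope; `APP_ENV` is a hypothetical environment variable, not part of the PR:

```python
import os

import weave

weave.init('jurassic-park')

# Derive metadata at run time, e.g. to separate development traffic from
# production traffic (APP_ENV is an illustrative environment variable).
env = os.environ.get('APP_ENV', 'development')

with weave.attributes({'user_id': 'lukas', 'env': env}):
    result = dino_tracker(sentence)
```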
diff --git a/docs/docs/tutorial-weave_models.md b/docs/docs/tutorial-weave_models.md
index 3a3470d8a633..e9e2ad997326 100644
--- a/docs/docs/tutorial-weave_models.md
+++ b/docs/docs/tutorial-weave_models.md
@@ -1,7 +1,9 @@
-# App versioning
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';

-Tracking the [inputs, outputs, metadata](/quickstart) as well as [data flowing through your app](/tutorial-tracing_2) is critical to understanding the performance of your system. However **versioning your app over time** is also critical to understand how modifications to your code or app attributes change your outputs. Weave's `Model` class is how these changes can be tracked in Weave.
+# App versioning
+Tracking the [inputs, outputs, metadata](/quickstart) as well as [data flowing through your app](/tutorial-tracing_2) is critical to understanding the performance of your system. However **versioning your app over time** is also critical to understanding how modifications to your code or app attributes change your outputs. Weave's `Model` class is how these changes can be tracked in Weave.

 In this tutorial you'll learn:

@@ -10,6 +12,12 @@ In this tutorial you'll learn:

 ## Using `weave.Model`

+:::warning
+
+The `weave.Model` class is currently only supported in Python!
+
+:::
+
 Using Weave `Model`s means that attributes such as model vendor ids, prompts, temperature, and more are stored and versioned when they change.

 To create a `Model` in Weave, you need the following:

@@ -22,105 +30,136 @@ When you change the class attributes or the code that defines your model, **thes

 In the example below, the **model name, temperature and system prompt will be tracked and versioned**:

-```python
-import json
-from openai import OpenAI
-
-import weave
-
-@weave.op()
-def extract_dinos(wmodel: weave.Model, sentence: str) -> dict:
-    response = wmodel.client.chat.completions.create(
-        model=wmodel.model_name,
-        temperature=wmodel.temperature,
-        messages=[
-            {
-                "role": "system",
-                "content": wmodel.system_prompt
-            },
-            {
-                "role": "user",
-                "content": sentence
-            }
-        ],
-        response_format={ "type": "json_object" }
-    )
-    return response.choices[0].message.content
-
-# Sub-class with a weave.Model
-# highlight-next-line
-class ExtractDinos(weave.Model):
-    client: OpenAI = None
-    model_name: str
-    temperature: float
-    system_prompt: str
-
-    # Ensure your function is called `invoke` or `predict`
-    # highlight-next-line
+
+
+  ```python
+  import json
+  from openai import OpenAI
+
+  import weave
+  @weave.op()
+  def extract_dinos(wmodel: weave.Model, sentence: str) -> dict:
+      response = wmodel.client.chat.completions.create(
+          model=wmodel.model_name,
+          temperature=wmodel.temperature,
+          messages=[
+              {
+                  "role": "system",
+                  "content": wmodel.system_prompt
+              },
+              {
+                  "role": "user",
+                  "content": sentence
+              }
+          ],
+          response_format={ "type": "json_object" }
+      )
+      return response.choices[0].message.content
+
+  # Sub-class with a weave.Model
  # highlight-next-line
-    def invoke(self, sentence: str) -> dict:
-        dino_data = extract_dinos(self, sentence)
-        return json.loads(dino_data)
-```
+  class ExtractDinos(weave.Model):
+      client: OpenAI = None
+      model_name: str
+      temperature: float
+      system_prompt: str
+
+      # Ensure your function is called `invoke` or `predict`
+      # highlight-next-line
+      @weave.op()
+      # highlight-next-line
+      def invoke(self, sentence: str) -> dict:
+          dino_data = extract_dinos(self, sentence)
+          return json.loads(dino_data)
+  ```
+
+
+
+  ```plaintext
+  This feature is not available in TypeScript yet. Stay tuned! 
+  ```
+
+

 Now you can instantiate and call the model with `invoke`:

-```python
-weave.init('jurassic-park')
-client = OpenAI()
+
+
+  ```python
+  weave.init('jurassic-park')
+  client = OpenAI()

-system_prompt = """Extract any dinosaur `name`, their `common_name`, \
-names and whether its `diet` is a herbivore or carnivore, in JSON format."""
+  system_prompt = """Extract any dinosaur `name`, their `common_name`, \
+  names and whether its `diet` is a herbivore or carnivore, in JSON format."""

-# highlight-next-line
-dinos = ExtractDinos(
-    client=client,
-    model_name='gpt-4o',
-    temperature=0.4,
-    system_prompt=system_prompt
-)
+  # highlight-next-line
+  dinos = ExtractDinos(
+      client=client,
+      model_name='gpt-4o',
+      temperature=0.4,
+      system_prompt=system_prompt
+  )

-sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
-both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \
-Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""
+  sentence = """I watched as a Tyrannosaurus rex (T. rex) chased after a Triceratops (Trike), \
+  both carnivore and herbivore locked in an ancient dance. Meanwhile, a gentle giant \
+  Brachiosaurus (Brachi) calmly munched on treetops, blissfully unaware of the chaos below."""

-# highlight-next-line
-result = dinos.invoke(sentence)
-print(result)
-```
+  # highlight-next-line
+  result = dinos.invoke(sentence)
+  print(result)
+  ```
+
+
+
+  ```plaintext
+  This feature is not available in TypeScript yet. Stay tuned!
+  ```
+
+

 After calling `.invoke`, you can see that the trace in Weave **now tracks the model attributes as well as the code** for the model functions that have been decorated with `weave.op()`. You can see the model is also versioned ("v21" in this case), and if you click on the model **you can see all of the calls** that have used that version of the model.

 ![Re-using a weave model](../static/img/tutorial-model_invoke3.png)

 **A note on using `weave.Model`:**
+
 - You can use `predict` instead of `invoke` for the name of the function in your Weave `Model` if you prefer.
 - If you want other class methods to be tracked by weave they need to be wrapped in `weave.op()`
 - Attributes starting with an underscore are ignored by weave and won't be logged

 ## Exporting and re-using a logged `weave.Model`
+
 Because Weave stores and versions Models that have been invoked, it is possible to export and re-use these models.

 **Get the Model ref**
 In the Weave UI you can get the Model ref for a particular version
-
 **Using the Model**

 Once you have the URI of the Model object, you can export and re-use it. Note that the exported model is already initialised and ready to use:

-```python
-# the exported weave model is already initialised and ready to be called
-# highlight-next-line
-new_dinos = weave.ref("weave:///morgan/jurassic-park/object/ExtractDinos:ey4udBU2MU23heQFJenkVxLBX4bmDsFk7vsGcOWPjY4").get()
-
-# set the client to the openai client again
-new_dinos.client = client
-
-new_sentence = """I also saw an Ankylosaurus grazing on giant ferns"""
-new_result = new_dinos.invoke(new_sentence)
-print(new_result)
-```
+
+
+  ```python
+  # the exported weave model is already initialised and ready to be called
+  # highlight-next-line
+  new_dinos = weave.ref("weave:///morgan/jurassic-park/object/ExtractDinos:ey4udBU2MU23heQFJenkVxLBX4bmDsFk7vsGcOWPjY4").get()
+
+  # set the client to the openai client again
+  new_dinos.client = client
+
+  new_sentence = """I also saw an Ankylosaurus grazing on giant ferns"""
+  new_result = new_dinos.invoke(new_sentence)
+  print(new_result)
+  ```
+
+
+
+  ```plaintext
+  This feature is not available in TypeScript yet. Stay tuned!
+  ```
+
+

 Here you can now see the same Model version (v21) was used with the new input:
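Conversely, changing any tracked attribute before invoking is what produces the next Model version. A minimal sketch (the new temperature value is illustrative), assuming the `ExtractDinos` class, `client`, `system_prompt`, and `sentence` from above are in scope:

```python
# Changing a tracked attribute and invoking again yields a new Model version
# in the Weave UI (e.g. v22 after v21). The value 0.1 is only an example.
dinos_cooler = ExtractDinos(
    client=client,
    model_name='gpt-4o',
    temperature=0.1,  # changed from 0.4
    system_prompt=system_prompt,
)

result = dinos_cooler.invoke(sentence)
print(result)
```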
diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts
index 81ecbc46047f..9616064c73e9 100644
--- a/docs/docusaurus.config.ts
+++ b/docs/docusaurus.config.ts
@@ -1,7 +1,7 @@
-import { themes as prismThemes } from "prism-react-renderer";
-import type { Config } from "@docusaurus/types";
 import type * as Preset from "@docusaurus/preset-classic";
+import type {Config} from "@docusaurus/types";
 import type * as OpenApiPlugin from "docusaurus-plugin-openapi-docs";
+import {themes as prismThemes} from "prism-react-renderer";

 const config: Config = {
   title: "W&B Weave",
@@ -108,7 +108,7 @@ const config: Config = {
       anonymizeIP: true,
       },
     ],
-    'plugin-image-zoom',
+    "plugin-image-zoom",
   ],

   themes: [
@@ -153,6 +153,11 @@ const config: Config = {
           sidebarId: "pythonSdkSidebar",
           label: "Python SDK",
         },
+        {
+          type: "docSidebar",
+          sidebarId: "typescriptSdkSidebar",
+          label: "TypeScript SDK",
+        },
         {
           type: "docSidebar",
           sidebarId: "serviceApiSidebar",
@@ -233,13 +238,13 @@ const config: Config = {
         {
           className: "theme-code-block-highlighted-line",
           line: "highlight-next-line",
-          block: { start: "highlight-start", end: "highlight-end" },
+          block: {start: "highlight-start", end: "highlight-end"},
         },
       ],
     },
     imageZoom: {
       // CSS selector to apply the plugin to
-      selector: 'img.zoomable',
+      selector: "img.zoomable",
       // Optional medium-zoom options
       // see: https://www.npmjs.com/package/medium-zoom#options
       options: {},
diff --git a/docs/notebooks/leaderboard_quickstart.ipynb b/docs/notebooks/leaderboard_quickstart.ipynb
new file mode 100644
index 000000000000..b09d336de51c
--- /dev/null
+++ b/docs/notebooks/leaderboard_quickstart.ipynb
@@ -0,0 +1,445 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "\n",
+    ""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Leaderboard Quickstart\n",
+    "\n",
+    "In this notebook we will learn to use Weave's Leaderboard to compare model performance across different datasets and scoring functions. Specifically, we will:\n",
+    "\n",
+    "1. Generate a dataset of fake zip code data\n",
+    "2. Author some scoring functions and evaluate a baseline model.\n",
+    "3. Use these techniques to evaluate a matrix of models vs evaluations.\n",
+    "4. Review the leaderboard in the Weave UI."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 1: Generate a dataset of fake zip code data\n",
+    "\n",
+    "First we will create a function `generate_dataset_rows` that generates a list of fake zip code data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "from openai import OpenAI\n",
+    "from pydantic import BaseModel\n",
+    "\n",
+    "\n",
+    "class Row(BaseModel):\n",
+    "    zip_code: str\n",
+    "    city: str\n",
+    "    state: str\n",
+    "    avg_temp_f: float\n",
+    "    population: int\n",
+    "    median_income: int\n",
+    "    known_for: str\n",
+    "\n",
+    "\n",
+    "class Rows(BaseModel):\n",
+    "    rows: list[Row]\n",
+    "\n",
+    "\n",
+    "def generate_dataset_rows(\n",
+    "    location: str = \"United States\", count: int = 5, year: int = 2022\n",
+    "):\n",
+    "    client = OpenAI()\n",
+    "\n",
+    "    completion = client.chat.completions.create(\n",
+    "        model=\"gpt-4o-mini\",\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
+    "            {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": f\"Please generate {count} rows of data for random zip codes in {location} for the year {year}.\",\n",
+    "            },\n",
+    "        ],\n",
+    "        response_format={\n",
+    "            \"type\": \"json_schema\",\n",
+    "            \"json_schema\": {\n",
+    "                \"name\": \"response_format\",\n",
+    "                \"schema\": Rows.model_json_schema(),\n",
+    "            },\n",
+    "        },\n",
+    "    )\n",
+    "\n",
+    "    return json.loads(completion.choices[0].message.content)[\"rows\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import weave\n",
+    "\n",
+    "weave.init(\"leaderboard-demo\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 2: Author scoring functions\n",
+    "\n",
+    "Next we will author 3 scoring functions:\n",
+    "\n",
+    "1. `check_concrete_fields`: Checks if the model output matches the expected city and state.\n",
+    "2. `check_value_fields`: Checks if the model output is within 10% of the expected population and median income.\n",
+    "3. `check_subjective_fields`: Uses an LLM to check if the model output matches the expected \"known for\" field.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@weave.op\n",
+    "def check_concrete_fields(city: str, state: str, output: dict):\n",
+    "    return {\n",
+    "        \"city_match\": city == output[\"city\"],\n",
+    "        \"state_match\": state == output[\"state\"],\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "@weave.op\n",
+    "def check_value_fields(\n",
+    "    avg_temp_f: float, population: int, median_income: int, output: dict\n",
+    "):\n",
+    "    return {\n",
+    "        \"avg_temp_f_err\": abs(avg_temp_f - output[\"avg_temp_f\"]) / avg_temp_f,\n",
+    "        \"population_err\": abs(population - output[\"population\"]) / population,\n",
+    "        \"median_income_err\": abs(median_income - output[\"median_income\"])\n",
+    "        / median_income,\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "@weave.op\n",
+    "def check_subjective_fields(zip_code: str, known_for: str, output: dict):\n",
+    "    client = OpenAI()\n",
+    "\n",
+    "    class Response(BaseModel):\n",
+    "        correct_known_for: bool\n",
+    "\n",
+    "    completion = client.chat.completions.create(\n",
+    "        model=\"gpt-4o-mini\",\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
+    "            {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": f\"My student was asked what the zip code {zip_code} is best known for. 
The right answer is '{known_for}', and they said '{output['known_for']}'. Is their answer correct?\",\n",
+    "            },\n",
+    "        ],\n",
+    "        response_format={\n",
+    "            \"type\": \"json_schema\",\n",
+    "            \"json_schema\": {\n",
+    "                \"name\": \"response_format\",\n",
+    "                \"schema\": Response.model_json_schema(),\n",
+    "            },\n",
+    "        },\n",
+    "    )\n",
+    "\n",
+    "    return json.loads(completion.choices[0].message.content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 3: Create a simple Evaluation\n",
+    "\n",
+    "Next we define a simple evaluation using our fake data and scoring functions.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rows = generate_dataset_rows()\n",
+    "evaluation = weave.Evaluation(\n",
+    "    name=\"United States - 2022\",\n",
+    "    dataset=rows,\n",
+    "    scorers=[\n",
+    "        check_concrete_fields,\n",
+    "        check_value_fields,\n",
+    "        check_subjective_fields,\n",
+    "    ],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 4: Evaluate a baseline model\n",
+    "\n",
+    "Now we will evaluate a baseline model which returns a static response.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@weave.op\n",
+    "def baseline_model(zip_code: str):\n",
+    "    return {\n",
+    "        \"city\": \"New York\",\n",
+    "        \"state\": \"NY\",\n",
+    "        \"avg_temp_f\": 50.0,\n",
+    "        \"population\": 1000000,\n",
+    "        \"median_income\": 100000,\n",
+    "        \"known_for\": \"The Big Apple\",\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "await evaluation.evaluate(baseline_model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 5: Create more Models\n",
+    "\n",
+    "Now we will create 2 more models to compare against the baseline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@weave.op\n",
+    "def gpt_4o_mini_no_context(zip_code: str):\n",
+    "    client = OpenAI()\n",
+    "\n",
+    "    completion = client.chat.completions.create(\n",
+    "        model=\"gpt-4o-mini\",\n",
+    "        messages=[{\"role\": \"user\", \"content\": f\"\"\"Zip code {zip_code}\"\"\"}],\n",
+    "        response_format={\n",
+    "            \"type\": \"json_schema\",\n",
+    "            \"json_schema\": {\n",
+    "                \"name\": \"response_format\",\n",
+    "                \"schema\": Row.model_json_schema(),\n",
+    "            },\n",
+    "        },\n",
+    "    )\n",
+    "\n",
+    "    return json.loads(completion.choices[0].message.content)\n",
+    "\n",
+    "\n",
+    "await evaluation.evaluate(gpt_4o_mini_no_context)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@weave.op\n",
+    "def gpt_4o_mini_with_context(zip_code: str):\n",
+    "    client = OpenAI()\n",
+    "\n",
+    "    completion = client.chat.completions.create(\n",
+    "        model=\"gpt-4o-mini\",\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": f\"\"\"Please answer the following questions about the zip code {zip_code}:\n",
+    "                   1. What is the city?\n",
+    "                   2. What is the state?\n",
+    "                   3. What is the average temperature in Fahrenheit?\n",
+    "                   4. What is the population?\n",
+    "                   5. What is the median income?\n",
+    "                   6. 
What is the most well known thing about this zip code?\n", + " \"\"\",\n", + " }\n", + " ],\n", + " response_format={\n", + " \"type\": \"json_schema\",\n", + " \"json_schema\": {\n", + " \"name\": \"response_format\",\n", + " \"schema\": Row.model_json_schema(),\n", + " },\n", + " },\n", + " )\n", + "\n", + " return json.loads(completion.choices[0].message.content)\n", + "\n", + "\n", + "await evaluation.evaluate(gpt_4o_mini_with_context)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 6: Create more Evaluations\n", + "\n", + "Now we will evaluate a matrix of models vs evaluations.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scorers = [\n", + " check_concrete_fields,\n", + " check_value_fields,\n", + " check_subjective_fields,\n", + "]\n", + "evaluations = [\n", + " weave.Evaluation(\n", + " name=\"United States - 2022\",\n", + " dataset=weave.Dataset(\n", + " name=\"United States - 2022\",\n", + " rows=generate_dataset_rows(\"United States\", 5, 2022),\n", + " ),\n", + " scorers=scorers,\n", + " ),\n", + " weave.Evaluation(\n", + " name=\"California - 2022\",\n", + " dataset=weave.Dataset(\n", + " name=\"California - 2022\", rows=generate_dataset_rows(\"California\", 5, 2022)\n", + " ),\n", + " scorers=scorers,\n", + " ),\n", + " weave.Evaluation(\n", + " name=\"United States - 2000\",\n", + " dataset=weave.Dataset(\n", + " name=\"United States - 2000\",\n", + " rows=generate_dataset_rows(\"United States\", 5, 2000),\n", + " ),\n", + " scorers=scorers,\n", + " ),\n", + "]\n", + "models = [\n", + " baseline_model,\n", + " gpt_4o_mini_no_context,\n", + " gpt_4o_mini_with_context,\n", + "]\n", + "\n", + "for evaluation in evaluations:\n", + " for model in models:\n", + " await evaluation.evaluate(\n", + " model, __weave={\"display_name\": evaluation.name + \":\" + model.__name__}\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 7: Review the Leaderboard\n", + "\n", + "You can create a new leaderboard by navigating to the leaderboard tab in the UI and clicking \"Create Leaderboard\".\n", + "\n", + "We can also generate a leaderboard directly from Python:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📦 Published to https://wandb.ai/timssweeney/leaderboard-demo/weave/leaderboards/Zip-Code-World-Knowledge\n" + ] + } + ], + "source": [ + "from weave.flow import leaderboard\n", + "from weave.trace.weave_client import get_ref\n", + "\n", + "spec = leaderboard.Leaderboard(\n", + " name=\"Zip Code World Knowledge\",\n", + " description=\"\"\"\n", + "This leaderboard compares the performance of models in terms of world knowledge about zip codes.\n", + "\n", + "### Columns\n", + "\n", + "1. **State Match against `United States - 2022`**: The fraction of zip codes that the model correctly identified the state for.\n", + "2. **Avg Temp F Error against `California - 2022`**: The mean absolute error of the model's average temperature prediction.\n", + "3. 
**Correct Known For against `United States - 2000`**: The fraction of zip codes that the model correctly identified the most well known thing about the zip code.\n", + "\"\"\",\n", + " columns=[\n", + " leaderboard.LeaderboardColumn(\n", + " evaluation_object_ref=get_ref(evaluations[0]).uri(),\n", + " scorer_name=\"check_concrete_fields\",\n", + " summary_metric_path=\"state_match.true_fraction\",\n", + " ),\n", + " leaderboard.LeaderboardColumn(\n", + " evaluation_object_ref=get_ref(evaluations[1]).uri(),\n", + " scorer_name=\"check_value_fields\",\n", + " should_minimize=True,\n", + " summary_metric_path=\"avg_temp_f_err.mean\",\n", + " ),\n", + " leaderboard.LeaderboardColumn(\n", + " evaluation_object_ref=get_ref(evaluations[2]).uri(),\n", + " scorer_name=\"check_subjective_fields\",\n", + " summary_metric_path=\"correct_known_for.true_fraction\",\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ref = weave.publish(spec)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wandb-weave", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/scripts/generate_typescript_sdk_docs.sh b/docs/scripts/generate_typescript_sdk_docs.sh new file mode 100644 index 000000000000..0025641b8494 --- /dev/null +++ b/docs/scripts/generate_typescript_sdk_docs.sh @@ -0,0 +1,5 @@ +cd ../sdks/node +pnpm typedoc src/index.ts \ + --plugin typedoc-plugin-markdown \ + --out ../../docs/docs/reference/typescript-sdk/weave/ \ + --readme none diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 303532aeb97f..29e0a09ccd9d 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -129,6 +129,8 @@ const sidebars: SidebarsConfig = { ], }, ], + // TODO: add the actual ts-sdk sidebar + typescriptSdkSidebar: [{ type: "autogenerated", dirName: "reference/typescript-sdk" }], pythonSdkSidebar: [{ type: "autogenerated", dirName: "reference/python-sdk" }], serviceApiSidebar: require("./docs/reference/service-api/sidebar.ts").filter((row) => { if (row.id == "reference/service-api/fastapi") { diff --git a/noxfile.py b/noxfile.py index 90aa3bfaac4c..e2e3d7748cb1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -25,6 +25,12 @@ def lint(session): @nox.parametrize( "shard", [ + # The `custom` shard is included if you want to run your own tests. By default, + # no tests are specified, which means ALL tests will run. To run just your own + # subset, you can pass `-- test_your_thing.py` to nox. 
+        #   For example,
+        #     nox -e "tests-3.12(shard='custom')" -- test_your_thing.py
+        "custom",
         "trace",
         "trace_server",
         "anthropic",
@@ -76,6 +82,7 @@ def tests(session, shard):
     default_test_dirs = [f"integrations/{shard}/"]
     test_dirs_dict = {
+        "custom": [],
         "trace": ["trace/"],
         "trace_server": ["trace_server/"],
         "mistral0": ["integrations/mistral/v0/"],
diff --git a/pyproject.toml b/pyproject.toml
index 407b75483279..b89d442888f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,7 +67,14 @@ llamaindex = ["llama-index>=0.10.35"]
 mistral0 = ["mistralai>=0.1.8,<1.0.0"]
 mistral1 = ["mistralai>=1.0.0"]
 scorers = ["Levenshtein>=0.26.0", "instructor>=1.5.2"]
-scorers_tests = ["instructor>=1.5.2", "Levenshtein>=0.26.0", "openai>=1.0.0", "google-generativeai>=0.8.0", "mistralai>=1.0.3", "anthropic>=0.30.0"]
+scorers_tests = [
+    "instructor>=1.5.2",
+    "Levenshtein>=0.26.0",
+    "openai>=1.0.0",
+    "google-generativeai>=0.8.0",
+    "mistralai>=1.0.3",
+    "anthropic>=0.30.0",
+]
 notdiamond = ["notdiamond>=0.3.21", "litellm<=1.49.1"]
 openai = ["openai>=1.0.0"]
 pandas-test = ["pandas>=2.2.3"]
@@ -176,6 +183,19 @@ line-length = 88
 show-fixes = true
 exclude = ["weave_query"]

+[tool.pyright]
+include = ["weave"]
+exclude = ["weave_query", "tests", "examples", "docs", "noxfile.py"]
+# In cases where we support multiple versions of an integration, some imports can be missing
+reportMissingImports = false
+
+# TODO: Gradually remove as we improve our code!
+reportAttributeAccessIssue = false
+reportPossiblyUnboundVariable = false
+reportOptionalMemberAccess = false
+reportArgumentType = false
+reportCallIssue = false
+
 [tool.mypy]
 warn_unused_configs = true
 # Note: You have to update .pre-commit-config.yaml too!
@@ -192,7 +212,7 @@ module = "weave_query.*"
 ignore_errors = true

 [tool.bumpversion]
-current_version = "0.51.18-dev0"
+current_version = "0.51.20-dev0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
diff --git a/sdks/node/examples/evaluate.ts b/sdks/node/examples/evaluate.ts
index da88e506e57d..5f691e26e602 100644
--- a/sdks/node/examples/evaluate.ts
+++ b/sdks/node/examples/evaluate.ts
@@ -16,7 +16,7 @@ async function main() {
   const evaluation = new weave.Evaluation({
     dataset: ds,
     scorers: [
-      weave.op(({modelOutput, datasetItem}) => modelOutput == datasetItem.age, {
+      weave.op(({modelOutput, datasetRow}) => modelOutput == datasetRow.age, {
         name: 'isEqual',
       }),
     ],
diff --git a/sdks/node/examples/evaluateWithColumnMapping.ts b/sdks/node/examples/evaluateWithColumnMapping.ts
index 44b7f1bf14e6..1907c1fbd82c 100644
--- a/sdks/node/examples/evaluateWithColumnMapping.ts
+++ b/sdks/node/examples/evaluateWithColumnMapping.ts
@@ -16,7 +16,7 @@ async function main() {
   const evaluation = new weave.Evaluation({
     dataset: ds,
     scorers: [
-      weave.op(({modelOutput, datasetItem}) => modelOutput == datasetItem.age, {
+      weave.op(({modelOutput, datasetRow}) => modelOutput == datasetRow.age, {
         name: 'isEqual',
       }),
     ],
diff --git a/sdks/node/examples/evaluateWithImages.ts b/sdks/node/examples/evaluateWithImages.ts
deleted file mode 100644
index dd12f0afffd2..000000000000
--- a/sdks/node/examples/evaluateWithImages.ts
+++ /dev/null
@@ -1,71 +0,0 @@
-import {OpenAI} from 'openai';
-import 'source-map-support/register';
-import * as weave from 'weave';
-
-const sentences = [
-  'There are many fruits that were found on the recently discovered planet Goocrux. 
There are neoskizzles that grow there, which are purple and taste like candy.', - 'Pounits are a bright green color and are more savory than sweet.', - 'Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them.', - 'There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy.', -]; -const labels = [ - {fruit: 'neoskizzles', color: 'purple', flavor: 'candy'}, - {fruit: 'pounits', color: 'bright green', flavor: 'savory'}, - {fruit: 'glowls', color: 'pale orange', flavor: 'sour and bitter'}, -]; -const examples = [ - {id: '0', sentence: sentences[0], target: labels[0]}, - {id: '1', sentence: sentences[1], target: labels[1]}, - {id: '2', sentence: sentences[2], target: labels[2]}, -]; - -const openaiClient = weave.wrapOpenAI(new OpenAI()); - -const model = weave.op(async function myModel({datasetRow}) { - const prompt = `Extract fields ("fruit": , "color": , "flavor") from the following text, as json: ${datasetRow.sentence}`; - const response = await openaiClient.chat.completions.create({ - model: 'gpt-3.5-turbo', - messages: [{role: 'user', content: prompt}], - response_format: {type: 'json_object'}, - }); - const result = response.choices[0].message.content; - if (result == null) { - throw new Error('No response from model'); - } - if (datasetRow.id == '3') { - throw new Error('This is an error'); - } - return JSON.parse(result); -}); - -async function main() { - await weave.init('examples'); - const ds = new weave.Dataset({ - id: 'Fruit Dataset', - rows: examples, - }); - const evaluation = new weave.Evaluation({ - dataset: ds, - scorers: [ - weave.op(function fruitEqual({modelOutput, datasetRow}) { - return { - correct: modelOutput.fruit == datasetRow.target.fruit, - }; - }), - weave.op(async function genImage({modelOutput, datasetRow}) { - const result = await openaiClient.images.generate({ - prompt: `A fruit that's ${modelOutput.color} and ${modelOutput.flavor}`, - n: 1, - size: '256x256', - response_format: 'b64_json', - }); - return result.data[0]; - }), - ], - }); - - const results = await evaluation.evaluate({model}); - console.log(JSON.stringify(results, null, 2)); -} - -main(); diff --git a/sdks/node/examples/quickstartEvaluate.ts b/sdks/node/examples/quickstartEvaluate.ts index 241cd29a28a8..94c2d79ad1d3 100644 --- a/sdks/node/examples/quickstartEvaluate.ts +++ b/sdks/node/examples/quickstartEvaluate.ts @@ -52,9 +52,9 @@ async function main() { const evaluation = new weave.Evaluation({ dataset: ds, scorers: [ - weave.op(function fruitEqual({modelOutput, datasetItem}) { + weave.op(function fruitEqual({modelOutput, datasetRow}) { return { - correct: modelOutput.fruit == datasetItem.target.fruit, + correct: modelOutput.fruit == datasetRow.target.fruit, }; }), ], diff --git a/sdks/node/jest.setup.js b/sdks/node/jest.setup.js new file mode 100644 index 000000000000..45d442388133 --- /dev/null +++ b/sdks/node/jest.setup.js @@ -0,0 +1,10 @@ +const path = require('path'); + +const liveTestTimeout = 20000; // 20 seconds + +beforeEach(() => { + const testPath = expect.getState().testPath; + if (testPath && path.normalize(testPath).includes(path.normalize('/live/'))) { + jest.setTimeout(liveTestTimeout); + } +}); diff --git a/sdks/node/package-lock.json b/sdks/node/package-lock.json index 2fbfa2dd7189..83fe5755ee6a 100644 --- a/sdks/node/package-lock.json +++ b/sdks/node/package-lock.json @@ -1,12 +1,12 @@ { 
"name": "weave", - "version": "0.6.9", + "version": "0.7.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "weave", - "version": "0.6.9", + "version": "0.7.0", "license": "Apache-2.0", "dependencies": { "cli-progress": "^3.12.0", diff --git a/sdks/node/package.json b/sdks/node/package.json index 48c496d12608..f2f884f3dcdb 100644 --- a/sdks/node/package.json +++ b/sdks/node/package.json @@ -1,12 +1,12 @@ { "name": "weave", - "version": "0.6.9", + "version": "0.7.0", "description": "AI development toolkit", "types": "dist/src/index.d.ts", "main": "dist/src/index.js", "type": "commonjs", "scripts": { - "test": "jest", + "test": "jest --silent", "test:coverage": "jest --coverage", "test:watch": "jest --watch", "format": "prettier --write \"src/**/*.ts\" \"examples/**/*.ts\"", @@ -22,6 +22,9 @@ "license": "Apache-2.0", "jest": { "testEnvironment": "node", + "setupFilesAfterEnv": [ + "/jest.setup.js" + ], "transform": { "^.+\\.tsx?$": "ts-jest" }, @@ -83,6 +86,8 @@ "swagger-typescript-api": "^13.0.22", "ts-jest": "^29.2.5", "tsconfig-paths": "^4.2.0", - "tsx": "^4.19.1" + "tsx": "^4.19.1", + "typedoc": "^0.26.10", + "typedoc-plugin-markdown": "^4.2.9" } } diff --git a/sdks/node/pnpm-lock.yaml b/sdks/node/pnpm-lock.yaml index e55b4d16df34..836bea95eb4a 100644 --- a/sdks/node/pnpm-lock.yaml +++ b/sdks/node/pnpm-lock.yaml @@ -51,6 +51,12 @@ importers: tsx: specifier: ^4.19.1 version: 4.19.1 + typedoc: + specifier: ^0.26.10 + version: 0.26.10(typescript@5.5.4) + typedoc-plugin-markdown: + specifier: ^4.2.9 + version: 4.2.9(typedoc@0.26.10(typescript@5.5.4)) packages: @@ -458,6 +464,21 @@ packages: '@jridgewell/trace-mapping@0.3.25': resolution: {integrity: sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==} + '@shikijs/core@1.22.2': + resolution: {integrity: sha512-bvIQcd8BEeR1yFvOYv6HDiyta2FFVePbzeowf5pPS1avczrPK+cjmaxxh0nx5QzbON7+Sv0sQfQVciO7bN72sg==} + + '@shikijs/engine-javascript@1.22.2': + resolution: {integrity: sha512-iOvql09ql6m+3d1vtvP8fLCVCK7BQD1pJFmHIECsujB0V32BJ0Ab6hxk1ewVSMFA58FI0pR2Had9BKZdyQrxTw==} + + '@shikijs/engine-oniguruma@1.22.2': + resolution: {integrity: sha512-GIZPAGzQOy56mGvWMoZRPggn0dTlBf1gutV5TdceLCZlFNqWmuc7u+CzD0Gd9vQUTgLbrt0KLzz6FNprqYAxlA==} + + '@shikijs/types@1.22.2': + resolution: {integrity: sha512-NCWDa6LGZqTuzjsGfXOBWfjS/fDIbDdmVDug+7ykVe1IKT4c1gakrvlfFYp5NhAXH/lyqLM8wsAPo5wNy73Feg==} + + '@shikijs/vscode-textmate@9.3.0': + resolution: {integrity: sha512-jn7/7ky30idSkd/O5yDBfAnVt+JJpepofP/POZ1iMOxK59cOfqIgg/Dj0eFsjOTMw+4ycJN0uhZH/Eb0bs/EUA==} + '@sinclair/typebox@0.27.8': resolution: {integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==} @@ -485,6 +506,9 @@ packages: '@types/graceful-fs@4.1.9': resolution: {integrity: sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==} + '@types/hast@3.0.4': + resolution: {integrity: sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==} + '@types/istanbul-lib-coverage@2.0.6': resolution: {integrity: sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==} @@ -497,6 +521,9 @@ packages: '@types/jest@29.5.14': resolution: {integrity: sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==} + '@types/mdast@4.0.4': + resolution: {integrity: sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==} + 
'@types/node-fetch@2.6.11': resolution: {integrity: sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==} @@ -512,12 +539,18 @@ packages: '@types/swagger-schema-official@2.0.25': resolution: {integrity: sha512-T92Xav+Gf/Ik1uPW581nA+JftmjWPgskw/WBf4TJzxRG/SJ+DfNnNE+WuZ4mrXuzflQMqMkm1LSYjzYW7MB1Cg==} + '@types/unist@3.0.3': + resolution: {integrity: sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==} + '@types/yargs-parser@21.0.3': resolution: {integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==} '@types/yargs@17.0.33': resolution: {integrity: sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==} + '@ungap/structured-clone@1.2.0': + resolution: {integrity: sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==} + abort-controller@3.0.0: resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} engines: {node: '>=6.5'} @@ -644,6 +677,9 @@ packages: caniuse-lite@1.0.30001669: resolution: {integrity: sha512-DlWzFDJqstqtIVx1zeSpIMLjunf5SmwOw0N2Ck/QSQdS8PLS4+9HrLaYei4w8BIAL7IB/UEDu889d8vhCTPA0w==} + ccount@2.0.1: + resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} + chalk@4.1.2: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} @@ -652,6 +688,12 @@ packages: resolution: {integrity: sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==} engines: {node: '>=10'} + character-entities-html4@2.1.0: + resolution: {integrity: sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==} + + character-entities-legacy@3.0.0: + resolution: {integrity: sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==} + ci-info@3.9.0: resolution: {integrity: sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==} engines: {node: '>=8'} @@ -692,6 +734,9 @@ packages: resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} engines: {node: '>= 0.8'} + comma-separated-tokens@2.0.3: + resolution: {integrity: sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==} + commondir@1.0.1: resolution: {integrity: sha512-W9pAhw0ja1Edb5GVdIF1mjZw/ASI0AlShXM83UUGe2DVr5TdAPEA1OA8m/g8zWp9x6On7gqufY+FatDbC3MDQg==} @@ -759,10 +804,17 @@ packages: resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} engines: {node: '>=0.4.0'} + dequal@2.0.3: + resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==} + engines: {node: '>=6'} + detect-newline@3.1.0: resolution: {integrity: sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==} engines: {node: '>=8'} + devlop@1.1.0: + resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==} + didyoumean@1.2.2: resolution: {integrity: sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==} @@ -785,6 +837,10 @@ packages: 
emoji-regex@8.0.0: resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} + entities@4.5.0: + resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} + engines: {node: '>=0.12'} + env-paths@2.2.1: resolution: {integrity: sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==} engines: {node: '>=6'} @@ -935,9 +991,18 @@ packages: resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} engines: {node: '>= 0.4'} + hast-util-to-html@9.0.3: + resolution: {integrity: sha512-M17uBDzMJ9RPCqLMO92gNNUDuBSq10a25SDBI08iCCxmorf4Yy6sYHK57n9WAbRAAaU+DuR4W6GN9K4DFZesYg==} + + hast-util-whitespace@3.0.0: + resolution: {integrity: sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==} + html-escaper@2.0.2: resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + html-void-elements@3.0.0: + resolution: {integrity: sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==} + http2-client@1.3.5: resolution: {integrity: sha512-EC2utToWl4RKfs5zd36Mxq7nzHHBuomZboI0yYL6Y0RmBgT7Sgkq4rQ0ezFTYoIsSs7Tm9SJe+o2FcAg6GBhGA==} @@ -1206,6 +1271,9 @@ packages: lines-and-columns@1.2.4: resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} + linkify-it@5.0.0: + resolution: {integrity: sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==} + locate-path@5.0.0: resolution: {integrity: sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==} engines: {node: '>=8'} @@ -1222,6 +1290,9 @@ packages: lru-cache@5.1.1: resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==} + lunr@2.3.9: + resolution: {integrity: sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==} + make-dir@3.1.0: resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==} engines: {node: '>=8'} @@ -1236,9 +1307,34 @@ packages: makeerror@1.0.12: resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==} + markdown-it@14.1.0: + resolution: {integrity: sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==} + hasBin: true + + mdast-util-to-hast@13.2.0: + resolution: {integrity: sha512-QGYKEuUsYT9ykKBCMOEDLsU5JRObWQusAolFMeko/tYPufNkRffBAQjIE+99jbA87xv6FgmjLtwjh9wBWajwAA==} + + mdurl@2.0.0: + resolution: {integrity: sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w==} + merge-stream@2.0.0: resolution: {integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==} + micromark-util-character@2.1.0: + resolution: {integrity: sha512-KvOVV+X1yLBfs9dCBSopq/+G1PcgT3lAK07mC4BzXi5E7ahzMAF8oIupDDJ6mievI6F+lAATkbQQlQixJfT3aQ==} + + micromark-util-encode@2.0.0: + resolution: {integrity: sha512-pS+ROfCXAGLWCOc8egcBvT0kf27GoWMqtdarNfDcjb6YLuV5cM3ioG45Ys2qOVqeqSbjaKg72vU+Wby3eddPsA==} + + micromark-util-sanitize-uri@2.0.0: + resolution: {integrity: 
sha512-WhYv5UEcZrbAtlsnPuChHUAsu/iBPOVaEVsntLBIdpibO0ddy8OzavZz3iL2xVvBZOpolujSliP65Kq0/7KIYw==} + + micromark-util-symbol@2.0.0: + resolution: {integrity: sha512-8JZt9ElZ5kyTnO94muPxIGS8oyElRJaiJO8EzV6ZSyGQ1Is8xwl4Q45qU5UOg+bGH4AikWziz0iN4sFLWs8PGw==} + + micromark-util-types@2.0.0: + resolution: {integrity: sha512-oNh6S2WMHWRZrmutsRmDDfkzKtxF+bc2VxLC9dvtrDIRFln627VsFP6fLMgTryGDljgLPjkrzQSDcPrjPyDJ5w==} + micromatch@4.0.8: resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} engines: {node: '>=8.6'} @@ -1262,6 +1358,10 @@ packages: resolution: {integrity: sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==} engines: {node: '>=10'} + minimatch@9.0.5: + resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==} + engines: {node: '>=16 || 14 >=14.17'} + minimist@1.2.8: resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} @@ -1342,6 +1442,9 @@ packages: resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} engines: {node: '>=6'} + oniguruma-to-js@0.4.3: + resolution: {integrity: sha512-X0jWUcAlxORhOqqBREgPMgnshB7ZGYszBNspP+tS9hPD3l13CdaXcHbgImoHUHlrvGx/7AvFEkTRhAGYh+jzjQ==} + openai@4.68.4: resolution: {integrity: sha512-LRinV8iU9VQplkr25oZlyrsYGPGasIwYN8KFMAAFTHHLHjHhejtJ5BALuLFrkGzY4wfbKhOhuT+7lcHZ+F3iEA==} hasBin: true @@ -1430,6 +1533,13 @@ packages: resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==} engines: {node: '>= 6'} + property-information@6.5.0: + resolution: {integrity: sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==} + + punycode.js@2.3.1: + resolution: {integrity: sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==} + engines: {node: '>=6'} + pure-rand@6.1.0: resolution: {integrity: sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==} @@ -1439,6 +1549,9 @@ packages: reftools@1.1.9: resolution: {integrity: sha512-OVede/NQE13xBQ+ob5CKd5KyeJYU2YInb1bmV4nRoOfquZPkAkxuOXicSe1PvqIuZZ4kD13sPKBbR7UFDmli6w==} + regex@4.3.3: + resolution: {integrity: sha512-r/AadFO7owAq1QJVeZ/nq9jNS1vyZt+6t1p/E59B56Rn2GCya+gr1KSyOzNL/er+r+B7phv5jG2xU2Nz1YkmJg==} + release-zalgo@1.0.0: resolution: {integrity: sha512-gUAyHVHPPC5wdqX/LG4LWtRYtgjxyX78oanFNTMMyFEfOqdC54s3eE82imuWKbOeqYht2CrNf64Qb8vgmmtZGA==} engines: {node: '>=4'} @@ -1498,6 +1611,9 @@ packages: resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} engines: {node: '>=8'} + shiki@1.22.2: + resolution: {integrity: sha512-3IZau0NdGKXhH2bBlUk4w1IHNxPh6A5B2sUpyY+8utLu2j/h1QpFkAaUA1bAMxOWWGtTWcAh531vnS4NJKS/lA==} + should-equal@2.0.0: resolution: {integrity: sha512-ZP36TMrK9euEuWQYBig9W55WPC7uo37qzAEmbjHz4gfyuXrEUgF8cUvQVO+w+d3OMfPvSRQJ22lSm8MQJ43LTA==} @@ -1540,6 +1656,9 @@ packages: resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} engines: {node: '>=0.10.0'} + space-separated-tokens@2.0.2: + resolution: {integrity: sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==} + spawn-wrap@2.0.0: resolution: {integrity: 
sha512-EeajNjfN9zMnULLwhZZQU3GWBoFNkbngTUPfaawT4RkMiviTxcX0qfhVbGey39mfctfDHkWtuecgQ8NJcyQWHg==} engines: {node: '>=8'} @@ -1559,6 +1678,9 @@ packages: resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} engines: {node: '>=8'} + stringify-entities@4.0.4: + resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==} + strip-ansi@6.0.1: resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} engines: {node: '>=8'} @@ -1617,6 +1739,9 @@ packages: tr46@0.0.3: resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + trim-lines@3.0.1: + resolution: {integrity: sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==} + ts-jest@29.2.5: resolution: {integrity: sha512-KD8zB2aAZrcKIdGk4OwpJggeLcH1FgrICqDSROWqlnJXGCXK4Mn6FcdK2B6670Xr73lHMG1kHw8R87A0ecZ+vA==} engines: {node: ^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0} @@ -1665,17 +1790,48 @@ packages: typedarray-to-buffer@3.1.5: resolution: {integrity: sha512-zdu8XMNEDepKKR+XYOXAVPtWui0ly0NtohUscw+UmaHiAWT8hrV1rr//H6V+0DvJ3OQ19S979M0laLfX8rm82Q==} + typedoc-plugin-markdown@4.2.9: + resolution: {integrity: sha512-Wqmx+7ezKFgtTklEq/iUhQ5uFeBDhAT6wiS2na9cFLidIpl9jpDHJy/COYh8jUZXgIRIZVQ/bPNjyrnPFoDwzg==} + engines: {node: '>= 18'} + peerDependencies: + typedoc: 0.26.x + + typedoc@0.26.10: + resolution: {integrity: sha512-xLmVKJ8S21t+JeuQLNueebEuTVphx6IrP06CdV7+0WVflUSW3SPmR+h1fnWVdAR/FQePEgsSWCUHXqKKjzuUAw==} + engines: {node: '>= 18'} + hasBin: true + peerDependencies: + typescript: 4.6.x || 4.7.x || 4.8.x || 4.9.x || 5.0.x || 5.1.x || 5.2.x || 5.3.x || 5.4.x || 5.5.x || 5.6.x + typescript@5.5.4: resolution: {integrity: sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==} engines: {node: '>=14.17'} hasBin: true + uc.micro@2.1.0: + resolution: {integrity: sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==} + undici-types@5.26.5: resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} undici-types@6.19.8: resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==} + unist-util-is@6.0.0: + resolution: {integrity: sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw==} + + unist-util-position@5.0.0: + resolution: {integrity: sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==} + + unist-util-stringify-position@4.0.0: + resolution: {integrity: sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==} + + unist-util-visit-parents@6.0.1: + resolution: {integrity: sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==} + + unist-util-visit@5.0.0: + resolution: {integrity: sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==} + update-browserslist-db@1.1.1: resolution: {integrity: sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==} hasBin: true @@ -1694,6 +1850,12 @@ packages: resolution: {integrity: sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==} engines: 
{node: '>=10.12.0'} + vfile-message@4.0.2: + resolution: {integrity: sha512-jRDZ1IMLttGj41KcZvlrYAaI3CfqpLpfpf+Mfig13viT6NKvRzWZ+lXz0Y5D60w6uJIBAOGq9mSHf0gktF0duw==} + + vfile@6.0.3: + resolution: {integrity: sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==} + walker@1.0.8: resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==} @@ -1747,6 +1909,11 @@ packages: resolution: {integrity: sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg==} engines: {node: '>= 6'} + yaml@2.6.0: + resolution: {integrity: sha512-a6ae//JvKDEra2kdi1qzCyrJW/WZCgFi8ydDV+eXExl95t+5R+ijnqHJbz9tmMh8FUjx3iv2fCQ4dclAQlO2UQ==} + engines: {node: '>= 14'} + hasBin: true + yargs-parser@18.1.3: resolution: {integrity: sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==} engines: {node: '>=6'} @@ -1767,6 +1934,9 @@ packages: resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} engines: {node: '>=10'} + zwitch@2.0.4: + resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==} + snapshots: '@ampproject/remapping@2.3.0': @@ -2224,6 +2394,33 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.0 + '@shikijs/core@1.22.2': + dependencies: + '@shikijs/engine-javascript': 1.22.2 + '@shikijs/engine-oniguruma': 1.22.2 + '@shikijs/types': 1.22.2 + '@shikijs/vscode-textmate': 9.3.0 + '@types/hast': 3.0.4 + hast-util-to-html: 9.0.3 + + '@shikijs/engine-javascript@1.22.2': + dependencies: + '@shikijs/types': 1.22.2 + '@shikijs/vscode-textmate': 9.3.0 + oniguruma-to-js: 0.4.3 + + '@shikijs/engine-oniguruma@1.22.2': + dependencies: + '@shikijs/types': 1.22.2 + '@shikijs/vscode-textmate': 9.3.0 + + '@shikijs/types@1.22.2': + dependencies: + '@shikijs/vscode-textmate': 9.3.0 + '@types/hast': 3.0.4 + + '@shikijs/vscode-textmate@9.3.0': {} + '@sinclair/typebox@0.27.8': {} '@sinonjs/commons@3.0.1': @@ -2263,6 +2460,10 @@ snapshots: dependencies: '@types/node': 22.8.0 + '@types/hast@3.0.4': + dependencies: + '@types/unist': 3.0.3 + '@types/istanbul-lib-coverage@2.0.6': {} '@types/istanbul-lib-report@3.0.3': @@ -2278,6 +2479,10 @@ snapshots: expect: 29.7.0 pretty-format: 29.7.0 + '@types/mdast@4.0.4': + dependencies: + '@types/unist': 3.0.3 + '@types/node-fetch@2.6.11': dependencies: '@types/node': 22.8.0 @@ -2295,12 +2500,16 @@ snapshots: '@types/swagger-schema-official@2.0.25': {} + '@types/unist@3.0.3': {} + '@types/yargs-parser@21.0.3': {} '@types/yargs@17.0.33': dependencies: '@types/yargs-parser': 21.0.3 + '@ungap/structured-clone@1.2.0': {} + abort-controller@3.0.0: dependencies: event-target-shim: 5.0.1 @@ -2451,6 +2660,8 @@ snapshots: caniuse-lite@1.0.30001669: {} + ccount@2.0.1: {} + chalk@4.1.2: dependencies: ansi-styles: 4.3.0 @@ -2458,6 +2669,10 @@ snapshots: char-regex@1.0.2: {} + character-entities-html4@2.1.0: {} + + character-entities-legacy@3.0.0: {} + ci-info@3.9.0: {} cjs-module-lexer@1.4.1: {} @@ -2494,6 +2709,8 @@ snapshots: dependencies: delayed-stream: 1.0.0 + comma-separated-tokens@2.0.3: {} + commondir@1.0.1: {} concat-map@0.0.1: {} @@ -2550,8 +2767,14 @@ snapshots: delayed-stream@1.0.0: {} + dequal@2.0.3: {} + detect-newline@3.1.0: {} + devlop@1.1.0: + dependencies: + dequal: 2.0.3 + didyoumean@1.2.2: {} diff-sequences@29.6.3: {} @@ -2566,6 +2789,8 @@ snapshots: 
emoji-regex@8.0.0: {} + entities@4.5.0: {} + env-paths@2.2.1: {} error-ex@1.3.2: @@ -2730,8 +2955,28 @@ snapshots: dependencies: function-bind: 1.1.2 + hast-util-to-html@9.0.3: + dependencies: + '@types/hast': 3.0.4 + '@types/unist': 3.0.3 + ccount: 2.0.1 + comma-separated-tokens: 2.0.3 + hast-util-whitespace: 3.0.0 + html-void-elements: 3.0.0 + mdast-util-to-hast: 13.2.0 + property-information: 6.5.0 + space-separated-tokens: 2.0.2 + stringify-entities: 4.0.4 + zwitch: 2.0.4 + + hast-util-whitespace@3.0.0: + dependencies: + '@types/hast': 3.0.4 + html-escaper@2.0.2: {} + html-void-elements@3.0.0: {} + http2-client@1.3.5: {} human-signals@2.1.0: {} @@ -3173,6 +3418,10 @@ snapshots: lines-and-columns@1.2.4: {} + linkify-it@5.0.0: + dependencies: + uc.micro: 2.1.0 + locate-path@5.0.0: dependencies: p-locate: 4.1.0 @@ -3187,6 +3436,8 @@ snapshots: dependencies: yallist: 3.1.1 + lunr@2.3.9: {} + make-dir@3.1.0: dependencies: semver: 6.3.1 @@ -3201,8 +3452,48 @@ snapshots: dependencies: tmpl: 1.0.5 + markdown-it@14.1.0: + dependencies: + argparse: 2.0.1 + entities: 4.5.0 + linkify-it: 5.0.0 + mdurl: 2.0.0 + punycode.js: 2.3.1 + uc.micro: 2.1.0 + + mdast-util-to-hast@13.2.0: + dependencies: + '@types/hast': 3.0.4 + '@types/mdast': 4.0.4 + '@ungap/structured-clone': 1.2.0 + devlop: 1.1.0 + micromark-util-sanitize-uri: 2.0.0 + trim-lines: 3.0.1 + unist-util-position: 5.0.0 + unist-util-visit: 5.0.0 + vfile: 6.0.3 + + mdurl@2.0.0: {} + merge-stream@2.0.0: {} + micromark-util-character@2.1.0: + dependencies: + micromark-util-symbol: 2.0.0 + micromark-util-types: 2.0.0 + + micromark-util-encode@2.0.0: {} + + micromark-util-sanitize-uri@2.0.0: + dependencies: + micromark-util-character: 2.1.0 + micromark-util-encode: 2.0.0 + micromark-util-symbol: 2.0.0 + + micromark-util-symbol@2.0.0: {} + + micromark-util-types@2.0.0: {} + micromatch@4.0.8: dependencies: braces: 3.0.3 @@ -3224,6 +3515,10 @@ snapshots: dependencies: brace-expansion: 2.0.1 + minimatch@9.0.5: + dependencies: + brace-expansion: 2.0.1 + minimist@1.2.8: {} ms@2.1.3: {} @@ -3331,6 +3626,10 @@ snapshots: dependencies: mimic-fn: 2.1.0 + oniguruma-to-js@0.4.3: + dependencies: + regex: 4.3.3 + openai@4.68.4: dependencies: '@types/node': 18.19.59 @@ -3414,12 +3713,18 @@ snapshots: kleur: 3.0.3 sisteransi: 1.0.5 + property-information@6.5.0: {} + + punycode.js@2.3.1: {} + pure-rand@6.1.0: {} react-is@18.3.1: {} reftools@1.1.9: {} + regex@4.3.3: {} + release-zalgo@1.0.0: dependencies: es6-error: 4.1.1 @@ -3462,6 +3767,15 @@ snapshots: shebang-regex@3.0.0: {} + shiki@1.22.2: + dependencies: + '@shikijs/core': 1.22.2 + '@shikijs/engine-javascript': 1.22.2 + '@shikijs/engine-oniguruma': 1.22.2 + '@shikijs/types': 1.22.2 + '@shikijs/vscode-textmate': 9.3.0 + '@types/hast': 3.0.4 + should-equal@2.0.0: dependencies: should-type: 1.4.0 @@ -3508,6 +3822,8 @@ snapshots: source-map@0.6.1: {} + space-separated-tokens@2.0.2: {} + spawn-wrap@2.0.0: dependencies: foreground-child: 2.0.0 @@ -3534,6 +3850,11 @@ snapshots: is-fullwidth-code-point: 3.0.0 strip-ansi: 6.0.1 + stringify-entities@4.0.4: + dependencies: + character-entities-html4: 2.1.0 + character-entities-legacy: 3.0.0 + strip-ansi@6.0.1: dependencies: ansi-regex: 5.0.1 @@ -3605,6 +3926,8 @@ snapshots: tr46@0.0.3: {} + trim-lines@3.0.1: {} + ts-jest@29.2.5(@babel/core@7.26.0)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.26.0))(jest@29.7.0(@types/node@22.8.0))(typescript@5.5.4): dependencies: bs-logger: 0.2.6 @@ -3647,12 +3970,50 @@ snapshots: dependencies: 
is-typedarray: 1.0.0 + typedoc-plugin-markdown@4.2.9(typedoc@0.26.10(typescript@5.5.4)): + dependencies: + typedoc: 0.26.10(typescript@5.5.4) + + typedoc@0.26.10(typescript@5.5.4): + dependencies: + lunr: 2.3.9 + markdown-it: 14.1.0 + minimatch: 9.0.5 + shiki: 1.22.2 + typescript: 5.5.4 + yaml: 2.6.0 + typescript@5.5.4: {} + uc.micro@2.1.0: {} + undici-types@5.26.5: {} undici-types@6.19.8: {} + unist-util-is@6.0.0: + dependencies: + '@types/unist': 3.0.3 + + unist-util-position@5.0.0: + dependencies: + '@types/unist': 3.0.3 + + unist-util-stringify-position@4.0.0: + dependencies: + '@types/unist': 3.0.3 + + unist-util-visit-parents@6.0.1: + dependencies: + '@types/unist': 3.0.3 + unist-util-is: 6.0.0 + + unist-util-visit@5.0.0: + dependencies: + '@types/unist': 3.0.3 + unist-util-is: 6.0.0 + unist-util-visit-parents: 6.0.1 + update-browserslist-db@1.1.1(browserslist@4.24.2): dependencies: browserslist: 4.24.2 @@ -3669,6 +4030,16 @@ snapshots: '@types/istanbul-lib-coverage': 2.0.6 convert-source-map: 2.0.0 + vfile-message@4.0.2: + dependencies: + '@types/unist': 3.0.3 + unist-util-stringify-position: 4.0.0 + + vfile@6.0.3: + dependencies: + '@types/unist': 3.0.3 + vfile-message: 4.0.2 + walker@1.0.8: dependencies: makeerror: 1.0.12 @@ -3722,6 +4093,8 @@ snapshots: yaml@1.10.2: {} + yaml@2.6.0: {} + yargs-parser@18.1.3: dependencies: camelcase: 5.3.1 @@ -3754,3 +4127,5 @@ snapshots: yargs-parser: 21.1.1 yocto-queue@0.1.0: {} + + zwitch@2.0.4: {} diff --git a/sdks/node/src/__tests__/live/dataset.test.ts b/sdks/node/src/__tests__/live/dataset.test.ts index e2b87f215e8f..09e73ef95c7e 100644 --- a/sdks/node/src/__tests__/live/dataset.test.ts +++ b/sdks/node/src/__tests__/live/dataset.test.ts @@ -3,7 +3,7 @@ import {Dataset} from '../../dataset'; describe('Dataset', () => { beforeEach(async () => { - await login({apiKey: process.env.WANDB_API_KEY ?? ''}); + await login(process.env.WANDB_API_KEY ?? ''); }); test('should save a dataset', async () => { diff --git a/sdks/node/src/__tests__/live/fn.test.ts b/sdks/node/src/__tests__/live/fn.test.ts index 20488144d89b..3635255167a8 100644 --- a/sdks/node/src/__tests__/live/fn.test.ts +++ b/sdks/node/src/__tests__/live/fn.test.ts @@ -29,7 +29,7 @@ class ParametrizedFunction extends CallableObject< describe('Fn', () => { beforeEach(async () => { - await login({apiKey: process.env.WANDB_API_KEY ?? ''}); + await login(process.env.WANDB_API_KEY ?? ''); }); test('use fn', async () => { diff --git a/sdks/node/src/__tests__/live/publish.test.ts b/sdks/node/src/__tests__/live/publish.test.ts index 67edc1b50662..03812c37a3d9 100644 --- a/sdks/node/src/__tests__/live/publish.test.ts +++ b/sdks/node/src/__tests__/live/publish.test.ts @@ -3,7 +3,7 @@ import {Dataset, op, weaveAudio, weaveImage} from '../../index'; describe('Publishing Various Data Types', () => { beforeEach(async () => { - await login({apiKey: process.env.WANDB_API_KEY ?? ''}); + await login(process.env.WANDB_API_KEY ?? 
''); }); const primitiveOp = op(async function primitive(input: string) { @@ -79,5 +79,5 @@ describe('Publishing Various Data Types', () => { const datasetResult = await datasetOp(); expect(datasetResult).toBeInstanceOf(Dataset); expect(datasetResult.rows).toHaveLength(3); - }); + }, 20000); // Adding explicit timeout here, though I'm not sure why it's needed }); diff --git a/sdks/node/src/__tests__/live/table.test.ts b/sdks/node/src/__tests__/live/table.test.ts index b5a02360b53f..cbfbee0316e2 100644 --- a/sdks/node/src/__tests__/live/table.test.ts +++ b/sdks/node/src/__tests__/live/table.test.ts @@ -3,7 +3,7 @@ import {Table} from '../../table'; describe('table', () => { beforeEach(async () => { - await login({apiKey: process.env.WANDB_API_KEY ?? ''}); + await login(process.env.WANDB_API_KEY ?? ''); }); test('example', async () => { diff --git a/sdks/node/src/__tests__/live/weaveObject.test.ts b/sdks/node/src/__tests__/live/weaveObject.test.ts index fdf48ab95eb7..7f3d34550cad 100644 --- a/sdks/node/src/__tests__/live/weaveObject.test.ts +++ b/sdks/node/src/__tests__/live/weaveObject.test.ts @@ -19,7 +19,7 @@ class ExampleObject extends WeaveObject { describe('weaveObject', () => { beforeEach(async () => { - await login({apiKey: process.env.WANDB_API_KEY ?? ''}); + await login(process.env.WANDB_API_KEY ?? ''); }); test('basic-example', async () => { diff --git a/sdks/node/src/__tests__/login.test.ts b/sdks/node/src/__tests__/login.test.ts index 8bdfcb19cfa5..7384c31115c0 100644 --- a/sdks/node/src/__tests__/login.test.ts +++ b/sdks/node/src/__tests__/login.test.ts @@ -18,6 +18,7 @@ describe('login', () => { (getUrls as jest.Mock).mockReturnValue({ traceBaseUrl: 'https://api.wandb.ai', domain: 'wandb.ai', + host: 'api.wandb.ai', }); const mockSetEntry = jest.fn(); @@ -33,22 +34,19 @@ describe('login', () => { }, })); - await login({apiKey: 'test-api-key'}); + await login('test-api-key'); - expect(mockSetEntry).toHaveBeenCalledWith('wandb.ai', { + expect(mockSetEntry).toHaveBeenCalledWith({ + machine: 'api.wandb.ai', login: 'user', password: 'test-api-key', }); expect(mockSave).toHaveBeenCalled(); expect(console.log).toHaveBeenCalledWith( - 'Successfully logged in. Credentials saved for wandb.ai' + 'Successfully logged in. 
Credentials saved for api.wandb.ai' ); }); - it('should throw an error if API key is not provided', async () => { - await expect(login()).rejects.toThrow('API Key must be specified'); - }); - it('should throw an error if connection verification fails', async () => { (getUrls as jest.Mock).mockReturnValue({ traceBaseUrl: 'https://api.wandb.ai', @@ -63,7 +61,7 @@ }, })); - await expect(login({apiKey: 'test-api-key'})).rejects.toThrow( + await expect(login('test-api-key')).rejects.toThrow( 'Unable to verify connection to the weave trace server with given API Key' ); }); diff --git a/sdks/node/src/__tests__/util/netrc.test.ts b/sdks/node/src/__tests__/util/netrc.test.ts index 59d015381870..26d994a7e92a 100644 --- a/sdks/node/src/__tests__/util/netrc.test.ts +++ b/sdks/node/src/__tests__/util/netrc.test.ts @@ -23,7 +23,6 @@ describe('Netrc', () => { machine api.example.com login user2 password pass2 - account acc2 `; (fs.readFileSync as jest.Mock).mockReturnValue(mockContent); @@ -39,7 +38,6 @@ describe('Netrc', () => { machine: 'api.example.com', login: 'user2', password: 'pass2', - account: 'acc2', }); }); @@ -54,11 +52,11 @@ describe('Netrc', () => { test('save writes entries correctly', () => { const netrc = new Netrc(); - netrc.setEntry('example.com', {login: 'user1', password: 'pass1'}); - netrc.setEntry('api.example.com', { + netrc.setEntry({machine: 'example.com', login: 'user1', password: 'pass1'}); + netrc.setEntry({ + machine: 'api.example.com', login: 'user2', password: 'pass2', - account: 'acc2', }); netrc.save(); @@ -70,7 +68,6 @@ machine api.example.com login user2 password pass2 - account acc2 `; expect(fs.writeFileSync).toHaveBeenCalledWith( @@ -82,8 +79,16 @@ machine api.example.com test('getLastEntry returns the last entry', () => { const netrc = new Netrc(); - netrc.setEntry('example1.com', {login: 'user1', password: 'pass1'}); - netrc.setEntry('example2.com', {login: 'user2', password: 'pass2'}); + netrc.setEntry({ + machine: 'example1.com', + login: 'user1', + password: 'pass1', + }); + netrc.setEntry({ + machine: 'example2.com', + login: 'user2', + password: 'pass2', + }); expect(netrc.getLastEntry()).toEqual({ machine: 'example2.com', diff --git a/sdks/node/src/__tests__/wandb/settings.test.ts b/sdks/node/src/__tests__/wandb/settings.test.ts index 22b2f6c1b857..16f77987643e 100644 --- a/sdks/node/src/__tests__/wandb/settings.test.ts +++ b/sdks/node/src/__tests__/wandb/settings.test.ts @@ -54,6 +54,7 @@ describe('settings', () => { apiKey: 'test-api-key', baseUrl: expect.stringContaining('api.wandb.ai'), traceBaseUrl: expect.stringContaining('https://trace.wandb.ai'), + resolvedHost: 'api.wandb.ai', domain: expect.any(String), }); }); diff --git a/sdks/node/src/__tests__/weaveClient.test.ts b/sdks/node/src/__tests__/weaveClient.test.ts index 3a3523daa20d..43dc69b1965c 100644 --- a/sdks/node/src/__tests__/weaveClient.test.ts +++ b/sdks/node/src/__tests__/weaveClient.test.ts @@ -7,6 +7,30 @@ import {WeaveClient} from '../weaveClient'; jest.mock('../generated/traceServerApi'); jest.mock('../wandb/wandbServerApi'); +function createStreamFromCalls(calls: any[] = []) { + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + calls.forEach(call => { + controller.enqueue(encoder.encode(JSON.stringify(call) + '\n')); + }); + controller.close(); + }, + }); + return stream; +} +function mockStreamResponse( + api: jest.Mocked<TraceServerApi<any>>, + calls: any[] +) { + const stream = 
createStreamFromCalls(calls); + ( + api.calls.callsQueryStreamCallsStreamQueryPost as jest.Mock + ).mockResolvedValue({ + body: stream, + } as any); +} + describe('WeaveClient', () => { let client: WeaveClient; let mockTraceServerApi: jest.Mocked<TraceServerApi<any>>; @@ -32,21 +56,7 @@ describe('WeaveClient', () => { {id: '1', name: 'call1'}, {id: '2', name: 'call2'}, ]; - const encoder = new TextEncoder(); - const stream = new ReadableStream({ - start(controller) { - mockCalls.forEach(call => { - controller.enqueue(encoder.encode(JSON.stringify(call) + '\n')); - }); - controller.close(); - }, - }); - ( - mockTraceServerApi.calls - .callsQueryStreamCallsStreamQueryPost as jest.Mock - ).mockResolvedValue({ - body: stream, - } as any); + mockStreamResponse(mockTraceServerApi, mockCalls); // Call the method const filter = {}; @@ -227,4 +237,21 @@ describe('WeaveClient', () => { expect((client as any).callQueue.length).toBe(0); }); }); + + describe('getCall', () => { + it('should fetch a single call by ID', async () => { + const mockCall = {id: 'test-id', name: 'test-call'}; + mockStreamResponse(mockTraceServerApi, [mockCall]); + + const result = await client.getCall('test-id'); + expect(result).toEqual(mockCall); + }); + + it('should throw error when call is not found', async () => { + mockStreamResponse(mockTraceServerApi, []); + await expect(client.getCall('non-existent-id')).rejects.toThrow( + 'Call not found: non-existent-id' + ); + }); + }); }); diff --git a/sdks/node/src/clientApi.ts b/sdks/node/src/clientApi.ts index 499322e8d5bd..cdd40c81a2f4 100644 --- a/sdks/node/src/clientApi.ts +++ b/sdks/node/src/clientApi.ts @@ -1,6 +1,6 @@ import {Api as TraceServerApi} from './generated/traceServerApi'; import {Settings} from './settings'; -import {getUrls, setGlobalDomain} from './urls'; +import {defaultHost, getUrls, setGlobalDomain} from './urls'; import {ConcurrencyLimiter} from './utils/concurrencyLimit'; import {Netrc} from './utils/netrc'; import {createFetchWithRetry} from './utils/retry'; @@ -8,11 +8,6 @@ import {getWandbConfigs} from './wandb/settings'; import {WandbServerApi} from './wandb/wandbServerApi'; import {CallStackEntry, WeaveClient} from './weaveClient'; -export interface LoginOptions { - apiKey: string; - host?: string; -} - // Global client instance export let globalClient: WeaveClient | null = null; @@ -20,16 +15,16 @@ export let globalClient: WeaveClient | null = null; * Log in to Weights & Biases (W&B) using the provided API key. * This function saves the credentials to your netrc file for future use. * - * @param options - The login options. - * @param options.apiKey - Your W&B API key. - * @param options.host - (Optional) The host name (usually only needed if you're using a custom W&B server). + * @param {string} apiKey - Your W&B API key. + * @param {string} [host] - (Optional) The host name (usually only needed if you're using a custom W&B server). - * @throws {Error} If the API key is not specified or if the connection to the weave trace server cannot be verified. + * @throws {Error} If the connection to the weave trace server cannot be verified. 
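+ *
+ * @example
+ * // Illustrative usage; the key below is a placeholder, not a real API key:
+ * await login('your-api-key');
+ * await login('your-api-key', 'wandb.my-company.example'); // custom W&B server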
*/ -export async function login(options?: LoginOptions) { - if (!options?.apiKey) { - throw Error('API Key must be specified'); +export async function login(apiKey: string, host?: string) { + if (!host) { + console.warn('No host provided, using default host:', defaultHost); + host = defaultHost; } - const {traceBaseUrl, domain} = getUrls(options?.host); + const {traceBaseUrl} = getUrls(host); // Test the connection to the traceServerApi const testTraceServerApi = new TraceServerApi({ @@ -37,7 +32,7 @@ export async function login(options?: LoginOptions) { baseApiParams: { headers: { 'User-Agent': `W&B Weave JS Client ${process.env.VERSION || 'unknown'}`, - Authorization: `Basic ${Buffer.from(`api:${options.apiKey}`).toString('base64')}`, + Authorization: `Basic ${Buffer.from(`api:${apiKey}`).toString('base64')}`, }, }, }); @@ -50,9 +45,9 @@ export async function login(options?: LoginOptions) { } const netrc = new Netrc(); - netrc.setEntry(domain, {login: 'user', password: options.apiKey}); + netrc.setEntry({machine: host, login: 'user', password: apiKey}); netrc.save(); - console.log(`Successfully logged in. Credentials saved for ${domain}`); + console.log(`Successfully logged in. Credentials saved for ${host}`); } /** diff --git a/sdks/node/src/media.ts b/sdks/node/src/media.ts index ca44ae118df3..58d0323cb98d 100644 --- a/sdks/node/src/media.ts +++ b/sdks/node/src/media.ts @@ -18,8 +18,8 @@ interface WeaveImage extends WeaveImageInput { * Create a new WeaveImage object * * @param options The options for this media type - * @param options.data The raw image data as a Buffer - * @param options.imageType (Optional) The type of image file, currently only 'png' is supported + * - data: The raw image data as a Buffer + * - imageType: (Optional) The type of image file, currently only 'png' is supported * * @example * const imageBuffer = fs.readFileSync('path/to/image.png'); @@ -52,8 +52,8 @@ export interface WeaveAudio extends WeaveAudioInput { * Create a new WeaveAudio object * * @param options The options for this media type - * @param options.data The raw audio data as a Buffer - * @param options.audioType (Optional) The type of audio file, currently only 'wav' is supported + * - data: The raw audio data as a Buffer + * - audioType: (Optional) The type of audio file, currently only 'wav' is supported * * @example * const audioBuffer = fs.readFileSync('path/to/audio.wav'); diff --git a/sdks/node/src/urls.ts b/sdks/node/src/urls.ts index c96d0916016c..e90033d21e00 100644 --- a/sdks/node/src/urls.ts +++ b/sdks/node/src/urls.ts @@ -10,7 +10,8 @@ export function getUrls(host?: string) { traceBaseUrl: isDefault ? `https://trace.wandb.ai` : `https://${resolvedHost}`, - domain: isDefault ? defaultHost : resolvedHost, + domain: isDefault ? defaultDomain : resolvedHost, + host: isDefault ? 
defaultHost : resolvedHost, }; } diff --git a/sdks/node/src/utils/netrc.ts b/sdks/node/src/utils/netrc.ts index f878f4b69973..df78632c9d19 100644 --- a/sdks/node/src/utils/netrc.ts +++ b/sdks/node/src/utils/netrc.ts @@ -6,7 +6,6 @@ interface NetrcEntry { machine: string; login: string; password: string; - account?: string; } export class Netrc { @@ -22,35 +21,26 @@ export class Netrc { private load(): void { try { const content = readFileSync(this.path, 'utf8'); - const lines = content.split('\n'); - let currentMachine: string | null = null; let currentEntry: Partial<NetrcEntry> = {}; + const lines = content.split('\n'); for (const line of lines) { const [key, value] = line.trim().split(/\s+/); - switch (key) { - case 'machine': - if (currentMachine && Object.keys(currentEntry).length) { - this.entries.set(currentMachine, currentEntry as NetrcEntry); - } - currentMachine = value; - currentEntry = {machine: value}; - break; - case 'login': - case 'password': - case 'account': - if (currentMachine) { - currentEntry[key] = value; - } - break; + if (key === 'machine') { + if (currentEntry.machine && currentEntry.login) { + this.entries.set(currentEntry.machine, currentEntry as NetrcEntry); + } + currentEntry = {machine: value}; + } else if (key === 'login' || key === 'password') { + currentEntry[key] = value; } } - if (currentMachine && Object.keys(currentEntry).length > 1) { - this.entries.set(currentMachine, currentEntry as NetrcEntry); + if (currentEntry.machine && currentEntry.login) { + this.entries.set(currentEntry.machine, currentEntry as NetrcEntry); } } catch (error) { - // File doesn't exist or can't be read, starting with empty entries + console.error('Error parsing netrc file', error); } } @@ -60,7 +50,6 @@ export class Netrc { let str = `machine ${machine}\n`; if (entry.login) str += ` login ${entry.login}\n`; if (entry.password) str += ` password ${entry.password}\n`; - if (entry.account) str += ` account ${entry.account}\n`; return str; }) .join('\n'); @@ -72,11 +61,13 @@ export class Netrc { return this.entries.get(machine); } - setEntry(machine: string, entry: Partial<NetrcEntry>): void { - const existingEntry = this.entries.get(machine) || {machine}; - const updatedEntry = {...existingEntry, ...entry} as NetrcEntry; - this.entries.delete(machine); - this.entries.set(machine, updatedEntry); + setEntry({machine, ...entryProps}: NetrcEntry): void { + if (!machine) { + throw new Error('Machine is required'); + } + const existing = this.entries.get(machine) ?? 
{machine}; + const updated = {...existing, ...entryProps, machine}; + this.entries.set(machine, updated); } getLastEntry(): NetrcEntry | undefined { diff --git a/sdks/node/src/wandb/settings.ts b/sdks/node/src/wandb/settings.ts index 7b6b8ad7176a..21a0f6a3fad0 100644 --- a/sdks/node/src/wandb/settings.ts +++ b/sdks/node/src/wandb/settings.ts @@ -41,6 +41,6 @@ export function getWandbConfigs() { ); } const apiKey = getApiKey(host); - const {baseUrl, traceBaseUrl, domain} = getUrls(host); - return {apiKey, baseUrl, traceBaseUrl, domain}; + const {baseUrl, traceBaseUrl, domain, host: resolvedHost} = getUrls(host); + return {apiKey, baseUrl, traceBaseUrl, domain, resolvedHost}; } diff --git a/sdks/node/src/weaveClient.ts b/sdks/node/src/weaveClient.ts index 1a10cb2bec86..f96c263ec079 100644 --- a/sdks/node/src/weaveClient.ts +++ b/sdks/node/src/weaveClient.ts @@ -169,6 +169,16 @@ export class WeaveClient { } } + public async getCall( + callId: string, + includeCosts: boolean = false + ): Promise<CallSchema> { + const calls = await this.getCalls({call_ids: [callId]}, includeCosts); + if (calls.length === 0) { + throw new Error(`Call not found: ${callId}`); + } + return calls[0]; + } public async getCalls( filter: CallsFilter = {}, includeCosts: boolean = false, diff --git a/sdks/node/src/weaveObject.ts b/sdks/node/src/weaveObject.ts index 41a7eb046ef7..77666385df04 100644 --- a/sdks/node/src/weaveObject.ts +++ b/sdks/node/src/weaveObject.ts @@ -9,6 +9,18 @@ export interface WeaveObjectParameters { description?: string; } +/** + * Represents a reference to a saved Weave object. + * + * Generally, end users will not need to interact with this class directly. + * + * An ObjectRef contains the project ID, object ID, and digest that uniquely identify + * a saved object in Weave's storage system. 
+ * + * @example + * const ref = new ObjectRef('my-project', 'abc123', 'def456'); + * const uri = ref.uri(); // weave:///my-project/object/abc123:def456 + */ export class ObjectRef { constructor( public projectId: string, diff --git a/sdks/node/tsconfig.json b/sdks/node/tsconfig.json index 2b676b0408cb..f44dc0d29335 100644 --- a/sdks/node/tsconfig.json +++ b/sdks/node/tsconfig.json @@ -12,6 +12,7 @@ "weave": ["./src/index.ts"] } }, + "include": ["src/**/*"], "exclude": ["src", "examples", "dist", "node_modules"], "references": [ { diff --git a/tests/integrations/google_ai_studio/google_ai_studio_test.py b/tests/integrations/google_ai_studio/google_ai_studio_test.py index 044de479cee4..18b083ef96df 100644 --- a/tests/integrations/google_ai_studio/google_ai_studio_test.py +++ b/tests/integrations/google_ai_studio/google_ai_studio_test.py @@ -47,7 +47,7 @@ def assert_correct_summary(summary: dict, trace_name: str): assert summary["weave"]["latency_ms"] > 0 -@pytest.mark.retry(max_attempts=5) +@pytest.mark.retry(max_attempts=5, delay=5) @pytest.mark.skip_clickhouse_client def test_content_generation(client): import google.generativeai as genai @@ -65,12 +65,11 @@ def test_content_generation(client): trace_name = op_name_from_ref(call.op_name) assert trace_name == "google.generativeai.GenerativeModel.generate_content" assert call.output is not None - # TODO: Re-enable after dictify is fixed - # assert_correct_output_shape(call.output) - # assert_correct_summary(call.summary, trace_name) + assert_correct_output_shape(call.output) + assert_correct_summary(call.summary, trace_name) -@pytest.mark.retry(max_attempts=5) +@pytest.mark.retry(max_attempts=5, delay=5) @pytest.mark.skip_clickhouse_client def test_content_generation_stream(client): import google.generativeai as genai @@ -92,12 +91,11 @@ def test_content_generation_stream(client): trace_name = op_name_from_ref(call.op_name) assert trace_name == "google.generativeai.GenerativeModel.generate_content" assert call.output is not None - # TODO: Re-enable after dictify is fixed - # assert_correct_output_shape(call.output) - # assert_correct_summary(call.summary, trace_name) + assert_correct_output_shape(call.output) + assert_correct_summary(call.summary, trace_name) -@pytest.mark.retry(max_attempts=5) +@pytest.mark.retry(max_attempts=5, delay=5) @pytest.mark.asyncio @pytest.mark.skip_clickhouse_client async def test_content_generation_async(client): @@ -116,6 +114,5 @@ async def test_content_generation_async(client): trace_name = op_name_from_ref(call.op_name) assert trace_name == "google.generativeai.GenerativeModel.generate_content_async" assert call.output is not None - # TODO: Re-enable after dictify is fixed - # assert_correct_output_shape(call.output) - # assert_correct_summary(call.summary, trace_name) + assert_correct_output_shape(call.output) + assert_correct_summary(call.summary, trace_name) diff --git a/tests/integrations/litellm/client_completions_create_test.py b/tests/integrations/litellm/client_completions_create_test.py index a48f91554651..a5d9fb11baa3 100644 --- a/tests/integrations/litellm/client_completions_create_test.py +++ b/tests/integrations/litellm/client_completions_create_test.py @@ -57,8 +57,18 @@ def test_completions_create(client): "completion_tokens": 9, "prompt_tokens": 11, "total_tokens": 20, - "completion_tokens_details": {"audio_tokens": None, "reasoning_tokens": 0}, - "prompt_tokens_details": {"audio_tokens": None, "cached_tokens": 0}, + "completion_tokens_details": { + "accepted_prediction_tokens": None, + 
"audio_tokens": None, + "reasoning_tokens": 0, + "rejected_prediction_tokens": None, + }, + "prompt_tokens_details": { + "accepted_prediction_tokens": None, + "audio_tokens": None, + "cached_tokens": 0, + "rejected_prediction_tokens": None, + }, }, "service_tier": None, } diff --git a/tests/integrations/litellm/litellm_test.py b/tests/integrations/litellm/litellm_test.py index 4c696d94fffe..75bdcd7e8839 100644 --- a/tests/integrations/litellm/litellm_test.py +++ b/tests/integrations/litellm/litellm_test.py @@ -225,7 +225,6 @@ async def test_litellm_quickstart_stream_async( filter_headers=["authorization", "x-api-key"], allowed_hosts=["api.wandb.ai", "localhost"], ) -@pytest.mark.asyncio def test_model_predict( client: weave.trace.weave_client.WeaveClient, patch_litellm: None ) -> None: diff --git a/tests/trace/test_base_object_classes.py b/tests/trace/test_base_object_classes.py new file mode 100644 index 000000000000..a264941f7b00 --- /dev/null +++ b/tests/trace/test_base_object_classes.py @@ -0,0 +1,366 @@ +""" +This test file ensures the base_object_classes behavior is as expected. Specifically: +1. We ensure that pythonic publishing and getting of objects: + a. Results in the correct base_object_class filter in the query. + b. Produces identical results. +2. We ensure that using the low-level interface: + a. Results in the correct base_object_class filter in the query. + b. Produces identical results. +3. We ensure that digests are equivalent between pythonic and interface style creation. + This is important to ensure that UI-based generation of objects is consistent with + programmatic generation. +4. We ensure that invalid schemas are properly rejected from the server. +""" + +from typing import Literal, Optional + +import pytest +from pydantic import ValidationError + +import weave +from weave.trace import base_objects +from weave.trace.refs import ObjectRef +from weave.trace.weave_client import WeaveClient +from weave.trace_server import trace_server_interface as tsi +from weave.trace_server.interface.base_object_classes.test_only_example import ( + TestOnlyNestedBaseModel, +) + + +def with_base_object_class_annotations( + val: dict, + class_name: str, + base_object_name: Optional[Literal["Object", "BaseObject"]] = None, +): + """ + When serializing pydantic objects, add additional fields to indicate the class information. This is + a utlity to perform that mapping for the purposes of testing. We want to ensure that both the client + and server agree on this structure, therefore I am adding this utility here. + """ + bases = ["BaseModel"] + if base_object_name is not None: + bases.insert(0, base_object_name) + return { + **val, + "_type": class_name, + "_class_name": class_name, + "_bases": bases, + } + + +def test_pythonic_creation(client: WeaveClient): + # First, let's use the high-level pythonic creation API. 
+ nested_obj = base_objects.TestOnlyNestedBaseObject(b=3) + top_obj = base_objects.TestOnlyExample( + primitive=1, + nested_base_model=TestOnlyNestedBaseModel(a=2), + nested_base_object=weave.publish(nested_obj).uri(), + ) + ref = weave.publish(top_obj) + + top_obj_gotten = weave.ref(ref.uri()).get() + + assert isinstance(top_obj_gotten, base_objects.TestOnlyExample) + assert top_obj_gotten.model_dump() == top_obj.model_dump() + + objs_res = client.server.objs_query( + tsi.ObjQueryReq.model_validate( + { + "project_id": client._project_id(), + "filter": {"base_object_classes": ["TestOnlyExample"]}, + }, + ) + ) + objs = objs_res.objs + + assert len(objs) == 1 + assert ( + objs[0].val + == { + **with_base_object_class_annotations( + top_obj.model_dump(), "TestOnlyExample", "BaseObject" + ), + "nested_base_model": with_base_object_class_annotations( + top_obj.nested_base_model.model_dump(), "TestOnlyNestedBaseModel" + ), + } + == { + "_type": "TestOnlyExample", + "name": None, + "description": None, + "primitive": 1, + "nested_base_model": { + "_type": "TestOnlyNestedBaseModel", + "a": 2, + "_class_name": "TestOnlyNestedBaseModel", + "_bases": ["BaseModel"], + }, + "nested_base_object": "weave:///shawn/test-project/object/TestOnlyNestedBaseObject:JyFvHfyaJ79uCKpdZ3DD3if4NYam8QgTkzUlXQXAILI", + "_class_name": "TestOnlyExample", + "_bases": ["BaseObject", "BaseModel"], + } + ) + + objs_res = client.server.objs_query( + tsi.ObjQueryReq.model_validate( + { + "project_id": client._project_id(), + "filter": {"base_object_classes": ["TestOnlyNestedBaseObject"]}, + }, + ) + ) + objs = objs_res.objs + + assert len(objs) == 1 + assert ( + objs[0].val + == with_base_object_class_annotations( + nested_obj.model_dump(), "TestOnlyNestedBaseObject", "BaseObject" + ) + == { + "_type": "TestOnlyNestedBaseObject", + "name": None, + "description": None, + "b": 3, + "_class_name": "TestOnlyNestedBaseObject", + "_bases": ["BaseObject", "BaseModel"], + } + ) + + +def test_interface_creation(client): + # Now we will do the equivalent operation using the low-level interface. 
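+ # obj_create takes the already-serialized val, and set_base_object_class tells the server which registered schema to validate against (see test_schema_validation below for the rejection path).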
+ nested_obj_id = "TestOnlyNestedBaseObject" + nested_obj = base_objects.TestOnlyNestedBaseObject(b=3) + nested_obj_res = client.server.obj_create( + tsi.ObjCreateReq.model_validate( + { + "obj": { + "project_id": client._project_id(), + "object_id": nested_obj_id, + "val": nested_obj.model_dump(), + "set_base_object_class": "TestOnlyNestedBaseObject", + } + } + ) + ) + nested_obj_ref = ObjectRef( + entity=client.entity, + project=client.project, + name=nested_obj_id, + _digest=nested_obj_res.digest, + ) + + top_level_obj_id = "TestOnlyExample" + top_obj = base_objects.TestOnlyExample( + primitive=1, + nested_base_model=TestOnlyNestedBaseModel(a=2), + nested_base_object=nested_obj_ref.uri(), + ) + top_obj_res = client.server.obj_create( + tsi.ObjCreateReq.model_validate( + { + "obj": { + "project_id": client._project_id(), + "object_id": top_level_obj_id, + "val": top_obj.model_dump(), + "set_base_object_class": "TestOnlyExample", + } + } + ) + ) + top_obj_ref = ObjectRef( + entity=client.entity, + project=client.project, + name=top_level_obj_id, + _digest=top_obj_res.digest, + ) + + top_obj_gotten = weave.ref(top_obj_ref.uri()).get() + + assert top_obj_gotten.model_dump() == top_obj.model_dump() + + nested_obj_gotten = weave.ref(nested_obj_ref.uri()).get() + + assert nested_obj_gotten.model_dump() == nested_obj.model_dump() + + objs_res = client.server.objs_query( + tsi.ObjQueryReq.model_validate( + { + "project_id": client._project_id(), + "filter": {"base_object_classes": ["TestOnlyExample"]}, + }, + ) + ) + + objs = objs_res.objs + assert len(objs) == 1 + assert ( + objs[0].val + == { + **with_base_object_class_annotations( + top_obj.model_dump(), "TestOnlyExample", "BaseObject" + ), + "nested_base_model": with_base_object_class_annotations( + top_obj.nested_base_model.model_dump(), "TestOnlyNestedBaseModel" + ), + } + == { + "_type": "TestOnlyExample", + "name": None, + "description": None, + "primitive": 1, + "nested_base_model": { + "_type": "TestOnlyNestedBaseModel", + "a": 2, + "_class_name": "TestOnlyNestedBaseModel", + "_bases": ["BaseModel"], + }, + "nested_base_object": "weave:///shawn/test-project/object/TestOnlyNestedBaseObject:JyFvHfyaJ79uCKpdZ3DD3if4NYam8QgTkzUlXQXAILI", + "_class_name": "TestOnlyExample", + "_bases": ["BaseObject", "BaseModel"], + } + ) + + objs_res = client.server.objs_query( + tsi.ObjQueryReq.model_validate( + { + "project_id": client._project_id(), + "filter": {"base_object_classes": ["TestOnlyNestedBaseObject"]}, + }, + ) + ) + objs = objs_res.objs + assert len(objs) == 1 + assert ( + objs[0].val + == with_base_object_class_annotations( + nested_obj.model_dump(), "TestOnlyNestedBaseObject", "BaseObject" + ) + == { + "_type": "TestOnlyNestedBaseObject", + "name": None, + "description": None, + "b": 3, + "_class_name": "TestOnlyNestedBaseObject", + "_bases": ["BaseObject", "BaseModel"], + } + ) + + +def test_digest_equality(client): + # Next, let's make sure that the digests are all equivalent + nested_obj = base_objects.TestOnlyNestedBaseObject(b=3) + nested_ref = weave.publish(nested_obj) + top_obj = base_objects.TestOnlyExample( + primitive=1, + nested_base_model=TestOnlyNestedBaseModel(a=2), + nested_base_object=nested_ref.uri(), + ) + ref = weave.publish(top_obj) + nested_pythonic_digest = nested_ref.digest + top_level_pythonic_digest = ref.digest + + # Now we will do the equivalent operation using the low-level interface. 
+ nested_obj_id = "TestOnlyNestedBaseObject" + nested_obj = base_objects.TestOnlyNestedBaseObject(b=3) + nested_obj_res = client.server.obj_create( + tsi.ObjCreateReq.model_validate( + { + "obj": { + "project_id": client._project_id(), + "object_id": nested_obj_id, + "val": nested_obj.model_dump(), + "set_base_object_class": "TestOnlyNestedBaseObject", + } + } + ) + ) + nested_obj_ref = ObjectRef( + entity=client.entity, + project=client.project, + name=nested_obj_id, + _digest=nested_obj_res.digest, + ) + + nested_interface_style_digest = nested_obj_ref.digest + + assert nested_pythonic_digest == nested_interface_style_digest + + top_level_obj_id = "TestOnlyExample" + top_obj = base_objects.TestOnlyExample( + primitive=1, + nested_base_model=TestOnlyNestedBaseModel(a=2), + nested_base_object=nested_obj_ref.uri(), + ) + top_obj_res = client.server.obj_create( + tsi.ObjCreateReq.model_validate( + { + "obj": { + "project_id": client._project_id(), + "object_id": top_level_obj_id, + "val": top_obj.model_dump(), + "set_base_object_class": "TestOnlyExample", + } + } + ) + ) + + top_level_interface_style_digest = top_obj_res.digest + + assert top_level_pythonic_digest == top_level_interface_style_digest + + +def test_schema_validation(client): + # Test that we can't create an object with the wrong schema + with pytest.raises(ValidationError): + client.server.obj_create( + tsi.ObjCreateReq.model_validate( + { + "obj": { + "project_id": client._project_id(), + "object_id": "nested_obj", + # Incorrect schema, should raise! + "val": {"a": 2}, + "set_base_object_class": "TestOnlyNestedBaseObject", + } + } + ) + ) + + # Correct schema, should work + client.server.obj_create( + tsi.ObjCreateReq.model_validate( + { + "obj": { + "project_id": client._project_id(), + "object_id": "nested_obj", + "val": { + "b": 2, + "_class_name": "TestOnlyNestedBaseObject", + "_bases": ["BaseObject", "BaseModel"], + }, + "set_base_object_class": "TestOnlyNestedBaseObject", + } + } + ) + ) + + with pytest.raises(ValueError): + # Mismatching base object class, should raise + client.server.obj_create( + tsi.ObjCreateReq.model_validate( + { + "obj": { + "project_id": client._project_id(), + "object_id": "nested_obj", + "val": { + "b": 2, + "_class_name": "TestOnlyNestedBaseObject", + "_bases": ["BaseObject", "BaseModel"], + }, + "set_base_object_class": "TestOnlyExample", + } + } + ) + ) diff --git a/tests/trace/test_deepcopy.py b/tests/trace/test_deepcopy.py new file mode 100644 index 000000000000..8ecc6fd42794 --- /dev/null +++ b/tests/trace/test_deepcopy.py @@ -0,0 +1,97 @@ +from copy import deepcopy + +import pytest + +import weave +from weave.trace.object_record import ObjectRecord +from weave.trace.vals import WeaveDict, WeaveList, WeaveObject + + +@pytest.fixture +def example_class(): + class Example(weave.Object): + a: int = 1 + b: int = 2 + + expected_record = ObjectRecord( + attrs={ + "name": None, + "description": None, + "_class_name": "Example", + "_bases": ["Object", "BaseModel"], + "a": 1, + "b": 2, + } + ) + + return Example, expected_record + + +def test_deepcopy_weavelist(client): + lst = WeaveList([1, 2, 3], server=client.server) + res = deepcopy(lst) + assert res == [1, 2, 3] + assert id(res) != id(lst) + + +def test_deepcopy_weavelist_e2e(client): + lst = [1, 2, 3] + ref = weave.publish(lst) + lst2 = ref.get() + res = deepcopy(lst2) + assert res == [1, 2, 3] + assert id(res) != id(lst2) + + +def test_deepcopy_weavedict(client): + d = WeaveDict({"a": 1, "b": 2}, server=client.server) + res = deepcopy(d) + 
assert res == {"a": 1, "b": 2} + assert id(res) != id(d) + + +def test_deepcopy_weavedict_e2e(client): + d = {"a": 1, "b": 2} + ref = weave.publish(d) + d2 = ref.get() + res = deepcopy(d2) + assert res == {"a": 1, "b": 2} + assert id(res) != id(d2) + + +def test_deepcopy_weaveobject(client, example_class): + _, expected_record = example_class + + o = WeaveObject( + expected_record, + ref=None, + root=None, + server=client.server, + ) + res = deepcopy(o) + assert res == expected_record + assert id(res) != id(o) + + +def test_deepcopy_weaveobject_e2e(client, example_class): + cls, expected_record = example_class + + o = cls() + ref = weave.publish(o) + o2 = ref.get() + res = deepcopy(o2) + assert res == expected_record + assert id(res) != id(o2) + + +# # Not sure about the implications here yet +# def test_deepcopy_weavetable(client): +# t = WeaveTable( +# table_ref=None, +# ref=None, +# server=client.server, +# filter=TableRowFilter(), +# root=None, +# ) +# res = deepcopy(t) +# assert res == t diff --git a/tests/trace/test_leaderboard.py b/tests/trace/test_leaderboard.py new file mode 100644 index 000000000000..a25fccbdd357 --- /dev/null +++ b/tests/trace/test_leaderboard.py @@ -0,0 +1,191 @@ +import pytest + +import weave +from weave.flow import leaderboard +from weave.trace.weave_client import get_ref + + +def test_leaderboard_empty(client): + evaluation_obj_1 = weave.Evaluation( + name="test_evaluation_name", + dataset=[{"input": -1, "target": -1}], + scorers=[], + ) + + weave.publish(evaluation_obj_1) + + spec = leaderboard.Leaderboard( + name="Empty Leaderboard", + description="""This is an empty leaderboard""", + columns=[ + leaderboard.LeaderboardColumn( + evaluation_object_ref=get_ref(evaluation_obj_1).uri(), + scorer_name="test_scorer_name", + summary_metric_path="test_summary_metric_path", + ) + ], + ) + + ref = weave.publish(spec) + + # Overriding spec to show that this works + spec = ref.get() + + results = leaderboard.get_leaderboard_results(spec, client) + assert len(results) == 0 + + +def test_leaderboard_mis_configured(client): + spec = leaderboard.Leaderboard( + name="Misconfigured Leaderboard", + description="""This is a misconfigured leaderboard""", + columns=[ + leaderboard.LeaderboardColumn( + evaluation_object_ref="test_evaluation_object_ref", + scorer_name="test_scorer_name", + summary_metric_path="test_summary_metric_path", + ) + ], + ) + + ref = weave.publish(spec) + + # Overriding spec to show that this works + spec = ref.get() + + results = leaderboard.get_leaderboard_results(spec, client) + assert len(results) == 0 + + +async def do_evaluations(): + @weave.op + def my_scorer(target, output): + return target == output + + evaluation_obj_1 = weave.Evaluation( + name="test_evaluation_name", + dataset=[{"input": 1, "target": 1}], + scorers=[my_scorer], + ) + + @weave.op + def simple_model(input): + return input + + await evaluation_obj_1.evaluate(simple_model) + + evaluation_obj_2 = weave.Evaluation( + name="test_evaluation_name", + dataset=[{"input": 1, "target": 1}, {"input": 2, "target": 2}], + scorers=[my_scorer], + ) + + @weave.op + def static_model(input): + return 1 + + @weave.op + def bad_model(input): + return input + 1 + + await evaluation_obj_2.evaluate(simple_model) + await evaluation_obj_2.evaluate(static_model) + await evaluation_obj_2.evaluate(bad_model) + + return evaluation_obj_1, evaluation_obj_2, simple_model, static_model, bad_model + + +@pytest.mark.asyncio +async def test_leaderboard_with_results(client): + ( + evaluation_obj_1, + 
evaluation_obj_2, + simple_model, + static_model, + bad_model, + ) = await do_evaluations() + + spec = leaderboard.Leaderboard( + name="Simple Leaderboard", + description="""This is a simple leaderboard""", + columns=[ + leaderboard.LeaderboardColumn( + evaluation_object_ref=get_ref(evaluation_obj_1).uri(), + scorer_name="my_scorer", + summary_metric_path="true_fraction", + ) + ], + ) + + ref = weave.publish(spec) + + # Overriding spec to show that this works + spec = ref.get() + + results = leaderboard.get_leaderboard_results(spec, client) + assert len(results) == 1 + assert results[0].model_ref == get_ref(simple_model).uri() + assert results[0].column_scores[0].scores[0].value == 1.0 + + spec = leaderboard.Leaderboard( + name="Complex Leaderboard", + description=""" +This leaderboard has multiple columns + +### Columns + +1. Column 1: + - Evaluation Object: test_evaluation_object_ref + - Scorer Name: test_scorer_name + - Summary Metric Path: test_summary_metric_path +2. Column 2: + - Evaluation Object: test_evaluation_object_ref + - Scorer Name: test_scorer_name + - Summary Metric Path: test_summary_metric_path +3. Column 3: + - Evaluation Object: test_evaluation_object_ref + - Scorer Name: test_scorer_name + - Summary Metric Path: test_summary_metric_path +""", + columns=[ + leaderboard.LeaderboardColumn( + evaluation_object_ref=get_ref(evaluation_obj_2).uri(), + scorer_name="my_scorer", + summary_metric_path="true_count", + ), + leaderboard.LeaderboardColumn( + evaluation_object_ref=get_ref(evaluation_obj_2).uri(), + scorer_name="my_scorer", + should_minimize=True, + summary_metric_path="true_fraction", + ), + leaderboard.LeaderboardColumn( + evaluation_object_ref=get_ref(evaluation_obj_1).uri(), + scorer_name="my_scorer", + summary_metric_path="true_fraction", + ), + ], + ) + + ref = weave.publish(spec) + + # Overriding spec to show that this works + spec = ref.get() + + results = leaderboard.get_leaderboard_results(spec, client) + assert len(results) == 3 + assert results[0].model_ref == get_ref(simple_model).uri() + assert len(results[0].column_scores) == 3 + assert results[0].column_scores[0].scores[0].value == 2.0 + assert results[0].column_scores[1].scores[0].value == 1.0 + assert results[0].column_scores[2].scores[0].value == 1.0 + assert results[1].model_ref == get_ref(static_model).uri() + assert len(results[1].column_scores) == 3 + assert results[1].column_scores[0].scores[0].value == 1.0 + assert results[1].column_scores[1].scores[0].value == 0.5 + assert len(results[1].column_scores[2].scores) == 0 + assert results[2].model_ref == get_ref(bad_model).uri() + assert len(results[2].column_scores) == 3 + assert results[2].column_scores[0].scores[0].value == 0 + assert results[2].column_scores[1].scores[0].value == 0 + assert len(results[2].column_scores[2].scores) == 0 diff --git a/tests/trace/test_serialize.py b/tests/trace/test_serialize.py index ae3ec52246d3..a6f08dfad1c7 100644 --- a/tests/trace/test_serialize.py +++ b/tests/trace/test_serialize.py @@ -150,3 +150,52 @@ def to_dict(self) -> dict: pt = Point(1, 2) assert fallback_encode(pt) == repr(pt) + + +def test_dictify_sanitizes() -> None: + class MyClass: + api_key: str + + def __init__(self, secret: str) -> None: + self.api_key = secret + + instance = MyClass("sk-1234567890qwertyuiop") + assert dictify(instance) == { + "__class__": { + "module": "test_serialize", + "qualname": "test_dictify_sanitizes.<locals>.MyClass", + "name": "MyClass", + }, + "api_key": "REDACTED", + } + + +def test_dictify_sanitizes_nested() -> None: + 
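"""Nested api_key attributes should be redacted just like top-level ones.""" +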
class MyClassA: + api_key: str + + def __init__(self, secret: str) -> None: + self.api_key = secret + + class MyClassB: + a: MyClassA + + def __init__(self, a: MyClassA) -> None: + self.a = a + + instance = MyClassB(MyClassA("sk-1234567890qwertyuiop")) + assert dictify(instance) == { + "__class__": { + "module": "test_serialize", + "qualname": "test_dictify_sanitizes_nested.<locals>.MyClassB", + "name": "MyClassB", + }, + "a": { + "__class__": { + "module": "test_serialize", + "qualname": "test_dictify_sanitizes_nested.<locals>.MyClassA", + "name": "MyClassA", + }, + "api_key": "REDACTED", + }, + } diff --git a/tests/trace/test_weave_client.py b/tests/trace/test_weave_client.py index 6f0af63d1038..fe6479df5da7 100644 --- a/tests/trace/test_weave_client.py +++ b/tests/trace/test_weave_client.py @@ -666,7 +666,6 @@ def custom_obj_load(artifact, name): assert obj2.b == "x" -@pytest.mark.skip(reason="Re-enable after dictify is fixed") def test_save_unknown_type(client): class SomeUnknownThing: def __init__(self, a): @@ -675,14 +674,7 @@ def __init__(self, a): obj = SomeUnknownThing(3) ref = client._save_object(obj, "my-np-array") obj2 = client.get(ref) - assert obj2 == { - "__class__": { - "module": "test_weave_client", - "qualname": "test_save_unknown_type.<locals>.SomeUnknownThing", - "name": "SomeUnknownThing", - }, - "a": 3, - } + assert obj2 == repr(obj) def test_save_model(client): diff --git a/tests/trace_server/test_calls_query_builder.py b/tests/trace_server/test_calls_query_builder.py index d9a92c6201e4..23716a13a68c 100644 --- a/tests/trace_server/test_calls_query_builder.py +++ b/tests/trace_server/test_calls_query_builder.py @@ -6,27 +6,12 @@ from weave.trace_server.orm import ParamBuilder -def assert_sql(cq: CallsQuery, exp_queries, exp_params): - pb = ParamBuilder("pb") - queries = cq.as_sql(pb) - params = pb.get_params() - - for qr, qe in zip(queries, exp_queries): - exp_formatted = sqlparse.format(qe, reindent=True) - found_formatted = sqlparse.format(qr, reindent=True) - - assert exp_formatted == found_formatted - - assert exp_params == params - - def test_query_baseline() -> None: cq = CallsQuery(project_id="project") cq.add_field("id") assert_sql( cq, - [ - """ + """ SELECT calls_merged.id AS id FROM calls_merged WHERE project_id = {pb_0:String} GROUP BY (project_id,id) HAVING ( ((any(calls_merged.deleted_at) IS NULL)) AND ((NOT ((any(calls_merged.started_at) IS NULL)) )) ) - """ - ], + """, {"pb_0": "project"}, ) @@ -54,8 +38,7 @@ def test_query_light_column() -> None: cq.add_field("started_at") assert_sql( cq, - [ - """ + """ SELECT calls_merged.id AS id, any(calls_merged.started_at) AS started_at FROM calls_merged WHERE project_id = {pb_0:String} GROUP BY (project_id,id) HAVING ( ((any(calls_merged.deleted_at) IS NULL)) AND ((NOT ((any(calls_merged.started_at) IS NULL)) )) ) - """ - ], + """, {"pb_0": "project"}, ) @@ -85,8 +67,7 @@ def test_query_heavy_column() -> None: cq.add_field("inputs") assert_sql( cq, - [ - """ + """ SELECT calls_merged.id AS id, any(calls_merged.inputs_dump) AS inputs_dump FROM calls_merged WHERE project_id = {pb_0:String} GROUP BY (project_id,id) HAVING ( ((any(calls_merged.deleted_at) IS NULL)) AND ((NOT ((any(calls_merged.started_at) IS NULL)) )) ) - """ - ], + """, {"pb_0": "project"}, ) @@ -123,8 +103,7 @@ def test_query_heavy_column_simple_filter() -> None: ... assert_sql( cq, - [ - """ + """ WITH filtered_calls AS ( SELECT calls_merged.id AS id FROM calls_merged WHERE project_id = {pb_1:String} GROUP BY (project_id,id) HAVING ( ((any(calls_merged.deleted_at) IS NULL)) AND ((NOT ((any(calls_merged.started_at) IS NULL)))) AND (any(calls_merged.op_name) IN {pb_0:Array(String)}) )) SELECT calls_merged.id AS id, any(calls_merged.inputs_dump) AS inputs_dump FROM calls_merged WHERE project_id = {pb_2:String} AND (id IN filtered_calls) GROUP BY (project_id,id) - """ - ], + """, {"pb_0": ["a", "b"], "pb_1": "project", "pb_2": "project"}, ) @@ -146,8 +125,7 ... @@ -166,8 +144,7 @@ def 
test_query_heavy_column_simple_filter_with_order() -> None: (id IN filtered_calls) GROUP BY (project_id,id) ORDER BY any(calls_merged.started_at) DESC - """ - ], + """, {"pb_0": ["a", "b"], "pb_1": "project", "pb_2": "project"}, ) @@ -211,8 +187,7 @@ def test_query_heavy_column_simple_filter_with_order_and_limit() -> None: ) assert_sql( cq, - [ - """ + """ WITH filtered_calls AS ( SELECT calls_merged.id AS id @@ -239,8 +214,7 @@ def test_query_heavy_column_simple_filter_with_order_and_limit() -> None: (id IN filtered_calls) GROUP BY (project_id,id) ORDER BY any(calls_merged.started_at) DESC - """ - ], + """, {"pb_0": ["a", "b"], "pb_1": "project", "pb_2": "project"}, ) @@ -279,8 +253,7 @@ def test_query_heavy_column_simple_filter_with_order_and_limit_and_mixed_query_c ) assert_sql( cq, - [ - """ + """ WITH filtered_calls AS ( SELECT calls_merged.id AS id @@ -311,8 +284,7 @@ def test_query_heavy_column_simple_filter_with_order_and_limit_and_mixed_query_c ) ORDER BY any(calls_merged.started_at) DESC LIMIT 10 - """ - ], + """, { "pb_0": "my_user_id", "pb_1": ["a", "b"], @@ -324,83 +296,17 @@ def test_query_heavy_column_simple_filter_with_order_and_limit_and_mixed_query_c ) -def test_query_heavy_column_simple_filter_with_order_and_limit_and_mixed_query_conditions_two_step() -> ( - None -): - cq = CallsQuery(project_id="project") - cq.add_field("id") - cq.add_field("inputs") - cq.add_order("started_at", "desc") - cq.set_limit(10) - cq.set_hardcoded_filter( - HardCodedFilter( - filter=tsi.CallsFilter( - op_names=["a", "b"], - ) - ) - ) - cq.add_condition( - tsi_query.AndOperation.model_validate( - { - "$and": [ - { - "$eq": [ - {"$getField": "inputs.param.val"}, - {"$literal": "hello"}, - ] - }, # <-- heavy condition - { - "$eq": [{"$getField": "wb_user_id"}, {"$literal": "my_user_id"}] - }, # <-- light condition - ] - } - ) - ) - cq.set_filtered_output_param("filtered_calls") - assert_sql( - cq, - [ - """ - SELECT - calls_merged.id AS id - FROM calls_merged - WHERE project_id = {pb_2:String} - GROUP BY (project_id,id) - HAVING ( - ((any(calls_merged.wb_user_id) = {pb_0:String})) - AND - ((any(calls_merged.deleted_at) IS NULL)) - AND - ((NOT ((any(calls_merged.started_at) IS NULL)))) - AND - (any(calls_merged.op_name) IN {pb_1:Array(String)}) - )""", - """ - SELECT - calls_merged.id AS id, - any(calls_merged.inputs_dump) AS inputs_dump - FROM calls_merged - WHERE - project_id = {pb_5:String} - AND - (id IN {filtered_calls:Array(String)}) - GROUP BY (project_id,id) - HAVING ( - JSON_VALUE(any(calls_merged.inputs_dump), {pb_3:String}) = {pb_4:String} - ) - ORDER BY any(calls_merged.started_at) DESC - LIMIT 10 - """, - ], - { - "pb_0": "my_user_id", - "pb_1": ["a", "b"], - "pb_2": "project", - "pb_3": '$."param"."val"', - "pb_4": "hello", - "pb_5": "project", - }, - ) +def assert_sql(cq: CallsQuery, exp_query, exp_params): + pb = ParamBuilder("pb") + query = cq.as_sql(pb) + params = pb.get_params() + + assert exp_params == params + + exp_formatted = sqlparse.format(exp_query, reindent=True) + found_formatted = sqlparse.format(query, reindent=True) + + assert exp_formatted == found_formatted def test_query_light_column_with_costs() -> None: @@ -418,8 +324,7 @@ def test_query_light_column_with_costs() -> None: ) assert_sql( cq, - [ - """ + """ WITH filtered_calls AS ( SELECT calls_merged.id AS id @@ -531,8 +436,7 @@ def test_query_light_column_with_costs() -> None: FROM ranked_prices WHERE (rank = {pb_3:UInt64}) GROUP BY id, started_at - """ - ], + """, { "pb_0": ["a", "b"], "pb_1": 
"UHJvamVjdEludGVybmFsSWQ6Mzk1NDg2Mjc=", diff --git a/weave-js/package.json b/weave-js/package.json index d925f9d0a423..6f0a7ab4aeb0 100644 --- a/weave-js/package.json +++ b/weave-js/package.json @@ -19,8 +19,10 @@ "generate:watch": "graphql-codegen -w", "prettier": "prettier --config .prettierrc --check \"src/**/*.ts\" \"src/**/*.tsx\"", "prettier-fix": "prettier --loglevel warn --config .prettierrc --write \"src/**/*.ts\" \"src/**/*.tsx\"", + "direct-prettier": "prettier", "lint": "yarn eslint & yarn tslint & yarn prettier & wait", - "lint-fix": "yarn eslint-fix & yarn tslint-fix & yarn prettier-fix & wait" + "lint-fix": "yarn eslint-fix & yarn tslint-fix & yarn prettier-fix & wait", + "generate-schemas": "bash scripts/generate-schemas.sh" }, "dependencies": { "@apollo/client": "^3.8.4", @@ -91,13 +93,13 @@ "plotly.js-dist-min": "^2.6.3", "prismjs": "1.29.0", "query-string": "^8.1.0", - "react": "^17.0.2", + "react": "^18.3.1", "react-app-polyfill": "^3.0.0", "react-base-table": "^1.12.0", "react-cytoscapejs": "^1.2.1", "react-datetime": "^3", "react-diff-viewer": "^3.1.1", - "react-dom": "^17.0.2", + "react-dom": "^18.3.1", "react-hook-mousetrap": "^2.0.4", "react-intersection-observer": "^8.31.0", "react-markdown": "^8.0.7", @@ -145,7 +147,8 @@ "wavesurfer.js": "^2.0.0", "web-tree-sitter": "^0.20.5", "yet-another-react-lightbox": "^3.17.5", - "zen-observable": "^0.10.0" + "zen-observable": "^0.10.0", + "zod": "^3.23.8" }, "devDependencies": { "@babel/core": "^7.23.2", @@ -217,11 +220,13 @@ "identity-obj-proxy": "^3.0.0", "jsdom": "^22.1.0", "json-schema-to-typescript": "^11.0.2", + "json-schema-to-zod": "^2.4.1", "less": "^2.7.3", "lodash.defaults": "^4.2.0", "nodemon": "^2.0.22", "prettier": "^2.8.7", "prettier-plugin-tailwindcss": "^0.2.1", + "quicktype": "^23.0.170", "rimraf": "^3.0.2", "rollup-plugin-visualizer": "^5.5.2", "tailwindcss": "^3.3.2", @@ -233,13 +238,13 @@ "typescript": "4.7.4", "uuid": "^9.0.0", "vite": "5.2.9", - "vitest": "^1.6.0" + "vitest": "^1.6.0", + "tsd": "^0.30.4" }, "resolutions": { "@types/react": "^17.0.26", "@types/react-dom": "^17.0.9", "handlebars": "^4.7.7", - "lodash": "^4.17.21", - "react": "^17.0.2" + "lodash": "^4.17.21" } } diff --git a/weave-js/scripts/generate-schemas.sh b/weave-js/scripts/generate-schemas.sh new file mode 100644 index 000000000000..545d81e17afb --- /dev/null +++ b/weave-js/scripts/generate-schemas.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Exit on error +set -e + +SCHEMA_INPUT_PATH="../weave/trace_server/interface/base_object_classes/generated/generated_base_object_class_schemas.json" +SCHEMA_OUTPUT_PATH="./src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/generatedBaseObjectClasses.zod.ts" + +echo "Generating schemas..." 
+
+# Generate TypeScript-Zod types from schema
+yarn quicktype -s schema "$SCHEMA_INPUT_PATH" -o "$SCHEMA_OUTPUT_PATH" --lang typescript-zod
+
+# Transform the schema to extract the type map
+sed -i.bak '
+  # Find the GeneratedBaseObjectClassesZodSchema definition and capture its contents
+  /export const GeneratedBaseObjectClassesZodSchema = z.object({/,/});/ {
+    # Replace the opening line with typeMap declaration
+    s/export const GeneratedBaseObjectClassesZodSchema = z.object({/export const baseObjectClassRegistry = ({/
+    # Store the pattern
+    h
+    # If this is the last line (with closing brace), append the schema definition
+    /});/ {
+      p
+      s/.*//
+      x
+      s/.*//
+      i\
+\
+export const GeneratedBaseObjectClassesZodSchema = z.object(baseObjectClassRegistry)
+    }
+  }
+' "$SCHEMA_OUTPUT_PATH"
+
+# Remove backup file
+rm "${SCHEMA_OUTPUT_PATH}.bak"
+
+# Format the generated file
+yarn direct-prettier --write "$SCHEMA_OUTPUT_PATH"
+
+echo "Schema generation completed successfully"
\ No newline at end of file
diff --git a/weave-js/src/assets/icons/icon-drag-grip.svg b/weave-js/src/assets/icons/icon-drag-grip.svg
new file mode 100644
index 000000000000..5a34d1c4b436
--- /dev/null
+++ b/weave-js/src/assets/icons/icon-drag-grip.svg
@@ -0,0 +1,8 @@
+<!-- 8 lines of SVG markup for the drag-grip icon; the tag contents did not survive extraction -->
diff --git a/weave-js/src/assets/icons/icon-overflow-vertical.svg b/weave-js/src/assets/icons/icon-overflow-vertical.svg
new file mode 100644
index 000000000000..07505a23c59d
--- /dev/null
+++ b/weave-js/src/assets/icons/icon-overflow-vertical.svg
@@ -0,0 +1,5 @@
+<!-- 5 lines of SVG markup for the overflow-vertical icon; the tag contents did not survive extraction -->
diff --git a/weave-js/src/components/FancyPage/useProjectSidebar.ts b/weave-js/src/components/FancyPage/useProjectSidebar.ts
index b77290b7d1d6..c9d0b9289976 100644
--- a/weave-js/src/components/FancyPage/useProjectSidebar.ts
+++ b/weave-js/src/components/FancyPage/useProjectSidebar.ts
@@ -31,6 +31,7 @@ export const useProjectSidebar = (
   const isNoSidebarItems = !showModelsSidebarItems && !showWeaveSidebarItems;
   const isBothSidebarItems = showModelsSidebarItems && showWeaveSidebarItems;
   const isShowAll = isNoSidebarItems || isBothSidebarItems;
+
   return useMemo(() => {
     const allItems = isLoading
      ?
[] @@ -137,6 +138,11 @@ export const useProjectSidebar = ( isShown: showWeaveSidebarItems || isShowAll, iconName: IconNames.LayoutTabs, }, + { + type: 'divider' as const, + key: 'dividerWithinWeave-1', + isShown: isWeaveOnly, + }, { type: 'button' as const, name: 'Evals', @@ -144,6 +150,18 @@ export const useProjectSidebar = ( isShown: showWeaveSidebarItems || isShowAll, iconName: IconNames.BaselineAlt, }, + { + type: 'button' as const, + name: 'Leaders', + slug: 'weave/leaderboards', + isShown: isWeaveOnly, + iconName: IconNames.BenchmarkSquare, + }, + { + type: 'divider' as const, + key: 'dividerWithinWeave-2', + isShown: isWeaveOnly, + }, // { // type: 'button' as const, // name: 'Prompts', @@ -167,7 +185,7 @@ export const useProjectSidebar = ( }, { type: 'divider' as const, - key: 'dividerWithinWeave', + key: 'dividerWithinWeave-3', isShown: isWeaveOnly, }, { @@ -193,7 +211,7 @@ export const useProjectSidebar = ( key: 'moreWeave', isShown: isShowAll, // iconName: IconNames.OverflowHorizontal, - menu: ['weave/operations', 'weave/objects'], + menu: ['weave/leaderboards', 'weave/operations', 'weave/objects'], }, ]; @@ -220,10 +238,10 @@ export const useProjectSidebar = ( return onlyShownItems; }, [ isLoading, - isModelsOnly, - isWeaveOnly, - showWeaveSidebarItems, isShowAll, + isWeaveOnly, viewingRestricted, + isModelsOnly, + showWeaveSidebarItems, ]); }; diff --git a/weave-js/src/components/Form/AutoComplete.tsx b/weave-js/src/components/Form/AutoComplete.tsx index 6455790fb3cf..4fba92aec381 100644 --- a/weave-js/src/components/Form/AutoComplete.tsx +++ b/weave-js/src/components/Form/AutoComplete.tsx @@ -63,6 +63,10 @@ const getStyles = (props: AdditionalProps) => { minHeight: `${HEIGHTS[size]} !important`, }, }, + '& .MuiAutocomplete-popupIndicator': { + borderRadius: '4px', + padding: '4px', + }, '&.MuiAutocomplete-hasPopupIcon .MuiOutlinedInput-root, &.MuiAutocomplete-hasClearIcon .MuiOutlinedInput-root': { paddingRight: props.hasInputValue ? '28px' : '0px', // Apply padding only if input exists diff --git a/weave-js/src/components/Form/Select.tsx b/weave-js/src/components/Form/Select.tsx index 30da9ea00fec..2163b6af1805 100644 --- a/weave-js/src/components/Form/Select.tsx +++ b/weave-js/src/components/Form/Select.tsx @@ -16,7 +16,8 @@ import { MOON_800, RED_550, TEAL_300, - TEAL_500, + TEAL_350, + TEAL_400, TEAL_600, } from '@wandb/weave/common/css/globals.styles'; import {Icon} from '@wandb/weave/components/Icon'; @@ -204,10 +205,8 @@ const getStyles = < }, control: (baseStyles, state) => { const colorBorderDefault = MOON_250; - const colorBorderHover = hexToRGB(TEAL_500, 0.4); - const colorBorderOpen = errorState - ? hexToRGB(RED_550, 0.64) - : hexToRGB(TEAL_500, 0.64); + const colorBorderHover = TEAL_350; + const colorBorderOpen = errorState ? hexToRGB(RED_550, 0.64) : TEAL_400; const height = HEIGHTS[size]; const minHeight = MIN_HEIGHTS[size] ?? height; const lineHeight = LINE_HEIGHTS[size]; @@ -226,9 +225,10 @@ const getStyles = < ? `0 0 0 2px ${colorBorderOpen}` : `inset 0 0 0 1px ${colorBorderDefault}`, '&:hover': { - boxShadow: state.menuIsOpen - ? `0 0 0 2px ${colorBorderOpen}` - : `0 0 0 2px ${colorBorderHover}`, + boxShadow: + state.menuIsOpen || state.isFocused + ? 
`0 0 0 2px ${colorBorderOpen}`
+            : `0 0 0 2px ${colorBorderHover}`,
       },
     };
   },
diff --git a/weave-js/src/components/Icon/Icon.tsx b/weave-js/src/components/Icon/Icon.tsx
index f8cec63146bb..e552741b9260 100644
--- a/weave-js/src/components/Icon/Icon.tsx
+++ b/weave-js/src/components/Icon/Icon.tsx
@@ -60,6 +60,7 @@ import {ReactComponent as ImportDocument} from '../../assets/icons/icon-document.svg';
 import {ReactComponent as ImportDocumentation} from '../../assets/icons/icon-documentation.svg';
 import {ReactComponent as ImportDownload} from '../../assets/icons/icon-download.svg';
 import {ReactComponent as ImportDraft} from '../../assets/icons/icon-draft.svg';
+import {ReactComponent as ImportDragGrip} from '../../assets/icons/icon-drag-grip.svg';
 import {ReactComponent as ImportDragGripHorizontal} from '../../assets/icons/icon-drag-grip-horizontal.svg';
 import {ReactComponent as ImportEducationAcademic} from '../../assets/icons/icon-education-academic.svg';
 import {ReactComponent as ImportEmailAt} from '../../assets/icons/icon-email-at.svg';
@@ -141,6 +142,7 @@ import {ReactComponent as ImportOpenaiLogo} from '../../assets/icons/icon-openai-logo.svg';
 import {ReactComponent as ImportOrchestrationLaunch} from '../../assets/icons/icon-orchestration-launch.svg';
 import {ReactComponent as ImportOrganizationCorporate} from '../../assets/icons/icon-organization-corporate.svg';
 import {ReactComponent as ImportOverflowHorizontal} from '../../assets/icons/icon-overflow-horizontal.svg';
+import {ReactComponent as ImportOverflowVertical} from '../../assets/icons/icon-overflow-vertical.svg';
 import {ReactComponent as ImportPanTool} from '../../assets/icons/icon-pan-tool.svg';
 import {ReactComponent as ImportPanTool1} from '../../assets/icons/icon-pan-tool-1.svg';
 import {ReactComponent as ImportPanel} from '../../assets/icons/icon-panel.svg';
@@ -448,6 +450,9 @@ export const IconDownload = (props: SVGIconProps) => (
 export const IconDraft = (props: SVGIconProps) => (
   <ImportDraft {...props} />
 );
+export const IconDragGrip = (props: SVGIconProps) => (
+  <ImportDragGrip {...props} />
+);
 export const IconDragGripHorizontal = (props: SVGIconProps) => (
   <ImportDragGripHorizontal {...props} />
 );
@@ -691,6 +696,9 @@ export const IconOrganizationCorporate = (props: SVGIconProps) => (
 export const IconOverflowHorizontal = (props: SVGIconProps) => (
   <ImportOverflowHorizontal {...props} />
 );
+export const IconOverflowVertical = (props: SVGIconProps) => (
+  <ImportOverflowVertical {...props} />
+);
 export const IconPanTool = (props: SVGIconProps) => (
   <ImportPanTool {...props} />
 );
@@ -1092,6 +1100,7 @@ const ICON_NAME_TO_ICON: Record = {
   documentation: IconDocumentation,
   download: IconDownload,
   draft: IconDraft,
+  'drag-grip': IconDragGrip,
   'drag-grip-horizontal': IconDragGripHorizontal,
   'education-academic': IconEducationAcademic,
   'email-at': IconEmailAt,
@@ -1173,6 +1182,7 @@
   'orchestration-launch': IconOrchestrationLaunch,
   'organization-corporate': IconOrganizationCorporate,
   'overflow-horizontal': IconOverflowHorizontal,
+  'overflow-vertical': IconOverflowVertical,
   'pan-tool': IconPanTool,
   'pan-tool-1': IconPanTool1,
   panel: IconPanel,
diff --git a/weave-js/src/components/Icon/index.ts b/weave-js/src/components/Icon/index.ts
index fa9e1c104542..f2e4964c77fd 100644
--- a/weave-js/src/components/Icon/index.ts
+++ b/weave-js/src/components/Icon/index.ts
@@ -60,6 +60,7 @@ export {
   IconDocumentation,
   IconDownload,
   IconDraft,
+  IconDragGrip,
   IconDragGripHorizontal,
   IconEducationAcademic,
   IconEmailAt,
@@ -141,6 +142,7 @@ export {
   IconOrchestrationLaunch,
   IconOrganizationCorporate,
   IconOverflowHorizontal,
+  IconOverflowVertical,
   IconPanel,
   IconPanelAutoGen,
   IconPanelManual,
diff --git
a/weave-js/src/components/Icon/types.ts b/weave-js/src/components/Icon/types.ts index e536e3651577..d5a53de5f86a 100644 --- a/weave-js/src/components/Icon/types.ts +++ b/weave-js/src/components/Icon/types.ts @@ -59,6 +59,7 @@ export const IconNames = { Documentation: 'documentation', Download: 'download', Draft: 'draft', + DragGrip: 'drag-grip', DragGripHorizontal: 'drag-grip-horizontal', EducationAcademic: 'education-academic', EmailAt: 'email-at', @@ -140,6 +141,7 @@ export const IconNames = { OrchestrationLaunch: 'orchestration-launch', OrganizationCorporate: 'organization-corporate', OverflowHorizontal: 'overflow-horizontal', + OverflowVertical: 'overflow-vertical', PanTool: 'pan-tool', PanTool1: 'pan-tool-1', Panel: 'panel', diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3.tsx index bee4705042c6..2c4588142450 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3.tsx @@ -82,6 +82,8 @@ import {Empty} from './Browse3/pages/common/Empty'; import {EMPTY_NO_TRACE_SERVER} from './Browse3/pages/common/EmptyContent'; import {SimplePageLayoutContext} from './Browse3/pages/common/SimplePageLayout'; import {CompareEvaluationsPage} from './Browse3/pages/CompareEvaluationsPage/CompareEvaluationsPage'; +import {LeaderboardListingPage} from './Browse3/pages/LeaderboardPage/LeaderboardListingPage'; +import {LeaderboardPage} from './Browse3/pages/LeaderboardPage/LeaderboardPage'; import {ObjectPage} from './Browse3/pages/ObjectPage'; import {ObjectVersionPage} from './Browse3/pages/ObjectVersionPage'; import { @@ -151,6 +153,7 @@ const tabOptions = [ 'op-versions', 'calls', 'evaluations', + 'leaderboards', 'boards', 'tables', ]; @@ -338,90 +341,92 @@ const MainPeekingLayout: FC = () => { - + - - - - -
+ + + + - {peekLocation && ( - - - - + + + + + + + ); +}; + +const Label: React.FC<{children: React.ReactNode}> = ({children}) => { + return ( + + {children} + + ); +}; + +const ColumnEditor: React.FC<{ + column: LeaderboardObjectVal['columns'][0]; + index: number; + evalObjs: EvaluationHelperObj[]; + entity: string; + project: string; + handleColumnChange: (index: number, field: string, value: any) => void; + moveColumn: (fromIndex: number, toIndex: number) => void; + cloneColumn: (index: number) => void; + removeColumn: (index: number) => void; + totalColumns: number; +}> = ({ + column, + index, + evalObjs, + entity, + project, + handleColumnChange, + moveColumn, + cloneColumn, + removeColumn, + totalColumns, +}) => { + const scorers = useScorers(entity, project, column.evaluation_object_ref); + const metrics = useMetrics( + entity, + project, + column.evaluation_object_ref, + column.scorer_name + ); + const selectedEvalObj = evalObjs.find( + obj => obj.ref === column.evaluation_object_ref + ); + const selectedScorer = useMemo( + () => (column.scorer_name ? {val: column.scorer_name} : undefined), + [column.scorer_name] + ); + const selectedMetricPath = useMemo( + () => ({val: column.summary_metric_path}), + [column.summary_metric_path] + ); + const shouldMinimize = column.should_minimize ?? false; + return ( + <> + + value={selectedEvalObj} + placeholder="Evaluation Definition" + onChange={newVal => + handleColumnChange(index, 'evaluation_object_ref', newVal?.ref) + } + options={Object.entries(_.groupBy(evalObjs, 'name')).map( + ([name, objs]) => ({options: objs, label: name}) + )} + getOptionLabel={obj => + `${obj.name}:v${obj.versionIndex} (${obj.digest.slice(0, 6)})` + } + getOptionValue={obj => obj.ref} + /> + + value={selectedScorer} + onChange={newVal => + handleColumnChange(index, 'scorer_name', newVal?.val) + } + options={scorers.map(scorer => ({val: scorer}))} + isDisabled={!column.evaluation_object_ref} + getOptionLabel={scorer => scorer.val} + getOptionValue={scorer => scorer.val} + /> + + value={selectedMetricPath} + onChange={newVal => + handleColumnChange(index, 'summary_metric_path', newVal?.val ?? '') + } + options={metrics.map(metric => ({val: metric}))} + isDisabled={!column.evaluation_object_ref || !column.scorer_name} + getOptionLabel={metric => metric.val} + getOptionValue={metric => metric.val} + /> + , + onClick: () => moveColumn(index, index - 1), + disabled: index === 0, + }, + { + key: 'moveAfter', + text: 'Move After', + icon: , + onClick: () => moveColumn(index, index + 1), + disabled: index === totalColumns - 1, + }, + { + key: 'duplicate', + text: 'Duplicate', + icon: , + onClick: () => cloneColumn(index), + }, + { + key: 'delete', + text: 'Delete', + icon: , + onClick: () => removeColumn(index), + }, + { + key: 'changeSortDirection', + text: shouldMinimize ? 'Sort Descending' : 'Sort Ascending', + icon: shouldMinimize ? 
( + + ) : ( + + ), + onClick: () => + handleColumnChange(index, 'should_minimize', !shouldMinimize), + }, + ], + ]} + trigger={ + + + ); +}; + +const LeaderboardTable: React.FC<{ + entity: string; + project: string; +}> = props => { + const history = useHistory(); + const {peekingRouter} = useWeaveflowRouteContext(); + + // TODO: Once `useCollectionObjects` lands from the online + // evals project, switch to that (much more type safe) + const leaderboardQuery = useBaseObjectInstances('Leaderboard', { + project_id: projectIdFromParts({ + entity: props.entity, + project: props.project, + }), + filter: {latest_only: true}, + }); + + const leaderboardObjectVersions = useMemo(() => { + return (leaderboardQuery.result ?? []).map( + convertTraceServerObjectVersionToSchema + ); + }, [leaderboardQuery.result]); + const onClick = useCallback( + (obj: ObjectVersionSchema) => { + const to = peekingRouter.leaderboardsUIUrl( + props.entity, + props.project, + obj.objectId + ); + history.push(to); + }, + [history, peekingRouter, props.entity, props.project] + ); + + if (leaderboardQuery.loading) { + return ; + } + + const isEmpty = leaderboardObjectVersions.length === 0; + if (isEmpty) { + return ; + } + + return ( + + ); +}; + +const generateLeaderboardId = () => { + const timestamp = new Date().getTime(); + const timestampHex = timestamp.toString(36); + return `leaderboard-${timestampHex}`; +}; + +const useCreateLeaderboard = (entity: string, project: string) => { + const createLeaderboardInstance = useCreateBaseObjectInstance('Leaderboard'); + + const createLeaderboard = async () => { + const objectId = sanitizeObjectId(generateLeaderboardId()); + await createLeaderboardInstance({ + obj: { + project_id: projectIdFromParts({entity, project}), + object_id: objectId, + val: { + name: objectId, + description: '', + columns: [], + }, + }, + }); + return objectId; + }; + + return createLeaderboard; +}; + +const useNavigateToLeaderboard = (entity: string, project: string) => { + const history = useHistory(); + const {baseRouter} = useWeaveflowRouteContext(); + const navigateToLeaderboard = useCallback( + (objectId: string) => { + const to = baseRouter.leaderboardsUIUrl(entity, project, objectId, true); + history.push(to); + }, + [history, baseRouter, entity, project] + ); + return navigateToLeaderboard; +}; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/LeaderboardPage/LeaderboardPage.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/LeaderboardPage/LeaderboardPage.tsx new file mode 100644 index 000000000000..6fac8eaa599f --- /dev/null +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/LeaderboardPage/LeaderboardPage.tsx @@ -0,0 +1,385 @@ +import {Box} from '@mui/material'; +import {MOON_250} from '@wandb/weave/common/css/color.styles'; +import {useViewerInfo} from '@wandb/weave/common/hooks/useViewerInfo'; +import {Button} from '@wandb/weave/components/Button'; +import {Loading} from '@wandb/weave/components/Loading'; +import _ from 'lodash'; +import React, { + FC, + useCallback, + useContext, + useEffect, + useMemo, + useState, +} from 'react'; +import ReactMarkdown from 'react-markdown'; +import styled from 'styled-components'; + +import {WeaveflowPeekContext} from '../../context'; +import {NotFoundPanel} from '../../NotFoundPanel'; +import { + LeaderboardColumnOrderType, + LeaderboardGrid, +} from '../../views/Leaderboard/LeaderboardGrid'; +import {useSavedLeaderboardData} from '../../views/Leaderboard/query/hookAdapters'; +import 
{LeaderboardObjectVal} from '../../views/Leaderboard/types/leaderboardConfigType'; +import {SimplePageLayout} from '../common/SimplePageLayout'; +import { + useBaseObjectInstances, + useCreateBaseObjectInstance, +} from '../wfReactInterface/baseObjectClassQuery'; +import {projectIdFromParts} from '../wfReactInterface/tsDataModelHooks'; +import {LeaderboardConfigEditor} from './LeaderboardConfigEditor'; + +type LeaderboardPageProps = { + entity: string; + project: string; + leaderboardName: string; + openEditorOnMount?: boolean; +}; + +export const LeaderboardPage: React.FC = props => { + const [name, setName] = useState(props.leaderboardName); + const {isPeeking} = useContext(WeaveflowPeekContext); + const {isEditor} = useIsEditor(props.entity); + const [isEditing, setIsEditing] = useState(false); + useEffect(() => { + if (isEditor && props.openEditorOnMount) { + setIsEditing(true); + } + }, [isEditor, props.openEditorOnMount]); + return ( + + ), + }, + ]} + headerExtra={ + !isPeeking && + !isEditing && + isEditor && ( + + ) + } + /> + ); +}; + +export const LeaderboardPageContent: React.FC< + LeaderboardPageProps & { + setName: (name: string) => void; + isEditing: boolean; + setIsEditing: (isEditing: boolean) => void; + } +> = props => { + const {entity, project} = props; + const leaderboardInstances = useBaseObjectInstances('Leaderboard', { + project_id: projectIdFromParts({entity, project}), + filter: {object_ids: [props.leaderboardName], latest_only: true}, + }); + + if (leaderboardInstances.loading) { + return ; + } + + if ( + leaderboardInstances.result == null || + leaderboardInstances.result.length !== 1 + ) { + return ( + + ); + } + + const leaderboardVal = leaderboardInstances.result[0].val; + + if (leaderboardVal == null) { + return ( + + ); + } + + return ( + + ); +}; + +const useUpdateLeaderboard = ( + entity: string, + project: string, + objectId: string +) => { + const createLeaderboard = useCreateBaseObjectInstance('Leaderboard'); + + const updateLeaderboard = async (leaderboardVal: LeaderboardObjectVal) => { + return await createLeaderboard({ + obj: { + project_id: projectIdFromParts({entity, project}), + object_id: objectId, + val: leaderboardVal, + }, + }); + }; + + return updateLeaderboard; +}; + +export const LeaderboardPageContentInner: React.FC< + LeaderboardPageProps & { + setName: (name: string) => void; + isEditing: boolean; + setIsEditing: (isEditing: boolean) => void; + } & { + leaderboardVal: LeaderboardObjectVal; + } +> = props => { + const updateLeaderboard = useUpdateLeaderboard( + props.entity, + props.project, + props.leaderboardName + ); + const [leaderboardVal, setLeaderboardVal] = useState(props.leaderboardVal); + const [workingLeaderboardValCopy, setWorkingLeaderboardValCopy] = + useState(leaderboardVal); + useEffect(() => { + props.setName(workingLeaderboardValCopy.name ?? 
''); + }, [props, workingLeaderboardValCopy.name]); + const {loading, data, evalData} = useSavedLeaderboardData( + props.entity, + props.project, + workingLeaderboardValCopy.columns + ); + const [saving, setSaving] = useState(false); + const discardChanges = useCallback(() => { + setWorkingLeaderboardValCopy(leaderboardVal); + props.setIsEditing(false); + }, [leaderboardVal, props]); + const commitChanges = useCallback(() => { + const mounted = true; + setSaving(true); + updateLeaderboard(workingLeaderboardValCopy) + .then(() => { + if (mounted) { + props.setIsEditing(false); + setLeaderboardVal(workingLeaderboardValCopy); + setWorkingLeaderboardValCopy(workingLeaderboardValCopy); + setSaving(false); + } + }) + .catch(e => { + console.error(e); + if (mounted) { + setWorkingLeaderboardValCopy(leaderboardVal); + setSaving(false); + } + }); + }, [leaderboardVal, props, updateLeaderboard, workingLeaderboardValCopy]); + const isDirty = useMemo(() => { + return !_.isEqual(leaderboardVal, workingLeaderboardValCopy); + }, [leaderboardVal, workingLeaderboardValCopy]); + const columnOrder = useMemo(() => { + return workingLeaderboardValCopy.columns + .map(col => { + const datasetGroup = evalData[col.evaluation_object_ref]?.datasetGroup; + const scorerGroup = + evalData[col.evaluation_object_ref]?.scorers[col.scorer_name]; + const metricGroup = col.summary_metric_path; + + if (datasetGroup && scorerGroup && metricGroup) { + return { + datasetGroup, + scorerGroup, + metricGroup, + minimize: col.should_minimize ?? false, + }; + } + return null; + }) + .filter(c => c != null) as LeaderboardColumnOrderType; + }, [workingLeaderboardValCopy, evalData]); + + return ( + + + {workingLeaderboardValCopy.description && ( + + + {workingLeaderboardValCopy.description} + + + )} + + + + + {props.isEditing && ( + + + + )} + + ); +}; + +export const ToggleLeaderboardConfig: React.FC<{ + isOpen: boolean; + onClick: () => void; +}> = ({isOpen, onClick}) => { + return ( + + + + ); +}; + +export const useIsEditor = (entity: string) => { + const {loading: loadingUserInfo, userInfo} = useViewerInfo(); + return useMemo(() => { + if (loadingUserInfo) { + return { + loading: true, + isEditor: false, + }; + } + const viewer = userInfo ? userInfo.id : null; + + return { + loading: false, + isEditor: viewer && userInfo?.teams.includes(entity), + }; + }, [entity, loadingUserInfo, userInfo]); +}; + +const StyledReactMarkdown = styled(ReactMarkdown)` + > *:first-child { + margin-top: 0; + } + h1 { + font-weight: 600; + font-size: 1.2rem; + } + h2 { + font-weight: 600; + font-size: 1.15rem; + } + h3 { + font-weight: 600; + font-size: 1.1rem; + } + h4 { + font-weight: 600; + font-size: 1.05rem; + } + h5 { + font-weight: 600; + font-size: 1rem; + } + h6 { + font-weight: 600; + font-size: 1rem; + } +`; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/LeaderboardPage/SimpleCodeLikeTextArea.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/LeaderboardPage/SimpleCodeLikeTextArea.tsx new file mode 100644 index 000000000000..826e0717b1a6 --- /dev/null +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/LeaderboardPage/SimpleCodeLikeTextArea.tsx @@ -0,0 +1,221 @@ +/** + * A simple multi-line text editor component that mimics code editor styling. + * Features auto-sizing, manual resize handle, and code-like formatting. 
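+ * Height tracks content up to `maxRows`; dragging the resize handle switches
+ * to manual sizing, and double-clicking the handle restores auto-sizing.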
+ * + * Inspired by: weave-js/src/components/Form/TextField.tsx + */ + +import {Tailwind} from '@wandb/weave/components/Tailwind'; +import classNames from 'classnames'; +import React, {useCallback, useEffect, useRef, useState} from 'react'; + +export const TextAreaSizes = { + Medium: 'medium', + Large: 'large', +} as const; +export type TextAreaSize = (typeof TextAreaSizes)[keyof typeof TextAreaSizes]; + +type TextAreaProps = { + placeholder?: string; + value?: string; + onChange?: (value: string) => void; + onKeyDown?: ( + key: string, + e: React.KeyboardEvent + ) => void; + onBlur?: (value: string) => void; + autoFocus?: boolean; + disabled?: boolean; + ariaLabel?: string; + errorState?: boolean; + maxLength?: number; + maxRows?: number; + dataTest?: string; +}; + +export const SimpleCodeLikeTextArea = ({ + placeholder, + value, + onChange, + onKeyDown, + onBlur, + autoFocus, + disabled, + ariaLabel, + errorState, + maxLength, + maxRows = 8, + dataTest, +}: TextAreaProps) => { + const textareaRef = useRef(null); + const [isManuallyResized, setIsManuallyResized] = useState(false); + const isDraggingRef = useRef(false); + const initialHeightRef = useRef(0); + const initialMouseYRef = useRef(0); + + // Automatically adjust height based on content + const adjustHeight = () => { + const textarea = textareaRef.current; + if (!textarea || isManuallyResized) { + return; + } + + textarea.style.height = 'auto'; + const lineHeight = parseInt( + getComputedStyle(textarea).lineHeight || '20', + 10 + ); + const maxHeight = lineHeight * maxRows; + const newHeight = Math.min(textarea.scrollHeight, maxHeight); + textarea.style.height = `${newHeight}px`; + }; + + useEffect(() => { + adjustHeight(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [value, isManuallyResized]); + + // Handle resize drag start + const handleResizeStart = (e: React.MouseEvent) => { + e.preventDefault(); + if (disabled) { + return; + } + + const textarea = textareaRef.current; + if (!textarea) { + return; + } + + isDraggingRef.current = true; + setIsManuallyResized(true); + initialHeightRef.current = textarea.offsetHeight; + initialMouseYRef.current = e.clientY; + + // Add event listeners for drag and release + document.addEventListener('mousemove', handleResizeMove); + document.addEventListener('mouseup', handleResizeEnd); + }; + + // Handle resize drag movement + const handleResizeMove = useCallback((e: MouseEvent) => { + if (!isDraggingRef.current || !textareaRef.current) { + return; + } + + const deltaY = e.clientY - initialMouseYRef.current; + const newHeight = Math.max(80, initialHeightRef.current + deltaY); // Min height of 80px + textareaRef.current.style.height = `${newHeight}px`; + }, []); + + // Handle resize drag end + const handleResizeEnd = useCallback(() => { + isDraggingRef.current = false; + document.removeEventListener('mousemove', handleResizeMove); + document.removeEventListener('mouseup', handleResizeEnd); + }, [handleResizeMove]); + + // Cleanup event listeners + useEffect(() => { + return () => { + document.removeEventListener('mousemove', handleResizeMove); + document.removeEventListener('mouseup', handleResizeEnd); + }; + }, [handleResizeEnd, handleResizeMove]); + + // Double click handler to reset to auto-size + const handleResizeDoubleClick = () => { + setIsManuallyResized(false); + adjustHeight(); + }; + + const handleChange = onChange + ? (e: React.ChangeEvent) => { + onChange(e.target.value); + } + : undefined; + const handleKeyDown = onKeyDown + ? 
(e: React.KeyboardEvent) => { + onKeyDown(e.key, e); + } + : undefined; + const handleBlur = onBlur + ? (e: React.ChangeEvent) => { + onBlur?.(e.target.value); + } + : undefined; + + return ( + +
+
+
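+ {/*
+   Usage sketch (illustrative only; this call site is not part of the diff,
+   and `description` / `setDescription` are hypothetical names. The props
+   are the ones declared in TextAreaProps above.)
+
+   <SimpleCodeLikeTextArea
+     value={description}
+     onChange={setDescription}
+     placeholder="Describe this leaderboard..."
+     maxRows={12}
+   />
+ */}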