feat(media): add multimodal support (#451)
hassiebp authored Nov 18, 2024
1 parent 7961532 commit c64b3a7
Showing 29 changed files with 2,992 additions and 782 deletions.
27 changes: 23 additions & 4 deletions .github/workflows/ci.yml
@@ -70,11 +70,11 @@ jobs:
cd ./langfuse-server
echo "::group::Run langfuse server"
TELEMETRY_ENABLED=false docker compose up -d db
TELEMETRY_ENABLED=false docker compose -f docker-compose.v3preview.yml up -d postgres
echo "::endgroup::"
echo "::group::Logs from langfuse server"
TELEMETRY_ENABLED=false docker compose logs
TELEMETRY_ENABLED=false docker compose -f docker-compose.v3preview.yml logs
echo "::endgroup::"
echo "::group::Install dependencies (necessary to run seeder)"
@@ -86,12 +86,31 @@ jobs:
pnpm run db:migrate
pnpm run db:seed
rm -rf node_modules
echo "::endgroup::"
echo "::group::Run server"
TELEMETRY_ENABLED=false docker compose up -d langfuse-server
TELEMETRY_ENABLED=false CLICKHOUSE_MIGRATION_URL=clickhouse://clickhouse:9000 LANGFUSE_ASYNC_INGESTION_PROCESSING=false LANGFUSE_ASYNC_CLICKHOUSE_INGESTION_PROCESSING=false docker compose -f docker-compose.v3preview.yml up -d
echo "::endgroup::"
# Add this step to check the health of the container
- name: Health check for langfuse server
run: |
echo "Checking if the langfuse server is up..."
retry_count=0
max_retries=10
until curl --output /dev/null --silent --head --fail http://localhost:3000/api/public/health
do
retry_count=`expr $retry_count + 1`
echo "Attempt $retry_count of $max_retries..."
if [ $retry_count -ge $max_retries ]; then
echo "Langfuse server did not respond in time. Printing logs..."
docker logs langfuse-server-langfuse-web-1
echo "Failing the step..."
exit 1
fi
sleep 5
done
echo "Langfuse server is up and running!"
- run: yarn install
- run: yarn compile
- run: yarn test:integration
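The health-check step added above polls /api/public/health with curl until the server answers or the retry budget runs out. A rough TypeScript equivalent of that readiness probe is sketched below, for illustration only: the workflow itself uses the shell loop above, waitForLangfuse is a hypothetical helper, and a Node 18+ global fetch is assumed.

```typescript
// Sketch of the CI readiness probe in TypeScript (assumes Node 18+ global fetch).
// Mirrors the shell loop above: 10 attempts, 5 seconds apart, HEAD /api/public/health.
const HEALTH_URL = "http://localhost:3000/api/public/health";

async function waitForLangfuse(maxRetries = 10, delayMs = 5_000): Promise<void> {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const res = await fetch(HEALTH_URL, { method: "HEAD" });
      if (res.ok) {
        console.log("Langfuse server is up and running!");
        return;
      }
    } catch {
      // Server not reachable yet; fall through and retry.
    }
    console.log(`Attempt ${attempt} of ${maxRetries}...`);
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
  throw new Error("Langfuse server did not respond in time.");
}
```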
12 changes: 10 additions & 2 deletions integration-test/integration-utils.ts
@@ -1,6 +1,7 @@
import axios, { type AxiosResponse } from "axios";
import axios, { AxiosResponse } from "axios";
import fs from "fs/promises";

import { type components } from "../langfuse-core/src/openapi/server";
import { components } from "../langfuse-core/src/openapi/server";

export const LANGFUSE_BASEURL = String(process.env.LANGFUSE_BASEURL);
export const LANGFUSE_PUBLIC_KEY = String(process.env.LANGFUSE_PUBLIC_KEY);
@@ -34,3 +35,10 @@ export const fetchTraceById = async (id: string): Promise<AxiosResponse<any, any
});
return res;
};

export const encodeFile = async (filePath: string): Promise<string> => {
const file = await fs.readFile(filePath);
const encoded = Buffer.from(file).toString("base64");

return encoded;
};
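The new encodeFile helper simply reads a file from disk and base64-encodes it; the OpenAI tests further down use it to build inline media payloads. A minimal usage sketch follows (paths mirror the test files in this commit and are illustrative; the data-URI wrapping matches the vision test below).

```typescript
// Minimal usage sketch of encodeFile; paths mirror the spec files above, adjust to your layout.
import { encodeFile } from "./integration-utils";

async function main(): Promise<void> {
  // ./static/puton.jpg is the fixture this commit's vision test uses.
  const base64 = await encodeFile("./static/puton.jpg");
  // Wrap the raw base64 string in a data URI, as the OpenAI vision test below does.
  const dataUri = `data:image/jpeg;base64,${base64}`;
  console.log(dataUri.slice(0, 60) + "...");
}

main();
```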
33 changes: 33 additions & 0 deletions integration-test/langfuse-integration-node.spec.ts
@@ -1,10 +1,12 @@
// uses the compiled node.js version, run yarn build after making changes to the SDKs
import Langfuse from "../langfuse-node";
import fs from "fs";

// import { wait } from '../langfuse-core/test/test-utils/test-utils'
import axios from "axios";
import { LANGFUSE_BASEURL, getHeaders } from "./integration-utils";
import { utils } from "../langfuse-core/src";
import { LangfuseMedia } from "../langfuse-core/src/media/LangfuseMedia";

describe("Langfuse Node.js", () => {
let langfuse: Langfuse;
@@ -627,4 +629,35 @@ describe("Langfuse Node.js", () => {
const sessions = await langfuse.fetchSessions();
expect(sessions.data).toContainEqual(expect.objectContaining({ id: sessionId }));
});

it("traces multimodal metadata", async () => {
const trace = langfuse.trace({
name: "test-trace-10",
metadata: {
context: {
nested: new LangfuseMedia({
contentBytes: fs.readFileSync("./static/bitcoin.pdf"),
contentType: "application/pdf",
}),
},
},
});
trace.update({
version: "1.0.0",
});
await langfuse.flushAsync();

const res = await axios.get(`${LANGFUSE_BASEURL}/api/public/traces/${trace.id}`, { headers: getHeaders() });

expect(res.data).toMatchObject({
id: trace.id,
name: "test-trace-10",
version: "1.0.0",
metadata: {
context: {
nested: expect.stringMatching(/^@@@langfuseMedia:type=application\/pdf\|id=.+\|source=bytes@@@$/),
},
},
});
}, 10_000);
});
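The test above doubles as a reference for the new metadata API: binary content wrapped in LangfuseMedia is uploaded on flush, and the stored trace metadata holds a reference token of the form @@@langfuseMedia:type=...|id=...|source=bytes@@@ instead of the raw bytes. Below is a minimal sketch outside the test harness, using the same imports as the spec file and assuming LANGFUSE_BASEURL, LANGFUSE_PUBLIC_KEY, and LANGFUSE_SECRET_KEY are set in the environment; the trace name and metadata key are illustrative.

```typescript
// Sketch of attaching a PDF to trace metadata with the new LangfuseMedia wrapper.
// Imports mirror the spec file above; credentials are read from the environment.
import fs from "fs";
import Langfuse from "../langfuse-node";
import { LangfuseMedia } from "../langfuse-core/src/media/LangfuseMedia";

async function main(): Promise<void> {
  const langfuse = new Langfuse();

  langfuse.trace({
    name: "multimodal-metadata-example", // illustrative name
    metadata: {
      attachment: new LangfuseMedia({
        contentBytes: fs.readFileSync("./static/bitcoin.pdf"),
        contentType: "application/pdf",
      }),
    },
  });

  // On flush the SDK uploads the bytes; the stored metadata then contains a token like
  // @@@langfuseMedia:type=application/pdf|id=...|source=bytes@@@ rather than the raw content.
  await langfuse.flushAsync();
}

main();
```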
136 changes: 134 additions & 2 deletions integration-test/langfuse-openai.spec.ts
@@ -4,7 +4,7 @@ import OpenAI from "openai";
import Langfuse, { observeOpenAI } from "../langfuse";
import { randomUUID } from "crypto";
import axios, { type AxiosResponse } from "axios";
import { LANGFUSE_BASEURL, getHeaders, fetchTraceById } from "./integration-utils";
import { LANGFUSE_BASEURL, getHeaders, fetchTraceById, encodeFile } from "./integration-utils";
import { zodResponseFormat } from "openai/helpers/zod";
import { z } from "zod";

@@ -981,7 +981,7 @@ describe("Langfuse-OpenAI-Integation", () => {
expect(messages).toBeDefined();

await client.flushAsync();
}, 10000);
}, 20_000);

it("should work with structured output parsing with response_format", async () => {
const traceId = randomUUID();
@@ -1105,4 +1105,136 @@
});
expect(generation.model).toBe("gpt-4o-2024-08-06");
}, 10000);

it("should work with vision input", async () => {
const traceId = randomUUID();
const client = observeOpenAI(openai, { traceId, metadata: { someKey: "someValue" } });

const completion = await client.chat.completions.create({
model: "gpt-4o-2024-08-06",
messages: [
{ role: "system", content: "You are a helpful math tutor. Guide the user through the solution step by step." },
{
role: "user",
content: [
{
type: "text",
text: "What’s in this image?",
},
{
type: "image_url",
image_url: {
url: `data:image/jpeg;base64,${await encodeFile("./static/puton.jpg")}`,
},
},
],
},
],
});

await client.flushAsync();

const trace = await fetchTraceById(traceId);
expect(trace.status).toBe(200);

const generation = trace.data.observations[0];
expect(generation.model).toBe("gpt-4o-2024-08-06");
expect(generation.input).toMatchObject({
messages: [
{
role: "system",
content: "You are a helpful math tutor. Guide the user through the solution step by step.",
},
{
role: "user",
content: [
{
text: "What’s in this image?",
type: "text",
},
{
type: "image_url",
image_url: {
url: expect.stringMatching(/^@@@langfuseMedia:type=image\/jpeg\|id=.+\|source=base64_data_uri@@@$/),
},
},
],
},
],
});
}, 10_000);

it("should work with audio input and output", async () => {
const traceId = randomUUID();
const client = observeOpenAI(openai, { traceId, metadata: { someKey: "someValue" } });

const completion = await client.chat.completions.create({
model: "gpt-4o-audio-preview",
modalities: ["text", "audio"],
audio: { voice: "alloy", format: "wav" },
messages: [
{ role: "system", content: "You are a hilarious comedian. Make the user laugh." },
{
role: "user",
content: [
{
type: "text",
text: "Do what this recording says.",
},
{
type: "input_audio",
input_audio: {
data: await encodeFile("./static/joke_prompt.wav"),
format: "wav",
},
},
],
},
],
});

await client.flushAsync();

const trace = await fetchTraceById(traceId);
expect(trace.status).toBe(200);

const generation = trace.data.observations[0];
expect(generation.model).toBe("gpt-4o-audio-preview");
expect(generation.input).toMatchObject({
messages: [
{
role: "system",
content: "You are a hilarious comedian. Make the user laugh.",
},
{
role: "user",
content: [
{
text: "Do what this recording says.",
type: "text",
},
{
type: "input_audio",
input_audio: {
data: expect.stringMatching(/^@@@langfuseMedia:type=audio\/wav\|id=.+\|source=base64_data_uri@@@$/),
format: "wav",
},
},
],
},
],
});

expect(generation.output).toMatchObject({
role: "assistant",
audio: {
id: expect.any(String),
data: expect.stringMatching(/^@@@langfuseMedia:type=audio\/wav\|id=.+\|source=base64_data_uri@@@$/),
expires_at: expect.any(Number),
transcript: expect.any(String),
},
content: null,
refusal: null,
});
}, 20_000);
});
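The two OpenAI tests added above exercise the full round trip: base64 image data URIs and raw base64 audio pass through observeOpenAI unchanged on the way to OpenAI, while the trace stored in Langfuse replaces them with @@@langfuseMedia:...@@@ reference tokens (source=base64_data_uri). Stripped of assertions, the vision case looks roughly like the sketch below; imports mirror the spec file, an OPENAI_API_KEY in the environment is assumed, and the prompt and log line are illustrative.

```typescript
// Sketch of traced vision input via observeOpenAI, condensed from the spec above.
import OpenAI from "openai";
import { randomUUID } from "crypto";
import { observeOpenAI } from "../langfuse";
import { encodeFile } from "./integration-utils";

async function main(): Promise<void> {
  const traceId = randomUUID();
  // Wrap the OpenAI client so the call is captured as a Langfuse generation.
  const client = observeOpenAI(new OpenAI(), { traceId });

  const completion = await client.chat.completions.create({
    model: "gpt-4o-2024-08-06",
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: "What is in this image?" },
          {
            type: "image_url",
            image_url: { url: `data:image/jpeg;base64,${await encodeFile("./static/puton.jpg")}` },
          },
        ],
      },
    ],
  });

  console.log(completion.choices[0].message.content);

  // Flush so the generation (with the media reference token in its stored input) is sent.
  await client.flushAsync();
}

main();
```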
3 changes: 2 additions & 1 deletion integration-test/modules/package.json
@@ -15,7 +15,8 @@
"dependencies": {
"@langchain/community": "^0.2",
"dotenv": "^16.4.5",
"langchain": "^0.2",
"langchain": "^0.3.6",
"openai": "^4.72.0",
"langfuse-langchain": "*"
},
"devDependencies": {