Skip to content

Commit

Permalink
Resolved merge conflicts between feat/added-anthropic-vision-api and …
Browse files Browse the repository at this point in the history
…main
  • Loading branch information
rafaelsideguide committed Apr 24, 2024
2 parents 56c8109 + db15724 commit 942ac3b
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 45 deletions.
14 changes: 14 additions & 0 deletions apps/api/.env.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
NUM_WORKERS_PER_QUEUE=8
PORT=
HOST=
SUPABASE_ANON_TOKEN=
SUPABASE_URL=
SUPABASE_SERVICE_TOKEN=
REDIS_URL=
SCRAPING_BEE_API_KEY=
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
BULL_AUTH_KEY=
LOGTAIL_KEY=
PLAYWRIGHT_MICROSERVICE_URL=

1 change: 1 addition & 0 deletions apps/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"typescript": "^5.4.2"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.20.5",
"@brillout/import": "^0.2.2",
"@bull-board/api": "^5.14.2",
"@bull-board/express": "^5.8.0",
Expand Down
18 changes: 18 additions & 0 deletions apps/api/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apps/api/src/__tests__/e2e_withAuth/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ const TEST_URL = "http://127.0.0.1:3002";
expect(response.body).toHaveProperty("success");
expect(response.body.success).toBe(true);
expect(response.body).toHaveProperty("data");
}, 20000);
}, 30000); // 30 seconds timeout
});

describe("GET /v0/crawl/status/:jobId", () => {
Expand Down
6 changes: 3 additions & 3 deletions apps/api/src/scraper/WebScraper/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ import { scrapSingleUrl } from "./single_url";
import { SitemapEntry, fetchSitemapData, getLinksFromSitemap } from "./sitemap";
import { WebCrawler } from "./crawler";
import { getValue, setValue } from "../../services/redis";
import { getImageDescription } from "./utils/gptVision";
import { getImageDescription } from "./utils/imageDescription";
import { fetchAndProcessPdf } from "./utils/pdfProcessor";
import { replaceImgPathsWithAbsolutePaths, replacePathsWithAbsolutePaths } from "./utils/replacePaths";


export class WebScraperDataProvider {
private urls: string[] = [""];
private mode: "single_urls" | "sitemap" | "crawl" = "single_urls";
Expand All @@ -21,6 +20,7 @@ export class WebScraperDataProvider {
private generateImgAltText: boolean = false;
private pageOptions?: PageOptions;
private replaceAllPathsWithAbsolutePaths?: boolean = false;
private generateImgAltTextModel: "gpt-4-turbo" | "claude-3-opus" = "gpt-4-turbo";

authorize(): void {
throw new Error("Method not implemented.");
Expand Down Expand Up @@ -443,7 +443,7 @@ export class WebScraperDataProvider {
imageUrl,
backText,
frontText
);
, this.generateImgAltTextModel);
}

document.content = document.content.replace(
Expand Down
41 changes: 0 additions & 41 deletions apps/api/src/scraper/WebScraper/utils/gptVision.ts

This file was deleted.

88 changes: 88 additions & 0 deletions apps/api/src/scraper/WebScraper/utils/imageDescription.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import Anthropic from '@anthropic-ai/sdk';
import axios from 'axios';

/** Image media types sent in the `media_type` field of a base64 image block. */
type ImageMediaType = "image/jpeg" | "image/png" | "image/gif" | "image/webp";

/** Maps a `Content-Type` MIME value (parameters stripped, lowercased) to a media type. */
const MEDIA_TYPE_BY_MIME = new Map<string, ImageMediaType>([
  ["image/jpeg", "image/jpeg"],
  ["image/jpg", "image/jpeg"], // non-standard spelling some servers emit
  ["image/png", "image/png"],
  ["image/gif", "image/gif"],
  ["image/webp", "image/webp"],
]);

/** Maps a lowercased file extension (without the dot) to a media type. */
const MEDIA_TYPE_BY_EXTENSION = new Map<string, ImageMediaType>([
  ["jpg", "image/jpeg"],
  ["jpeg", "image/jpeg"],
  ["png", "image/png"],
  ["gif", "image/gif"],
  ["webp", "image/webp"],
]);

/**
 * Picks the media type to declare for a downloaded image.
 *
 * Tries the HTTP `Content-Type` header first, then the file extension of the
 * URL path, and finally falls back to `image/png`.
 */
function resolveImageMediaType(
  contentType: unknown,
  imageUrl: string
): ImageMediaType {
  if (typeof contentType === "string") {
    // Drop parameters such as "; charset=binary" before the lookup.
    const mime = (contentType.split(";")[0] ?? "").trim().toLowerCase();
    const fromHeader = MEDIA_TYPE_BY_MIME.get(mime);
    if (fromHeader !== undefined) {
      return fromHeader;
    }
  }

  // Ignore any query string / fragment when looking at the extension.
  const path = imageUrl.split(/[?#]/)[0] ?? "";
  const extension = path.slice(path.lastIndexOf(".") + 1).toLowerCase();
  return MEDIA_TYPE_BY_EXTENSION.get(extension) ?? "image/png";
}

/**
 * Generates a concise alt-text description for an image using a vision model.
 *
 * @param imageUrl - URL of the image to describe.
 * @param backText - Context text that is placed first in the prompt.
 * @param frontText - Context text that is placed second in the prompt.
 * @param model - `"claude-3-opus"` selects the Anthropic branch; any other
 *   value (including the default `"gpt-4-turbo"`) selects the OpenAI branch.
 * @returns The generated description, or `""` when the required API key is
 *   missing, the request fails, or the model returns no text. This function
 *   never rejects: every failure is logged with `console.error` and swallowed.
 */
export async function getImageDescription(
  imageUrl: string,
  backText: string,
  frontText: string,
  model: string = "gpt-4-turbo"
): Promise<string> {
  try {
    const prompt =
      "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " +
      backText +
      " and the following text: " +
      frontText +
      ". Be super concise.";

    switch (model) {
      case "claude-3-opus": {
        if (!process.env.ANTHROPIC_API_KEY) {
          throw new Error("No Anthropic API key provided");
        }

        // This branch sends the image inline as base64, so download it first.
        const imageRequest = await axios.get(imageUrl, {
          responseType: "arraybuffer",
        });
        // Declare the real media type instead of assuming PNG for every image.
        const imageMediaType = resolveImageMediaType(
          imageRequest.headers?.["content-type"],
          imageUrl
        );
        const imageData = Buffer.from(imageRequest.data).toString("base64");

        const anthropic = new Anthropic();
        const response = await anthropic.messages.create({
          model: "claude-3-opus-20240229",
          max_tokens: 1024,
          messages: [
            {
              role: "user",
              content: [
                {
                  type: "image",
                  source: {
                    type: "base64",
                    media_type: imageMediaType,
                    data: imageData,
                  },
                },
                {
                  type: "text",
                  text: prompt,
                },
              ],
            },
          ],
        });

        // Guard against an empty or non-text first block.
        const firstBlock = response.content[0];
        return firstBlock && firstBlock.type === "text" ? firstBlock.text : "";
      }
      default: {
        if (!process.env.OPENAI_API_KEY) {
          throw new Error("No OpenAI API key provided");
        }

        // Loaded lazily so the OpenAI SDK is only required when this branch runs.
        const { OpenAI } = require("openai");
        const openai = new OpenAI();

        const response = await openai.chat.completions.create({
          model: "gpt-4-turbo",
          messages: [
            {
              role: "user",
              content: [
                {
                  type: "text",
                  text: prompt,
                },
                {
                  type: "image_url",
                  image_url: {
                    url: imageUrl,
                  },
                },
              ],
            },
          ],
        });

        // `message.content` may be null; keep the declared `string` contract.
        return response.choices?.[0]?.message?.content ?? "";
      }
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error("Error generating image alt text:", message);
    return "";
  }
}

0 comments on commit 942ac3b

Please sign in to comment.