Skip to content

Commit

Permalink
Connect the dots into a complete sequence.
Browse files Browse the repository at this point in the history
Move queue to Firestore collections.
  • Loading branch information
bgoldowsky committed Oct 2, 2024
1 parent 734bcd7 commit 4afb23c
Show file tree
Hide file tree
Showing 16 changed files with 346 additions and 429 deletions.
12 changes: 8 additions & 4 deletions functions-v2/lib/src/ai-categorize-document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,19 @@ const CategorizationResponse = z.object({
{description: "Any other relevant information."}),
});

export default async function categorizeDocument(file: string) {
/**
 * Categorizes an image file stored on disk.
 *
 * Reads the file, base64-encodes its bytes, wraps them in a data URL
 * labelled as `image/png`, and hands that URL to `categorizeUrl`.
 *
 * @param file path of the image file to read
 * @returns whatever `categorizeUrl` returns for the generated data URL
 */
export async function categorizeDocument(file: string) {
  const contents = await fs.readFile(file);
  const encoded = contents.toString("base64");
  return categorizeUrl(`data:image/png;base64,${encoded}`);
}

export async function categorizeUrl(url: string) {
const openai = new OpenAI({apiKey: process.env.OPENAI_API_KEY});
try {
return openai.beta.chat.completions.parse({
// model: "gpt-4o-mini",
model: "gpt-4o-2024-08-06",
model: "gpt-4o-mini",
// model: "gpt-4o-2024-08-06",
messages: [
{
role: "system",
Expand All @@ -50,7 +54,7 @@ export default async function categorizeDocument(file: string) {
{
type: "image_url",
image_url: {
url: `data:image/png;base64,${image}`,
url,
detail: "auto", // auto, low, high
},
},
Expand Down
2 changes: 1 addition & 1 deletion functions-v2/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion functions-v2/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"eslint": "^8.22.0",
"eslint-config-google": "^0.14.0",
"eslint-plugin-import": "^2.25.4",
"firebase-functions-test": "^3.1.0",
"firebase-functions-test": "^3.3.0",
"firebase-tools": "^13.15.1",
"jest": "^29.7.0",
"ts-jest": "^29.2.4",
Expand Down
3 changes: 1 addition & 2 deletions functions-v2/src/categorize-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
// Usage:
// ts-node categorize-docs.ts source-directory > output-file.csv


import fs from "fs";
import process from "node:process";
import categorizeDocument from "../lib/src/ai-categorize-document";
import {categorizeDocument} from "../lib/src/ai-categorize-document";

// Read directory name from the command-line argument
const sourceDirectory = process.argv[2];
Expand Down
2 changes: 2 additions & 0 deletions functions-v2/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@ import * as admin from "firebase-admin";
export {onUserDocWritten} from "./on-user-doc-written";
export {atMidnight} from "./at-midnight";
export {onAnalyzableDocWritten} from "./on-analyzable-doc-written";
export {onAnalysisDocumentPending} from "./on-analysis-document-pending";
export {onAnalysisDocumentImaged} from "./on-analysis-document-imaged";

admin.initializeApp();
89 changes: 89 additions & 0 deletions functions-v2/src/on-analysis-document-imaged.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import {FirestoreEvent, onDocumentCreated, QueryDocumentSnapshot} from "firebase-functions/v2/firestore";
import * as logger from "firebase-functions/logger";
import * as admin from "firebase-admin";
import {getAnalysisQueueFirestorePath} from "./utils";
import {categorizeUrl} from "../lib/src/ai-categorize-document";

// This is one of three functions for AI analysis of documents:
// 1. Watch for changes to the lastUpdatedAt metadata field and write a queue of docs to process
// 2. Create screenshots of those documents
// 3. (This function) Send those screenshots to the AI service for processing, and create comments with the results

// Identity attached to the comments that this function writes.
// NOTE: these should match the user specified in src/models/stores/user-types.ts
const commenterName = "Ada Insight";
const commenterUid = "ada_insight_1";

// Trigger path for the function below; the "{docId}" wildcard is what makes
// `event.params.docId` available inside the handler.
const imagedQueuePath = getAnalysisQueueFirestorePath("imaged", "{docId}");

/**
 * Records a failed analysis attempt.
 *
 * Copies the triggering queue document's fields, plus an `error` message,
 * into the "failedAnalyzing" queue under the same docId, and then deletes
 * the triggering document.
 *
 * @param error description of what went wrong; stored in the `error` field
 * @param event the Firestore event for the queue document that failed
 */
async function error(error: string, event: FirestoreEvent<QueryDocumentSnapshot | undefined, Record<string, string>>) {
  const firestore = admin.firestore();
  await firestore.doc(getAnalysisQueueFirestorePath("failedAnalyzing", event.params.docId)).set({
    // Spread the document's field values, not the snapshot object itself;
    // `event.data` is a QueryDocumentSnapshot (or undefined), not plain data.
    ...event.data?.data(),
    error,
  });
  await firestore.doc(event.document).delete();
}

/**
 * Firestore trigger that runs when a document is created in the "imaged"
 * analysis queue.
 *
 * Steps:
 *  1. Read `docImageUrl` from the queue document and send it to the AI
 *     categorizer.
 *  2. On a refusal or missing result, move the entry to the failure queue
 *     (via `error`) and stop.
 *  3. Otherwise create, or update, the AI commenter's comment on the document
 *     with the returned category, discussion, and key indicators.
 *  4. Copy the queue entry to the "done" queue with a `completedAt` timestamp
 *     and delete it from the "imaged" queue.
 */
export const onAnalysisDocumentImaged =
  onDocumentCreated(imagedQueuePath, async (event) => {
    const {docId} = event.params;
    const firestore = admin.firestore();

    // Without a usable image URL there is nothing to send to the AI service,
    // so fail fast with a specific message rather than making the request.
    const docImageUrl = event.data?.get("docImageUrl");
    if (typeof docImageUrl !== "string" || docImageUrl === "") {
      logger.warn("No docImageUrl found on", event.document);
      await error("No docImageUrl in queue document", event);
      return;
    }

    const completion = await categorizeUrl(docImageUrl);
    // Guard against an empty `choices` array as well as a missing completion.
    const reply = completion?.choices[0]?.message;

    if (reply?.refusal) {
      logger.info("AI refused to comment on", event.document, reply.refusal);
      await error(`AI refusal: ${reply.refusal}`, event);
      return;
    }
    if (!reply?.parsed) {
      await error("No response from AI", event);
      return;
    }
    const tags = [reply.parsed.category];
    const message = reply.parsed.discussion +
      ` Key Indicators: ${reply.parsed.keyIndicators.join(", ")}`;

    const commentsPath = `demo/AI/documents/${docId}/comments`;

    // Look for an existing comment from the AI commenter on this document.
    const existingComments = await firestore.collection(commentsPath)
      .where("uid", "==", commenterUid).get();
    const existing = existingComments.empty ? undefined : existingComments.docs[0];

    if (existing) {
      logger.info("Updating existing comment for", event.document);
      await existing.ref.update({
        tags,
        content: message,
        createdAt: admin.firestore.FieldValue.serverTimestamp(),
      });
    } else {
      logger.info("Creating comment for", event.document);
      // NOTE we are leaving the "network" and "tileId" fields empty in the comment doc.
      await firestore.collection(commentsPath).add({
        tags,
        content: message,
        createdAt: admin.firestore.FieldValue.serverTimestamp(),
        name: commenterName,
        uid: commenterUid,
      });
    }

    // Add to "done" queue
    await firestore.doc(getAnalysisQueueFirestorePath("done", docId)).set({
      ...event.data?.data(),
      "completedAt": admin.firestore.FieldValue.serverTimestamp(),
    });

    // Remove from the "imaged" queue
    await firestore.doc(event.document).delete();
  });
32 changes: 32 additions & 0 deletions functions-v2/src/on-analysis-document-pending.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import {onDocumentCreated} from "firebase-functions/v2/firestore";
import {getAnalysisQueueFirestorePath} from "./utils";
import * as admin from "firebase-admin";

// This is one of three functions for AI analysis of documents:
// 1. Watch for changes to the lastUpdatedAt metadata field and write into the queue of docs to process
// 2. (This function) Create screenshots of those documents
// 3. Send those screenshots to the AI service for processing, and create document comments with the results

// TODO just a stub for now

// Trigger path for the function below; the "{docId}" wildcard is what makes
// `event.params.docId` available inside the handler.
const pendingQueuePath = getAnalysisQueueFirestorePath("pending", "{docId}");

/**
 * Firestore trigger that runs when a document is created in the "pending"
 * analysis queue.
 *
 * Currently a stub: instead of rendering a screenshot it uses a fixed
 * placeholder image URL. It copies the queue entry into the "imaged" queue,
 * adding `docImaged` (server timestamp) and `docImageUrl`, and then deletes
 * the entry from the "pending" queue.
 */
export const onAnalysisDocumentPending =
  onDocumentCreated(pendingQueuePath, async (event) => {
    const {docId} = event.params;
    const firestore = admin.firestore();

    // TODO: create screenshot of document
    const imageUrl = "https://placehold.co/300x20?text=Wheelbarrow+design";

    // Write to the "imaged" queue.
    // This write must be awaited: otherwise the pending entry could be deleted
    // (and the function could finish) before the write has completed or failed,
    // losing the queue item.
    const nextQueuePath = getAnalysisQueueFirestorePath("imaged", docId);
    await firestore.doc(nextQueuePath).set({
      ...event.data?.data(),
      docImaged: admin.firestore.FieldValue.serverTimestamp(),
      docImageUrl: imageUrl,
    });

    // Remove from the "pending" queue
    await firestore.doc(event.document).delete();
  });
80 changes: 0 additions & 80 deletions functions-v2/src/on-analysis-image-ready.ts

This file was deleted.

47 changes: 22 additions & 25 deletions functions-v2/src/on-analyzable-doc-written.ts
Original file line number Diff line number Diff line change
@@ -1,40 +1,37 @@
import {onDocumentWritten} from "firebase-functions/v2/firestore";
import {getDatabase} from "firebase-admin/database";
import {onValueWritten} from "firebase-functions/v2/database";
import * as logger from "firebase-functions/logger";
// import * as admin from "firebase-admin";
import * as admin from "firebase-admin";
import {getAnalysisQueueFirestorePath} from "./utils";

// This is one of three functions for AI analysis of documents:
// 1. (This function) watch for changes to the lastUpdatedAt metadata field and write a queue of docs to process
// 1. (This function) watch for changes to the lastUpdatedAt metadata field and write into the queue of docs to process
// 2. Create screenshots of those documents
// 3. Send those screenshots to the AI service for processing, and create document comments with the results

// For now, restrict processing to a particular root for testing.
// TODO later we will open this up to all documents, and {root} will be a parameter.
const root = "demo/AI/portals/demo";

// Location of the queue of documents to process, relative to the root
const queuePath = "aiProcessingQueue";

export const onAnalyzableDocWritten =
onDocumentWritten(`${root}/classes/{classId}/users/{userId}/documentMetadata/{docId}/lastUpdatedAt`,
onValueWritten(`${root}/classes/{classId}/users/{userId}/documentMetadata/{docId}/lastEditedAt`,
async (event) => {
const timestamp = event.data.after.val();
// onValueWritten will trigger on create, update, or delete. Ignore deletes.
if (!timestamp) {
logger.info("lastEditedAt field was deleted", event.subject);
return;
}
const {classId, userId, docId} = event.params;
const database = getDatabase();
logger.info("Document update noticed", event.document, classId, userId, docId);

const timestamp = await database.ref(event.document).once("value").then((snap) => {
return snap.val();
},
(error) => {
logger.error("Error reading document", error);
});
getDatabase().ref(`${root}/${queuePath}`).update({
[docId]: {
metadataPath: `classes/${classId}/users/${userId}/documentMetadata/${docId}`,
updated: timestamp,
status: "updated",
},
});
});
const metadataPath = `${root}/classes/${classId}/users/${userId}/documentMetadata/${docId}`;

// TODO: check if we are in a unit that supports analysis

const firestore = admin.firestore();
// This should be safe in the event of duplicate calls; the second will just overwrite the first.
await firestore.doc(getAnalysisQueueFirestorePath("pending", docId)).set({
metadataPath,
docUpdated: timestamp,
});
logger.info("Added document to analysis queue", metadataPath);
}
);
Loading

0 comments on commit 4afb23c

Please sign in to comment.