bettersg · sarge1989 · Nov 19, 2023 · Nov 11, 2023 · Nov 11, 2023 · Nov 12, 2023
diff --git a/functions/package-lock.json b/functions/package-lock.json
diff --git a/functions/src/definitions/common/genAI.ts b/functions/src/definitions/common/genAI.ts
@@ -11,13 +11,15 @@ type redaction = {
 
 const env = process.env.ENVIRONMENT
 
-async function anonymiseMessage(message: string) {
+async function anonymiseMessage(message: string, isComplex: boolean = true) {
   if (env === "SIT") {
     return message
   }
   let returnMessage = message.replace(/\u00a0/g, " ")
   try {
-    const anonymisationHyperparameters = hyperparameters?.anonymisation
+    let anonymisationHyperparameters = isComplex
+      ? hyperparameters?.complexAnonymisation
+      : hyperparameters?.simpleAnonymisation
     if (anonymisationHyperparameters) {
       const model: string = anonymisationHyperparameters.model
       const systemMessage: string = anonymisationHyperparameters?.prompt?.system
@@ -34,7 +36,10 @@ async function anonymiseMessage(message: string) {
           model,
           systemMessage,
           examples,
-          userMessage
+          userMessage,
+          0,
+          isComplex ? false : true, //update when GPT-4 ready
+          11
         )
         if (response) {
           try {

diff --git a/functions/src/definitions/common/openai/hyperparameters.json b/functions/src/definitions/common/openai/hyperparameters.json
@@ -1,5 +1,5 @@
 {
-  "anonymisation": {
+  "complexAnonymisation": {
     "model": "gpt-4",
     "prompt": {
       "system": "Our company offers a service that allows our users to forward in text messages. We then review the messages and help them check whether the messages contain misinformation, scams, or other online harms. Unfortunately, our users occasionally forward in messages containing their own credit card numbers, transaction IDs, names, addresses, and other information that are of no value to our checkers in fact-checking, but could harm them if our checkers had malicious intent.\n\nGiven this context, I will send your messages, and your job is to redact information that\n\n1) Is not useful for checking the message, and\n2) Contain personal information that could harm the user, i.e. the recipient of the message. There is no need to redact the information of the sender or their organisations, as this is typically useful for checking their legitimacy.\n\nDo NOT redact URLs or phone numbers.\n\nYou will return a JSON string containing two fields, like so.\n\n{\n  \"reasoning\": //string, containing your reason why you chose to, or not to, redact each field.\n  \"redacted\": [ //list of objects, one for each redacted field, as shown below\n    {\n      \"text\": //the original text of the redacted field\n      \"replaceWith: //the field to replace the original text with, which should be in angular brackets, e.g. <NAME> or <TRANSACTION_ID>\n    },\n    ...one object for each redacted field\n  ]\n}",
@@ -36,5 +36,22 @@
       ],
       "user": "Text Message:\n```{{message}}```\n\nAssigned Category:\n```{{category}}```\n"
     }
+  },
+  "simpleAnonymisation": {
+    "model": "gpt-3.5-turbo-1106",
+    "prompt": {
+      "system": "You are a conscientious analyst. I will send you messages. Your job is to take a deep breath, then read the messages and identify Personally Identifiable Information such as names and addresses for redaction. Note, some PII may already have been redacted. In that case, your job is to redact the rest.\n\nDo not redact URLs.\n\nYou will return a JSON object, with one key called \"redacted\". This will contain an array of objects, each containing two fields, like so.\n\n{\n  \"redacted\": [\n    {\n      \"text\": //the original text of the redacted field\n      \"replaceWith\": //the field to replace the original text with, which should be in angular brackets, e.g. <NAME> or <ADDRESS>\n    },\n    ...one object for each redacted field\n  ]\n}\n",
+      "examples": [
+        {
+          "user": "Hi <NAME>, I am Tai Khoon Lee, SARO for Clementi Primary School Counting Centre. You are appointed as Presiding Officer for the upcoming PE.",
+          "assistant": "{\"redacted\": [{\"text\": \"Tai Khoon Lee\", \"replaceWith\": \"<NAME>\"}]}"
+        },
+        {
+          "user": "[\"Hi all! Thanks for confirming your attendance for Ila's party tomorrow. You guys can arrive anytime after 1.30pm. I've ordered food and there's wine and beer too.\n\nAddress: 23 Telok Blangah crescent, #03-34, s090023\"]",
+          "assistant": "{\"redacted\": [{\"text\": \"Ila\", \"replaceWith\": \"<NAME>\"}, {\"text\": \"Address: 23 Telok Blangah crescent, #03-34, s090023\", \"replaceWith\": \"<ADDRESS>\"}]}"
+        }
+      ],
+      "user": "{{message}}"
+    }
   }
 }
diff --git a/functions/src/definitions/common/openai/openai.ts b/functions/src/definitions/common/openai/openai.ts
@@ -11,12 +11,22 @@ type examples = {
   assistant: string
 }
 
+interface ChatCompletionParams {
+  messages: ChatMessage[] // system / example / user messages, in send order
+  model: string
+  temperature: number
+  response_format?: { type: "text" | "json_object" } // optional; an object (not a string), as assigned in callChatCompletion
+  seed?: number // optional; omitted when the caller supplies no seed
+}
+
 async function callChatCompletion(
   model: string,
   systemMessage: string,
   examples: examples[],
   user: string,
-  temperature: number = 0
+  temperature: number = 0,
+  returnJSON: boolean = false,
+  seed: number | null = null
 ): Promise<string | null> {
   const openai = new OpenAI({
     apiKey: process.env.OPENAI_API_KEY,
@@ -34,11 +44,20 @@ async function callChatCompletion(
     })
   })
   messages.push({ role: "user", content: user })
-  const chatCompletion = await openai.chat.completions.create({
-    messages: messages,
-    model: model,
-    temperature: temperature,
-  })
+  const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming =
+    {
+      messages: messages,
+      model: model,
+      temperature: temperature,
+    }
+  if (seed) {
+    params.seed = seed
+  }
+  if (returnJSON) {
+    params.response_format = { type: "json_object" }
+  }
+
+  const chatCompletion = await openai.chat.completions.create(params)
   return chatCompletion.choices[0].message.content
 }
 

diff --git a/functions/src/definitions/common/parameters/userResponses.json b/functions/src/definitions/common/parameters/userResponses.json
@@ -1,7 +1,7 @@
 {
   "UNTRUE": {
     "en": "{{thanks}}{{matched}}{{methodology}}*untrue*.❌{{image_caveat}}\n\nPlease do not spread it further⛔️⛔️\n\nThank you for keeping Singapore informed!",
-    "cn": "{thanks}}{{matched}}{{methodology}}不属实。❌{{image_caveat}}\n\n请不要转发⛔️⛔️\n\n感谢您对新加坡网络安全的支持和贡献！\n"
+    "cn": "{{thanks}}{{matched}}{{methodology}}不属实。❌{{image_caveat}}\n\n请不要转发⛔️⛔️\n\n感谢您对新加坡网络安全的支持和贡献！\n"
   },
   "MISLEADING": {
     "en": "{{thanks}}{{matched}}{{methodology}}*presented in a misleading or unbalanced way*, even though some elements within could be true!⚠️{{image_caveat}}\n\nPlease take it with a pinch of salt and think twice before spreading it further🚧🚧.\n\nThank you for keeping Singapore informed!",
@@ -40,19 +40,19 @@
     "cn": "{{thanks}}\n\n抱歉，我们的查哥查妹对这条短信无法得出结论🤷🏻‍♂️🤷🏻‍♀️。对不起，我们尽力了！😞\n\n如果您能提供更多信息，例如发信人电话号码或截图，将有助于我们更好地评估。\n\n感谢您对新加坡网络安全的支持和贡献！"
   },
   "THANKS_IMMEDIATE": {
-    "en": "Thanks for sending this in!",
+    "en": "Thanks for sending this in! ",
     "cn": "感谢您提交这条短信！"
   },
   "THANKS_DELAYED": {
-    "en": "Thanks for waiting!",
+    "en": "Thanks for waiting! ",
     "cn": "感谢您的耐心等待！"
   },
   "IMAGE_CAVEAT": {
     "en": "This assessment refers to any claims made within the captions. If there are no claims/captions, it refers to the image itself.",
     "cn": "我们的评估基于图片标题中的内容。如果您提交的短信里没有文字标题，我们对图片本身的内容进行了评估。"
   },
   "METHODOLOGY_HUMAN": {
-    "en": "Our CheckMates have reviewed this message and think it's",
+    "en": "Our CheckMates have reviewed this message and think it's ",
     "cn": "经过查哥查妹的评估，我们认为这条短信"
   },
   "METHODOLOGY_AUTO": {

diff --git a/functions/src/definitions/common/responseUtils.ts b/functions/src/definitions/common/responseUtils.ts
@@ -304,7 +304,11 @@ async function sendVotingStats(instancePath: string) {
   } else truthCategory = "NA"
 
   const categories = [
-    { name: "trivial", count: irrelevantCount, isInfo: false },
+    {
+      name: responses.PLACEHOLDER_IRRELEVANT,
+      count: irrelevantCount,
+      isInfo: false,
+    },
     {
       name:
         scamCount >= illicitCount

diff --git a/functions/src/definitions/common/typesense/collectionOperations.ts b/functions/src/definitions/common/typesense/collectionOperations.ts
@@ -85,7 +85,7 @@ async function updateOne(
     await getClient()
       .collections(`${collection}${getEnvSuffix()}`)
       .documents()
-      .update(document)
+      .update(document, {})
   } catch (error) {
     console.error("Error in updateOne: ", error)
     throw error

diff --git a/functions/src/definitions/eventHandlers/onInstanceCreate.ts b/functions/src/definitions/eventHandlers/onInstanceCreate.ts
@@ -70,7 +70,7 @@ const onInstanceCreate = functions
           data?.type === "text" &&
           data?.text != parentMessageSnap.get("originalText")
         ) {
-          const strippedMessage = await anonymiseMessage(data.text)
+          const strippedMessage = await anonymiseMessage(data.text, true)
           messageUpdateObj.originalText = data.text
           messageUpdateObj.text = strippedMessage
         } else if (data?.type === "image") {

diff --git a/functions/src/definitions/eventHandlers/onMessageUpdate.ts b/functions/src/definitions/eventHandlers/onMessageUpdate.ts
@@ -1,7 +1,7 @@
 import * as functions from "firebase-functions"
 import { respondToInstance } from "../common/responseUtils"
 import { Timestamp } from "firebase-admin/firestore"
-import { rationaliseMessage } from "../common/genAI"
+import { rationaliseMessage, anonymiseMessage } from "../common/genAI"
 
 const onMessageUpdate = functions
   .region("asia-southeast1")
@@ -58,6 +58,15 @@ const onMessageUpdate = functions
         rationalisation: rationalisation,
       })
     }
+    if (
+      before.data().primaryCategory !== primaryCategory &&
+      primaryCategory === "legitimate"
+    ) {
+      const anonymisedText = await anonymiseMessage(text, false)
+      await after.ref.update({
+        text: anonymisedText,
+      })
+    }
     return Promise.resolve()
   })
 

diff --git a/functions/src/definitions/eventHandlers/userHandlers.ts b/functions/src/definitions/eventHandlers/userHandlers.ts
@@ -331,7 +331,12 @@ async function newTextInstanceHandler({
       machineCategory !== "info"
     )
 
-    const strippedMessage = await anonymiseMessage(text)
+    let strippedMessage = await anonymiseMessage(text, true)
+
+    if (strippedMessage && machineCategory === "legitimate") {
+      strippedMessage = await anonymiseMessage(strippedMessage, false) //won't run for now till machineCategory returns legitimate
+    }
+
     let rationalisation: null | string = null
     if (
       isMachineAssessed &&