From 6bfb7db627f8353ece96cac75c86516e305d7efc Mon Sep 17 00:00:00 2001 From: Bing Wen Tan Date: Sat, 23 Sep 2023 18:41:10 +0800 Subject: [PATCH 1/2] openai + anonymisation --- documentation/dataSchema.md | 6 +- functions/.env.local.test | 2 +- functions/package-lock.json | 238 ++++++++++++++++++ functions/package.json | 1 + functions/src/definitions/checkerHandlers.ts | 2 +- .../src/definitions/common/anonymisation.ts | 70 ++++++ .../common/openai/hyperparameters.json | 23 ++ .../src/definitions/common/openai/openai.ts | 45 ++++ functions/src/definitions/onInstanceCreate.ts | 2 +- functions/src/definitions/onInstanceUpdate.ts | 4 +- functions/src/definitions/userHandlers.ts | 23 +- functions/tsconfig.json | 3 +- 12 files changed, 406 insertions(+), 13 deletions(-) create mode 100644 functions/src/definitions/common/anonymisation.ts create mode 100644 functions/src/definitions/common/openai/hyperparameters.json create mode 100644 functions/src/definitions/common/openai/openai.ts diff --git a/documentation/dataSchema.md b/documentation/dataSchema.md index 62390e17..e0f88fd9 100644 --- a/documentation/dataSchema.md +++ b/documentation/dataSchema.md @@ -4,7 +4,8 @@ erDiagram string id PK "Assigned by Firestore" string machineCategory boolean isMachineCategorised - string text "Text. For text, shows the latest iteration" + string originalText "text as sent by user" + string text "pii-stripped text. 
For text, shows the latest iteration" string caption "Latest caption for image, if applicable" timestamp firstTimestamp "Timestamp of first instance" timestamp lastTimestamp "Timestamp of latest instance" @@ -34,7 +35,8 @@ erDiagram string id "whatsapp message id (needed to reply)" timestamp timestamp string type "text/image" - string text "text (if text message) or ocr-extracted text (if image message)" + string originalText "text as sent by user" + string text "pii-stripped text (if text message) or ocr-extracted text (if image message)" string textHash "md5 hash of text or ocr-extracted text" string caption "caption of image, if applicable" string captionHash "md5 hash of caption" diff --git a/functions/.env.local.test b/functions/.env.local.test index c78dbc2f..10bcc26a 100644 --- a/functions/.env.local.test +++ b/functions/.env.local.test @@ -2,7 +2,7 @@ NUM_SHARDS_INSTANCE_COUNT=5 NUM_SHARDS_VOTE_COUNT=5 GRAPH_API_VERSION=v15.0 TEST_SERVER_URL=http://resultserver:12345 -ENVIRONMENT=DEV +ENVIRONMENT=SIT SPREADSHEET_ID=SPREADSHEET_ID WHATSAPP_TEST_USER_BOT_PHONE_NUMBER_ID=WHATSAPP_TEST_USER_BOT_PHONE_NUMBER_ID WHATSAPP_TEST_CHECKER_BOT_PHONE_NUMBER_ID=WHATSAPP_TEST_CHECKER_BOT_PHONE_NUMBER_ID diff --git a/functions/package-lock.json b/functions/package-lock.json index 4f8f719b..6e7eeb12 100644 --- a/functions/package-lock.json +++ b/functions/package-lock.json @@ -18,6 +18,7 @@ "hashids": "^2.3.0", "image-hash": "^5.3.1", "libphonenumber-js": "^1.10.21", + "openai": "^4.10.0", "typesense": "^1.5.3" }, "devDependencies": { @@ -4246,6 +4247,15 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-18.11.18.tgz", "integrity": "sha512-DHQpWGjyQKSHj3ebjFI/wRKcqQcdR+MoFBygntYOZytCqNfkd2ZC4ARDJ2DQqhjH5p85Nnd3jhUJIXrszFX/JA==" }, + "node_modules/@types/node-fetch": { + "version": "2.6.5", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.5.tgz", + "integrity": 
"sha512-OZsUlr2nxvkqUFLSaY2ZbA+P1q22q+KrlxWOn/38RX+u5kTkYL2mTujEpzUhGkS+K/QCYp9oagfXG39XOzyySg==", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.0" + } + }, "node_modules/@types/prettier": { "version": "2.7.2", "resolved": "https://registry.npmjs.org/@types/prettier/-/prettier-2.7.2.tgz", @@ -4606,6 +4616,17 @@ "node": ">= 6.0.0" } }, + "node_modules/agentkeepalive": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", + "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, "node_modules/ajv": { "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", @@ -4915,6 +4936,11 @@ "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", "devOptional": true }, + "node_modules/base-64": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", + "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -5162,6 +5188,14 @@ "node": ">=10" } }, + "node_modules/charenc": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", + "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==", + "engines": { + "node": "*" + } + }, "node_modules/ci-info": { "version": "3.8.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.8.0.tgz", @@ -5348,6 +5382,14 @@ "node": ">= 8" } }, + "node_modules/crypt": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", + "integrity": 
"sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==", + "engines": { + "node": "*" + } + }, "node_modules/dashdash": { "version": "1.14.1", "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", @@ -5449,6 +5491,15 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/digest-fetch": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", + "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", + "dependencies": { + "base-64": "^0.1.0", + "md5": "^2.3.0" + } + }, "node_modules/dir-glob": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", @@ -6434,6 +6485,23 @@ "node": ">= 6" } }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, "node_modules/formidable": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/formidable/-/formidable-2.1.2.tgz", @@ -7043,6 +7111,14 @@ "node": ">=10.17.0" } }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "dependencies": { + "ms": "^2.0.0" + } + }, "node_modules/iconv-lite": { "version": "0.4.24", 
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -7172,6 +7248,11 @@ "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", "dev": true }, + "node_modules/is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" + }, "node_modules/is-core-module": { "version": "2.12.0", "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.12.0.tgz", @@ -8421,6 +8502,16 @@ "node": ">= 12" } }, + "node_modules/md5": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", + "integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", + "dependencies": { + "charenc": "0.0.2", + "crypt": "0.0.2", + "is-buffer": "~1.1.6" + } + }, "node_modules/mdurl": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", @@ -8574,6 +8665,24 @@ "node": ">= 0.6" } }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "engines": { + "node": ">=10.5.0" + } + }, "node_modules/node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -8700,6 +8809,24 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/openai": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.10.0.tgz", + "integrity": 
"sha512-II4b5/7qzwYkqA9MSjgqdofCc798EW+dtF2h6qNaVLet+qO7FShAJTWnoyzb50J4ZH1rPxRFAsmDLIhY3PT6DQ==", + "dependencies": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "digest-fetch": "^1.3.0", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7" + }, + "bin": { + "openai": "bin/cli" + } + }, "node_modules/optionator": { "version": "0.9.1", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", @@ -10357,6 +10484,14 @@ "makeerror": "1.0.12" } }, + "node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "engines": { + "node": ">= 14" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -13895,6 +14030,15 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-18.11.18.tgz", "integrity": "sha512-DHQpWGjyQKSHj3ebjFI/wRKcqQcdR+MoFBygntYOZytCqNfkd2ZC4ARDJ2DQqhjH5p85Nnd3jhUJIXrszFX/JA==" }, + "@types/node-fetch": { + "version": "2.6.5", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.5.tgz", + "integrity": "sha512-OZsUlr2nxvkqUFLSaY2ZbA+P1q22q+KrlxWOn/38RX+u5kTkYL2mTujEpzUhGkS+K/QCYp9oagfXG39XOzyySg==", + "requires": { + "@types/node": "*", + "form-data": "^4.0.0" + } + }, "@types/prettier": { "version": "2.7.2", "resolved": "https://registry.npmjs.org/@types/prettier/-/prettier-2.7.2.tgz", @@ -14147,6 +14291,14 @@ "debug": "4" } }, + "agentkeepalive": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", + "integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", 
+ "requires": { + "humanize-ms": "^1.2.1" + } + }, "ajv": { "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", @@ -14387,6 +14539,11 @@ "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", "devOptional": true }, + "base-64": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", + "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" + }, "base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -14558,6 +14715,11 @@ "integrity": "sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==", "dev": true }, + "charenc": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", + "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==" + }, "ci-info": { "version": "3.8.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.8.0.tgz", @@ -14703,6 +14865,11 @@ "which": "^2.0.1" } }, + "crypt": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", + "integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==" + }, "dashdash": { "version": "1.14.1", "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", @@ -14774,6 +14941,15 @@ "integrity": "sha512-ofrBgwpPhCD85kMKtE9RYFFq6OC1A89oW2vvgWZNCwxrUpRUILopY7lsYyMDSjc8g6U6aiO0Qubg6r4Wgt5ZnA==", "dev": true }, + "digest-fetch": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", + "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", + "requires": { + "base-64": "^0.1.0", + "md5": "^2.3.0" + } + }, "dir-glob": { "version": "3.0.1", 
"resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", @@ -15557,6 +15733,20 @@ "mime-types": "^2.1.12" } }, + "form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, + "formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "requires": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + } + }, "formidable": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/formidable/-/formidable-2.1.2.tgz", @@ -16011,6 +16201,14 @@ "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", "dev": true }, + "humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "requires": { + "ms": "^2.0.0" + } + }, "iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -16099,6 +16297,11 @@ "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", "dev": true }, + "is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" + }, "is-core-module": { "version": "2.12.0", "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.12.0.tgz", @@ -17087,6 +17290,16 @@ "integrity": 
"sha512-yr8hSKa3Fv4D3jdZmtMMPghgVt6TWbk86WQaWhDloQjRSQhMMYCAro7jP7VDJrjjdV8pxVxMssXS8B8Y5DZ5aw==", "optional": true }, + "md5": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", + "integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", + "requires": { + "charenc": "0.0.2", + "crypt": "0.0.2", + "is-buffer": "~1.1.6" + } + }, "mdurl": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", @@ -17198,6 +17411,11 @@ "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==" }, + "node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==" + }, "node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -17283,6 +17501,21 @@ "mimic-fn": "^2.1.0" } }, + "openai": { + "version": "4.10.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.10.0.tgz", + "integrity": "sha512-II4b5/7qzwYkqA9MSjgqdofCc798EW+dtF2h6qNaVLet+qO7FShAJTWnoyzb50J4ZH1rPxRFAsmDLIhY3PT6DQ==", + "requires": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "digest-fetch": "^1.3.0", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7" + } + }, "optionator": { "version": "0.9.1", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", @@ -18491,6 +18724,11 @@ "makeerror": "1.0.12" } }, + "web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": 
"sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==" + }, "webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", diff --git a/functions/package.json b/functions/package.json index b2e774ae..9af14e02 100644 --- a/functions/package.json +++ b/functions/package.json @@ -30,6 +30,7 @@ "hashids": "^2.3.0", "image-hash": "^5.3.1", "libphonenumber-js": "^1.10.21", + "openai": "^4.10.0", "typesense": "^1.5.3" }, "devDependencies": { diff --git a/functions/src/definitions/checkerHandlers.ts b/functions/src/definitions/checkerHandlers.ts index c02d5412..837c7182 100644 --- a/functions/src/definitions/checkerHandlers.ts +++ b/functions/src/definitions/checkerHandlers.ts @@ -211,7 +211,7 @@ async function onFactCheckerYes( res = await sendTextMessage( "factChecker", from, - latestInstanceSnap.get("text"), + messageSnap.get("text"), null, platform ) diff --git a/functions/src/definitions/common/anonymisation.ts b/functions/src/definitions/common/anonymisation.ts new file mode 100644 index 00000000..52e75ba5 --- /dev/null +++ b/functions/src/definitions/common/anonymisation.ts @@ -0,0 +1,70 @@ +import { callChatCompletion, ChatMessage, examples } from "./openai/openai" +import hyperparameters from "./openai/hyperparameters.json" +import * as functions from "firebase-functions" + +type redaction = { + text: string + replaceWith: string +} + +const env = process.env.ENVIRONMENT + +async function anonymiseMessage(message: string) { + if (env === "SIT") { + return message + } + let returnMessage = message.replace(/\u00a0/g, " ") + try { + const anonymisationHyperparameters = hyperparameters?.anonymisation + if (anonymisationHyperparameters) { + const model: string = anonymisationHyperparameters.model + const systemMessage: string = anonymisationHyperparameters?.prompt?.system + + const examples: examples[] = + anonymisationHyperparameters?.prompt?.examples + 
const userMessage: string = + anonymisationHyperparameters?.prompt?.user.replace( + "{{message}}", + message + ) + if (model && systemMessage && examples && userMessage) { + const response = await callChatCompletion( + model, + systemMessage, + examples, + userMessage + ) + if (response) { + try { + const responseObj = JSON.parse(response) + const redactions: redaction[] = responseObj.redacted + console.log(redactions) + redactions.forEach((redaction) => { + let regex = new RegExp(redaction.text, "g") + returnMessage = returnMessage.replace( + regex, + redaction.replaceWith + ) + }) + return returnMessage + } catch (e) { + functions.logger.error( + "OpenAI completion could not be parsed as JSON" + e + ) + return message + } + } + } else { + functions.logger.error( + "Anonymisation hyperparameters not configured correctly" + ) + return message + } + } + } catch (e) { + functions.logger.error("Anonymisation hyperparameters failed: " + e) + return message + } +} + +export { anonymiseMessage } diff --git a/functions/src/definitions/common/openai/hyperparameters.json b/functions/src/definitions/common/openai/hyperparameters.json new file mode 100644 index 00000000..37c3fc16 --- /dev/null +++ b/functions/src/definitions/common/openai/hyperparameters.json @@ -0,0 +1,23 @@ +{ + "anonymisation": { + "model": "gpt-4", + "prompt": { + "system": "Our company offers a service that allows our users to forward in text messages. We then review the messages and help them check whether the messages contain misinformation, scams, or other online harms. 
Unfortunately, our users occasionally forward in messages containing their own credit card numbers, transaction IDs, names, addresses, and other information that is of no value to our checkers in fact-checking, but could harm them if our checkers had malicious intent.\n\nGiven this context, I will send you messages, and your job is to redact information that\n\n1) Is not useful for checking the message, and\n2) Contains personal information that could harm the user, i.e. the recipient of the message. There is no need to redact the information of the sender or their organisations, as this is typically useful for checking their legitimacy.\n\nDo NOT redact URLs or phone numbers.\n\nYou will return a JSON string containing two fields, like so.\n\n{\n \"reasoning\": //string, containing your reason why you chose to, or not to, redact each field.\n \"redacted\": [ //list of objects, one for each redacted field, as shown below\n {\n \"text\": //the original text of the redacted field\n \"replaceWith\": //the field to replace the original text with, which should be in angle brackets, e.g. <NAME> or <ADDRESS>\n },\n ...one object for each redacted field\n ]\n}", + "examples": [ + { + "user": "Hi, reminder to collect your parcel SPXSG030423 at 1745 Ang Mo Kio Ave 10, S123456 Collect by: 2023-08-06 PIN: 191748 Name: Manny", + "assistant": "{\"reasoning\": \"The message appears to be a transaction notification, addressed to the user. The parcel number, address, PIN and name are likely those of the user, and are not meaningful in determining whether the message is from a legitimate source or not.\", \"redacted\": [{\"text\": \"SPXSG030423\", \"replaceWith\": \"<PARCEL_ID>\"}, {\"text\": \"1745 Ang Mo Kio Ave 10, S123456\", \"replaceWith\": \"<ADDRESS>
\"}, {\"text\": \"191748\", \"replaceWith\": \"<PIN>\"}, {\"text\": \"Manny\", \"replaceWith\": \"<NAME>\"}]}" + }, + { + "user": "*Bitter Gourd (Karela)*\nDear Amanda,\n\nI am Wu You.\nPlease spread this message to all your near and dear ones.\nProfessor Chen Hui Ren from the Beijing Army General Hospital confirmed that, if everyone who gets this information and then distributes to at least 10 other people, at least one life could be saved.\n\nYou can read more here: https://yahoo.com/bittergourd-karela\n\nI have done my part.\nI hope, you will also do your part.\nThank you.\n\nBitter gourd *(Karela)* in hot water can help you. No matter how busy you are, you need to read this, and then spread to friends and others.", + "assistant": "{\"reasoning\": \"The message appears to be making a claim. The name Amanda looks to be the name of the user, who has received the message. This is not important to fact checking and may harm the user when divulged. Wu You is the name of the sender, and the message is claiming that Professor Chen Hui Ren has given medical advice. Such information is important when determining whether the message is true or not. 
Thus, these two names should not be redacted.\", \"redacted\": [{\"text\": \"Amanda\", \"replaceWith\": \"<NAME>\"}]}" + }, + { + "user": "Drinking sea salt will give energy boosting effect", + "assistant": "{\"reasoning\": \"There is no personal information in this message, and thus there is nothing to redact\", \"redacted\": []}" + } + ], + "user": "{{message}}" + } + } +} diff --git a/functions/src/definitions/common/openai/openai.ts b/functions/src/definitions/common/openai/openai.ts new file mode 100644 index 00000000..61d9ebee --- /dev/null +++ b/functions/src/definitions/common/openai/openai.ts @@ -0,0 +1,45 @@ +import OpenAI from "openai" + +// Define the expected type for a chat message +type ChatMessage = { + role: "system" | "user" | "assistant" + content: string +} + +type examples = { + user: string + assistant: string +} + +async function callChatCompletion( + model: string, + systemMessage: string, + examples: examples[], + user: string, + temperature: number = 0 +): Promise { + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + }) + + const messages: ChatMessage[] = [{ role: "system", content: systemMessage }] + examples.forEach((example) => { + messages.push({ + role: "user", + content: example.user, + }) + messages.push({ + role: "assistant", + content: example.assistant, + }) + }) + messages.push({ role: "user", content: user }) + const chatCompletion = await openai.chat.completions.create({ + messages: messages, + model: model, + temperature: temperature, + }) + return chatCompletion.choices[0].message.content +} + +export { callChatCompletion, ChatMessage, examples } diff --git a/functions/src/definitions/onInstanceCreate.ts b/functions/src/definitions/onInstanceCreate.ts index 0f34848b..187cc968 100644 --- a/functions/src/definitions/onInstanceCreate.ts +++ b/functions/src/definitions/onInstanceCreate.ts @@ -67,7 +67,7 @@ const onInstanceCreate = functions if (data?.embedding && data?.text) { const updateObj = { id: 
snap.ref.path.replace(/\//g, "_"), //typesense id can't seem to take / - message: data.text, + message: data.originalText, captionHash: data.captionHash ? data.captionHash : "__NULL__", embedding: data.embedding, } diff --git a/functions/src/definitions/onInstanceUpdate.ts b/functions/src/definitions/onInstanceUpdate.ts index 705915aa..1a8bad4d 100644 --- a/functions/src/definitions/onInstanceUpdate.ts +++ b/functions/src/definitions/onInstanceUpdate.ts @@ -15,11 +15,11 @@ const onInstanceUpdate = functions // Grab the current value of what was written to Firestore. const before = change.before.data() const after = change.after.data() - if (after.type === "text" && before.text !== after.text) { + if (after.type === "text" && before.originalText !== after.originalText) { const embedding = await getEmbedding(after.text) const updateDocument = { id: change.after.ref.path, - message: after.text, + message: after.originalText, captionHash: after.captionHash ? after.captionHash : "__NULL__", embedding: embedding, } diff --git a/functions/src/definitions/userHandlers.ts b/functions/src/definitions/userHandlers.ts index 5bc77614..b3441254 100644 --- a/functions/src/definitions/userHandlers.ts +++ b/functions/src/definitions/userHandlers.ts @@ -1,7 +1,6 @@ import * as admin from "firebase-admin" import * as functions from "firebase-functions" import { onMessagePublished } from "firebase-functions/v2/pubsub" - import { Timestamp } from "firebase-admin/firestore" import { sendWhatsappTextMessage, @@ -28,6 +27,7 @@ import { getHash, getSignedUrl, } from "./common/mediaUtils" +import { anonymiseMessage } from "./common/anonymisation" import { calculateSimilarity } from "./calculateSimilarity" import { performOCR } from "./common/machineLearningServer/operations" import { defineString } from "firebase-functions/params" @@ -287,6 +287,7 @@ async function newTextInstanceHandler({ let similarity let embedding let textHash = hashMessage(text) + const strippedMessagePromise = 
anonymiseMessage(text) // 1 - check if the exact same message exists in database try { ;({ embedding, similarity } = await calculateSimilarity( @@ -320,6 +321,8 @@ async function newTextInstanceHandler({ matchType = similarityScore == 1 ? "exact" : "similarity" } + const strippedMessage = await strippedMessagePromise + if (!hasMatch) { messageRef = db.collection("messages").doc() messageUpdateObj = { @@ -330,7 +333,8 @@ async function newTextInstanceHandler({ machineCategory !== "unsure" && machineCategory !== "info" ), - text: text, //text + originalText: text, + text: strippedMessage, //text caption: null, latestInstance: null, firstTimestamp: timestamp, //timestamp of first instance (firestore timestamp data type) @@ -365,6 +369,7 @@ async function newTextInstanceHandler({ : null, customReply: null, //string instanceCount: 0, + rationalisation: null, } } else { messageRef = matchedParentMessageRef @@ -381,7 +386,8 @@ async function newTextInstanceHandler({ id: id || null, //taken from webhook object, needed to reply timestamp: timestamp, //timestamp, taken from webhook object (firestore timestamp data type) type: "text", //message type, taken from webhook object. Can be 'audio', 'button', 'document', 'text', 'image', 'interactive', 'order', 'sticker', 'system', 'unknown', 'video'. - text: text, //text or caption, taken from webhook object + originalText: text, //text or caption, taken from webhook object + text: strippedMessage, textHash: textHash ?? 
null, caption: null, captionHash: null, @@ -496,6 +502,7 @@ async function newImageInstanceHandler({ let sender = null let isConvo = null let extractedMessage = null + let strippedMessage = null let machineCategory = null if (!hasMatch || !matchedInstanceSnap) { const temporaryUrl = await getSignedUrl(filename) @@ -542,6 +549,7 @@ async function newImageInstanceHandler({ let textHash = null if (ocrSuccess && isConvo && !!extractedMessage && !hasMatch) { + const strippedMessagePromise = anonymiseMessage(extractedMessage) try { textHash = hashMessage(extractedMessage) ;({ embedding, similarity } = await calculateSimilarity( @@ -570,6 +578,7 @@ async function newImageInstanceHandler({ hasMatch = true matchType = similarityScore == 1 ? "exact" : "similarity" } + strippedMessage = await strippedMessagePromise } if (!hasMatch || (!matchedInstanceSnap && !matchedParentMessageRef)) { @@ -581,7 +590,8 @@ async function newImageInstanceHandler({ machineCategory !== "unsure" && machineCategory !== "info" ), - text: extractedMessage ?? null, //text + originalText: extractedMessage ?? null, + text: strippedMessage ?? null, //text caption: caption ?? null, latestInstance: null, firstTimestamp: timestamp, //timestamp of first instance (firestore timestamp data type) @@ -614,6 +624,7 @@ async function newImageInstanceHandler({ : null, customReply: null, //string instanceCount: 0, + rationalisation: null, } } else { if (matchType === "image" && matchedInstanceSnap) { @@ -637,7 +648,8 @@ async function newImageInstanceHandler({ id: id || null, //taken from webhook object, needed to reply timestamp: timestamp, //timestamp, taken from webhook object (firestore timestamp data type) type: "image", //message type, taken from webhook object. Can be 'audio', 'button', 'document', 'text', 'image', 'interactive', 'order', 'sticker', 'system', 'unknown', 'video'. - text: extractedMessage ?? null, //text extracted from OCR if relevant + originalText: extractedMessage ?? 
null, //text extracted from OCR if relevant + text: strippedMessage ?? null, textHash: textHash ?? null, caption: caption ?? null, captionHash: captionHash, @@ -884,6 +896,7 @@ const onUserPublish = onMessagePublished( "TYPESENSE_TOKEN", "ML_SERVER_TOKEN", "TELEGRAM_REPORT_BOT_TOKEN", + "OPENAI_API_KEY", ], timeoutSeconds: 120, }, diff --git a/functions/tsconfig.json b/functions/tsconfig.json index 861e26d8..fce7aabb 100644 --- a/functions/tsconfig.json +++ b/functions/tsconfig.json @@ -8,7 +8,8 @@ "strict": true, "target": "es2017", "noErrorTruncation": true, - "esModuleInterop": true + "esModuleInterop": true, + "resolveJsonModule": true }, "include": ["src"], "exclude": ["./lib/**/*"], From 2fcb247bebcf541d462e4b228081882a2de1fcc7 Mon Sep 17 00:00:00 2001 From: Bing Wen Tan Date: Sat, 23 Sep 2023 22:19:05 +0800 Subject: [PATCH 2/2] more targetted usage of GPT to mask --- documentation/dataSchema.md | 4 +- functions/src/definitions/common/constants.ts | 2 +- .../common/{anonymisation.ts => genAI.ts} | 0 functions/src/definitions/onInstanceCreate.ts | 57 ++++++++++++++----- functions/src/definitions/onInstanceUpdate.ts | 2 +- functions/src/definitions/onMessageUpdate.ts | 2 + functions/src/definitions/userHandlers.ts | 19 +++---- .../checkmate.postman_collection.json | 2 +- 8 files changed, 59 insertions(+), 29 deletions(-) rename functions/src/definitions/common/{anonymisation.ts => genAI.ts} (100%) diff --git a/documentation/dataSchema.md b/documentation/dataSchema.md index e0f88fd9..257d2842 100644 --- a/documentation/dataSchema.md +++ b/documentation/dataSchema.md @@ -9,6 +9,7 @@ erDiagram string caption "Latest caption for image, if applicable" timestamp firstTimestamp "Timestamp of first instance" timestamp lastTimestamp "Timestamp of latest instance" + timestamp lastRefreshedTimestamp "Timestamp where details were last updated" boolean isPollStarted boolean isAssessed "Should message be considered assessed and ready for reply" timestamp assessedTimestamp @@ 
-35,8 +36,7 @@ erDiagram string id "whatsapp message id (needed to reply)" timestamp timestamp string type "text/image" - string originalText "text as sent by user" - string text "pii-stripped text (if text message) or ocr-extracted text (if image message)" + string text "text (if text message) or ocr-extracted text (if image message)" string textHash "md5 hash of text or ocr-extracted text" string caption "caption of image, if applicable" string captionHash "md5 hash of caption" diff --git a/functions/src/definitions/common/constants.ts b/functions/src/definitions/common/constants.ts index ac1fe9e9..25c1e4cf 100644 --- a/functions/src/definitions/common/constants.ts +++ b/functions/src/definitions/common/constants.ts @@ -234,7 +234,7 @@ const thresholds = { falseUpperBound: 1.5, misleadingUpperBound: 3.5, sendInterimMinVotes: 1, - surveyLikelihood: env === "DEV" ? 1 : 0.25, + surveyLikelihood: env !== "PROD" ? 1 : 0.25, satisfactionSurveyCooldownDays: 30, } diff --git a/functions/src/definitions/common/anonymisation.ts b/functions/src/definitions/common/genAI.ts similarity index 100% rename from functions/src/definitions/common/anonymisation.ts rename to functions/src/definitions/common/genAI.ts diff --git a/functions/src/definitions/onInstanceCreate.ts b/functions/src/definitions/onInstanceCreate.ts index 187cc968..ec6cfb71 100644 --- a/functions/src/definitions/onInstanceCreate.ts +++ b/functions/src/definitions/onInstanceCreate.ts @@ -1,6 +1,6 @@ import * as admin from "firebase-admin" import * as functions from "firebase-functions" -import { getCount } from "./common/counters" +import { anonymiseMessage } from "./common/genAI" import { getThresholds } from "./common/utils" import { respondToInstance } from "./common/responseUtils" import { sendWhatsappTemplateMessage } from "./common/sendWhatsappMessage" @@ -15,6 +15,10 @@ import { Timestamp } from "firebase-admin/firestore" // Define some parameters const numInstanceShards = 
defineInt("NUM_SHARDS_INSTANCE_COUNT") +interface MessageUpdate { + [x: string]: any +} + if (!admin.apps.length) { admin.initializeApp() } @@ -48,26 +52,52 @@ const onInstanceCreate = functions .orderBy("timestamp", "desc") .get() const lastInstanceDocSnap = instancesQuerySnap.docs[0] - await parentMessageRef.update({ + const messageUpdateObj: MessageUpdate = { instanceCount: instancesQuerySnap.size, lastTimestamp: lastInstanceDocSnap.get("timestamp"), - }) - - await upsertUser(data.from, data.timestamp) + latestInstance: snap.ref, + } + const parentMessageSnap = await parentMessageRef.get() - if (data?.type === "text") { - parentMessageRef.update({ text: data.text, latestInstance: snap.ref }) - } else if (data?.type === "image") { - parentMessageRef.update({ - latestInstance: snap.ref, - caption: data.caption, - }) + try { + const lastRefreshedDate = parentMessageSnap + .get("lastRefreshedTimestamp") + .toDate() + const comparisonDate = new Date() + comparisonDate.setDate(comparisonDate.getDate() - 30) + //if lastRefreshedDate is more than 30 days ago + if (lastRefreshedDate < comparisonDate) { + messageUpdateObj.lastRefreshedTimestamp = + Timestamp.fromDate(comparisonDate) + if ( + data?.type === "text" && + data?.text != parentMessageSnap.get("originalText") + ) { + const strippedMessage = await anonymiseMessage(data.text) + messageUpdateObj.originalText = data.text + messageUpdateObj.text = strippedMessage + } else if (data?.type === "image") { + messageUpdateObj.caption = data.caption + // Don't anonymise image captions for now, since OCR may be inaccurate + // if (data?.text != parentMessageSnap.get("originalText")) { + // const strippedMessage = await anonymiseMessage(data.text) + // messageUpdateObj.originalText = data.text + // messageUpdateObj.text = strippedMessage + // } + } + } + } catch (e) { + functions.logger.error("Error refreshing message: ", e) } + await parentMessageRef.update(messageUpdateObj) + + await upsertUser(data.from, data.timestamp) + 
if (data?.embedding && data?.text) { const updateObj = { id: snap.ref.path.replace(/\//g, "_"), //typesense id can't seem to take / - message: data.originalText, + message: data.text, captionHash: data.captionHash ? data.captionHash : "__NULL__", embedding: data.embedding, } @@ -81,7 +111,6 @@ const onInstanceCreate = functions } } - const parentMessageSnap = await parentMessageRef.get() if (!data.isReplied) { await respondToInstance(snap, false, true) } diff --git a/functions/src/definitions/onInstanceUpdate.ts b/functions/src/definitions/onInstanceUpdate.ts index 1a8bad4d..1e677f1a 100644 --- a/functions/src/definitions/onInstanceUpdate.ts +++ b/functions/src/definitions/onInstanceUpdate.ts @@ -19,7 +19,7 @@ const onInstanceUpdate = functions const embedding = await getEmbedding(after.text) const updateDocument = { id: change.after.ref.path, - message: after.originalText, + message: after.text, captionHash: after.captionHash ? after.captionHash : "__NULL__", embedding: embedding, } diff --git a/functions/src/definitions/onMessageUpdate.ts b/functions/src/definitions/onMessageUpdate.ts index 7c087c6b..71277252 100644 --- a/functions/src/definitions/onMessageUpdate.ts +++ b/functions/src/definitions/onMessageUpdate.ts @@ -1,6 +1,7 @@ import * as functions from "firebase-functions" import { respondToInstance } from "./common/responseUtils" import { Timestamp } from "firebase-admin/firestore" +import {} from "./common/genAI" const onMessageUpdate = functions .region("asia-southeast1") @@ -12,6 +13,7 @@ const onMessageUpdate = functions const after = change.after const messageData = after.data() if (!before.data().isAssessed && messageData.isAssessed) { + //TODO: rationalisation here await after.ref.update({ assessedTimestamp: Timestamp.fromDate(new Date()), }) diff --git a/functions/src/definitions/userHandlers.ts b/functions/src/definitions/userHandlers.ts index b3441254..da210c6f 100644 --- a/functions/src/definitions/userHandlers.ts +++ 
b/functions/src/definitions/userHandlers.ts @@ -27,7 +27,7 @@ import { getHash, getSignedUrl, } from "./common/mediaUtils" -import { anonymiseMessage } from "./common/anonymisation" +import { anonymiseMessage } from "./common/genAI" import { calculateSimilarity } from "./calculateSimilarity" import { performOCR } from "./common/machineLearningServer/operations" import { defineString } from "firebase-functions/params" @@ -287,7 +287,6 @@ async function newTextInstanceHandler({ let similarity let embedding let textHash = hashMessage(text) - const strippedMessagePromise = anonymiseMessage(text) // 1 - check if the exact same message exists in database try { ;({ embedding, similarity } = await calculateSimilarity( @@ -321,9 +320,8 @@ async function newTextInstanceHandler({ matchType = similarityScore == 1 ? "exact" : "similarity" } - const strippedMessage = await strippedMessagePromise - if (!hasMatch) { + const strippedMessage = await anonymiseMessage(text) messageRef = db.collection("messages").doc() messageUpdateObj = { machineCategory: machineCategory, //Can be "fake news" or "scam" @@ -339,6 +337,7 @@ async function newTextInstanceHandler({ latestInstance: null, firstTimestamp: timestamp, //timestamp of first instance (firestore timestamp data type) lastTimestamp: timestamp, //timestamp of latest instance (firestore timestamp data type) + lastRefreshedTimestamp: timestamp, isPollStarted: false, //boolean, whether or not polling has started isAssessed: !!( machineCategory && @@ -386,8 +385,7 @@ async function newTextInstanceHandler({ id: id || null, //taken from webhook object, needed to reply timestamp: timestamp, //timestamp, taken from webhook object (firestore timestamp data type) type: "text", //message type, taken from webhook object. Can be 'audio', 'button', 'document', 'text', 'image', 'interactive', 'order', 'sticker', 'system', 'unknown', 'video'. 
- originalText: text, //text or caption, taken from webhook object - text: strippedMessage, + text: text, textHash: textHash ?? null, caption: null, captionHash: null, @@ -549,7 +547,6 @@ async function newImageInstanceHandler({ let textHash = null if (ocrSuccess && isConvo && !!extractedMessage && !hasMatch) { - const strippedMessagePromise = anonymiseMessage(extractedMessage) try { textHash = hashMessage(extractedMessage) ;({ embedding, similarity } = await calculateSimilarity( @@ -578,10 +575,12 @@ async function newImageInstanceHandler({ hasMatch = true matchType = similarityScore == 1 ? "exact" : "similarity" } - strippedMessage = await strippedMessagePromise } if (!hasMatch || (!matchedInstanceSnap && !matchedParentMessageRef)) { + if (extractedMessage) { + strippedMessage = await anonymiseMessage(extractedMessage) + } messageRef = db.collection("messages").doc() messageUpdateObj = { machineCategory: machineCategory, //Can be "fake news" or "scam" @@ -596,6 +595,7 @@ async function newImageInstanceHandler({ latestInstance: null, firstTimestamp: timestamp, //timestamp of first instance (firestore timestamp data type) lastTimestamp: timestamp, //timestamp of latest instance (firestore timestamp data type) + lastRefreshedTimestamp: timestamp, isPollStarted: false, //boolean, whether or not polling has started isAssessed: !!( machineCategory && @@ -648,8 +648,7 @@ async function newImageInstanceHandler({ id: id || null, //taken from webhook object, needed to reply timestamp: timestamp, //timestamp, taken from webhook object (firestore timestamp data type) type: "image", //message type, taken from webhook object. Can be 'audio', 'button', 'document', 'text', 'image', 'interactive', 'order', 'sticker', 'system', 'unknown', 'video'. - originalText: extractedMessage ?? null, //text extracted from OCR if relevant - text: strippedMessage ?? null, + text: extractedMessage ?? null, //text extracted from OCR if relevant textHash: textHash ?? null, caption: caption ?? 
null, captionHash: captionHash, diff --git a/integration-tests/checkmate.postman_collection.json b/integration-tests/checkmate.postman_collection.json index 8a91ecbc..3a71fcf8 100644 --- a/integration-tests/checkmate.postman_collection.json +++ b/integration-tests/checkmate.postman_collection.json @@ -1410,7 +1410,7 @@ " \"path\": \"/botTELEGRAM_REPORT_BOT_TOKEN/sendMessage\",\r", " \"body\": {\r", " \"chat_id\": \"111111111\",\r", - " \"text\": `${USER_1_NUMBER} has disputed the assessment of a message sent by them.\\n \\nmessage/instance path: ${DISPUTED_INSTANCE_PATH}\\n\\ntype: text\\n\\ntext: 1234567\\n\\ncategory: irrelevant`\r", + " \"text\": `${USER_1_NUMBER} has disputed the assessment of a message sent by them.\\n \\nmessage/instance path: ${DISPUTED_INSTANCE_PATH}\\n\\ntype: text\\n\\ntext: 1234567\\n\\ncategory: irrelevant`\r", " },\r", " \"method\": \"POST\"\r", " }\r",