diff --git a/.github/.release-please-manifest.json b/.github/.release-please-manifest.json index 9f0764413..d823f0757 100644 --- a/.github/.release-please-manifest.json +++ b/.github/.release-please-manifest.json @@ -1,6 +1,7 @@ { ".": "1.0.0-alpha.34", "analyze": "1.0.0-alpha.34", + "analyze-wasm": "1.0.0-alpha.34", "arcjet": "1.0.0-alpha.34", "arcjet-bun": "1.0.0-alpha.34", "arcjet-deno": "1.0.0-alpha.34", diff --git a/.github/release-please-config.json b/.github/release-please-config.json index befd5ff12..48a29ba33 100644 --- a/.github/release-please-config.json +++ b/.github/release-please-config.json @@ -32,6 +32,10 @@ "component": "@arcjet/analyze", "skip-github-release": true }, + "analyze-wasm": { + "component": "@arcjet/analyze-wasm", + "skip-github-release": true + }, "arcjet": { "component": "arcjet", "skip-github-release": true @@ -152,6 +156,7 @@ "components": [ "root", "@arcjet/analyze", + "@arcjet/analyze-wasm", "arcjet", "@arcjet/bun", "@arcjet/deno", diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml index 5d7569c5c..fc09951c2 100644 --- a/.trunk/trunk.yaml +++ b/.trunk/trunk.yaml @@ -38,7 +38,7 @@ lint: ignore: - linters: [ALL] paths: - - analyze/wasm/** + - analyze-wasm/wasm/** - redact-wasm/wasm/** - protocol/proto/** - "**/LICENSE" diff --git a/analyze-wasm/.eslintignore b/analyze-wasm/.eslintignore new file mode 100644 index 000000000..9cfa2cae7 --- /dev/null +++ b/analyze-wasm/.eslintignore @@ -0,0 +1,6 @@ +/.turbo/ +/coverage/ +/node_modules/ +*.d.ts +*.js +!*.config.js diff --git a/analyze-wasm/.eslintrc.cjs b/analyze-wasm/.eslintrc.cjs new file mode 100644 index 000000000..abe4cd7b4 --- /dev/null +++ b/analyze-wasm/.eslintrc.cjs @@ -0,0 +1,4 @@ +module.exports = { + root: true, + extends: ["@arcjet/eslint-config"], +}; diff --git a/analyze-wasm/.gitignore b/analyze-wasm/.gitignore new file mode 100644 index 000000000..e49250b1a --- /dev/null +++ b/analyze-wasm/.gitignore @@ -0,0 +1,140 @@ +# Logs +logs +*.log +npm-debug.log* 
+yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp +.cache + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used 
for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +# Generated files +edge-light.js +edge-light.d.ts +index.js +index.d.ts +workerd.js +workerd.d.ts +test/*.js +_virtual/*.js diff --git a/analyze-wasm/LICENSE b/analyze-wasm/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/analyze-wasm/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/analyze-wasm/README.md b/analyze-wasm/README.md new file mode 100644 index 000000000..1ba194c9c --- /dev/null +++ b/analyze-wasm/README.md @@ -0,0 +1,62 @@ + + + + Arcjet Logo + + + +# `@arcjet/analyze-wasm` + +

+ + + + npm badge + + +

+ +[Arcjet][arcjet] helps developers protect their apps in just a few lines of +code. Implement rate limiting, bot protection, email verification, and defense +against common attacks. + +This package provides WebAssembly bindings to [Arcjet's][arcjet] local analysis engine. + +## Installation + +```shell +npm install -S @arcjet/analyze-wasm +``` + +## Implementation + +This package provides analyze logic implemented as a WebAssembly module which +will run local analysis on request details. + +The `_virtual/arcjet_analyze_js_req.component.core.js` file contains the binary inlined as +a base64 [Data URL][mdn-data-url] with the `application/wasm` MIME type. + +This was chosen to save on storage space over inlining the file directly as a +Uint8Array, which would take up ~3x the space of the Wasm file. See +[Better Binary Batter: Mixing Base64 and Uint8Array][wasm-base64-blog] for more +details. + +It is then decoded into an ArrayBuffer to be used directly via WebAssembly's +`compile()` function in our entry point file. + +This is all done to avoid trying to read or bundle the Wasm asset in various +ways based on the platform or bundler a user is targeting. One example being +that Next.js requires special `asyncWebAssembly` webpack config to load our +Wasm file if we don't do this. + +In the future, we hope to do away with this workaround when all bundlers +properly support consistent asset bundling techniques. + +## License + +Licensed under the [Apache License, Version 2.0][apache-license]. 
/**
 * Resolves a core Wasm file name requested by `instantiate` to the module
 * imported for it at the top of this file (via the `?module` import suffix).
 *
 * @param path - File name of the core Wasm module being requested.
 * @returns The imported module registered for `path`.
 * @throws Error when `path` is not one of the three known core module names.
 */
async function moduleFromPath(path: string): Promise<WebAssembly.Module> {
  if (path === "arcjet_analyze_js_req.component.core.wasm") {
    return componentCoreWasm;
  }
  if (path === "arcjet_analyze_js_req.component.core2.wasm") {
    return componentCore2Wasm;
  }
  if (path === "arcjet_analyze_js_req.component.core3.wasm") {
    return componentCore3Wasm;
  }

  throw new Error(`Unknown path: ${path}`);
}

/**
 * Instantiates the analyze Wasm component with the provided host imports.
 *
 * @param coreImports - Import object handed to the generated `instantiate`.
 * @returns The instantiated component, or `undefined` when instantiation
 *   fails for any reason.
 */
export async function initializeWasm(coreImports: ImportObject) {
  try {
    // `await` is required here: returning the promise un-awaited would let a
    // rejection escape this `try` block, so the fallback below would never run.
    return await instantiate(moduleFromPath, coreImports);
  } catch {
    return undefined;
  }
}
// Compiled modules keyed by the core Wasm file name, so each binary is only
// decoded and compiled once per process.
const wasmCache = new Map<string, WebAssembly.Module>();

// Loader for each known core Wasm file name. Every loader comes from a
// `.wasm?js` import, which exposes an async `wasm()` function that resolves
// to a compiled module.
const wasmLoaders = new Map<string, () => Promise<WebAssembly.Module>>([
  ["arcjet_analyze_js_req.component.core.wasm", componentCoreWasm],
  ["arcjet_analyze_js_req.component.core2.wasm", componentCore2Wasm],
  ["arcjet_analyze_js_req.component.core3.wasm", componentCore3Wasm],
]);

/**
 * Resolves a core Wasm file name requested by `instantiate` to a compiled
 * module, compiling it on first use and serving it from `wasmCache` afterwards.
 *
 * @param path - File name of the core Wasm module being requested.
 * @returns The compiled module registered for `path`.
 * @throws Error when `path` is not one of the three known core module names.
 */
async function moduleFromPath(path: string): Promise<WebAssembly.Module> {
  const cachedModule = wasmCache.get(path);
  if (typeof cachedModule !== "undefined") {
    return cachedModule;
  }

  const loadModule = wasmLoaders.get(path);
  if (typeof loadModule === "undefined") {
    throw new Error(`Unknown path: ${path}`);
  }

  const mod = await loadModule();
  wasmCache.set(path, mod);
  return mod;
}

/**
 * Instantiates the analyze Wasm component with the provided host imports.
 *
 * @param coreImports - Import object handed to the generated `instantiate`.
 * @returns The instantiated component, or `undefined` when instantiation
 *   fails for any reason.
 */
export async function initializeWasm(coreImports: ImportObject) {
  try {
    // Await the instantiation to catch the failure. Returning the promise
    // un-awaited would let a rejection escape this `try` block.
    return await instantiate(moduleFromPath, coreImports);
  } catch {
    return undefined;
  }
}
/**
 * Produces the source text of a JavaScript module that embeds a Wasm binary
 * as a base64 Data URL and exports an async `wasm()` function which compiles
 * that binary.
 *
 * @param wasm - Buffer holding the raw Wasm bytes.
 * @returns The generated module source as a string.
 */
function generateJs(wasm) {
  const encoded = wasm.toString("base64");

  // Marker comments that identify the output as generated code.
  const preamble = `// @generated by wasm2module - DO NOT EDIT
/* eslint-disable */
// @ts-nocheck
`;

  // Explanation emitted into every generated file.
  const explanation = `
/**
 * This file contains an Arcjet Wasm binary inlined as a base64
 * [Data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)
 * with the application/wasm MIME type.
 *
 * This was chosen to save on storage space over inlining the file directly as
 * a Uint8Array, which would take up ~3x the space of the Wasm file. See
 * https://blobfolio.com/2019/better-binary-batter-mixing-base64-and-uint8array/
 * for more details.
 *
 * It is then decoded into an ArrayBuffer to be used directly via WebAssembly's
 * \`compile()\` function in our entry point file.
 *
 * This is all done to avoid trying to read or bundle the Wasm asset in various
 * ways based on the platform or bundler a user is targeting. One example being
 * that Next.js requires special \`asyncWebAssembly\` webpack config to load our
 * Wasm file if we don't do this.
 *
 * In the future, we hope to do away with this workaround when all bundlers
 * properly support consistent asset bundling techniques.
 */
`;

  // The embedded payload plus the helper that decodes and compiles it.
  const loader = `
const wasmBase64 = "data:application/wasm;base64,${encoded}";
/**
 * Returns a WebAssembly.Module for an Arcjet Wasm binary, decoded from a base64
 * Data URL.
 */
// TODO: Switch back to top-level await when our platforms all support it
export async function wasm() {
  // This uses fetch to decode the wasm data url, but disabling cache so files
  // larger than 2mb don't fail to parse in the Next.js App Router
  const wasmDecode = await fetch(wasmBase64, { cache: "no-store" });
  const buf = await wasmDecode.arrayBuffer();
  // And then we return it as a WebAssembly.Module
  return WebAssembly.compile(buf);
}
`;

  return preamble + explanation + loader;
}

/**
 * Rollup plugin that swaps every `*.wasm?js` import for a virtual JavaScript
 * module generated by `generateJs`.
 *
 * @returns A plugin object exposing `name`, `resolveId` and `load`.
 */
function wasmToModule() {
  // Virtual module id -> path of the Wasm file it stands in for.
  const wasmPathsById = new Map();

  function resolveId(source) {
    // Anything that is not a `.wasm?js` import is left to other resolvers.
    if (!source.endsWith(".wasm?js")) {
      return null;
    }

    // Drop the trailing `?js` query so we are left with a real file path.
    const wasmPath = source.slice(0, -"?js".length);
    // Rollup's convention for virtual modules is a `\0` prefix on the id.
    const virtualId = "\0" + wasmPath.replace(/\.wasm$/, ".js");
    wasmPathsById.set(virtualId, wasmPath);
    return virtualId;
  }

  async function load(id) {
    const wasmPath = wasmPathsById.get(id);
    // Only ids recorded by `resolveId` above are ours to load.
    if (!wasmPath) {
      return null;
    }

    return generateJs(await fs.readFile(wasmPath));
  }

  return { name: "base64-wasm", resolveId, load };
}
a/analyze/wasm/arcjet_analyze_js_req.component.d.ts b/analyze-wasm/wasm/arcjet_analyze_js_req.component.d.ts similarity index 100% rename from analyze/wasm/arcjet_analyze_js_req.component.d.ts rename to analyze-wasm/wasm/arcjet_analyze_js_req.component.d.ts diff --git a/analyze/wasm/arcjet_analyze_js_req.component.js b/analyze-wasm/wasm/arcjet_analyze_js_req.component.js similarity index 100% rename from analyze/wasm/arcjet_analyze_js_req.component.js rename to analyze-wasm/wasm/arcjet_analyze_js_req.component.js diff --git a/analyze/wasm/arcjet_analyze_js_req.component.wasm b/analyze-wasm/wasm/arcjet_analyze_js_req.component.wasm similarity index 73% rename from analyze/wasm/arcjet_analyze_js_req.component.wasm rename to analyze-wasm/wasm/arcjet_analyze_js_req.component.wasm index d67c9e7b2..cfa68c8f3 100644 Binary files a/analyze/wasm/arcjet_analyze_js_req.component.wasm and b/analyze-wasm/wasm/arcjet_analyze_js_req.component.wasm differ diff --git a/analyze/wasm/interfaces/arcjet-js-req-email-validator-overrides.d.ts b/analyze-wasm/wasm/interfaces/arcjet-js-req-email-validator-overrides.d.ts similarity index 100% rename from analyze/wasm/interfaces/arcjet-js-req-email-validator-overrides.d.ts rename to analyze-wasm/wasm/interfaces/arcjet-js-req-email-validator-overrides.d.ts diff --git a/analyze/wasm/interfaces/arcjet-js-req-logger.d.ts b/analyze-wasm/wasm/interfaces/arcjet-js-req-logger.d.ts similarity index 100% rename from analyze/wasm/interfaces/arcjet-js-req-logger.d.ts rename to analyze-wasm/wasm/interfaces/arcjet-js-req-logger.d.ts diff --git a/analyze/wasm/interfaces/arcjet-js-req-sensitive-information-identifier.d.ts b/analyze-wasm/wasm/interfaces/arcjet-js-req-sensitive-information-identifier.d.ts similarity index 100% rename from analyze/wasm/interfaces/arcjet-js-req-sensitive-information-identifier.d.ts rename to analyze-wasm/wasm/interfaces/arcjet-js-req-sensitive-information-identifier.d.ts diff --git 
/**
 * Resolves a core Wasm file name requested by `instantiate` to the module
 * imported for it at the top of this file (plain `.wasm` imports).
 *
 * @param path - File name of the core Wasm module being requested.
 * @returns The imported module registered for `path`.
 * @throws Error when `path` is not one of the three known core module names.
 */
async function moduleFromPath(path: string): Promise<WebAssembly.Module> {
  if (path === "arcjet_analyze_js_req.component.core.wasm") {
    return componentCoreWasm;
  }
  if (path === "arcjet_analyze_js_req.component.core2.wasm") {
    return componentCore2Wasm;
  }
  if (path === "arcjet_analyze_js_req.component.core3.wasm") {
    return componentCore3Wasm;
  }

  throw new Error(`Unknown path: ${path}`);
}

/**
 * Instantiates the analyze Wasm component with the provided host imports.
 *
 * @param coreImports - Import object handed to the generated `instantiate`.
 * @returns The instantiated component, or `undefined` when instantiation
 *   fails for any reason.
 */
export async function initializeWasm(coreImports: ImportObject) {
  try {
    // Await the instantiation to catch the failure. Returning the promise
    // un-awaited would let a rejection escape this `try` block.
    return await instantiate(moduleFromPath, coreImports);
  } catch {
    return undefined;
  }
}
EmailValidationConfig, + type EmailValidationResult, + type BotResult, + type SensitiveInfoResult, + type SensitiveInfoEntities, + type DetectSensitiveInfoFunction, + type ImportObject, +}; diff --git a/analyze/README.md b/analyze/README.md index 16faf79fd..b97d5aa58 100644 --- a/analyze/README.md +++ b/analyze/README.md @@ -42,37 +42,17 @@ console.log("is email valid?", valid); ## Implementation -This package provides analyze logic implemented as a WebAssembly module which -will run local analysis on request details before calling the Arcjet API. +This package uses the Wasm bindings provided by `@arcjet/analyze-wasm` to +call various functions that are exported by our wasm bindings. -The [arcjet.wasm.js](./wasm/arcjet.wasm.js) file contains the binary inlined as -a base64 [Data URL][mdn-data-url] with the `application/wasm` MIME type. - -This was chosen to save on storage space over inlining the file directly as a -Uint8Array, which would take up ~3x the space of the Wasm file. See -[Better Binary Batter: Mixing Base64 and Uint8Array][wasm-base64-blog] for more -details. - -It is then decoded into an ArrayBuffer to be used directly via WebAssembly's -`compile()` function in our entry point file. - -This is all done to avoid trying to read or bundle the Wasm asset in various -ways based on the platform or bundler a user is targeting. One example being -that Next.js requires special `asyncWebAssembly` webpack config to load our -Wasm file if we don't do this. - -In the future, we hope to do away with this workaround when all bundlers -properly support consistent asset bundling techniques. - -## API - -In progress. +We chose to put this logic in a separate package because we need to change the +import structure for each runtime that we support in the wasm bindings. Moving +this to a separate package allows us not to have to duplicate code while providing +a combined higher-level api for calling our core functionality in Wasm. 
## License Licensed under the [Apache License, Version 2.0][apache-license]. [arcjet]: https://arcjet.com -[mdn-data-url]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs -[wasm-base64-blog]: https://blobfolio.com/2019/better-binary-batter-mixing-base64-and-uint8array/ [apache-license]: http://www.apache.org/licenses/LICENSE-2.0 diff --git a/analyze/edge-light.ts b/analyze/edge-light.ts deleted file mode 100644 index 702f09823..000000000 --- a/analyze/edge-light.ts +++ /dev/null @@ -1,209 +0,0 @@ -import type { ArcjetLogger } from "@arcjet/protocol"; - -import { instantiate } from "./wasm/arcjet_analyze_js_req.component.js"; -import type { - ImportObject, - EmailValidationConfig, - EmailValidationResult, - DetectedSensitiveInfoEntity, - SensitiveInfoEntities, - SensitiveInfoEntity, - SensitiveInfoResult, - BotConfig, - BotResult, -} from "./wasm/arcjet_analyze_js_req.component.js"; -import type { ArcjetJsReqSensitiveInformationIdentifier } from "./wasm/interfaces/arcjet-js-req-sensitive-information-identifier.js"; - -import componentCoreWasm from "./wasm/arcjet_analyze_js_req.component.core.wasm?module"; -import componentCore2Wasm from "./wasm/arcjet_analyze_js_req.component.core2.wasm?module"; -import componentCore3Wasm from "./wasm/arcjet_analyze_js_req.component.core3.wasm?module"; - -type AnalyzeRequest = { - ip?: string; - method?: string; - protocol?: string; - host?: string; - path?: string; - headers?: Record; - cookies?: string; - query?: string; - extra?: Record; -}; - -const FREE_EMAIL_PROVIDERS = [ - "gmail.com", - "yahoo.com", - "hotmail.com", - "aol.com", - "hotmail.co.uk", -]; - -interface AnalyzeContext { - log: ArcjetLogger; - characteristics: string[]; -} - -type DetectSensitiveInfoFunction = - typeof ArcjetJsReqSensitiveInformationIdentifier.detect; - -async function moduleFromPath(path: string): Promise { - if (path === "arcjet_analyze_js_req.component.core.wasm") { - return componentCoreWasm; - } - if (path === 
"arcjet_analyze_js_req.component.core2.wasm") { - return componentCore2Wasm; - } - if (path === "arcjet_analyze_js_req.component.core3.wasm") { - return componentCore3Wasm; - } - - throw new Error(`Unknown path: ${path}`); -} - -function noOpDetect(): SensitiveInfoEntity[] { - return []; -} - -async function init( - context: AnalyzeContext, - detectSensitiveInfo?: DetectSensitiveInfoFunction, -) { - const { log } = context; - - if (typeof detectSensitiveInfo !== "function") { - detectSensitiveInfo = noOpDetect; - } - - const coreImports: ImportObject = { - "arcjet:js-req/email-validator-overrides": { - isFreeEmail(domain) { - if (FREE_EMAIL_PROVIDERS.includes(domain)) { - return "yes"; - } - return "unknown"; - }, - isDisposableEmail() { - return "unknown"; - }, - hasMxRecords() { - return "unknown"; - }, - hasGravatar() { - return "unknown"; - }, - }, - "arcjet:js-req/sensitive-information-identifier": { - detect: detectSensitiveInfo, - }, - "arcjet:js-req/verify-bot": { - verify() { - return "unverifiable"; - }, - }, - }; - - try { - // Await the instantiation to catch the failure - return await instantiate(moduleFromPath, coreImports); - } catch { - log.debug("WebAssembly is not supported in this runtime"); - } -} - -export { - type EmailValidationConfig, - type BotConfig, - type DetectedSensitiveInfoEntity, - type SensitiveInfoEntity, - type DetectSensitiveInfoFunction, -}; - -/** - * Generate a fingerprint for the client. This is used to identify the client - * across multiple requests. - * @param context - The Arcjet Analyze context. - * @param request - The request to fingerprint. - * @returns A SHA-256 string fingerprint. 
- */ -export async function generateFingerprint( - context: AnalyzeContext, - request: AnalyzeRequest, -): Promise { - const analyze = await init(context); - - if (typeof analyze !== "undefined") { - return analyze.generateFingerprint( - JSON.stringify(request), - context.characteristics, - ); - } - - return ""; -} - -export async function isValidEmail( - context: AnalyzeContext, - candidate: string, - options?: EmailValidationConfig, -): Promise { - const analyze = await init(context); - const optionsOrDefault = { - requireTopLevelDomain: true, - allowDomainLiteral: false, - blockedEmails: [], - ...options, - }; - - if (typeof analyze !== "undefined") { - return analyze.isValidEmail(candidate, optionsOrDefault); - } else { - // Skip the local evaluation of the rule if Wasm is not available - return { - validity: "valid", - blocked: [], - }; - } -} - -export async function detectBot( - context: AnalyzeContext, - request: AnalyzeRequest, - options: BotConfig, -): Promise { - const analyze = await init(context); - - if (typeof analyze !== "undefined") { - return analyze.detectBot(JSON.stringify(request), options); - } else { - // Skip the local evaluation of the rule if Wasm is not available - return { - allowed: [], - denied: [], - spoofed: false, - verified: false, - }; - } -} - -export async function detectSensitiveInfo( - context: AnalyzeContext, - candidate: string, - entities: SensitiveInfoEntities, - contextWindowSize: number, - detect?: DetectSensitiveInfoFunction, -): Promise { - const analyze = await init(context, detect); - - if (typeof analyze !== "undefined") { - const skipCustomDetect = typeof detect !== "function"; - return analyze.detectSensitiveInfo(candidate, { - entities, - contextWindowSize, - skipCustomDetect, - }); - } else { - throw new Error( - "SENSITIVE_INFO rule failed to run because Wasm is not supported in this environment.", - ); - } -} diff --git a/analyze/index.ts b/analyze/index.ts index 5de6065b8..4fa8608dc 100644 --- 
a/analyze/index.ts +++ b/analyze/index.ts @@ -1,22 +1,22 @@ -import type { ArcjetLogger } from "@arcjet/protocol"; - -import { instantiate } from "./wasm/arcjet_analyze_js_req.component.js"; +import { initializeWasm } from "@arcjet/analyze-wasm"; import type { - ImportObject, + BotConfig, + BotResult, + DetectedSensitiveInfoEntity, + DetectSensitiveInfoFunction, EmailValidationConfig, EmailValidationResult, - DetectedSensitiveInfoEntity, SensitiveInfoEntities, SensitiveInfoEntity, SensitiveInfoResult, - BotConfig, - BotResult, -} from "./wasm/arcjet_analyze_js_req.component.js"; -import type { ArcjetJsReqSensitiveInformationIdentifier } from "./wasm/interfaces/arcjet-js-req-sensitive-information-identifier.js"; + ImportObject, +} from "@arcjet/analyze-wasm"; +import type { ArcjetLogger } from "@arcjet/protocol"; -import { wasm as componentCoreWasm } from "./wasm/arcjet_analyze_js_req.component.core.wasm?js"; -import { wasm as componentCore2Wasm } from "./wasm/arcjet_analyze_js_req.component.core2.wasm?js"; -import { wasm as componentCore3Wasm } from "./wasm/arcjet_analyze_js_req.component.core3.wasm?js"; +interface AnalyzeContext { + log: ArcjetLogger; + characteristics: string[]; +} type AnalyzeRequest = { ip?: string; @@ -30,6 +30,13 @@ type AnalyzeRequest = { extra?: Record; }; +export { + type EmailValidationConfig, + type BotConfig, + type SensitiveInfoEntity, + type DetectedSensitiveInfoEntity, +}; + const FREE_EMAIL_PROVIDERS = [ "gmail.com", "yahoo.com", @@ -38,57 +45,16 @@ const FREE_EMAIL_PROVIDERS = [ "hotmail.co.uk", ]; -interface AnalyzeContext { - log: ArcjetLogger; - characteristics: string[]; -} - -type DetectSensitiveInfoFunction = - typeof ArcjetJsReqSensitiveInformationIdentifier.detect; - -// TODO: Do we actually need this wasmCache or does `import` cache correctly? 
-const wasmCache = new Map(); - -async function moduleFromPath(path: string): Promise { - const cachedModule = wasmCache.get(path); - if (typeof cachedModule !== "undefined") { - return cachedModule; - } - - if (path === "arcjet_analyze_js_req.component.core.wasm") { - const mod = await componentCoreWasm(); - wasmCache.set(path, mod); - return mod; - } - if (path === "arcjet_analyze_js_req.component.core2.wasm") { - const mod = await componentCore2Wasm(); - wasmCache.set(path, mod); - return mod; - } - if (path === "arcjet_analyze_js_req.component.core3.wasm") { - const mod = await componentCore3Wasm(); - wasmCache.set(path, mod); - return mod; - } - - throw new Error(`Unknown path: ${path}`); -} - function noOpDetect(): SensitiveInfoEntity[] { return []; } -async function init( - context: AnalyzeContext, - detectSensitiveInfo?: DetectSensitiveInfoFunction, -) { - const { log } = context; - - if (typeof detectSensitiveInfo !== "function") { - detectSensitiveInfo = noOpDetect; +function createCoreImports(detect?: DetectSensitiveInfoFunction): ImportObject { + if (typeof detect !== "function") { + detect = noOpDetect; } - const coreImports: ImportObject = { + return { "arcjet:js-req/email-validator-overrides": { isFreeEmail(domain) { if (FREE_EMAIL_PROVIDERS.includes(domain)) { @@ -107,7 +73,7 @@ async function init( }, }, "arcjet:js-req/sensitive-information-identifier": { - detect: detectSensitiveInfo, + detect, }, "arcjet:js-req/verify-bot": { verify() { @@ -115,23 +81,8 @@ async function init( }, }, }; - - try { - // Await the instantiation to catch the failure - return await instantiate(moduleFromPath, coreImports); - } catch { - log.debug("WebAssembly is not supported in this runtime"); - } } -export { - type EmailValidationConfig, - type BotConfig, - type DetectedSensitiveInfoEntity, - type SensitiveInfoEntity, - type DetectSensitiveInfoFunction, -}; - /** * Generate a fingerprint for the client. This is used to identify the client * across multiple requests. 
@@ -143,13 +94,17 @@ export async function generateFingerprint( context: AnalyzeContext, request: AnalyzeRequest, ): Promise { - const analyze = await init(context); + const { log } = context; + const coreImports = createCoreImports(); + const analyze = await initializeWasm(coreImports); if (typeof analyze !== "undefined") { return analyze.generateFingerprint( JSON.stringify(request), context.characteristics, ); + } else { + log.debug("WebAssembly is not supported in this runtime"); } return ""; @@ -160,7 +115,9 @@ export async function isValidEmail( candidate: string, options?: EmailValidationConfig, ): Promise { - const analyze = await init(context); + const { log } = context; + const coreImports = createCoreImports(); + const analyze = await initializeWasm(coreImports); const optionsOrDefault = { requireTopLevelDomain: true, allowDomainLiteral: false, @@ -171,6 +128,7 @@ export async function isValidEmail( if (typeof analyze !== "undefined") { return analyze.isValidEmail(candidate, optionsOrDefault); } else { + log.debug("WebAssembly is not supported in this runtime"); // Skip the local evaluation of the rule if WASM is not available return { validity: "valid", @@ -184,11 +142,14 @@ export async function detectBot( request: AnalyzeRequest, options: BotConfig, ): Promise { - const analyze = await init(context); + const { log } = context; + const coreImports = createCoreImports(); + const analyze = await initializeWasm(coreImports); if (typeof analyze !== "undefined") { return analyze.detectBot(JSON.stringify(request), options); } else { + log.debug("WebAssembly is not supported in this runtime"); // Skip the local evaluation of the rule if Wasm is not available return { allowed: [], @@ -206,7 +167,9 @@ export async function detectSensitiveInfo( contextWindowSize: number, detect?: DetectSensitiveInfoFunction, ): Promise { - const analyze = await init(context, detect); + const { log } = context; + const coreImports = createCoreImports(detect); + const analyze = 
await initializeWasm(coreImports); if (typeof analyze !== "undefined") { const skipCustomDetect = typeof detect !== "function"; @@ -216,6 +179,7 @@ export async function detectSensitiveInfo( skipCustomDetect, }); } else { + log.debug("WebAssembly is not supported in this runtime"); throw new Error( "SENSITIVE_INFO rule failed to run because Wasm is not supported in this environment.", ); diff --git a/analyze/package.json b/analyze/package.json index c7a3d03e8..1bcf30474 100644 --- a/analyze/package.json +++ b/analyze/package.json @@ -24,11 +24,6 @@ "type": "module", "main": "./index.js", "types": "./index.d.ts", - "exports": { - "edge-light": "./edge-light.js", - "workerd": "./workerd.js", - "default": "./index.js" - }, "files": [ "LICENSE", "README.md", @@ -40,13 +35,13 @@ ], "scripts": { "prepublishOnly": "npm run build", - "jco": "jco transpile wasm/arcjet_analyze_js_req.component.wasm --no-wasi-shim --instantiation async -o wasm", - "build": "npm run jco; rollup --config rollup.config.js", + "build": "rollup --config rollup.config.js", "lint": "eslint .", "pretest": "npm run build", "test": "node --test --experimental-test-coverage" }, "dependencies": { + "@arcjet/analyze-wasm": "1.0.0-alpha.34", "@arcjet/protocol": "1.0.0-alpha.34" }, "devDependencies": { diff --git a/analyze/rollup.config.js b/analyze/rollup.config.js index 38b5c27fc..79177f236 100644 --- a/analyze/rollup.config.js +++ b/analyze/rollup.config.js @@ -1,85 +1,3 @@ import { createConfig } from "@arcjet/rollup-config"; -import fs from "node:fs/promises"; -function generateJs(wasm) { - const disclaimer = ` -/** - * This file contains an Arcjet Wasm binary inlined as a base64 - * [Data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs) - * with the application/wasm MIME type. - * - * This was chosen to save on storage space over inlining the file directly as - * a Uint8Array, which would take up ~3x the space of the Wasm file. 
See - * https://blobfolio.com/2019/better-binary-batter-mixing-base64-and-uint8array/ - * for more details. - * - * It is then decoded into an ArrayBuffer to be used directly via WebAssembly's - * \`compile()\` function in our entry point file. - * - * This is all done to avoid trying to read or bundle the Wasm asset in various - * ways based on the platform or bundler a user is targeting. One example being - * that Next.js requires special \`asyncWebAssembly\` webpack config to load our - * Wasm file if we don't do this. - * - * In the future, we hope to do away with this workaround when all bundlers - * properly support consistent asset bundling techniques. - */ -`; - - return `// @generated by wasm2module - DO NOT EDIT -/* eslint-disable */ -// @ts-nocheck -${disclaimer} -const wasmBase64 = "data:application/wasm;base64,${wasm.toString("base64")}"; -/** - * Returns a WebAssembly.Module for an Arcjet Wasm binary, decoded from a base64 - * Data URL. - */ -// TODO: Switch back to top-level await when our platforms all support it -export async function wasm() { - // This uses fetch to decode the wasm data url, but disabling cache so files - // larger than 2mb don't fail to parse in the Next.js App Router - const wasmDecode = await fetch(wasmBase64, { cache: "no-store" }); - const buf = await wasmDecode.arrayBuffer(); - // And then we return it as a WebAssembly.Module - return WebAssembly.compile(buf); -} -`; -} - -function wasmToModule() { - const idToWasmPath = new Map(); - - return { - name: "base64-wasm", - resolveId(source) { - if (source.endsWith(".wasm?js")) { - // Slice off the `?js` to make it a valid path - const filepath = source.slice(0, -3); - // Create a "virtual module", prefixed with `\0` as per the Rollup docs, - // for our replacement import - const id = `\0${filepath.replace(/\.wasm$/, ".js")}`; - // Store the actual Wasm path against the virtual module ID. 
- idToWasmPath.set(id, filepath); - return id; - } - - return null; - }, - async load(id) { - const wasmPath = idToWasmPath.get(id); - // If we resolved this `id` during the `resolveId` phase, generate the - // JavaScript file with the base64 Wasm and loading helper - if (wasmPath) { - const wasm = await fs.readFile(wasmPath); - return generateJs(wasm); - } - - return null; - }, - }; -} - -export default createConfig(import.meta.url, { - plugins: [wasmToModule()], -}); +export default createConfig(import.meta.url); diff --git a/analyze/tsconfig.json b/analyze/tsconfig.json index b3da2e770..7150d9377 100644 --- a/analyze/tsconfig.json +++ b/analyze/tsconfig.json @@ -1,4 +1,4 @@ { "extends": "@arcjet/tsconfig/base", - "include": ["index.ts", "edge-light.ts", "workerd.ts", "wasm.d.ts"] + "include": ["index.ts"] } diff --git a/analyze/workerd.ts b/analyze/workerd.ts deleted file mode 100644 index a68f46c99..000000000 --- a/analyze/workerd.ts +++ /dev/null @@ -1,209 +0,0 @@ -import type { ArcjetLogger } from "@arcjet/protocol"; - -import { instantiate } from "./wasm/arcjet_analyze_js_req.component.js"; -import type { - ImportObject, - EmailValidationConfig, - EmailValidationResult, - DetectedSensitiveInfoEntity, - SensitiveInfoEntities, - SensitiveInfoEntity, - SensitiveInfoResult, - BotConfig, - BotResult, -} from "./wasm/arcjet_analyze_js_req.component.js"; -import type { ArcjetJsReqSensitiveInformationIdentifier } from "./wasm/interfaces/arcjet-js-req-sensitive-information-identifier.js"; - -import componentCoreWasm from "./wasm/arcjet_analyze_js_req.component.core.wasm"; -import componentCore2Wasm from "./wasm/arcjet_analyze_js_req.component.core2.wasm"; -import componentCore3Wasm from "./wasm/arcjet_analyze_js_req.component.core3.wasm"; - -type AnalyzeRequest = { - ip?: string; - method?: string; - protocol?: string; - host?: string; - path?: string; - headers?: Record; - cookies?: string; - query?: string; - extra?: Record; -}; - -const FREE_EMAIL_PROVIDERS = [ - 
"gmail.com", - "yahoo.com", - "hotmail.com", - "aol.com", - "hotmail.co.uk", -]; - -interface AnalyzeContext { - log: ArcjetLogger; - characteristics: string[]; -} - -type DetectSensitiveInfoFunction = - typeof ArcjetJsReqSensitiveInformationIdentifier.detect; - -async function moduleFromPath(path: string): Promise { - if (path === "arcjet_analyze_js_req.component.core.wasm") { - return componentCoreWasm; - } - if (path === "arcjet_analyze_js_req.component.core2.wasm") { - return componentCore2Wasm; - } - if (path === "arcjet_analyze_js_req.component.core3.wasm") { - return componentCore3Wasm; - } - - throw new Error(`Unknown path: ${path}`); -} - -function noOpDetect(): SensitiveInfoEntity[] { - return []; -} - -async function init( - context: AnalyzeContext, - detectSensitiveInfo?: DetectSensitiveInfoFunction, -) { - const { log } = context; - - if (typeof detectSensitiveInfo !== "function") { - detectSensitiveInfo = noOpDetect; - } - - const coreImports: ImportObject = { - "arcjet:js-req/email-validator-overrides": { - isFreeEmail(domain) { - if (FREE_EMAIL_PROVIDERS.includes(domain)) { - return "yes"; - } - return "unknown"; - }, - isDisposableEmail() { - return "unknown"; - }, - hasMxRecords() { - return "unknown"; - }, - hasGravatar() { - return "unknown"; - }, - }, - "arcjet:js-req/sensitive-information-identifier": { - detect: detectSensitiveInfo, - }, - "arcjet:js-req/verify-bot": { - verify() { - return "unverifiable"; - }, - }, - }; - - try { - // Await the instantiation to catch the failure - return await instantiate(moduleFromPath, coreImports); - } catch { - log.debug("WebAssembly is not supported in this runtime"); - } -} - -export { - type EmailValidationConfig, - type BotConfig, - type DetectedSensitiveInfoEntity, - type SensitiveInfoEntity, - type DetectSensitiveInfoFunction, -}; - -/** - * Generate a fingerprint for the client. This is used to identify the client - * across multiple requests. - * @param context - The Arcjet Analyze context. 
- * @param request - The request to fingerprint. - * @returns A SHA-256 string fingerprint. - */ -export async function generateFingerprint( - context: AnalyzeContext, - request: AnalyzeRequest, -): Promise { - const analyze = await init(context); - - if (typeof analyze !== "undefined") { - return analyze.generateFingerprint( - JSON.stringify(request), - context.characteristics, - ); - } - - return ""; -} - -export async function isValidEmail( - context: AnalyzeContext, - candidate: string, - options?: EmailValidationConfig, -): Promise { - const analyze = await init(context); - const optionsOrDefault = { - requireTopLevelDomain: true, - allowDomainLiteral: false, - blockedEmails: [], - ...options, - }; - - if (typeof analyze !== "undefined") { - return analyze.isValidEmail(candidate, optionsOrDefault); - } else { - // Skip the local evaluation of the rule if Wasm is not available - return { - validity: "valid", - blocked: [], - }; - } -} - -export async function detectBot( - context: AnalyzeContext, - request: AnalyzeRequest, - options: BotConfig, -): Promise { - const analyze = await init(context); - - if (typeof analyze !== "undefined") { - return analyze.detectBot(JSON.stringify(request), options); - } else { - // Skip the local evaluation of the rule if Wasm is not available - return { - allowed: [], - denied: [], - spoofed: false, - verified: false, - }; - } -} - -export async function detectSensitiveInfo( - context: AnalyzeContext, - candidate: string, - entities: SensitiveInfoEntities, - contextWindowSize: number, - detect?: DetectSensitiveInfoFunction, -): Promise { - const analyze = await init(context, detect); - - if (typeof analyze !== "undefined") { - const skipCustomDetect = typeof detect !== "function"; - return analyze.detectSensitiveInfo(candidate, { - entities, - contextWindowSize, - skipCustomDetect, - }); - } else { - throw new Error( - "SENSITIVE_INFO rule failed to run because Wasm is not supported in this environment.", - ); - } -} diff --git 
a/package-lock.json b/package-lock.json index 962c4a998..bf886603d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,6 +22,7 @@ "version": "1.0.0-alpha.34", "license": "Apache-2.0", "dependencies": { + "@arcjet/analyze-wasm": "1.0.0-alpha.34", "@arcjet/protocol": "1.0.0-alpha.34" }, "devDependencies": { @@ -38,6 +39,24 @@ "node": ">=18" } }, + "analyze-wasm": { + "name": "@arcjet/analyze-wasm", + "version": "1.0.0-alpha.34", + "license": "Apache-2.0", + "devDependencies": { + "@arcjet/eslint-config": "1.0.0-alpha.34", + "@arcjet/rollup-config": "1.0.0-alpha.34", + "@arcjet/tsconfig": "1.0.0-alpha.34", + "@bytecodealliance/jco": "1.5.0", + "@rollup/wasm-node": "4.28.1", + "@types/node": "18.18.0", + "expect": "29.7.0", + "typescript": "5.7.2" + }, + "engines": { + "node": ">=18" + } + }, "arcjet": { "version": "1.0.0-alpha.34", "license": "Apache-2.0", @@ -425,6 +444,10 @@ "resolved": "analyze", "link": true }, + "node_modules/@arcjet/analyze-wasm": { + "resolved": "analyze-wasm", + "link": true + }, "node_modules/@arcjet/body": { "resolved": "body", "link": true