From 8771078177f3758474a0d466f0faa58b4b64662c Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Tue, 21 Jun 2022 14:07:17 +0100 Subject: [PATCH] Basic HTML property testing for WebAssembly (#425) Import https://gist.github.com/jelmervdl/a4c8b6b92ad88a885e1cbd51c6ad4902 and attach it to CI. NodeJS-14 is failing on trying to use the WebAssembly binary. So we use node-16 independently setup. This paves way for more complicated testing for WebAssembly bindings in the future. --- .github/workflows/build.yml | 75 +++++++++------------- wasm/node-test.js | 125 ++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 44 deletions(-) create mode 100644 wasm/node-test.js diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2ae9b998e..2bf20cf26 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -232,16 +232,15 @@ jobs: ccache -s # Print current cache stats ccache -z # Zero cache entry - # WORMHOLE=off - - name: "Configure builds for WORMHOLE=off" + - name: "Configure builds" run: | - mkdir -p build-wasm-without-wormhole - cd build-wasm-without-wormhole + mkdir -p build-wasm + cd build-wasm emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=off .. - - name: "Compile with WORMHOLE=off" - working-directory: build-wasm-without-wormhole + - name: "Compile" + working-directory: build-wasm run: | emmake make -j2 @@ -250,43 +249,24 @@ jobs: ccache -s # Print current cache stats - name: Import GEMM library from a separate wasm module - working-directory: build-wasm-without-wormhole + working-directory: build-wasm run: bash ../wasm/patch-artifacts-import-gemm-module.sh + # Setup nodejs-16, as nodejs-14 provided by emsdk fails when running. + - name: Setup nodejs + uses: actions/setup-node@v3 + with: + node-version: 16 - # WORMHOLE=on - - name: "Configure builds for WORMHOLE=on" - run: | - mkdir -p build-wasm-with-wormhole - cd build-wasm-with-wormhole - emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=on .. 
- - - - name: "Compile with WORMHOLE=on" - working-directory: build-wasm-with-wormhole - run: | - emmake make -j2 - - - name: ccache epilog - run: | - ccache -s # Print current cache stats - - - name: Instantiate simd wormhole - working-directory: build-wasm-with-wormhole - run: bash ../wasm/patch-artifacts-enable-wormhole.sh - - - name: Import GEMM library from a separate wasm module - working-directory: build-wasm-with-wormhole - run: bash ../wasm/patch-artifacts-import-gemm-module.sh - - # Rename the wormhole on builds - - name: Rename artefacts with wormhole - working-directory: build-wasm-with-wormhole + - name: Test run + working-directory: wasm run: | - mv bergamot-translator-worker{,-with-wormhole}.js - mv bergamot-translator-worker{,-with-wormhole}.js.bak - mv bergamot-translator-worker{,-with-wormhole}.wasm + cp ../build-wasm/bergamot-translator-worker.{js,wasm} ./ + npm install jsdom + # --unhandled-rejections make the script exit with a non-zero code (at least on node-14). + # So leaving this here. + node --unhandled-rejections=strict node-test.js # Upload both together. 
- name: Upload wasm artifact @@ -296,13 +276,10 @@ jobs: if-no-files-found: error path: | # Without wormhole - ${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.js - ${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.wasm - ${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.js.bak + ${{github.workspace}}/build-wasm/bergamot-translator-worker.js + ${{github.workspace}}/build-wasm/bergamot-translator-worker.wasm + ${{github.workspace}}/build-wasm/bergamot-translator-worker.js.bak - ${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.js - ${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.wasm - ${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.js.bak # Try to upload a release using https://github.com/marvinpinto/actions/issues/177#issuecomment-917605585 as a model release-latest: @@ -313,6 +290,11 @@ jobs: steps: - name: Download artifacts uses: actions/download-artifact@v2 + + # Leave the below be, it will be useful. + - name: List downloaded assets + run: | + find ./ - name: Update GitHub prerelease uses: marvinpinto/action-automatic-releases@latest @@ -338,6 +320,11 @@ jobs: steps: - name: Download artifacts uses: actions/download-artifact@v2 + + # Leave the below be, it will be useful. 
+ - name: List downloaded assets + run: | + find ./ - name: Update GitHub release uses: marvinpinto/action-automatic-releases@latest
diff --git a/wasm/node-test.js b/wasm/node-test.js
new file mode 100644
index 000000000..1f697afd3
--- /dev/null
+++ b/wasm/node-test.js
@@ -0,0 +1,125 @@
+const assert = require('assert');
+const {Blob} = require('buffer');
+const fs = require('fs');
+const https = require('https');
+const {JSDOM} = require('jsdom');
+
+const wasmBinary = fs.readFileSync('./bergamot-translator-worker.wasm');
+global.Module = {
+  wasmBinary,
+  onRuntimeInitialized
+};
+
+// Execute bergamot-translator-worker.js in the global scope (indirect eval).
+const js = fs.readFileSync('./bergamot-translator-worker.js', {encoding: 'utf8'});
+eval.call(global, js);
+
+/**
+ * Helper to download file into ArrayBuffer. Rejects on request errors and on
+ * non-200 responses, so an HTTP error page is never loaded as model data.
+ */
+function download(url) {
+  return new Promise((accept, reject) => {
+    https.get(url, (res) => {
+      if (res.statusCode !== 200) {
+        res.resume(); // Discard the body; it is not the file we asked for.
+        return reject(new Error(`GET ${url} failed with HTTP ${res.statusCode}`));
+      }
+      const chunks = [];
+      res.on('error', reject);
+      res.on('data', chunk => chunks.push(chunk));
+      res.on('end', () => new Blob(chunks).arrayBuffer().then(accept, reject));
+    }).on('error', reject);
+  });
+}
+
+/**
+ * Loads ArrayBuffer into AlignedMemory.
+ */
+function load(buffer, alignment) {
+  const bytes = new Int8Array(buffer);
+  const memory = new Module.AlignedMemory(bytes.byteLength, alignment);
+  memory.getByteArrayView().set(bytes);
+  return memory;
+}
+
+/**
+ * Called from inside the worker.js script once the wasm module is loaded
+ * and all the emscripten magic and linking has been done.
+ */
+async function onRuntimeInitialized() {
+  // Root url for our models for now.
+  const root = 'https://storage.googleapis.com/bergamot-models-sandbox/0.2.14';
+
+  // In order of TranslationModel's memory arguments (model, shortlist, vocab)
+  const files = [
+    {url: `${root}/ende/model.ende.intgemm.alphas.bin`, alignment: 256},
+    {url: `${root}/ende/lex.50.50.ende.s2t.bin`, alignment: 64},
+    {url: `${root}/ende/vocab.deen.spm`, alignment: 64},
+  ];
+
+  // Download model data and load it into aligned memory
+  const [modelMem, shortlistMem, vocabMem] = await Promise.all(files.map(async (file) => {
+    return load(await download(file.url), file.alignment);
+  }));
+
+  // Config yaml (split as array to allow for indentation without adding tabs
+  // or spaces to the strings themselves.)
+  const config = [
+    'beam-size: 1',
+    'normalize: 1.0',
+    'word-penalty: 0',
+    'alignment: soft',
+    'max-length-break: 128',
+    'mini-batch-words: 1024',
+    'workspace: 128',
+    'max-length-factor: 2.0',
+    'skip-cost: true',
+    'cpu-threads: 0',
+    'quiet: true',
+    'quiet-translation: true',
+    'gemm-precision: int8shiftAll',
+  ].join('\n');
+
+  // Set up translation service
+  const service = new Module.BlockingService({cacheSize: 0});
+
+  // Put vocab into its own AlignedMemoryList
+  const vocabs = new Module.AlignedMemoryList();
+  vocabs.push_back(vocabMem);
+
+  // Set up model with config yaml and AlignedMemory objects
+  const model = new Module.TranslationModel(config, modelMem, shortlistMem, vocabs, /*qualityModel=*/ null);
+
+  // Two sibling elements. NOTE(review): tag name reconstructed; confirm upstream.
+  const input = new Module.VectorString();
+  input.push_back('<p>Hello world!</p><p>Goodbye World!</p>');
+
+  // Construct the VectorResponseOptions for the batch
+  const options = new Module.VectorResponseOptions();
+  options.push_back({qualityScores: false, alignment: true, html: true});
+
+  // Translate our batch (of 1)
+  const output = service.translate(model, input, options);
+
+  // Get the translated text of the first (and only) entry in the output.
+  // The following works as a simple black-box test of the API, based on
+  // properties of HTML.
+  const translation = output.get(0).getTranslatedText();
+
+  // Print raw translation for inspection.
+  console.log(translation);
+
+  // Check the element count before dereferencing the children below.
+  const fragment = JSDOM.fragment(translation);
+  assert.strictEqual(fragment.childElementCount, 2);
+
+  // Print two expected tags.
+  console.log(fragment.firstElementChild.outerHTML);
+  console.log(fragment.lastElementChild.outerHTML);
+
+  // Clean-up
+  input.delete();
+  options.delete();
+  output.delete();
+}