Skip to content

Commit

Permalink
Basic HTML property testing for WebAssembly (#425)
Browse files Browse the repository at this point in the history
Import
https://gist.github.com/jelmervdl/a4c8b6b92ad88a885e1cbd51c6ad4902 and
attach it to CI.  The NodeJS 14 provided by emsdk fails when it tries to run
the WebAssembly binary, so we set up NodeJS 16 independently.  This paves the
way for more complicated testing of the WebAssembly bindings in the future.
  • Loading branch information
jerinphilip authored Jun 21, 2022
1 parent 61d2c35 commit 8771078
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 44 deletions.
75 changes: 31 additions & 44 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -232,16 +232,15 @@ jobs:
ccache -s # Print current cache stats
ccache -z # Zero cache entry
# WORMHOLE=off
- name: "Configure builds for WORMHOLE=off"
- name: "Configure builds"
run: |
mkdir -p build-wasm-without-wormhole
cd build-wasm-without-wormhole
mkdir -p build-wasm
cd build-wasm
emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=off ..
- name: "Compile with WORMHOLE=off"
working-directory: build-wasm-without-wormhole
- name: "Compile"
working-directory: build-wasm
run: |
emmake make -j2
Expand All @@ -250,43 +249,24 @@ jobs:
ccache -s # Print current cache stats
- name: Import GEMM library from a separate wasm module
working-directory: build-wasm-without-wormhole
working-directory: build-wasm
run: bash ../wasm/patch-artifacts-import-gemm-module.sh

# Setup nodejs-16, as nodejs-14 provided by emsdk fails when running.
- name: Setup nodejs
uses: actions/setup-node@v3
with:
node-version: 16

# WORMHOLE=on
- name: "Configure builds for WORMHOLE=on"
run: |
mkdir -p build-wasm-with-wormhole
cd build-wasm-with-wormhole
emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=on ..
- name: "Compile with WORMHOLE=on"
working-directory: build-wasm-with-wormhole
run: |
emmake make -j2
- name: ccache epilog
run: |
ccache -s # Print current cache stats
- name: Instantiate simd wormhole
working-directory: build-wasm-with-wormhole
run: bash ../wasm/patch-artifacts-enable-wormhole.sh

- name: Import GEMM library from a separate wasm module
working-directory: build-wasm-with-wormhole
run: bash ../wasm/patch-artifacts-import-gemm-module.sh

# Rename the wormhole on builds
- name: Rename artefacts with wormhole
working-directory: build-wasm-with-wormhole
- name: Test run
working-directory: wasm
run: |
mv bergamot-translator-worker{,-with-wormhole}.js
mv bergamot-translator-worker{,-with-wormhole}.js.bak
mv bergamot-translator-worker{,-with-wormhole}.wasm
cp ../build-wasm/bergamot-translator-worker.{js,wasm} ./
npm install jsdom
# --unhandled-rejections=strict makes the script exit with a non-zero code when a
# promise rejection goes unhandled (needed at least on node-14), so it is kept here.
node --unhandled-rejections=strict node-test.js
# Upload both together.
- name: Upload wasm artifact
Expand All @@ -296,13 +276,10 @@ jobs:
if-no-files-found: error
path: |
# Without wormhole
${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.js
${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.wasm
${{github.workspace}}/build-wasm-without-wormhole/bergamot-translator-worker.js.bak
${{github.workspace}}/build-wasm/bergamot-translator-worker.js
${{github.workspace}}/build-wasm/bergamot-translator-worker.wasm
${{github.workspace}}/build-wasm/bergamot-translator-worker.js.bak
${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.js
${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.wasm
${{github.workspace}}/build-wasm-with-wormhole/bergamot-translator-worker-with-wormhole.js.bak
# Try to upload a release using https://github.com/marvinpinto/actions/issues/177#issuecomment-917605585 as a model
release-latest:
Expand All @@ -313,6 +290,11 @@ jobs:
steps:
- name: Download artifacts
uses: actions/download-artifact@v2

# Leave the below be, it will be useful.
- name: List downloaded assets
run: |
find ./
- name: Update GitHub prerelease
uses: marvinpinto/action-automatic-releases@latest
Expand All @@ -338,6 +320,11 @@ jobs:
steps:
- name: Download artifacts
uses: actions/download-artifact@v2

# Leave the below be, it will be useful.
- name: List downloaded assets
run: |
find ./
- name: Update GitHub release
uses: marvinpinto/action-automatic-releases@latest
Expand Down
125 changes: 125 additions & 0 deletions wasm/node-test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
const assert = require('assert');
const {Blob} = require('buffer');
const fs = require('fs');
const https = require('https');
const {JSDOM} = require('jsdom');


// Publish the configuration object on the global scope *before* the worker
// script is evaluated: it carries the raw bytes of the compiled wasm binary
// and the callback to run once the runtime has finished initializing.
global.Module = {
  wasmBinary: fs.readFileSync('./bergamot-translator-worker.wasm'),
  onRuntimeInitialized,
};

// Evaluate bergamot-translator-worker.js in the global scope. Calling eval
// through `.call` makes it an indirect eval, so the script does not see this
// module's local bindings. The evaluated source is a local build artifact
// read from disk, not external input.
const workerSource = fs.readFileSync('./bergamot-translator-worker.js', {encoding: 'utf8'});
eval.call(global, workerSource);

/**
 * Downloads a file over HTTPS into an ArrayBuffer.
 *
 * @param {string} url - HTTPS URL of the file to fetch.
 * @returns {Promise<ArrayBuffer>} Resolves with the complete response body.
 *   Rejects when the request itself fails (e.g. DNS or connection errors),
 *   when the server answers with a non-2xx status code, or when the response
 *   stream reports an error.
 */
function download(url) {
  return new Promise((accept, reject) => {
    const request = https.get(url, (res) => {
      const {statusCode} = res;
      if (statusCode < 200 || statusCode >= 300) {
        // Drain the body so the connection is released, and fail loudly
        // instead of handing an error page to the caller as file contents.
        res.resume();
        reject(new Error(`Download of ${url} failed with HTTP status ${statusCode}`));
        return;
      }

      const chunks = [];
      res.on('error', reject);
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => {
        // Blob concatenates the chunks and yields a standalone ArrayBuffer.
        new Blob(chunks).arrayBuffer().then(accept, reject);
      });
    });

    // Connection-level failures are emitted on the request object, not on
    // the response; without this listener they would crash the process.
    request.on('error', reject);
  });
}

/**
 * Copies an ArrayBuffer into a freshly constructed Module.AlignedMemory.
 *
 * @param {ArrayBuffer} buffer - Raw bytes to copy.
 * @param {number} alignment - Alignment passed through to the AlignedMemory
 *   constructor.
 * @returns {Module.AlignedMemory} Memory object of the same byte length as
 *   `buffer`, filled with its contents.
 */
function load(buffer, alignment) {
  const source = new Int8Array(buffer);
  const aligned = new Module.AlignedMemory(source.byteLength, alignment);
  const view = aligned.getByteArrayView();
  view.set(source);
  return aligned;
}

/**
 * Black-box smoke test of the WebAssembly bindings.
 *
 * Called from inside the worker.js script once the wasm module is loaded
 * and all the emscripten magic and linking has been done (it is registered
 * as `Module.onRuntimeInitialized`).
 *
 * Downloads the `ende` model files, loads them into aligned memory, translates
 * a small HTML snippet containing two <p> elements with HTML handling enabled,
 * and asserts that the translated markup still has exactly two element
 * children.
 *
 * @returns {Promise<void>} Rejects when a download fails, the bindings throw,
 *   or the assertion on the translated HTML does not hold.
 */
async function onRuntimeInitialized() {
  // Root url for our models for now.
  const root = 'https://storage.googleapis.com/bergamot-models-sandbox/0.2.14';

  // In order of TranslationMemory's arguments
  const files = [
    {url: `${root}/ende/model.ende.intgemm.alphas.bin`, alignment: 256},
    {url: `${root}/ende/lex.50.50.ende.s2t.bin`, alignment: 64},
    {url: `${root}/ende/vocab.deen.spm`, alignment: 64},
  ];

  // Download model data (in parallel) and load it into aligned memory
  const [modelMem, shortlistMem, vocabMem] = await Promise.all(
    files.map(async (file) => load(await download(file.url), file.alignment)),
  );

  // Config yaml (split as array to allow for indentation without adding tabs
  // or spaces to the strings themselves.)
  const config = [
    'beam-size: 1',
    'normalize: 1.0',
    'word-penalty: 0',
    'alignment: soft',
    'max-length-break: 128',
    'mini-batch-words: 1024',
    'workspace: 128',
    'max-length-factor: 2.0',
    'skip-cost: true',
    'cpu-threads: 0',
    'quiet: true',
    'quiet-translation: true',
    'gemm-precision: int8shiftAll',
  ].join('\n');

  // Set up translation service
  const service = new Module.BlockingService({cacheSize: 0});

  // Put vocab into its own std::vector<AlignedMemory>
  const vocabs = new Module.AlignedMemoryList();
  vocabs.push_back(vocabMem);

  // Set up model with config yaml and AlignedMemory objects
  const model = new Module.TranslationModel(config, modelMem, shortlistMem, vocabs, /*qualityModel=*/ null);

  // std::vector<std::string> inputs and std::vector<ResponseOptions>
  const input = new Module.VectorString();
  const options = new Module.VectorResponseOptions();
  let output = null;

  try {
    input.push_back('<p> Hello world! </p> <p> Goodbye World! </p>');
    options.push_back({qualityScores: false, alignment: true, html: true});

    // Translate our batch (of 1)
    output = service.translate(model, input, options);

    // Get output from std::vector<Response>
    // The following works as a simple black-box test of the API, based on
    // properties of HTML.
    const translation = output.get(0).getTranslatedText();

    // Print raw translation for inspection.
    console.log(translation);

    const fragment = JSDOM.fragment(translation);

    // Check the structure first so a malformed translation is reported as an
    // assertion failure rather than a TypeError on a missing element below.
    assert.strictEqual(
      fragment.childElementCount,
      2,
      'expected the translated HTML to contain exactly two elements',
    );

    // Print two expected tags.
    console.log(fragment.firstElementChild.outerHTML);
    console.log(fragment.lastElementChild.outerHTML);
  } finally {
    // Clean-up, also when translation or the assertion above fails.
    input.delete();
    options.delete();
    if (output !== null) {
      output.delete();
    }
  }
}

0 comments on commit 8771078

Please sign in to comment.