From 59ca8baa7e76bce133cc76556a7f14d01730fcde Mon Sep 17 00:00:00 2001 From: Yiwen <15225434259xue@gmail.com> Date: Fri, 18 Aug 2023 11:40:43 +0800 Subject: [PATCH] add meshoptimizer (#399) --- emscripten/meshopt/meshopt_decoder.asm.js | 1307 ++++++++++++++++++++ emscripten/meshopt/meshopt_decoder.d.ts | 21 + emscripten/meshopt/meshopt_decoder.wasm.js | 178 +++ sources/CMakeLists.txt | 7 + sources/meshopt/indexcodec.cpp | 674 ++++++++++ sources/meshopt/meshoptimizer.h | 1079 ++++++++++++++++ sources/meshopt/vertexcodec.cpp | 1248 +++++++++++++++++++ 7 files changed, 4514 insertions(+) create mode 100644 emscripten/meshopt/meshopt_decoder.asm.js create mode 100644 emscripten/meshopt/meshopt_decoder.d.ts create mode 100644 emscripten/meshopt/meshopt_decoder.wasm.js create mode 100644 sources/meshopt/indexcodec.cpp create mode 100644 sources/meshopt/meshoptimizer.h create mode 100644 sources/meshopt/vertexcodec.cpp diff --git a/emscripten/meshopt/meshopt_decoder.asm.js b/emscripten/meshopt/meshopt_decoder.asm.js new file mode 100644 index 000000000..a8da7d550 --- /dev/null +++ b/emscripten/meshopt/meshopt_decoder.asm.js @@ -0,0 +1,1307 @@ +// This file is part of meshoptimizer library and is distributed under the terms of MIT License. +// Copyright (C) 2016-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) +var MeshoptDecoder = (function () { + "use strict"; + + var instance = {}; + + var ready = Promise.resolve().then(function () { + instance.exports = instantiate({// env + abort: function (msg, file, line, column) { + throw Error("abort: " + msg + " at " + file + ":" + line + ":" + column); + }, + }); + instance.exports.__wasm_call_ctors(); + }); + + function unpack(data) { + var result = new Uint8Array(data.length); + for (var i = 0; i < data.length; ++i) { + var ch = data.charCodeAt(i); + result[i] = ch > 96 ? ch - 97 : ch > 64 ? ch - 39 : ch + 4; + } + var write = 0; + for (var i = 0; i < data.length; ++i) { + result[write++] = (result[i] < 60) ? wasmpack[result[i]] : (result[i] - 60) * 64 + result[++i]; + } + return result.buffer.slice(0, write); + } + + function decode(fun, target, count, size, source, filter) { + var sbrk = instance.exports.sbrk; + var count4 = (count + 3) & ~3; + var tp = sbrk(count4 * size); + var sp = sbrk(source.length); + var heap = new Uint8Array(instance.exports.memory.buffer); + heap.set(source, sp); + var res = fun(tp, count, size, sp, source.length); + if (res == 0 && filter) { + filter(tp, count4, size); + } + target.set(heap.subarray(tp, tp + count * size)); + sbrk(tp - sbrk(0)); + if (res != 0) { + throw new Error("Malformed buffer data: " + res); + } + } + + var filters = { + NONE: "", + OCTAHEDRAL: "meshopt_decodeFilterOct", + QUATERNION: "meshopt_decodeFilterQuat", + EXPONENTIAL: "meshopt_decodeFilterExp", + }; + + var decoders = { + ATTRIBUTES: "meshopt_decodeVertexBuffer", + TRIANGLES: "meshopt_decodeIndexBuffer", + INDICES: "meshopt_decodeIndexSequence", + }; + + var workers = []; + var requestId = 0; + + function createWorker(url) { + var worker = { + object: new Worker(url), + pending: 0, + requests: {} + }; + + worker.object.onmessage = function (event) { + var data = event.data; + + worker.pending -= data.count; + worker.requests[data.id][data.action](data.value); + + delete worker.requests[data.id]; + }; + + return worker; + } + + function initWorkers(count) { + var source = + "var instance; var ready = WebAssembly.instantiate(new Uint8Array([" + new Uint8Array(unpack(wasm)) + "]), {})" + + ".then(function(result) { instance = result.instance; instance.exports.__wasm_call_ctors(); });" + + "self.onmessage = workerProcess;" + + decode.toString() + workerProcess.toString(); + + var blob = new Blob([source], { type: 'text/javascript' }); + var url = URL.createObjectURL(blob); + + for (var i = 0; i < count; ++i) { + workers[i] = createWorker(url); + } + + URL.revokeObjectURL(url); + } + + function decodeWorker(count, size, source, mode, filter) { + var worker = workers[0]; + + for (var i = 1; i < workers.length; ++i) { + if (workers[i].pending < worker.pending) { + worker = workers[i]; + } + } + + return new Promise(function (resolve, reject) { + var data = new Uint8Array(source); + var id = requestId++; + + worker.pending += count; + worker.requests[id] = { resolve: resolve, reject: reject }; + worker.object.postMessage({ id: id, count: count, size: size, source: data, mode: mode, filter: filter }, [data.buffer]); + }); + } + + function workerProcess(event) { + ready.then(function () { + var data = event.data; + try { + var target = new Uint8Array(data.count * data.size); + decode(instance.exports[data.mode], target, data.count, data.size, data.source, instance.exports[data.filter]); + self.postMessage({ id: data.id, count: data.count, action: "resolve", value: target }, [target.buffer]); + } catch (error) { + self.postMessage({ id: data.id, count: data.count, action: "reject", value: error }); + } + }); + } + + return { + ready: ready, + supported: true, + useWorkers: function (count) { + initWorkers(count); + }, + decodeVertexBuffer: function (target, count, size, source, filter) { + decode(instance.exports.meshopt_decodeVertexBuffer, target, count, size, source, instance.exports[filters[filter]]); + }, + decodeIndexBuffer: function (target, count, size, source) { + decode(instance.exports.meshopt_decodeIndexBuffer, target, count, size, source); + }, + decodeIndexSequence: function (target, count, size, source) { + decode(instance.exports.meshopt_decodeIndexSequence, target, count, size, source); + }, + decodeGltfBuffer: function (target, count, size, source, mode, filter) { + decode(instance.exports[decoders[mode]], target, count, size, source, instance.exports[filters[filter]]); + }, + decodeGltfBufferAsync: function (count, size, source, mode, filter) { + if (workers.length > 0) { + return decodeWorker(count, size, source, decoders[mode], filters[filter]); + } + + return ready.then(function () { + var target = new Uint8Array(count * size); + decode(instance.exports[decoders[mode]], target, count, size, source, instance.exports[filters[filter]]); + return target; + }); + } + }; +})(); + +// UMD-style export +if (typeof exports === 'object' && typeof module === 'object') + module.exports = MeshoptDecoder; +else if (typeof define === 'function' && define['amd']) + define([], function () { + return MeshoptDecoder; + }); +else if (typeof exports === 'object') + exports["MeshoptDecoder"] = MeshoptDecoder; +else + (typeof self !== 'undefined' ? self : this).MeshoptDecoder = MeshoptDecoder; + +function instantiate(asmLibraryArg) { + var bufferView; + var base64ReverseLookup = new Uint8Array(123/*'z'+1*/); + for (var i = 25; i >= 0; --i) { + base64ReverseLookup[48 + i] = 52 + i; // '0-9' + base64ReverseLookup[65 + i] = i; // 'A-Z' + base64ReverseLookup[97 + i] = 26 + i; // 'a-z' + } + base64ReverseLookup[43] = 62; // '+' + base64ReverseLookup[47] = 63; // '/' + /** @noinline Inlining this function would mean expanding the base64 string 4x times in the source code, which Closure seems to be happy to do. */ + function base64DecodeToExistingUint8Array(uint8Array, offset, b64) { + var b1, b2, i = 0, j = offset, bLength = b64.length, end = offset + (bLength * 3 >> 2) - (b64[bLength - 2] == '=') - (b64[bLength - 1] == '='); + for (; i < bLength; i += 4) { + b1 = base64ReverseLookup[b64.charCodeAt(i + 1)]; + b2 = base64ReverseLookup[b64.charCodeAt(i + 2)]; + uint8Array[j++] = base64ReverseLookup[b64.charCodeAt(i)] << 2 | b1 >> 4; + if (j < end) uint8Array[j++] = b1 << 4 | b2 >> 2; + if (j < end) uint8Array[j++] = b2 << 6 | base64ReverseLookup[b64.charCodeAt(i + 3)]; + } + } + function initActiveSegments(imports) { + base64DecodeToExistingUint8Array(bufferView, 1024, "EGQAAA=="); + } + function asmFunc(env) { + var buffer = new ArrayBuffer(65536); + var HEAP8 = new Int8Array(buffer); + var HEAP16 = new Int16Array(buffer); + var HEAP32 = new Int32Array(buffer); + var HEAPU8 = new Uint8Array(buffer); + var HEAPU16 = new Uint16Array(buffer); + var HEAPU32 = new Uint32Array(buffer); + var HEAPF32 = new Float32Array(buffer); + var HEAPF64 = new Float64Array(buffer); + var Math_imul = Math.imul; + var Math_fround = Math.fround; + var Math_abs = Math.abs; + var Math_clz32 = Math.clz32; + var Math_min = Math.min; + var Math_max = Math.max; + var Math_floor = Math.floor; + var Math_ceil = Math.ceil; + var Math_trunc = Math.trunc; + var Math_sqrt = Math.sqrt; + var abort = env.abort; + var nan = NaN; + var infinity = Infinity; + var global$0 = 25616; + // EMSCRIPTEN_START_FUNCS + ; + function $0() { + + } + + function $1($0_1, $1_1, $2_1, $3_1, $4_1) { + $0_1 = $0_1 | 0; + $1_1 = $1_1 | 0; + $2_1 = $2_1 | 0; + $3_1 = $3_1 | 0; + $4_1 = $4_1 | 0; + var $20 = 0, $19 = 0, $21 = 0, $6 = 0, i64toi32_i32$1 = 0, $8 = 0, $18 = 0, i64toi32_i32$0 = 0, $17 = 0, $13 = 0, $11 = 0, $60 = 0, $7 = 0, $12 = 0, $15 = 0, $10 = 0, $14 = 0, $61 = 0, $62 = 0, $5_1 = 0, $16 = 0, $9 = 0, $117 = 0, $137 = 0, $152 = 0, $166 = 0, $182 = 0, $197 = 0, $212 = 0, $226 = 0, $242 = 0, $257 = 0, $272 = 0, $286 = 0, $302 = 0, $317 = 0, $332 = 0, $338 = 0, $346 = 0, $361 = 0, $380 = 0, $396 = 0, $410 = 0, $426 = 0, $440 = 0, $456 = 0, $470 = 0, $486 = 0, $500 = 0, $516 = 0, $530 = 0, $546 = 0, $560 = 0, $576 = 0, $582 = 0, $590 = 0, $600 = 0, $605 = 0, wasm2js_i32$0 = 0, wasm2js_i32$1 = 0, wasm2js_i32$2 = 0; + $5_1 = global$0 - 8704 | 0; + global$0 = $5_1; + $6 = -2; + label$1: { + if (($2_1 + 1 | 0) >>> 0 > $4_1 >>> 0) { + break label$1 + } + $6 = -1; + if ((HEAPU8[$3_1 >> 0] | 0 | 0) != (160 | 0)) { + break label$1 + } + $7 = $3_1 + $4_1 | 0; + $8 = $4($5_1 | 0, $7 - $2_1 | 0 | 0, $2_1 | 0) | 0; + $4_1 = (8192 >>> 0) / ($2_1 >>> 0) | 0; + $6 = $3_1 + 1 | 0; + label$2: { + if (!$2_1) { + break label$2 + } + $4_1 = $4_1 & 16368 | 0; + $9 = $4_1 >>> 0 < 256 >>> 0 ? $4_1 : 256; + $10 = 0; + label$3: while (1) { + if ($10 >>> 0 >= $1_1 >>> 0) { + break label$2 + } + $11 = ($10 + $9 | 0) >>> 0 < $1_1 >>> 0 ? $9 : $1_1 - $10 | 0; + $4_1 = $11 + 15 | 0; + $12 = (($4_1 >>> 4 | 0) + 3 | 0) >>> 2 | 0; + label$4: { + label$5: { + label$6: { + label$7: { + $13 = $4_1 & -16 | 0; + if (!$13) { + break label$7 + } + $14 = 0; + $15 = 1; + $16 = $8 + 256 | 0; + $17 = $6; + label$8: while (1) { + if (($7 - $17 | 0) >>> 0 < $12 >>> 0) { + break label$5 + } + label$9: { + $6 = $17 + $12 | 0; + if (($7 - $6 | 0) >>> 0 < 24 >>> 0) { + break label$9 + } + $4_1 = 16; + $18 = 0; + label$10: while (1) { + $3_1 = $4_1; + $19 = $4_1 + -16 | 0; + $4_1 = $19 + ($8 + 8448 | 0) | 0; + label$11: { + label$12: { + switch (((HEAPU8[($17 + ($19 >>> 6 | 0) | 0) >> 0] | 0) >>> ($18 & 6 | 0) | 0) & 3 | 0 | 0) { + default: + i64toi32_i32$1 = $4_1; + i64toi32_i32$0 = 0; + HEAP32[$4_1 >> 2] = 0; + HEAP32[($4_1 + 4 | 0) >> 2] = i64toi32_i32$0; + i64toi32_i32$1 = $4_1 + 8 | 0; + i64toi32_i32$0 = 0; + HEAP32[i64toi32_i32$1 >> 2] = 0; + HEAP32[(i64toi32_i32$1 + 4 | 0) >> 2] = i64toi32_i32$0; + break label$11; + case 1: + $19 = HEAPU8[$6 >> 0] | 0; + $20 = $19 >>> 6 | 0; + $117 = $20; + $20 = ($20 | 0) == (3 | 0); + HEAP8[$4_1 >> 0] = (wasm2js_i32$0 = HEAPU8[($6 + 4 | 0) >> 0] | 0, wasm2js_i32$1 = $117, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $4_1 = ($8 + 8448 | 0) + $3_1 | 0; + $20 = ($6 + 4 | 0) + $20 | 0; + $21 = ($19 >>> 4 | 0) & 3 | 0; + $137 = $21; + $21 = ($21 | 0) == (3 | 0); + HEAP8[($4_1 + -15 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $137, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $21 | 0; + $21 = ($19 >>> 2 | 0) & 3 | 0; + $152 = $21; + $21 = ($21 | 0) == (3 | 0); + HEAP8[($4_1 + -14 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $152, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $21 | 0; + $19 = $19 & 3 | 0; + $166 = $19; + $19 = ($19 | 0) == (3 | 0); + HEAP8[($4_1 + -13 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $166, wasm2js_i32$2 = $19, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $19 | 0; + $19 = HEAPU8[($6 + 1 | 0) >> 0] | 0; + $21 = $19 >>> 6 | 0; + $182 = $21; + $21 = ($21 | 0) == (3 | 0); + HEAP8[($4_1 + -12 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $182, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $21 | 0; + $21 = ($19 >>> 4 | 0) & 3 | 0; + $197 = $21; + $21 = ($21 | 0) == (3 | 0); + HEAP8[($4_1 + -11 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $197, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $21 | 0; + $21 = ($19 >>> 2 | 0) & 3 | 0; + $212 = $21; + $21 = ($21 | 0) == (3 | 0); + HEAP8[($4_1 + -10 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $212, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $21 | 0; + $19 = $19 & 3 | 0; + $226 = $19; + $19 = ($19 | 0) == (3 | 0); + HEAP8[($4_1 + -9 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $226, wasm2js_i32$2 = $19, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $19 | 0; + $19 = HEAPU8[($6 + 2 | 0) >> 0] | 0; + $21 = $19 >>> 6 | 0; + $242 = $21; + $21 = ($21 | 0) == (3 | 0); + HEAP8[($4_1 + -8 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $242, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $21 | 0; + $21 = ($19 >>> 4 | 0) & 3 | 0; + $257 = $21; + $21 = ($21 | 0) == (3 | 0); + HEAP8[($4_1 + -7 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $257, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $21 | 0; + $21 = ($19 >>> 2 | 0) & 3 | 0; + $272 = $21; + $21 = ($21 | 0) == (3 | 0); + HEAP8[($4_1 + -6 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $272, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $20 = $20 + $21 | 0; + $19 = $19 & 3 | 0; + $286 = $19; + $19 = ($19 | 0) == (3 | 0); + HEAP8[($4_1 + -5 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $286, wasm2js_i32$2 = $19, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $20 + $19 | 0; + $6 = HEAPU8[($6 + 3 | 0) >> 0] | 0; + $20 = $6 >>> 6 | 0; + $302 = $20; + $20 = ($20 | 0) == (3 | 0); + HEAP8[($4_1 + -4 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $302, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $20 | 0; + $20 = ($6 >>> 4 | 0) & 3 | 0; + $317 = $20; + $20 = ($20 | 0) == (3 | 0); + HEAP8[($4_1 + -3 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $317, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $20 | 0; + $20 = ($6 >>> 2 | 0) & 3 | 0; + $332 = $20; + $20 = ($20 | 0) == (3 | 0); + HEAP8[($4_1 + -2 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $332, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $338 = $4_1 + -1 | 0; + $4_1 = $19 + $20 | 0; + $6 = $6 & 3 | 0; + $346 = $6; + $6 = ($6 | 0) == (3 | 0); + HEAP8[$338 >> 0] = (wasm2js_i32$0 = HEAPU8[$4_1 >> 0] | 0, wasm2js_i32$1 = $346, wasm2js_i32$2 = $6, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $6 = $4_1 + $6 | 0; + break label$11; + case 2: + $19 = HEAPU8[$6 >> 0] | 0; + $20 = $19 >>> 4 | 0; + $361 = $20; + $20 = ($20 | 0) == (15 | 0); + HEAP8[$4_1 >> 0] = (wasm2js_i32$0 = HEAPU8[($6 + 8 | 0) >> 0] | 0, wasm2js_i32$1 = $361, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $4_1 = ($8 + 8448 | 0) + $3_1 | 0; + $20 = ($6 + 8 | 0) + $20 | 0; + $19 = $19 & 15 | 0; + $380 = $19; + $19 = ($19 | 0) == (15 | 0); + HEAP8[($4_1 + -15 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$20 >> 0] | 0, wasm2js_i32$1 = $380, wasm2js_i32$2 = $19, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $20 + $19 | 0; + $20 = HEAPU8[($6 + 1 | 0) >> 0] | 0; + $21 = $20 >>> 4 | 0; + $396 = $21; + $21 = ($21 | 0) == (15 | 0); + HEAP8[($4_1 + -14 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $396, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $21 | 0; + $20 = $20 & 15 | 0; + $410 = $20; + $20 = ($20 | 0) == (15 | 0); + HEAP8[($4_1 + -13 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $410, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $20 | 0; + $20 = HEAPU8[($6 + 2 | 0) >> 0] | 0; + $21 = $20 >>> 4 | 0; + $426 = $21; + $21 = ($21 | 0) == (15 | 0); + HEAP8[($4_1 + -12 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $426, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $21 | 0; + $20 = $20 & 15 | 0; + $440 = $20; + $20 = ($20 | 0) == (15 | 0); + HEAP8[($4_1 + -11 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $440, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $20 | 0; + $20 = HEAPU8[($6 + 3 | 0) >> 0] | 0; + $21 = $20 >>> 4 | 0; + $456 = $21; + $21 = ($21 | 0) == (15 | 0); + HEAP8[($4_1 + -10 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $456, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $21 | 0; + $20 = $20 & 15 | 0; + $470 = $20; + $20 = ($20 | 0) == (15 | 0); + HEAP8[($4_1 + -9 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $470, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $20 | 0; + $20 = HEAPU8[($6 + 4 | 0) >> 0] | 0; + $21 = $20 >>> 4 | 0; + $486 = $21; + $21 = ($21 | 0) == (15 | 0); + HEAP8[($4_1 + -8 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $486, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $21 | 0; + $20 = $20 & 15 | 0; + $500 = $20; + $20 = ($20 | 0) == (15 | 0); + HEAP8[($4_1 + -7 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $500, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $20 | 0; + $20 = HEAPU8[($6 + 5 | 0) >> 0] | 0; + $21 = $20 >>> 4 | 0; + $516 = $21; + $21 = ($21 | 0) == (15 | 0); + HEAP8[($4_1 + -6 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $516, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $21 | 0; + $20 = $20 & 15 | 0; + $530 = $20; + $20 = ($20 | 0) == (15 | 0); + HEAP8[($4_1 + -5 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $530, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $20 | 0; + $20 = HEAPU8[($6 + 6 | 0) >> 0] | 0; + $21 = $20 >>> 4 | 0; + $546 = $21; + $21 = ($21 | 0) == (15 | 0); + HEAP8[($4_1 + -4 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $546, wasm2js_i32$2 = $21, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $21 | 0; + $20 = $20 & 15 | 0; + $560 = $20; + $20 = ($20 | 0) == (15 | 0); + HEAP8[($4_1 + -3 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $560, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $19 = $19 + $20 | 0; + $6 = HEAPU8[($6 + 7 | 0) >> 0] | 0; + $20 = $6 >>> 4 | 0; + $576 = $20; + $20 = ($20 | 0) == (15 | 0); + HEAP8[($4_1 + -2 | 0) >> 0] = (wasm2js_i32$0 = HEAPU8[$19 >> 0] | 0, wasm2js_i32$1 = $576, wasm2js_i32$2 = $20, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $582 = $4_1 + -1 | 0; + $4_1 = $19 + $20 | 0; + $6 = $6 & 15 | 0; + $590 = $6; + $6 = ($6 | 0) == (15 | 0); + HEAP8[$582 >> 0] = (wasm2js_i32$0 = HEAPU8[$4_1 >> 0] | 0, wasm2js_i32$1 = $590, wasm2js_i32$2 = $6, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $6 = $4_1 + $6 | 0; + break label$11; + case 3: + break label$12; + }; + } + i64toi32_i32$0 = HEAPU8[$6 >> 0] | 0 | ((HEAPU8[($6 + 1 | 0) >> 0] | 0) << 8 | 0) | 0 | ((HEAPU8[($6 + 2 | 0) >> 0] | 0) << 16 | 0 | ((HEAPU8[($6 + 3 | 0) >> 0] | 0) << 24 | 0) | 0) | 0; + i64toi32_i32$1 = HEAPU8[($6 + 4 | 0) >> 0] | 0 | ((HEAPU8[($6 + 5 | 0) >> 0] | 0) << 8 | 0) | 0 | ((HEAPU8[($6 + 6 | 0) >> 0] | 0) << 16 | 0 | ((HEAPU8[($6 + 7 | 0) >> 0] | 0) << 24 | 0) | 0) | 0; + $600 = i64toi32_i32$0; + i64toi32_i32$0 = $4_1; + $61 = $600; + HEAP8[$4_1 >> 0] = $61; + HEAP8[($4_1 + 1 | 0) >> 0] = $61 >>> 8 | 0; + HEAP8[($4_1 + 2 | 0) >> 0] = $61 >>> 16 | 0; + HEAP8[($4_1 + 3 | 0) >> 0] = $61 >>> 24 | 0; + HEAP8[($4_1 + 4 | 0) >> 0] = i64toi32_i32$1; + HEAP8[($4_1 + 5 | 0) >> 0] = i64toi32_i32$1 >>> 8 | 0; + HEAP8[($4_1 + 6 | 0) >> 0] = i64toi32_i32$1 >>> 16 | 0; + HEAP8[($4_1 + 7 | 0) >> 0] = i64toi32_i32$1 >>> 24 | 0; + $60 = $6 + 8 | 0; + i64toi32_i32$1 = HEAPU8[$60 >> 0] | 0 | ((HEAPU8[($60 + 1 | 0) >> 0] | 0) << 8 | 0) | 0 | ((HEAPU8[($60 + 2 | 0) >> 0] | 0) << 16 | 0 | ((HEAPU8[($60 + 3 | 0) >> 0] | 0) << 24 | 0) | 0) | 0; + i64toi32_i32$0 = HEAPU8[($60 + 4 | 0) >> 0] | 0 | ((HEAPU8[($60 + 5 | 0) >> 0] | 0) << 8 | 0) | 0 | ((HEAPU8[($60 + 6 | 0) >> 0] | 0) << 16 | 0 | ((HEAPU8[($60 + 7 | 0) >> 0] | 0) << 24 | 0) | 0) | 0; + $605 = i64toi32_i32$1; + i64toi32_i32$1 = $4_1 + 8 | 0; + $62 = $605; + HEAP8[i64toi32_i32$1 >> 0] = $62; + HEAP8[(i64toi32_i32$1 + 1 | 0) >> 0] = $62 >>> 8 | 0; + HEAP8[(i64toi32_i32$1 + 2 | 0) >> 0] = $62 >>> 16 | 0; + HEAP8[(i64toi32_i32$1 + 3 | 0) >> 0] = $62 >>> 24 | 0; + HEAP8[(i64toi32_i32$1 + 4 | 0) >> 0] = i64toi32_i32$0; + HEAP8[(i64toi32_i32$1 + 5 | 0) >> 0] = i64toi32_i32$0 >>> 8 | 0; + HEAP8[(i64toi32_i32$1 + 6 | 0) >> 0] = i64toi32_i32$0 >>> 16 | 0; + HEAP8[(i64toi32_i32$1 + 7 | 0) >> 0] = i64toi32_i32$0 >>> 24 | 0; + $6 = $6 + 16 | 0; + } + label$16: { + if ($3_1 >>> 0 >= $13 >>> 0) { + break label$16 + } + $18 = $18 + 2 | 0; + $4_1 = $3_1 + 16 | 0; + if (($7 - $6 | 0) >>> 0 > 23 >>> 0) { + continue label$10 + } + } + break label$10; + }; + if ($3_1 >>> 0 < $13 >>> 0) { + break label$6 + } + if (!$6) { + break label$6 + } + label$17: { + if (!$11) { + break label$17 + } + $18 = HEAPU8[($8 + $14 | 0) >> 0] | 0; + $4_1 = $8 + 8448 | 0; + $3_1 = $16; + $19 = $11; + label$18: while (1) { + $17 = HEAPU8[$4_1 >> 0] | 0; + $18 = (($17 >>> 1 | 0) ^ (0 - ($17 & 1 | 0) | 0) | 0) + $18 | 0; + HEAP8[$3_1 >> 0] = $18; + $3_1 = $3_1 + $2_1 | 0; + $4_1 = $4_1 + 1 | 0; + $19 = $19 + -1 | 0; + if ($19) { + continue label$18 + } + break label$18; + }; + } + $16 = $16 + 1 | 0; + $14 = $14 + 1 | 0; + $15 = $14 >>> 0 < $2_1 >>> 0; + $17 = $6; + if (($14 | 0) != ($2_1 | 0)) { + continue label$8 + } + break label$4; + } + break label$8; + }; + $6 = 0; + if ($15 & 1 | 0) { + break label$5 + } + break label$4; + } + $21 = $6 + Math_imul($12, $2_1) | 0; + label$19: { + if (!$11) { + break label$19 + } + $13 = 0; + $15 = 1; + $20 = $8 + 256 | 0; + label$20: while (1) { + if (($7 - $6 | 0) >>> 0 < $12 >>> 0) { + break label$5 + } + if (!$6) { + break label$6 + } + $6 = $6 + $12 | 0; + $18 = HEAPU8[($8 + $13 | 0) >> 0] | 0; + $4_1 = $8 + 8448 | 0; + $3_1 = $20; + $19 = $11; + label$21: while (1) { + $17 = HEAPU8[$4_1 >> 0] | 0; + $18 = (($17 >>> 1 | 0) ^ (0 - ($17 & 1 | 0) | 0) | 0) + $18 | 0; + HEAP8[$3_1 >> 0] = $18; + $3_1 = $3_1 + $2_1 | 0; + $4_1 = $4_1 + 1 | 0; + $19 = $19 + -1 | 0; + if ($19) { + continue label$21 + } + break label$21; + }; + $20 = $20 + 1 | 0; + $13 = $13 + 1 | 0; + $15 = $13 >>> 0 < $2_1 >>> 0; + if (($13 | 0) != ($2_1 | 0)) { + continue label$20 + } + break label$20; + }; + $6 = $21; + break label$4; + } + $4_1 = 0; + $15 = 1; + label$22: while (1) { + if (($7 - $6 | 0) >>> 0 < $12 >>> 0) { + break label$5 + } + if (!$6) { + break label$6 + } + $6 = $6 + $12 | 0; + $4_1 = $4_1 + 1 | 0; + $15 = $4_1 >>> 0 < $2_1 >>> 0; + if (($2_1 | 0) != ($4_1 | 0)) { + continue label$22 + } + break label$22; + }; + $6 = $21; + break label$4; + } + $6 = 0; + if (!($15 & 1 | 0)) { + break label$4 + } + } + $6 = -2; + break label$1; + } + $4($0_1 + Math_imul($10, $2_1) | 0 | 0, $8 + 256 | 0 | 0, Math_imul($11, $2_1) | 0) | 0; + $4($8 | 0, ($8 + 256 | 0) + Math_imul($11 + -1 | 0, $2_1) | 0 | 0, $2_1 | 0) | 0; + $10 = $11 + $10 | 0; + if ($6) { + continue label$3 + } + break label$3; + }; + $6 = -2; + break label$1; + } + $6 = ($7 - $6 | 0 | 0) == (($2_1 >>> 0 > 32 >>> 0 ? $2_1 : 32) | 0) ? 0 : -3; + } + global$0 = $5_1 + 8704 | 0; + return $6 | 0; + } + + function $2($0_1, $1_1, $2_1, $3_1, $4_1) { + $0_1 = $0_1 | 0; + $1_1 = $1_1 | 0; + $2_1 = $2_1 | 0; + $3_1 = $3_1 | 0; + $4_1 = $4_1 | 0; + var $9 = 0, $15 = 0, $14 = 0, $5_1 = 0, $6 = 0, $17 = 0, $18 = 0, $16 = 0, i64toi32_i32$1 = 0, $13 = 0, i64toi32_i32$0 = 0, $7 = 0, $12 = 0, $19 = 0, $8 = 0, $20 = 0, $10 = 0, $21 = 0, $11 = 0, wasm2js_i32$0 = 0, wasm2js_i32$1 = 0, wasm2js_i32$2 = 0; + $5_1 = global$0 - 192 | 0; + global$0 = $5_1; + $6 = -2; + label$1: { + $7 = ($1_1 >>> 0) / (3 >>> 0) | 0; + if (($7 + 17 | 0) >>> 0 > $4_1 >>> 0) { + break label$1 + } + $6 = -1; + $8 = HEAPU8[$3_1 >> 0] | 0; + if (($8 & 240 | 0 | 0) != (224 | 0)) { + break label$1 + } + $9 = $8 & 15 | 0; + if ($9 >>> 0 > 1 >>> 0) { + break label$1 + } + $5($5_1 + 64 | 0 | 0, 255 | 0, 128 | 0) | 0; + i64toi32_i32$1 = $5_1 + 56 | 0; + i64toi32_i32$0 = -1; + HEAP32[i64toi32_i32$1 >> 2] = -1; + HEAP32[(i64toi32_i32$1 + 4 | 0) >> 2] = i64toi32_i32$0; + i64toi32_i32$1 = $5_1 + 48 | 0; + i64toi32_i32$0 = -1; + HEAP32[i64toi32_i32$1 >> 2] = -1; + HEAP32[(i64toi32_i32$1 + 4 | 0) >> 2] = i64toi32_i32$0; + i64toi32_i32$1 = $5_1 + 40 | 0; + i64toi32_i32$0 = -1; + HEAP32[i64toi32_i32$1 >> 2] = -1; + HEAP32[(i64toi32_i32$1 + 4 | 0) >> 2] = i64toi32_i32$0; + i64toi32_i32$1 = $5_1 + 32 | 0; + i64toi32_i32$0 = -1; + HEAP32[i64toi32_i32$1 >> 2] = -1; + HEAP32[(i64toi32_i32$1 + 4 | 0) >> 2] = i64toi32_i32$0; + i64toi32_i32$1 = $5_1 + 24 | 0; + i64toi32_i32$0 = -1; + HEAP32[i64toi32_i32$1 >> 2] = -1; + HEAP32[(i64toi32_i32$1 + 4 | 0) >> 2] = i64toi32_i32$0; + i64toi32_i32$1 = $5_1 + 16 | 0; + i64toi32_i32$0 = -1; + HEAP32[i64toi32_i32$1 >> 2] = -1; + HEAP32[(i64toi32_i32$1 + 4 | 0) >> 2] = i64toi32_i32$0; + i64toi32_i32$1 = $5_1; + i64toi32_i32$0 = -1; + HEAP32[($5_1 + 8 | 0) >> 2] = -1; + HEAP32[($5_1 + 12 | 0) >> 2] = i64toi32_i32$0; + i64toi32_i32$1 = $5_1; + i64toi32_i32$0 = -1; + HEAP32[$5_1 >> 2] = -1; + HEAP32[($5_1 + 4 | 0) >> 2] = i64toi32_i32$0; + $10 = ($3_1 + $4_1 | 0) + -16 | 0; + $8 = $3_1 + 1 | 0; + $6 = $8 + $7 | 0; + label$2: { + if (!$1_1) { + break label$2 + } + $11 = ($9 | 0) == (1 | 0) ? 13 : 15; + $12 = 0; + $13 = 0; + $7 = 0; + $3_1 = 0; + $4_1 = 0; + label$3: while (1) { + label$4: { + if ($6 >>> 0 <= $10 >>> 0) { + break label$4 + } + $6 = -2; + break label$1; + } + label$5: { + label$6: { + $9 = HEAPU8[$8 >> 0] | 0; + if ($9 >>> 0 > 239 >>> 0) { + break label$6 + } + $14 = $9 ^ -1 | 0; + $15 = ($5_1 + 64 | 0) + ((($4_1 + ($14 >>> 4 | 0) | 0) & 15 | 0) << 3 | 0) | 0; + $16 = HEAP32[($15 + 4 | 0) >> 2] | 0; + $17 = HEAP32[$15 >> 2] | 0; + label$7: { + $9 = $9 & 15 | 0; + if ($9 >>> 0 >= $11 >>> 0) { + break label$7 + } + $15 = (wasm2js_i32$0 = HEAP32[($5_1 + ((($3_1 + $14 | 0) & 15 | 0) << 2 | 0) | 0) >> 2] | 0, wasm2js_i32$1 = $12, wasm2js_i32$2 = $9, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $9 = !$9; + label$8: { + label$9: { + if (($2_1 | 0) != (2 | 0)) { + break label$9 + } + $14 = $0_1 + ($7 << 1 | 0) | 0; + HEAP16[$14 >> 1] = $17; + HEAP16[($14 + 2 | 0) >> 1] = $16; + HEAP16[($14 + 4 | 0) >> 1] = $15; + break label$8; + } + $14 = $0_1 + ($7 << 2 | 0) | 0; + HEAP32[$14 >> 2] = $17; + HEAP32[($14 + 4 | 0) >> 2] = $16; + HEAP32[($14 + 8 | 0) >> 2] = $15; + } + $12 = $12 + $9 | 0; + $14 = ($5_1 + 64 | 0) + ($4_1 << 3 | 0) | 0; + HEAP32[$14 >> 2] = $15; + HEAP32[($14 + 4 | 0) >> 2] = $16; + HEAP32[($5_1 + ($3_1 << 2 | 0) | 0) >> 2] = $15; + $4_1 = ($4_1 + 1 | 0) & 15 | 0; + $14 = ($5_1 + 64 | 0) + ($4_1 << 3 | 0) | 0; + HEAP32[$14 >> 2] = $17; + HEAP32[($14 + 4 | 0) >> 2] = $15; + $3_1 = $3_1 + $9 | 0; + $4_1 = $4_1 + 1 | 0; + break label$5; + } + label$10: { + label$11: { + if (($9 | 0) == (15 | 0)) { + break label$11 + } + $13 = (($13 + $9 | 0) + ($9 ^ -4 | 0) | 0) + 1 | 0; + break label$10; + } + $9 = $6 + 1 | 0; + $15 = HEAP8[$6 >> 0] | 0; + $14 = $15 & 255 | 0; + label$12: { + label$13: { + if (($15 | 0) <= (-1 | 0)) { + break label$13 + } + $6 = $9; + break label$12; + } + $6 = $6 + 5 | 0; + $14 = $14 & 127 | 0; + $15 = 7; + label$14: { + label$15: while (1) { + $18 = HEAP8[$9 >> 0] | 0; + $14 = ($18 & 127 | 0) << $15 | 0 | $14 | 0; + if (($18 | 0) > (-1 | 0)) { + break label$14 + } + $9 = $9 + 1 | 0; + $15 = $15 + 7 | 0; + if (($15 | 0) != (35 | 0)) { + continue label$15 + } + break label$12; + }; + } + $6 = $9 + 1 | 0; + } + $13 = (($14 >>> 1 | 0) ^ (0 - ($14 & 1 | 0) | 0) | 0) + $13 | 0; + } + label$16: { + label$17: { + if (($2_1 | 0) != (2 | 0)) { + break label$17 + } + $9 = $0_1 + ($7 << 1 | 0) | 0; + HEAP16[$9 >> 1] = $17; + HEAP16[($9 + 2 | 0) >> 1] = $16; + HEAP16[($9 + 4 | 0) >> 1] = $13; + break label$16; + } + $9 = $0_1 + ($7 << 2 | 0) | 0; + HEAP32[$9 >> 2] = $17; + HEAP32[($9 + 4 | 0) >> 2] = $16; + HEAP32[($9 + 8 | 0) >> 2] = $13; + } + $9 = ($5_1 + 64 | 0) + ($4_1 << 3 | 0) | 0; + HEAP32[$9 >> 2] = $13; + HEAP32[($9 + 4 | 0) >> 2] = $16; + HEAP32[($5_1 + ($3_1 << 2 | 0) | 0) >> 2] = $13; + $4_1 = ($4_1 + 1 | 0) & 15 | 0; + $9 = ($5_1 + 64 | 0) + ($4_1 << 3 | 0) | 0; + HEAP32[$9 >> 2] = $17; + HEAP32[($9 + 4 | 0) >> 2] = $13; + $3_1 = $3_1 + 1 | 0; + $4_1 = $4_1 + 1 | 0; + break label$5; + } + label$18: { + if ($9 >>> 0 > 253 >>> 0) { + break label$18 + } + $18 = $12 + 1 | 0; + $15 = HEAPU8[($10 + ($9 & 15 | 0) | 0) >> 0] | 0; + $14 = $15 >>> 0 < 16 >>> 0; + $9 = (wasm2js_i32$0 = $18, wasm2js_i32$1 = HEAP32[($5_1 + ((($3_1 - ($15 >>> 4 | 0) | 0) & 15 | 0) << 2 | 0) | 0) >> 2] | 0, wasm2js_i32$2 = $14, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $16 = $18 + $14 | 0; + $18 = $15 & 15 | 0; + $15 = (wasm2js_i32$0 = HEAP32[($5_1 + ((($3_1 - $15 | 0) & 15 | 0) << 2 | 0) | 0) >> 2] | 0, wasm2js_i32$1 = $16, wasm2js_i32$2 = $18, wasm2js_i32$2 ? wasm2js_i32$0 : wasm2js_i32$1); + $18 = !$18; + label$19: { + label$20: { + if (($2_1 | 0) != (2 | 0)) { + break label$20 + } + $17 = $0_1 + ($7 << 1 | 0) | 0; + HEAP16[$17 >> 1] = $12; + HEAP16[($17 + 2 | 0) >> 1] = $9; + HEAP16[($17 + 4 | 0) >> 1] = $15; + break label$19; + } + $17 = $0_1 + ($7 << 2 | 0) | 0; + HEAP32[$17 >> 2] = $12; + HEAP32[($17 + 4 | 0) >> 2] = $9; + HEAP32[($17 + 8 | 0) >> 2] = $15; + } + HEAP32[($5_1 + ($3_1 << 2 | 0) | 0) >> 2] = $12; + $17 = ($5_1 + 64 | 0) + ($4_1 << 3 | 0) | 0; + HEAP32[$17 >> 2] = $9; + HEAP32[($17 + 4 | 0) >> 2] = $12; + $3_1 = $3_1 + 1 | 0; + HEAP32[($5_1 + (($3_1 & 15 | 0) << 2 | 0) | 0) >> 2] = $9; + $17 = ($5_1 + 64 | 0) + ((($4_1 + 1 | 0) & 15 | 0) << 3 | 0) | 0; + HEAP32[$17 >> 2] = $15; + HEAP32[($17 + 4 | 0) >> 2] = $9; + $3_1 = ($3_1 + $14 | 0) & 15 | 0; + HEAP32[($5_1 + ($3_1 << 2 | 0) | 0) >> 2] = $15; + $4_1 = ($4_1 + 2 | 0) & 15 | 0; + $9 = ($5_1 + 64 | 0) + ($4_1 << 3 | 0) | 0; + HEAP32[$9 >> 2] = $12; + HEAP32[($9 + 4 | 0) >> 2] = $15; + $4_1 = $4_1 + 1 | 0; + $3_1 = $3_1 + $18 | 0; + $12 = $16 + $18 | 0; + break label$5; + } + $17 = HEAPU8[$6 >> 0] | 0; + $19 = $17 ? $12 : 0; + $9 = ($9 | 0) == (254 | 0); + $16 = $19 + $9 | 0; + $20 = $17 & 15 | 0; + $21 = $17 >>> 4 | 0; + label$21: { + label$22: { + if ($17 >>> 0 > 15 >>> 0) { + break label$22 + } + $18 = $16 + 1 | 0; + break label$21; + } + $18 = $16; + $16 = HEAP32[($5_1 + ((($3_1 - $21 | 0) & 15 | 0) << 2 | 0) | 0) >> 2] | 0; + } + label$23: { + label$24: { + if ($20) { + break label$24 + } + $12 = $18 + 1 | 0; + break label$23; + } + $12 = $18; + $18 = HEAP32[($5_1 + ((($3_1 - $17 | 0) & 15 | 0) << 2 | 0) | 0) >> 2] | 0; + } + label$25: { + label$26: { + if (!$9) { + break label$26 + } + $9 = $6 + 1 | 0; + break label$25; + } + $9 = $6 + 2 | 0; + $14 = HEAP8[($6 + 1 | 0) >> 0] | 0; + $15 = $14 & 255 | 0; + label$27: { + if (($14 | 0) > (-1 | 0)) { + break label$27 + } + $19 = $6 + 6 | 0; + $15 = $15 & 127 | 0; + $6 = 7; + label$28: { + label$29: while (1) { + $14 = HEAP8[$9 >> 0] | 0; + $15 = ($14 & 127 | 0) << $6 | 0 | $15 | 0; + if (($14 | 0) > (-1 | 0)) { + break label$28 + } + $9 = $9 + 1 | 0; + $6 = $6 + 7 | 0; + if (($6 | 0) != (35 | 0)) { + continue label$29 + } + break label$29; + }; + $9 = $19; + break label$27; + } + $9 = $9 + 1 | 0; + } + $13 = (($15 >>> 1 | 0) ^ (0 - ($15 & 1 | 0) | 0) | 0) + $13 | 0; + $19 = $13; + } + label$30: { + label$31: { + if (($21 | 0) == (15 | 0)) { + break label$31 + } + $15 = $9; + break label$30; + } + $15 = $9 + 1 | 0; + $6 = HEAP8[$9 >> 0] | 0; + $14 = $6 & 255 | 0; + label$32: { + if (($6 | 0) > (-1 | 0)) { + break label$32 + } + $16 = $9 + 5 | 0; + $14 = $14 & 127 | 0; + $6 = 7; + label$33: { + label$34: while (1) { + $9 = HEAP8[$15 >> 0] | 0; + $14 = ($9 & 127 | 0) << $6 | 0 | $14 | 0; + if (($9 | 0) > (-1 | 0)) { + break label$33 + } + $15 = $15 + 1 | 0; + $6 = $6 + 7 | 0; + if (($6 | 0) != (35 | 0)) { + continue label$34 + } + break label$34; + }; + $15 = $16; + break label$32; + } + $15 = $15 + 1 | 0; + } + $13 = (($14 >>> 1 | 0) ^ (0 - ($14 & 1 | 0) | 0) | 0) + $13 | 0; + $16 = $13; + } + label$35: { + label$36: { + if (($20 | 0) == (15 | 0)) { + break label$36 + } + $6 = $15; + break label$35; + } + $6 = $15 + 1 | 0; + $9 = HEAP8[$15 >> 0] | 0; + $14 = $9 & 255 | 0; + label$37: { + if (($9 | 0) > (-1 | 0)) { + break label$37 + } + $18 = $15 + 5 | 0; + $14 = $14 & 127 | 0; + $9 = 7; + label$38: { + label$39: while (1) { + $15 = HEAP8[$6 >> 0] | 0; + $14 = ($15 & 127 | 0) << $9 | 0 | $14 | 0; + if (($15 | 0) > (-1 | 0)) { + break label$38 + } + $6 = $6 + 1 | 0; + $9 = $9 + 7 | 0; + if (($9 | 0) != (35 | 0)) { + continue label$39 + } + break label$39; + }; + $6 = $18; + break label$37; + } + $6 = $6 + 1 | 0; + } + $13 = (($14 >>> 1 | 0) ^ (0 - ($14 & 1 | 0) | 0) | 0) + $13 | 0; + $18 = $13; + } + label$40: { + label$41: { + if (($2_1 | 0) != (2 | 0)) { + break label$41 + } + $9 = $0_1 + ($7 << 1 | 0) | 0; + HEAP16[$9 >> 1] = $19; + HEAP16[($9 + 2 | 0) >> 1] = $16; + HEAP16[($9 + 4 | 0) >> 1] = $18; + break label$40; + } + $9 = $0_1 + ($7 << 2 | 0) | 0; + HEAP32[$9 >> 2] = $19; + HEAP32[($9 + 4 | 0) >> 2] = $16; + HEAP32[($9 + 8 | 0) >> 2] = $18; + } + $9 = ($5_1 + 64 | 0) + ($4_1 << 3 | 0) | 0; + HEAP32[$9 >> 2] = $16; + HEAP32[($9 + 4 | 0) >> 2] = $19; + HEAP32[($5_1 + ($3_1 << 2 | 0) | 0) >> 2] = $19; + $9 = ($5_1 + 64 | 0) + ((($4_1 + 1 | 0) & 15 | 0) << 3 | 0) | 0; + HEAP32[$9 >> 2] = $18; + HEAP32[($9 + 4 | 0) >> 2] = $16; + $3_1 = $3_1 + 1 | 0; + HEAP32[($5_1 + (($3_1 & 15 | 0) << 2 | 0) | 0) >> 2] = $16; + $9 = ($5_1 + 64 | 0) + ((($4_1 + 2 | 0) & 15 | 0) << 3 | 0) | 0; + HEAP32[$9 >> 2] = $19; + HEAP32[($9 + 4 | 0) >> 2] = $18; + $3_1 = $3_1 + ($17 >>> 0 < 16 >>> 0 | ($21 | 0) == (15 | 0) | 0) | 0; + HEAP32[($5_1 + (($3_1 & 15 | 0) << 2 | 0) | 0) >> 2] = $18; + $3_1 = $3_1 + (!$20 | ($20 | 0) == (15 | 0) | 0) | 0; + $4_1 = $4_1 + 3 | 0; + } + $8 = $8 + 1 | 0; + $4_1 = $4_1 & 15 | 0; + $3_1 = $3_1 & 15 | 0; + $7 = $7 + 3 | 0; + if ($7 >>> 0 < $1_1 >>> 0) { + continue label$3 + } + break label$3; + }; + } + $6 = ($6 | 0) == ($10 | 0) ? 0 : -3; + } + global$0 = $5_1 + 192 | 0; + return $6 | 0; + } + + function $3($0_1) { + $0_1 = $0_1 | 0; + var $1_1 = 0, $2_1 = 0, $3_1 = 0; + $1_1 = HEAP32[(0 + 1024 | 0) >> 2] | 0; + $0_1 = $1_1 + (($0_1 + 3 | 0) & -4 | 0) | 0; + HEAP32[(0 + 1024 | 0) >> 2] = $0_1; + label$1: { + label$2: { + $2_1 = __wasm_memory_size() << 16 | 0; + if ($0_1 >>> 0 <= $2_1 >>> 0) { + break label$2 + } + $3_1 = -1; + if ((__wasm_memory_grow((($0_1 - $2_1 | 0) + 65535 | 0) >>> 16 | 0 | 0) | 0) == (-1 | 0)) { + break label$1 + } + } + $3_1 = $1_1; + } + return $3_1 | 0; + } + + function $4($0_1, $1_1, $2_1) { + $0_1 = $0_1 | 0; + $1_1 = $1_1 | 0; + $2_1 = $2_1 | 0; + var $3_1 = 0; + label$1: { + label$2: { + if (!(($1_1 | $0_1 | 0) & 3 | 0)) { + break label$2 + } + $3_1 = $0_1; + break label$1; + } + label$3: { + label$4: { + if ($2_1 >>> 0 >= 16 >>> 0) { + break label$4 + } + $3_1 = $0_1; + break label$3; + } + $3_1 = $0_1; + label$5: while (1) { + HEAP32[$3_1 >> 2] = HEAP32[$1_1 >> 2] | 0; + HEAP32[($3_1 + 4 | 0) >> 2] = HEAP32[($1_1 + 4 | 0) >> 2] | 0; + HEAP32[($3_1 + 8 | 0) >> 2] = HEAP32[($1_1 + 8 | 0) >> 2] | 0; + HEAP32[($3_1 + 12 | 0) >> 2] = HEAP32[($1_1 + 12 | 0) >> 2] | 0; + $1_1 = $1_1 + 16 | 0; + $3_1 = $3_1 + 16 | 0; + $2_1 = $2_1 + -16 | 0; + if ($2_1 >>> 0 > 15 >>> 0) { + continue label$5 + } + break label$5; + }; + } + if ($2_1 >>> 0 < 4 >>> 0) { + break label$1 + } + label$6: while (1) { + HEAP32[$3_1 >> 2] = HEAP32[$1_1 >> 2] | 0; + $1_1 = $1_1 + 4 | 0; + $3_1 = $3_1 + 4 | 0; + $2_1 = $2_1 + -4 | 0; + if ($2_1 >>> 0 > 3 >>> 0) { + continue label$6 + } + break label$6; + }; + } + label$7: { + if (!$2_1) { + break label$7 + } + label$8: while (1) { + HEAP8[$3_1 >> 0] = HEAPU8[$1_1 >> 0] | 0; + $3_1 = $3_1 + 1 | 0; + $1_1 = $1_1 + 1 | 0; + $2_1 = $2_1 + -1 | 0; + if ($2_1) { + continue label$8 + } + break label$8; + }; + } + return $0_1 | 0; + } + + function $5($0_1, $1_1, $2_1) { + $0_1 = $0_1 | 0; + $1_1 = $1_1 | 0; + $2_1 = $2_1 | 0; + var $3_1 = 0, $4_1 = 0; + label$1: { + label$2: { + if (!($0_1 & 3 | 0)) { + break label$2 + } + $3_1 = $0_1; + break label$1; + } + $4_1 = Math_imul($1_1 & 255 | 0, 16843009); + label$3: { + label$4: { + if ($2_1 >>> 0 >= 16 >>> 0) { + break label$4 + } + $3_1 = $0_1; + break label$3; + } + $3_1 = $0_1; + label$5: while (1) { + HEAP32[$3_1 >> 2] = $4_1; + HEAP32[($3_1 + 12 | 0) >> 2] = $4_1; + HEAP32[($3_1 + 8 | 0) >> 2] = $4_1; + HEAP32[($3_1 + 4 | 0) >> 2] = $4_1; + $3_1 = $3_1 + 16 | 0; + $2_1 = $2_1 + -16 | 0; + if ($2_1 >>> 0 > 15 >>> 0) { + continue label$5 + } + break label$5; + }; + } + if ($2_1 >>> 0 < 4 >>> 0) { + break label$1 + } + label$6: while (1) { + HEAP32[$3_1 >> 2] = $4_1; + $3_1 = $3_1 + 4 | 0; + $2_1 = $2_1 + -4 | 0; + if ($2_1 >>> 0 > 3 >>> 0) { + continue label$6 + } + break label$6; + }; + } + label$7: { + if (!$2_1) { + break label$7 + } + label$8: while (1) { + HEAP8[$3_1 >> 0] = $1_1; + $3_1 = $3_1 + 1 | 0; + $2_1 = $2_1 + -1 | 0; + if ($2_1) { + continue label$8 + } + break label$8; + }; + } + return $0_1 | 0; + } + + // EMSCRIPTEN_END_FUNCS + ; + bufferView = HEAPU8; + initActiveSegments(env); + function __wasm_memory_size() { + return buffer.byteLength / 65536 | 0; + } + + function __wasm_memory_grow(pagesToAdd) { + pagesToAdd = pagesToAdd | 0; + var oldPages = __wasm_memory_size() | 0; + var newPages = oldPages + pagesToAdd | 0; + if ((oldPages < newPages) && (newPages < 65536)) { + var newBuffer = new ArrayBuffer(Math_imul(newPages, 65536)); + var newHEAP8 = new Int8Array(newBuffer); + newHEAP8.set(HEAP8); + HEAP8 = new Int8Array(newBuffer); + HEAP16 = new Int16Array(newBuffer); + HEAP32 = new Int32Array(newBuffer); + HEAPU8 = new Uint8Array(newBuffer); + HEAPU16 = new Uint16Array(newBuffer); + HEAPU32 = new Uint32Array(newBuffer); + HEAPF32 = new Float32Array(newBuffer); + HEAPF64 = new Float64Array(newBuffer); + buffer = newBuffer; + bufferView = HEAPU8; + } + return oldPages; + } + + return { + "memory": Object.create(Object.prototype, { + "grow": { + "value": __wasm_memory_grow + }, + "buffer": { + "get": function () { + return buffer; + } + + } + }), + "__wasm_call_ctors": $0, + "meshopt_decodeVertexBuffer": $1, + "meshopt_decodeIndexBuffer": $2, + "sbrk": $3 + }; + } + + return asmFunc(asmLibraryArg); +} diff --git a/emscripten/meshopt/meshopt_decoder.d.ts b/emscripten/meshopt/meshopt_decoder.d.ts new file mode 100644 index 000000000..13f3405c7 --- /dev/null +++ b/emscripten/meshopt/meshopt_decoder.d.ts @@ -0,0 +1,21 @@ +declare module 'external:emscripten/meshopt/meshopt_decoder.asm.js' { + export default MeshoptDecoder; +} + +declare module 'external:emscripten/meshopt/meshopt_decoder.wasm.js' { + export default MeshoptDecoder; +} + +declare namespace MeshoptDecoder { + const supported: boolean; + const ready: Promise; + + function decodeVertexBuffer (target: Uint8Array, count: number, size: number, source: Uint8Array, filter?: string): void; + function decodeIndexBuffer (target: Uint8Array, count: number, size: number, source: Uint8Array): void; + function decodeIndexSequence (target: Uint8Array, count: number, size: number, source: Uint8Array): void; + + function decodeGltfBuffer (target: Uint8Array, count: number, size: number, source: Uint8Array, mode: string, filter?: string): void; + + function useWorkers (count: number): void; + function decodeGltfBufferAsync (count: number, size: number, source: Uint8Array, mode: string, filter?: string): Promise; +} diff --git a/emscripten/meshopt/meshopt_decoder.wasm.js b/emscripten/meshopt/meshopt_decoder.wasm.js new file mode 100644 index 000000000..60549e5db --- /dev/null +++ b/emscripten/meshopt/meshopt_decoder.wasm.js @@ -0,0 +1,178 @@ +// This file is part of meshoptimizer library and is distributed under the terms of MIT License. +// Copyright (C) 2016-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) +var MeshoptDecoder = (function() { + "use strict"; + + // Built with clang version 16.0.0 + // Built from meshoptimizer 0.19 + var wasm_base = "b9H79TebbbeYl9Gbb9Gvuuuuueu9Geueu9Giuuueuirobeediiviebeoweuec:q;iekr9Evo9TW9T9VV95dbH9F9F939H79T9F9J9H229F9Jt9VV7bb8A9TW79O9V9Wt9F9KW9J9V9KW9wWVtW949c919M9MWVbeY9TW79O9V9Wt9F9KW9J9V9KW69U9KW949c919M9MWVbdl79IV9Rbiq;08Kodbk:yzeHu8Jjjjjbcj;eb9Rgv8Kjjjjbc9:hodnadcefal0mbcuhoaiRbbc:Ge9hmbavaialfgrad9Radz:ejjjbhwcj;abad9UhlaicefhodnadTmbalc;WFbGglcjdalcjd6EhDcbhqinaqae9pmeaDaeaq9RaqaDfae6Egkcsfglcl4cifcd4hxdndndndnalc9WGgmTmbcbhPcehsawcjdfhzaohHinaraH9Rax6midnaraHaxfgo9RcK6mbczhlcbhOinalgic9WfgAawcj;cbffhldndndndndnaHaAco4fRbbaOcoG4ciGPlbedibkal9cb83ibalcwf9cb83ibxikalaoRblaoRbbgAco4gCaCciSgCE86bbawcj;cbfaifglcGfaoclfaCfgCRbbaAcl4ciGgXaXciSgXE86bbalcVfaCaXfgCRbbaAcd4ciGgXaXciSgXE86bbalc7faCaXfgCRbbaAciGgAaAciSgAE86bbalctfaCaAfgCRbbaoRbegAco4gXaXciSgXE86bbalc91faCaXfgCRbbaAcl4ciGgXaXciSgXE86bbalc4faCaXfgCRbbaAcd4ciGgXaXciSgXE86bbalc93faCaXfgCRbbaAciGgAaAciSgAE86bbalc94faCaAfgCRbbaoRbdgAco4gXaXciSgXE86bbalc95faCaXfgCRbbaAcl4ciGgXaXciSgXE86bbalc96faCaXfgCRbbaAcd4ciGgXaXciSgXE86bbalc97faCaXfgCRbbaAciGgAaAciSgAE86bbalc98faCaAfgARbbaoRbigoco4gCaCciSgCE86bbalc99faAaCfgARbbaocl4ciGgCaCciSgCE86bbalc9:faAaCfgARbbaocd4ciGgCaCciSgCE86bbalcufaAaCfglRbbaociGgoaociSgoE86bbalaofhoxdkalaoRbwaoRbbgAcl4gCaCcsSgCE86bbawcj;cbfaifglcGfaocwfaCfgCRbbaAcsGgAaAcsSgAE86bbalcVfaCaAfgARbbaoRbegCcl4gXaXcsSgXE86bbalc7faAaXfgARbbaCcsGgCaCcsSgCE86bbalctfaAaCfgARbbaoRbdgCcl4gXaXcsSgXE86bbalc91faAaXfgARbbaCcsGgCaCcsSgCE86bbalc4faAaCfgARbbaoRbigCcl4gXaXcsSgXE86bbalc93faAaXfgARbbaCcsGgCaCcsSgCE86bbalc94faAaCfgARbbaoRblgCcl4gXaXcsSgXE86bbalc95faAaXfgARbbaCcsGgCaCcsSgCE86bbalc96faAaCfgARbbaoRbvgCcl4gXaXcsSgXE86bbalc97faAaXfgARbbaCcsGgCaCcsSgCE86bbalc98faAaCfgARbbaoRbogCcl4gXaXcsSgXE86bbalc99faAaXfgARbbaCcsGgCaCcsSgCE86bbalc9:faAaCfgARbbaoRbrgocl4gCaCcsSgCE86bbalcufaAaCfglRbbaocsGgoaocsSgoE86bbalaofhoxekalao8Pbb83bbalcwfaocwf8Pbb83bbaoczfhokdnaiam9pmbaOcdfhOaiczfhlarao9RcL0mekkaiam6miaoTmidnakTmbawaPfRbbhOawcj;cbfhlazhiakhAinaialRbbgHce4cbaHceG9R7aOfgO86bbaiadfhialcefhlaAcufgAmbkkazcefhzaPcefgPad6hsaohHaPad9hmexvkkcbhoasceGmdxikaoaxad2fhXdnakTmbcbhmcehsawcjdfhCinarao9Rax6miaoTmdaoaxfhoawamfRbbhOawcj;cbfhlaChiakhAinaialRbbgHce4cbaHceG9R7aOfgO86bbaiadfhialcefhlaAcufgAmbkaCcefhCamcefgmad6hsamad9hmbkaXhoxikcbhlcehsinarao9Rax6mdaoTmeaoaxfhoalcefglad6hsadal9hmbkaXhoxdkcbhoasceGTmekc9:hoxikabaqad2fawcjdfakad2z:ejjjb8Aawawcjdfakcufad2fadz:ejjjb8Aakaqfhqaombkc9:hoxekcbc99arao9Radcaadca0ESEhokavcj;ebf8Kjjjjbaok;xzeHu8Jjjjjbc;ae9Rgv8Kjjjjbc9:hodnaeci9UgrcHfal0mbcuhoaiRbbgwc;WeGc;Ge9hmbawcsGgDce0mbavc;abfcFecjez:fjjjb8AavcUf9cu83ibavc8Wf9cu83ibavcyf9cu83ibavcaf9cu83ibavcKf9cu83ibavczf9cu83ibav9cu83iwav9cu83ibaialfc9WfhqaicefgwarfhodnaeTmbcmcsaDceSEhkcbhxcbhmcbhrcbhicbhlindnaoaq9nmbc9:hoxikdndnawRbbgDc;Ve0mbavc;abfalaDcu7gPcl4fcsGcitfgsydlhzasydbhHdnaDcsGgDak9pmbavaiaPfcsGcdtfydbaxaDEhsaDThDdndnadcd9hmbabarcetfgPaH87ebaPcdfaz87ebaPclfas87ebxekabarcdtfgPaHBdbaPclfazBdbaPcwfasBdbkaxaDfhxavc;abfalcitfgPasBdbaPazBdlavaicdtfasBdbavc;abfalcefcsGglcitfgPaHBdbaPasBdlaiaDfhialcefhlxdkdndnaDcsSmbamaDfaDc987fcefhmxekaocefhDao8SbbgscFeGhPdndnascu9mmbaDhoxekaocvfhoaPcFbGhPcrhsdninaD8SbbgOcFbGastaPVhPaOcu9kmeaDcefhDascrfgsc8J9hmbxdkkaDcefhokaPce4cbaPceG9R7amfhmkdndnadcd9hmbabarcetfgDaH87ebaDcdfaz87ebaDclfam87ebxekabarcdtfgDaHBdbaDclfazBdbaDcwfamBdbkavc;abfalcitfgDamBdbaDazBdlavaicdtfamBdbavc;abfalcefcsGglcitfgDaHBdbaDamBdlaicefhialcefhlxekdnaDcpe0mbaxcefgOavaiaqaDcsGfRbbgscl49RcsGcdtfydbascz6gPEhDavaias9RcsGcdtfydbaOaPfgzascsGgOEhsaOThOdndnadcd9hmbabarcetfgHax87ebaHcdfaD87ebaHclfas87ebxekabarcdtfgHaxBdbaHclfaDBdbaHcwfasBdbkavaicdtfaxBdbavc;abfalcitfgHaDBdbaHaxBdlavaicefgicsGcdtfaDBdbavc;abfalcefcsGcitfgHasBdbaHaDBdlavaiaPfcsGgicdtfasBdbavc;abfalcdfcsGglcitfgDaxBdbaDasBdlalcefhlaiaOfhiazaOfhxxekaxcbaoRbbgHEgAaDc;:eSgDfhzaHcsGhCaHcl4hXdndnaHcs0mbazcefhOxekazhOavaiaX9RcsGcdtfydbhzkdndnaCmbaOcefhxxekaOhxavaiaH9RcsGcdtfydbhOkdndnaDTmbaocefhDxekaocdfhDao8SbegPcFeGhsdnaPcu9kmbaocofhAascFbGhscrhodninaD8SbbgPcFbGaotasVhsaPcu9kmeaDcefhDaocrfgoc8J9hmbkaAhDxekaDcefhDkasce4cbasceG9R7amfgmhAkdndnaXcsSmbaDhsxekaDcefhsaD8SbbgocFeGhPdnaocu9kmbaDcvfhzaPcFbGhPcrhodninas8SbbgDcFbGaotaPVhPaDcu9kmeascefhsaocrfgoc8J9hmbkazhsxekascefhskaPce4cbaPceG9R7amfgmhzkdndnaCcsSmbashoxekascefhoas8SbbgDcFeGhPdnaDcu9kmbascvfhOaPcFbGhPcrhDdninao8SbbgscFbGaDtaPVhPascu9kmeaocefhoaDcrfgDc8J9hmbkaOhoxekaocefhokaPce4cbaPceG9R7amfgmhOkdndnadcd9hmbabarcetfgDaA87ebaDcdfaz87ebaDclfaO87ebxekabarcdtfgDaABdbaDclfazBdbaDcwfaOBdbkavc;abfalcitfgDazBdbaDaABdlavaicdtfaABdbavc;abfalcefcsGcitfgDaOBdbaDazBdlavaicefgicsGcdtfazBdbavc;abfalcdfcsGcitfgDaABdbaDaOBdlavaiaHcz6aXcsSVfgicsGcdtfaOBdbaiaCTaCcsSVfhialcifhlkawcefhwalcsGhlaicsGhiarcifgrae6mbkkcbc99aoaqSEhokavc;aef8Kjjjjbaok9teiucbcbydj1jjbgeabcifc98GfgbBdj1jjbdndnabZbcztgd9nmbcuhiabad9RcFFifcz4nbcuSmekaehikaik;LeeeudndnaeabVciGTmbabhixekdndnadcz9pmbabhixekabhiinaiaeydbBdbaiclfaeclfydbBdbaicwfaecwfydbBdbaicxfaecxfydbBdbaeczfheaiczfhiadc9Wfgdcs0mbkkadcl6mbinaiaeydbBdbaeclfheaiclfhiadc98fgdci0mbkkdnadTmbinaiaeRbb86bbaicefhiaecefheadcufgdmbkkabk;aeedudndnabciGTmbabhixekaecFeGc:b:c:ew2hldndnadcz9pmbabhixekabhiinaialBdbaicxfalBdbaicwfalBdbaiclfalBdbaiczfhiadc9Wfgdcs0mbkkadcl6mbinaialBdbaiclfhiadc98fgdci0mbkkdnadTmbinaiae86bbaicefhiadcufgdmbkkabkkkebcjwklz9Kbb"; + var wasm_simd = "b9H79TebbbeOi9Gbb9Gvuuuuueu9Geueuirobbebedlve9Weeeviebeoweuec:q;Aekr9Evo9TW9T9VV95dbH9F9F939H79T9F9J9H229F9Jt9VV7bb8A9TW79O9V9Wt9F9KW9J9V9KW9wWVtW949c919M9MWVbdY9TW79O9V9Wt9F9KW9J9V9KW69U9KW949c919M9MWVbll79IV9Rbvqp81olbzik9:evu8Jjjjjbcz9Rhbcbheincbhdcbhiinabcwfadfaicjuaead4ceGglE86bbaialfhiadcefgdcw9hmbkaec:q:yjjbfai86bbaecitc:q1jjbfab8Piw83ibaecefgecjd9hmbkk;e8JlHud97euo978Jjjjjbcj;kb9Rgv8Kjjjjbc9:hodnadcefal0mbcuhoaiRbbc:Ge9hmbavaialfgrad9Rad;8qbbcj;abad9UhoaicefhldnadTmbaoc;WFbGgocjdaocjd6EhwcbhDinaDae9pmeawaeaD9RaDawfae6Egqcsfgoc9WGgkci2hxakcethmaocl4cifcd4hPabaDad2fhscbhzdnincbhHalhOcbhAdninaraO9RaP6miavcj;cbfaAak2fhCaOaPfhlcbhidnakc;ab6mbaral9Rc;Gb6mbcbhoinaCaofhidndndndndnaOaoco4fRbbgXciGPlbedibkaipxbbbbbbbbbbbbbbbbpklbxikaialpbblalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLgQcdp:meaQpmbzeHdOiAlCvXoQrLpxiiiiiiiiiiiiiiiip9ogLpxiiiiiiiiiiiiiiiip8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibaKc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spklbalclfaYpQbfaKc:q:yjjbfRbbfhlxdkaialpbbwalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLpxssssssssssssssssp9ogLpxssssssssssssssssp8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibaKc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spklbalcwfaYpQbfaKc:q:yjjbfRbbfhlxekaialpbbbpklbalczfhlkdndndndndnaXcd4ciGPlbedibkaipxbbbbbbbbbbbbbbbbpklzxikaialpbblalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLgQcdp:meaQpmbzeHdOiAlCvXoQrLpxiiiiiiiiiiiiiiiip9ogLpxiiiiiiiiiiiiiiiip8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibaKc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spklzalclfaYpQbfaKc:q:yjjbfRbbfhlxdkaialpbbwalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLpxssssssssssssssssp9ogLpxssssssssssssssssp8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibaKc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spklzalcwfaYpQbfaKc:q:yjjbfRbbfhlxekaialpbbbpklzalczfhlkdndndndndnaXcl4ciGPlbedibkaipxbbbbbbbbbbbbbbbbpklaxikaialpbblalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLgQcdp:meaQpmbzeHdOiAlCvXoQrLpxiiiiiiiiiiiiiiiip9ogLpxiiiiiiiiiiiiiiiip8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibaKc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spklaalclfaYpQbfaKc:q:yjjbfRbbfhlxdkaialpbbwalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLpxssssssssssssssssp9ogLpxssssssssssssssssp8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibaKc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spklaalcwfaYpQbfaKc:q:yjjbfRbbfhlxekaialpbbbpklaalczfhlkdndndndndnaXco4Plbedibkaipxbbbbbbbbbbbbbbbbpkl8WxikaialpbblalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLgQcdp:meaQpmbzeHdOiAlCvXoQrLpxiiiiiiiiiiiiiiiip9ogLpxiiiiiiiiiiiiiiiip8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgXcitc:q1jjbfpbibaXc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgXcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spkl8WalclfaYpQbfaXc:q:yjjbfRbbfhlxdkaialpbbwalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLpxssssssssssssssssp9ogLpxssssssssssssssssp8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgXcitc:q1jjbfpbibaXc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgXcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spkl8WalcwfaYpQbfaXc:q:yjjbfRbbfhlxekaialpbbbpkl8Walczfhlkaoc;abfhiaocjefak0meaihoaral9Rc;Fb0mbkkdndnaiak9pmbaici4hoinaral9RcK6mdaCaifhXdndndndndnaOaico4fRbbaocoG4ciGPlbedibkaXpxbbbbbbbbbbbbbbbbpklbxikaXalpbblalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLgQcdp:meaQpmbzeHdOiAlCvXoQrLpxiiiiiiiiiiiiiiiip9ogLpxiiiiiiiiiiiiiiiip8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibaKc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spklbalclfaYpQbfaKc:q:yjjbfRbbfhlxdkaXalpbbwalpbbbgQclp:meaQpmbzeHdOiAlCvXoQrLpxssssssssssssssssp9ogLpxssssssssssssssssp8JgQp5b9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibaKc:q:yjjbfpbbbgYaYpmbbbbbbbbbbbbbbbbaQp5e9cjF;8;4;W;G;ab9:9cU1:NgKcitc:q1jjbfpbibp9UpmbedilvorzHOACXQLpPaLaQp9spklbalcwfaYpQbfaKc:q:yjjbfRbbfhlxekaXalpbbbpklbalczfhlkaocdfhoaiczfgiak6mbkkalTmbaAcd0hHalhOaAcefgAclSmdxekkcbhlaHceGTmdkdnakTmbavcjdfazfhiavazfpbdbhYcbhXinaiavcj;cbfaXfgopblbgLcep9TaLpxeeeeeeeeeeeeeeeegQp9op9Hp9rgLaoakfpblbg8Acep9Ta8AaQp9op9Hp9rg8ApmbzeHdOiAlCvXoQrLgEaoamfpblbg3cep9Ta3aQp9op9Hp9rg3aoaxfpblbg5cep9Ta5aQp9op9Hp9rg5pmbzeHdOiAlCvXoQrLg8EpmbezHdiOAlvCXorQLgQaQpmbedibedibedibediaYp9UgYp9AdbbaiadfgoaYaQaQpmlvorlvorlvorlvorp9UgYp9AdbbaoadfgoaYaQaQpmwDqkwDqkwDqkwDqkp9UgYp9AdbbaoadfgoaYaQaQpmxmPsxmPsxmPsxmPsp9UgYp9AdbbaoadfgoaYaEa8EpmwDKYqk8AExm35Ps8E8FgQaQpmbedibedibedibedip9UgYp9AdbbaoadfgoaYaQaQpmlvorlvorlvorlvorp9UgYp9AdbbaoadfgoaYaQaQpmwDqkwDqkwDqkwDqkp9UgYp9AdbbaoadfgoaYaQaQpmxmPsxmPsxmPsxmPsp9UgYp9AdbbaoadfgoaYaLa8ApmwKDYq8AkEx3m5P8Es8FgLa3a5pmwKDYq8AkEx3m5P8Es8Fg8ApmbezHdiOAlvCXorQLgQaQpmbedibedibedibedip9UgYp9AdbbaoadfgoaYaQaQpmlvorlvorlvorlvorp9UgYp9AdbbaoadfgoaYaQaQpmwDqkwDqkwDqkwDqkp9UgYp9AdbbaoadfgoaYaQaQpmxmPsxmPsxmPsxmPsp9UgYp9AdbbaoadfgoaYaLa8ApmwDKYqk8AExm35Ps8E8FgQaQpmbedibedibedibedip9UgYp9AdbbaoadfgoaYaQaQpmlvorlvorlvorlvorp9UgYp9AdbbaoadfgoaYaQaQpmwDqkwDqkwDqkwDqkp9UgYp9AdbbaoadfgoaYaQaQpmxmPsxmPsxmPsxmPsp9UgYp9AdbbaoadfhiaXczfgXak6mbkkazclfgzad6mbkasavcjdfaqad2;8qbbavavcjdfaqcufad2fad;8qbbaqaDfhDc9:hoalmexikkc9:hoxekcbc99aral9Radcaadca0ESEhokavcj;kbf8Kjjjjbaokwbz:bjjjbk;tzeHu8Jjjjjbc;ae9Rgv8Kjjjjbc9:hodnaeci9UgrcHfal0mbcuhoaiRbbgwc;WeGc;Ge9hmbawcsGgDce0mbavc;abfcFecje;8kbavcUf9cu83ibavc8Wf9cu83ibavcyf9cu83ibavcaf9cu83ibavcKf9cu83ibavczf9cu83ibav9cu83iwav9cu83ibaialfc9WfhqaicefgwarfhodnaeTmbcmcsaDceSEhkcbhxcbhmcbhrcbhicbhlindnaoaq9nmbc9:hoxikdndnawRbbgDc;Ve0mbavc;abfalaDcu7gPcl4fcsGcitfgsydlhzasydbhHdnaDcsGgDak9pmbavaiaPfcsGcdtfydbaxaDEhsaDThDdndnadcd9hmbabarcetfgPaH87ebaPcdfaz87ebaPclfas87ebxekabarcdtfgPaHBdbaPclfazBdbaPcwfasBdbkaxaDfhxavc;abfalcitfgPasBdbaPazBdlavaicdtfasBdbavc;abfalcefcsGglcitfgPaHBdbaPasBdlaiaDfhialcefhlxdkdndnaDcsSmbamaDfaDc987fcefhmxekaocefhDao8SbbgscFeGhPdndnascu9mmbaDhoxekaocvfhoaPcFbGhPcrhsdninaD8SbbgOcFbGastaPVhPaOcu9kmeaDcefhDascrfgsc8J9hmbxdkkaDcefhokaPce4cbaPceG9R7amfhmkdndnadcd9hmbabarcetfgDaH87ebaDcdfaz87ebaDclfam87ebxekabarcdtfgDaHBdbaDclfazBdbaDcwfamBdbkavc;abfalcitfgDamBdbaDazBdlavaicdtfamBdbavc;abfalcefcsGglcitfgDaHBdbaDamBdlaicefhialcefhlxekdnaDcpe0mbaxcefgOavaiaqaDcsGfRbbgscl49RcsGcdtfydbascz6gPEhDavaias9RcsGcdtfydbaOaPfgzascsGgOEhsaOThOdndnadcd9hmbabarcetfgHax87ebaHcdfaD87ebaHclfas87ebxekabarcdtfgHaxBdbaHclfaDBdbaHcwfasBdbkavaicdtfaxBdbavc;abfalcitfgHaDBdbaHaxBdlavaicefgicsGcdtfaDBdbavc;abfalcefcsGcitfgHasBdbaHaDBdlavaiaPfcsGgicdtfasBdbavc;abfalcdfcsGglcitfgDaxBdbaDasBdlalcefhlaiaOfhiazaOfhxxekaxcbaoRbbgHEgAaDc;:eSgDfhzaHcsGhCaHcl4hXdndnaHcs0mbazcefhOxekazhOavaiaX9RcsGcdtfydbhzkdndnaCmbaOcefhxxekaOhxavaiaH9RcsGcdtfydbhOkdndnaDTmbaocefhDxekaocdfhDao8SbegPcFeGhsdnaPcu9kmbaocofhAascFbGhscrhodninaD8SbbgPcFbGaotasVhsaPcu9kmeaDcefhDaocrfgoc8J9hmbkaAhDxekaDcefhDkasce4cbasceG9R7amfgmhAkdndnaXcsSmbaDhsxekaDcefhsaD8SbbgocFeGhPdnaocu9kmbaDcvfhzaPcFbGhPcrhodninas8SbbgDcFbGaotaPVhPaDcu9kmeascefhsaocrfgoc8J9hmbkazhsxekascefhskaPce4cbaPceG9R7amfgmhzkdndnaCcsSmbashoxekascefhoas8SbbgDcFeGhPdnaDcu9kmbascvfhOaPcFbGhPcrhDdninao8SbbgscFbGaDtaPVhPascu9kmeaocefhoaDcrfgDc8J9hmbkaOhoxekaocefhokaPce4cbaPceG9R7amfgmhOkdndnadcd9hmbabarcetfgDaA87ebaDcdfaz87ebaDclfaO87ebxekabarcdtfgDaABdbaDclfazBdbaDcwfaOBdbkavc;abfalcitfgDazBdbaDaABdlavaicdtfaABdbavc;abfalcefcsGcitfgDaOBdbaDazBdlavaicefgicsGcdtfazBdbavc;abfalcdfcsGcitfgDaABdbaDaOBdlavaiaHcz6aXcsSVfgicsGcdtfaOBdbaiaCTaCcsSVfhialcifhlkawcefhwalcsGhlaicsGhiarcifgrae6mbkkcbc99aoaqSEhokavc;aef8Kjjjjbaok9teiucbcbydj1jjbgeabcifc98GfgbBdj1jjbdndnabZbcztgd9nmbcuhiabad9RcFFifcz4nbcuSmekaehikaikkkebcjwklz9Tbb"; + + var detector = new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,3,2,0,0,5,3,1,0,1,12,1,0,10,22,2,12,0,65,0,65,0,65,0,252,10,0,0,11,7,0,65,0,253,15,26,11]); + var wasmpack = new Uint8Array([32,0,65,2,1,106,34,33,3,128,11,4,13,64,6,253,10,7,15,116,127,5,8,12,40,16,19,54,20,9,27,255,113,17,42,67,24,23,146,148,18,14,22,45,70,69,56,114,101,21,25,63,75,136,108,28,118,29,73,115]); + + if (typeof WebAssembly !== 'object') { + return { + supported: false, + }; + } + + var wasm = WebAssembly.validate(detector) ? wasm_simd : wasm_base; + + var instance; + + var ready = + WebAssembly.instantiate(unpack(wasm), {}) + .then(function(result) { + instance = result.instance; + instance.exports.__wasm_call_ctors(); + }); + + function unpack(data) { + var result = new Uint8Array(data.length); + for (var i = 0; i < data.length; ++i) { + var ch = data.charCodeAt(i); + result[i] = ch > 96 ? ch - 97 : ch > 64 ? ch - 39 : ch + 4; + } + var write = 0; + for (var i = 0; i < data.length; ++i) { + result[write++] = (result[i] < 60) ? wasmpack[result[i]] : (result[i] - 60) * 64 + result[++i]; + } + return result.buffer.slice(0, write); + } + + function decode(fun, target, count, size, source, filter) { + var sbrk = instance.exports.sbrk; + var count4 = (count + 3) & ~3; + var tp = sbrk(count4 * size); + var sp = sbrk(source.length); + var heap = new Uint8Array(instance.exports.memory.buffer); + heap.set(source, sp); + var res = fun(tp, count, size, sp, source.length); + if (res == 0 && filter) { + filter(tp, count4, size); + } + target.set(heap.subarray(tp, tp + count * size)); + sbrk(tp - sbrk(0)); + if (res != 0) { + throw new Error("Malformed buffer data: " + res); + } + } + + var filters = { + NONE: "", + OCTAHEDRAL: "meshopt_decodeFilterOct", + QUATERNION: "meshopt_decodeFilterQuat", + EXPONENTIAL: "meshopt_decodeFilterExp", + }; + + var decoders = { + ATTRIBUTES: "meshopt_decodeVertexBuffer", + TRIANGLES: "meshopt_decodeIndexBuffer", + INDICES: "meshopt_decodeIndexSequence", + }; + + var workers = []; + var requestId = 0; + + function createWorker(url) { + var worker = { + object: new Worker(url), + pending: 0, + requests: {} + }; + + worker.object.onmessage = function(event) { + var data = event.data; + + worker.pending -= data.count; + worker.requests[data.id][data.action](data.value); + + delete worker.requests[data.id]; + }; + + return worker; + } + + function initWorkers(count) { + var source = + "var instance; var ready = WebAssembly.instantiate(new Uint8Array([" + new Uint8Array(unpack(wasm)) + "]), {})" + + ".then(function(result) { instance = result.instance; instance.exports.__wasm_call_ctors(); });" + + "self.onmessage = workerProcess;" + + decode.toString() + workerProcess.toString(); + + var blob = new Blob([source], {type: 'text/javascript'}); + var url = URL.createObjectURL(blob); + + for (var i = 0; i < count; ++i) { + workers[i] = createWorker(url); + } + + URL.revokeObjectURL(url); + } + + function decodeWorker(count, size, source, mode, filter) { + var worker = workers[0]; + + for (var i = 1; i < workers.length; ++i) { + if (workers[i].pending < worker.pending) { + worker = workers[i]; + } + } + + return new Promise(function (resolve, reject) { + var data = new Uint8Array(source); + var id = requestId++; + + worker.pending += count; + worker.requests[id] = { resolve: resolve, reject: reject }; + worker.object.postMessage({ id: id, count: count, size: size, source: data, mode: mode, filter: filter }, [ data.buffer ]); + }); + } + + function workerProcess(event) { + ready.then(function() { + var data = event.data; + try { + var target = new Uint8Array(data.count * data.size); + decode(instance.exports[data.mode], target, data.count, data.size, data.source, instance.exports[data.filter]); + self.postMessage({ id: data.id, count: data.count, action: "resolve", value: target }, [ target.buffer ]); + } catch (error) { + self.postMessage({ id: data.id, count: data.count, action: "reject", value: error }); + } + }); + } + + return { + ready: ready, + supported: true, + useWorkers: function(count) { + initWorkers(count); + }, + decodeVertexBuffer: function(target, count, size, source, filter) { + decode(instance.exports.meshopt_decodeVertexBuffer, target, count, size, source, instance.exports[filters[filter]]); + }, + decodeIndexBuffer: function(target, count, size, source) { + decode(instance.exports.meshopt_decodeIndexBuffer, target, count, size, source); + }, + decodeIndexSequence: function(target, count, size, source) { + decode(instance.exports.meshopt_decodeIndexSequence, target, count, size, source); + }, + decodeGltfBuffer: function(target, count, size, source, mode, filter) { + decode(instance.exports[decoders[mode]], target, count, size, source, instance.exports[filters[filter]]); + }, + decodeGltfBufferAsync: function(count, size, source, mode, filter) { + if (workers.length > 0) { + return decodeWorker(count, size, source, decoders[mode], filters[filter]); + } + + return ready.then(function() { + var target = new Uint8Array(count * size); + decode(instance.exports[decoders[mode]], target, count, size, source, instance.exports[filters[filter]]); + return target; + }); + } + }; +})(); + +export default MeshoptDecoder; \ No newline at end of file diff --git a/sources/CMakeLists.txt b/sources/CMakeLists.txt index 8c4308b4b..4f5e942a9 100644 --- a/sources/CMakeLists.txt +++ b/sources/CMakeLists.txt @@ -30,11 +30,18 @@ set(CC_EXTERNAL_SOURCES ${CMAKE_CURRENT_LIST_DIR}/tommyds/tommy.h ) +set (CC_MESHOPT_SOURCES + ${CMAKE_CURRENT_LIST_DIR}/meshopt/meshoptimizer.h + ${CMAKE_CURRENT_LIST_DIR}/meshopt/vertexcodec.cpp + ${CMAKE_CURRENT_LIST_DIR}/meshopt/indexcodec.cpp +) + if(NOT USE_MODULES) list(APPEND CC_EXTERNAL_SOURCES ${CC_UNZIP_SOURCES} ${CC_TINYDIR_SOURCES} ${CC_UTILS_SOURCES} + ${CC_MESHOPT_SOURCES} ) endif() diff --git a/sources/meshopt/indexcodec.cpp b/sources/meshopt/indexcodec.cpp new file mode 100644 index 000000000..e4495b858 --- /dev/null +++ b/sources/meshopt/indexcodec.cpp @@ -0,0 +1,674 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include +#include + +// This work is based on: +// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013 +// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014 +namespace meshopt +{ + +const unsigned char kIndexHeader = 0xe0; +const unsigned char kSequenceHeader = 0xd0; + +static int gEncodeIndexVersion = 0; + +typedef unsigned int VertexFifo[16]; +typedef unsigned int EdgeFifo[16][2]; + +static const unsigned int kTriangleIndexOrder[3][3] = { + {0, 1, 2}, + {1, 2, 0}, + {2, 0, 1}, +}; + +static const unsigned char kCodeAuxEncodingTable[16] = { + 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69, + 0, 0, // last two entries aren't used for encoding +}; + +static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next) +{ + (void)a; + + return (b == next) ? 1 : (c == next) ? 2 : 0; +} + +static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset) +{ + for (int i = 0; i < 16; ++i) + { + size_t index = (offset - 1 - i) & 15; + + unsigned int e0 = fifo[index][0]; + unsigned int e1 = fifo[index][1]; + + if (e0 == a && e1 == b) + return (i << 2) | 0; + if (e0 == b && e1 == c) + return (i << 2) | 1; + if (e0 == c && e1 == a) + return (i << 2) | 2; + } + + return -1; +} + +static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset) +{ + fifo[offset][0] = a; + fifo[offset][1] = b; + offset = (offset + 1) & 15; +} + +static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset) +{ + for (int i = 0; i < 16; ++i) + { + size_t index = (offset - 1 - i) & 15; + + if (fifo[index] == v) + return i; + } + + return -1; +} + +static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1) +{ + fifo[offset] = v; + offset = (offset + cond) & 15; +} + +static void encodeVByte(unsigned char*& data, unsigned int v) +{ + // encode 32-bit value in up to 5 7-bit groups + do + { + *data++ = (v & 127) | (v > 127 ? 128 : 0); + v >>= 7; + } while (v); +} + +static unsigned int decodeVByte(const unsigned char*& data) +{ + unsigned char lead = *data++; + + // fast path: single byte + if (lead < 128) + return lead; + + // slow path: up to 4 extra bytes + // note that this loop always terminates, which is important for malformed data + unsigned int result = lead & 127; + unsigned int shift = 7; + + for (int i = 0; i < 4; ++i) + { + unsigned char group = *data++; + result |= unsigned(group & 127) << shift; + shift += 7; + + if (group < 128) + break; + } + + return result; +} + +static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last) +{ + unsigned int d = index - last; + unsigned int v = (d << 1) ^ (int(d) >> 31); + + encodeVByte(data, v); +} + +static unsigned int decodeIndex(const unsigned char*& data, unsigned int last) +{ + unsigned int v = decodeVByte(data); + unsigned int d = (v >> 1) ^ -int(v & 1); + + return last + d; +} + +static int getCodeAuxIndex(unsigned char v, const unsigned char* table) +{ + for (int i = 0; i < 16; ++i) + if (table[i] == v) + return i; + + return -1; +} + +static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c) +{ + if (index_size == 2) + { + static_cast(destination)[offset + 0] = (unsigned short)(a); + static_cast(destination)[offset + 1] = (unsigned short)(b); + static_cast(destination)[offset + 2] = (unsigned short)(c); + } + else + { + static_cast(destination)[offset + 0] = a; + static_cast(destination)[offset + 1] = b; + static_cast(destination)[offset + 2] = c; + } +} + +} // namespace meshopt + +size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + + // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table + if (buffer_size < 1 + index_count / 3 + 16) + return 0; + + int version = gEncodeIndexVersion; + + buffer[0] = (unsigned char)(kIndexHeader | version); + + EdgeFifo edgefifo; + memset(edgefifo, -1, sizeof(edgefifo)); + + VertexFifo vertexfifo; + memset(vertexfifo, -1, sizeof(vertexfifo)); + + size_t edgefifooffset = 0; + size_t vertexfifooffset = 0; + + unsigned int next = 0; + unsigned int last = 0; + + unsigned char* code = buffer + 1; + unsigned char* data = code + index_count / 3; + unsigned char* data_safe_end = buffer + buffer_size - 16; + + int fecmax = version >= 1 ? 13 : 15; + + // use static encoding table; it's possible to pack the result and then build an optimal table and repack + // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set + const unsigned char* codeaux_table = kCodeAuxEncodingTable; + + for (size_t i = 0; i < index_count; i += 3) + { + // make sure we have enough space to write a triangle + // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index + // after this we can be sure we can write without extra bounds checks + if (data > data_safe_end) + return 0; + + int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset); + + if (fer >= 0 && (fer >> 2) < 15) + { + const unsigned int* order = kTriangleIndexOrder[fer & 3]; + + unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; + + // encode edge index and vertex fifo index, next or free index + int fe = fer >> 2; + int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); + + int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15; + + if (fec == 15 && version >= 1) + { + // encode last-1 and last+1 to optimize strip-like sequences + if (c + 1 == last) + fec = 13, last = c; + if (c == last + 1) + fec = 14, last = c; + } + + *code++ = (unsigned char)((fe << 4) | fec); + + // note that we need to update the last index since free indices are delta-encoded + if (fec == 15) + encodeIndex(data, c, last), last = c; + + // we only need to push third vertex since first two are likely already in the vertex fifo + if (fec == 0 || fec >= fecmax) + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + // we only need to push two new edges to edge fifo since the third one is already there + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next); + const unsigned int* order = kTriangleIndexOrder[rotation]; + + unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; + + // if a/b/c are 0/1/2, we emit a reset code + bool reset = false; + + if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1) + { + reset = true; + next = 0; + + // reset vertex fifo to make sure we don't accidentally reference vertices from that in the future + // this makes sure next continues to get incremented instead of being stuck + memset(vertexfifo, -1, sizeof(vertexfifo)); + } + + int fb = getVertexFifo(vertexfifo, b, vertexfifooffset); + int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); + + // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a + int fea = (a == next) ? (next++, 0) : 15; + int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15; + int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15; + + // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise + unsigned char codeaux = (unsigned char)((feb << 4) | fec); + int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table); + + // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15 + if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset) + { + *code++ = (unsigned char)((15 << 4) | codeauxindex); + } + else + { + *code++ = (unsigned char)((15 << 4) | 14 | fea); + *data++ = codeaux; + } + + // note that we need to update the last index since free indices are delta-encoded + if (fea == 15) + encodeIndex(data, a, last), last = a; + + if (feb == 15) + encodeIndex(data, b, last), last = b; + + if (fec == 15) + encodeIndex(data, c, last), last = c; + + // only push vertices that weren't already in fifo + if (fea == 0 || fea == 15) + pushVertexFifo(vertexfifo, a, vertexfifooffset); + + if (feb == 0 || feb == 15) + pushVertexFifo(vertexfifo, b, vertexfifooffset); + + if (fec == 0 || fec == 15) + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + + // make sure we have enough space to write codeaux table + if (data > data_safe_end) + return 0; + + // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding + // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data + // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input + for (size_t i = 0; i < 16; ++i) + { + // decoder assumes that table entries never refer to separately encoded indices + assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf); + + *data++ = codeaux_table[i]; + } + + // since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference + assert(codeaux_table[0] == 0); + + assert(data >= buffer + index_count / 3 + 16); + assert(data <= buffer + buffer_size); + + return data - buffer; +} + +size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count) +{ + assert(index_count % 3 == 0); + + // compute number of bits required for each index + unsigned int vertex_bits = 1; + + while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits) + vertex_bits++; + + // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas + unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7; + + return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16; +} + +void meshopt_encodeIndexVersion(int version) +{ + assert(unsigned(version) <= 1); + + meshopt::gEncodeIndexVersion = version; +} + +int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(index_size == 2 || index_size == 4); + + // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table + if (buffer_size < 1 + index_count / 3 + 16) + return -2; + + if ((buffer[0] & 0xf0) != kIndexHeader) + return -1; + + int version = buffer[0] & 0x0f; + if (version > 1) + return -1; + + EdgeFifo edgefifo; + memset(edgefifo, -1, sizeof(edgefifo)); + + VertexFifo vertexfifo; + memset(vertexfifo, -1, sizeof(vertexfifo)); + + size_t edgefifooffset = 0; + size_t vertexfifooffset = 0; + + unsigned int next = 0; + unsigned int last = 0; + + int fecmax = version >= 1 ? 13 : 15; + + // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end + const unsigned char* code = buffer + 1; + const unsigned char* data = code + index_count / 3; + const unsigned char* data_safe_end = buffer + buffer_size - 16; + + const unsigned char* codeaux_table = data_safe_end; + + for (size_t i = 0; i < index_count; i += 3) + { + // make sure we have enough data to read for a triangle + // each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index + // after this we can be sure we can read without extra bounds checks + if (data > data_safe_end) + return -2; + + unsigned char codetri = *code++; + + if (codetri < 0xf0) + { + int fe = codetri >> 4; + + // fifo reads are wrapped around 16 entry buffer + unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0]; + unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1]; + + int fec = codetri & 15; + + // note: this is the most common path in the entire decoder + // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable + if (fec < fecmax) + { + // fifo reads are wrapped around 16 entry buffer + unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15]; + unsigned int c = (fec == 0) ? next : cf; + + int fec0 = fec == 0; + next += fec0; + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); + + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + unsigned int c = 0; + + // fec - (fec ^ 3) decodes 13, 14 into -1, 1 + // note that we need to update the last index since free indices are delta-encoded + last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last); + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + else + { + // fast path: read codeaux from the table + if (codetri < 0xfe) + { + unsigned char codeaux = codeaux_table[codetri & 15]; + + // note: table can't contain feb/fec=15 + int feb = codeaux >> 4; + int fec = codeaux & 15; + + // fifo reads are wrapped around 16 entry buffer + // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior + unsigned int a = next++; + + unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15]; + unsigned int b = (feb == 0) ? next : bf; + + int feb0 = feb == 0; + next += feb0; + + unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15]; + unsigned int c = (fec == 0) ? next : cf; + + int fec0 = fec == 0; + next += fec0; + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, a, vertexfifooffset); + pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0); + pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); + + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + // slow path: read a full byte for codeaux instead of using a table lookup + unsigned char codeaux = *data++; + + int fea = codetri == 0xfe ? 0 : 15; + int feb = codeaux >> 4; + int fec = codeaux & 15; + + // reset: codeaux is 0 but encoded as not-a-table + if (codeaux == 0) + next = 0; + + // fifo reads are wrapped around 16 entry buffer + // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior + unsigned int a = (fea == 0) ? next++ : 0; + unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15]; + unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15]; + + // note that we need to update the last index since free indices are delta-encoded + if (fea == 15) + last = a = decodeIndex(data, last); + + if (feb == 15) + last = b = decodeIndex(data, last); + + if (fec == 15) + last = c = decodeIndex(data, last); + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, a, vertexfifooffset); + pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15)); + pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15)); + + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + } + + // we should've read all data bytes and stopped at the boundary between data and codeaux table + if (data != data_safe_end) + return -3; + + return 0; +} + +size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) +{ + using namespace meshopt; + + // the minimum valid encoding is header, 1 byte per index and a 4-byte tail + if (buffer_size < 1 + index_count + 4) + return 0; + + int version = gEncodeIndexVersion; + + buffer[0] = (unsigned char)(kSequenceHeader | version); + + unsigned int last[2] = {}; + unsigned int current = 0; + + unsigned char* data = buffer + 1; + unsigned char* data_safe_end = buffer + buffer_size - 4; + + for (size_t i = 0; i < index_count; ++i) + { + // make sure we have enough data to write + // each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end + // after this we can be sure we can write without extra bounds checks + if (data >= data_safe_end) + return 0; + + unsigned int index = indices[i]; + + // this is a heuristic that switches between baselines when the delta grows too large + // we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index + // for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily + int cd = int(index - last[current]); + current ^= ((cd < 0 ? -cd : cd) >= 30); + + // encode delta from the last index + unsigned int d = index - last[current]; + unsigned int v = (d << 1) ^ (int(d) >> 31); + + // note: low bit encodes the index of the last baseline which will be used for reconstruction + encodeVByte(data, (v << 1) | current); + + // update last for the next iteration that uses it + last[current] = index; + } + + // make sure we have enough space to write tail + if (data > data_safe_end) + return 0; + + for (int k = 0; k < 4; ++k) + *data++ = 0; + + return data - buffer; +} + +size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count) +{ + // compute number of bits required for each index + unsigned int vertex_bits = 1; + + while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits) + vertex_bits++; + + // worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit + unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7; + + return 1 + index_count * vertex_groups + 4; +} + +int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) +{ + using namespace meshopt; + + // the minimum valid encoding is header, 1 byte per index and a 4-byte tail + if (buffer_size < 1 + index_count + 4) + return -2; + + if ((buffer[0] & 0xf0) != kSequenceHeader) + return -1; + + int version = buffer[0] & 0x0f; + if (version > 1) + return -1; + + const unsigned char* data = buffer + 1; + const unsigned char* data_safe_end = buffer + buffer_size - 4; + + unsigned int last[2] = {}; + + for (size_t i = 0; i < index_count; ++i) + { + // make sure we have enough data to read + // each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end + // after this we can be sure we can read without extra bounds checks + if (data >= data_safe_end) + return -2; + + unsigned int v = decodeVByte(data); + + // decode the index of the last baseline + unsigned int current = v & 1; + v >>= 1; + + // reconstruct index as a delta + unsigned int d = (v >> 1) ^ -int(v & 1); + unsigned int index = last[current] + d; + + // update last for the next iteration that uses it + last[current] = index; + + if (index_size == 2) + { + static_cast(destination)[i] = (unsigned short)(index); + } + else + { + static_cast(destination)[i] = index; + } + } + + // we should've read all data bytes and stopped at the boundary between data and tail + if (data != data_safe_end) + return -3; + + return 0; +} diff --git a/sources/meshopt/meshoptimizer.h b/sources/meshopt/meshoptimizer.h new file mode 100644 index 000000000..26c4e99a5 --- /dev/null +++ b/sources/meshopt/meshoptimizer.h @@ -0,0 +1,1079 @@ +/** + * meshoptimizer - version 0.19 + * + * Copyright (C) 2016-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at https://github.com/zeux/meshoptimizer + * + * This library is distributed under the MIT License. See notice at the end of this file. + */ +#pragma once + +#include +#include + +/* Version macro; major * 1000 + minor * 10 + patch */ +#define MESHOPTIMIZER_VERSION 190 /* 0.19 */ + +/* If no API is defined, assume default */ +#ifndef MESHOPTIMIZER_API +#define MESHOPTIMIZER_API +#endif + +/* Set the calling-convention for alloc/dealloc function pointers */ +#ifndef MESHOPTIMIZER_ALLOC_CALLCONV +#ifdef _MSC_VER +#define MESHOPTIMIZER_ALLOC_CALLCONV __cdecl +#else +#define MESHOPTIMIZER_ALLOC_CALLCONV +#endif +#endif + +/* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */ +#define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API + +/* C interface */ +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Vertex attribute stream + * Each element takes size bytes, beginning at data, with stride controlling the spacing between successive elements (stride >= size). + */ +struct meshopt_Stream +{ + const void* data; + size_t size; + size_t stride; +}; + +/** + * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices + * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. + * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); + +/** + * Generates a vertex remap table from multiple vertex streams and an optional index buffer and returns number of unique vertices + * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. + * To remap vertex buffers, you will need to call meshopt_remapVertexBuffer for each vertex stream. + * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); + +/** + * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap + * + * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by meshopt_generateVertexRemap) + * vertex_count should be the initial vertex count and not the value returned by meshopt_generateVertexRemap + */ +MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap); + +/** + * Generate index buffer from the source index buffer and remap table generated by meshopt_generateVertexRemap + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap); + +/** + * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary + * All vertices that are binary equivalent (wrt first vertex_size bytes) map to the first vertex in the original vertex buffer. + * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering. + * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride); + +/** + * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary + * All vertices that are binary equivalent (wrt specified streams) map to the first vertex in the original vertex buffer. + * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering. + * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); + +/** + * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology + * Each triangle is converted into a 6-vertex patch with the following layout: + * - 0, 2, 4: original triangle vertices + * - 1, 3, 5: vertices adjacent to edges 02, 24 and 40 + * The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY. + * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering. + * + * destination must contain enough space for the resulting index buffer (index_count*2 elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex + */ +MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement + * Each triangle is converted into a 12-vertex patch with the following layout: + * - 0, 1, 2: original triangle vertices + * - 3, 4: opposing edge for edge 0, 1 + * - 5, 6: opposing edge for edge 1, 2 + * - 7, 8: opposing edge for edge 2, 0 + * - 9, 10, 11: dominant vertices for corners 0, 1, 2 + * The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping. + * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details. + * + * destination must contain enough space for the resulting index buffer (index_count*4 elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex + */ +MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Vertex transform cache optimizer + * Reorders indices to reduce the number of GPU vertex shader invocations + * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Vertex transform cache optimizer for strip-like caches + * Produces inferior results to meshopt_optimizeVertexCache from the GPU vertex cache perspective + * However, the resulting index order is more optimal if the goal is to reduce the triangle strip length or improve compression efficiency + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Vertex transform cache optimizer for FIFO caches + * Reorders indices to reduce the number of GPU vertex shader invocations + * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache + * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * cache_size should be less than the actual GPU cache size to avoid cache thrashing + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); + +/** + * Overdraw optimizer + * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw + * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!) + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently + */ +MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); + +/** + * Vertex fetch cache optimizer + * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing + * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused + * This functions works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream. + * + * destination must contain enough space for the resulting vertex buffer (vertex_count elements) + * indices is used both as an input and as an output index buffer + */ +MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); + +/** + * Vertex fetch cache optimizer + * Generates vertex remap to reduce the amount of GPU memory fetches during vertex processing + * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused + * The resulting remap table should be used to reorder vertex/index buffers using meshopt_remapVertexBuffer/meshopt_remapIndexBuffer + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + */ +MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Index buffer encoder + * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 bytes/triangle) compared to original. + * Input index buffer must represent a triangle list. + * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first. + * + * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to compute worst case size) + */ +MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); +MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count); + +/** + * Set index encoder format version + * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+) + */ +MESHOPTIMIZER_API void meshopt_encodeIndexVersion(int version); + +/** + * Index buffer decoder + * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices). + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Index sequence encoder + * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original. + * Input index sequence can represent arbitrary topology; for triangle lists meshopt_encodeIndexBuffer is likely to be better. + * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * + * buffer must contain enough space for the encoded index sequence (use meshopt_encodeIndexSequenceBound to compute worst case size) + */ +MESHOPTIMIZER_API size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); +MESHOPTIMIZER_API size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count); + +/** + * Index sequence decoder + * Decodes index data from an array of bytes generated by meshopt_encodeIndexSequence + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices). + * + * destination must contain enough space for the resulting index sequence (index_count elements) + */ +MESHOPTIMIZER_API int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Vertex buffer encoder + * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original. + * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream. + * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized. + * + * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size) + */ +MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size); + +/** + * Set vertex encoder format version + * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) + */ +MESHOPTIMIZER_API void meshopt_encodeVertexVersion(int version); + +/** + * Vertex buffer decoder + * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data. + * + * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes) + */ +MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Vertex buffer filters + * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place. + * + * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f. + * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. + * + * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct. + * Each component is stored as an 16-bit integer; stride must be equal to 8. + * + * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M. + * Each 32-bit component is decoded in isolation; stride must be divisible by 4. + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride); + +/** + * Vertex buffer filter encoders + * These functions can be used to encode data in a format that meshopt_decodeFilter can decode + * + * meshopt_encodeFilterOct encodes unit vectors with K-bit (K <= 16) signed X/Y as an output. + * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. + * Input data must contain 4 floats for every vector (count*4 total). + * + * meshopt_encodeFilterQuat encodes unit quaternions with K-bit (4 <= K <= 16) component encoding. + * Each component is stored as an 16-bit integer; stride must be equal to 8. + * Input data must contain 4 floats for every quaternion (count*4 total). + * + * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24). + * Exponent can be shared between all components of a given vector as defined by stride or all values of a given component; stride must be divisible by 4. + * Input data must contain stride/4 floats for every vector (count*stride/4 total). + */ +enum meshopt_EncodeExpMode +{ + /* When encoding exponents, use separate values for each component (maximum quality) */ + meshopt_EncodeExpSeparate, + /* When encoding exponents, use shared value for all components of each vector (better compression) */ + meshopt_EncodeExpSharedVector, + /* When encoding exponents, use shared value for each component of all vectors (best compression) */ + meshopt_EncodeExpSharedComponent, +}; + +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode); + +/** + * Simplification options + */ +enum +{ + /* Do not move vertices that are located on the topological border (vertices on triangle edges that don't have a paired triangle). Useful for simplifying portions of the larger mesh. */ + meshopt_SimplifyLockBorder = 1 << 0, +}; + +/** + * Mesh simplifier + * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible + * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error. + * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification. + * Returns the number of indices after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1] + * options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default + * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification + */ +MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error); + +/** + * Experimental: Mesh simplifier (sloppy) + * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance + * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error. + * Returns the number of indices after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1] + * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); + +/** + * Experimental: Point cloud simplifier + * Reduces the number of points in the cloud to reach the given target + * Returns the number of points after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the target index buffer (target_vertex_count elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count); + +/** + * Returns the error scaling factor used by the simplifier to convert between absolute and relative extents + * + * Absolute error must be *divided* by the scaling factor before passing it to meshopt_simplify as target_error + * Relative error returned by meshopt_simplify via result_error must be *multiplied* by the scaling factor to get absolute error. + */ +MESHOPTIMIZER_API float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Mesh stripifier + * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index or degenerate triangles + * Returns the number of indices in the resulting strip, with destination containing new index data + * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. + * Using restart indices can result in ~10% smaller index buffers, but on some GPUs restart indices may result in decreased performance. + * + * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_stripifyBound + * restart_index should be 0xffff or 0xffffffff depending on index size, or 0 to use degenerate triangles + */ +MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index); +MESHOPTIMIZER_API size_t meshopt_stripifyBound(size_t index_count); + +/** + * Mesh unstripifier + * Converts a triangle strip to a triangle list + * Returns the number of indices in the resulting list, with destination containing new index data + * + * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_unstripifyBound + */ +MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index); +MESHOPTIMIZER_API size_t meshopt_unstripifyBound(size_t index_count); + +struct meshopt_VertexCacheStatistics +{ + unsigned int vertices_transformed; + unsigned int warps_executed; + float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */ + float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */ +}; + +/** + * Vertex transform cache analyzer + * Returns cache hit statistics using a simplified FIFO model + * Results may not match actual GPU performance + */ +MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size); + +struct meshopt_OverdrawStatistics +{ + unsigned int pixels_covered; + unsigned int pixels_shaded; + float overdraw; /* shaded pixels / covered pixels; best case 1.0 */ +}; + +/** + * Overdraw analyzer + * Returns overdraw statistics using a software rasterizer + * Results may not match actual GPU performance + * + * vertex_positions should have float3 position in the first 12 bytes of each vertex + */ +MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +struct meshopt_VertexFetchStatistics +{ + unsigned int bytes_fetched; + float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */ +}; + +/** + * Vertex fetch cache analyzer + * Returns cache hit statistics using a simplified direct mapped model + * Results may not match actual GPU performance + */ +MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size); + +struct meshopt_Meshlet +{ + /* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */ + unsigned int vertex_offset; + unsigned int triangle_offset; + + /* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */ + unsigned int vertex_count; + unsigned int triangle_count; +}; + +/** + * Meshlet builder + * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer + * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers. + * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters. + * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first. + * + * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound + * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices + * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512) + * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency + */ +MESHOPTIMIZER_API size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); +MESHOPTIMIZER_API size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); + +struct meshopt_Bounds +{ + /* bounding sphere, useful for frustum and occlusion culling */ + float center[3]; + float radius; + + /* normal cone, useful for backface culling */ + float cone_apex[3]; + float cone_axis[3]; + float cone_cutoff; /* = cos(angle/2) */ + + /* normal cone axis and cutoff, stored in 8-bit SNORM format; decode using x/127.0 */ + signed char cone_axis_s8[3]; + signed char cone_cutoff_s8; +}; + +/** + * Cluster bounds generator + * Creates bounding volumes that can be used for frustum, backface and occlusion culling. + * + * For backface culling with orthographic projection, use the following formula to reject backfacing clusters: + * dot(view, cone_axis) >= cone_cutoff + * + * For perspective projection, you can the formula that needs cone apex in addition to axis & cutoff: + * dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff + * + * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead: + * dot(normalize(center - camera_position), cone_axis) >= cone_cutoff + radius / length(center - camera_position) + * or an equivalent formula that doesn't have a singularity at center = camera_position: + * dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius + * + * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere + * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable (for derivation see + * Real-Time Rendering 4th Edition, section 19.3). + * + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size) + */ +MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Experimental: Spatial sorter + * Generates a remap table that can be used to reorder points for spatial locality. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Experimental: Spatial sorter + * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Set allocation callbacks + * These callbacks will be used instead of the default operator new/operator delete for all temporary allocations in the library. + * Note that all algorithms only allocate memory for temporary use. + * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first. + */ +MESHOPTIMIZER_API void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*)); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +/* Quantization into commonly supported data formats */ +#ifdef __cplusplus +/** + * Quantize a float in [0..1] range into an N-bit fixed point unorm value + * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point conversion + * Maximum reconstruction error: 1/2^(N+1) + */ +inline int meshopt_quantizeUnorm(float v, int N); + +/** + * Quantize a float in [-1..1] range into an N-bit fixed point snorm value + * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions) + * Maximum reconstruction error: 1/2^N + */ +inline int meshopt_quantizeSnorm(float v, int N); + +/** + * Quantize a float into half-precision floating point value + * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest + * Representable magnitude range: [6e-5; 65504] + * Maximum relative reconstruction error: 5e-4 + */ +inline unsigned short meshopt_quantizeHalf(float v); + +/** + * Quantize a float into a floating point value with a limited number of significant mantissa bits + * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest + * Assumes N is in a valid mantissa precision range, which is 1..23 + */ +inline float meshopt_quantizeFloat(float v, int N); +#endif + +/** + * C++ template interface + * + * These functions mirror the C interface the library provides, providing template-based overloads so that + * the caller can use an arbitrary type for the index data, both for input and output. + * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not, + * the wrappers end up allocating memory and copying index data to convert from one type to another. + */ +#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS) +template +inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +template +inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +template +inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap); +template +inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride); +template +inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +template +inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template +inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template +inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count); +template +inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count); +template +inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); +template +inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); +template +inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count); +template +inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +template +inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); +template +inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +template +inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); +template +inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +template +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = 0); +template +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = 0); +template +inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index); +template +inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index); +template +inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size); +template +inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template +inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size); +template +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); +template +inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +template +inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template +inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +#endif + +/* Inline implementation */ +#ifdef __cplusplus +inline int meshopt_quantizeUnorm(float v, int N) +{ + const float scale = float((1 << N) - 1); + + v = (v >= 0) ? v : 0; + v = (v <= 1) ? v : 1; + + return int(v * scale + 0.5f); +} + +inline int meshopt_quantizeSnorm(float v, int N) +{ + const float scale = float((1 << (N - 1)) - 1); + + float round = (v >= 0 ? 0.5f : -0.5f); + + v = (v >= -1) ? v : -1; + v = (v <= +1) ? v : +1; + + return int(v * scale + round); +} + +inline unsigned short meshopt_quantizeHalf(float v) +{ + union { float f; unsigned int ui; } u = {v}; + unsigned int ui = u.ui; + + int s = (ui >> 16) & 0x8000; + int em = ui & 0x7fffffff; + + /* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */ + int h = (em - (112 << 23) + (1 << 12)) >> 13; + + /* underflow: flush to zero; 113 encodes exponent -14 */ + h = (em < (113 << 23)) ? 0 : h; + + /* overflow: infinity; 143 encodes exponent 16 */ + h = (em >= (143 << 23)) ? 0x7c00 : h; + + /* NaN; note that we convert all types of NaN to qNaN */ + h = (em > (255 << 23)) ? 0x7e00 : h; + + return (unsigned short)(s | h); +} + +inline float meshopt_quantizeFloat(float v, int N) +{ + union { float f; unsigned int ui; } u = {v}; + unsigned int ui = u.ui; + + const int mask = (1 << (23 - N)) - 1; + const int round = (1 << (23 - N)) >> 1; + + int e = ui & 0x7f800000; + unsigned int rui = (ui + round) & ~mask; + + /* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */ + ui = e == 0x7f800000 ? ui : rui; + + /* flush denormals to zero */ + ui = e == 0 ? 0 : ui; + + u.ui = ui; + return u.f; +} +#endif + +/* Internal implementation helpers */ +#ifdef __cplusplus +class meshopt_Allocator +{ +public: + template + struct StorageT + { + static void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t); + static void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*); + }; + + typedef StorageT Storage; + + meshopt_Allocator() + : blocks() + , count(0) + { + } + + ~meshopt_Allocator() + { + for (size_t i = count; i > 0; --i) + Storage::deallocate(blocks[i - 1]); + } + + template T* allocate(size_t size) + { + assert(count < sizeof(blocks) / sizeof(blocks[0])); + T* result = static_cast(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T))); + blocks[count++] = result; + return result; + } + +private: + void* blocks[24]; + size_t count; +}; + +// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker +template void* (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT::allocate)(size_t) = operator new; +template void (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT::deallocate)(void*) = operator delete; +#endif + +/* Inline implementation for C++ templated wrappers */ +#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS) +template +struct meshopt_IndexAdapter; + +template +struct meshopt_IndexAdapter +{ + T* result; + unsigned int* data; + size_t count; + + meshopt_IndexAdapter(T* result_, const T* input, size_t count_) + : result(result_) + , data(0) + , count(count_) + { + size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int); + + data = static_cast(meshopt_Allocator::Storage::allocate(size)); + + if (input) + { + for (size_t i = 0; i < count; ++i) + data[i] = input[i]; + } + } + + ~meshopt_IndexAdapter() + { + if (result) + { + for (size_t i = 0; i < count; ++i) + result[i] = T(data[i]); + } + + meshopt_Allocator::Storage::deallocate(data); + } +}; + +template +struct meshopt_IndexAdapter +{ + unsigned int* data; + + meshopt_IndexAdapter(T* result, const T* input, size_t) + : data(reinterpret_cast(result ? result : const_cast(input))) + { + } +}; + +template +inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); + + return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size); +} + +template +inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) +{ + meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); + + return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count); +} + +template +inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap) +{ + meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); + meshopt_IndexAdapter out(destination, 0, index_count); + + meshopt_remapIndexBuffer(out.data, indices ? in.data : 0, index_count, remap); +} + +template +inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride); +} + +template +inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count); +} + +template +inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count * 2); + + meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template +inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count * 4); + + meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template +inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count); +} + +template +inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count); +} + +template +inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size); +} + +template +inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold); +} + +template +inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count); +} + +template +inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter inout(indices, indices, index_count); + + return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size); +} + +template +inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count); +} + +template +inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) +{ + char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1]; + (void)index_size_valid; + + return meshopt_decodeIndexBuffer(destination, index_count, sizeof(T), buffer, buffer_size); +} + +template +inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count); +} + +template +inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) +{ + char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1]; + (void)index_size_valid; + + return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size); +} + +template +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, options, result_error); +} + +template +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error); +} + +template +inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, (index_count / 3) * 5); + + return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index)); +} + +template +inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, (index_count - 2) * 3); + + return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index)); +} + +template +inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size); +} + +template +inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template +inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); +} + +template +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight); +} + +template +inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles); +} + +template +inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter in(0, indices, index_count); + + return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template +inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter out(destination, 0, index_count); + + meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} +#endif + +/** + * Copyright (c) 2016-2023 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ diff --git a/sources/meshopt/vertexcodec.cpp b/sources/meshopt/vertexcodec.cpp new file mode 100644 index 000000000..167034bae --- /dev/null +++ b/sources/meshopt/vertexcodec.cpp @@ -0,0 +1,1248 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include +#include + +// The block below auto-detects SIMD ISA that can be used on the target platform +#ifndef MESHOPTIMIZER_NO_SIMD + +// The SIMD implementation requires SSSE3, which can be enabled unconditionally through compiler settings +#if defined(__AVX__) || defined(__SSSE3__) +#define SIMD_SSE +#endif + +// An experimental implementation using AVX512 instructions; it's only enabled when AVX512 is enabled through compiler settings +#if defined(__AVX512VBMI2__) && defined(__AVX512VBMI__) && defined(__AVX512VL__) && defined(__POPCNT__) +#undef SIMD_SSE +#define SIMD_AVX +#endif + +// MSVC supports compiling SSSE3 code regardless of compile options; we use a cpuid-based scalar fallback +#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) +#define SIMD_SSE +#define SIMD_FALLBACK +#endif + +// GCC 4.9+ and clang 3.8+ support targeting SIMD ISA from individual functions; we use a cpuid-based scalar fallback +#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && ((defined(__clang__) && __clang_major__ * 100 + __clang_minor__ >= 308) || (defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ >= 409)) && (defined(__i386__) || defined(__x86_64__)) +#define SIMD_SSE +#define SIMD_FALLBACK +#define SIMD_TARGET __attribute__((target("ssse3"))) +#endif + +// GCC/clang define these when NEON support is available +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define SIMD_NEON +#endif + +// On MSVC, we assume that ARM builds always target NEON-capable devices +#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) +#define SIMD_NEON +#endif + +// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD +#if defined(__wasm_simd128__) +#define SIMD_WASM +// Prevent compiling other variant when wasm simd compilation is active +#undef SIMD_NEON +#undef SIMD_SSE +#undef SIMD_AVX +#endif + +#ifndef SIMD_TARGET +#define SIMD_TARGET +#endif + +// When targeting AArch64/x64, optimize for latency to allow decoding of individual 16-byte groups to overlap +// We don't do this for 32-bit systems because we need 64-bit math for this and this will hurt in-order CPUs +#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) +#define SIMD_LATENCYOPT +#endif + +#endif // !MESHOPTIMIZER_NO_SIMD + +#ifdef SIMD_SSE +#include +#endif + +#if defined(SIMD_SSE) && defined(SIMD_FALLBACK) +#ifdef _MSC_VER +#include // __cpuid +#else +#include // __cpuid +#endif +#endif + +#ifdef SIMD_AVX +#include +#endif + +#ifdef SIMD_NEON +#if defined(_MSC_VER) && defined(_M_ARM64) +#include +#else +#include +#endif +#endif + +#ifdef SIMD_WASM +#include +#endif + +#ifdef SIMD_WASM +#define wasmx_splat_v32x4(v, i) wasm_i32x4_shuffle(v, v, i, i, i, i) +#define wasmx_unpacklo_v8x16(a, b) wasm_i8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) +#define wasmx_unpackhi_v8x16(a, b) wasm_i8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) +#define wasmx_unpacklo_v16x8(a, b) wasm_i16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +#define wasmx_unpackhi_v16x8(a, b) wasm_i16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +#define wasmx_unpacklo_v64x2(a, b) wasm_i64x2_shuffle(a, b, 0, 2) +#define wasmx_unpackhi_v64x2(a, b) wasm_i64x2_shuffle(a, b, 1, 3) +#endif + +namespace meshopt +{ + +const unsigned char kVertexHeader = 0xa0; + +static int gEncodeVertexVersion = 0; + +const size_t kVertexBlockSizeBytes = 8192; +const size_t kVertexBlockMaxSize = 256; +const size_t kByteGroupSize = 16; +const size_t kByteGroupDecodeLimit = 24; +const size_t kTailMaxSize = 32; + +static size_t getVertexBlockSize(size_t vertex_size) +{ + // make sure the entire block fits into the scratch buffer + size_t result = kVertexBlockSizeBytes / vertex_size; + + // align to byte group size; we encode each byte as a byte group + // if vertex block is misaligned, it results in wasted bytes, so just truncate the block size + result &= ~(kByteGroupSize - 1); + + return (result < kVertexBlockMaxSize) ? result : kVertexBlockMaxSize; +} + +inline unsigned char zigzag8(unsigned char v) +{ + return ((signed char)(v) >> 7) ^ (v << 1); +} + +inline unsigned char unzigzag8(unsigned char v) +{ + return -(v & 1) ^ (v >> 1); +} + +static bool encodeBytesGroupZero(const unsigned char* buffer) +{ + for (size_t i = 0; i < kByteGroupSize; ++i) + if (buffer[i]) + return false; + + return true; +} + +static size_t encodeBytesGroupMeasure(const unsigned char* buffer, int bits) +{ + assert(bits >= 1 && bits <= 8); + + if (bits == 1) + return encodeBytesGroupZero(buffer) ? 0 : size_t(-1); + + if (bits == 8) + return kByteGroupSize; + + size_t result = kByteGroupSize * bits / 8; + + unsigned char sentinel = (1 << bits) - 1; + + for (size_t i = 0; i < kByteGroupSize; ++i) + result += buffer[i] >= sentinel; + + return result; +} + +static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* buffer, int bits) +{ + assert(bits >= 1 && bits <= 8); + + if (bits == 1) + return data; + + if (bits == 8) + { + memcpy(data, buffer, kByteGroupSize); + return data + kByteGroupSize; + } + + size_t byte_size = 8 / bits; + assert(kByteGroupSize % byte_size == 0); + + // fixed portion: bits bits for each value + // variable portion: full byte for each out-of-range value (using 1...1 as sentinel) + unsigned char sentinel = (1 << bits) - 1; + + for (size_t i = 0; i < kByteGroupSize; i += byte_size) + { + unsigned char byte = 0; + + for (size_t k = 0; k < byte_size; ++k) + { + unsigned char enc = (buffer[i + k] >= sentinel) ? sentinel : buffer[i + k]; + + byte <<= bits; + byte |= enc; + } + + *data++ = byte; + } + + for (size_t i = 0; i < kByteGroupSize; ++i) + { + if (buffer[i] >= sentinel) + { + *data++ = buffer[i]; + } + } + + return data; +} + +static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, const unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + + unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + memset(header, 0, header_size); + + for (size_t i = 0; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + int best_bits = 8; + size_t best_size = encodeBytesGroupMeasure(buffer + i, 8); + + for (int bits = 1; bits < 8; bits *= 2) + { + size_t size = encodeBytesGroupMeasure(buffer + i, bits); + + if (size < best_size) + { + best_bits = bits; + best_size = size; + } + } + + int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2) ? 1 : (best_bits == 4) ? 2 : 3; + assert((1 << bitslog2) == best_bits); + + size_t header_offset = i / kByteGroupSize; + + header[header_offset / 4] |= bitslog2 << ((header_offset % 4) * 2); + + unsigned char* next = encodeBytesGroup(data, buffer + i, best_bits); + + assert(data + best_size == next); + data = next; + } + + return data; +} + +static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data_end, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize]; + assert(sizeof(buffer) % kByteGroupSize == 0); + + // we sometimes encode elements we didn't fill when rounding to kByteGroupSize + memset(buffer, 0, sizeof(buffer)); + + for (size_t k = 0; k < vertex_size; ++k) + { + size_t vertex_offset = k; + + unsigned char p = last_vertex[k]; + + for (size_t i = 0; i < vertex_count; ++i) + { + buffer[i] = zigzag8(vertex_data[vertex_offset] - p); + + p = vertex_data[vertex_offset]; + + vertex_offset += vertex_size; + } + + data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); + if (!data) + return 0; + } + + memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} + +#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX) && !defined(SIMD_WASM)) +static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ +#define READ() byte = *data++ +#define NEXT(bits) enc = byte >> (8 - bits), byte <<= bits, encv = *data_var, *buffer++ = (enc == (1 << bits) - 1) ? encv : enc, data_var += (enc == (1 << bits) - 1) + + unsigned char byte, enc, encv; + const unsigned char* data_var; + + switch (bitslog2) + { + case 0: + memset(buffer, 0, kByteGroupSize); + return data; + case 1: + data_var = data + 4; + + // 4 groups with 4 2-bit values in each byte + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + + return data_var; + case 2: + data_var = data + 8; + + // 8 groups with 2 4-bit values in each byte + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + + return data_var; + case 3: + memcpy(buffer, data, kByteGroupSize); + return data + kByteGroupSize; + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } + +#undef READ +#undef NEXT +} + +static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + + const unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + for (size_t i = 0; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + size_t header_offset = i / kByteGroupSize; + + int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; + + data = decodeBytesGroup(data, buffer + i, bitslog2); + } + + return data; +} + +static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize]; + unsigned char transposed[kVertexBlockSizeBytes]; + + size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + + for (size_t k = 0; k < vertex_size; ++k) + { + data = decodeBytes(data, data_end, buffer, vertex_count_aligned); + if (!data) + return 0; + + size_t vertex_offset = k; + + unsigned char p = last_vertex[k]; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned char v = unzigzag8(buffer[i]) + p; + + transposed[vertex_offset] = v; + p = v; + + vertex_offset += vertex_size; + } + } + + memcpy(vertex_data, transposed, vertex_count * vertex_size); + + memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) +static unsigned char kDecodeBytesGroupShuffle[256][8]; +static unsigned char kDecodeBytesGroupCount[256]; + +#ifdef __wasm__ +__attribute__((cold)) // this saves 500 bytes in the output binary - we don't need to vectorize this loop! +#endif +static bool +decodeBytesGroupBuildTables() +{ + for (int mask = 0; mask < 256; ++mask) + { + unsigned char shuffle[8]; + unsigned char count = 0; + + for (int i = 0; i < 8; ++i) + { + int maski = (mask >> i) & 1; + shuffle[i] = maski ? count : 0x80; + count += (unsigned char)(maski); + } + + memcpy(kDecodeBytesGroupShuffle[mask], shuffle, 8); + kDecodeBytesGroupCount[mask] = count; + } + + return true; +} + +static bool gDecodeBytesGroupInitialized = decodeBytesGroupBuildTables(); +#endif + +#ifdef SIMD_SSE +SIMD_TARGET +static __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1) +{ + __m128i sm0 = _mm_loadl_epi64(reinterpret_cast(&kDecodeBytesGroupShuffle[mask0])); + __m128i sm1 = _mm_loadl_epi64(reinterpret_cast(&kDecodeBytesGroupShuffle[mask1])); + __m128i sm1off = _mm_set1_epi8(kDecodeBytesGroupCount[mask0]); + + __m128i sm1r = _mm_add_epi8(sm1, sm1off); + + return _mm_unpacklo_epi64(sm0, sm1r); +} + +SIMD_TARGET +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + __m128i result = _mm_setzero_si128(); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data; + } + + case 1: + { +#ifdef __GNUC__ + typedef int __attribute__((aligned(1))) unaligned_int; +#else + typedef int unaligned_int; +#endif + +#ifdef SIMD_LATENCYOPT + unsigned int data32; + memcpy(&data32, data, 4); + data32 &= data32 >> 1; + + // arrange bits such that low bits of nibbles of data64 contain all 2-bit elements of data32 + unsigned long long data64 = ((unsigned long long)data32 << 30) | (data32 & 0x3fffffff); + + // adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3 + int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60); +#endif + + __m128i sel2 = _mm_cvtsi32_si128(*reinterpret_cast(data)); + __m128i rest = _mm_loadu_si128(reinterpret_cast(data + 4)); + + __m128i sel22 = _mm_unpacklo_epi8(_mm_srli_epi16(sel2, 4), sel2); + __m128i sel2222 = _mm_unpacklo_epi8(_mm_srli_epi16(sel22, 2), sel22); + __m128i sel = _mm_and_si128(sel2222, _mm_set1_epi8(3)); + + __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(3)); + int mask16 = _mm_movemask_epi8(mask); + unsigned char mask0 = (unsigned char)(mask16 & 255); + unsigned char mask1 = (unsigned char)(mask16 >> 8); + + __m128i shuf = decodeShuffleMask(mask0, mask1); + + __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + +#ifdef SIMD_LATENCYOPT + return data + 4 + datacnt; +#else + return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +#endif + } + + case 2: + { +#ifdef SIMD_LATENCYOPT + unsigned long long data64; + memcpy(&data64, data, 8); + data64 &= data64 >> 1; + data64 &= data64 >> 2; + + // adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3 + int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60); +#endif + + __m128i sel4 = _mm_loadl_epi64(reinterpret_cast(data)); + __m128i rest = _mm_loadu_si128(reinterpret_cast(data + 8)); + + __m128i sel44 = _mm_unpacklo_epi8(_mm_srli_epi16(sel4, 4), sel4); + __m128i sel = _mm_and_si128(sel44, _mm_set1_epi8(15)); + + __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(15)); + int mask16 = _mm_movemask_epi8(mask); + unsigned char mask0 = (unsigned char)(mask16 & 255); + unsigned char mask1 = (unsigned char)(mask16 >> 8); + + __m128i shuf = decodeShuffleMask(mask0, mask1); + + __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + +#ifdef SIMD_LATENCYOPT + return data + 8 + datacnt; +#else + return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +#endif + } + + case 3: + { + __m128i result = _mm_loadu_si128(reinterpret_cast(data)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#ifdef SIMD_AVX +static const __m128i decodeBytesGroupConfig[] = { + _mm_set1_epi8(3), + _mm_set1_epi8(15), + _mm_setr_epi8(6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24), + _mm_setr_epi8(4, 0, 12, 8, 20, 16, 28, 24, 36, 32, 44, 40, 52, 48, 60, 56), +}; + +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + __m128i result = _mm_setzero_si128(); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data; + } + + case 1: + case 2: + { + const unsigned char* skip = data + (bitslog2 << 2); + + __m128i selb = _mm_loadl_epi64(reinterpret_cast(data)); + __m128i rest = _mm_loadu_si128(reinterpret_cast(skip)); + + __m128i sent = decodeBytesGroupConfig[bitslog2 - 1]; + __m128i ctrl = decodeBytesGroupConfig[bitslog2 + 1]; + + __m128i selw = _mm_shuffle_epi32(selb, 0x44); + __m128i sel = _mm_and_si128(sent, _mm_multishift_epi64_epi8(ctrl, selw)); + __mmask16 mask16 = _mm_cmp_epi8_mask(sel, sent, _MM_CMPINT_EQ); + + __m128i result = _mm_mask_expand_epi8(sel, mask16, rest); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return skip + _mm_popcnt_u32(mask16); + } + + case 3: + { + __m128i result = _mm_loadu_si128(reinterpret_cast(data)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#ifdef SIMD_NEON +static uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8_t rest0, uint8x8_t rest1) +{ + uint8x8_t sm0 = vld1_u8(kDecodeBytesGroupShuffle[mask0]); + uint8x8_t sm1 = vld1_u8(kDecodeBytesGroupShuffle[mask1]); + + uint8x8_t r0 = vtbl1_u8(rest0, sm0); + uint8x8_t r1 = vtbl1_u8(rest1, sm1); + + return vcombine_u8(r0, r1); +} + +static void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& mask1) +{ + // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 + const uint64_t magic = 0x000103070f1f3f80ull; + + uint64x2_t mask2 = vreinterpretq_u64_u8(mask); + + mask0 = uint8_t((vgetq_lane_u64(mask2, 0) * magic) >> 56); + mask1 = uint8_t((vgetq_lane_u64(mask2, 1) * magic) >> 56); +} + +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + uint8x16_t result = vdupq_n_u8(0); + + vst1q_u8(buffer, result); + + return data; + } + + case 1: + { +#ifdef SIMD_LATENCYOPT + unsigned int data32; + memcpy(&data32, data, 4); + data32 &= data32 >> 1; + + // arrange bits such that low bits of nibbles of data64 contain all 2-bit elements of data32 + unsigned long long data64 = ((unsigned long long)data32 << 30) | (data32 & 0x3fffffff); + + // adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3 + int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60); +#endif + + uint8x8_t sel2 = vld1_u8(data); + uint8x8_t sel22 = vzip_u8(vshr_n_u8(sel2, 4), sel2).val[0]; + uint8x8x2_t sel2222 = vzip_u8(vshr_n_u8(sel22, 2), sel22); + uint8x16_t sel = vandq_u8(vcombine_u8(sel2222.val[0], sel2222.val[1]), vdupq_n_u8(3)); + + uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(3)); + unsigned char mask0, mask1; + neonMoveMask(mask, mask0, mask1); + + uint8x8_t rest0 = vld1_u8(data + 4); + uint8x8_t rest1 = vld1_u8(data + 4 + kDecodeBytesGroupCount[mask0]); + + uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel); + + vst1q_u8(buffer, result); + +#ifdef SIMD_LATENCYOPT + return data + 4 + datacnt; +#else + return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +#endif + } + + case 2: + { +#ifdef SIMD_LATENCYOPT + unsigned long long data64; + memcpy(&data64, data, 8); + data64 &= data64 >> 1; + data64 &= data64 >> 2; + + // adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3 + int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60); +#endif + + uint8x8_t sel4 = vld1_u8(data); + uint8x8x2_t sel44 = vzip_u8(vshr_n_u8(sel4, 4), vand_u8(sel4, vdup_n_u8(15))); + uint8x16_t sel = vcombine_u8(sel44.val[0], sel44.val[1]); + + uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(15)); + unsigned char mask0, mask1; + neonMoveMask(mask, mask0, mask1); + + uint8x8_t rest0 = vld1_u8(data + 8); + uint8x8_t rest1 = vld1_u8(data + 8 + kDecodeBytesGroupCount[mask0]); + + uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel); + + vst1q_u8(buffer, result); + +#ifdef SIMD_LATENCYOPT + return data + 8 + datacnt; +#else + return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +#endif + } + + case 3: + { + uint8x16_t result = vld1q_u8(data); + + vst1q_u8(buffer, result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#ifdef SIMD_WASM +SIMD_TARGET +static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) +{ + v128_t sm0 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask0]); + v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]); + + v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]); + sm1off = wasm_i8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + v128_t sm1r = wasm_i8x16_add(sm1, sm1off); + + return wasmx_unpacklo_v64x2(sm0, sm1r); +} + +SIMD_TARGET +static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1) +{ + // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 + const uint64_t magic = 0x000103070f1f3f80ull; + + mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56); + mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56); +} + +SIMD_TARGET +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + v128_t result = wasm_i8x16_splat(0); + + wasm_v128_store(buffer, result); + + return data; + } + + case 1: + { + v128_t sel2 = wasm_v128_load(data); + v128_t rest = wasm_v128_load(data + 4); + + v128_t sel22 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel2, 4), sel2); + v128_t sel2222 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel22, 2), sel22); + v128_t sel = wasm_v128_and(sel2222, wasm_i8x16_splat(3)); + + v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(3)); + + unsigned char mask0, mask1; + wasmMoveMask(mask, mask0, mask1); + + v128_t shuf = decodeShuffleMask(mask0, mask1); + + v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask); + + wasm_v128_store(buffer, result); + + return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 2: + { + v128_t sel4 = wasm_v128_load(data); + v128_t rest = wasm_v128_load(data + 8); + + v128_t sel44 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel4, 4), sel4); + v128_t sel = wasm_v128_and(sel44, wasm_i8x16_splat(15)); + + v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(15)); + + unsigned char mask0, mask1; + wasmMoveMask(mask, mask0, mask1); + + v128_t shuf = decodeShuffleMask(mask0, mask1); + + v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask); + + wasm_v128_store(buffer, result); + + return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 3: + { + v128_t result = wasm_v128_load(data); + + wasm_v128_store(buffer, result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_AVX) +SIMD_TARGET +static void transpose8(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3) +{ + __m128i t0 = _mm_unpacklo_epi8(x0, x1); + __m128i t1 = _mm_unpackhi_epi8(x0, x1); + __m128i t2 = _mm_unpacklo_epi8(x2, x3); + __m128i t3 = _mm_unpackhi_epi8(x2, x3); + + x0 = _mm_unpacklo_epi16(t0, t2); + x1 = _mm_unpackhi_epi16(t0, t2); + x2 = _mm_unpacklo_epi16(t1, t3); + x3 = _mm_unpackhi_epi16(t1, t3); +} + +SIMD_TARGET +static __m128i unzigzag8(__m128i v) +{ + __m128i xl = _mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(v, _mm_set1_epi8(1))); + __m128i xr = _mm_and_si128(_mm_srli_epi16(v, 1), _mm_set1_epi8(127)); + + return _mm_xor_si128(xl, xr); +} +#endif + +#ifdef SIMD_NEON +static void transpose8(uint8x16_t& x0, uint8x16_t& x1, uint8x16_t& x2, uint8x16_t& x3) +{ + uint8x16x2_t t01 = vzipq_u8(x0, x1); + uint8x16x2_t t23 = vzipq_u8(x2, x3); + + uint16x8x2_t x01 = vzipq_u16(vreinterpretq_u16_u8(t01.val[0]), vreinterpretq_u16_u8(t23.val[0])); + uint16x8x2_t x23 = vzipq_u16(vreinterpretq_u16_u8(t01.val[1]), vreinterpretq_u16_u8(t23.val[1])); + + x0 = vreinterpretq_u8_u16(x01.val[0]); + x1 = vreinterpretq_u8_u16(x01.val[1]); + x2 = vreinterpretq_u8_u16(x23.val[0]); + x3 = vreinterpretq_u8_u16(x23.val[1]); +} + +static uint8x16_t unzigzag8(uint8x16_t v) +{ + uint8x16_t xl = vreinterpretq_u8_s8(vnegq_s8(vreinterpretq_s8_u8(vandq_u8(v, vdupq_n_u8(1))))); + uint8x16_t xr = vshrq_n_u8(v, 1); + + return veorq_u8(xl, xr); +} +#endif + +#ifdef SIMD_WASM +SIMD_TARGET +static void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, v128_t& x3) +{ + v128_t t0 = wasmx_unpacklo_v8x16(x0, x1); + v128_t t1 = wasmx_unpackhi_v8x16(x0, x1); + v128_t t2 = wasmx_unpacklo_v8x16(x2, x3); + v128_t t3 = wasmx_unpackhi_v8x16(x2, x3); + + x0 = wasmx_unpacklo_v16x8(t0, t2); + x1 = wasmx_unpackhi_v16x8(t0, t2); + x2 = wasmx_unpacklo_v16x8(t1, t3); + x3 = wasmx_unpackhi_v16x8(t1, t3); +} + +SIMD_TARGET +static v128_t unzigzag8(v128_t v) +{ + v128_t xl = wasm_i8x16_neg(wasm_v128_and(v, wasm_i8x16_splat(1))); + v128_t xr = wasm_u8x16_shr(v, 1); + + return wasm_v128_xor(xl, xr); +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM) +SIMD_TARGET +static const unsigned char* decodeBytesSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + assert(kByteGroupSize == 16); + + const unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + size_t i = 0; + + // fast-path: process 4 groups at a time, do a shared bounds check - each group reads <=24b + for (; i + kByteGroupSize * 4 <= buffer_size && size_t(data_end - data) >= kByteGroupDecodeLimit * 4; i += kByteGroupSize * 4) + { + size_t header_offset = i / kByteGroupSize; + unsigned char header_byte = header[header_offset / 4]; + + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 0, (header_byte >> 0) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 1, (header_byte >> 2) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 2, (header_byte >> 4) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 3, (header_byte >> 6) & 3); + } + + // slow-path: process remaining groups + for (; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + size_t header_offset = i / kByteGroupSize; + + int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; + + data = decodeBytesGroupSimd(data, buffer + i, bitslog2); + } + + return data; +} + +SIMD_TARGET +static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize * 4]; + unsigned char transposed[kVertexBlockSizeBytes]; + + size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + + for (size_t k = 0; k < vertex_size; k += 4) + { + for (size_t j = 0; j < 4; ++j) + { + data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned); + if (!data) + return 0; + } + +#if defined(SIMD_SSE) || defined(SIMD_AVX) +#define TEMP __m128i +#define PREP() __m128i pi = _mm_cvtsi32_si128(*reinterpret_cast(last_vertex + k)) +#define LOAD(i) __m128i r##i = _mm_loadu_si128(reinterpret_cast(buffer + j + i * vertex_count_aligned)) +#define GRP4(i) t0 = _mm_shuffle_epi32(r##i, 0), t1 = _mm_shuffle_epi32(r##i, 1), t2 = _mm_shuffle_epi32(r##i, 2), t3 = _mm_shuffle_epi32(r##i, 3) +#define FIXD(i) t##i = pi = _mm_add_epi8(pi, t##i) +#define SAVE(i) *reinterpret_cast(savep) = _mm_cvtsi128_si32(t##i), savep += vertex_size +#endif + +#ifdef SIMD_NEON +#define TEMP uint8x8_t +#define PREP() uint8x8_t pi = vreinterpret_u8_u32(vld1_lane_u32(reinterpret_cast(last_vertex + k), vdup_n_u32(0), 0)) +#define LOAD(i) uint8x16_t r##i = vld1q_u8(buffer + j + i * vertex_count_aligned) +#define GRP4(i) t0 = vget_low_u8(r##i), t1 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t0), 1)), t2 = vget_high_u8(r##i), t3 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t2), 1)) +#define FIXD(i) t##i = pi = vadd_u8(pi, t##i) +#define SAVE(i) vst1_lane_u32(reinterpret_cast(savep), vreinterpret_u32_u8(t##i), 0), savep += vertex_size +#endif + +#ifdef SIMD_WASM +#define TEMP v128_t +#define PREP() v128_t pi = wasm_v128_load(last_vertex + k) +#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned) +#define GRP4(i) t0 = wasmx_splat_v32x4(r##i, 0), t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3) +#define FIXD(i) t##i = pi = wasm_i8x16_add(pi, t##i) +#define SAVE(i) *reinterpret_cast(savep) = wasm_i32x4_extract_lane(t##i, 0), savep += vertex_size +#endif + + PREP(); + + unsigned char* savep = transposed + k; + + for (size_t j = 0; j < vertex_count_aligned; j += 16) + { + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + + r0 = unzigzag8(r0); + r1 = unzigzag8(r1); + r2 = unzigzag8(r2); + r3 = unzigzag8(r3); + + transpose8(r0, r1, r2, r3); + + TEMP t0, t1, t2, t3; + + GRP4(0); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(1); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(2); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(3); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + +#undef TEMP +#undef PREP +#undef LOAD +#undef GRP4 +#undef FIXD +#undef SAVE + } + } + + memcpy(vertex_data, transposed, vertex_count * vertex_size); + + memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} +#endif + +#if defined(SIMD_SSE) && defined(SIMD_FALLBACK) +static unsigned int getCpuFeatures() +{ + int cpuinfo[4] = {}; +#ifdef _MSC_VER + __cpuid(cpuinfo, 1); +#else + __cpuid(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); +#endif + return cpuinfo[2]; +} + +static unsigned int cpuid = getCpuFeatures(); +#endif + +} // namespace meshopt + +size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + + const unsigned char* vertex_data = static_cast(vertices); + + unsigned char* data = buffer; + unsigned char* data_end = buffer + buffer_size; + + if (size_t(data_end - data) < 1 + vertex_size) + return 0; + + int version = gEncodeVertexVersion; + + *data++ = (unsigned char)(kVertexHeader | version); + + unsigned char first_vertex[256] = {}; + if (vertex_count > 0) + memcpy(first_vertex, vertex_data, vertex_size); + + unsigned char last_vertex[256] = {}; + memcpy(last_vertex, first_vertex, vertex_size); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + + size_t vertex_offset = 0; + + while (vertex_offset < vertex_count) + { + size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset; + + data = encodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); + if (!data) + return 0; + + vertex_offset += block_size; + } + + size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + + if (size_t(data_end - data) < tail_size) + return 0; + + // write first vertex to the end of the stream and pad it to 32 bytes; this is important to simplify bounds checks in decoder + if (vertex_size < kTailMaxSize) + { + memset(data, 0, kTailMaxSize - vertex_size); + data += kTailMaxSize - vertex_size; + } + + memcpy(data, first_vertex, vertex_size); + data += vertex_size; + + assert(data >= buffer + tail_size); + assert(data <= buffer + buffer_size); + + return data - buffer; +} + +size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + size_t vertex_block_count = (vertex_count + vertex_block_size - 1) / vertex_block_size; + + size_t vertex_block_header_size = (vertex_block_size / kByteGroupSize + 3) / 4; + size_t vertex_block_data_size = vertex_block_size; + + size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + + return 1 + vertex_block_count * vertex_size * (vertex_block_header_size + vertex_block_data_size) + tail_size; +} + +void meshopt_encodeVertexVersion(int version) +{ + assert(unsigned(version) <= 0); + + meshopt::gEncodeVertexVersion = version; +} + +int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + + const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = 0; + +#if defined(SIMD_SSE) && defined(SIMD_FALLBACK) + decode = (cpuid & (1 << 9)) ? decodeVertexBlockSimd : decodeVertexBlock; +#elif defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM) + decode = decodeVertexBlockSimd; +#else + decode = decodeVertexBlock; +#endif + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + assert(gDecodeBytesGroupInitialized); + (void)gDecodeBytesGroupInitialized; +#endif + + unsigned char* vertex_data = static_cast(destination); + + const unsigned char* data = buffer; + const unsigned char* data_end = buffer + buffer_size; + + if (size_t(data_end - data) < 1 + vertex_size) + return -2; + + unsigned char data_header = *data++; + + if ((data_header & 0xf0) != kVertexHeader) + return -1; + + int version = data_header & 0x0f; + if (version > 0) + return -1; + + unsigned char last_vertex[256]; + memcpy(last_vertex, data_end - vertex_size, vertex_size); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + + size_t vertex_offset = 0; + + while (vertex_offset < vertex_count) + { + size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset; + + data = decode(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); + if (!data) + return -2; + + vertex_offset += block_size; + } + + size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + + if (size_t(data_end - data) != tail_size) + return -3; + + return 0; +} + +#undef SIMD_NEON +#undef SIMD_SSE +#undef SIMD_AVX +#undef SIMD_WASM +#undef SIMD_FALLBACK +#undef SIMD_TARGET