-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Support for multi-threaded hash calculation
- Loading branch information
Showing
12 changed files
with
613 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
packages/file-handle/src/browser/worker/calcPrimaryWorker.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import { sha256 } from 'hash-wasm'; | ||
|
||
/**
 * Hashes one file chunk with `sha256`.
 *
 * @param {number} chunkId - Index of the chunk; echoed back so the caller can match the reply.
 * @param {ArrayBuffer} buffer - Raw bytes of the chunk.
 * @returns {Promise<[number, string]>} `[chunkId, digest]`, where `digest` is whatever `sha256` resolves to.
 */
const encodePrimary = async (chunkId, buffer) => {
  const bytes = new Uint8Array(buffer);
  const digest = await sha256(bytes);
  return [chunkId, digest];
};
|
||
// Worker entry point: hash the chunk carried by the message and post the
// result back together with the task id that came with the request.
onmessage = async ({ data }) => {
  const { chunkId, buffer, taskId } = data;
  const result = await encodePrimary(chunkId, buffer);
  postMessage({ result, taskId });
};
54 changes: 54 additions & 0 deletions
54
packages/file-handle/src/browser/worker/calcSecondWorker.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import { sha256 } from 'hash-wasm'; | ||
import { decodeBase64 } from '../../utils'; | ||
import Go from '../wasm_exec.js'; | ||
|
||
/**
 * Instantiates the Go WebAssembly module and warms up hash-wasm.
 *
 * The module URL is read from the global `__PUBLIC_FILE_HANDLE_WASM_PATH__`.
 *
 * @returns {Promise<void>} Resolves once the module has been started and the
 *   warm-up hash has completed.
 */
const init = async () => {
  const go = new Go();
  // `window` is not defined inside a worker scope; `globalThis` resolves to the
  // worker global there and to `window` on the main thread.
  const input = globalThis.__PUBLIC_FILE_HANDLE_WASM_PATH__;
  const result = await WebAssembly.instantiateStreaming(fetch(input), go.importObject);
  if (result) {
    // Deliberately not awaited, as in the original flow.
    go.run(result.instance);
    // Make sure hash-wasm is initialised now. If the user uploads a large
    // object right after the page loads, hash-wasm can otherwise fail to
    // initialise in Chrome because of memory pressure.
    await sha256('');
  }
};

// Start loading eagerly. encodeRawSegment calls init() again when the SDK
// global is still missing, so a failure here is reported rather than left as
// an unhandled rejection.
init().catch((err) => {
  // eslint-disable-next-line no-console
  console.error('file-handle wasm init error', err);
});
|
||
/**
 * Splits one chunk into shards through `greenfieldSdk.encodeRawSegment` and
 * hashes every shard.
 *
 * @param {number} chunkId - Index of the chunk; echoed back in the result.
 * @param {ArrayBuffer} buffer - Raw bytes of the chunk.
 * @param {number} dataBlocks - Forwarded to `greenfieldSdk.encodeRawSegment`.
 * @param {number} parityBlocks - Forwarded to `greenfieldSdk.encodeRawSegment`.
 * @returns {Promise<[number, string[][]]>} `[chunkId, digests]`, where
 *   `digests[i]` is a one-element array holding the hash of shard `i`.
 */
const encodeRawSegment = async (chunkId, buffer, dataBlocks, parityBlocks) => {
  const bytes = new Uint8Array(buffer);

  // The SDK global only exists once the wasm module has been started.
  if (typeof greenfieldSdk === 'undefined') {
    await init();
  }

  const encoded = greenfieldSdk.encodeRawSegment(bytes, dataBlocks, parityBlocks);
  const shards = JSON.parse(encoded.result);

  // An empty shard is hashed as an empty string so that every shard position
  // still yields a digest entry.
  const digests = await Promise.all(
    shards.map(async (shard) => [await sha256(decodeBase64(shard || ''))]),
  );

  return [chunkId, digests];
};
|
||
// Worker entry point: encode and hash the shards of the chunk carried by the
// message, then post the result back with the task id of the request.
onmessage = async ({ data }) => {
  const { chunkId, buffer, dataBlocks, parityBlocks, taskId } = data;
  const result = await encodeRawSegment(chunkId, buffer, dataBlocks, parityBlocks);
  postMessage({ result, taskId });
};
173 changes: 173 additions & 0 deletions
173
packages/file-handle/src/browser/worker/checksumWorker.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
import { Buffer } from 'buffer'; | ||
import * as Comlink from 'comlink'; | ||
import { sha256 } from 'hash-wasm'; | ||
import { values } from 'lodash-es'; | ||
import { encodeBase64 } from '../../utils'; | ||
import { DEFAULT_DATA_BLOCKS, DEFAULT_PARITY_BLOCKS, DEFAULT_SEGMENT_SIZE } from '../../constants'; | ||
|
||
// Number of workers created for each pool.
const WORKER_POOL_SIZE = 6;

/**
 * Cuts a file into consecutive slices of `DEFAULT_SEGMENT_SIZE` bytes.
 *
 * @param {Blob} file - Object exposing `size` and `slice(start, end)`.
 * @returns {Array<{ file: Blob }>} One entry per slice; a file without a size
 *   yields a single entry holding the file itself.
 */
const _createFileChunks = (file) => {
  if (!file.size) return [{ file }];

  const chunkCount = Math.ceil(file.size / DEFAULT_SEGMENT_SIZE);
  return Array.from({ length: chunkCount }, (_, index) => {
    const start = index * DEFAULT_SEGMENT_SIZE;
    return { file: file.slice(start, start + DEFAULT_SEGMENT_SIZE) };
  });
};
|
||
/**
 * Hashes the concatenation of a list of hex digests and returns the result
 * base64-encoded.
 *
 * @param {string[]} list - Hex strings; they are joined and decoded as hex bytes.
 * @returns {Promise<string>} `encodeBase64` of the bytes of the combined hash.
 */
const _generateIntegrityHash = async (list) => {
  const joinedBytes = Buffer.from(list.join(''), 'hex');
  const digestHex = await sha256(joinedBytes);
  const digestBytes = Uint8Array.from(Buffer.from(digestHex, 'hex'));
  return encodeBase64(digestBytes);
};
|
||
/**
 * Creates the pool of `calcPrimaryWorker` workers and routes their replies.
 *
 * A reply is delivered only when a consumer is registered under the reply's
 * chunk id and that consumer carries the same task id; anything else is dropped.
 *
 * @param {{ consumers: Object }} options - `consumers` maps a chunk id to
 *   `{ resolve, data, taskId }`.
 * @returns {Worker[]} The `WORKER_POOL_SIZE` workers that were created.
 */
const _initPrimaryWorkers = ({ consumers }) => {
  const handleMessage = ({ data: message }) => {
    const { result, taskId } = message;
    const [chunkId, digest] = result;
    const consumer = consumers[chunkId];
    if (!consumer) return;

    const { resolve, data } = consumer;
    if (taskId !== consumer.taskId) return;

    data[chunkId] = digest;
    resolve();
  };

  return Array.from({ length: WORKER_POOL_SIZE }, () => {
    const worker = new Worker(
      /* webpackChunkName: "workers/calcPrimaryWorker-worker" */ new URL(
        './calcPrimaryWorker.js',
        import.meta.url,
      ),
      {
        type: 'module',
      },
    );
    worker.onmessage = handleMessage;
    return worker;
  });
};
/**
 * Creates the pool of `calcSecondWorker` workers and routes their replies.
 *
 * A reply is delivered only when a consumer is registered under the reply's
 * chunk id and that consumer carries the same task id; anything else is dropped.
 *
 * @param {{ consumers: Object }} options - `consumers` maps a chunk id to
 *   `{ resolve, data, taskId }`.
 * @returns {Worker[]} The `WORKER_POOL_SIZE` workers that were created.
 */
const _initSecondWorkers = ({ consumers }) => {
  const handleMessage = ({ data: message }) => {
    const { result, taskId } = message;
    const [chunkId, shardDigests] = result;
    const consumer = consumers[chunkId];
    if (!consumer) return;

    const { resolve, data } = consumer;
    if (taskId !== consumer.taskId) return;

    data[chunkId] = shardDigests;
    resolve();
  };

  return Array.from({ length: WORKER_POOL_SIZE }, () => {
    const worker = new Worker(
      /* webpackChunkName: "workers/calcSecondWorker-worker" */ new URL(
        './calcSecondWorker.js',
        import.meta.url,
      ),
    );
    worker.onmessage = handleMessage;
    return worker;
  });
};
|
||
// Memory held by a JS VM instance is not released immediately, so the worker
// threads are created once here and reused by every checksum run.
const primaryWorkerConsumers = {};
const primaryWorkers = _initPrimaryWorkers({ consumers: primaryWorkerConsumers });

const secondWorkerConsumers = {};
const secondWorkers = _initSecondWorkers({ consumers: secondWorkerConsumers });
|
||
/**
 * Computes the checksum list of a file using the shared worker pools.
 *
 * The file is cut into chunks. Each chunk is sent to one primary worker
 * (hash of the whole chunk) and one second worker (hashes of its shards).
 * The per-chunk digests are then folded into one integrity hash for the
 * primary data plus one per shard position.
 *
 * @param {Blob} file - File to process; its chunks must support `arrayBuffer()`.
 * @returns {Promise<Object|undefined>} `{ fileChunks, contentLength, expectCheckSums }`
 *   on success, `{}` when `file` is falsy, and `undefined` when an error was
 *   thrown while processing (the error is logged, not rethrown).
 */
export const generateCheckSumV2 = async (file) => {
  if (!file) return {};

  // Tags every request of this run; the worker pools drop replies whose task
  // id does not match the registered consumer.
  const taskId = Date.now();
  let checkSumRes;

  // Settle whatever an earlier run is still waiting on before its consumer
  // slots are overwritten below.
  values(primaryWorkerConsumers).forEach((r) => r.resolve());
  values(secondWorkerConsumers).forEach((r) => r.resolve());

  try {
    const fileChunks = _createFileChunks(file);
    const secondResults = [];
    const primaryResults = [];

    const segments = fileChunks.map(async (fileItem, chunkId) => {
      const buffer = await fileItem.file.arrayBuffer();
      // Chunks are spread over the pool round-robin by chunk index.
      const workerIdx = chunkId % WORKER_POOL_SIZE;

      const primaryPromise = new Promise((resolve) => {
        primaryWorkerConsumers[chunkId] = {
          resolve,
          data: primaryResults,
          taskId,
        };

        primaryWorkers[workerIdx].postMessage({ chunkId, buffer, taskId });
      });

      // shards
      const shardsPromise = new Promise((resolve) => {
        secondWorkerConsumers[chunkId] = {
          resolve,
          data: secondResults,
          taskId,
        };

        // The worker destructures `dataBlocks` / `parityBlocks`, so the
        // constants have to be sent under exactly those keys.
        secondWorkers[workerIdx].postMessage({
          chunkId,
          buffer,
          dataBlocks: DEFAULT_DATA_BLOCKS,
          parityBlocks: DEFAULT_PARITY_BLOCKS,
          taskId,
        });
      });

      return Promise.all([shardsPromise, primaryPromise]);
    });

    await Promise.all(segments);

    // Transpose secondResults[chunk][shard] = [digest] into
    // combinedShards[shard][chunk] = digest.
    const combinedShards = [];
    secondResults.forEach((items, idx) => {
      items.forEach((child, childIdx) => {
        if (!combinedShards[childIdx]) {
          combinedShards[childIdx] = [];
        }
        combinedShards[childIdx][idx] = child[0];
      });
    });

    const primaryCheckSum = await _generateIntegrityHash(primaryResults);
    const secondsCheckSum = await Promise.all(
      combinedShards.map((it) => _generateIntegrityHash(it)),
    );
    const value = [primaryCheckSum].concat(secondsCheckSum);
    checkSumRes = {
      fileChunks: fileChunks.length,
      contentLength: file.size,
      expectCheckSums: value,
    };
  } catch (e) {
    // eslint-disable-next-line no-console
    console.log('check sum error', e);
  }

  return checkSumRes;
};
|
||
// Publish the checksum API through Comlink so the owner of this worker can call it.
Comlink.expose({ generateCheckSumV2 });
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// Standard base64 alphabet, indexed by 6-bit value.
const base64Chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

/**
 * Encodes a sequence of bytes as base64.
 *
 * Only the low 8 bits of every element are used, in the full 3-byte groups as
 * well as in the 1- or 2-byte tail, so signed byte containers such as
 * `Int8Array` encode the same as their unsigned counterpart.
 *
 * @param {ArrayLike<number>} data - Bytes to encode.
 * @param {boolean} [pad=true] - Whether to append `=` padding to the tail group.
 * @returns {string} The base64 text.
 */
export function encodeBase64(data, pad = true) {
  const len = data.length;
  const extraBytes = len % 3;
  const parts = [];

  // Full 3-byte groups: 24 bits -> four 6-bit symbols.
  const len2 = len - extraBytes;
  for (let i = 0; i < len2; i += 3) {
    const tmp = ((data[i] << 16) & 0xff0000) + ((data[i + 1] << 8) & 0xff00) + (data[i + 2] & 0xff);

    const triplet =
      base64Chars.charAt((tmp >> 18) & 0x3f) +
      base64Chars.charAt((tmp >> 12) & 0x3f) +
      base64Chars.charAt((tmp >> 6) & 0x3f) +
      base64Chars.charAt(tmp & 0x3f);

    parts.push(triplet);
  }

  if (extraBytes === 1) {
    // Mask like the main loop does; an unmasked negative or >255 value would
    // index outside the alphabet and drop a symbol.
    const tmp = data[len - 1] & 0xff;
    const a = base64Chars.charAt(tmp >> 2);
    const b = base64Chars.charAt((tmp << 4) & 0x3f);

    parts.push(`${a}${b}`);
    if (pad) {
      parts.push('==');
    }
  } else if (extraBytes === 2) {
    const tmp = ((data[len - 2] & 0xff) << 8) + (data[len - 1] & 0xff);
    const a = base64Chars.charAt(tmp >> 10);
    const b = base64Chars.charAt((tmp >> 4) & 0x3f);
    const c = base64Chars.charAt((tmp << 2) & 0x3f);
    parts.push(`${a}${b}${c}`);
    if (pad) {
      parts.push('=');
    }
  }

  return parts.join('');
}
Binary file not shown.
Oops, something went wrong.