From cbc8fe92ac84e9086b68d9b262e13865449cb852 Mon Sep 17 00:00:00 2001 From: Davin Shearer Date: Thu, 28 Sep 2023 14:54:17 -0400 Subject: [PATCH] add BOM detection, and language guessing, and add multi-byte character counts to the profiler --- package.json | 1 + src/dataEditor/dataEditorClient.ts | 77 +++++++++++++------ .../components/DataMetrics/DataMetrics.svelte | 31 ++++++++ .../Header/fieldsets/FileMetrics.svelte | 22 +++++- .../Header/fieldsets/FileMetrics.ts | 2 + yarn.lock | 5 ++ 6 files changed, 114 insertions(+), 24 deletions(-) diff --git a/package.json b/package.json index 185359bd0..5b7911782 100644 --- a/package.json +++ b/package.json @@ -50,6 +50,7 @@ "@vscode/debugadapter": "1.63.0", "await-notify": "1.0.1", "hexy": "0.3.5", + "iso-639-1": "^3.1.0", "jsonc-parser": "3.2.0", "semver": "7.5.4", "unzip-stream": "0.3.1", diff --git a/src/dataEditor/dataEditorClient.ts b/src/dataEditor/dataEditorClient.ts index 2cf0dbf29..52caa43fa 100644 --- a/src/dataEditor/dataEditorClient.ts +++ b/src/dataEditor/dataEditorClient.ts @@ -19,6 +19,7 @@ import fs from 'fs' import { ALL_EVENTS, clear, + countCharacters, CountKind, createSession, createSimpleFileLogger, @@ -163,8 +164,6 @@ export class DataEditorClient implements vscode.Disposable { private currentViewportId: string private fileToEdit: string = '' private omegaSessionId = '' - private contentType = '' - private fileSize = 0 private sendHeartbeatIntervalId: NodeJS.Timeout | number | undefined = undefined @@ -187,8 +186,6 @@ export class DataEditorClient implements vscode.Disposable { this.svelteWebviewInitializer = new SvelteWebviewInitializer(context) this.svelteWebviewInitializer.initialize(this.view, this.panel.webview) this.currentViewportId = '' - this.contentType = '' - this.fileSize = 0 this.fileToEdit = fileToEdit this.displayState = new DisplayState(this.panel) } @@ -237,6 +234,17 @@ export class DataEditorClient implements vscode.Disposable { 'checkpointPath is not set' ) + let data = { + byteOrderMark: '', + changeCount: 0, + computedFileSize: 0, + diskFileSize: 0, + fileName: this.fileToEdit, + language: '', + type: '', + undoCount: 0, + } + // create a session and capture the session id, content type, and file size try { const createSessionResponse = await createSession( @@ -248,14 +256,30 @@ export class DataEditorClient implements vscode.Disposable { assert(this.omegaSessionId.length > 0, 'omegaSessionId is not set') addActiveSession(this.omegaSessionId) - this.contentType = createSessionResponse.hasContentType() + data.diskFileSize = data.computedFileSize = + createSessionResponse.hasFileSize() + ? (createSessionResponse.getFileSize() as number) + : 0 + + data.type = createSessionResponse.hasContentType() ? (createSessionResponse.getContentType() as string) : 'unknown' - assert(this.contentType.length > 0, 'contentType is not set') + assert(data.type.length > 0, 'contentType is not set') + + data.byteOrderMark = createSessionResponse.hasByteOrderMark() + ? (createSessionResponse.getByteOrderMark() as string) + : 'unknown' + assert(data.byteOrderMark.length > 0, 'byteOrderMark is not set') - this.fileSize = createSessionResponse.hasFileSize() - ? (createSessionResponse.getFileSize() as number) - : 0 + data.language = createSessionResponse.hasLanguage() + ? (createSessionResponse.getLanguage() as string) + : 'unknown' + assert(data.language.length > 0, 'language is not set') + + data.diskFileSize = data.computedFileSize = + createSessionResponse.hasFileSize() + ? (createSessionResponse.getFileSize() as number) + : 0 } catch { const msg = `Failed to create session for ${this.fileToEdit}` getLogger().error({ @@ -294,14 +318,7 @@ export class DataEditorClient implements vscode.Disposable { // send the initial file info to the webview await this.panel.webview.postMessage({ command: MessageCommand.fileInfo, - data: { - changeCount: 0, - computedFileSize: this.fileSize, - diskFileSize: this.fileSize, - fileName: this.fileToEdit, - type: this.contentType, - undoCount: 0, - }, + data: data, }) } @@ -443,6 +460,11 @@ export class DataEditorClient implements vscode.Disposable { startOffset, length ) + const characterCount = await countCharacters( + this.omegaSessionId, + startOffset, + length + ) await this.panel.webview.postMessage({ command: MessageCommand.profile, data: { @@ -450,6 +472,15 @@ export class DataEditorClient implements vscode.Disposable { length: length, byteProfile: byteProfile, numAscii: numAscii(byteProfile), + characterCount: { + byteOrderMark: characterCount.getByteOrderMark(), + byteOrderMarkBytes: characterCount.getByteOrderMarkBytes(), + singleByteCount: characterCount.getSingleByteChars(), + doubleByteCount: characterCount.getDoubleByteChars(), + tripleByteCount: characterCount.getTripleByteChars(), + quadByteCount: characterCount.getQuadByteChars(), + invalidBytes: characterCount.getInvalidBytes(), + }, }, }) } @@ -610,16 +641,16 @@ export class DataEditorClient implements vscode.Disposable { if (saved) { this.fileToEdit = fileToSave - this.fileSize = await getComputedFileSize(this.omegaSessionId) + const fileSize = await getComputedFileSize(this.omegaSessionId) await this.panel.webview.postMessage({ command: MessageCommand.fileInfo, data: { - computedFileSize: this.fileSize, - diskFileSize: this.fileSize, - fileName: this.fileToEdit, + computedFileSize: fileSize, + diskFileSize: fileSize, + fileName: fileToSave, }, }) - vscode.window.showInformationMessage(`Saved: ${this.fileToEdit}`) + vscode.window.showInformationMessage(`Saved: ${fileToSave}`) } else if (cancelled) { vscode.window.showInformationMessage(`Cancelled save: ${fileToSave}`) } else { @@ -1163,7 +1194,7 @@ async function serverStart() { getPidFile(omegaEditPort), logConfigFile ), - new Promise((resolve, reject) => { + new Promise((_resolve, reject) => { setTimeout(() => { reject((): Error => { return new Error( diff --git a/src/svelte/src/components/DataMetrics/DataMetrics.svelte b/src/svelte/src/components/DataMetrics/DataMetrics.svelte index d2ebd3847..9c6e65467 100644 --- a/src/svelte/src/components/DataMetrics/DataMetrics.svelte +++ b/src/svelte/src/components/DataMetrics/DataMetrics.svelte @@ -36,6 +36,16 @@ limitations under the License. // number of bytes to profile from the start offset export let length: number + class CharacterCountData { + byteOrderMark: string = '' + byteOrderMarkBytes: number = 0 + singleByteCount: number = 0 + doubleByteCount: number = 0 + tripleByteCount: number = 0 + quadByteCount: number = 0 + invalidBytes: number = 0 + } + let endOffset: number = 0 let byteProfile: number[] = [] let currentTooltip: { index: number; value: number } | null = null @@ -47,6 +57,7 @@ limitations under the License. let mean: number = 0 let variance: number = 0 let stdDev: number = 0 + let characterCountData: CharacterCountData = new CharacterCountData() let numAscii: number = 0 let numDistinct: number = 0 let fieldBeingEdited: string = '' @@ -257,6 +268,16 @@ limitations under the License. case MessageCommand.profile: numAscii = msg.data.data.numAscii as number byteProfile = msg.data.data.byteProfile as number[] + + // character count data + characterCountData.byteOrderMark = msg.data.data.characterCount.byteOrderMark as string + characterCountData.byteOrderMarkBytes = msg.data.data.characterCount.byteOrderMarkBytes as number + characterCountData.singleByteCount = msg.data.data.characterCount.singleByteCount as number + characterCountData.doubleByteCount = msg.data.data.characterCount.doubleByteCount as number + characterCountData.tripleByteCount = msg.data.data.characterCount.tripleByteCount as number + characterCountData.quadByteCount = msg.data.data.characterCount.quadByteCount as number + characterCountData.invalidBytes = msg.data.data.characterCount.invalidBytes as number + setStatusMessage( `Profiled bytes from ${startOffset} to ${startOffset + length}` ) @@ -491,6 +512,16 @@ limitations under the License. >{((numAscii / sum) * 100).toFixed(2)} + +
+
+ + + + + + +