Skip to content

Commit

Permalink
add BOM detection, and language guessing, and add multi-byte characte…
Browse files Browse the repository at this point in the history
…r counts to the profiler
  • Loading branch information
scholarsmate committed Sep 29, 2023
1 parent c860008 commit cbc8fe9
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 24 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"@vscode/debugadapter": "1.63.0",
"await-notify": "1.0.1",
"hexy": "0.3.5",
"iso-639-1": "^3.1.0",
"jsonc-parser": "3.2.0",
"semver": "7.5.4",
"unzip-stream": "0.3.1",
Expand Down
77 changes: 54 additions & 23 deletions src/dataEditor/dataEditorClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import fs from 'fs'
import {
ALL_EVENTS,
clear,
countCharacters,
CountKind,
createSession,
createSimpleFileLogger,
Expand Down Expand Up @@ -163,8 +164,6 @@ export class DataEditorClient implements vscode.Disposable {
private currentViewportId: string
private fileToEdit: string = ''
private omegaSessionId = ''
private contentType = ''
private fileSize = 0
private sendHeartbeatIntervalId: NodeJS.Timeout | number | undefined =
undefined

Expand All @@ -187,8 +186,6 @@ export class DataEditorClient implements vscode.Disposable {
this.svelteWebviewInitializer = new SvelteWebviewInitializer(context)
this.svelteWebviewInitializer.initialize(this.view, this.panel.webview)
this.currentViewportId = ''
this.contentType = ''
this.fileSize = 0
this.fileToEdit = fileToEdit
this.displayState = new DisplayState(this.panel)
}
Expand Down Expand Up @@ -237,6 +234,17 @@ export class DataEditorClient implements vscode.Disposable {
'checkpointPath is not set'
)

let data = {
byteOrderMark: '',
changeCount: 0,
computedFileSize: 0,
diskFileSize: 0,
fileName: this.fileToEdit,
language: '',
type: '',
undoCount: 0,
}

// create a session and capture the session id, file size, content type, byte order mark, and language
try {
const createSessionResponse = await createSession(
Expand All @@ -248,14 +256,30 @@ export class DataEditorClient implements vscode.Disposable {
assert(this.omegaSessionId.length > 0, 'omegaSessionId is not set')
addActiveSession(this.omegaSessionId)

this.contentType = createSessionResponse.hasContentType()
data.diskFileSize = data.computedFileSize =
createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0

data.type = createSessionResponse.hasContentType()
? (createSessionResponse.getContentType() as string)
: 'unknown'
assert(this.contentType.length > 0, 'contentType is not set')
assert(data.type.length > 0, 'contentType is not set')

data.byteOrderMark = createSessionResponse.hasByteOrderMark()
? (createSessionResponse.getByteOrderMark() as string)
: 'unknown'
assert(data.byteOrderMark.length > 0, 'byteOrderMark is not set')

this.fileSize = createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0
data.language = createSessionResponse.hasLanguage()
? (createSessionResponse.getLanguage() as string)
: 'unknown'
assert(data.language.length > 0, 'language is not set')

data.diskFileSize = data.computedFileSize =
createSessionResponse.hasFileSize()
? (createSessionResponse.getFileSize() as number)
: 0
} catch {
const msg = `Failed to create session for ${this.fileToEdit}`
getLogger().error({
Expand Down Expand Up @@ -294,14 +318,7 @@ export class DataEditorClient implements vscode.Disposable {
// send the initial file info to the webview
await this.panel.webview.postMessage({
command: MessageCommand.fileInfo,
data: {
changeCount: 0,
computedFileSize: this.fileSize,
diskFileSize: this.fileSize,
fileName: this.fileToEdit,
type: this.contentType,
undoCount: 0,
},
data: data,
})
}

Expand Down Expand Up @@ -443,13 +460,27 @@ export class DataEditorClient implements vscode.Disposable {
startOffset,
length
)
const characterCount = await countCharacters(
this.omegaSessionId,
startOffset,
length
)
await this.panel.webview.postMessage({
command: MessageCommand.profile,
data: {
startOffset: startOffset,
length: length,
byteProfile: byteProfile,
numAscii: numAscii(byteProfile),
characterCount: {
byteOrderMark: characterCount.getByteOrderMark(),
byteOrderMarkBytes: characterCount.getByteOrderMarkBytes(),
singleByteCount: characterCount.getSingleByteChars(),
doubleByteCount: characterCount.getDoubleByteChars(),
tripleByteCount: characterCount.getTripleByteChars(),
quadByteCount: characterCount.getQuadByteChars(),
invalidBytes: characterCount.getInvalidBytes(),
},
},
})
}
Expand Down Expand Up @@ -610,16 +641,16 @@ export class DataEditorClient implements vscode.Disposable {

if (saved) {
this.fileToEdit = fileToSave
this.fileSize = await getComputedFileSize(this.omegaSessionId)
const fileSize = await getComputedFileSize(this.omegaSessionId)
await this.panel.webview.postMessage({
command: MessageCommand.fileInfo,
data: {
computedFileSize: this.fileSize,
diskFileSize: this.fileSize,
fileName: this.fileToEdit,
computedFileSize: fileSize,
diskFileSize: fileSize,
fileName: fileToSave,
},
})
vscode.window.showInformationMessage(`Saved: ${this.fileToEdit}`)
vscode.window.showInformationMessage(`Saved: ${fileToSave}`)
} else if (cancelled) {
vscode.window.showInformationMessage(`Cancelled save: ${fileToSave}`)
} else {
Expand Down Expand Up @@ -1163,7 +1194,7 @@ async function serverStart() {
getPidFile(omegaEditPort),
logConfigFile
),
new Promise((resolve, reject) => {
new Promise((_resolve, reject) => {
setTimeout(() => {
reject((): Error => {
return new Error(
Expand Down
31 changes: 31 additions & 0 deletions src/svelte/src/components/DataMetrics/DataMetrics.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ limitations under the License.
// number of bytes to profile from the start offset
export let length: number
/**
 * Character-count figures shown in the "char-count" section of the
 * data metrics panel. Every field starts out empty/zero and is filled in
 * from `msg.data.data.characterCount` when a `MessageCommand.profile`
 * message is handled.
 */
class CharacterCountData {
  /** Byte order mark reported for the profiled data ('' until set). */
  byteOrderMark = ''
  /** Value reported as "BOM Bytes" (presumably the BOM length in bytes). */
  byteOrderMarkBytes = 0
  /** Count of single-byte characters. */
  singleByteCount = 0
  /** Count of double-byte characters. */
  doubleByteCount = 0
  /** Count of triple-byte characters. */
  tripleByteCount = 0
  /** Count of quad-byte characters. */
  quadByteCount = 0
  /** Count of bytes reported as invalid. */
  invalidBytes = 0
}
let endOffset: number = 0
let byteProfile: number[] = []
let currentTooltip: { index: number; value: number } | null = null
Expand All @@ -47,6 +57,7 @@ limitations under the License.
let mean: number = 0
let variance: number = 0
let stdDev: number = 0
let characterCountData: CharacterCountData = new CharacterCountData()
let numAscii: number = 0
let numDistinct: number = 0
let fieldBeingEdited: string = ''
Expand Down Expand Up @@ -257,6 +268,16 @@ limitations under the License.
case MessageCommand.profile:
numAscii = msg.data.data.numAscii as number
byteProfile = msg.data.data.byteProfile as number[]
// character count data
characterCountData.byteOrderMark = msg.data.data.characterCount.byteOrderMark as string
characterCountData.byteOrderMarkBytes = msg.data.data.characterCount.byteOrderMarkBytes as number
characterCountData.singleByteCount = msg.data.data.characterCount.singleByteCount as number
characterCountData.doubleByteCount = msg.data.data.characterCount.doubleByteCount as number
characterCountData.tripleByteCount = msg.data.data.characterCount.tripleByteCount as number
characterCountData.quadByteCount = msg.data.data.characterCount.quadByteCount as number
characterCountData.invalidBytes = msg.data.data.characterCount.invalidBytes as number
setStatusMessage(
`Profiled bytes from ${startOffset} to ${startOffset + length}`
)
Expand Down Expand Up @@ -491,6 +512,16 @@ limitations under the License.
>{((numAscii / sum) * 100).toFixed(2)}</span
>
</label>
</div>
<hr />
<div class="char-count">
<label for="char-count-bom">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;BOM: <span id="char-count-bom" class="nowrap">{characterCountData.byteOrderMark}</span></label>
<label for="char-count-bom-bytes">&nbsp;&nbsp;BOM Bytes: <span id="char-count-bom-bytes" class="nowrap">{characterCountData.byteOrderMarkBytes}</span></label>
<label for="char-count-single">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Single: <span id="char-count-single" class="nowrap">{characterCountData.singleByteCount}</span></label>
<label for="char-count-double">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Double: <span id="char-count-double" class="nowrap">{characterCountData.doubleByteCount}</span></label>
<label for="char-count-triple">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Triple: <span id="char-count-triple" class="nowrap">{characterCountData.tripleByteCount}</span></label>
<label for="char-count-quad">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Quad: <span id="char-count-quad" class="nowrap">{characterCountData.quadByteCount}</span></label>
<label for="char-count-invalid">&nbsp;&nbsp;&nbsp;&nbsp;Invalid: <span id="char-count-invalid" class="nowrap">{characterCountData.invalidBytes}</span></label>
</div>
<hr />
<Button
Expand Down
22 changes: 21 additions & 1 deletion src/svelte/src/components/Header/fieldsets/FileMetrics.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ limitations under the License.
import { humanReadableByteLength } from '../../../utilities/display'
import { DATA_PROFILE_MAX_LENGTH } from '../../../stores/configuration'
import Tooltip from '../../layouts/Tooltip.svelte'
import ISO6391 from 'iso-639-1'
const eventDispatcher = createEventDispatcher()
let displayOpts = false
Expand Down Expand Up @@ -71,6 +72,12 @@ limitations under the License.
if ('type' in msg.data.data) {
$fileMetrics.type = msg.data.data.type
}
if ('language' in msg.data.data) {
$fileMetrics.language = msg.data.data.language
}
if ('byteOrderMark' in msg.data.data) {
$fileMetrics.byteOrderMark = msg.data.data.byteOrderMark
}
if ('diskFileSize' in msg.data.data) {
$fileMetrics.diskSize = msg.data.data.diskFileSize
}
Expand Down Expand Up @@ -179,7 +186,20 @@ limitations under the License.
</FlexContainer>
<FlexContainer --dir="column">
<label for="content_type">Content Type</label>
<span id="content_type" class="nowrap">{$fileMetrics.type}</span>
<Tooltip
description="{$fileMetrics.type}"
alwaysEnabled={true}
>
<span id="content_type" class="nowrap">{$fileMetrics.type.split('/').pop()}</span>
</Tooltip>
</FlexContainer>
<FlexContainer --dir="column">
<label for="language">Language</label>
<Tooltip
description="{ISO6391.getName($fileMetrics.language)}"
alwaysEnabled={true}>
<span id="language" class="nowrap">{$fileMetrics.language}</span>
</Tooltip>
</FlexContainer>
</FlexContainer>
<hr />
Expand Down
2 changes: 2 additions & 0 deletions src/svelte/src/components/Header/fieldsets/FileMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import { SimpleWritable } from '../../../stores/localStore'
class FileMetricsData {
name: string = ''
type: string = ''
byteOrderMark: string = ''
language: string = ''
diskSize: number = 0
computedSize: number = 0
changeCount: number = 0
Expand Down
5 changes: 5 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2014,6 +2014,11 @@ isexe@^2.0.0:
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==

iso-639-1@^3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/iso-639-1/-/iso-639-1-3.1.0.tgz#62611c680eba80ccedb57c3fa00d048f7c866693"
integrity sha512-rWcHp9dcNbxa5C8jA/cxFlWNFNwy5Vup0KcFvgA8sPQs9ZeJHj/Eq0Y8Yz2eL8XlWYpxw4iwh9FfTeVxyqdRMw==

isobject@^3.0.1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df"
Expand Down

0 comments on commit cbc8fe9

Please sign in to comment.