Skip to content

Commit

Permalink
Normalize Latin-1 to Latin-9
Browse files Browse the repository at this point in the history
  • Loading branch information
jdesboeufs committed Sep 17, 2024
1 parent 795a08e commit fc5fd47
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 13 deletions.
8 changes: 3 additions & 5 deletions lib/detect-encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,14 @@ const ENCODING_TO_DETECT = new Set([
'windows-1252'
])

- const DEFAULT_ENCODING = 'UTF-8'
-
export function selectEncoding(result) {
const candidate = result.find(r => ENCODING_TO_DETECT.has(r.name))

- if (candidate) {
- return candidate.name
+ if (!candidate || candidate.name === 'UTF-8') {
+ return 'UTF-8'
}

- return DEFAULT_ENCODING
+ return 'ISO-8859-15'
}

export function detectEncoding(buffer) {
Expand Down
2 changes: 1 addition & 1 deletion test/auto-decode-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ test('createAutoDecodeStream / ISO-8859-1', async t => {
.pipe(autoDecodeStream)
)

- t.is(encoding, 'ISO-8859-1')
+ t.is(encoding, 'ISO-8859-15')
t.is(decodedString, 'éléphant')
})

Expand Down
4 changes: 2 additions & 2 deletions test/csv.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ test('detecting CSV/ISO-8859-1', async t => {
t.deepEqual(result, {
format: 'csv',
formatOptions: {
- encoding: 'ISO-8859-1',
+ encoding: 'ISO-8859-15',
delimiter: ',',
linebreak: '\n',
quoteChar: '"'
Expand All @@ -50,7 +50,7 @@ test('parsing invalid CSV', async t => {
t.deepEqual(result, {
format: 'csv',
formatOptions: {
- encoding: 'ISO-8859-1',
+ encoding: 'ISO-8859-15',
delimiter: ',',
linebreak: '\n',
quoteChar: '"'
Expand Down
10 changes: 5 additions & 5 deletions test/detect-encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@ test('selectEncoding empty', t => {
t.is(selectEncoding([]), 'UTF-8')
})

- test('selectEncoding one allowed', t => {
- t.is(selectEncoding([{name: 'ISO-8859-1'}]), 'ISO-8859-1')
+ test('selectEncoding fallback Latin-9', t => {
+ t.is(selectEncoding([{name: 'ISO-8859-1'}]), 'ISO-8859-15')
})

test('selectEncoding one rejected', t => {
t.is(selectEncoding([{name: 'ISO-8859-9'}]), 'UTF-8')
})

- test('selectEncoding multiple allowed', t => {
+ test('selectEncoding multiple allowed fallback', t => {
t.is(selectEncoding([
{name: 'ISO-8859-9'},
{name: 'ISO-8859-1'},
{name: 'ISO-8859-6'}
- ]), 'ISO-8859-1')
+ ]), 'ISO-8859-15')
})

test('selectEncoding multiple rejected', t => {
Expand All @@ -37,7 +37,7 @@ test('detectEncoding / UTF-8', t => {

test('detectEncoding / ISO-8859-1', t => {
const text = iconv.encode('éléphant', 'ISO-8859-1')
- t.is(detectEncoding(text), 'ISO-8859-1')
+ t.is(detectEncoding(text), 'ISO-8859-15')
})

test('detectEncoding / fallback UTF-8 when unknown', t => {
Expand Down

0 comments on commit fc5fd47

Please sign in to comment.