From fc5fd47356db504de02c8ce09eb222f2ffe12bcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Desboeufs?=
Date: Tue, 17 Sep 2024 11:48:10 +0200
Subject: [PATCH] Normalize Latin-1 to Latin-9

---
 lib/detect-encoding.js     |  8 +++-----
 test/auto-decode-stream.js |  2 +-
 test/csv.js                |  4 ++--
 test/detect-encoding.js    | 10 +++++-----
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/lib/detect-encoding.js b/lib/detect-encoding.js
index 2df6739..d6cbf34 100644
--- a/lib/detect-encoding.js
+++ b/lib/detect-encoding.js
@@ -6,16 +6,14 @@ const ENCODING_TO_DETECT = new Set([
   'windows-1252'
 ])
 
-const DEFAULT_ENCODING = 'UTF-8'
-
 export function selectEncoding(result) {
   const candidate = result.find(r => ENCODING_TO_DETECT.has(r.name))
 
-  if (candidate) {
-    return candidate.name
+  if (!candidate || candidate.name === 'UTF-8') {
+    return 'UTF-8'
   }
 
-  return DEFAULT_ENCODING
+  return 'ISO-8859-15'
 }
 
 export function detectEncoding(buffer) {
diff --git a/test/auto-decode-stream.js b/test/auto-decode-stream.js
index c10ec32..770295f 100644
--- a/test/auto-decode-stream.js
+++ b/test/auto-decode-stream.js
@@ -19,7 +19,7 @@ test('createAutoDecodeStream / ISO-8859-1', async t => {
       .pipe(autoDecodeStream)
   )
 
-  t.is(encoding, 'ISO-8859-1')
+  t.is(encoding, 'ISO-8859-15')
   t.is(decodedString, 'éléphant')
 })
 
diff --git a/test/csv.js b/test/csv.js
index e99ee21..67fd3a1 100644
--- a/test/csv.js
+++ b/test/csv.js
@@ -31,7 +31,7 @@ test('detecting CSV/ISO-8859-1', async t => {
   t.deepEqual(result, {
     format: 'csv',
     formatOptions: {
-      encoding: 'ISO-8859-1',
+      encoding: 'ISO-8859-15',
       delimiter: ',',
       linebreak: '\n',
       quoteChar: '"'
@@ -50,7 +50,7 @@ test('parsing invalid CSV', async t => {
   t.deepEqual(result, {
     format: 'csv',
     formatOptions: {
-      encoding: 'ISO-8859-1',
+      encoding: 'ISO-8859-15',
       delimiter: ',',
       linebreak: '\n',
       quoteChar: '"'
diff --git a/test/detect-encoding.js b/test/detect-encoding.js
index 6a1a76d..1c9b9f0 100644
--- a/test/detect-encoding.js
+++ b/test/detect-encoding.js
@@ -7,20 +7,20 @@ test('selectEncoding empty', t => {
   t.is(selectEncoding([]), 'UTF-8')
 })
 
-test('selectEncoding one allowed', t => {
-  t.is(selectEncoding([{name: 'ISO-8859-1'}]), 'ISO-8859-1')
+test('selectEncoding fallback Latin-9', t => {
+  t.is(selectEncoding([{name: 'ISO-8859-1'}]), 'ISO-8859-15')
 })
 
 test('selectEncoding one rejected', t => {
   t.is(selectEncoding([{name: 'ISO-8859-9'}]), 'UTF-8')
 })
 
-test('selectEncoding multiple allowed', t => {
+test('selectEncoding multiple allowed fallback', t => {
   t.is(selectEncoding([
     {name: 'ISO-8859-9'},
     {name: 'ISO-8859-1'},
     {name: 'ISO-8859-6'}
-  ]), 'ISO-8859-1')
+  ]), 'ISO-8859-15')
 })
 
 test('selectEncoding multiple rejected', t => {
@@ -37,7 +37,7 @@ test('detectEncoding / UTF-8', t => {
 
 test('detectEncoding / ISO-8859-1', t => {
   const text = iconv.encode('éléphant', 'ISO-8859-1')
-  t.is(detectEncoding(text), 'ISO-8859-1')
+  t.is(detectEncoding(text), 'ISO-8859-15')
 })
 
 test('detectEncoding / fallback UTF-8 when unknown', t => {