Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Output BOM in extraction results for UTF-32LE and UTF-32BE charsets #1414

Merged
merged 1 commit into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ public void open() throws DataAccessObjectInitializationException {
private byte[] getBOM() {
if (StandardCharsets.UTF_8.equals(Charset.forName(this.encoding))) {
return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
} else if (this.encoding.startsWith(StandardCharsets.UTF_16.name())) {
} else if (this.encoding.startsWith(StandardCharsets.UTF_16.name())
|| this.encoding.startsWith("UTF-32")) {
return new byte[]{(byte) 0xFE, (byte) 0xFF};
}
return new byte[0];
Expand Down
76 changes: 61 additions & 15 deletions src/test/java/com/salesforce/dataloader/dao/CsvTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@
package com.salesforce.dataloader.dao;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.io.FileInputStream;
import java.io.InputStream;

import org.junit.Before;
import org.junit.Ignore;
Expand Down Expand Up @@ -57,24 +60,20 @@ public class CsvTest extends ConfigTestBase {
/**
 * JUnit {@code @Before} fixture setup: fills {@code writeHeader} with three
 * column labels and builds {@code row1} and {@code row2}, each holding one
 * value per column.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers,
 * so the previous and the new statements appear side by side (for example
 * {@code header} is declared twice). Confirm against the real file before
 * relying on the exact statement sequence.
 */
@Before
public void createTestData() {
writeHeader = new ArrayList<String>(3);
// NOTE(review): presumably the pre-change labels (upper-case COLn) — confirm.
writeHeader.add("COL1");
writeHeader.add("COL2");
writeHeader.add("COL3");

// NOTE(review): presumably the pre-change header construction from a separate label list — confirm.
ArrayList<String> headerLabelList = new ArrayList<String>();
headerLabelList.add("COL1");
headerLabelList.add("COL2");
headerLabelList.add("COL3");
TableHeader header = new TableHeader(headerLabelList);
// NOTE(review): presumably the post-change labels (lower-case columnN) — confirm.
writeHeader.add("column1");
writeHeader.add("column2");
writeHeader.add("column3");

// The header is built directly from writeHeader, so the row keys below can be
// looked up by position in that list instead of repeating string literals.
TableHeader header = new TableHeader(writeHeader);
row1 = new TableRow(header);
// NOTE(review): presumably the pre-change row values keyed by literal labels — confirm.
row1.put("COL1", "row1col1");
row1.put("COL2", "row1col2");
row1.put("COL3", "row1col3");
// Row values keyed by the labels stored in writeHeader.
row1.put(writeHeader.get(0), "row1-1");
row1.put(writeHeader.get(1), "row1-2");
row1.put(writeHeader.get(2), "row1-3");

row2 = new TableRow(header);
// NOTE(review): presumably the pre-change row values keyed by literal labels — confirm.
row2.put("COL1", "row2col1");
row2.put("COL2", "row2col2");
row2.put("COL3", "row2col3");
// Row values keyed by the labels stored in writeHeader.
row2.put(writeHeader.get(0), "row2-1");
row2.put(writeHeader.get(1), "row2-2");
row2.put(writeHeader.get(2), "row2-3");
}
@Test
public void testCSVReadBasic() throws Exception {
Expand All @@ -84,19 +83,43 @@ public void testCSVReadBasic() throws Exception {
/**
 * Reads the UTF-8 sample CSV through the shared read check and then asserts
 * that the sample file on disk begins with a byte order mark.
 */
@Test
public void testCSVReadUTF8BOMBasic() throws Exception {
    final String csvName = "csvtext_BOM_UTF8.csv";
    testCSVReadBasic(csvName);

    // Same path is used for the failure message and for the BOM probe.
    final String csvPath = getTestDataDir() + "/" + csvName;
    final boolean bomPresent = hasBOM(csvPath);
    assertTrue("did not find BOM in " + csvPath, bomPresent);
}

/**
 * Sets the read charset to UTF-16BE, runs the shared read check on the
 * UTF-16BE sample CSV, and asserts that the sample file begins with a byte
 * order mark.
 *
 * The read charset is reset to the empty string in a {@code finally} block so
 * that a failing read or assertion does not leave the setting behind.
 */
@Test
public void testCSVReadUTF16BEBOMBasic() throws Exception {
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-16BE");
    try {
        testCSVReadBasic("csvtext_BOM_UTF16BE.csv");
        assertTrue("did not find BOM in " + getTestDataDir() + "/csvtext_BOM_UTF16BE.csv",
                hasBOM(getTestDataDir() + "/csvtext_BOM_UTF16BE.csv"));
    } finally {
        // Always undo the charset override, even when the checks above throw.
        getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "");
    }
}

/**
 * Sets the read charset to UTF-16LE, runs the shared read check on the
 * UTF-16LE sample CSV, and asserts that the sample file begins with a byte
 * order mark.
 *
 * The read charset is reset to the empty string in a {@code finally} block so
 * that a failing read or assertion does not leave the setting behind.
 */
@Test
public void testCSVReadUTF16LEBOMBasic() throws Exception {
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-16LE");
    try {
        testCSVReadBasic("csvtext_BOM_UTF16LE.csv");
        assertTrue("did not find BOM in " + getTestDataDir() + "/csvtext_BOM_UTF16LE.csv",
                hasBOM(getTestDataDir() + "/csvtext_BOM_UTF16LE.csv"));
    } finally {
        // Always undo the charset override, even when the checks above throw.
        getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "");
    }
}

/**
 * Sets the read charset to UTF-32LE, runs the shared read check on the
 * UTF-32LE sample CSV, and asserts that the sample file begins with a byte
 * order mark.
 *
 * The read charset is reset to the empty string in a {@code finally} block so
 * that a failing read or assertion does not leave the setting behind.
 */
@Test
public void testCSVReadUTF32LEBOMBasic() throws Exception {
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-32LE");
    try {
        testCSVReadBasic("csvtext_BOM_UTF32LE.csv");
        assertTrue("did not find BOM in " + getTestDataDir() + "/csvtext_BOM_UTF32LE.csv",
                hasBOM(getTestDataDir() + "/csvtext_BOM_UTF32LE.csv"));
    } finally {
        // Always undo the charset override, even when the checks above throw.
        getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "");
    }
}

/**
 * Sets the read charset to UTF-32BE, runs the shared read check on the
 * UTF-32BE sample CSV, and asserts that the sample file begins with a byte
 * order mark.
 *
 * The read charset is reset to the empty string in a {@code finally} block so
 * that a failing read or assertion does not leave the setting behind.
 */
@Test
public void testCSVReadUTF32BEBOMBasic() throws Exception {
    getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "UTF-32BE");
    try {
        testCSVReadBasic("csvtext_BOM_UTF32BE.csv");
        assertTrue("did not find BOM in " + getTestDataDir() + "/csvtext_BOM_UTF32BE.csv",
                hasBOM(getTestDataDir() + "/csvtext_BOM_UTF32BE.csv"));
    } finally {
        // Always undo the charset override, even when the checks above throw.
        getController().getAppConfig().setValue(AppConfig.PROP_READ_CHARSET, "");
    }
}

Expand Down Expand Up @@ -125,6 +148,28 @@ private void testCSVReadBasic(String csvFile) throws Exception {

csv.close();
}


/**
 * Reports whether the file at {@code filePath} begins with a Unicode byte
 * order mark.
 *
 * <p>Recognised prefixes:
 * <ul>
 *   <li>{@code EF BB BF} (UTF-8)</li>
 *   <li>{@code FE FF} (UTF-16BE)</li>
 *   <li>{@code FF FE} (UTF-16LE; also the first two bytes of the UTF-32LE
 *       mark {@code FF FE 00 00})</li>
 *   <li>{@code 00 00 FE FF} (UTF-32BE)</li>
 * </ul>
 * Only the leading bytes are inspected; whatever follows them is ignored.
 * Files shorter than a given mark simply do not match it, so a two-byte file
 * containing only a UTF-16 mark is still detected.
 *
 * @param filePath path of the file to inspect
 * @return {@code true} if the file starts with one of the prefixes above
 * @throws IOException if the file cannot be opened or read
 */
public static boolean hasBOM(String filePath) throws IOException {
    byte[] head = new byte[4];
    int filled = 0;
    try (InputStream is = new FileInputStream(filePath)) {
        // A single read() may legally return fewer bytes than requested, so
        // keep reading until the buffer is full or the stream ends.
        while (filled < head.length) {
            int n = is.read(head, filled, head.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
    }

    // UTF-8
    if (filled >= 3
            && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF) {
        return true;
    }
    if (filled >= 2) {
        // UTF-16BE
        if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
            return true;
        }
        // UTF-16LE, and the leading half of the UTF-32LE mark
        if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
            return true;
        }
    }
    // UTF-32BE
    return filled >= 4
            && head[0] == 0x00 && head[1] == 0x00
            && head[2] == (byte) 0xFE && head[3] == (byte) 0xFF;
}

@Test
public void testCSVWriteBasic() throws Exception {
Expand Down Expand Up @@ -196,6 +241,7 @@ private void doTestCSVWriteBasic(String delimiter) throws Exception {

writer.writeRowList(rowList);
writer.close();
assertTrue("did not find BOM in " + path, hasBOM(path));

compareWriterFile(path, delimiter, false, false); // 3rd param false and 4th param false => CSV for a upload
compareWriterFile(path, delimiter, false, true); // 3rd param false and 4th param true => query result CSV
Expand Down
Binary file not shown.
Binary file not shown.
Loading