Skip to content

Commit

Permalink
output CSV with BOM if output format is UTF-8 or UTF-16
Browse files Browse the repository at this point in the history
output CSV with BOM if output format is UTF-8 or UTF-16
  • Loading branch information
ashitsalesforce committed Dec 13, 2024
1 parent 08f47ef commit a1cac97
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ public class AppConfig {
public static final String PROP_EXTRACT_SOQL = "sfdc.extractionSOQL"; //$NON-NLS-1$
public static final String PROP_SORT_EXTRACT_FIELDS = "sfdc.sortExtractionFields"; //$NON-NLS-1$
// Boolean setting (default false): when true, extraction field headers are output in uppercase.
public static final String PROP_EXTRACT_ALL_CAPS_HEADERS="sfdc.extraction.allCapsHeaders";
// Boolean setting (default true): when true, CSVFileWriter.open() writes a Byte Order Mark at the start of the output CSV.
public static final String PROP_EXTRACT_CSV_OUTPUT_BOM="sfdc.extraction.outputByteOrderMark";
public static final String PROP_LOAD_PRESERVE_WHITESPACE_IN_RICH_TEXT = "sfdc.load.preserveWhitespaceInRichText";

//
Expand Down Expand Up @@ -780,6 +781,8 @@ private void setDefaults(Map<String, String> cliOptionsMap) {
setDefaultValue(PROP_GMT_FOR_DATE_FIELD_VALUE, false);
setDefaultValue(PROP_SAVE_ALL_PROPS, false);
setDefaultValue(PROP_EXTRACT_ALL_CAPS_HEADERS, false);
setDefaultValue(PROP_EXTRACT_CSV_OUTPUT_BOM, true);

}

/**
Expand Down Expand Up @@ -1749,7 +1752,7 @@ public String getCsvEncoding(boolean isWrite) {
return charset;
}

private static String defaultCharsetForCsvReadWrite = null;
private static String defaultCharsetForCsvReadWrite = Charset.defaultCharset().name();
private synchronized static String getDefaultCharsetForCsvReadWrite() {
if (defaultCharsetForCsvReadWrite != null) {
return defaultCharsetForCsvReadWrite;
Expand Down
31 changes: 24 additions & 7 deletions src/main/java/com/salesforce/dataloader/dao/csv/CSVFileWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

Expand Down Expand Up @@ -70,18 +71,24 @@ public class CSVFileWriter implements DataWriter {
/**
* <code>encoding</code> contains a value for output character encoding, blank indicates "use default"
*/
private final String encoding;
private String encoding;

/**
* If <code>capitalizedHeadings</code> is true, output header row in caps
*/
private boolean capitalizedHeadings = false;
private final char columnDelimiter;
private AppConfig appConfig;

public CSVFileWriter(String fileName, AppConfig appConfig, String columnDelimiterStr) {

this.fileName = fileName;
this.appConfig = appConfig;
encoding = appConfig.getCsvEncoding(true);
logger.debug("CSV encoding is set to " + Charset.forName(encoding));
if (encoding == null) {
encoding = Charset.defaultCharset().name();
}
logger.debug(this.getClass().getName(), "encoding used to write to CSV file is " + encoding);
if (columnDelimiterStr.length() == 0) {
columnDelimiterStr = AppUtil.COMMA;
Expand All @@ -107,19 +114,29 @@ public void checkConnection() throws DataAccessObjectInitializationException {
/**
 * Opens the output CSV file for writing using the configured encoding.
 *
 * <p>The file is opened exactly once. When
 * {@link AppConfig#PROP_EXTRACT_CSV_OUTPUT_BOM} is enabled, the bytes returned by
 * {@code getBOM()} are written to the raw stream before any character data, so
 * they end up at the very beginning of the file.
 *
 * @throws DataAccessObjectInitializationException if the file cannot be opened,
 *         the encoding is not supported, or the byte order mark cannot be written
 */
@Override
public void open() throws DataAccessObjectInitializationException {
    FileOutputStream os = null;
    try {
        os = new FileOutputStream(this.fileName);
        fileOut = new BufferedWriter(new OutputStreamWriter(os, this.encoding));
        currentRowNumber = 0;
        if (appConfig.getBoolean(AppConfig.PROP_EXTRACT_CSV_OUTPUT_BOM)) {
            // Nothing has gone through the writer yet, so writing to the
            // underlying stream directly keeps the BOM ahead of all content.
            os.write(getBOM());
        }
        setOpen(true);
    } catch (IOException e) {
        if (os != null) {
            try {
                // Do not leak the file handle when initialization fails half-way.
                os.close();
            } catch (IOException ignored) {
                // Best effort: the original failure is the one reported below.
            }
        }
        String errMsg = Messages.getFormattedString("CSVWriter.errorOpening", this.fileName);
        logger.error(errMsg, e);
        throw new DataAccessObjectInitializationException(errMsg, e);
    }
}

/**
 * Returns the Byte Order Mark that must be written explicitly at the start of
 * the output file for the configured encoding.
 *
 * <p>Only UTF-8 needs an explicit mark. Java's "UTF-16" encoder already writes
 * a big-endian mark (FE FF) in front of the first encoded character, so
 * returning one here as well would put two marks in the file and the second
 * would be decoded as a stray U+FEFF character in the first header cell.
 *
 * @return the UTF-8 BOM bytes when the encoding is UTF-8, otherwise an empty array
 */
private byte[] getBOM() {
    // Resolve the charset once; forName also normalizes aliases such as "utf8".
    Charset charset = Charset.forName(this.encoding);
    if (StandardCharsets.UTF_8.equals(charset)) {
        return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
    }
    // UTF-16: the encoder emits its own BOM. Other charsets: no BOM is written.
    return new byte[0];
}

/*
* (non-Javadoc)
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/labels.properties
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,7 @@ AppConfig.property.description.sfdc.oauth.Sandbox.clientsecret=Specify encrypted
AppConfig.property.description.sfdc.proxyNtlmDomain=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/configuring_the_data_loader.htm
AppConfig.property.description.sfdc.extractionRequestSize=Details documented at https://developer.salesforce.com/docs/atlas.en-us.dataLoader.meta/dataLoader/loader_params.htm
AppConfig.property.description.sfdc.extraction.allCapsHeaders=set it to "true" to output extraction field headers in capital letters (uppercase characters).
AppConfig.property.description.sfdc.extraction.outputByteOrderMark=Set to "true" by default. When set to "true", a Byte Order Mark (BOM) is written at the beginning of the extraction CSV file if the file is created in UTF-8 or UTF-16 format.
AppConfig.property.description.config.properties.readonly=Do not modify config.properties file even if the user makes changes through Settings dialog.
AppConfig.property.description.dataAccess.readCharset=Override system default charset by specifying charset to use for import operations. Set it to UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, or UTF-32LE to handle import CSVs with Byte Order Mark (BOM) character.
AppConfig.property.description.loader.cacheSObjectNamesAndField=Cache object names and fields metadata across multiple operations. Applicable in the UI mode because batch mode executes one operation and stops.
Expand Down

0 comments on commit a1cac97

Please sign in to comment.