-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathCSVWriter.java
319 lines (286 loc) · 10.4 KB
/
CSVWriter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
/*
* Made with all the love in the world
* by scireum in Remshalden, Germany
*
* Copyright by scireum GmbH
* http://www.scireum.de - info@scireum.de
*/
package sirius.kernel.commons;
import sirius.kernel.nls.NLS;
import java.io.Closeable;
import java.io.IOException;
import java.io.Writer;
import java.util.List;
/**
 * Writes rows of data as CSV (comma separated values) files.
 * <p>
 * By default <tt>;</tt> is used to separate columns and line breaks are used to separate rows. If a column value
 * contains the separator character or a line break, it is quoted using <tt>"</tt>.
 * <p>
 * If the quotation character occurs within an already quoted string, it is escaped using <tt>\</tt>. If no
 * quotation character is specified (set to <tt>\0</tt>), the escape character is used if possible. If quoting or
 * escaping is required but disabled (using <tt>\0</tt> for their respective value), an exception will be thrown as no
 * valid output can be generated.
 * <p>
 * All <tt>with...</tt> methods return the writer itself so that the configuration can be chained fluently.
 */
public class CSVWriter implements Closeable {

    private String lineSeparator = "\n";
    private final Writer writer;

    // Remains true until the first row has been started; used to emit line separators between rows only.
    private boolean firstLine = true;

    private char separator = ';';

    // String form of the separator, kept in sync by withSeparator so that String.contains can be used.
    private String separatorString = String.valueOf(';');

    private char quotation = '"';

    // True if quotation has been disabled by setting the quotation character to \0.
    private boolean isQuotationEmpty = false;

    private boolean forceQuotation = false;
    private char escape = '\\';

    // True if escaping has been disabled by setting the escape character to \0.
    private boolean isEscapeEmpty = false;

    private boolean trim = true;

    /**
     * Creates a new writer sending data to the given writer.
     * <p>
     * The writer is closed by calling {@link #close()}.
     *
     * @param writer the target to write data to
     */
    public CSVWriter(Writer writer) {
        this.writer = writer;
    }

    /**
     * Emits a UNICODE byte order mark.
     * <p>
     * This must be invoked right after the writer has been created before any headers or rows have been written.
     *
     * @return the writer itself for fluent method calls
     * @throws IOException in case an IO error occurs while writing the BOM
     */
    public CSVWriter writeUnicodeBOM() throws IOException {
        this.writer.write(Streams.UNICODE_BOM_CHARACTER);
        return this;
    }

    /**
     * Specifies the separator character to use.
     * <p>
     * By default this is <tt>;</tt>.
     *
     * @param separator the separator to use
     * @return the writer itself for fluent method calls
     */
    public CSVWriter withSeparator(char separator) {
        this.separator = separator;
        this.separatorString = String.valueOf(separator);
        return this;
    }

    /**
     * Specifies the quotation character to use.
     * <p>
     * By default this is <tt>"</tt>. Use <tt>\0</tt> to disable quotation entirely. Note that quotation is required
     * if columns contain the separator character or a line break.
     *
     * @param quotation the quotation character to use
     * @return the writer itself for fluent method calls
     */
    public CSVWriter withQuotation(char quotation) {
        this.quotation = quotation;
        this.isQuotationEmpty = quotation == '\0';
        return this;
    }

    /**
     * Specifies the escape character to use.
     * <p>
     * By default this is <tt>\</tt>. Use <tt>\0</tt> to disable escaping entirely. Note that escaping is required if
     * columns contain a quotation character inside an already quoted column. Or if values contain the separator
     * character and no quotation is possible (quotation character is \0).
     *
     * @param escape the escape character to use
     * @return the writer itself for fluent method calls
     */
    public CSVWriter withEscape(char escape) {
        this.escape = escape;
        this.isEscapeEmpty = escape == '\0';
        return this;
    }

    /**
     * Controls if each added cell value of the type String should be trimmed or not.
     * <p>
     * By default this is <tt>true</tt>. Use <tt>false</tt> if strings should not be trimmed.
     *
     * @param trim the value controlling if strings should be trimmed
     * @return the writer itself for fluent method calls
     */
    public CSVWriter withInputTrimming(boolean trim) {
        this.trim = trim;
        return this;
    }

    /**
     * Specifies the lineSeparator used to create new lines.
     * <p>
     * By default this is <tt>\n</tt>.
     *
     * @param lineSeparator the lineSeparator to use
     * @return the writer itself for fluent method calls
     */
    public CSVWriter withLineSeparator(String lineSeparator) {
        this.lineSeparator = lineSeparator;
        return this;
    }

    /**
     * Specifies whether or not all fields in the generated CSV should be enclosed with the specified quotation
     * character.
     * <p>
     * By default this is <tt>false</tt>, which means only fields that require quotation because they contain
     * the separator character or a line break are enclosed with quotations.
     *
     * @param force if all fields should be quoted regardless of content or not
     * @return the writer itself for fluent method calls
     */
    public CSVWriter withForceQuotation(boolean force) {
        this.forceQuotation = force;
        return this;
    }

    /**
     * Writes the given list of values as row.
     *
     * @param row the data to write. A <tt>null</tt> list is skipped entirely (nothing is written), whereas
     *            <tt>null</tt> elements within the list are written as empty columns.
     * @return the writer itself for fluent method calls
     * @throws IOException in case of an IO error when writing to the underlying writer
     */
    public CSVWriter writeList(List<?> row) throws IOException {
        if (row != null) {
            writeRow(row.toArray());
        }
        return this;
    }

    /**
     * Writes the given array of values as row.
     *
     * @param row the data to write. A <tt>null</tt> array is skipped entirely (nothing is written), just like in
     *            {@link #writeList(List)}, whereas <tt>null</tt> elements are written as empty columns. Note that
     *            invoking this method with a single literal <tt>null</tt> passes a <tt>null</tt> array.
     * @return the writer itself for fluent method calls
     * @throws IOException in case of an IO error when writing to the underlying writer
     */
    public CSVWriter writeArray(Object... row) throws IOException {
        if (row != null) {
            writeRow(row);
        }
        return this;
    }

    /**
     * Starts a new row and outputs all given columns divided by the separator character.
     *
     * @param columns the non-null column values of the row
     * @throws IOException in case of an IO error when writing to the underlying writer
     */
    private void writeRow(Object[] columns) throws IOException {
        writeLineSeparator();
        for (int index = 0; index < columns.length; index++) {
            if (index > 0) {
                writer.write(separator);
            }
            writeColumn(columns[index]);
        }
    }

    /**
     * Outputs the line separator in front of every row except the very first one.
     *
     * @throws IOException in case of an IO error while writing
     */
    private void writeLineSeparator() throws IOException {
        if (firstLine) {
            firstLine = false;
            return;
        }
        writer.write(lineSeparator);
    }

    /**
     * Effectively outputs the given object as next column value.
     *
     * @param object the value to output
     * @throws IOException in case of an IO error while writing
     */
    private void writeColumn(Object object) throws IOException {
        String stringValue = convertToString(object);
        if (shouldQuote(stringValue)) {
            writer.append(quotation);
            writer.append(escapeValue(stringValue, true));
            writer.append(quotation);
        } else {
            writer.append(escapeValue(stringValue, false));
        }
    }

    /**
     * Determines if the given value needs to be quoted.
     * <p>
     * Disabled quotation always wins, even over forced quotation. Otherwise a value is quoted if quotation is
     * forced or if it contains the separator or a line break (<tt>\n</tt> or <tt>\r</tt>).
     * <p>
     * NOTE(review): a value which contains the quotation character but neither the separator nor a line break is
     * written unquoted and without escaping the quotation character - confirm that consumers tolerate this.
     *
     * @param stringValue the value to check
     * @return <tt>true</tt> if the value should be output as a quoted string, <tt>false</tt> otherwise
     */
    private boolean shouldQuote(String stringValue) {
        if (isQuotationEmpty) {
            return false;
        }
        if (forceQuotation) {
            return true;
        }
        return stringValue.contains(separatorString) || stringValue.contains("\n") || stringValue.contains("\r");
    }

    /**
     * Transforms the value into a string representation suitable for outputting in a CSV file.
     * <p>
     * <tt>null</tt> becomes an empty string, strings are trimmed if input trimming is enabled and all other
     * objects are converted using <tt>NLS.toMachineString</tt>.
     *
     * @param object the object to convert
     * @return a machine-readable string representation
     */
    private String convertToString(Object object) {
        if (object == null) {
            return "";
        }
        if (object instanceof String string) {
            return trim ? string.trim() : string;
        }
        return NLS.toMachineString(object);
    }

    /**
     * Escapes the given string if needed.
     *
     * @param stringValue the string to escape
     * @param shouldQuote a flag which determines if the resulting string will be quoted or not
     * @return the properly escaped string
     * @throws IllegalArgumentException if a character requires escaping or quoting, but the required mechanism
     *                                  has been disabled
     */
    private String escapeValue(String stringValue, boolean shouldQuote) {
        StringBuilder effectiveValue = new StringBuilder(stringValue.length());
        for (int index = 0; index < stringValue.length(); index++) {
            processCharacter(stringValue.charAt(index), effectiveValue, shouldQuote);
        }
        return effectiveValue.toString();
    }

    /**
     * Appends the given character to the output, preceded by the escape character where necessary.
     *
     * @param currentChar    the character to output
     * @param effectiveValue the buffer collecting the escaped column value
     * @param shouldQuote    determines if the column value will be enclosed in quotation characters
     * @throws IllegalArgumentException if the character cannot be represented using the current settings
     */
    private void processCharacter(char currentChar, StringBuilder effectiveValue, boolean shouldQuote) {
        // A disabled escape character (\0) must never be treated as escapable, otherwise NUL characters
        // within a value would be duplicated in the output.
        if (!isEscapeEmpty && currentChar == escape) {
            effectiveValue.append(escape).append(currentChar);
            return;
        }

        if (isQuotationEmpty) {
            processCharacterWithoutQuotation(currentChar, effectiveValue);
            return;
        }

        // Within a quoted value, the quotation character itself has to be escaped.
        if (shouldQuote && currentChar == quotation) {
            if (isEscapeEmpty) {
                throw new IllegalArgumentException(
                        "Cannot output a quotation character within a quoted string without an escape character.");
            }
            effectiveValue.append(escape);
        }

        effectiveValue.append(currentChar);
    }

    /**
     * Appends the given character to the output if no quotation is available.
     * <p>
     * In this case the separator has to be escaped and line breaks cannot be represented at all.
     *
     * @param currentChar    the character to output
     * @param effectiveValue the buffer collecting the escaped column value
     * @throws IllegalArgumentException if the character is a line break or if it is the separator and escaping
     *                                  is disabled
     */
    private void processCharacterWithoutQuotation(char currentChar, StringBuilder effectiveValue) {
        if (currentChar == separator) {
            if (isEscapeEmpty) {
                throw new IllegalArgumentException(Strings.apply(
                        "Cannot output a column which contains the separator character '%s' "
                        + "without an escape or quotation character.",
                        separator));
            }
            effectiveValue.append(escape).append(currentChar);
            return;
        }

        if (currentChar == '\r' || currentChar == '\n') {
            throw new IllegalArgumentException(
                    "Cannot output a column which contains a line break without an quotation character.");
        }

        effectiveValue.append(currentChar);
    }

    /**
     * Closes the underlying writer.
     *
     * @throws IOException in case of an IO error while closing the underlying writer
     */
    @Override
    public void close() throws IOException {
        writer.close();
    }
}