From de6ad453e7a78f41d46723fa1c903c604efff2fb Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Tue, 21 Jan 2025 09:08:49 +0100 Subject: [PATCH 1/3] add support for W3 CSV result formats to csv:serialize --- .../basex/io/parse/csv/CsvXmlConverter.java | 10 +- .../java/org/basex/io/serial/Serializer.java | 8 +- .../basex/io/serial/SerializerOptions.java | 6 +- .../io/serial/csv/CsvArraysSerializer.java | 51 ++ .../io/serial/csv/CsvDirectSerializer.java | 7 +- .../basex/io/serial/csv/CsvMapSerializer.java | 77 +++ .../basex/io/serial/csv/CsvSerializer.java | 183 +++++-- .../io/serial/csv/CsvXQuerySerializer.java | 8 +- .../basex/io/serial/csv/CsvXmlSerializer.java | 91 ++++ .../java/org/basex/query/expr/ParseExpr.java | 2 +- .../basex/query/func/csv/CsvSerialize.java | 7 +- .../basex/query/func/fn/FnCsvToArrays.java | 6 +- .../org/basex/query/func/fn/FnParseCsv.java | 13 +- .../java/org/basex/util/options/Options.java | 9 + .../basex/query/func/CsvRoundtripTest.java | 450 ++++++++++++++++++ 15 files changed, 861 insertions(+), 67 deletions(-) create mode 100644 basex-core/src/main/java/org/basex/io/serial/csv/CsvArraysSerializer.java create mode 100644 basex-core/src/main/java/org/basex/io/serial/csv/CsvMapSerializer.java create mode 100644 basex-core/src/main/java/org/basex/io/serial/csv/CsvXmlSerializer.java create mode 100644 basex-core/src/test/java/org/basex/query/func/CsvRoundtripTest.java diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java index 681b2eae1d..1158cc5bad 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java @@ -14,17 +14,19 @@ */ public final class CsvXmlConverter extends CsvConverter { /** QName. 
*/ - protected static final QNm Q_FN_CSV = new QNm("csv", QueryText.FN_URI); + public static final QNm Q_FN_CSV = new QNm("csv", QueryText.FN_URI); /** QName. */ protected static final QNm Q_FN_ROWS = new QNm("rows", QueryText.FN_URI); /** QName. */ - protected static final QNm Q_FN_ROW = new QNm("row", QueryText.FN_URI); + public static final QNm Q_FN_ROW = new QNm("row", QueryText.FN_URI); /** QName. */ protected static final QNm Q_FN_FIELD = new QNm("field", QueryText.FN_URI); /** QName. */ protected static final QNm Q_FN_COLUMNS = new QNm("columns", QueryText.FN_URI); /** QName. */ - protected static final QNm Q_FN_COLUMN = new QNm("column", QueryText.FN_URI); + public static final QNm Q_FN_COLUMN = new QNm("column", QueryText.FN_URI); + /** QName. */ + public static final QNm Q_COLUMN = new QNm("column"); /** Document node. */ private FBuilder doc; @@ -71,7 +73,7 @@ protected void entry(final byte[] value) { final FBuilder elem = FElem.build(Q_FN_FIELD); final byte[] name = headers.get(column); - if(name != null && name.length > 0) elem.add(Q_FN_COLUMN, name); + if(name != null && name.length > 0) elem.add(Q_COLUMN, name); record.add(elem.add(shared.token(value))); } diff --git a/basex-core/src/main/java/org/basex/io/serial/Serializer.java b/basex-core/src/main/java/org/basex/io/serial/Serializer.java index d356abb920..c2e89a1204 100644 --- a/basex-core/src/main/java/org/basex/io/serial/Serializer.java +++ b/basex-core/src/main/java/org/basex/io/serial/Serializer.java @@ -6,8 +6,6 @@ import java.io.*; import java.util.*; -import org.basex.build.csv.*; -import org.basex.build.csv.CsvOptions.*; import org.basex.build.json.*; import org.basex.build.json.JsonOptions.*; import org.basex.data.*; @@ -81,11 +79,7 @@ public static Serializer get(final OutputStream os, final SerializerOptions sopt case XHTML: return new XHTMLSerializer(os, so); case HTML: return new HTMLSerializer(os, so); case TEXT: return new TextSerializer(os, so); - case CSV: - final CsvOptions 
copts = so.get(SerializerOptions.CSV); - return copts.get(CsvOptions.FORMAT) == CsvFormat.XQUERY - ? new CsvXQuerySerializer(os, so) - : new CsvDirectSerializer(os, so); + case CSV: return new CsvSerializer.Delegator(os, so); case JSON: final JsonSerialOptions jopts = so.get(SerializerOptions.JSON); final JsonFormat jformat = jopts.get(JsonOptions.FORMAT); diff --git a/basex-core/src/main/java/org/basex/io/serial/SerializerOptions.java b/basex-core/src/main/java/org/basex/io/serial/SerializerOptions.java index a5d559a747..1910dd20c5 100644 --- a/basex-core/src/main/java/org/basex/io/serial/SerializerOptions.java +++ b/basex-core/src/main/java/org/basex/io/serial/SerializerOptions.java @@ -7,7 +7,6 @@ import java.io.*; import java.util.function.*; -import org.basex.build.csv.*; import org.basex.build.json.*; import org.basex.core.*; import org.basex.io.*; @@ -15,6 +14,7 @@ import org.basex.query.expr.path.*; import org.basex.query.util.hash.*; import org.basex.query.value.item.*; +import org.basex.query.value.map.*; import org.basex.query.value.node.*; import org.basex.query.value.type.*; import org.basex.util.*; @@ -102,8 +102,8 @@ public final class SerializerOptions extends Options { new EnumOption<>("json-lines", YesNo.NO); /** Specific serialization parameter. */ - public static final OptionsOption CSV = - new OptionsOption<>("csv", new CsvOptions()); + public static final ValueOption CSV = + new ValueOption("csv", SeqType.MAP_ZO, XQMap.empty()); /** Specific serialization parameter. 
*/ public static final OptionsOption JSON = new OptionsOption<>("json", new JsonSerialOptions()); diff --git a/basex-core/src/main/java/org/basex/io/serial/csv/CsvArraysSerializer.java b/basex-core/src/main/java/org/basex/io/serial/csv/CsvArraysSerializer.java new file mode 100644 index 0000000000..4dcbdba45a --- /dev/null +++ b/basex-core/src/main/java/org/basex/io/serial/csv/CsvArraysSerializer.java @@ -0,0 +1,51 @@ +package org.basex.io.serial.csv; + +import static org.basex.query.QueryError.*; + +import java.io.*; + +import org.basex.build.csv.*; +import org.basex.io.serial.*; +import org.basex.query.*; +import org.basex.query.value.*; +import org.basex.query.value.array.*; +import org.basex.query.value.item.*; +import org.basex.util.list.*; + +/** + * This class serializes a sequence of arrays as CSV. The input must conform to the result + * format of fn:csv-to-arrays. + * + * @author BaseX Team, BSD License + * @author Gunther Rademacher + */ +public final class CsvArraysSerializer extends CsvSerializer { + /** + * Constructor. 
+ * @param os output stream + * @param sopts serialization parameters + * @param copts csv options + * @throws IOException I/O exception + */ + public CsvArraysSerializer(final OutputStream os, final SerializerOptions sopts, + final CsvOptions copts) throws IOException { + super(os, sopts, copts); + } + + @Override + public void serialize(final Item item) throws IOException { + if(!(item instanceof XQArray)) + throw CSV_SERIALIZE_X_X.getIO("Array expected, found " + item.seqType(), item); + final TokenList tl = new TokenList(); + try { + for(final Value value : ((XQArray) item).iterable()) { + if(!value.isItem()) throw CSV_SERIALIZE_X_X.getIO( + "Item expected, found " + value.seqType(), value); + tl.add(((Item) value).string(null)); + } + } catch(final QueryException ex) { + throw new QueryIOException(ex); + } + record(tl); + } +} diff --git a/basex-core/src/main/java/org/basex/io/serial/csv/CsvDirectSerializer.java b/basex-core/src/main/java/org/basex/io/serial/csv/CsvDirectSerializer.java index c4c4ea8133..68349d77e6 100644 --- a/basex-core/src/main/java/org/basex/io/serial/csv/CsvDirectSerializer.java +++ b/basex-core/src/main/java/org/basex/io/serial/csv/CsvDirectSerializer.java @@ -37,12 +37,13 @@ public final class CsvDirectSerializer extends CsvSerializer { * Constructor. * @param os output stream * @param sopts serialization parameters + * @param copts csv options * @throws IOException I/O exception */ - public CsvDirectSerializer(final OutputStream os, final SerializerOptions sopts) - throws IOException { + public CsvDirectSerializer(final OutputStream os, final SerializerOptions sopts, + final CsvOptions copts) throws IOException { - super(os, sopts); + super(os, sopts, copts); headers = header ? 
new TokenList() : null; atts = copts.get(CsvOptions.FORMAT) == CsvFormat.ATTRIBUTES; lax = copts.get(CsvOptions.LAX) || atts; diff --git a/basex-core/src/main/java/org/basex/io/serial/csv/CsvMapSerializer.java b/basex-core/src/main/java/org/basex/io/serial/csv/CsvMapSerializer.java new file mode 100644 index 0000000000..57508cc444 --- /dev/null +++ b/basex-core/src/main/java/org/basex/io/serial/csv/CsvMapSerializer.java @@ -0,0 +1,77 @@ +package org.basex.io.serial.csv; + +import static org.basex.query.QueryError.*; + +import java.io.*; + +import org.basex.build.csv.*; +import org.basex.io.serial.*; +import org.basex.query.*; +import org.basex.query.func.fn.*; +import org.basex.query.value.*; +import org.basex.query.value.array.*; +import org.basex.query.value.item.*; +import org.basex.query.value.map.*; +import org.basex.util.list.*; + +/** + * This class serializes a map as CSV. The input must conform to the result format of + * fn:parse-csv. + * + * @author BaseX Team, BSD License + * @author Gunther Rademacher + */ +public final class CsvMapSerializer extends CsvSerializer { + /** + * Constructor. 
+ * @param os output stream + * @param sopts serialization parameters + * @param copts csv options + * @throws IOException I/O exception + */ + public CsvMapSerializer(final OutputStream os, final SerializerOptions sopts, + final CsvOptions copts) throws IOException { + super(os, sopts, copts); + } + + @Override + public void serialize(final Item item) throws IOException { + if(sep && level == 0) out.print(' '); + + if(!(item instanceof XQMap)) + throw CSV_SERIALIZE_X_X.getIO("Top level must be a map, found " + item.type, item); + + final XQMap m = (XQMap) item; + final TokenList tl = new TokenList(); + try { + // print header + if(header) { + if(!m.contains(FnParseCsv.COLUMNS)) throw CSV_SERIALIZE_X.getIO("Map has no 'columns' key"); + row(m.get(FnParseCsv.COLUMNS), tl); + } + // print rows + if(!m.contains(FnParseCsv.ROWS)) throw CSV_SERIALIZE_X.getIO("Map has no 'rows' key"); + for(final Item record : m.get(FnParseCsv.ROWS)) row(((XQArray) record).iterable(), tl); + } catch(final QueryException ex) { + throw new QueryIOException(ex); + } + sep = true; + } + + /** + * Serializes a single line (header or contents). 
+ * @param line line to be serialized + * @param tl token list + * @throws QueryException query exception + * @throws IOException I/O exception + */ + private void row(final Iterable line, final TokenList tl) + throws QueryException, IOException { + for(final Value value : line) { + if(!value.isItem()) throw CSV_SERIALIZE_X_X.getIO( + "Item expected, found " + value.seqType(), value); + tl.add(((Item) value).string(null)); + } + record(tl); + } +} diff --git a/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java b/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java index 4e50959b78..8092d80ac7 100644 --- a/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java +++ b/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java @@ -6,8 +6,15 @@ import java.io.*; import org.basex.build.csv.*; +import org.basex.io.parse.csv.*; import org.basex.io.serial.*; +import org.basex.query.*; +import org.basex.query.func.fn.FnCsvToArrays.*; +import org.basex.query.func.fn.FnParseCsv.*; +import org.basex.query.value.array.*; import org.basex.query.value.item.*; +import org.basex.query.value.map.*; +import org.basex.query.value.node.*; import org.basex.util.*; import org.basex.util.list.*; @@ -17,7 +24,7 @@ * @author BaseX Team, BSD License * @author Christian Gruen */ -abstract class CsvSerializer extends StandardSerializer { +public abstract class CsvSerializer extends StandardSerializer { /** CSV options. */ final CsvOptions copts; /** Separator. */ @@ -26,6 +33,14 @@ abstract class CsvSerializer extends StandardSerializer { final boolean quotes; /** Generate backslashes. */ final boolean backslashes; + /** Row delimiter (see {@link CsvOptions#ROW_DELIMITER}). */ + private final int rowDelimiter; + /** Quote character (see {@link CsvOptions#QUOTE_CHARACTER}). */ + private final int quoteCharacter; + /** Select columns (see {@link CsvOptions#SELECT_COLUMNS}). 
*/ + private final int[] selectColumns; + /** Maximum select columns value. */ + private int maxCol; /** Header flag. */ boolean header; @@ -34,15 +49,22 @@ abstract class CsvSerializer extends StandardSerializer { * Constructor. * @param os output stream * @param sopts serialization parameters + * @param copts csv options * @throws IOException I/O exception */ - CsvSerializer(final OutputStream os, final SerializerOptions sopts) throws IOException { + CsvSerializer(final OutputStream os, final SerializerOptions sopts, final CsvOptions copts) + throws IOException { super(os, sopts); - copts = sopts.get(SerializerOptions.CSV); + this.copts = copts; quotes = copts.get(CsvOptions.QUOTES); backslashes = copts.get(CsvOptions.BACKSLASHES); header = copts.get(CsvOptions.HEADER); separator = copts.separator(); + rowDelimiter = copts.rowDelimiter(); + quoteCharacter = copts.quoteCharacter(); + selectColumns = copts.get(CsvOptions.SELECT_COLUMNS); + maxCol = -1; + for(final int col : selectColumns) if(col > maxCol) maxCol = col; } /** @@ -51,48 +73,139 @@ abstract class CsvSerializer extends StandardSerializer { * @throws IOException I/O exception */ final void record(final TokenList entries) throws IOException { + int f = 0; + if(maxCol < 0) { + for(final byte[] val : entries) field(f++, val); + } else { + final byte[][] row = new byte[maxCol + 1][]; + int i = 0; + for(final byte[] val : entries) { + final int j = selectColumns[i++] - 1; + if(row[j] == null) row[j] = val; + } + for(final byte[] val : row) field(f++, val == null ? Token.EMPTY : val); + } + out.print(rowDelimiter); + entries.reset(); + } + + /** + * Prints a field value. 
+ * @param seqNo field sequence number + * @param value field value + * @throws IOException I/O exception + */ + final void field(final int seqNo, final byte[] value) throws IOException { // print fields, skip trailing empty contents - final int fs = entries.size(); - for(int i = 0; i < fs; i++) { - final byte[] v = entries.get(i); - if(i != 0) out.print(separator); + if(seqNo != 0) out.print(separator); - byte[] txt = v != null ? v : EMPTY; - final boolean delim = contains(txt, separator) || contains(txt, '\n'); - final boolean special = contains(txt, '\r') || contains(txt, '\t') || contains(txt, '"'); - if(delim || special || backslashes && contains(txt, '\\')) { - final TokenBuilder tb = new TokenBuilder(); - if(delim && !backslashes && !quotes) - throw CSV_SERIALIZE_X_X.getIO("Output must be put into quotes", txt); + byte[] txt = value != null ? value : Token.EMPTY; + final boolean delim = contains(txt, separator) || contains(txt, rowDelimiter); + final boolean special = contains(txt, '\r') || contains(txt, '\t') + || contains(txt, quoteCharacter); + if(delim || special || backslashes && contains(txt, '\\')) { + final TokenBuilder tb = new TokenBuilder(); + if(delim && !backslashes && !quotes) + throw CSV_SERIALIZE_X_X.getIO("Output must be put into quotes", txt); - if(quotes && (delim || special)) tb.add('"'); - final TokenParser tp = new TokenParser(txt); - while(tp.more()) { - final int cp = tp.next(); - if(backslashes) { - if(cp == '\n') tb.add("\\n"); - else if(cp == '\r') tb.add("\\r"); - else if(cp == '\t') tb.add("\\t"); - else if(cp == '"') tb.add("\\\""); - else if(cp == '\\') tb.add("\\\\"); - else if(cp == separator && !quotes) tb.add('\\').add(cp); - else tb.add(cp); - } else { - if(cp == '"') tb.add('"'); - tb.add(cp); - } + if(quotes && (delim || special)) tb.add(quoteCharacter); + final TokenParser tp = new TokenParser(txt); + while(tp.more()) { + final int cp = tp.next(); + if(backslashes) { + if(cp == '\n') tb.add("\\").add(separator == '\n' ? 
"n" : cp); + else if(cp == '\r') tb.add("\\r"); + else if(cp == '\t') tb.add("\\t"); + else if(cp == quoteCharacter) tb.add("\\").add(cp); + else if(cp == '\\') tb.add("\\\\"); + else if(cp == separator && !quotes) tb.add('\\').add(cp); + else tb.add(cp); + } else { + if(cp == quoteCharacter) tb.add(quoteCharacter); + tb.add(cp); } - if(quotes && (delim || special)) tb.add('"'); - txt = tb.finish(); } - out.print(txt); + if(quotes && (delim || special)) tb.add(quoteCharacter); + txt = tb.finish(); } - out.print('\n'); - entries.reset(); + out.print(txt); } @Override protected void atomic(final Item value) throws IOException { throw CSV_SERIALIZE_X.getIO("Atomic items cannot be serialized"); } + + /** + * This delegator class allows lazy instantiation of the concrete CSV serializer, depending on + * the item to be serialized. + */ + public static class Delegator extends Serializer { + /** Output stream. */ + private final OutputStream os; + /** Serializer options. */ + private final SerializerOptions so; + /** Concrete CSV serializer. */ + private CsvSerializer delegate; + + /** + * Constructor. + * @param os output stream + * @param sopts serializer options + */ + public Delegator(final OutputStream os, final SerializerOptions sopts) { + this.os = os; + this.so = sopts; + } + + @Override + public void serialize(final Item item) throws IOException { + if(delegate == null) { + try { + final XQMap opts = (XQMap) so.get(SerializerOptions.CSV); + if(item instanceof FNode) { + final FElem root; + if(item instanceof FElem) { + root = (FElem) item; + } else if(item instanceof FDoc) { + final FDoc doc = (FDoc) item; + root = doc.hasChildren() ? 
(FElem) doc.childIter().next() : null; + } else { + root = null; + } + if(root != null && root.qname().eq(CsvXmlConverter.Q_FN_CSV)) { + final ParseCsvOptions popts = new ParseCsvOptions(); + popts.assign(opts, null); + popts.validate(null); + delegate = new CsvXmlSerializer(os, so, popts.toCsvParserOptions()); + } else { + final CsvParserOptions copts = new CsvParserOptions(); + copts.assign(opts, null); + delegate = new CsvDirectSerializer(os, so, copts); + } + } else if(item instanceof XQArray) { + final CsvToArraysOptions aopts = new CsvToArraysOptions(); + aopts.assign(opts, null); + aopts.validate(null); + delegate = new CsvArraysSerializer(os, so, aopts.toCsvParserOptions()); + } else if(!(item instanceof XQMap)) { + throw new UnsupportedOperationException( + "Cannot serialize items of type " + item.getClass()); + } else if(((XQMap) item).contains(CsvXQueryConverter.RECORDS)) { + final CsvParserOptions copts = new CsvParserOptions(); + copts.assign(opts, null); + delegate = new CsvXQuerySerializer(os, so, copts); + } else { + final ParseCsvOptions popts = new ParseCsvOptions(); + popts.assign(opts, null); + popts.validate(null); + delegate = new CsvMapSerializer(os, so, popts.toCsvParserOptions()); + } + } catch(final QueryException ex) { + throw new QueryIOException(ex); + } + } + delegate.serialize(item); + } + } } diff --git a/basex-core/src/main/java/org/basex/io/serial/csv/CsvXQuerySerializer.java b/basex-core/src/main/java/org/basex/io/serial/csv/CsvXQuerySerializer.java index 74dc5c3557..7292655479 100644 --- a/basex-core/src/main/java/org/basex/io/serial/csv/CsvXQuerySerializer.java +++ b/basex-core/src/main/java/org/basex/io/serial/csv/CsvXQuerySerializer.java @@ -4,6 +4,7 @@ import java.io.*; +import org.basex.build.csv.*; import org.basex.io.parse.csv.*; import org.basex.io.serial.*; import org.basex.query.*; @@ -24,11 +25,12 @@ public final class CsvXQuerySerializer extends CsvSerializer { * Constructor. 
* @param os output stream * @param sopts serialization parameters + * @param copts csv options * @throws IOException I/O exception */ - public CsvXQuerySerializer(final OutputStream os, final SerializerOptions sopts) - throws IOException { - super(os, sopts); + public CsvXQuerySerializer(final OutputStream os, final SerializerOptions sopts, + final CsvOptions copts) throws IOException { + super(os, sopts, copts); } @Override diff --git a/basex-core/src/main/java/org/basex/io/serial/csv/CsvXmlSerializer.java b/basex-core/src/main/java/org/basex/io/serial/csv/CsvXmlSerializer.java new file mode 100644 index 0000000000..0e873c4f90 --- /dev/null +++ b/basex-core/src/main/java/org/basex/io/serial/csv/CsvXmlSerializer.java @@ -0,0 +1,91 @@ +package org.basex.io.serial.csv; + +import static org.basex.query.QueryError.*; +import static org.basex.util.Token.*; + +import java.io.*; + +import org.basex.build.csv.*; +import org.basex.io.parse.csv.*; +import org.basex.io.serial.*; +import org.basex.query.util.ft.*; +import org.basex.query.value.item.*; +import org.basex.util.*; +import org.basex.util.list.*; + +/** + * This class serializes items as CSV. + * + * @author BaseX Team, BSD License + * @author Gunther Rademacher + */ +public final class CsvXmlSerializer extends CsvSerializer { + /** Names of header elements. */ + private final TokenList headers; + /** Contents of current row. */ + private TokenList data; + + /** + * Constructor. + * @param os output stream + * @param sopts serialization parameters + * @param copts csv options + * @throws IOException I/O exception + */ + public CsvXmlSerializer(final OutputStream os, final SerializerOptions sopts, + final CsvOptions copts) throws IOException { + + super(os, sopts, copts); + headers = header ? 
new TokenList() : null; + } + + @Override + protected void startOpen(final QNm name) { + if(level == 2) data = new TokenList(); + } + + @Override + protected void finishEmpty() throws IOException { + finishOpen(); + switch(level) { + case 2: + if(header && elem.eq(CsvXmlConverter.Q_FN_COLUMN)) headers.add(EMPTY); + break; + case 3: + data.add(EMPTY); + break; + } + finishClose(); + } + + @Override + protected void text(final byte[] value, final FTPos ftp) throws IOException { + switch(level) { + case 3: + if(header && elem.eq(CsvXmlConverter.Q_FN_COLUMN)) headers.add(value); + break; + case 4: + data.add(value); + break; + } + } + + @Override + protected void finishClose() throws IOException { + if(level != 2 || !elem.eq(CsvXmlConverter.Q_FN_ROW)) return; + if(header) { + record(headers); + header = false; + } + final TokenList line = data; + record(line); + } + + @Override + protected void attribute(final byte[] name, final byte[] value, final boolean standalone) + throws IOException { + if(headers == null || !name.equals(CsvXmlConverter.Q_COLUMN.local())) return; + if(data.size() < headers.size() && Token.eq(value, headers.get(data.size()))) return; + throw CSV_SERIALIZE_X_X.getIO("Unexpected column", value); + } +} diff --git a/basex-core/src/main/java/org/basex/query/expr/ParseExpr.java b/basex-core/src/main/java/org/basex/query/expr/ParseExpr.java index cb575d636a..f9d67173f0 100644 --- a/basex-core/src/main/java/org/basex/query/expr/ParseExpr.java +++ b/basex-core/src/main/java/org/basex/query/expr/ParseExpr.java @@ -347,7 +347,7 @@ protected final boolean toBoolean(final Item item) throws QueryException { /** * Converts an item to a boolean. 
* @param item item to be converted - * @param info input info + * @param info input info (can be {@code null}) * @return boolean * @throws QueryException query exception */ diff --git a/basex-core/src/main/java/org/basex/query/func/csv/CsvSerialize.java b/basex-core/src/main/java/org/basex/query/func/csv/CsvSerialize.java index 7ca1990fb9..6d4017af34 100644 --- a/basex-core/src/main/java/org/basex/query/func/csv/CsvSerialize.java +++ b/basex-core/src/main/java/org/basex/query/func/csv/CsvSerialize.java @@ -2,12 +2,12 @@ import static org.basex.query.QueryError.*; -import org.basex.build.csv.*; import org.basex.io.serial.*; import org.basex.query.*; import org.basex.query.func.*; import org.basex.query.iter.*; import org.basex.query.value.item.*; +import org.basex.query.value.map.*; import org.basex.util.*; /** @@ -20,11 +20,10 @@ public final class CsvSerialize extends StandardFunc { @Override public Item item(final QueryContext qc, final InputInfo ii) throws QueryException { final Iter input = arg(0).iter(qc); - final CsvOptions options = toOptions(arg(1), new CsvOptions(), qc); - + final Item options = arg(1).item(qc, ii); final SerializerOptions sopts = new SerializerOptions(); sopts.set(SerializerOptions.METHOD, SerialMethod.CSV); - sopts.set(SerializerOptions.CSV, options); + sopts.set(SerializerOptions.CSV, options.isEmpty() ? XQMap.empty() : toMap(options)); return Str.get(serialize(input, sopts, INVALIDOPT_X, qc)); } } diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java index e81fbe4a1b..6d2a56f7c4 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java @@ -55,10 +55,10 @@ public static class CsvToArraysOptions extends Options { /** * Check for error conditions in the current settings. 
- * @param ii input info + * @param ii input info (can be {@code null}) * @throws QueryException query exception */ - void validate(final InputInfo ii) throws QueryException { + public void validate(final InputInfo ii) throws QueryException { final IntSet delim = new IntSet(); for(final StringOption opt : Arrays.asList(FIELD_DELIMITER, ROW_DELIMITER, QUOTE_CHARACTER)) { final String val = get(opt); @@ -73,7 +73,7 @@ void validate(final InputInfo ii) throws QueryException { * Convert the options to a CsvParserOptions object. * @return the CsvParserOptions object */ - CsvParserOptions toCsvParserOptions() { + public CsvParserOptions toCsvParserOptions() { final CsvParserOptions copts = new CsvParserOptions(); copts.set(CsvOptions.SEPARATOR, get(FIELD_DELIMITER)); copts.set(CsvOptions.ROW_DELIMITER, get(ROW_DELIMITER)); diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java index 4788b881c0..95f8426ee9 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java @@ -29,6 +29,11 @@ * @author Gunther Rademacher */ public class FnParseCsv extends Parse { + /** Columns. */ + public static final Str COLUMNS = Str.get("columns"); + /** Rows. 
*/ + public static final Str ROWS = Str.get("rows"); + @Override public Item item(final QueryContext qc, final InputInfo ii) throws QueryException { final byte[] value = toZeroToken(arg(0), qc); @@ -65,9 +70,9 @@ public Item item(final QueryContext qc, final InputInfo ii) throws QueryExceptio final Value rows = map.get(CsvXQueryConverter.RECORDS); final MapBuilder result = new MapBuilder(); - result.put("columns", columns); + result.put(COLUMNS, columns); result.put("column-index", columnIndex); - result.put("rows", rows); + result.put(ROWS, rows); result.put("get", Get.funcItem(rows, columnIndex, qc, ii)); return result.map(); } catch(final IOException ex) { @@ -166,7 +171,7 @@ public static final class ParseCsvOptions extends FnCsvToArrays.CsvToArraysOptio private boolean extractHeader; @Override - void validate(final InputInfo ii) throws QueryException { + public void validate(final InputInfo ii) throws QueryException { super.validate(ii); final Value header = get(HEADER); if(BOOLEAN_O.instance(header)) extractHeader = toBoolean((Item) header, ii); @@ -175,7 +180,7 @@ void validate(final InputInfo ii) throws QueryException { } @Override - CsvParserOptions toCsvParserOptions() { + public CsvParserOptions toCsvParserOptions() { final CsvParserOptions copts = super.toCsvParserOptions(); copts.set(CsvOptions.TRIM_ROWS, get(TRIM_ROWS)); copts.set(CsvOptions.SELECT_COLUMNS, get(SELECT_COLUMNS)); diff --git a/basex-core/src/main/java/org/basex/util/options/Options.java b/basex-core/src/main/java/org/basex/util/options/Options.java index 14d2923bab..30ef374a6a 100644 --- a/basex-core/src/main/java/org/basex/util/options/Options.java +++ b/basex-core/src/main/java/org/basex/util/options/Options.java @@ -360,6 +360,15 @@ public final synchronized > void set(final EnumOption optio put(option, option.get(value)); } + /** + * Sets the value of an option. 
+ * @param option option to be set + * @param value value to be set + */ + public final synchronized void set(final ValueOption option, final Value value) { + put(option, value); + } + /** * Assigns a value after casting it to the correct type. If the option is unknown, * it will be added as free option. diff --git a/basex-core/src/test/java/org/basex/query/func/CsvRoundtripTest.java b/basex-core/src/test/java/org/basex/query/func/CsvRoundtripTest.java new file mode 100644 index 0000000000..a8202701ab --- /dev/null +++ b/basex-core/src/test/java/org/basex/query/func/CsvRoundtripTest.java @@ -0,0 +1,450 @@ +package org.basex.query.func; + +import static org.basex.query.func.Function.*; + +import org.basex.*; +import org.junit.jupiter.api.*; + +/** + * This class roundtrips CSV data through parsing and serialization. + * + * @author BaseX Team, BSD License + * @author Gunther Rademacher + */ +public final class CsvRoundtripTest extends SandboxTest { + /** Test method. */ + @Test public void csvParse() { + final Function func = _CSV_PARSE; + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (1, 4, 17), 'row-" + + "delimiter': '|'", "141114"); + roundtrip(func, "a,b,c,d|1,2,3,4|11,12,13,14", "'format': 'attributes', 'header': true(), 'sele" + + "ct-columns': (1, 4, 2), 'row-delimiter': '|'", "1" + + "42111412"); + roundtrip(func, "1,2,3,4|11,12,13,14", "'format': 'xquery', 'select-columns': (1, 4, 17), 'row-" + + "delimiter': '|'", "{\"records\":([\"1\",\"4\",\"\"],[\"11\",\"14\",\"\"])}"); + } + + /** Test method. 
*/ + @Test public void csvToArrays() { + final Function func = CSV_TO_ARRAYS; + roundtrip(func, "", "", ""); + roundtrip(func, "one", "", "[\"one\"]"); + roundtrip(func, "one,two", "", "[\"one\",\"two\"]"); + roundtrip(func, "one,two three,four", "", "[\"one\",\"two\"]\n[\"three\",\"four\"]"); + roundtrip(func, "one,two three,four ", "", "[\"one\",\"two\"]\n[\"three\",\"four\"]"); + roundtrip(func, "one,two three,four,five", "", "[\"one\",\"two\"]\n[\"three\",\"four\",\"fi" + + "ve\"]"); + roundtrip(func, "one,two three,four", "", "[\"one\",\"two\"]\n[]\n[\"three\",\"four\"]" + ); + roundtrip(func, "one,two \"three,four\",five", "", "[\"one\",\"two\"]\n[\"three,four\",\"fi" + + "ve\"]"); + roundtrip(func, "one,two \"three,\"\"four\"\"\",five", "", "[\"one\",\"two\"]\n[\"three,\"" + + "\"four\"\"\",\"five\"]"); + roundtrip(func, "one, ,four , ,,,", "", "[\"one\",\"\"]\n[\"\",\"four\"]\n[\"\",\"" + + "\"]\n[\"\",\"\",\"\",\"\"]"); + roundtrip(func, "one,\"\" \"\",\"four\"", "", "[\"one\",\"\"]\n[\"\",\"four\"]"); + roundtrip(func, "one,\"[ ]\" \"\",\"four\"", "", "[\"one\",\"[ ]\"]\n[\"\",\"four\"" + + "]"); + roundtrip(func, "one;two three;four", "'field-delimiter': ';'", "[\"one\",\"two\"]\n[\"thre" + + "e\",\"four\"]"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|'", "[\"one\",\"two\"]\n[\"three\",\"" + + "four\"]"); + roundtrip(func, "one.two|three.four", "'row-delimiter': '|', 'field-delimiter': '.'", "[\"one\"" + + ",\"two\"]\n[\"three\",\"four\"]"); + roundtrip(func, "one,'two,2'|three,'four,4'", "'row-delimiter': '|', 'quote-character': ''''", + "[\"one\",\"two,2\"]\n[\"three\",\"four,4\"]"); + roundtrip(func, "one,'two,''2'''|three,'four,''4'''", "'row-delimiter': '|', 'quote-character':" + + " ''''", "[\"one\",\"two,'2'\"]\n[\"three\",\"four,'4'\"]"); + roundtrip(func, "one ,two | three, four", "'row-delimiter': '|'", "[\"one \",\"two \"]\n[\" thr" + + "ee\",\" four\"]"); + roundtrip(func, "one ,two | three, four", "'row-delimiter': '|', 
'trim-whitespace': false()", + "[\"one \",\"two \"]\n[\" three\",\" four\"]"); + roundtrip(func, "one ,two | three, twenty four ", "'row-delimiter': '|', 'trim-whitespace': tr" + + "ue()", "[\"one\",\"two\"]\n[\"three\",\"twenty four\"]"); + roundtrip(func, " ", "", "[]"); + roundtrip(func, " ", "", "[]\n[\" \"]"); + roundtrip(func, " ", "'trim-whitespace': true()", "[]"); + roundtrip(func, " ", "'trim-whitespace': true()", "[]\n[]"); + roundtrip(func, " ", "'trim-whitespace': true()", "[]\n[]\n[]"); + roundtrip(func, "one,two,\"z\"", "", "[\"one\",\"two\",\"z\"]"); + roundtrip(func, "one,two,\"z\" ", "", "[\"one\",\"two\",\"z\"]"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|'", "[\"a\",\"b\",\"c\",\"d\",\"" + + "e\",\"f\"]\n[\"p\",\"q\",\"r\",\"s\",\"t\",\"u\"]"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|'", "[\"a\",\"b\",\"c\",\"d\",\"" + + "e\",\"f\"]\n[\"p\",\"q\",\"r\",\"s\",\"t\",\"u\"]"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|'", "[\"a\",\"b\",\"c\",\"d\",\"" + + "e\",\"f\"]\n[\"p\",\"q\",\"r\",\"s\",\"t\",\"u\"]"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|'", "[\"a\",\"b\",\"c\",\"d\",\"" + + "e\",\"f\"]\n[\"p\",\"q\",\"r\",\"s\",\"t\",\"u\"]"); + roundtrip(func, "a,b,c|p,q,r", "'row-delimiter': '|'", "[\"a\",\"b\",\"c\"]\n[\"p\",\"q\",\"r\"" + + "]"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|'", "[\"a\",\"b\",\"c\",\"d\",\"" + + "e\",\"f\"]\n[\"p\",\"q\",\"r\",\"s\",\"t\",\"u\"]"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|'", "[\"a\",\"b\",\"c\",\"d\",\"" + + "e\",\"f\"]\n[\"p\",\"q\",\"r\",\"s\",\"t\",\"u\"]"); + } + + /** Test method. 
*/ + @Test public void parseCsv() { + final Function func = PARSE_CSV; + roundtrip(func, " ()", "", "{\"columns\":(),\"column-index\":{},\"rows\":(),\"get\":(anonymous-" + + "function)#2}"); + roundtrip(func, "", "", "{\"columns\":(),\"column-index\":{},\"rows\":(),\"get\":(anonymous-fun" + + "ction)#2}"); + roundtrip(func, "one", "", "{\"columns\":(),\"column-index\":{},\"rows\":[\"one\"],\"get\":(ano" + + "nymous-function)#2}"); + roundtrip(func, "one,two", "", "{\"columns\":(),\"column-index\":{},\"rows\":[\"one\",\"two\"]," + + "\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two three,four", "", "{\"columns\":(),\"column-index\":{},\"rows\":([" + + "\"one\",\"two\"],[\"three\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two three,four ", "", "{\"columns\":(),\"column-index\":{},\"rows" + + "\":([\"one\",\"two\"],[\"three\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two three,four,five", "", "{\"columns\":(),\"column-index\":{},\"rows" + + "\":([\"one\",\"two\"],[\"three\",\"four\",\"five\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two three,four", "", "{\"columns\":(),\"column-index\":{},\"rows" + + "\":([\"one\",\"two\"],[],[\"three\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two \"three,four\",five", "", "{\"columns\":(),\"column-index\":{},\"r" + + "ows\":([\"one\",\"two\"],[\"three,four\",\"five\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two \"three,\"\"four\"\"\",five", "", "{\"columns\":(),\"column-index" + + "\":{},\"rows\":([\"one\",\"two\"],[\"three,\"\"four\"\"\",\"five\"]),\"get\":(anonymous" + + "-function)#2}"); + roundtrip(func, "one, ,four , ,,,", "", "{\"columns\":(),\"column-index\":{},\"rows" + + "\":([\"one\",\"\"],[\"\",\"four\"],[\"\",\"\"],[\"\",\"\",\"\",\"\"]),\"get\":(anonymous" + + "-function)#2}"); + roundtrip(func, "one,\"\" \"\",\"four\"", "", "{\"columns\":(),\"column-index\":{},\"rows\"" + + 
":([\"one\",\"\"],[\"\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,\"[ ]\" \"\",\"four\"", "", "{\"columns\":(),\"column-index\":{}," + + "\"rows\":([\"one\",\"[ ]\"],[\"\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one;two three;four", "'field-delimiter': ';'", "{\"columns\":(),\"column-i" + + "ndex\":{},\"rows\":([\"one\",\"two\"],[\"three\",\"four\"]),\"get\":(anonymous-function)" + + "#2}"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|'", "{\"columns\":(),\"column-index\"" + + ":{},\"rows\":([\"one\",\"two\"],[\"three\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one.two|three.four", "'row-delimiter': '|', 'field-delimiter': '.'", "{\"colum" + + "ns\":(),\"column-index\":{},\"rows\":([\"one\",\"two\"],[\"three\",\"four\"]),\"get\":(a" + + "nonymous-function)#2}"); + roundtrip(func, "one,'two,2'|three,'four,4'", "'row-delimiter': '|', 'quote-character': ''''", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"one\",\"two,2\"],[\"three\",\"four,4\"])," + + "\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,'two,''2'''|three,'four,''4'''", "'row-delimiter': '|', 'quote-character':" + + " ''''", "{\"columns\":(),\"column-index\":{},\"rows\":([\"one\",\"two,'2'\"],[\"three\"," + + "\"four,'4'\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one ,two | three, four", "'row-delimiter': '|'", "{\"columns\":(),\"column-ind" + + "ex\":{},\"rows\":([\"one \",\"two \"],[\" three\",\" four\"]),\"get\":(anonymous-functio" + + "n)#2}"); + roundtrip(func, "one ,two | three, four", "'row-delimiter': '|', 'trim-whitespace': false()", "" + + "{\"columns\":(),\"column-index\":{},\"rows\":([\"one \",\"two \"],[\" three\",\" four\"]" + + "),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one ,two | three, twenty four ", "'row-delimiter': '|', 'trim-whitespace': tr" + + "ue()", "{\"columns\":(),\"column-index\":{},\"rows\":([\"one\",\"two\"],[\"three\",\"twe" + + "nty 
four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, " ", "", "{\"columns\":(),\"column-index\":{},\"rows\":[],\"get\":(anonymou" + + "s-function)#2}"); + roundtrip(func, " ", "", "{\"columns\":(),\"column-index\":{},\"rows\":([],[\" \"]),\"get" + + "\":(anonymous-function)#2}"); + roundtrip(func, " ", "'trim-whitespace': true()", "{\"columns\":(),\"column-index\":{},\"r" + + "ows\":[],\"get\":(anonymous-function)#2}"); + roundtrip(func, " ", "'trim-whitespace': true()", "{\"columns\":(),\"column-index\":{}" + + ",\"rows\":([],[]),\"get\":(anonymous-function)#2}"); + roundtrip(func, " ", "'trim-whitespace': true()", "{\"columns\":(),\"column-index" + + "\":{},\"rows\":([],[],[]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "left,right|one,two|three,four", "'row-delimiter': '|', 'header': true()", "{\"" + + "columns\":(\"left\",\"right\"),\"column-index\":{\"left\":1,\"right\":2},\"rows\":([\"on" + + "e\",\"two\"],[\"three\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': ('left', 'right')", "{\"" + + "columns\":(\"left\",\"right\"),\"column-index\":{\"left\":1,\"right\":2},\"rows\":([\"on" + + "e\",\"two\"],[\"three\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': false()", "{\"columns\":" + + "(),\"column-index\":{},\"rows\":([\"one\",\"two\"],[\"three\",\"four\"]),\"get\":(anonym" + + "ous-function)#2}"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': 'left'", "{\"columns\":" + + "\"left\",\"column-index\":{\"left\":1},\"rows\":([\"one\",\"two\"],[\"three\",\"four\"])" + + ",\"get\":(anonymous-function)#2}"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': ('', 'right')", "{\"colu" + + "mns\":(\"\",\"right\"),\"column-index\":{\"right\":2},\"rows\":([\"one\",\"two\"],[\"thr" + + "ee\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, 
"left,left|one,two|three,four", "'row-delimiter': '|', 'header': true()", "{\"c" + + "olumns\":(\"left\",\"left\"),\"column-index\":{\"left\":1},\"rows\":([\"one\",\"two\"],[" + + "\"three\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, ",right|one,two|three,four", "'row-delimiter': '|', 'header': true()", "{\"colu" + + "mns\":(\"\",\"right\"),\"column-index\":{\"right\":2},\"rows\":([\"one\",\"two\"],[\"thr" + + "ee\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, ",|one,two|three,four", "'row-delimiter': '|', 'header': true()", "{\"columns\"" + + ":(\"\",\"\"),\"column-index\":{},\"rows\":([\"one\",\"two\"],[\"three\",\"four\"]),\"get" + + "\":(anonymous-function)#2}"); + roundtrip(func, "left,right", "'row-delimiter': '|', 'header': true()", "{\"columns\":(\"left\"" + + ",\"right\"),\"column-index\":{\"left\":1,\"right\":2},\"rows\":(),\"get\":(anonymous-fun" + + "ction)#2}"); + roundtrip(func, "1,2,3,4,5,6,7,8,9,10|11,12,13,14,15,16,17,18,19,20", "'row-delimiter': '|', 's" + + "elect-columns': (1 to 4)", "{\"columns\":(),\"column-index\":{},\"rows\":([\"1\",\"2\"," + + "\"3\",\"4\"],[\"11\",\"12\",\"13\",\"14\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e,f,g,h,i|1,2,3,4,5,6,7,8,9,10|11,12,13,14,15,16,17,18,19,20", "'row-d" + + "elimiter': '|', 'select-columns': (1 to 4), 'header': true()", "{\"columns\":(\"a\",\"b" + + "\",\"c\",\"d\"),\"column-index\":{\"a\":1,\"b\":2,\"c\":3,\"d\":4},\"rows\":([\"1\",\"2" + + "\",\"3\",\"4\"],[\"11\",\"12\",\"13\",\"14\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "1,2,3,4|11,12,13,14,15,16,17,18,19,20", "'row-delimiter': '|', 'trim-rows': tr" + + "ue()", "{\"columns\":(),\"column-index\":{},\"rows\":([\"1\",\"2\",\"3\",\"4\"],[\"11\"," + + "\"12\",\"13\",\"14\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d|1,2,3,4,5,6,7,8,9,10|11,12,13,14,15,16,17,18,19,20", "'row-delimiter':" + + " '|', 'trim-rows': true(), 'header': true()", 
"{\"columns\":(\"a\",\"b\",\"c\",\"d\"),\"" + + "column-index\":{\"a\":1,\"b\":2,\"c\":3,\"d\":4},\"rows\":([\"1\",\"2\",\"3\",\"4\"],[\"" + + "11\",\"12\",\"13\",\"14\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "1,2,3,4|11,12,13,14,15,16", "'row-delimiter': '|', 'trim-rows': false(), 'head" + + "er': false()", "{\"columns\":(),\"column-index\":{},\"rows\":([\"1\",\"2\",\"3\",\"4\"]," + + "[\"11\",\"12\",\"13\",\"14\",\"15\",\"16\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "1,2,3,4,5,6|14,15,16", "'row-delimiter': '|', 'select-columns': (1 to 4)", "{" + + "\"columns\":(),\"column-index\":{},\"rows\":([\"1\",\"2\",\"3\",\"4\"],[\"14\",\"15\",\"" + + "16\",\"\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "1,2,3,4,5,6|14,15,16", "'row-delimiter': '|', 'trim-rows': true()", "{\"column" + + "s\":(),\"column-index\":{},\"rows\":([\"1\",\"2\",\"3\",\"4\",\"5\",\"6\"],[\"14\",\"15" + + "\",\"16\",\"\",\"\",\"\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e|1,2,3|14,15,16", "'row-delimiter': '|', 'trim-rows': true(), 'header" + + "': true()", "{\"columns\":(\"a\",\"b\",\"c\",\"d\",\"e\"),\"column-index\":{\"a\":1,\"b" + + "\":2,\"c\":3,\"d\":4,\"e\":5},\"rows\":([\"1\",\"2\",\"3\",\"\",\"\"],[\"14\",\"15\",\"1" + + "6\",\"\",\"\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (4, 3, 2, 1), 'row-delimiter': '|'", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"4\",\"3\",\"2\",\"1\"],[\"14\",\"13\",\"1" + + "2\",\"11\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (1, 4), 'row-delimiter': '|'", "{\"co" + + "lumns\":(),\"column-index\":{},\"rows\":([\"1\",\"4\"],[\"11\",\"14\"]),\"get\":(anonymo" + + "us-function)#2}"); + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (1, 4, 17), 'row-delimiter': '|'", "{" + + "\"columns\":(),\"column-index\":{},\"rows\":([\"1\",\"4\",\"\"],[\"11\",\"14\",\"\"]),\"" + + 
"get\":(anonymous-function)#2}"); + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (1, 17, 4), 'row-delimiter': '|'", "{" + + "\"columns\":(),\"column-index\":{},\"rows\":([\"1\",\"\",\"4\"],[\"11\",\"\",\"14\"]),\"" + + "get\":(anonymous-function)#2}"); + roundtrip(func, "1,2,3,4|11,12,13,14,15", "'select-columns': (1, 4, 5), 'row-delimiter': '|'", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"1\",\"4\",\"\"],[\"11\",\"14\",\"15\"])," + + "\"get\":(anonymous-function)#2}"); + roundtrip(func, "first,second,third,fourth|1,2,3,4|11,12,13,14", "'select-columns': (1, 4, 3), " + + "'header': true(), 'row-delimiter': '|'", "{\"columns\":(\"first\",\"fourth\",\"third\")," + + "\"column-index\":{\"first\":1,\"fourth\":2,\"third\":3},\"rows\":([\"1\",\"4\",\"3\"],[" + + "\"11\",\"14\",\"13\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|', 'select-columns': (1 to 3)", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"a\",\"b\",\"c\"],[\"p\",\"q\",\"r\"]),\"g" + + "et\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|', 'select-columns': (2 to 4)", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"b\",\"c\",\"d\"],[\"q\",\"r\",\"s\"]),\"g" + + "et\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|', 'select-columns': (4, 3, 2)", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"d\",\"c\",\"b\"],[\"s\",\"r\",\"q\"]),\"g" + + "et\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|', 'select-columns': (4, 3, 1)", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"d\",\"c\",\"a\"],[\"s\",\"r\",\"p\"]),\"g" + + "et\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|', 'select-columns': (4, 3, 1)", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"d\",\"c\",\"a\"],[\"s\",\"r\",\"p\"]),\"g" + + "et\":(anonymous-function)#2}"); + 
roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|', 'select-columns': (4, 3, 1)", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"d\",\"c\",\"a\"],[\"s\",\"r\",\"p\"]),\"g" + + "et\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|', 'select-columns': (4, 3, 1)", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"d\",\"c\",\"a\"],[\"s\",\"r\",\"p\"]),\"" + + "get\":(anonymous-function)#2}"); + roundtrip(func, "left,right|one,two|three,four", "'row-delimiter': '|', 'header': true()", "{\"" + + "columns\":(\"left\",\"right\"),\"column-index\":{\"left\":1,\"right\":2},\"rows\":([\"on" + + "e\",\"two\"],[\"three\",\"four\"]),\"get\":(anonymous-function)#2}"); + roundtrip(func, "a,b,c,d,e,f|p,q,r,s,t,u", "'row-delimiter': '|', 'select-columns': (4, 3, 1)", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"d\",\"c\",\"a\"],[\"s\",\"r\",\"p\"]),\"g" + + "et\":(anonymous-function)#2}"); + } + + /** Test method. */ + @Test public void csvToXml() { + final Function func = CSV_TO_XML; + roundtrip(func, "", "", ""); + roundtrip(func, "one", "", "one"); + roundtrip(func, "one,two", "", "onetwo"); + roundtrip(func, "one,two three,four", "", "onetwothreefour"); + roundtrip(func, "one,two three,four ", "", "onetwothreefour"); + roundtrip(func, "one,two three,four,five", "", "onetwothreefourfive"); + roundtrip(func, "one,two three,four", "", "onetwothree<" + + "/field>four"); + roundtrip(func, "one,two \"three,four\",five", "", "onetwothree,fo" + + "urfive"); + roundtrip(func, "one,two \"three,\"\"four\"\"\",five", "", "onetwo" + + "three,\"four\"five"); + roundtrip(func, "one, ,four , ,,,", "", "onefour<" + + "/row>" + ); + roundtrip(func, "one,\"\" \"\",\"four\"", "", "onefour"); + roundtrip(func, "one;two three;four", "'field-delimiter': ';'", "onetwothreefour"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|'", "onetwot" + + "hreefour"); + roundtrip(func, "one.two|three.four", "'row-delimiter': '|', 
'field-delimiter': '.'", "onetwothreefour"); + roundtrip(func, "one,'two,2'|three,'four,4'", "'row-delimiter': '|', 'quote-character': ''''", + "one" + + "two,2threefour,4"); + roundtrip(func, "one,'two,''2'''|three,'four,''4'''", "'row-delimiter': '|', 'quote-character':" + + " ''''", + "one" + + "two,'2'threefour,'4'" + ); + roundtrip(func, "one ,two | three, four", "'row-delimiter': '|'", "one two three four"); + roundtrip(func, "one ,two | three, four", "'row-delimiter': '|', 'trim-whitespace': false()", + "one two three four"); + roundtrip(func, "one ,two | three, twenty four ", "'row-delimiter': '|', 'trim-whitespace': tr" + + "ue()", "onetwothreetwenty four"); + roundtrip(func, " ", "", ""); + roundtrip(func, "left,right|one,two|three,four", "'row-delimiter': '|', 'header': true()", "leftrightonetwothree" + + "four"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': ('left', 'right')", "leftrightonetwothree" + + "four"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': false()", "onetwo" + + "threefour"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': 'left'", "leftonetwothreefour"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': ('', 'right')", "right<" + + "/columns>onetwothreefour"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|', 'header': ()", "onetwothreefour"); + roundtrip(func, "left,left|one,two|three,four", "'row-delimiter': '|', 'header': true()", "left" + + "leftonetwothreefour" + + ""); + roundtrip(func, ",right|one,two|three,four", "'row-delimiter': '|', 'header': true()", "right<" + + "/columns>onetwothreefour"); + roundtrip(func, ",|one,two|three,four", "'row-delimiter': '|', 'header': true()", "onetwothreefour"); + roundtrip(func, "1,2,3,4,5,6,7,8,9,10|11,12,13,14,15,16,17,18,19,20", "'row-delimiter': '|', 's" + + "elect-columns': (1 to 4)", "" + + "12341" + + "1121314"); + roundtrip(func, 
"a,b,c,d,e,f,g,h,i|1,2,3,4,5,6,7,8,9,10|11,12,13,14,15,16,17,18,19,20", "'row-d" + + "elimiter': '|', 'select-columns': (1 to 4), 'header': true()", "abcd123411121314"); + roundtrip(func, "1,2,3,4|11,12,13,14,15,16,17,18,19,20", "'row-delimiter': '|', 'trim-rows': tr" + + "ue()", "123411121314"); + roundtrip(func, "a,b,c,d|1,2,3,4,5,6,7,8,9,10|11,12,13,14,15,16,17,18,19,20", "'row-delimiter':" + + " '|', 'trim-rows': true(), 'header': true()", "abcd123411" + + "1213" + + "14"); + roundtrip(func, "1,2,3,4|11,12,13,14,15,16", "'row-delimiter': '|', 'trim-rows': false(), 'head" + + "er': false()", "" + + "1234111213141516" + + ""); + roundtrip(func, "1,2,3,4,5,6|14,15,16", "'row-delimiter': '|', 'select-columns': (1 to 4)", "12<" + + "/field>34141516"); + roundtrip(func, "1,2,3,4,5,6|14,15,16", "'row-delimiter': '|', 'trim-rows': true()", "12" + + "3456141516"); + roundtrip(func, "a,b,c,d,e|1,2,3,4,5|14,15,16", "'row-delimiter': '|', 'trim-rows': true(), 'he" + + "ader': true()", "" + + "abcde12345141516"); + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (4, 3, 2, 1), 'row-delimiter': '|'", + "43<" + + "/field>2114131211"); + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (1, 4), 'row-delimiter': '|'", "141114"); + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (1, 4, 17), 'row-delimiter': '|'", "<" + + "csv xmlns=\"http://www.w3.org/2005/xpath-functions\">14" + + "1114" + ); + roundtrip(func, "1,2,3,4|11,12,13,14", "'select-columns': (1, 17, 4), 'row-delimiter': '|'", "<" + + "csv xmlns=\"http://www.w3.org/2005/xpath-functions\">1" + + "41114" + ); + roundtrip(func, "1,2,3,4|11,12,13,14,15", "'select-columns': (1, 4, 5), 'row-delimiter': '|'", + "14<" + + "/field>111415"); + roundtrip(func, "first,second,third,fourth|1,2,3,4|11,12,13,14", "'select-columns': (1, 4, 3)," + + " 'header': true(), 'row-delimiter': '|'", "firstfourththird<" + + "/columns>14" + + "3111413"); + roundtrip(func, "one,two|three,four", "'row-delimiter': '|'", 
"onetwothreefour"); + } + + /** + * Parses csv with the given function and verifies that the result is as expected. Then + * serializes the result, parses the serialization, and verifies that this also returns + * the expected result. + * @param function function + * @param input csv input + * @param options options + * @param expected expected result + */ + private void roundtrip(final Function function, final String input, final String options, + final String expected) { + final String parseQuery = function.args(input, " { " + options + " }"); + final String result = query(parseQuery); + compare(parseQuery, result, expected, null); + final String serializeQuery = _CSV_SERIALIZE.args( + result.startsWith("<") ? ' ' + result : + result.startsWith("[") ? " (" + result.replace('\n', ',') + ")" : + result.startsWith("{") ? " " + result.replaceAll(",\"get\":\\(anonymous-function\\)#2", "") : + result.isEmpty() ? " ()" : " " + result, " { " + options + " }"); + final String serialization = query(serializeQuery); + final String roundtripQuery = function.args(" \"" + serialization.replace("\"", "\"\"") + "\"", + " { " + options + " }"); + compare(roundtripQuery, query(roundtripQuery), expected, null); + } +} From 74bd18d19874d60db59f75e6315d2ec359caaab8 Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Tue, 21 Jan 2025 20:59:00 +0100 Subject: [PATCH 2/3] restore getting CSV serializer by format value; remove UnsupportedOperationException; bugfix; refactoring --- .../java/org/basex/io/serial/Serializer.java | 2 +- .../basex/io/serial/csv/CsvSerializer.java | 137 ++++++++++++------ 2 files changed, 91 insertions(+), 48 deletions(-) diff --git a/basex-core/src/main/java/org/basex/io/serial/Serializer.java b/basex-core/src/main/java/org/basex/io/serial/Serializer.java index c2e89a1204..0c342167ec 100644 --- a/basex-core/src/main/java/org/basex/io/serial/Serializer.java +++ b/basex-core/src/main/java/org/basex/io/serial/Serializer.java @@ -79,7 +79,7 @@ public static 
Serializer get(final OutputStream os, final SerializerOptions sopt case XHTML: return new XHTMLSerializer(os, so); case HTML: return new HTMLSerializer(os, so); case TEXT: return new TextSerializer(os, so); - case CSV: return new CsvSerializer.Delegator(os, so); + case CSV: return CsvSerializer.get(os, so); case JSON: final JsonSerialOptions jopts = so.get(SerializerOptions.JSON); final JsonFormat jformat = jopts.get(JsonOptions.FORMAT); diff --git a/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java b/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java index 8092d80ac7..46b4933bb0 100644 --- a/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java +++ b/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java @@ -6,11 +6,14 @@ import java.io.*; import org.basex.build.csv.*; +import org.basex.build.csv.CsvOptions.*; import org.basex.io.parse.csv.*; import org.basex.io.serial.*; import org.basex.query.*; +import org.basex.query.func.fn.*; import org.basex.query.func.fn.FnCsvToArrays.*; import org.basex.query.func.fn.FnParseCsv.*; +import org.basex.query.value.*; import org.basex.query.value.array.*; import org.basex.query.value.item.*; import org.basex.query.value.map.*; @@ -64,7 +67,8 @@ public abstract class CsvSerializer extends StandardSerializer { quoteCharacter = copts.quoteCharacter(); selectColumns = copts.get(CsvOptions.SELECT_COLUMNS); maxCol = -1; - for(final int col : selectColumns) if(col > maxCol) maxCol = col; + for(final int col : selectColumns) + if(col > maxCol) maxCol = col; } /** @@ -75,15 +79,18 @@ public abstract class CsvSerializer extends StandardSerializer { final void record(final TokenList entries) throws IOException { int f = 0; if(maxCol < 0) { - for(final byte[] val : entries) field(f++, val); + for(final byte[] val : entries) + field(f++, val); } else { - final byte[][] row = new byte[maxCol + 1][]; + final byte[][] row = new byte[maxCol][]; int i = 0; for(final byte[] val : 
entries) { + if(i == selectColumns.length) break; final int j = selectColumns[i++] - 1; if(row[j] == null) row[j] = val; } - for(final byte[] val : row) field(f++, val == null ? Token.EMPTY : val); + for(final byte[] val : row) + field(f++, val == null ? Token.EMPTY : val); } out.print(rowDelimiter); entries.reset(); @@ -137,10 +144,34 @@ protected void atomic(final Item value) throws IOException { } /** - * This delegator class allows lazy instantiation of the concrete CSV serializer, depending on - * the item to be serialized. + * Returns a CSV serializer for the given serialization options. + * @param os output stream reference + * @param so serialization options + * @return serializer + * @throws IOException I/O exception */ - public static class Delegator extends Serializer { + public static Serializer get(final OutputStream os, final SerializerOptions so) + throws IOException { + final XQMap opts = (XQMap) so.get(SerializerOptions.CSV); + try { + final Value value = opts.get(Str.get(CsvOptions.FORMAT.name())); + final Item item = value.isItem() ? (Item) value : null; + final String format = item == null ? null : Token.string(item.string(null)); + if(format == null) return new Delegator(os, so); + final CsvParserOptions copts = new CsvParserOptions(); + copts.assign(opts, null); + return format.equals(CsvFormat.XQUERY.toString()) ? new CsvXQuerySerializer(os, so, copts) + : new CsvDirectSerializer(os, so, copts); + } catch(QueryException ex) { + throw new QueryIOException(ex); + } + } + + /** + * This delegator class allows lazy instantiation of the concrete CSV serializer, depending on the + * item to be serialized. + */ + private static class Delegator extends Serializer { /** Output stream. */ private final OutputStream os; /** Serializer options. 
*/ @@ -153,59 +184,71 @@ public static class Delegator extends Serializer { * @param os output stream * @param sopts serializer options */ - public Delegator(final OutputStream os, final SerializerOptions sopts) { + Delegator(final OutputStream os, final SerializerOptions sopts) { this.os = os; this.so = sopts; } @Override public void serialize(final Item item) throws IOException { - if(delegate == null) { - try { - final XQMap opts = (XQMap) so.get(SerializerOptions.CSV); - if(item instanceof FNode) { - final FElem root; - if(item instanceof FElem) { - root = (FElem) item; - } else if(item instanceof FDoc) { - final FDoc doc = (FDoc) item; - root = doc.hasChildren() ? (FElem) doc.childIter().next() : null; - } else { - root = null; - } - if(root != null && root.qname().eq(CsvXmlConverter.Q_FN_CSV)) { - final ParseCsvOptions popts = new ParseCsvOptions(); - popts.assign(opts, null); - popts.validate(null); - delegate = new CsvXmlSerializer(os, so, popts.toCsvParserOptions()); - } else { - final CsvParserOptions copts = new CsvParserOptions(); - copts.assign(opts, null); - delegate = new CsvDirectSerializer(os, so, copts); - } - } else if(item instanceof XQArray) { - final CsvToArraysOptions aopts = new CsvToArraysOptions(); - aopts.assign(opts, null); - aopts.validate(null); - delegate = new CsvArraysSerializer(os, so, aopts.toCsvParserOptions()); - } else if(!(item instanceof XQMap)) { - throw new UnsupportedOperationException( - "Cannot serialize items of type " + item.getClass()); - } else if(((XQMap) item).contains(CsvXQueryConverter.RECORDS)) { - final CsvParserOptions copts = new CsvParserOptions(); - copts.assign(opts, null); - delegate = new CsvXQuerySerializer(os, so, copts); + if(delegate == null) delegate = get(item); + delegate.serialize(item); + } + + /** + * Returns a CSV serializer for the given item. 
+ * @param item item to be serialized + * @return CSV serializer + * @throws IOException IO exception + */ + private CsvSerializer get(final Item item) throws IOException { + try { + final XQMap opts = (XQMap) so.get(SerializerOptions.CSV); + if(item instanceof FNode) { + final FElem root; + if(item instanceof FElem) { + root = (FElem) item; + } else if(item instanceof FDoc) { + final FDoc doc = (FDoc) item; + root = doc.hasChildren() ? (FElem) doc.childIter().next() : null; } else { + root = null; + } + if(root != null && root.qname().eq(CsvXmlConverter.Q_FN_CSV)) { final ParseCsvOptions popts = new ParseCsvOptions(); popts.assign(opts, null); popts.validate(null); - delegate = new CsvMapSerializer(os, so, popts.toCsvParserOptions()); + return new CsvXmlSerializer(os, so, popts.toCsvParserOptions()); } - } catch(final QueryException ex) { - throw new QueryIOException(ex); + final CsvParserOptions copts = new CsvParserOptions(); + copts.assign(opts, null); + return new CsvDirectSerializer(os, so, copts); + } + + if(item instanceof XQArray) { + final CsvToArraysOptions aopts = new CsvToArraysOptions(); + aopts.assign(opts, null); + aopts.validate(null); + return new CsvArraysSerializer(os, so, aopts.toCsvParserOptions()); + } + + if(!(item instanceof XQMap)) { + throw CSV_SERIALIZE_X_X.getIO("Cannot serialize items of type " + item.type); + } + + if(((XQMap) item).contains(FnParseCsv.ROWS)) { + final ParseCsvOptions popts = new ParseCsvOptions(); + popts.assign(opts, null); + popts.validate(null); + return new CsvMapSerializer(os, so, popts.toCsvParserOptions()); } + + final CsvParserOptions copts = new CsvParserOptions(); + copts.assign(opts, null); + return new CsvXQuerySerializer(os, so, copts); + } catch(final QueryException ex) { + throw new QueryIOException(ex); } - delegate.serialize(item); } } } From 903ea51174c7baac15c50d33efc6000002ccd61d Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Wed, 22 Jan 2025 08:33:06 +0100 Subject: [PATCH 3/3] fix error 
message --- .../src/main/java/org/basex/io/serial/csv/CsvSerializer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java b/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java index 46b4933bb0..ef6bdb3fce 100644 --- a/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java +++ b/basex-core/src/main/java/org/basex/io/serial/csv/CsvSerializer.java @@ -233,7 +233,7 @@ private CsvSerializer get(final Item item) throws IOException { } if(!(item instanceof XQMap)) { - throw CSV_SERIALIZE_X_X.getIO("Cannot serialize items of type " + item.type); + throw CSV_SERIALIZE_X_X.getIO("Cannot serialize items of type " + item.type, item); } if(((XQMap) item).contains(FnParseCsv.ROWS)) {