From fd72240de9e2a6d9accb29dba6ab7e1c3ca471f9 Mon Sep 17 00:00:00 2001 From: axexlck Date: Sun, 8 Dec 2024 14:39:18 +0100 Subject: [PATCH] Update tablesaw for `DataSet, SemanticImportString, SemanticImport` - use new https://github.com/tlabs-data/tablesaw which is a maintenance fork of the original project - Discussion: https://github.com/jtablesaw/tablesaw/discussions/1261 - https://github.com/tlabs-data/tablesaw/releases/tag/v0.43.2 - https://github.com/axkr/symja_android_library/blob/master/symja_android_library/doc/functions/Dataset.md - https://github.com/axkr/symja_android_library/blob/master/symja_android_library/doc/functions/SemanticImportString.md - https://github.com/axkr/symja_android_library/blob/master/symja_android_library/doc/functions/SemanticImport.md --- .../doc/functions/Dataset.md | 5 +- .../doc/functions/SemanticImport.md | 2 +- .../doc/functions/SemanticImportString.md | 2 +- .../main/resources/doc/functions/Dataset.md | 5 +- .../resources/doc/functions/SemanticImport.md | 2 +- .../doc/functions/SemanticImportString.md | 2 +- .../aggregate/AggregateFunctions.java | 3 + .../aggregate/BooleanCountFunction.java | 14 + .../aggregate/BooleanNumericFunction.java | 13 + .../tablesaw/aggregate/CountFunction.java | 14 + .../tablesaw/aggregate/StringFunction.java | 13 +- .../java/tech/tablesaw/api/BooleanColumn.java | 156 ++- .../java/tech/tablesaw/api/ColumnType.java | 17 +- .../java/tech/tablesaw/api/InstantColumn.java | 20 +- .../java/tech/tablesaw/api/QuerySupport.java | 9 + .../src/main/java/tech/tablesaw/api/Row.java | 95 +- .../java/tech/tablesaw/api/StringColumn.java | 214 +--- .../main/java/tech/tablesaw/api/Table.java | 182 ++- .../TextColumn.java} | 229 ++-- .../tech/tablesaw/columns/AbstractColumn.java | 7 - .../java/tech/tablesaw/columns/Column.java | 6 - .../tablesaw/columns/dates/DateParser.java | 23 +- .../columns/datetimes/DateTimeParser.java | 20 +- .../columns/instant/PackedInstant.java | 2 +- 
.../columns/numbers/DoubleColumnType.java | 6 + .../columns/strings/AbstractStringColumn.java | 108 ++ .../columns/strings/ByteDictionaryMap.java | 23 +- .../columns/strings/DictionaryMap.java | 41 +- .../columns/strings/IntDictionaryMap.java | 20 +- .../columns/strings/NullDictionaryMap.java | 194 --- .../columns/strings/ShortDictionaryMap.java | 40 +- .../tablesaw/columns/strings/StringData.java | 96 -- .../columns/strings/StringFilters.java | 20 +- .../columns/strings/StringMapFunctions.java | 17 +- .../columns/strings/StringReduceUtils.java | 6 +- .../columns/strings/TextColumnType.java | 51 + .../tablesaw/columns/times/TimeParser.java | 9 +- .../filtering/DeferredTextColumn.java | 132 +++ .../java/tech/tablesaw/index/StringIndex.java | 4 +- .../tech/tablesaw/io/ColumnTypeDetector.java | 23 +- .../tech/tablesaw/io/DataFrameReader.java | 2 +- .../java/tech/tablesaw/io/Destination.java | 7 +- .../java/tech/tablesaw/io/FileReader.java | 23 +- .../java/tech/tablesaw/io/ReadOptions.java | 28 +- .../tech/tablesaw/io/csv/CsvReadOptions.java | 21 + .../tech/tablesaw/io/csv/CsvWriteOptions.java | 22 + .../io/fixed/FixedWidthReadOptions.java | 21 + .../tablesaw/io/jdbc/SqlResultSetReader.java | 8 +- .../tech/tablesaw/joining/AbstractJoiner.java | 439 ------- .../tablesaw/joining/ColumnIndexPair.java | 30 - .../tablesaw/joining/CrossProductJoin.java | 521 -------- .../tablesaw/joining/DataFrameJoiner.java | 1046 ++++++++++++++--- .../tech/tablesaw/joining/JoinStrategy.java | 15 - .../java/tech/tablesaw/joining/JoinType.java | 9 - .../tablesaw/joining/RowComparatorChain.java | 151 --- .../java/tech/tablesaw/joining/SortKey.java | 177 --- .../tech/tablesaw/joining/SortMergeJoin.java | 458 -------- .../selection/BitmapBackedSelection.java | 5 - .../tech/tablesaw/selection/Selection.java | 6 - .../java/tech/tablesaw/table/Relation.java | 40 +- .../main/java/tech/tablesaw/table/Rows.java | 39 +- .../table/StandardTableSliceGroup.java | 5 +- 
.../java/tech/tablesaw/table/TableSlice.java | 2 +- ...tCase.java => SemanticImportTestCase.java} | 2 +- 64 files changed, 1964 insertions(+), 2958 deletions(-) rename symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/{columns/strings/TextualStringData.java => api/TextColumn.java} (66%) create mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/AbstractStringColumn.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/NullDictionaryMap.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringData.java create mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/TextColumnType.java create mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/filtering/DeferredTextColumn.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/AbstractJoiner.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/ColumnIndexPair.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/CrossProductJoin.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/JoinStrategy.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/JoinType.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/RowComparatorChain.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/SortKey.java delete mode 100644 symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/SortMergeJoin.java rename symja_android_library/matheclipse-io/src/test/java/org/matheclipse/io/test/{FunctionsTestCase.java => SemanticImportTestCase.java} (99%) diff --git a/symja_android_library/doc/functions/Dataset.md 
b/symja_android_library/doc/functions/Dataset.md index bf1e1e4f0a..1fbfdb4ae7 100644 --- a/symja_android_library/doc/functions/Dataset.md +++ b/symja_android_library/doc/functions/Dataset.md @@ -1,13 +1,13 @@ ## Dataset -`` +``` Dataset( association ) ``` > create a `Dataset` object from the `association` Dataset uses: -* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/jtablesaw/tablesaw) +* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/tlabs-data/tablesaw) ### Examples @@ -20,7 +20,6 @@ Dataset uses: 102 | 42 | 7.5 | 103 | 42 | 7.5 | ``` - ### Related terms [SemanticImport](SemanticImport.md), [SemanticImportString](SemanticImportString.md) \ No newline at end of file diff --git a/symja_android_library/doc/functions/SemanticImport.md b/symja_android_library/doc/functions/SemanticImport.md index cce3d4b850..760681f5a2 100644 --- a/symja_android_library/doc/functions/SemanticImport.md +++ b/symja_android_library/doc/functions/SemanticImport.md @@ -7,7 +7,7 @@ SemanticImport("path-to-filename") > if the file system is enabled, import the data from CSV files and do a semantic interpretation of the columns. Dataset uses: -* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/jtablesaw/tablesaw) +* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/tlabs-data/tablesaw) ### Examples diff --git a/symja_android_library/doc/functions/SemanticImportString.md b/symja_android_library/doc/functions/SemanticImportString.md index e83a1f3896..adb9a58d98 100644 --- a/symja_android_library/doc/functions/SemanticImportString.md +++ b/symja_android_library/doc/functions/SemanticImportString.md @@ -7,7 +7,7 @@ SemanticImportString("string-content") > import the data from a content string in CSV format and do a semantic interpretation of the columns. 
Dataset uses: -* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/jtablesaw/tablesaw) +* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/tlabs-data/tablesaw) ### Examples diff --git a/symja_android_library/matheclipse-core/src/main/resources/doc/functions/Dataset.md b/symja_android_library/matheclipse-core/src/main/resources/doc/functions/Dataset.md index bf1e1e4f0a..1fbfdb4ae7 100644 --- a/symja_android_library/matheclipse-core/src/main/resources/doc/functions/Dataset.md +++ b/symja_android_library/matheclipse-core/src/main/resources/doc/functions/Dataset.md @@ -1,13 +1,13 @@ ## Dataset -`` +``` Dataset( association ) ``` > create a `Dataset` object from the `association` Dataset uses: -* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/jtablesaw/tablesaw) +* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/tlabs-data/tablesaw) ### Examples @@ -20,7 +20,6 @@ Dataset uses: 102 | 42 | 7.5 | 103 | 42 | 7.5 | ``` - ### Related terms [SemanticImport](SemanticImport.md), [SemanticImportString](SemanticImportString.md) \ No newline at end of file diff --git a/symja_android_library/matheclipse-core/src/main/resources/doc/functions/SemanticImport.md b/symja_android_library/matheclipse-core/src/main/resources/doc/functions/SemanticImport.md index cce3d4b850..760681f5a2 100644 --- a/symja_android_library/matheclipse-core/src/main/resources/doc/functions/SemanticImport.md +++ b/symja_android_library/matheclipse-core/src/main/resources/doc/functions/SemanticImport.md @@ -7,7 +7,7 @@ SemanticImport("path-to-filename") > if the file system is enabled, import the data from CSV files and do a semantic interpretation of the columns. 
Dataset uses: -* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/jtablesaw/tablesaw) +* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/tlabs-data/tablesaw) ### Examples diff --git a/symja_android_library/matheclipse-core/src/main/resources/doc/functions/SemanticImportString.md b/symja_android_library/matheclipse-core/src/main/resources/doc/functions/SemanticImportString.md index e83a1f3896..adb9a58d98 100644 --- a/symja_android_library/matheclipse-core/src/main/resources/doc/functions/SemanticImportString.md +++ b/symja_android_library/matheclipse-core/src/main/resources/doc/functions/SemanticImportString.md @@ -7,7 +7,7 @@ SemanticImportString("string-content") > import the data from a content string in CSV format and do a semantic interpretation of the columns. Dataset uses: -* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/jtablesaw/tablesaw) +* [Github - JTablesaw - Java dataframe and visualization library ](https://github.com/tlabs-data/tablesaw) ### Examples diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/AggregateFunctions.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/AggregateFunctions.java index 34c2cb260d..4f26e114ac 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/AggregateFunctions.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/AggregateFunctions.java @@ -578,6 +578,9 @@ public Double summarize(NumericColumn column) { } }; + /** @deprecated use {@link #stdDev} instead */ + @Deprecated public static final NumericAggregateFunction standardDeviation = stdDev; + /** Returns the given percentile of the values in the argument */ public static Double percentile(NumericColumn data, Double percentile) { return StatUtils.percentile(removeMissing(data), percentile); diff --git 
a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/BooleanCountFunction.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/BooleanCountFunction.java index 0ef74880a0..36c5f126cd 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/BooleanCountFunction.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/BooleanCountFunction.java @@ -3,20 +3,34 @@ import tech.tablesaw.api.BooleanColumn; import tech.tablesaw.api.ColumnType; +/** + * A partial implementation of an AggregateFunction that returns an Integer value when applied to a + * Boolean Column + * + * @deprecated Use {@link BooleanIntAggregateFunction} instead + */ +@Deprecated abstract class BooleanCountFunction extends AggregateFunction { + /** + * Constructs a BooleanCountFunction with the given name. The name may be used to name a column in + * the output when this function is used by {@link Summarizer} + */ public BooleanCountFunction(String functionName) { super(functionName); } + /** Returns an Integer as a result of applying this function to the given column */ @Override public abstract Integer summarize(BooleanColumn column); + /** {@inheritDoc} */ @Override public boolean isCompatibleColumn(ColumnType type) { return type.equals(ColumnType.BOOLEAN); } + /** {@inheritDoc} */ @Override public ColumnType returnType() { return ColumnType.DOUBLE; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/BooleanNumericFunction.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/BooleanNumericFunction.java index fae789103e..a947ffafcc 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/BooleanNumericFunction.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/BooleanNumericFunction.java @@ -3,20 +3,33 @@ import tech.tablesaw.api.BooleanColumn; import 
tech.tablesaw.api.ColumnType; +/** + * Partial implementation of Aggregate function that returns a Double value when applied to a + * BooleanColumn + * + * @deprecated Use {@link BooleanDoubleAggregateFunction} instead + */ abstract class BooleanNumericFunction extends AggregateFunction { + /** + * Constructs a BooleanNumericFunction with the given name. The name may be used to name a column + * in the output when this function is used by {@link Summarizer} + */ public BooleanNumericFunction(String functionName) { super(functionName); } + /** Returns a double that is the result of applying this function to the given column */ @Override public abstract Double summarize(BooleanColumn column); + /** {@inheritDoc} */ @Override public boolean isCompatibleColumn(ColumnType type) { return type.equals(ColumnType.BOOLEAN); } + /** {@inheritDoc} */ @Override public ColumnType returnType() { return ColumnType.DOUBLE; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/CountFunction.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/CountFunction.java index 4527903f63..749b5eb708 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/CountFunction.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/CountFunction.java @@ -3,20 +3,34 @@ import tech.tablesaw.api.ColumnType; import tech.tablesaw.columns.Column; +/** + * Partial implementation of AggregateFunction that returns an integer when applied to a column of + * any type + * + * @deprecated Use {@link AnyIntAggregateFunction} instead + */ +@Deprecated abstract class CountFunction extends AggregateFunction, Integer> { + /** + * Constructs a CountFunction with the given name. 
The name is used to name an output column when + * this function is used by {@link Summarizer} + */ public CountFunction(String functionName) { super(functionName); } + /** Returns an Integer when this function is applied to the given column */ @Override public abstract Integer summarize(Column column); + /** {@inheritDoc} */ @Override public boolean isCompatibleColumn(ColumnType type) { return true; } + /** {@inheritDoc} */ @Override public ColumnType returnType() { return ColumnType.DOUBLE; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/StringFunction.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/StringFunction.java index 3e9c5d71e1..33dbf0677a 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/StringFunction.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/aggregate/StringFunction.java @@ -3,20 +3,31 @@ import tech.tablesaw.api.ColumnType; import tech.tablesaw.api.StringColumn; -/** A partial implementation of aggregate functions to summarize over a date column */ +/** + * A partial implementation of aggregate functions to summarize over a StringColumn + * + * @deprecated Use {@link StringAggregateFunction} instead + */ +@Deprecated public abstract class StringFunction extends AggregateFunction { + /** + * Constructs an {@code StringFunction} with the given name. 
The name may be used to name a column + * in the output when this function is used by {@link Summarizer} + */ public StringFunction(String name) { super(name); } public abstract String summarize(StringColumn column); + /** {@inheritDoc} */ @Override public boolean isCompatibleColumn(ColumnType type) { return type.equals(ColumnType.STRING); } + /** {@inheritDoc} */ @Override public ColumnType returnType() { return ColumnType.STRING; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/BooleanColumn.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/BooleanColumn.java index 9bfa583321..d2782a7ce8 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/BooleanColumn.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/BooleanColumn.java @@ -23,7 +23,9 @@ import it.unimi.dsi.fastutil.bytes.Byte2IntMap; import it.unimi.dsi.fastutil.bytes.Byte2IntOpenHashMap; import it.unimi.dsi.fastutil.bytes.ByteArrayList; +import it.unimi.dsi.fastutil.bytes.ByteComparators; import it.unimi.dsi.fastutil.bytes.ByteIterator; +import it.unimi.dsi.fastutil.bytes.ByteListIterator; import it.unimi.dsi.fastutil.bytes.ByteOpenHashSet; import it.unimi.dsi.fastutil.bytes.ByteSet; import it.unimi.dsi.fastutil.ints.IntComparator; @@ -37,7 +39,12 @@ import tech.tablesaw.columns.AbstractColumn; import tech.tablesaw.columns.AbstractColumnParser; import tech.tablesaw.columns.Column; -import tech.tablesaw.columns.booleans.*; +import tech.tablesaw.columns.booleans.BooleanColumnType; +import tech.tablesaw.columns.booleans.BooleanColumnUtils; +import tech.tablesaw.columns.booleans.BooleanFillers; +import tech.tablesaw.columns.booleans.BooleanFilters; +import tech.tablesaw.columns.booleans.BooleanFormatter; +import tech.tablesaw.columns.booleans.BooleanMapUtils; import tech.tablesaw.filtering.BooleanFilterSpec; import tech.tablesaw.filtering.predicates.BytePredicate; import 
tech.tablesaw.selection.BitmapBackedSelection; @@ -52,8 +59,7 @@ public class BooleanColumn extends AbstractColumn BooleanFilters { /** The data held by this column */ - // protected ByteArrayList data; - BooleanData data; + protected ByteArrayList data; /** An IntComparator. The ints are row indexes */ private final IntComparator comparator = @@ -74,12 +80,7 @@ public class BooleanColumn extends AbstractColumn */ private BooleanColumn(String name, ByteArrayList values) { super(BooleanColumnType.instance(), name, BooleanColumnType.DEFAULT_PARSER); - data = new BitSetBooleanData(values); - } - - public BooleanColumn(String name, BooleanData data) { - super(BooleanColumnType.BOOLEAN, name, BooleanColumnType.DEFAULT_PARSER); - this.data = data; + data = values; } /** Returns {@code true} if b is the missing value indicator for this column type */ @@ -227,13 +228,23 @@ public Table summary() { /** Returns the count of missing values in this column */ @Override public int countMissing() { - return data.countMissing(); + int count = 0; + for (int i = 0; i < size(); i++) { + if (valueIsMissing(getByte(i))) { + count++; + } + } + return count; } /** {@inheritDoc} */ @Override public int countUnique() { - return data.countUnique(); + ByteSet count = new ByteOpenHashSet(3); + for (byte next : data) { + count.add(next); + } + return count.size(); } /** {@inheritDoc} */ @@ -347,19 +358,19 @@ public void clear() { /** {@inheritDoc} */ @Override public BooleanColumn copy() { - return new BooleanColumn(name(), data.copy()); + return new BooleanColumn(name(), data.clone()); } /** {@inheritDoc} */ @Override public void sortAscending() { - data.sortAscending(); + data.sort(ByteComparators.NATURAL_COMPARATOR); } /** {@inheritDoc} */ @Override public void sortDescending() { - data.sortDescending(); + data.sort(ByteComparators.OPPOSITE_COMPARATOR); } /** {@inheritDoc} */ @@ -409,12 +420,24 @@ public boolean isEmpty() { /** Returns the number of {@code true} elements in this column */ 
public int countTrue() { - return data.countTrue(); + int count = 0; + for (byte b : data) { + if (b == BooleanColumnType.BYTE_TRUE) { + count++; + } + } + return count; } /** Returns the number of {@code false} elements in this column */ public int countFalse() { - return data.countFalse(); + int count = 0; + for (byte b : data) { + if (b == BooleanColumnType.BYTE_FALSE) { + count++; + } + } + return count; } /** Returns the proportion of non-missing row elements that contain true */ @@ -448,13 +471,29 @@ public boolean none() { /** {@inheritDoc} */ @Override public Selection isFalse() { - return data.isFalse(); + Selection results = new BitmapBackedSelection(); + int i = 0; + for (byte next : data) { + if (next == BooleanColumnType.BYTE_FALSE) { + results.add(i); + } + i++; + } + return results; } /** {@inheritDoc} */ @Override public Selection isTrue() { - return data.isTrue(); + Selection results = new BitmapBackedSelection(); + int i = 0; + for (byte next : data) { + if (next == BooleanColumnType.BYTE_TRUE) { + results.add(i); + } + i++; + } + return results; } /** {@inheritDoc} */ @@ -473,11 +512,7 @@ public Selection isEqualTo(BooleanColumn other) { } /** Returns a ByteArrayList containing 0 (false), 1 (true) or Byte.MIN_VALUE (missing) */ - public ByteArrayList toByteArrayList() { - return data.toByteArrayList(); - } - - public BooleanData data() { + public ByteArrayList data() { return data; } @@ -492,7 +527,7 @@ public BooleanColumn set(int i, boolean b) { } /** Sets the value at i to b, and returns this column */ - public BooleanColumn set(int i, byte b) { + private BooleanColumn set(int i, byte b) { data.set(i, b); return this; } @@ -532,7 +567,7 @@ public BooleanColumn lag(int n) { System.arraycopy(data.toByteArray(), srcPos, dest, destPos, length); BooleanColumn copy = emptyCopy(size()); - copy.data = new BitSetBooleanData(new ByteArrayList(dest)); + copy.data = new ByteArrayList(dest); copy.setName(name() + " lag(" + n + ")"); return copy; } @@ 
-639,7 +674,14 @@ public Column set(int row, Column column, int sourceRow) { /** {@inheritDoc} */ @Override public Selection asSelection() { - return data.asSelection(); + Selection selection = new BitmapBackedSelection(); + for (int i = 0; i < size(); i++) { + byte value = getByte(i); + if (value == 1) { + selection.add(i); + } + } + return selection; } /** {@inheritDoc} */ @@ -707,7 +749,7 @@ public BooleanColumn where(Selection selection) { @Override public BooleanColumn removeMissing() { BooleanColumn noMissing = emptyCopy(); - ByteIterator iterator = byteIterator(); + ByteListIterator iterator = byteListIterator(); while (iterator.hasNext()) { byte b = iterator.nextByte(); if (!valueIsMissing(b)) { @@ -754,12 +796,18 @@ public Selection eval(BiPredicate predicate, Boolean valueToCo return selection; } + /** Returns a byteListIterator, which allows iteration by byte (value) and int (index) */ + private ByteListIterator byteListIterator() { + return data.iterator(); + } + /** * Returns a DoubleColumn containing the elements in this column, with true as 1.0 and false as * 0.0. 
*/ public DoubleColumn asDoubleColumn() { DoubleColumn numberColumn = DoubleColumn.create(this.name(), size()); + ByteArrayList data = data(); for (int i = 0; i < size(); i++) { numberColumn.set(i, data.getByte(i)); } @@ -862,58 +910,4 @@ public Boolean[] asObjectArray() { } return output; } - - /** - * Returns a byte representation of the true values, encoded in the format specified in {@link - * java.util.BitSet#toByteArray()} - */ - public byte[] trueBytes() { - return data.trueBytes(); - } - - /** - * Returns a byte representation of the false values, encoded in the format specified in {@link - * java.util.BitSet#toByteArray()} - */ - public byte[] falseBytes() { - return data.falseBytes(); - } - - /** - * Returns a byte representation of the missing values, encoded in the format specified in {@link - * java.util.BitSet#toByteArray()} - */ - public byte[] missingBytes() { - return data.missingBytes(); - } - - /** - * Sets the true values in the data from a byte[] encoding - * - * @param encodedValues The true values encoded in the format specified in {@link - * java.util.BitSet} - */ - public void trueBytes(byte[] encodedValues) { - data.setTrueBytes(encodedValues); - } - - /** - * Sets the false values in the data from a byte[] encoding - * - * @param encodedValues The false values encoded in the format specified in {@link - * java.util.BitSet} - */ - public void falseBytes(byte[] encodedValues) { - data.setFalseBytes(encodedValues); - } - - /** - * Sets the missing values in the data from a byte[] encoding - * - * @param encodedValues The missing values encoded in the format specified in {@link - * java.util.BitSet} - */ - public void missingBytes(byte[] encodedValues) { - data.setMissingBytes(encodedValues); - } } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/ColumnType.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/ColumnType.java index 420190fa09..9498540cf8 100644 --- 
a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/ColumnType.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/ColumnType.java @@ -17,6 +17,7 @@ import tech.tablesaw.columns.numbers.LongColumnType; import tech.tablesaw.columns.numbers.ShortColumnType; import tech.tablesaw.columns.strings.StringColumnType; +import tech.tablesaw.columns.strings.TextColumnType; import tech.tablesaw.columns.times.TimeColumnType; import tech.tablesaw.io.ReadOptions; @@ -40,6 +41,7 @@ public interface ColumnType { TimeColumnType LOCAL_TIME = TimeColumnType.instance(); DateTimeColumnType LOCAL_DATE_TIME = DateTimeColumnType.instance(); InstantColumnType INSTANT = InstantColumnType.instance(); + TextColumnType TEXT = TextColumnType.instance(); SkipColumnType SKIP = SkipColumnType.instance(); ExprColumnType EXPR = ExprColumnType.instance(); @@ -84,10 +86,17 @@ static ColumnType valueOf(String name) { /** TODO: Research this method to provide a good comment */ AbstractColumnParser customParser(ReadOptions options); - /** TODO: Research this method to provide a good comment */ - default boolean compare(int rowNumber, Column temp, Column original) { - Object o1 = original.get(rowNumber); - Object o2 = temp.get(temp.size() - 1); + /** + * Compare the row at {@code rownumber} in {@code column1} and {@code column2} and returns whether they are equals. + * @param rowNumber the row to compare + * @param column1 the first column to compare + * @param column2 the second column to compare + * @return true if row {@code rownumber} is equals in both columns + * @throws {@code IndexOutOfBoundsException} if {@code rownumber} exceeds either column size + */ + default boolean compare(int rowNumber, Column column1, Column column2) { + Object o1 = column2.get(rowNumber); + Object o2 = column1.get(rowNumber); return o1 == null ? 
o2 == null : o1.equals(o2); } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/InstantColumn.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/InstantColumn.java index b3148acfb4..3de6c096c0 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/InstantColumn.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/InstantColumn.java @@ -79,8 +79,8 @@ public boolean equals(int rowNumber1, int rowNumber2) { private final IntComparator comparator = (r1, r2) -> { - long f1 = getPackedInstant(r1); - long f2 = getPackedInstant(r2); + long f1 = getPackedDateTime(r1); + long f2 = getPackedDateTime(r2); return Long.compare(f1, f2); }; @@ -292,13 +292,13 @@ public InstantColumn appendInternal(long dateTime) { /** {@inheritDoc} */ @Override public String getString(int row) { - return printFormatter.format(getPackedInstant(row)); + return printFormatter.format(getPackedDateTime(row)); } /** {@inheritDoc} */ @Override public String getUnformattedString(int row) { - return PackedInstant.toString(getPackedInstant(row)); + return PackedInstant.toString(getPackedDateTime(row)); } /** {@inheritDoc} */ @@ -403,15 +403,15 @@ public long getLongInternal(int index) { return data.getLong(index); } - /** Returns the long-encoded version of the instant at the given index */ - protected long getPackedInstant(int index) { + // TODO: Name? 
+ protected long getPackedDateTime(int index) { return getLongInternal(index); } /** {@inheritDoc} */ @Override public Instant get(int index) { - return PackedInstant.asInstant(getPackedInstant(index)); + return PackedInstant.asInstant(getPackedDateTime(index)); } /** {@inheritDoc} */ @@ -439,7 +439,7 @@ public InstantColumn set(Selection rowSelection, Instant newValue) { public int countMissing() { int count = 0; for (int i = 0; i < size(); i++) { - if (getPackedInstant(i) == InstantColumnType.missingValueIndicator()) { + if (getPackedDateTime(i) == InstantColumnType.missingValueIndicator()) { count++; } } @@ -690,11 +690,11 @@ public int byteSize() { /** Returns the contents of the cell at rowNumber as a byte[] */ @Override public byte[] asBytes(int rowNumber) { - return ByteBuffer.allocate(byteSize()).putLong(getPackedInstant(rowNumber)).array(); + return ByteBuffer.allocate(byteSize()).putLong(getPackedDateTime(rowNumber)).array(); } public double getDouble(int i) { - return getPackedInstant(i); + return getPackedDateTime(i); } public double[] asDoubleArray() { diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/QuerySupport.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/QuerySupport.java index 23a22ad4e3..078101bc6e 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/QuerySupport.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/QuerySupport.java @@ -9,6 +9,7 @@ import tech.tablesaw.filtering.DeferredInstantColumn; import tech.tablesaw.filtering.DeferredNumberColumn; import tech.tablesaw.filtering.DeferredStringColumn; +import tech.tablesaw.filtering.DeferredTextColumn; import tech.tablesaw.filtering.DeferredTimeColumn; import tech.tablesaw.filtering.Not; import tech.tablesaw.filtering.Or; @@ -119,6 +120,14 @@ public static DeferredStringColumn str(String columnName) { return new DeferredStringColumn(columnName); } + public static 
DeferredTextColumn text(String columnName) { + return new DeferredTextColumn(columnName); + } + + public static DeferredTextColumn textColumn(String columnName) { + return new DeferredTextColumn(columnName); + } + public static DeferredNumberColumn numberColumn(String columnName) { return new DeferredNumberColumn(columnName); } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/Row.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/Row.java index 0118dfe599..b882b55d7b 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/Row.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/Row.java @@ -156,6 +156,9 @@ public Row(TableSlice tableSlice, int rowNumber) { if (column instanceof StringColumn) { stringColumnMap.put(column.name(), (StringColumn) column); } + if (column instanceof TextColumn) { + stringColumnMap.put(column.name(), (TextColumn) column); + } if (column instanceof DateColumn) { dateColumnMap.put(column.name(), (DateColumn) column); @@ -172,10 +175,6 @@ public Row(TableSlice tableSlice, int rowNumber) { } } - public ColumnType type(int columnIndex) { - return tableSlice.column(columnIndex).type(); - } - /** Moves this Row to the given 0-based row index */ public void at(int rowNumber) { this.rowNumber = rowNumber; @@ -461,6 +460,24 @@ public short getShort(String columnName) { return shortColumnMap.get(columnName).getShort(getIndex(rowNumber)); } + /** + * Returns a String representing the text from this Row at the column of the given name. An + * IllegalStateException is thrown if the column is not present in the Row and an + * IllegalArgumentException is thrown if it has a different type + */ + public String getText(String columnName) { + return stringColumnMap.get(columnName).get(getIndex(rowNumber)); + } + + /** + * Returns a String value from this Row at the column with the given index. 
An + * IllegalStateException is thrown if the column is not present in the Row and an + * IllegalArgumentException is thrown if it has a different type + */ + public String getText(int columnIndex) { + return getString(columnNames[columnIndex]); + } + /** * Returns a LocalTime value from this Row at the column of the given name. An * IllegalStateException is thrown if the column is not present in the Row and an @@ -557,10 +574,6 @@ public void setDate(int columnIndex, LocalDate value) { setDate(columnNames[columnIndex], value); } - public void setPackedDate(int columnIndex, int value) { - setPackedDate(columnNames[columnIndex], value); - } - /** * Sets the value of the column with the given name at this Row to the given value. An * IllegalStateException is * thrown if the column is not present in the Row and an @@ -571,42 +584,6 @@ public void setDate(String columnName, LocalDate value) { dateColumnMap.get(columnName).set(getIndex(rowNumber), value); } - public void setPackedDate(String columnName, int value) { - dateColumnMap.get(columnName).set(getIndex(rowNumber), value); - } - - public void setPackedTime(int columnIndex, int value) { - setPackedTime(columnNames[columnIndex], value); - } - - public void setPackedDateTime(int columnIndex, long value) { - setPackedDateTime(columnNames[columnIndex], value); - } - - public void setPackedInstant(int columnIndex, long value) { - setPackedInstant(columnNames[columnIndex], value); - } - - public void setPackedTime(String columnName, int value) { - timeColumnMap.get(columnName).set(getIndex(rowNumber), value); - } - - public void setPackedDateTime(String columnName, long value) { - dateTimeColumnMap.get(columnName).set(getIndex(rowNumber), value); - } - - public void setPackedInstant(String columnName, long value) { - instantColumnMap.get(columnName).set(getIndex(rowNumber), value); - } - - public void setBooleanAsByte(String columnName, byte value) { - booleanColumnMap.get(columnName).set(getIndex(rowNumber),
value); - } - - public void setBooleanAsByte(int columnIndex, byte value) { - setBooleanAsByte(columnNames[columnIndex], value); - } - /** * Sets the value of the column at the given index and this Row to the given value. An * IllegalStateException is * thrown if the column is not present in the Row and an @@ -769,6 +746,26 @@ public void setString(String columnName, String value) { stringColumnMap.get(columnName).set(getIndex(rowNumber), value); } + /** + * Sets the value of the column at the given index and this Row to the given value. An + * IllegalStateException is * thrown if the column is not present in the Row and an + * IllegalArgumentException is thrown if it has a different type to that named in the method + * signature + */ + public void setText(int columnIndex, String value) { + setString(columnNames[columnIndex], value); + } + + /** + * Sets the value of the column with the given name at this Row to the given value. An + * IllegalStateException is * thrown if the column is not present in the Row and an + * IllegalArgumentException is thrown if it has a different type to that named in the method + * signature + */ + public void setText(String columnName, String value) { + stringColumnMap.get(columnName).set(getIndex(rowNumber), value); + } + /** * Sets the value of the column at the given index and this Row to the given value. An * IllegalStateException is * thrown if the column is not present in the Row and an @@ -789,14 +786,6 @@ private int getIndex(int rowNumber) { return tableSlice.mappedRowNumber(rowNumber); } - /** - * Returns the row number in the table backing the slice behind this row. 
This value may differ - * from the rowNumber() if the slice covers less than the entire table - */ - public int getBackingRowNumber() { - return getIndex(getRowNumber()); - } - /** * Returns a double representing the value held in the column with the given name at this row, for * any numeric column type @@ -814,7 +803,7 @@ public ColumnType getColumnType(int columnIndex) { return tableSlice.column(columnIndex).type(); } - public Column column(int columnIndex) { + Column column(int columnIndex) { return tableSlice.column(columnIndex); } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/StringColumn.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/StringColumn.java index d9faa833c5..30b427514a 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/StringColumn.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/StringColumn.java @@ -17,15 +17,21 @@ import static com.google.common.base.Preconditions.checkArgument; import static tech.tablesaw.api.ColumnType.*; -import com.google.common.base.Preconditions; import it.unimi.dsi.fastutil.ints.IntComparator; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Set; import java.util.stream.Stream; -import javax.annotation.Nullable; -import tech.tablesaw.columns.AbstractColumn; import tech.tablesaw.columns.AbstractColumnParser; import tech.tablesaw.columns.Column; -import tech.tablesaw.columns.strings.*; +import tech.tablesaw.columns.strings.AbstractStringColumn; +import tech.tablesaw.columns.strings.ByteDictionaryMap; +import tech.tablesaw.columns.strings.DictionaryMap; +import tech.tablesaw.columns.strings.NoKeysAvailableException; +import tech.tablesaw.columns.strings.StringColumnType; import tech.tablesaw.selection.BitmapBackedSelection; import tech.tablesaw.selection.Selection; @@ -37,12 
+43,10 @@ *

Because the MISSING_VALUE for this column type is an empty string, there is little or no need * for special handling of missing values in this class's methods. */ -public class StringColumn extends AbstractColumn - implements CategoricalColumn, StringFilters, StringMapFunctions, StringReduceUtils { +public class StringColumn extends AbstractStringColumn { - private DictionaryMap data; - - private StringColumnFormatter printFormatter = new StringColumnFormatter(); + // a bidirectional map of keys to backing string values. + private DictionaryMap lookupTable = new ByteDictionaryMap(); private final IntComparator rowComparator = (i, i1) -> { @@ -58,7 +62,7 @@ public static boolean valueIsMissing(String string) { /** {@inheritDoc} */ @Override public StringColumn appendMissing() { - data.appendMissing(); + lookupTable.appendMissing(); return this; } @@ -82,12 +86,6 @@ public static StringColumn create(String name, String... strings) { return new StringColumn(name, strings); } - /* - public static StringColumn create(String name, StringData stringData) { - return new StringColumn(name, stringData); - } - */ - public static StringColumn create(String name, Collection strings) { return new StringColumn(name, strings); } @@ -97,8 +95,7 @@ public static StringColumn createInternal(String name, DictionaryMap map) { } public static StringColumn create(String name, int size) { - // TODO Pick map implementation based on array size - StringColumn column = new StringColumn(name); + StringColumn column = new StringColumn(name, new ArrayList<>(size)); for (int i = 0; i < size; i++) { column.appendMissing(); } @@ -113,50 +110,31 @@ public static StringColumn create(String name, Stream stream) { private StringColumn(String name, Collection strings) { super(StringColumnType.instance(), name, StringColumnType.DEFAULT_PARSER); - // TODO Pick map implementation based on array size - data = new ByteDictionaryMap(); - for (String s : strings) { - append(s); + for (String string : strings) 
{ + append(string); } } private StringColumn(String name, DictionaryMap map) { super(StringColumnType.instance(), name, StringColumnType.DEFAULT_PARSER); - data = map; + lookupTable = map; } private StringColumn(String name) { super(StringColumnType.instance(), name, StringColumnType.DEFAULT_PARSER); - data = new ByteDictionaryMap(); } private StringColumn(String name, String[] strings) { super(StringColumnType.instance(), name, StringColumnType.DEFAULT_PARSER); - // TODO Pick map implementation based on array size - data = new ByteDictionaryMap(); for (String string : strings) { append(string); } } - /** - * Sets an {@link StringColumnFormatter} which will be used to format the display of data from - * this column when it is printed (using, for example, Table:print()) and optionally when written - * to a text file like a CSV. - */ - public void setPrintFormatter(StringColumnFormatter formatter) { - Preconditions.checkNotNull(formatter); - this.printFormatter = formatter; - } - - /** Returns the current {@link StringColumnFormatter}. 
*/ - public StringColumnFormatter getPrintFormatter() { - return printFormatter; - } /** {@inheritDoc} */ @Override public boolean isMissing(int rowNumber) { - return data.isMissing(rowNumber); + return lookupTable.isMissing(rowNumber); } /** {@inheritDoc} */ @@ -176,13 +154,13 @@ public StringColumn emptyCopy(int rowSize) { /** {@inheritDoc} */ @Override public void sortAscending() { - data.sortAscending(); + lookupTable.sortAscending(); } /** {@inheritDoc} */ @Override public void sortDescending() { - data.sortDescending(); + lookupTable.sortDescending(); } /** @@ -192,7 +170,7 @@ public void sortDescending() { */ @Override public int size() { - return data.size(); + return lookupTable.size(); } /** @@ -204,7 +182,7 @@ public int size() { */ @Override public String get(int rowIndex) { - return data.get(rowIndex); + return lookupTable.getValueForIndex(rowIndex); } /** @@ -251,13 +229,13 @@ public Table summary() { /** {@inheritDoc} */ @Override public Table countByCategory() { - return data.countByCategory(name()); + return lookupTable.countByCategory(name()); } /** {@inheritDoc} */ @Override public void clear() { - data.clear(); + lookupTable.clear(); } /** {@inheritDoc} */ @@ -319,11 +297,11 @@ public StringColumn set(int rowIndex, String stringValue) { return setMissing(rowIndex); } try { - data.set(rowIndex, stringValue); + lookupTable.set(rowIndex, stringValue); } catch (NoKeysAvailableException ex) { - data = data.promoteYourself(); + lookupTable = lookupTable.promoteYourself(); try { - data.set(rowIndex, stringValue); + lookupTable.set(rowIndex, stringValue); } catch (NoKeysAvailableException e) { // this can't happen throw new IllegalStateException(e); @@ -335,7 +313,7 @@ public StringColumn set(int rowIndex, String stringValue) { /** {@inheritDoc} */ @Override public int countUnique() { - return data.countUnique(); + return lookupTable.countUnique(); } /** @@ -385,32 +363,22 @@ public IntComparator rowComparator() { return rowComparator; } - @Override - 
public Selection isMissing() { - return data.isMissing(); - } - - @Override - public Selection isNotMissing() { - return data.isNotMissing(); - } - /** {@inheritDoc} */ @Override public boolean isEmpty() { - return data.isEmpty(); + return lookupTable.size() == 0; } /** {@inheritDoc} */ @Override public Selection isEqualTo(String string) { - return data.isEqualTo(string); + return lookupTable.isEqualTo(string); } /** {@inheritDoc} */ @Override public Selection isNotEqualTo(String string) { - return data.isNotEqualTo(string); + return lookupTable.isNotEqualTo(string); } /** @@ -422,7 +390,7 @@ public Selection isNotEqualTo(String string) { * @return a list of {@link BooleanColumn} */ public List getDummies() { - return data.getDummies(); + return lookupTable.getDummies(); } /** @@ -432,8 +400,8 @@ public List getDummies() { */ @Override public StringColumn unique() { - List strings = new ArrayList<>(data.asSet()); - return new StringColumn(name(), strings); + List strings = new ArrayList<>(lookupTable.asSet()); + return StringColumn.create(name() + " Unique values", strings); } public DoubleColumn asDoubleColumn() { @@ -463,7 +431,7 @@ public StringColumn copy() { @Override public StringColumn append(Column column) { checkArgument( - column.type().equals(STRING), + column.type() == TEXT || column.type().equals(STRING), "Column '%s' has type %s, but column '%s' has type %s.", name(), type(), @@ -479,7 +447,7 @@ public StringColumn append(Column column) { /** Returns the count of missing values in this column */ @Override public int countMissing() { - return data.countMissing(); + return lookupTable.countMissing(); } /** {@inheritDoc} */ @@ -497,36 +465,38 @@ public StringColumn removeMissing() { /** {@inheritDoc} */ @Override public Iterator iterator() { - return data.iterator(); + return lookupTable.iterator(); } public Set asSet() { - return data.asSet(); + return lookupTable.asSet(); } /** Returns the contents of the cell at rowNumber as a byte[] */ @Override 
public byte[] asBytes(int rowNumber) { - return data.asBytes(rowNumber); + return lookupTable.asBytes(rowNumber); } public double getDouble(int i) { - return (double) data.uniqueValuesAt(data.firstIndexOf(data.getValueForIndex(i))) - 1; + return (double) + lookupTable.uniqueValuesAt(lookupTable.firstIndexOf(lookupTable.getValueForIndex(i))) + - 1; } public double[] asDoubleArray() { - return Arrays.stream(data.asIntArray()).asDoubleStream().toArray(); + return Arrays.stream(lookupTable.asIntArray()).asDoubleStream().toArray(); } /** Added for naming consistency with all other columns */ @Override public StringColumn append(String value) { try { - data.append(value); + lookupTable.append(value); } catch (NoKeysAvailableException ex) { - data = data.promoteYourself(); + lookupTable = lookupTable.promoteYourself(); try { - data.append(value); + lookupTable.append(value); } catch (NoKeysAvailableException e) { // this can't happen throw new IllegalStateException(e); @@ -551,13 +521,13 @@ public StringColumn appendObj(Object obj) { /** {@inheritDoc} */ @Override public Selection isIn(String... 
strings) { - return data.isIn(strings); + return lookupTable.selectIsIn(strings); } /** {@inheritDoc} */ @Override public Selection isIn(Collection strings) { - return data.isIn(strings); + return lookupTable.selectIsIn(strings); } /** {@inheritDoc} */ @@ -579,17 +549,17 @@ public Selection isNotIn(Collection strings) { } public int firstIndexOf(String value) { - return data.firstIndexOf(value); + return lookupTable.firstIndexOf(value); } public int countOccurrences(String value) { - return data.countOccurrences(value); + return lookupTable.countOccurrences(value); } /** {@inheritDoc} */ @Override public String[] asObjectArray() { - return data.asObjectArray(); + return lookupTable.asObjectArray(); } /** {@inheritDoc} */ @@ -598,78 +568,16 @@ public StringColumn asStringColumn() { return copy(); } - /** For tablesaw internal use Note: This method returns null if the stringDataType is TEXTUAL */ - public @Nullable DictionaryMap getDictionary() { - return data; - } - - /** {@inheritDoc} */ - @Override - public String getString(int row) { - return printFormatter.format(get(row)); - } - - /** {@inheritDoc} */ - @Override - public String getUnformattedString(int row) { - return String.valueOf(get(row)); - } - - /** - * Returns the largest ("top") n values in the column - * - * @param n The maximum number of records to return. The actual number will be smaller if n is - * greater than the number of observations in the column - * @return A list, possibly empty, of the largest observations - */ - public List top(int n) { - List top = new ArrayList<>(); - Column copy = this.copy(); - copy.sortDescending(); - for (int i = 0; i < n; i++) { - top.add(copy.get(i)); - } - return top; - } - - /** - * Returns the smallest ("bottom") n values in the column - * - * @param n The maximum number of records to return. 
The actual number will be smaller if n is - * greater than the number of observations in the column - * @return A list, possibly empty, of the smallest n observations - */ - public List bottom(int n) { - List bottom = new ArrayList<>(); - Column copy = this.copy(); - copy.sortAscending(); - for (int i = 0; i < n; i++) { - bottom.add(copy.get(i)); + public TextColumn asTextColumn() { + TextColumn textColumn = TextColumn.create(name(), size()); + for (int i = 0; i < size(); i++) { + textColumn.set(i, get(i)); } - return bottom; - } - - /** {@inheritDoc} */ - @Override - public Column append(Column column, int row) { - return append(column.getUnformattedString(row)); - } - - /** {@inheritDoc} */ - @Override - public Column set(int row, Column column, int sourceRow) { - return set(row, column.getUnformattedString(sourceRow)); + return textColumn; } - /** {@inheritDoc} */ - @Override - public int byteSize() { - return type().byteSize(); - } - - /** {@inheritDoc} */ - @Override - public int compare(String o1, String o2) { - return o1.compareTo(o2); + /** For tablesaw internal use only */ + public DictionaryMap getDictionary() { + return lookupTable; } } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/Table.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/Table.java index 981aae0ed0..7396149b18 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/Table.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/Table.java @@ -19,28 +19,21 @@ import static tech.tablesaw.aggregate.AggregateFunctions.countMissing; import static tech.tablesaw.api.QuerySupport.not; import static tech.tablesaw.selection.Selection.selectNRowsAtRandom; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; + +import 
com.google.common.base.Preconditions; +import com.google.common.collect.*; +import com.google.common.primitives.Ints; +import io.github.classgraph.ClassGraph; +import io.github.classgraph.ScanResult; +import it.unimi.dsi.fastutil.ints.*; +import java.util.*; +import java.util.function.Consumer; import java.util.function.Function; import java.util.function.IntFunction; +import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; import org.roaringbitmap.RoaringBitmap; -import com.google.common.base.Preconditions; -import com.google.common.collect.Streams; -import com.google.common.primitives.Ints; -import io.github.classgraph.ClassGraph; -import io.github.classgraph.ScanResult; -import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntArrays; -import it.unimi.dsi.fastutil.ints.IntComparator; import tech.tablesaw.aggregate.AggregateFunction; import tech.tablesaw.aggregate.CrossTab; import tech.tablesaw.aggregate.PivotTable; @@ -58,10 +51,7 @@ import tech.tablesaw.sorting.Sort; import tech.tablesaw.sorting.SortUtils; import tech.tablesaw.sorting.comparators.IntComparatorChain; -import tech.tablesaw.table.Relation; -import tech.tablesaw.table.StandardTableSliceGroup; -import tech.tablesaw.table.TableSlice; -import tech.tablesaw.table.TableSliceGroup; +import tech.tablesaw.table.*; /** * A table of data, consisting of some number of columns, each of which has the same number of rows. 
@@ -125,7 +115,7 @@ protected Table(String name, Collection> columns) { /** TODO: Add documentation */ private static void autoRegisterReadersAndWriters() { try (ScanResult scanResult = - new ClassGraph().enableAllInfo().acceptPackages("tech.tablesaw.io").scan()) { + new ClassGraph().enableAllInfo().whitelistPackages("tech.tablesaw.io").scan()) { List classes = new ArrayList<>(); classes.addAll(scanResult.getClassesImplementing(DataWriter.class.getName()).getNames()); classes.addAll(scanResult.getClassesImplementing(DataReader.class.getName()).getNames()); @@ -231,7 +221,7 @@ private static Sort getSort(String... columnNames) { return key; } - /** Returns an object that can be used to read data from a file into a new Table */ + /** Returns an object that can be used to read data from a file into a new Table */ public static DataFrameReader read() { return new DataFrameReader(defaultReaderRegistry); } @@ -454,7 +444,6 @@ public int columnIndex(String columnName) { * * @throws IllegalArgumentException if the column is not present in this table */ - @Override public int columnIndex(Column column) { int columnIndex = -1; for (int i = 0; i < columnList.size(); i++) { @@ -477,7 +466,6 @@ public String name() { } /** Returns a List of the names of all the columns in this table */ - @Override public List columnNames() { return columnList.stream().map(Column::name).collect(toList()); } @@ -542,17 +530,20 @@ public void copyRowsToTable(int[] rows, Table newTable) { } /** - * Returns {@code true} if the row @rowNumber in table1 holds the same data as the row at - * rowNumber in table2 + * Returns {@code true} if the row {@code rowNumber} in {@code table1} holds the same values as the row at + * {@code rowNumber} in {@code table2}. Returns false if the number of columns is different in the two tables.
+ * @param rowNumber the row to compare + * @param table1 the first table to compare + * @param table2 the second table to compare + * @return false if row {@code rowNumber} is different in {@code table1} and {@code table2} + * @throws IndexOutOfBoundsException if {@code rowNumber} exceeds either table's number of rows */ public static boolean compareRows(int rowNumber, Table table1, Table table2) { - int columnCount = table1.columnCount(); - boolean result; + final int columnCount = table1.columnCount(); + if (columnCount != table2.columnCount()) return false; for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnType columnType = table1.column(columnIndex).type(); - result = - columnType.compare(rowNumber, table2.column(columnIndex), table1.column(columnIndex)); - if (!result) { + if (!columnType.compare(rowNumber, table2.column(columnIndex), table1.column(columnIndex))) { return false; } } @@ -660,7 +651,6 @@ public void clear() { } /** Returns a new table containing the first {@code nrows} of data in this table */ - @Override public Table first(int nRows) { int newRowCount = Math.min(nRows, rowCount()); return inRange(0, newRowCount); @@ -789,6 +779,18 @@ public void addRow(int rowIndex, Table sourceTable) { } } + /** + * Adds the given row to this table + * + * @deprecated Use {@link #append(Row)} instead. + */ + @Deprecated + public void addRow(Row row) { + for (int i = 0; i < row.columnCount(); i++) { + column(i).appendObj(row.getObject(i)); + } + } + /** Returns a new Row object with its position set to the given zero-based row index.
*/ public Row row(int rowIndex) { Row row = new Row(Table.this); @@ -984,12 +986,10 @@ private boolean isDuplicate(Row row, Int2ObjectMap uniqueHashes) { Row oldRow = this.row(key); if (duplicateRows(row, oldRow)) { return true; - } else { - uniqueHashes.get(hash).add(row.getRowNumber()); - return false; } } - return true; + uniqueHashes.get(hash).add(row.getRowNumber()); + return false; } /** Returns only those records in this table that have no columns with missing values */ @@ -1027,6 +1027,17 @@ public Table selectColumns(Column... columns) { return t; } + /** + * Returns a new table containing copies of the selected columns from this table + * + * @param columns The columns to copy into the new table + * @see #retainColumns(Column[]) + * @deprecated Use {@link #selectColumns(Column[])} instead + */ + public Table select(Column... columns) { + return selectColumns(columns); + } + /** * Returns a new table containing copies of the selected columns from this table * @@ -1041,6 +1052,17 @@ public Table selectColumns(String... columnNames) { return t; } + /** + * Returns a new table containing copies of the selected columns from this table + * + * @param columnNames The names of the columns to include + * @see #retainColumns(String[]) + * @deprecated Use {@link #selectColumns(String[])} instead + */ + public Table select(String... 
columnNames) { + return selectColumns(columnNames); + } + /** * Returns a new table containing copies of all the columns from this table, except those at the * given indexes @@ -1718,6 +1740,9 @@ private void writeIdVariables(List idVariables, Table result, Row row) { if (columnType.equals(ColumnType.STRING)) { StringColumn sc = (StringColumn) resultColumn; sc.append(row.getString(resultColumn.name())); + } else if (columnType.equals(ColumnType.TEXT)) { + TextColumn sc = (TextColumn) resultColumn; + sc.append(row.getString(resultColumn.name())); } else if (columnType.equals(ColumnType.INTEGER)) { IntColumn ic = (IntColumn) resultColumn; ic.append(row.getInt(resultColumn.name())); @@ -1800,6 +1825,10 @@ public Table cast() { StringColumn source = (StringColumn) sliceTable.column(idColumn.name()); StringColumn dest = (StringColumn) result.column(idColumn.name()); dest.append(source.get(0)); + } else if (columnType.equals(ColumnType.TEXT)) { + TextColumn source = (TextColumn) sliceTable.column(idColumn.name()); + TextColumn dest = (TextColumn) result.column(idColumn.name()); + dest.append(source.get(0)); } else if (columnType.equals(ColumnType.INTEGER)) { IntColumn source = (IntColumn) sliceTable.column(idColumn.name()); IntColumn dest = (IntColumn) result.column(idColumn.name()); @@ -1847,4 +1876,83 @@ public Table cast() { } return result; } + + /** + * Applies the operation in {@code doable} to every row in the table + * + * @deprecated use {@code stream().forEach} + */ + @Deprecated + public void doWithRows(Consumer doable) { + stream().forEach(doable); + } + + /** + * Applies the predicate to each row, and return true if any row returns true + * + * @deprecated use {@code stream().anyMatch} + */ + @Deprecated + public boolean detect(Predicate predicate) { + return stream().anyMatch(predicate); + } + + /** @deprecated use steppingStream(n).forEach(rowConsumer) */ + @Deprecated + public void stepWithRows(Consumer rowConsumer, int n) { + 
steppingStream(n).forEach(rowConsumer); + } + + /** @deprecated use rollingStream(2).forEach(rowConsumer) */ + @Deprecated + public void doWithRows(Pairs pairs) { + rollingStream(2).forEach(rows -> pairs.doWithPair(rows[0], rows[1])); + } + + /** @deprecated use rollingStream(2).forEach(rowConsumer) */ + @Deprecated + public void doWithRowPairs(Consumer pairConsumer) { + rollingStream(2).forEach(rows -> pairConsumer.accept(new RowPair(rows[0], rows[1]))); + } + + /** @deprecated use rollingStream(n).forEach(rowConsumer) */ + @Deprecated + public void rollWithRows(Consumer rowConsumer, int n) { + rollingStream(n).forEach(rowConsumer); + } + + @Deprecated + public static class RowPair { + private final Row first; + private final Row second; + + public RowPair(Row first, Row second) { + this.first = first; + this.second = second; + } + + public Row getFirst() { + return first; + } + + public Row getSecond() { + return second; + } + } + + @Deprecated + interface Pairs { + + void doWithPair(Row row1, Row row2); + + /** + * Returns an object containing the results of applying doWithPair() to the rows in a table. + *

The default implementation throws an exception, to be used if the operation produces only + * side effects + */ + default Object getResult() { + throw new UnsupportedOperationException("This Pairs function returns no results"); + } + } } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/TextualStringData.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/TextColumn.java similarity index 66% rename from symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/TextualStringData.java rename to symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/TextColumn.java index 344fe5e6c9..93fe92c77e 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/TextualStringData.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/api/TextColumn.java @@ -12,19 +12,26 @@ * limitations under the License. */ -package tech.tablesaw.columns.strings; +package tech.tablesaw.api; -import static tech.tablesaw.columns.AbstractColumn.DEFAULT_ARRAY_SIZE; +import static tech.tablesaw.api.ColumnType.STRING; +import static tech.tablesaw.api.ColumnType.TEXT; +import com.google.common.base.Preconditions; import com.google.common.collect.Sets; import it.unimi.dsi.fastutil.ints.IntComparator; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; import java.util.stream.Stream; -import javax.annotation.Nullable; -import tech.tablesaw.api.BooleanColumn; -import tech.tablesaw.api.StringColumn; -import tech.tablesaw.api.Table; +import tech.tablesaw.columns.AbstractColumnParser; import tech.tablesaw.columns.Column; +import tech.tablesaw.columns.strings.AbstractStringColumn; +import tech.tablesaw.columns.strings.TextColumnType; import tech.tablesaw.selection.BitmapBackedSelection; import 
tech.tablesaw.selection.Selection; @@ -37,7 +44,7 @@ *

Because the MISSING_VALUE for this column type is an empty string, there is little or no need * for special handling of missing values in this class's methods. */ -public class TextualStringData implements StringData { +public class TextColumn extends AbstractStringColumn { // holds each element in the column. protected List values; @@ -51,27 +58,33 @@ public class TextualStringData implements StringData { private final Comparator descendingStringComparator = Comparator.reverseOrder(); + /** {@inheritDoc} */ + @Override public int valueHash(int rowNumber) { return get(rowNumber).hashCode(); } /** {@inheritDoc} */ + @Override public boolean equals(int rowNumber1, int rowNumber2) { return get(rowNumber1).equals(get(rowNumber2)); } - private TextualStringData(Collection strings) { + private TextColumn(String name, Collection strings) { + super(TextColumnType.instance(), name, TextColumnType.DEFAULT_PARSER); values = new ArrayList<>(strings.size()); for (String string : strings) { append(string); } } - private TextualStringData() { + private TextColumn(String name) { + super(TextColumnType.instance(), name, TextColumnType.DEFAULT_PARSER); values = new ArrayList<>(DEFAULT_ARRAY_SIZE); } - private TextualStringData(String[] strings) { + private TextColumn(String name, String[] strings) { + super(TextColumnType.instance(), name, TextColumnType.DEFAULT_PARSER); values = new ArrayList<>(strings.length); for (String string : strings) { append(string); @@ -79,61 +92,69 @@ private TextualStringData(String[] strings) { } public static boolean valueIsMissing(String string) { - return StringColumnType.valueIsMissing(string); + return TextColumnType.valueIsMissing(string); } - public TextualStringData appendMissing() { - append(StringColumnType.missingValueIndicator()); + @Override + public TextColumn appendMissing() { + append(TextColumnType.missingValueIndicator()); return this; } - public static TextualStringData create() { - return new TextualStringData(); + public static 
TextColumn create(String name) { + return new TextColumn(name); } - public static TextualStringData create(String... strings) { - return new TextualStringData(strings); + public static TextColumn create(String name, String... strings) { + return new TextColumn(name, strings); } - public static TextualStringData create(Collection strings) { - return new TextualStringData(strings); + public static TextColumn create(String name, Collection strings) { + return new TextColumn(name, strings); } - public static TextualStringData create(int size) { + public static TextColumn create(String name, int size) { ArrayList strings = new ArrayList<>(size); for (int i = 0; i < size; i++) { - strings.add(StringColumnType.missingValueIndicator()); + strings.add(TextColumnType.missingValueIndicator()); } - return new TextualStringData(strings); + return new TextColumn(name, strings); } - public static TextualStringData create(Stream stream) { - TextualStringData column = create(); + public static TextColumn create(String name, Stream stream) { + TextColumn column = create(name); stream.forEach(column::append); return column; } /** {@inheritDoc} */ + @Override public boolean isMissing(int rowNumber) { - return get(rowNumber).equals(StringColumnType.missingValueIndicator()); + return get(rowNumber).equals(TextColumnType.missingValueIndicator()); } /** {@inheritDoc} */ - public TextualStringData emptyCopy() { - return create(); + @Override + public TextColumn emptyCopy() { + TextColumn empty = create(name()); + empty.setPrintFormatter(getPrintFormatter()); + return empty; } /** {@inheritDoc} */ - public TextualStringData emptyCopy(int rowSize) { - return create(rowSize); + @Override + public TextColumn emptyCopy(int rowSize) { + return create(name(), rowSize); } /** {@inheritDoc} */ + @Override public void sortAscending() { values.sort(String::compareTo); } /** {@inheritDoc} */ + @Override public void sortDescending() { values.sort(descendingStringComparator); } @@ -143,6 +164,7 @@ 
public void sortDescending() { * * @return size as int */ + @Override public int size() { return values.size(); } @@ -154,6 +176,7 @@ public int size() { * @return value as String * @throws IndexOutOfBoundsException if the given rowIndex is not in the column */ + @Override public String get(int rowIndex) { return values.get(rowIndex); } @@ -166,21 +189,15 @@ public String get(int rowIndex) { * * @return values as a list of String. */ + @Override public List asList() { return new ArrayList<>(values); } - @Override - public Table countByCategory(String columnName) { - throw new UnsupportedOperationException(); - // TODO: fix me - // return asCategoricalStringData().countByCategory(columnName); - } - /** {@inheritDoc} */ + @Override public Table summary() { - // Table table = Table.create("Column: " + name()); - Table table = Table.create(); + Table table = Table.create("Column: " + name()); StringColumn measure = StringColumn.create("Measure"); StringColumn value = StringColumn.create("Value"); table.addColumns(measure); @@ -195,19 +212,25 @@ public Table summary() { } /** {@inheritDoc} */ + @Override public void clear() { values.clear(); } /** {@inheritDoc} */ - public TextualStringData lead(int n) { - return lag(-n); + @Override + public TextColumn lead(int n) { + TextColumn column = lag(-n); + column.setName(name() + " lead(" + n + ")"); + return column; } /** {@inheritDoc} */ - public TextualStringData lag(int n) { + @Override + public TextColumn lag(int n) { - TextualStringData copy = emptyCopy(); + TextColumn copy = emptyCopy(); + copy.setName(name() + " lag(" + n + ")"); if (n >= 0) { for (int m = 0; m < n; m++) { @@ -238,7 +261,8 @@ public TextualStringData lag(int n) { *

Examples: myCatColumn.set(myCatColumn.isEqualTo("Cat"), "Dog"); // no more cats * myCatColumn.set(myCatColumn.valueIsMissing(), "Fox"); // no more missing values */ - public TextualStringData set(Selection rowSelection, String newValue) { + @Override + public TextColumn set(Selection rowSelection, String newValue) { for (int row : rowSelection) { set(row, newValue); } @@ -246,7 +270,8 @@ public TextualStringData set(Selection rowSelection, String newValue) { } /** {@inheritDoc} */ - public TextualStringData set(int rowIndex, String stringValue) { + @Override + public TextColumn set(int rowIndex, String stringValue) { if (stringValue == null) { return setMissing(rowIndex); } @@ -255,6 +280,7 @@ public TextualStringData set(int rowIndex, String stringValue) { } /** {@inheritDoc} */ + @Override public int countUnique() { return asSet().size(); } @@ -265,13 +291,15 @@ public int countUnique() { * @param aString the value to look for * @return true if contains, false otherwise */ + @Override public boolean contains(String aString) { return values.contains(aString); } /** {@inheritDoc} */ - public TextualStringData setMissing(int i) { - return set(i, StringColumnType.missingValueIndicator()); + @Override + public TextColumn setMissing(int i) { + return set(i, TextColumnType.missingValueIndicator()); } /** @@ -279,7 +307,7 @@ public TextualStringData setMissing(int i) { * * @param stringValues a list of values */ - public TextualStringData addAll(List stringValues) { + public TextColumn addAll(List stringValues) { for (String stringValue : stringValues) { append(stringValue); } @@ -287,11 +315,27 @@ public TextualStringData addAll(List stringValues) { } /** {@inheritDoc} */ + @Override + public TextColumn appendCell(String object) { + append(parser().parse(object)); + return this; + } + + /** {@inheritDoc} */ + @Override + public TextColumn appendCell(String object, AbstractColumnParser parser) { + append(String.valueOf(parser.parse(object))); + return this; + } + + /** 
{@inheritDoc} */ + @Override public IntComparator rowComparator() { return rowComparator; } /** {@inheritDoc} */ + @Override public boolean isEmpty() { return values.isEmpty(); } @@ -301,20 +345,23 @@ public boolean isEmpty() { * * @return a column with unique values. */ - public TextualStringData unique() { + @Override + public TextColumn unique() { List strings = new ArrayList<>(asSet()); - return TextualStringData.create(strings); + return TextColumn.create(name() + " Unique values", strings); } /** {@inheritDoc} */ - public TextualStringData where(Selection selection) { - return (TextualStringData) subset(selection.toArray()); + @Override + public TextColumn where(Selection selection) { + return subset(selection.toArray()); } // TODO (lwhite): This could avoid the append and do a list copy /** {@inheritDoc} */ - public TextualStringData copy() { - TextualStringData newCol = create(size()); + @Override + public TextColumn copy() { + TextColumn newCol = create(name(), size()); int r = 0; for (String string : this) { newCol.set(r, string); @@ -324,18 +371,28 @@ public TextualStringData copy() { } /** {@inheritDoc} */ - public void append(Column column) { + @Override + public TextColumn append(Column column) { + Preconditions.checkArgument( + column.type() == TEXT || column.type().equals(STRING), + "Column '%s' has type %s, but column '%s' has type %s.", + name(), + type(), + column.name(), + column.type()); final int size = column.size(); for (int i = 0; i < size; i++) { append(column.getString(i)); } + return this; } /** Returns the count of missing values in this column */ + @Override public int countMissing() { int count = 0; for (int i = 0; i < size(); i++) { - if (StringColumnType.missingValueIndicator().equals(get(i))) { + if (TextColumnType.missingValueIndicator().equals(get(i))) { count++; } } @@ -343,10 +400,11 @@ public int countMissing() { } /** {@inheritDoc} */ - public TextualStringData removeMissing() { - TextualStringData noMissing = emptyCopy(); + 
@Override + public TextColumn removeMissing() { + TextColumn noMissing = emptyCopy(); for (String v : this) { - if (!StringColumnType.valueIsMissing(v)) { + if (!TextColumnType.valueIsMissing(v)) { noMissing.append(v); } } @@ -354,29 +412,34 @@ public TextualStringData removeMissing() { } /** {@inheritDoc} */ + @Override public Iterator iterator() { return values.iterator(); } /** {@inheritDoc} */ + @Override public Set asSet() { return new HashSet<>(values); } /** Returns the contents of the cell at rowNumber as a byte[] */ + @Override public byte[] asBytes(int rowNumber) { String value = get(rowNumber); return value.getBytes(); } /** Added for naming consistency with all other columns */ - public TextualStringData append(String value) { + @Override + public TextColumn append(String value) { values.add(value); return this; } /** {@inheritDoc} */ - public TextualStringData appendObj(Object obj) { + @Override + public TextColumn appendObj(Object obj) { if (obj == null) { return appendMissing(); } @@ -388,6 +451,7 @@ public TextualStringData appendObj(Object obj) { } /** {@inheritDoc} */ + @Override public Selection isIn(String... strings) { Set stringSet = Sets.newHashSet(strings); @@ -401,6 +465,7 @@ public Selection isIn(String... strings) { } /** {@inheritDoc} */ + @Override public Selection isIn(Collection strings) { Set stringSet = Sets.newHashSet(strings); @@ -414,6 +479,7 @@ public Selection isIn(Collection strings) { } /** {@inheritDoc} */ + @Override public Selection isNotIn(String... strings) { Selection results = new BitmapBackedSelection(); results.addRange(0, size()); @@ -422,6 +488,7 @@ public Selection isNotIn(String... 
strings) { } /** {@inheritDoc} */ + @Override public Selection isNotIn(Collection strings) { Selection results = new BitmapBackedSelection(); results.addRange(0, size()); @@ -434,6 +501,7 @@ public int firstIndexOf(String value) { } /** {@inheritDoc} */ + @Override public String[] asObjectArray() { final String[] output = new String[size()]; for (int i = 0; i < size(); i++) { @@ -442,40 +510,13 @@ public String[] asObjectArray() { return output; } - /** - * Returns a double that can stand in for the string at index i in some ML applications - * - *

TODO: Evaluate use of hashCode() here for uniqueness - * - * @param i The index in this column - */ - public double getDouble(int i) { - return values.get(i).hashCode(); - } - - public double[] asDoubleArray() { - double[] result = new double[this.size()]; + /** {@inheritDoc} */ + @Override + public StringColumn asStringColumn() { + StringColumn textColumn = StringColumn.create(name(), size()); for (int i = 0; i < size(); i++) { - result[i] = getDouble(i); + textColumn.set(i, get(i)); } - return result; - } - - public int countOccurrences(String value) { - return isEqualTo(value).size(); - } - - /** - * {@inheritDoc} Unsupported Operation This can't be used on a text column as the number of - * BooleanColumns would likely be excessive - */ - public List getDummies() { - throw new UnsupportedOperationException( - "StringColumns containing arbitary, non-categorical strings do not support the getDummies() method for performance reasons"); - } - - /** Returns null, as this Column is not backed by a dictionaryMap */ - public @Nullable DictionaryMap getDictionary() { - return null; + return textColumn; } } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/AbstractColumn.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/AbstractColumn.java index 8fa2286037..31ab2d76c6 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/AbstractColumn.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/AbstractColumn.java @@ -194,11 +194,4 @@ public StringColumn asStringColumn() { public int indexOf(final Object o) { return IntStream.range(0, size()).filter(i -> get(i).equals(o)).findFirst().orElse(-1); } - - /** {@inheritDoc} */ - @Override - public int lastIndexOf(Object o) { - return IntStream.iterate(size() - 1, i -> (i >= 0), i -> i - 1).filter(i -> get(i).equals(o)) - .findFirst().orElse(-1); - } } diff --git 
a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/Column.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/Column.java index 83ba1dc895..f88ebd2c90 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/Column.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/Column.java @@ -743,10 +743,4 @@ default Interpolator interpolate() { * not in the column. */ int indexOf(Object o); - - /** - * Returns the index of the last occurrence of {@code o} in the column or -1 if the element is not - * in the column. - */ - int lastIndexOf(Object o); } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/dates/DateParser.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/dates/DateParser.java index b2d99f3110..0012b50fb8 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/dates/DateParser.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/dates/DateParser.java @@ -8,7 +8,6 @@ import java.util.Locale; import tech.tablesaw.api.ColumnType; import tech.tablesaw.columns.AbstractColumnParser; -import tech.tablesaw.columns.datetimes.DateTimeParser; import tech.tablesaw.io.ReadOptions; public class DateParser extends AbstractColumnParser { @@ -20,19 +19,19 @@ public class DateParser extends AbstractColumnParser { private static final DateTimeFormatter dtf4 = DateTimeFormatter.ofPattern("MM.dd.yyyy"); private static final DateTimeFormatter dtf5 = DateTimeFormatter.ofPattern("yyyy-MM-dd"); private static final DateTimeFormatter dtf6 = DateTimeFormatter.ofPattern("yyyy/MM/dd"); - private static final DateTimeFormatter dtf7 = DateTimeParser.caseInsensitiveFormatter("dd/MMM/yyyy"); - private static final DateTimeFormatter dtf8 = DateTimeParser.caseInsensitiveFormatter("dd-MMM-yyyy"); + private static final DateTimeFormatter dtf7 = 
DateTimeFormatter.ofPattern("dd/MMM/yyyy"); + private static final DateTimeFormatter dtf8 = DateTimeFormatter.ofPattern("dd-MMM-yyyy"); private static final DateTimeFormatter dtf9 = DateTimeFormatter.ofPattern("M/d/yyyy"); private static final DateTimeFormatter dtf10 = DateTimeFormatter.ofPattern("M/d/yy"); - private static final DateTimeFormatter dtf11 = DateTimeParser.caseInsensitiveFormatter("MMM/dd/yyyy"); - private static final DateTimeFormatter dtf12 = DateTimeParser.caseInsensitiveFormatter("MMM-dd-yyyy"); - private static final DateTimeFormatter dtf13 = DateTimeParser.caseInsensitiveFormatter("MMM/dd/yy"); - private static final DateTimeFormatter dtf14 = DateTimeParser.caseInsensitiveFormatter("MMM-dd-yy"); - private static final DateTimeFormatter dtf15 = DateTimeParser.caseInsensitiveFormatter("MMM/dd/yyyy"); - private static final DateTimeFormatter dtf16 = DateTimeParser.caseInsensitiveFormatter("MMM/d/yyyy"); - private static final DateTimeFormatter dtf17 = DateTimeParser.caseInsensitiveFormatter("MMM-dd-yy"); - private static final DateTimeFormatter dtf18 = DateTimeParser.caseInsensitiveFormatter("MMM dd, yyyy"); - private static final DateTimeFormatter dtf19 = DateTimeParser.caseInsensitiveFormatter("MMM d, yyyy"); + private static final DateTimeFormatter dtf11 = DateTimeFormatter.ofPattern("MMM/dd/yyyy"); + private static final DateTimeFormatter dtf12 = DateTimeFormatter.ofPattern("MMM-dd-yyyy"); + private static final DateTimeFormatter dtf13 = DateTimeFormatter.ofPattern("MMM/dd/yy"); + private static final DateTimeFormatter dtf14 = DateTimeFormatter.ofPattern("MMM-dd-yy"); + private static final DateTimeFormatter dtf15 = DateTimeFormatter.ofPattern("MMM/dd/yyyy"); + private static final DateTimeFormatter dtf16 = DateTimeFormatter.ofPattern("MMM/d/yyyy"); + private static final DateTimeFormatter dtf17 = DateTimeFormatter.ofPattern("MMM-dd-yy"); + private static final DateTimeFormatter dtf18 = DateTimeFormatter.ofPattern("MMM dd, yyyy"); + private 
static final DateTimeFormatter dtf19 = DateTimeFormatter.ofPattern("MMM d, yyyy"); // A formatter that handles all the date formats defined above public static final DateTimeFormatter DEFAULT_FORMATTER = diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/datetimes/DateTimeParser.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/datetimes/DateTimeParser.java index cc6dd8f39e..aa0dfd4e3e 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/datetimes/DateTimeParser.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/datetimes/DateTimeParser.java @@ -19,26 +19,14 @@ public class DateTimeParser extends AbstractColumnParser { DateTimeFormatter.ofPattern( "yyyy-MM-dd HH:mm:ss.S"); // 2014-07-09 13:03:44.7 (as above, but without leading 0 in // millis) - private static final DateTimeFormatter dtTimef4 = caseInsensitiveFormatter("dd-MMM-yyyy HH:mm"); // 09-Jul-2014 13:03 + private static final DateTimeFormatter dtTimef4 = + DateTimeFormatter.ofPattern("dd-MMM-yyyy HH:mm"); // 09-Jul-2014 13:03 private static final DateTimeFormatter dtTimef5 = DateTimeFormatter.ISO_LOCAL_DATE_TIME; private static final DateTimeFormatter dtTimef6; // ISO, with millis appended private static final DateTimeFormatter dtTimef7 = // 7/9/14 9:04 DateTimeFormatter.ofPattern("M/d/yy H:mm"); - private static final DateTimeFormatter dtTimef8 = caseInsensitiveFormatter("M/d/yyyy h:mm:ss a"); // 7/9/2014 9:04:55 PM - - /** - * Creates a Case-insensitive formatter using the specified pattern. - * This method will create a formatter based on a simple pattern of letters and symbols as described in the class documentation. - * For example, d MMM yyyy will format 2011-12-03 as '3 Dec 2011'. The formatter will use the default FORMAT locale. 
- * This function can handle cases like am/AM, pm/PM, Jan/JAN, Feb/FEB etc - * - * @param pattern the pattern to use, not null - * @return the formatter based on the pattern, not null - * @throws IllegalArgumentException if the pattern is invalid - */ - public static DateTimeFormatter caseInsensitiveFormatter(String pattern) { - return new DateTimeFormatterBuilder().parseCaseInsensitive().appendPattern(pattern).toFormatter(); - } + private static final DateTimeFormatter dtTimef8 = + DateTimeFormatter.ofPattern("M/d/yyyy h:mm:ss a"); // 7/9/2014 9:04:55 PM static { dtTimef6 = diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/instant/PackedInstant.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/instant/PackedInstant.java index afc09f7c0d..a86eee6757 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/instant/PackedInstant.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/instant/PackedInstant.java @@ -121,7 +121,7 @@ public static String toString(long dateTime) { * Returns the given packedDateTime with amtToAdd of temporal units added * *

TODO(lwhite): Replace with a native implementation that doesn't convert everything to - * Instant + * LocalDateTime */ public static long plus(long packedDateTime, long amountToAdd, TemporalUnit unit) { Instant dateTime = asInstant(packedDateTime); diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/numbers/DoubleColumnType.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/numbers/DoubleColumnType.java index 4427ac806b..acee5bf0ca 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/numbers/DoubleColumnType.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/numbers/DoubleColumnType.java @@ -44,6 +44,12 @@ public static boolean valueIsMissing(double value) { return Double.isNaN(value); } + /** @deprecated Please use valueIsMissing(double) instead */ + @Deprecated + public static boolean isMissingValue(double value) { + return Double.isNaN(value); + } + /** * Returns the missing value indicator for this column type NOTE: Clients should use {@link * DoubleColumnType#valueIsMissing(double)} to test for missing value indicators diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/AbstractStringColumn.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/AbstractStringColumn.java new file mode 100644 index 0000000000..a92a9a218d --- /dev/null +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/AbstractStringColumn.java @@ -0,0 +1,108 @@ +package tech.tablesaw.columns.strings; + +import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.List; +import tech.tablesaw.api.CategoricalColumn; +import tech.tablesaw.api.ColumnType; +import tech.tablesaw.columns.AbstractColumn; +import tech.tablesaw.columns.AbstractColumnParser; +import tech.tablesaw.columns.Column; + +/** Abstract super class for Text like 
columns. */ +public abstract class AbstractStringColumn> + extends AbstractColumn + implements CategoricalColumn, StringFilters, StringMapFunctions, StringReduceUtils { + + private StringColumnFormatter printFormatter = new StringColumnFormatter(); + + /** Constructs a column of the given ColumnType, name, and parser */ + public AbstractStringColumn(ColumnType type, String name, AbstractColumnParser parser) { + super(type, name, parser); + } + + /** + * Sets an {@link StringColumnFormatter} which will be used to format the display of data from + * this column when it is printed (using, for example, Table:print()) and optionally when written + * to a text file like a CSV. + */ + public void setPrintFormatter(StringColumnFormatter formatter) { + Preconditions.checkNotNull(formatter); + this.printFormatter = formatter; + } + + /** Returns the current {@link StringColumnFormatter}. */ + public StringColumnFormatter getPrintFormatter() { + return printFormatter; + } + + /** {@inheritDoc} */ + @Override + public String getString(int row) { + return printFormatter.format(get(row)); + } + + /** {@inheritDoc} */ + @Override + public String getUnformattedString(int row) { + return String.valueOf(get(row)); + } + + /** + * Returns the largest ("top") n values in the column + * + * @param n The maximum number of records to return. The actual number will be smaller if n is + * greater than the number of observations in the column + * @return A list, possibly empty, of the largest observations + */ + public List top(int n) { + List top = new ArrayList<>(); + Column copy = this.copy(); + copy.sortDescending(); + for (int i = 0; i < n; i++) { + top.add(copy.get(i)); + } + return top; + } + + /** + * Returns the smallest ("bottom") n values in the column + * + * @param n The maximum number of records to return. 
The actual number will be smaller if n is + * greater than the number of observations in the column + * @return A list, possibly empty, of the smallest n observations + */ + public List bottom(int n) { + List bottom = new ArrayList<>(); + Column copy = this.copy(); + copy.sortAscending(); + for (int i = 0; i < n; i++) { + bottom.add(copy.get(i)); + } + return bottom; + } + + /** {@inheritDoc} */ + @Override + public Column append(Column column, int row) { + return append(column.getUnformattedString(row)); + } + + /** {@inheritDoc} */ + @Override + public Column set(int row, Column column, int sourceRow) { + return set(row, column.getUnformattedString(sourceRow)); + } + + /** {@inheritDoc} */ + @Override + public int byteSize() { + return type().byteSize(); + } + + /** {@inheritDoc} */ + @Override + public int compare(String o1, String o2) { + return o1.compareTo(o2); + } +} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/ByteDictionaryMap.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/ByteDictionaryMap.java index 0cb9f92784..00733e03bc 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/ByteDictionaryMap.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/ByteDictionaryMap.java @@ -27,7 +27,6 @@ import tech.tablesaw.api.IntColumn; import tech.tablesaw.api.StringColumn; import tech.tablesaw.api.Table; -import tech.tablesaw.columns.booleans.BooleanColumnType; import tech.tablesaw.selection.BitmapBackedSelection; import tech.tablesaw.selection.Selection; @@ -43,8 +42,6 @@ public class ByteDictionaryMap implements DictionaryMap { private static final byte DEFAULT_RETURN_VALUE = Byte.MIN_VALUE; - private boolean canPromoteToText = Boolean.TRUE; - private final ByteComparator reverseDictionarySortComparator = (i, i1) -> Comparator.reverseOrder().compare(getValueForByteKey(i), getValueForByteKey(i1)); @@ 
-79,12 +76,6 @@ public ByteDictionaryMap() { keyToCount.defaultReturnValue(0); } - public ByteDictionaryMap(boolean canPromoteToText) { - valueToKey.defaultReturnValue(DEFAULT_RETURN_VALUE); - keyToCount.defaultReturnValue(0); - this.canPromoteToText = canPromoteToText; - } - private ByteDictionaryMap(ByteDictionaryBuilder builder) { this.nextIndex = builder.nextIndex; this.keyToValue = builder.keyToValue; @@ -374,9 +365,12 @@ public List getDummies() { String category = getValueForKey(next); for (BooleanColumn column : results) { if (category.equals(column.name())) { - column.append(BooleanColumnType.BYTE_TRUE); + // TODO(lwhite): update the correct row more efficiently, by using set rather than add & + // only + // updating true + column.append(true); } else { - column.append(BooleanColumnType.BYTE_FALSE); + column.append(false); } } } @@ -401,7 +395,7 @@ public int countMissing() { @Override public Iterator iterator() { - return new Iterator<>() { + return new Iterator() { private final ByteListIterator valuesIt = values.iterator(); @@ -451,11 +445,6 @@ public int nextKeyWithoutIncrementing() { return nextIndex.get(); } - @Override - public boolean canPromoteToText() { - return canPromoteToText; - } - public static class ByteDictionaryBuilder { private AtomicInteger nextIndex; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/DictionaryMap.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/DictionaryMap.java index cf95cd94fd..416eae20cd 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/DictionaryMap.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/DictionaryMap.java @@ -15,7 +15,7 @@ * StringColumn, as well as the primitive values that represent the individual instances of the * String in the column. 
*/ -public interface DictionaryMap extends StringReduceUtils, StringFilters { +public interface DictionaryMap { void sortDescending(); @@ -93,41 +93,10 @@ default Selection isNotEqualTo(String string) { return selection; } - @Override - default String get(int index) { - return getValueForIndex(index); - } - - @Override - default Selection isIn(String... strings) { - return selectIsIn(strings); - } - - @Override - default Selection isIn(Collection strings) { - return selectIsIn(strings); - } - - @Override - default Selection isNotIn(String... strings) { - Selection results = new BitmapBackedSelection(); - results.addRange(0, size()); - results.andNot(isIn(strings)); - return results; - } - - @Override - default Selection isNotIn(Collection strings) { - Selection results = new BitmapBackedSelection(); - results.addRange(0, size()); - results.andNot(isIn(strings)); - return results; - } - List getDummies(); /** Returns the contents of the cell at rowNumber as a byte[] */ - byte[] asBytes(int rowNumber); + public byte[] asBytes(int rowNumber); /** Returns the count of missing values in this column */ int countMissing(); @@ -141,10 +110,4 @@ default Selection isNotIn(Collection strings) { DictionaryMap promoteYourself(); int nextKeyWithoutIncrementing(); - - boolean canPromoteToText(); - - default boolean isEmpty() { - return size() == 0; - } } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/IntDictionaryMap.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/IntDictionaryMap.java index 2b9093f130..20a6ddc0dc 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/IntDictionaryMap.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/IntDictionaryMap.java @@ -27,7 +27,6 @@ import tech.tablesaw.api.IntColumn; import tech.tablesaw.api.StringColumn; import tech.tablesaw.api.Table; -import 
tech.tablesaw.columns.booleans.BooleanColumnType; import tech.tablesaw.selection.BitmapBackedSelection; import tech.tablesaw.selection.Selection; @@ -43,8 +42,6 @@ public class IntDictionaryMap implements DictionaryMap { private static final int DEFAULT_RETURN_VALUE = Integer.MIN_VALUE; - private final boolean canPromoteToText = Boolean.TRUE; - private final IntComparator reverseDictionarySortComparator = (i, i1) -> Comparator.reverseOrder().compare(getValueForKey(i), getValueForKey(i1)); @@ -363,9 +360,12 @@ public List getDummies() { String category = getValueForKey(next); for (BooleanColumn column : results) { if (category.equals(column.name())) { - column.append(BooleanColumnType.BYTE_TRUE); + // TODO(lwhite): update the correct row more efficiently, by using set rather than add & + // only + // updating true + column.append(true); } else { - column.append(BooleanColumnType.BYTE_FALSE); + column.append(false); } } } @@ -390,7 +390,7 @@ public int countMissing() { @Override public Iterator iterator() { - return new Iterator<>() { + return new Iterator() { private final IntListIterator valuesIt = values.iterator(); @@ -423,9 +423,6 @@ public boolean isMissing(int rowNumber) { @Override public DictionaryMap promoteYourself() { - if (canPromoteToText) { - return new NullDictionaryMap(this); - } return this; } @@ -434,11 +431,6 @@ public int nextKeyWithoutIncrementing() { return nextIndex.get(); } - @Override - public boolean canPromoteToText() { - return canPromoteToText; - } - public static class IntDictionaryBuilder { private AtomicInteger nextIndex; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/NullDictionaryMap.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/NullDictionaryMap.java deleted file mode 100644 index f031a8b361..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/NullDictionaryMap.java +++ /dev/null @@ -1,194 +0,0 @@ 
-package tech.tablesaw.columns.strings; - -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Set; -import tech.tablesaw.api.BooleanColumn; -import tech.tablesaw.api.Table; -import tech.tablesaw.selection.Selection; - -/** - * A null dictionary map has no actual dictionary as the underlying data is not dictionary encoded. - * It works with textual data that is non-categorical, or where the number of categories approaches - * 1/2 of the total number of values, making dictionary encoding inefficient. - */ -public class NullDictionaryMap implements DictionaryMap { - - private final TextualStringData data; - - public NullDictionaryMap(DictionaryMap dictionaryMap) { - data = TextualStringData.create(); - for (String s : dictionaryMap) { - data.append(s); - } - } - - @Override - public void sortDescending() { - data.sortDescending(); - } - - @Override - public void sortAscending() { - data.sortAscending(); - } - - @Override - public int getKeyAtIndex(int rowNumber) { - throw new UnsupportedOperationException( - "NullDictionaryMap does not support getKeyAtIndex because there is no dictionary encoding."); - } - - @Override - public String getValueForKey(int key) { - throw new UnsupportedOperationException( - "NullDictionaryMap does not support getValueForKey because there is no dictionary encoding."); - } - - @Override - public int size() { - return data.size(); - } - - @Override - public String getValueForIndex(int rowIndex) { - return data.get(rowIndex); - } - - @Override - public int countOccurrences(String value) { - return data.countOccurrences(value); - } - - @Override - public Set asSet() { - return data.asSet(); - } - - @Override - public int getKeyForIndex(int i) { - throw new UnsupportedOperationException( - "NullDictionaryMap does not support getKeyForIndex because there is no dictionary encoding."); - } - - @Override - public int firstIndexOf(String string) { - return data.firstIndexOf(string); - } - - @Override - 
public String[] asObjectArray() { - return data.asObjectArray(); - } - - @Override - public Selection selectIsIn(String... strings) { - return data.isIn(strings); - } - - @Override - public Selection selectIsIn(Collection strings) { - return data.isIn(strings); - } - - @Override - public void append(String value) throws NoKeysAvailableException { - data.append(value); - } - - @Override - public void set(int rowIndex, String stringValue) throws NoKeysAvailableException { - data.set(rowIndex, stringValue); - } - - @Override - public void clear() { - data.clear(); - } - - @Override - public int countUnique() { - return data.countUnique(); - } - - @Override - public Table countByCategory(String columnName) { - return data.countByCategory(columnName); - } - - @Override - public Selection isEqualTo(String string) { - return data.isEqualTo(string); - } - - @Override - public String get(int index) { - return data.get(index); - } - - @Override - public Selection isIn(String... strings) { - return data.isIn(strings); - } - - @Override - public Selection isIn(Collection strings) { - return data.isIn(strings); - } - - @Override - public Selection isNotIn(String... 
strings) { - return data.isNotIn(strings); - } - - @Override - public Selection isNotIn(Collection strings) { - return data.isNotIn(strings); - } - - @Override - public List getDummies() { - return data.getDummies(); - } - - @Override - public byte[] asBytes(int rowNumber) { - return data.asBytes(rowNumber); - } - - @Override - public int countMissing() { - return data.countMissing(); - } - - @Override - public Iterator iterator() { - return data.iterator(); - } - - @Override - public void appendMissing() { - data.appendMissing(); - } - - @Override - public boolean isMissing(int rowNumber) { - return data.isMissing(rowNumber); - } - - @Override - public DictionaryMap promoteYourself() { - return this; - } - - @Override - public int nextKeyWithoutIncrementing() { - return size(); - } - - @Override - public boolean canPromoteToText() { - return false; - } -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/ShortDictionaryMap.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/ShortDictionaryMap.java index f4fa242892..f83b2dca5b 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/ShortDictionaryMap.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/ShortDictionaryMap.java @@ -27,7 +27,6 @@ import tech.tablesaw.api.IntColumn; import tech.tablesaw.api.StringColumn; import tech.tablesaw.api.Table; -import tech.tablesaw.columns.booleans.BooleanColumnType; import tech.tablesaw.selection.BitmapBackedSelection; import tech.tablesaw.selection.Selection; @@ -43,8 +42,6 @@ public class ShortDictionaryMap implements DictionaryMap { private static final short DEFAULT_RETURN_VALUE = Short.MIN_VALUE; - private final boolean canPromoteToText; - private final ShortComparator reverseDictionarySortComparator = (i, i1) -> Comparator.reverseOrder() @@ -77,7 +74,6 @@ public int getKeyAtIndex(int rowNumber) { 
ShortDictionaryMap(ByteDictionaryMap original) throws NoKeysAvailableException { valueToKey.defaultReturnValue(DEFAULT_RETURN_VALUE); keyToCount.defaultReturnValue(0); - canPromoteToText = original.canPromoteToText(); for (int i = 0; i < original.size(); i++) { String value = original.getValueForIndex(i); @@ -90,7 +86,6 @@ private ShortDictionaryMap(ShortDictionaryBuilder builder) { this.keyToValue = builder.keyToValue; this.valueToKey = builder.valueToKey; this.keyToCount = builder.keyToCount; - this.canPromoteToText = builder.canPromoteToText; this.values = builder.values; } @@ -377,9 +372,12 @@ public List getDummies() { String category = getValueForKey(next); for (BooleanColumn column : results) { if (category.equals(column.name())) { - column.append(BooleanColumnType.BYTE_TRUE); + // TODO(lwhite): update the correct row more efficiently, by using set rather than add & + // only + // updating true + column.append(true); } else { - column.append(BooleanColumnType.BYTE_FALSE); + column.append(false); } } } @@ -438,17 +436,13 @@ public boolean isMissing(int rowNumber) { @Override public DictionaryMap promoteYourself() { - DictionaryMap dictionaryMap; + IntDictionaryMap dictionaryMap; - if (canPromoteToText && countUnique() > size() * 0.5) { - dictionaryMap = new NullDictionaryMap(this); - } else { - try { - dictionaryMap = new IntDictionaryMap(this); - } catch (NoKeysAvailableException e) { - // this should never happen; - throw new IllegalStateException(e); - } + try { + dictionaryMap = new IntDictionaryMap(this); + } catch (NoKeysAvailableException e) { + // this should never happen; + throw new IllegalStateException(e); } return dictionaryMap; @@ -459,11 +453,6 @@ public int nextKeyWithoutIncrementing() { return nextIndex.get(); } - @Override - public boolean canPromoteToText() { - return canPromoteToText; - } - public static class ShortDictionaryBuilder { private AtomicInteger nextIndex; @@ -481,8 +470,6 @@ public static class ShortDictionaryBuilder { // the 
map with counts private Short2IntOpenHashMap keyToCount; - private boolean canPromoteToText = true; - public ShortDictionaryBuilder setNextIndex(int value) { nextIndex = new AtomicInteger(value); return this; @@ -493,11 +480,6 @@ public ShortDictionaryBuilder setKeyToValue(Short2ObjectMap keyToValue) return this; } - public ShortDictionaryBuilder setCanPromoteToText(boolean canPromoteToText) { - this.canPromoteToText = canPromoteToText; - return this; - } - public ShortDictionaryBuilder setValueToKey(Object2ShortOpenHashMap valueToKey) { this.valueToKey = valueToKey; return this; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringData.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringData.java deleted file mode 100644 index ac016f5504..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringData.java +++ /dev/null @@ -1,96 +0,0 @@ -package tech.tablesaw.columns.strings; - -import it.unimi.dsi.fastutil.ints.IntComparator; -import java.util.List; -import java.util.Set; -import javax.annotation.Nullable; -import tech.tablesaw.api.BooleanColumn; -import tech.tablesaw.api.Table; -import tech.tablesaw.columns.Column; -import tech.tablesaw.selection.Selection; - -public interface StringData extends StringFilters, StringReduceUtils { - - StringData appendMissing(); - - StringData append(String value); - - StringData emptyCopy(); - - StringData emptyCopy(int rowSize); - - void sortAscending(); - - void sortDescending(); - - void clear(); - - StringData unique(); - - StringData where(Selection selection); - - StringData copy(); - - StringData lead(int n); - - StringData lag(int n); - - StringData set(Selection rowSelection, String newValue); - - StringData set(int rowNumber, String value); - - boolean contains(String aString); - - StringData setMissing(int i); - - IntComparator rowComparator(); - - StringData removeMissing(); - - Set 
asSet(); - - byte[] asBytes(int rowNumber); - - String[] asObjectArray(); - - boolean isEmpty(); - - boolean isMissing(int rowNumber); - - int countUnique(); - - void append(Column column); - - int countMissing(); - - List asList(); - - Table countByCategory(String columnName); - - List getDummies(); - - double getDouble(int i); - - double[] asDoubleArray(); - - StringData appendObj(Object obj); - - int firstIndexOf(String value); - - int countOccurrences(String value); - - /** - * Return a StringData of the same type containing just those elements whose indexes are included - * in the given array - */ - default StringData subset(int[] rows) { - final StringData c = this.emptyCopy(); - for (final int row : rows) { - c.appendObj(get(row)); - } - return c; - } - - @Nullable - DictionaryMap getDictionary(); -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringFilters.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringFilters.java index 718ff972c2..44fae1ac44 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringFilters.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringFilters.java @@ -41,7 +41,7 @@ import tech.tablesaw.selection.BitmapBackedSelection; import tech.tablesaw.selection.Selection; -public interface StringFilters extends StringFilterSpec { +public interface StringFilters extends Column, StringFilterSpec { default Selection eval(BiPredicate predicate, Column otherColumn) { Selection selection = new BitmapBackedSelection(); @@ -139,6 +139,10 @@ default Selection isLongerThan(int stringLength) { return eval(isLongerThan, stringLength); } + Selection isIn(String... 
strings); + + Selection isIn(Collection strings); + default Selection isIn(Column strings) { return isIn(strings.unique().asList()); } @@ -147,6 +151,10 @@ default Selection isNotIn(Column strings) { return isNotIn(strings.unique().asList()); } + Selection isNotIn(String... strings); + + Selection isNotIn(Collection strings); + // Column Methods default Selection isEqualTo(Column other) { return eval(isEqualTo, other); @@ -186,14 +194,4 @@ default Selection isNotEqualTo(String string) { } String get(int index); - - Selection isIn(String... strings); - - Selection isIn(Collection strings); - - Selection isNotIn(String... strings); - - Selection isNotIn(Collection strings); - - int size(); } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringMapFunctions.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringMapFunctions.java index 9cb5b07ab9..6429c833db 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringMapFunctions.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringMapFunctions.java @@ -35,11 +35,7 @@ * *

This code was developed as part of Apache Commons Text. */ -public interface StringMapFunctions { - - int size(); - - String getString(int idx); +public interface StringMapFunctions extends Column { default StringColumn upperCase() { StringColumn newColumn = StringColumn.create(this.name() + "[ucase]"); @@ -205,8 +201,7 @@ default StringColumn format(String formatString) { */ default IntColumn parseInt() { IntColumn newColumn = IntColumn.create(name() + "[parsed]"); - for (int i = 0; i < size(); i++) { - String s = getString(i); + for (String s : this) { if (StringColumn.valueIsMissing(s)) { newColumn.appendMissing(); } else { @@ -224,8 +219,7 @@ default IntColumn parseInt() { */ default DoubleColumn parseDouble() { DoubleColumn newColumn = DoubleColumn.create(name() + "[parsed]"); - for (int i = 0; i < size(); i++) { - String s = getString(i); + for (String s : this) { if (StringColumn.valueIsMissing(s)) { newColumn.appendMissing(); } else { @@ -243,8 +237,7 @@ default DoubleColumn parseDouble() { */ default FloatColumn parseFloat() { FloatColumn newColumn = FloatColumn.create(name() + "[parsed]"); - for (int i = 0; i < size(); i++) { - String s = getString(i); + for (String s : this) { if (StringColumn.valueIsMissing(s)) { newColumn.appendMissing(); } else { @@ -508,6 +501,4 @@ default StringColumn tokenizeAndRemoveDuplicates(String separator) { } return newColumn; } - - String name(); } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringReduceUtils.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringReduceUtils.java index 5177c9178b..76f5e4adbd 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringReduceUtils.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/StringReduceUtils.java @@ -14,7 +14,9 @@ package tech.tablesaw.columns.strings; -public interface StringReduceUtils extends 
Iterable { +import tech.tablesaw.columns.Column; + +public interface StringReduceUtils extends Column, Iterable { /** * Returns a single string made by appending all the strings in this column, separated by the @@ -42,6 +44,4 @@ default String appendAll(String delimiter) { default String appendAll() { return appendAll(" "); } - - int size(); } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/TextColumnType.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/TextColumnType.java new file mode 100644 index 0000000000..c86816fb93 --- /dev/null +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/strings/TextColumnType.java @@ -0,0 +1,51 @@ +package tech.tablesaw.columns.strings; + +import tech.tablesaw.api.ColumnType; +import tech.tablesaw.api.TextColumn; +import tech.tablesaw.columns.AbstractColumnType; +import tech.tablesaw.io.ReadOptions; + +/** + * A ColumnType for columns that holds String values. + * + *

It is optimized for situations where the values in the column rarely if ever repeat, so the + * dictionary encoding performed by StringColumn would be detrimental to memory usage. + * + *

See also: {@link tech.tablesaw.api.StringColumn} + */ +public class TextColumnType extends AbstractColumnType { + + public static final int BYTE_SIZE = 4; + public static final StringParser DEFAULT_PARSER = new StringParser(ColumnType.STRING); + + private static TextColumnType INSTANCE; + + private TextColumnType(int byteSize, String name, String printerFriendlyName) { + super(byteSize, name, printerFriendlyName); + } + + public static TextColumnType instance() { + if (INSTANCE == null) { + INSTANCE = new TextColumnType(BYTE_SIZE, "TEXT", "Text"); + } + return INSTANCE; + } + + public static boolean valueIsMissing(String string) { + return missingValueIndicator().equals(string); + } + + @Override + public TextColumn create(String name) { + return TextColumn.create(name); + } + + @Override + public StringParser customParser(ReadOptions options) { + return new StringParser(this, options); + } + + public static String missingValueIndicator() { + return ""; + } +} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/times/TimeParser.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/times/TimeParser.java index 242ed97f90..35ee4107f1 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/times/TimeParser.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/columns/times/TimeParser.java @@ -9,17 +9,16 @@ import java.util.Locale; import tech.tablesaw.api.ColumnType; import tech.tablesaw.columns.AbstractColumnParser; -import tech.tablesaw.columns.datetimes.DateTimeParser; import tech.tablesaw.io.ReadOptions; public class TimeParser extends AbstractColumnParser { private static final DateTimeFormatter timef1 = DateTimeFormatter.ofPattern("HH:mm:ss.SSS"); - private static final DateTimeFormatter timef2 = DateTimeParser.caseInsensitiveFormatter("hh:mm:ss a"); - private static final DateTimeFormatter timef3 = DateTimeParser.caseInsensitiveFormatter("h:mm:ss a"); 
+ private static final DateTimeFormatter timef2 = DateTimeFormatter.ofPattern("hh:mm:ss a"); + private static final DateTimeFormatter timef3 = DateTimeFormatter.ofPattern("h:mm:ss a"); private static final DateTimeFormatter timef4 = DateTimeFormatter.ISO_LOCAL_TIME; - private static final DateTimeFormatter timef5 = DateTimeParser.caseInsensitiveFormatter("hh:mm a"); - private static final DateTimeFormatter timef6 = DateTimeParser.caseInsensitiveFormatter("h:mm a"); + private static final DateTimeFormatter timef5 = DateTimeFormatter.ofPattern("hh:mm a"); + private static final DateTimeFormatter timef6 = DateTimeFormatter.ofPattern("h:mm a"); // only for parsing: private static final DateTimeFormatter timef7 = DateTimeFormatter.ofPattern("HHmm"); diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/filtering/DeferredTextColumn.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/filtering/DeferredTextColumn.java new file mode 100644 index 0000000000..5abd3d8998 --- /dev/null +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/filtering/DeferredTextColumn.java @@ -0,0 +1,132 @@ +package tech.tablesaw.filtering; + +import com.google.common.annotations.Beta; +import java.util.Collection; +import java.util.function.Function; +import tech.tablesaw.api.Table; +import tech.tablesaw.columns.Column; +import tech.tablesaw.selection.Selection; + +@Beta +public class DeferredTextColumn extends DeferredColumn + implements StringFilterSpec> { + + public DeferredTextColumn(String columnName) { + super(columnName); + } + + @Override + public Function isEmptyString() { + return table -> table.textColumn(name()).isEmptyString(); + } + + @Override + public Function startsWith(String string) { + return table -> table.textColumn(name()).startsWith(string); + } + + @Override + public Function endsWith(String string) { + return table -> table.textColumn(name()).endsWith(string); + } + + @Override + public Function 
containsString(String string) { + return table -> table.textColumn(name()).containsString(string); + } + + @Override + public Function matchesRegex(String string) { + return table -> table.textColumn(name()).matchesRegex(string); + } + + @Override + public Function isAlpha() { + return table -> table.textColumn(name()).isAlpha(); + } + + @Override + public Function isNumeric() { + return table -> table.textColumn(name()).isNumeric(); + } + + @Override + public Function isAlphaNumeric() { + return table -> table.textColumn(name()).isAlphaNumeric(); + } + + @Override + public Function isUpperCase() { + return table -> table.textColumn(name()).isUpperCase(); + } + + @Override + public Function isLowerCase() { + return table -> table.textColumn(name()).isLowerCase(); + } + + @Override + public Function lengthEquals(int stringLength) { + return table -> table.textColumn(name()).lengthEquals(stringLength); + } + + @Override + public Function isShorterThan(int stringLength) { + return table -> table.textColumn(name()).isShorterThan(stringLength); + } + + @Override + public Function isLongerThan(int stringLength) { + return table -> table.textColumn(name()).isLongerThan(stringLength); + } + + @Override + public Function isIn(String... strings) { + return table -> table.textColumn(name()).isIn(strings); + } + + @Override + public Function isIn(Collection strings) { + return table -> table.textColumn(name()).isIn(strings); + } + + @Override + public Function isNotIn(String... 
strings) { + return table -> table.textColumn(name()).isNotIn(strings); + } + + @Override + public Function isNotIn(Collection strings) { + return table -> table.textColumn(name()).isNotIn(strings); + } + + @Override + public Function isEqualTo(Column other) { + return table -> table.textColumn(name()).isEqualTo(other); + } + + @Override + public Function isNotEqualTo(Column other) { + return table -> table.textColumn(name()).isNotEqualTo(other); + } + + @Override + public Function equalsIgnoreCase(Column other) { + return table -> table.textColumn(name()).equalsIgnoreCase(other); + } + + @Override + public Function startsWith(Column other) { + return table -> table.textColumn(name()).startsWith(other); + } + + @Override + public Function isEqualTo(String string) { + return table -> table.textColumn(name()).isEqualTo(string); + } + + @Override + public Function isNotEqualTo(String string) { + return table -> table.textColumn(name()).isNotEqualTo(string); + } +} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/index/StringIndex.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/index/StringIndex.java index 7598279bdf..bf9b9e770b 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/index/StringIndex.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/index/StringIndex.java @@ -17,7 +17,7 @@ import it.unimi.dsi.fastutil.ints.IntArrayList; import java.util.HashMap; import java.util.Map; -import tech.tablesaw.api.StringColumn; +import tech.tablesaw.columns.strings.AbstractStringColumn; import tech.tablesaw.selection.BitmapBackedSelection; import tech.tablesaw.selection.Selection; @@ -30,7 +30,7 @@ public class StringIndex implements Index { private final Map index; /** Creates an index on the given AbstractStringColumn */ - public StringIndex(StringColumn column) { + public StringIndex(AbstractStringColumn column) { int sizeEstimate = Integer.min(1_000_000, column.size() / 
100); Map tempMap = new HashMap<>(sizeEstimate); for (int i = 0; i < column.size(); i++) { diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/ColumnTypeDetector.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/ColumnTypeDetector.java index 096ef90a42..43eacd04bd 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/ColumnTypeDetector.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/ColumnTypeDetector.java @@ -1,5 +1,8 @@ package tech.tablesaw.io; +import static tech.tablesaw.api.ColumnType.STRING; +import static tech.tablesaw.api.ColumnType.TEXT; + import java.util.*; import java.util.concurrent.CopyOnWriteArrayList; import tech.tablesaw.api.ColumnType; @@ -98,17 +101,15 @@ public ColumnType[] detectColumnTypes(Iterator rows, ReadOptions optio // now detect for (List valuesList : columnData) { ColumnType detectedType = detectType(valuesList, options); - /* - if (detectedType.equals(STRING) && rowCount > STRING_COLUMN_ROW_COUNT_CUTOFF - && options.columnTypesToDetect().contains(TEXT) - ) { - HashSet unique = new HashSet<>(valuesList); - double uniquePct = unique.size() / (valuesList.size() * 1.0); - if (uniquePct > STRING_COLUMN_CUTOFF) { - detectedType = TEXT; - } - } - */ + if (detectedType.equals(STRING) + && rowCount > STRING_COLUMN_ROW_COUNT_CUTOFF + && options.columnTypesToDetect().contains(TEXT)) { + HashSet unique = new HashSet<>(valuesList); + double uniquePct = unique.size() / (valuesList.size() * 1.0); + if (uniquePct > STRING_COLUMN_CUTOFF) { + detectedType = TEXT; + } + } columnTypes.add(detectedType); } return columnTypes.toArray(new ColumnType[0]); diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/DataFrameReader.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/DataFrameReader.java index 96a75c31f4..ce06dd41af 100644 --- 
a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/DataFrameReader.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/DataFrameReader.java @@ -14,6 +14,7 @@ package tech.tablesaw.io; +import com.google.common.io.Files; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -25,7 +26,6 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Optional; -import com.google.common.io.Files; import tech.tablesaw.api.Table; import tech.tablesaw.io.csv.CsvReadOptions; import tech.tablesaw.io.csv.CsvReader; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/Destination.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/Destination.java index 2c952782c8..99eb6ec23e 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/Destination.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/Destination.java @@ -35,11 +35,6 @@ public Writer writer() { } public Writer createWriter() { - if (writer != null) { - return writer; - } else { - assert stream != null; - return new OutputStreamWriter(stream); - } + return writer != null ? 
writer : new OutputStreamWriter(stream); } } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/FileReader.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/FileReader.java index 4c8f96e51a..c146330207 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/FileReader.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/FileReader.java @@ -1,6 +1,6 @@ package tech.tablesaw.io; -import static tech.tablesaw.api.ColumnType.*; +import static tech.tablesaw.api.ColumnType.SKIP; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -17,7 +17,8 @@ import java.util.Random; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import tech.tablesaw.api.*; +import tech.tablesaw.api.ColumnType; +import tech.tablesaw.api.Table; import tech.tablesaw.columns.AbstractColumnParser; import tech.tablesaw.columns.Column; @@ -26,6 +27,14 @@ public abstract class FileReader { private static Logger logger = LoggerFactory.getLogger(FileReader.class); private static final int UNLIMITED_SAMPLE_SIZE = -1; + /** + * @deprecated Use {@link #getColumnTypes(Reader, ReadOptions, int, AbstractParser, String[])} } + */ + @Deprecated + public ColumnType[] getColumnTypes( + Reader reader, ReadOptions options, int linesToSkip, AbstractParser parser) { + return getColumnTypes(reader, options, linesToSkip, parser, null); + } /** * Returns an array containing the inferred columnTypes for the file being read, as calculated by * the ColumnType inference logic. These types may not be correct. 
@@ -132,12 +141,12 @@ private void renameDuplicateColumnHeaders(String[] headerNames) { Map nameCounter = new HashMap<>(); for (int i = 0; i < headerNames.length; i++) { String name = headerNames[i]; - Integer count = nameCounter.get(name.toLowerCase()); + Integer count = nameCounter.get(name); if (count == null) { - nameCounter.put(name.toLowerCase(), 1); + nameCounter.put(name, 1); } else { count++; - nameCounter.put(name.toLowerCase(), count); + nameCounter.put(name, count); headerNames[i] = name + "-" + count; } } @@ -181,9 +190,7 @@ protected Table parseRows( if (Strings.isNullOrEmpty(columnName)) { columnName = "Column " + table.columnCount(); } - ColumnType type = types[x]; - Column newColumn; - newColumn = type.create(columnName); + Column newColumn = types[x].create(columnName); table.addColumns(newColumn); } } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/ReadOptions.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/ReadOptions.java index 52f40187da..5a8a10f253 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/ReadOptions.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/ReadOptions.java @@ -24,6 +24,7 @@ import static tech.tablesaw.api.ColumnType.LONG; import static tech.tablesaw.api.ColumnType.SHORT; import static tech.tablesaw.api.ColumnType.STRING; +import static tech.tablesaw.api.ColumnType.TEXT; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -51,8 +52,7 @@ public class ReadOptions { private static final List DEFAULT_TYPES = Lists.newArrayList( - LOCAL_DATE_TIME, LOCAL_TIME, LOCAL_DATE, BOOLEAN, INTEGER, LONG, DOUBLE, STRING // , TEXT - ); + LOCAL_DATE_TIME, LOCAL_TIME, LOCAL_DATE, BOOLEAN, INTEGER, LONG, DOUBLE, STRING, TEXT); /** * An extended list of types that are used if minimizeColumnSizes is true. 
By including extra @@ -71,7 +71,8 @@ public class ReadOptions { LONG, FLOAT, DOUBLE, - STRING); + STRING, + TEXT); protected final Source source; protected final String tableName; @@ -273,6 +274,13 @@ public Builder header(boolean hasHeader) { return this; } + /** Deprecated. Use dateFormat(DateTimeFormatter dateFormat) instead */ + @Deprecated + public Builder dateFormat(String dateFormat) { + this.dateFormat = dateFormat; + return this; + } + public Builder dateFormat(DateTimeFormatter dateFormat) { this.dateFormatter = dateFormat; return this; @@ -283,11 +291,25 @@ public Builder allowDuplicateColumnNames(Boolean allow) { return this; } + /** Deprecated. Use timeFormat(DateTimeFormatter dateFormat) instead */ + @Deprecated + public Builder timeFormat(String timeFormat) { + this.timeFormat = timeFormat; + return this; + } + public Builder timeFormat(DateTimeFormatter dateFormat) { this.timeFormatter = dateFormat; return this; } + /** Deprecated. Use dateTimeFormat(DateTimeFormatter dateFormat) instead */ + @Deprecated + public Builder dateTimeFormat(String dateTimeFormat) { + this.dateTimeFormat = dateTimeFormat; + return this; + } + public Builder dateTimeFormat(DateTimeFormatter dateFormat) { this.dateTimeFormatter = dateFormat; return this; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/csv/CsvReadOptions.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/csv/CsvReadOptions.java index f7584f8e9e..f65d32b443 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/csv/CsvReadOptions.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/csv/CsvReadOptions.java @@ -330,6 +330,27 @@ public Builder sample(boolean sample) { return this; } + @Override + @Deprecated + public Builder dateFormat(String dateFormat) { + super.dateFormat(dateFormat); + return this; + } + + @Override + @Deprecated + public Builder timeFormat(String timeFormat) { + 
super.timeFormat(timeFormat); + return this; + } + + @Override + @Deprecated + public Builder dateTimeFormat(String dateTimeFormat) { + super.dateTimeFormat(dateTimeFormat); + return this; + } + @Override public Builder dateFormat(DateTimeFormatter dateFormat) { super.dateFormat(dateFormat); diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/csv/CsvWriteOptions.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/csv/CsvWriteOptions.java index 4f54ce3bae..43cfb7a87e 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/csv/CsvWriteOptions.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/csv/CsvWriteOptions.java @@ -169,6 +169,28 @@ public CsvWriteOptions.Builder quoteChar(char quoteChar) { return this; } + /** + * Writes date column output using the given DateFormatter + * + * @deprecated + */ + @Deprecated + public CsvWriteOptions.Builder dateFormatter(DateTimeFormatter dateFormatter) { + this.dateFormatter = dateFormatter; + return this; + } + + /** + * Writes DateTime column output using the given DateFormatter + * + * @deprecated + */ + @Deprecated + public CsvWriteOptions.Builder dateTimeFormatter(DateTimeFormatter dateTimeFormatter) { + this.dateTimeFormatter = dateTimeFormatter; + return this; + } + /** * Sets the usePrintFormatters option @link{tech.tablesaw.columns.ColumnFormatter} When true, * printFormatters will be used in writing the output text for any column that has one. 
diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/fixed/FixedWidthReadOptions.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/fixed/FixedWidthReadOptions.java index f058ae63e6..2924b28b29 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/fixed/FixedWidthReadOptions.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/fixed/FixedWidthReadOptions.java @@ -255,6 +255,27 @@ public Builder sample(boolean sample) { return this; } + @Override + @Deprecated + public Builder dateFormat(String dateFormat) { + super.dateFormat(dateFormat); + return this; + } + + @Override + @Deprecated + public Builder timeFormat(String timeFormat) { + super.timeFormat(timeFormat); + return this; + } + + @Override + @Deprecated + public Builder dateTimeFormat(String dateTimeFormat) { + super.dateTimeFormat(dateTimeFormat); + return this; + } + @Override public Builder dateFormat(DateTimeFormatter dateFormat) { super.dateFormat(dateFormat); diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/jdbc/SqlResultSetReader.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/jdbc/SqlResultSetReader.java index 8c87a7ff00..424152351a 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/jdbc/SqlResultSetReader.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/io/jdbc/SqlResultSetReader.java @@ -14,14 +14,14 @@ package tech.tablesaw.io.jdbc; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Types; import java.util.HashMap; import java.util.Map; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; import tech.tablesaw.api.BooleanColumn; import tech.tablesaw.api.ColumnType; import 
tech.tablesaw.api.DoubleColumn; @@ -65,8 +65,8 @@ private static Map initializeMap() { .put(Types.NCHAR, ColumnType.STRING) .put(Types.NVARCHAR, ColumnType.STRING) .put(Types.VARCHAR, ColumnType.STRING) - .put(Types.LONGVARCHAR, ColumnType.STRING) - .put(Types.LONGNVARCHAR, ColumnType.STRING) + .put(Types.LONGVARCHAR, ColumnType.TEXT) + .put(Types.LONGNVARCHAR, ColumnType.TEXT) .build()); } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/AbstractJoiner.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/AbstractJoiner.java deleted file mode 100644 index 73ca96f819..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/AbstractJoiner.java +++ /dev/null @@ -1,439 +0,0 @@ -package tech.tablesaw.joining; - -import tech.tablesaw.api.Table; - -public abstract class AbstractJoiner { - - public abstract DataFrameJoiner type(JoinType joinType); - - public abstract DataFrameJoiner keepAllJoinKeyColumns(boolean keep); - - public abstract DataFrameJoiner allowDuplicateColumnNames(boolean allow); - - public abstract DataFrameJoiner rightJoinColumns(String... rightJoinColumnNames); - - public abstract DataFrameJoiner with(Table... tables); - - public abstract Table join(); - - public AbstractJoiner() {} - - /** - * Joins to the given tables assuming that they have a column of the name we're joining on - * - * @param tables The tables to join with - */ - @Deprecated - public Table inner(Table... 
tables) { - type(JoinType.INNER); - with(tables); - return join(); - } - - /** - * Joins to the given tables assuming that they have a column of the name we're joining on - * - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed* - * @param tables The tables to join with - */ - @Deprecated - public Table inner(boolean allowDuplicateColumnNames, Table... tables) { - type(JoinType.INNER); - allowDuplicateColumnNames(allowDuplicateColumnNames); - with(tables); - return join(); - } - - /** - * Joins the joiner to the table2, using the given column for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param col2Name The column to join on. If col2Name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table inner(Table table2, String col2Name) { - return inner(table2, false, col2Name); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table inner(Table table2, String[] col2Names) { - return inner(table2, false, col2Names); - } - - /** - * Joins the joiner to the table2, using the given column for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param col2Name The column to join on. If col2Name refers to a double column, the join is - * performed after rounding to integers. 
- * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed* - * @return The resulting table - */ - @Deprecated - public Table inner(Table table2, String col2Name, boolean allowDuplicateColumnNames) { - return inner(table2, allowDuplicateColumnNames, col2Name); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed* - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table inner(Table table2, boolean allowDuplicateColumnNames, String... col2Names) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - type(JoinType.INNER); - rightJoinColumns(col2Names); - with(table2); - return join(); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed* - * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in - * table1 if {@code true} the join will return all join key columns in both table, which may - * have difference when there are null values - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. 
- * @return The resulting table - */ - @Deprecated - public Table inner( - Table table2, - boolean allowDuplicateColumnNames, - boolean keepAllJoinKeyColumns, - String... col2Names) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - keepAllJoinKeyColumns(keepAllJoinKeyColumns); - type(JoinType.INNER); - rightJoinColumns(col2Names); - with(table2); - return join(); - } - - /** - * Full outer join to the given tables assuming that they have a column of the name we're joining - * on - * - * @param tables The tables to join with - * @return The resulting table - */ - @Deprecated - public Table fullOuter(Table... tables) { - return fullOuter(false, tables); - } - - /** - * Full outer join to the given tables assuming that they have a column of the name we're joining - * on - * - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed* - * @param tables The tables to join with - * @return The resulting table - */ - @Deprecated - public Table fullOuter(boolean allowDuplicateColumnNames, Table... tables) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - type(JoinType.FULL_OUTER); - with(tables); - return join(); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed - * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in - * table1 if {@code true} the join will return all join key columns in both table, which may - * have difference when there are null values - * @param col2Names The columns to join on. 
If a name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table fullOuter( - Table table2, - boolean allowDuplicateColumnNames, - boolean keepAllJoinKeyColumns, - String... col2Names) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - type(JoinType.FULL_OUTER); - rightJoinColumns(col2Names); - keepAllJoinKeyColumns(keepAllJoinKeyColumns); - with(table2); - return join(); - } - - /** - * Full outer join the joiner to the table2, using the given column for the second table and - * returns the resulting table - * - * @param table2 The table to join with - * @param col2Name The column to join on. If col2Name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table fullOuter(Table table2, String col2Name) { - type(JoinType.FULL_OUTER); - with(table2); - rightJoinColumns(col2Name); - return join(); - } - - /** - * Joins to the given tables assuming that they have a column of the name we're joining on - * - * @param tables The tables to join with - * @return The resulting table - */ - @Deprecated - public Table leftOuter(Table... tables) { - return leftOuter(false, tables); - } - - /** - * Joins to the given tables assuming that they have a column of the name we're joining on - * - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed* - * @param tables The tables to join with - * @return The resulting table - */ - @Deprecated - public Table leftOuter(boolean allowDuplicateColumnNames, Table... 
tables) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - type(JoinType.LEFT_OUTER); - with(tables); - return join(); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table leftOuter(Table table2, String[] col2Names) { - return leftOuter(table2, false, col2Names); - } - - /** - * Joins the joiner to the table2, using the given column for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param col2Name The column to join on. If col2Name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table leftOuter(Table table2, String col2Name) { - return leftOuter(table2, false, col2Name); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table leftOuter(Table table2, boolean allowDuplicateColumnNames, String... 
col2Names) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - type(JoinType.LEFT_OUTER); - with(table2); - allowDuplicateColumnNames(allowDuplicateColumnNames); - rightJoinColumns(col2Names); - return join(); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed - * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in - * table1 if {@code true} the join will return all join key columns in both table, which may - * have difference when there are null values - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table leftOuter( - Table table2, - boolean allowDuplicateColumnNames, - boolean keepAllJoinKeyColumns, - String... col2Names) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - type(JoinType.LEFT_OUTER); - with(table2); - keepAllJoinKeyColumns(keepAllJoinKeyColumns); - rightJoinColumns(col2Names); - return join(); - } - - /** - * Joins to the given tables assuming that they have a column of the name we're joining on - * - * @param tables The tables to join with - * @return The resulting table - */ - @Deprecated - public Table rightOuter(Table... 
tables) { - return rightOuter(false, tables); - } - - /** - * Joins to the given tables assuming that they have a column of the name we're joining on - * - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed - * @param tables The tables to join with - * @return The resulting table - */ - @Deprecated - public Table rightOuter(boolean allowDuplicateColumnNames, Table... tables) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - type(JoinType.RIGHT_OUTER); - with(tables); - return join(); - } - - /** - * Joins the joiner to the table2, using the given column for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param col2Name The column to join on. If col2Name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table rightOuter(Table table2, String col2Name) { - return rightOuter(table2, false, col2Name); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. 
- * @return The resulting table - */ - @Deprecated - public Table rightOuter(Table table2, String[] col2Names) { - return rightOuter(table2, false, col2Names); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. - */ - @Deprecated - public Table rightOuter(Table table2, boolean allowDuplicateColumnNames, String... col2Names) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - type(JoinType.RIGHT_OUTER); - with(table2); - rightJoinColumns(col2Names); - return join(); - } - - /** - * Joins the joiner to the table2, using the given columns for the second table and returns the - * resulting table - * - * @param table2 The table to join with - * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than - * the join column have the same name if {@code true} the join will succeed and duplicate - * columns are renamed - * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in - * table1 if {@code true} the join will return all join key columns in both table, which may - * have difference when there are null values - * @param col2Names The columns to join on. If a name refers to a double column, the join is - * performed after rounding to integers. - * @return The resulting table - */ - @Deprecated - public Table rightOuter( - Table table2, - boolean allowDuplicateColumnNames, - boolean keepAllJoinKeyColumns, - String... 
col2Names) { - allowDuplicateColumnNames(allowDuplicateColumnNames); - keepAllJoinKeyColumns(keepAllJoinKeyColumns); - type(JoinType.RIGHT_OUTER); - with(table2); - rightJoinColumns(col2Names); - return join(); - } - - abstract Table joinInternal( - Table table, - Table table2, - JoinType rightOuter, - boolean allowDuplicateColumnNames, - boolean keepAllJoinKeyColumns, - String[] col2Names); -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/ColumnIndexPair.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/ColumnIndexPair.java deleted file mode 100644 index 61df50cac6..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/ColumnIndexPair.java +++ /dev/null @@ -1,30 +0,0 @@ -package tech.tablesaw.joining; - -import tech.tablesaw.api.ColumnType; - -/** - * Describes two columns that are to be compared in a sort The columns are expected to be referenced - * in two separate rows. The values of left and right provide the column index (position) in each of - * the two rows. 
- */ -public class ColumnIndexPair { - final ColumnType type; - final int left; - final int right; - - public ColumnIndexPair(ColumnType type, int left, int right) { - this.type = type; - this.left = left; - this.right = right; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("ColumnIndexPair{"); - sb.append("type=").append(type); - sb.append(", left=").append(left); - sb.append(", right=").append(right); - sb.append('}'); - return sb.toString(); - } -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/CrossProductJoin.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/CrossProductJoin.java deleted file mode 100644 index 9a5e859249..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/CrossProductJoin.java +++ /dev/null @@ -1,521 +0,0 @@ -package tech.tablesaw.joining; - -import com.google.common.collect.Streams; -import com.google.common.primitives.Ints; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import tech.tablesaw.api.*; -import tech.tablesaw.columns.Column; -import tech.tablesaw.columns.booleans.BooleanColumnType; -import tech.tablesaw.columns.dates.DateColumnType; -import tech.tablesaw.columns.datetimes.DateTimeColumnType; -import tech.tablesaw.columns.instant.InstantColumnType; -import tech.tablesaw.columns.numbers.*; -import tech.tablesaw.columns.strings.StringColumnType; -import tech.tablesaw.columns.times.TimeColumnType; -import tech.tablesaw.index.*; -import tech.tablesaw.selection.Selection; - -/** Implements joins between two or more Tables */ -public class CrossProductJoin implements JoinStrategy { - - private static final String TABLE_ALIAS = "T"; - - private List joinColumnIndexes; - private final AtomicInteger joinTableId = new AtomicInteger(2); - - /** - * 
Constructor. - * - * @param table The table to join on. - * @param joinColumnNames The join column names to join on. - */ - public CrossProductJoin(Table table, String... joinColumnNames) { - this.joinColumnIndexes = getJoinIndexes(table, joinColumnNames); - } - - /** - * Finds the index of the columns corresponding to the columnNames. E.G. The column named "ID" is - * located at index 5 in table. - * - * @param table the table that contains the columns. - * @param columnNames the column names to find indexes of. - * @return a list of column indexes within the table. - */ - private List getJoinIndexes(Table table, String[] columnNames) { - return Arrays.stream(columnNames).map(table::columnIndex).collect(Collectors.toList()); - } - - /** - * Joins two tables. - * - * @param table1 the table on the left side of the join. - * @param table2 the table on the right side of the join. - * @param joinType the type of join. - * @param allowDuplicates if {@code false} the join will fail if any columns other than the join - * column have the same name if {@code true} the join will succeed and duplicate columns are - * renamed - * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in - * table1 if {@code true} the join will return all join key columns in both table, which may - * have difference when there are null values - * @param table2JoinColumnNames The names of the columns in table2 to join on. - * @return the joined table - */ - public Table performJoin( - Table table1, - Table table2, - JoinType joinType, - boolean allowDuplicates, - boolean keepAllJoinKeyColumns, - int[] leftJoinColumnIndexes, - String... 
table2JoinColumnNames) { - - this.joinColumnIndexes = - Arrays.stream(leftJoinColumnIndexes).boxed().collect(Collectors.toList()); - List table2JoinColumnIndexes = getJoinIndexes(table2, table2JoinColumnNames); - List table1Indexes = buildIndexesForJoinColumns(joinColumnIndexes, table1); - List table2Indexes = buildIndexesForJoinColumns(table2JoinColumnIndexes, table2); - - // A set of column indexes in the result table that can be ignored. They are duplicate join - // keys. - - // collect all the column name in both tables - Column[] cols = - Streams.concat(table1.columns().stream(), table2.columns().stream()) - .map(Column::emptyCopy) - .toArray(Column[]::new); - - Set resultIgnoreColIndexes = new HashSet<>(); - if (!keepAllJoinKeyColumns) { - resultIgnoreColIndexes = getIgnoreColumns(table1, joinType, table2JoinColumnIndexes, cols); - } - - Table result = emptyTableFromColumns(table1, allowDuplicates, cols); - - validateIndexes(table1Indexes, table2Indexes); - if (table1.rowCount() == 0 && (joinType == JoinType.LEFT_OUTER || joinType == JoinType.INNER)) { - // Handle special case of empty table here so it doesn't fall through to the behavior - // that adds rows for full outer and right outer joins - if (!keepAllJoinKeyColumns) { - result.removeColumns(Ints.toArray(resultIgnoreColIndexes)); - } - return result; - } - - Selection table1DoneRows = Selection.with(); - Selection table2DoneRows = Selection.with(); - // use table 2 for row iteration, which can significantly increase performance - if (table1.rowCount() > table2.rowCount() && joinType == JoinType.INNER) { - for (Row row : table2) { - int ri = row.getRowNumber(); - if (table2DoneRows.contains(ri)) { - // Already processed a selection of table1 that contained this row. 
- continue; - } - Selection table1Rows = - createMultiColSelection( - table2, ri, table1Indexes, table1.rowCount(), table2JoinColumnIndexes); - Selection table2Rows = - createMultiColSelection( - table2, ri, table2Indexes, table2.rowCount(), table2JoinColumnIndexes); - crossProduct( - result, - table1, - table2, - table1Rows, - table2Rows, - resultIgnoreColIndexes, - keepAllJoinKeyColumns); - - table2DoneRows = table2DoneRows.or(table2Rows); - if (table2DoneRows.size() == table2.rowCount()) { - // Processed all the rows in table1 exit early. - if (!keepAllJoinKeyColumns) { - result.removeColumns(Ints.toArray(resultIgnoreColIndexes)); - } - return result; - } - } - } else { - for (Row row : table1) { - int ri = row.getRowNumber(); - if (table1DoneRows.contains(ri)) { - // Already processed a selection of table1 that contained this row. - continue; - } - Selection table1Rows = - createMultiColSelection( - table1, ri, table1Indexes, table1.rowCount(), joinColumnIndexes); - Selection table2Rows = - createMultiColSelection( - table1, ri, table2Indexes, table2.rowCount(), joinColumnIndexes); - if ((joinType == JoinType.LEFT_OUTER || joinType == JoinType.FULL_OUTER) - && table2Rows.isEmpty()) { - withMissingLeftJoin( - result, table1, table1Rows, resultIgnoreColIndexes, keepAllJoinKeyColumns); - } else { - crossProduct( - result, - table1, - table2, - table1Rows, - table2Rows, - resultIgnoreColIndexes, - keepAllJoinKeyColumns); - } - table1DoneRows = table1DoneRows.or(table1Rows); - if (joinType == JoinType.FULL_OUTER || joinType == JoinType.RIGHT_OUTER) { - // Update done rows in table2 for full Outer. - table2DoneRows = table2DoneRows.or(table2Rows); - } else if (table1DoneRows.size() == table1.rowCount()) { - // Processed all the rows in table1 exit early. - if (!keepAllJoinKeyColumns) { - result.removeColumns(Ints.toArray(resultIgnoreColIndexes)); - } - return result; - } - } - } - - // Add all rows from table2 that were not handled already. 
- Selection table2Rows = table2DoneRows.flip(0, table2.rowCount()); - withMissingRight( - result, - table1.columnCount(), - table2, - table2Rows, - joinType, - table2JoinColumnIndexes, - resultIgnoreColIndexes, - keepAllJoinKeyColumns); - if (!keepAllJoinKeyColumns) { - result.removeColumns(Ints.toArray(resultIgnoreColIndexes)); - } - return result; - } - - private void validateIndexes(List table1Indexes, List table2Indexes) { - if (table1Indexes.size() != table2Indexes.size()) { - throw new IllegalArgumentException( - "Cannot join using a different number of indices on each table: " - + table1Indexes - + " and " - + table2Indexes); - } - for (int i = 0; i < table1Indexes.size(); i++) { - if (!table1Indexes.get(i).getClass().equals(table2Indexes.get(i).getClass())) { - throw new IllegalArgumentException( - "Cannot join using different index types: " + table1Indexes + " and " + table2Indexes); - } - } - } - - /** Build a reverse index for every join column in the table. */ - private List buildIndexesForJoinColumns(List joinColumnIndexes, Table table) { - return joinColumnIndexes.stream().map(c -> indexFor(table, c)).collect(Collectors.toList()); - } - - /** Create a reverse index for a given column. 
*/ - private Index indexFor(Table table, int colIndex) { - ColumnType type = table.column(colIndex).type(); - if (type instanceof DateColumnType) { - return new IntIndex(table.dateColumn(colIndex)); - } else if (type instanceof DateTimeColumnType) { - return new LongIndex(table.dateTimeColumn(colIndex)); - } else if (type instanceof InstantColumnType) { - return new LongIndex(table.instantColumn(colIndex)); - } else if (type instanceof TimeColumnType) { - return new IntIndex(table.timeColumn(colIndex)); - } else if (type instanceof StringColumnType) { - return new StringIndex(table.stringColumn(colIndex)); - } else if (type instanceof IntColumnType) { - return new IntIndex(table.intColumn(colIndex)); - } else if (type instanceof LongColumnType) { - return new LongIndex(table.longColumn(colIndex)); - } else if (type instanceof ShortColumnType) { - return new ShortIndex(table.shortColumn(colIndex)); - } else if (type instanceof BooleanColumnType) { - return new ByteIndex(table.booleanColumn(colIndex)); - } else if (type instanceof DoubleColumnType) { - return new DoubleIndex(table.doubleColumn(colIndex)); - } else if (type instanceof FloatColumnType) { - return new FloatIndex(table.floatColumn(colIndex)); - } - throw new IllegalArgumentException("Joining attempted on unsupported column type " + type); - } - - /** - * Given a reverse index find a selection of rows that have the same value as the supplied column - * does in the given row index. 
- */ - private Selection selectionForColumn(Column valueColumn, int rowIndex, Index rawIndex) { - - ColumnType type = valueColumn.type(); - if (type instanceof DateColumnType) { - IntIndex index = (IntIndex) rawIndex; - int value = ((DateColumn) valueColumn).getIntInternal(rowIndex); - return index.get(value); - } else if (type instanceof TimeColumnType) { - IntIndex index = (IntIndex) rawIndex; - int value = ((TimeColumn) valueColumn).getIntInternal(rowIndex); - return index.get(value); - } else if (type instanceof DateTimeColumnType) { - LongIndex index = (LongIndex) rawIndex; - long value = ((DateTimeColumn) valueColumn).getLongInternal(rowIndex); - return index.get(value); - } else if (type instanceof InstantColumnType) { - LongIndex index = (LongIndex) rawIndex; - long value = ((InstantColumn) valueColumn).getLongInternal(rowIndex); - return index.get(value); - } else if (type instanceof StringColumnType) { - StringIndex index = (StringIndex) rawIndex; - String value = ((StringColumn) valueColumn).get(rowIndex); - return index.get(value); - } else if (type instanceof IntColumnType) { - IntIndex index = (IntIndex) rawIndex; - int value = ((IntColumn) valueColumn).getInt(rowIndex); - return index.get(value); - } else if (type instanceof LongColumnType) { - LongIndex index = (LongIndex) rawIndex; - long value = ((LongColumn) valueColumn).getLong(rowIndex); - return index.get(value); - } else if (type instanceof ShortColumnType) { - ShortIndex index = (ShortIndex) rawIndex; - short value = ((ShortColumn) valueColumn).getShort(rowIndex); - return index.get(value); - } else if (type instanceof BooleanColumnType) { - ByteIndex index = (ByteIndex) rawIndex; - byte value = ((BooleanColumn) valueColumn).getByte(rowIndex); - return index.get(value); - } else if (type instanceof DoubleColumnType) { - DoubleIndex index = (DoubleIndex) rawIndex; - double value = ((DoubleColumn) valueColumn).getDouble(rowIndex); - return index.get(value); - } else if (type instanceof 
FloatColumnType) { - FloatIndex index = (FloatIndex) rawIndex; - float value = ((FloatColumn) valueColumn).getFloat(rowIndex); - return index.get(value); - } else { - throw new IllegalArgumentException( - "Joining is supported on numeric, string, and date-like columns. Column " - + valueColumn.name() - + " is of type " - + valueColumn.type()); - } - } - /** - * Create a big multicolumn selection for all join columns in the given table. Joins two tables. - * - * @param table the table that used to generate Selection. - * @param ri row number of row in table. - * @param indexes a reverse index for every join column in the table. - * @param selectionSize max size in table . - * @param joinColumnIndexes the column index of join key in tables - * @return selection created - */ - private Selection createMultiColSelection( - Table table, - int ri, - List indexes, - int selectionSize, - List joinColumnIndexes) { - Selection multiColSelection = Selection.withRange(0, selectionSize); - int i = 0; - for (Integer joinColumnIndex : joinColumnIndexes) { - Column col = table.column(joinColumnIndex); - Selection oneColSelection = selectionForColumn(col, ri, indexes.get(i)); - // and the selections. - multiColSelection = multiColSelection.and(oneColSelection); - i++; - } - return multiColSelection; - } - - private String newName(String table2Alias, String columnName) { - return table2Alias + "." + columnName; - } - - /** - * Adds empty columns to the destination table with the same type as columns in table1 and table2. - * - *

For inner, left and full outer join types the join columns in table2 are not needed and will - * be marked as placeholders. The indexes of those columns will be returned. The downstream logic - * is easier if we wait to remove the redundant columns until the last step. - * - * @param table1 the table on left side of the join. - * @param allowDuplicates whether to allow duplicates. If yes rename columns in table2 that have - * the same name as columns in table1 with the exception of join columns in table2 when - * performing a right join. - * @return A - */ - private Table emptyTableFromColumns(Table table1, boolean allowDuplicates, Column[] cols) { - - Table destination = Table.create(table1.name()); - - // Rename duplicate columns in second table - if (allowDuplicates) { - Set table1ColNames = - Arrays.stream(cols) - .map(Column::name) - .map(String::toLowerCase) - .limit(table1.columnCount()) - .collect(Collectors.toSet()); - - String table2Alias = TABLE_ALIAS + joinTableId.getAndIncrement(); - for (int c = table1.columnCount(); c < cols.length; c++) { - String columnName = cols[c].name(); - if (table1ColNames.contains(columnName.toLowerCase())) { - cols[c].setName(newName(table2Alias, columnName)); - } - } - } - destination.addColumns(cols); - return destination; - } - - /** - * For inner join, left join and full outer join mark the join columns in table2 as placeholders. - * - *

For right join mark the join columns in table1 as placeholders. Keep track of which join - * columns are placeholders so they can be ignored. - */ - private Set getIgnoreColumns( - Table table1, JoinType joinType, List table2JoinColumnIndexes, Column[] cols) { - Set ignoreColumns = new HashSet<>(); - for (int c = 0; c < cols.length; c++) { - if (joinType == JoinType.RIGHT_OUTER) { - if (c < table1.columnCount() && joinColumnIndexes.contains(c)) { - cols[c].setName("Placeholder_" + ignoreColumns.size()); - ignoreColumns.add(c); - } - } else { - int table2Index = c - table1.columnCount(); - if (c >= table1.columnCount() && table2JoinColumnIndexes.contains(table2Index)) { - cols[c].setName("Placeholder_" + ignoreColumns.size()); - ignoreColumns.add(c); - } - } - } - return ignoreColumns; - } - - /** - * Creates cross product for the selection of two tables. - * - * @param destination the destination table. - * @param table1 the table on left of join. - * @param table2 the table on right of join. - * @param table1Rows the selection of rows in table1. - * @param table2Rows the selection of rows in table2. - * @param ignoreColumns a set of column indexes in the result to ignore. They are redundant join - * columns. 
- */ - @SuppressWarnings({"rawtypes", "unchecked"}) - private void crossProduct( - Table destination, - Table table1, - Table table2, - Selection table1Rows, - Selection table2Rows, - Set ignoreColumns, - boolean keepTable2JoinKeyColumns) { - for (int c = 0; c < table1.columnCount() + table2.columnCount(); c++) { - if (!keepTable2JoinKeyColumns && ignoreColumns.contains(c)) { - continue; - } - int table2Index = c - table1.columnCount(); - for (int r1 : table1Rows) { - for (int r2 : table2Rows) { - if (c < table1.columnCount()) { - Column t1Col = table1.column(c); - destination.column(c).append(t1Col, r1); - } else { - Column t2Col = table2.column(table2Index); - destination.column(c).append(t2Col, r2); - } - } - } - } - } - - /** - * Adds rows to destination for each row in table1 with the columns from table2 added as missing - * values. - */ - @SuppressWarnings({"rawtypes", "unchecked"}) - private void withMissingLeftJoin( - Table destination, - Table table1, - Selection table1Rows, - Set ignoreColumns, - boolean keepTable2JoinKeyColumns) { - for (int c = 0; c < destination.columnCount(); c++) { - if (!keepTable2JoinKeyColumns && ignoreColumns.contains(c)) { - continue; - } - if (c < table1.columnCount()) { - Column t1Col = table1.column(c); - for (int index : table1Rows) { - destination.column(c).append(t1Col, index); - } - } else { - for (int r1 = 0; r1 < table1Rows.size(); r1++) { - destination.column(c).appendMissing(); - } - } - } - } - - /** - * Adds rows to destination for each row in table2 with the columns from table1 added as missing - * values. - */ - @SuppressWarnings({"rawtypes", "unchecked"}) - private void withMissingRight( - Table destination, - int table1ColCount, - Table table2, - Selection table2Rows, - JoinType joinType, - List col2Indexes, - Set ignoreColumns, - boolean keepTable2JoinKeyColumns) { - - // Add index data from table2 into join column positions in table one. 
- if (joinType == JoinType.FULL_OUTER) { - for (int i = 0; i < col2Indexes.size(); i++) { - Column t2Col = table2.column(col2Indexes.get(i)); - for (int index : table2Rows) { - destination.column(joinColumnIndexes.get(i)).append(t2Col, index); - } - } - } - - for (int c = 0; c < destination.columnCount(); c++) { - if (!keepTable2JoinKeyColumns) { - if (ignoreColumns.contains(c) || joinColumnIndexes.contains(c)) { - continue; - } - } - if (c < table1ColCount) { - for (int r1 = 0; r1 < table2Rows.size(); r1++) { - destination.column(c).appendMissing(); - } - } else { - Column t2Col = table2.column(c - table1ColCount); - for (int index : table2Rows) { - destination.column(c).append(t2Col, index); - } - } - } - } - - @Override - public String toString() { - return "CrossProductJoin"; - } -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/DataFrameJoiner.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/DataFrameJoiner.java index 149097f0d3..4495302f53 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/DataFrameJoiner.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/DataFrameJoiner.java @@ -1,286 +1,946 @@ package tech.tablesaw.joining; -import static tech.tablesaw.joining.JoinType.INNER; - -import com.google.common.base.Preconditions; -import java.util.*; +import com.google.common.collect.Streams; +import com.google.common.primitives.Ints; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import tech.tablesaw.api.*; +import tech.tablesaw.columns.Column; +import tech.tablesaw.columns.booleans.BooleanColumnType; +import tech.tablesaw.columns.dates.DateColumnType; +import tech.tablesaw.columns.datetimes.DateTimeColumnType; +import tech.tablesaw.columns.instant.InstantColumnType; +import 
tech.tablesaw.columns.numbers.*; +import tech.tablesaw.columns.strings.StringColumnType; +import tech.tablesaw.columns.strings.TextColumnType; +import tech.tablesaw.columns.times.TimeColumnType; +import tech.tablesaw.index.*; +import tech.tablesaw.selection.Selection; /** Implements joins between two or more Tables */ -public class DataFrameJoiner extends AbstractJoiner { +public class DataFrameJoiner { + + /** The types of joins that are supported */ + private enum JoinType { + INNER, + LEFT_OUTER, + RIGHT_OUTER, + FULL_OUTER + } - /** The join algorithm to be used */ - private JoinStrategy strategy; + private static final String TABLE_ALIAS = "T"; - /** The first (left) table named in the join statement */ private final Table table; + private final String[] joinColumnNames; + private final List joinColumnIndexes; + private final AtomicInteger joinTableId = new AtomicInteger(2); - /** The names of the columns to be used for the first (left) table */ - private final String[] leftJoinColumnNames; + /** + * Constructor. + * + * @param table The table to join on. + * @param joinColumnNames The join column names to join on. + */ + public DataFrameJoiner(Table table, String... joinColumnNames) { + this.table = table; + this.joinColumnNames = joinColumnNames; + this.joinColumnIndexes = getJoinIndexes(table, joinColumnNames); + } /** - * The names of the columns to be joined on in the second (right) table. If these are not - * explicitly provided, they default to the names used for the left table. + * Finds the index of the columns corresponding to the columnNames. E.G. The column named "ID" is + * located at index 5 in table. + * + * @param table the table that contains the columns. + * @param columnNames the column names to find indexes of. + * @return a list of column indexes within the table. 
*/ - private String[] rightJoinColumnNames; + private List getJoinIndexes(Table table, String[] columnNames) { + return Arrays.stream(columnNames).map(table::columnIndex).collect(Collectors.toList()); + } - /** The positions (indexes) in the table of the columns used in the first table */ - private int[] leftJoinColumnPositions; + /** + * Joins to the given tables assuming that they have a column of the name we're joining on + * + * @param tables The tables to join with + */ + public Table inner(Table... tables) { + return inner(false, tables); + } + + /** + * Joins to the given tables assuming that they have a column of the name we're joining on + * + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed* + * @param tables The tables to join with + */ + public Table inner(boolean allowDuplicateColumnNames, Table... tables) { + Table joined = table; + for (Table currT : tables) { + joined = + joinInternal( + joined, currT, JoinType.INNER, allowDuplicateColumnNames, false, joinColumnNames); + } + return joined; + } + + /** + * Joins the joiner to the table2, using the given column for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param col2Name The column to join on. If col2Name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table + */ + public Table inner(Table table2, String col2Name) { + return inner(table2, false, col2Name); + } /** - * The table(s) to be used on the right side. 
If more than one table is provided, the join is - * executed repeatedly, merging the next right table with the prior results + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table */ - private List rightTables = new ArrayList<>(); + public Table inner(Table table2, String[] col2Names) { + return inner(table2, false, col2Names); + } - /** The type of join to be performed (INNER, LEFT_OUTER, RIGHT_OUTER, or FULL_OUTER */ - private JoinType joinType = INNER; + /** + * Joins the joiner to the table2, using the given column for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param col2Name The column to join on. If col2Name refers to a double column, the join is + * performed after rounding to integers. + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed* + * @return The resulting table + */ + public Table inner(Table table2, String col2Name, boolean allowDuplicateColumnNames) { + return inner(table2, allowDuplicateColumnNames, col2Name); + } /** - * When this is false, columns in the second (and subsequent) join tables are excluded from the - * results if they have the same name as a column in the any prior table. When it is true, they - * are give a prefix and included. The prefix used is "Tn." where n is the number of the table in - * the join. The second table is (T2.column_name), for example. 
+ * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed* + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table */ - private boolean allowDuplicateColumnNames = false; + public Table inner(Table table2, boolean allowDuplicateColumnNames, String... col2Names) { + Table joinedTable; + joinedTable = + joinInternal(table, table2, JoinType.INNER, allowDuplicateColumnNames, false, col2Names); + return joinedTable; + } /** - * When this is true, the columns of the second (and subsequent) join tables are included in the - * results, even when they're identical in name and data with the first join table. When false, - * only the first join columns are retained. + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table * - *

If the second (or any subsequent) table has the same join column names as the first (or any - * prior) table, the same scheme used for non-join columns is used, and each column with a - * duplicate name gets a prefix of "Tn." where n is the number of the table in the join. + * @param table2 The table to join with + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed* + * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in + * table1 if {@code true} the join will return all join key columns in both table, which may + * have difference when there are null values + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table */ - private boolean keepAllJoinKeyColumns = false; + public Table inner( + Table table2, + boolean allowDuplicateColumnNames, + boolean keepAllJoinKeyColumns, + String... col2Names) { + return joinInternal( + table, table2, JoinType.INNER, allowDuplicateColumnNames, keepAllJoinKeyColumns, col2Names); + } /** - * Constructor. + * Joins two tables. * - * @param table The table to join on. - * @param leftJoinColumnNames The join column names in that table to be used. These names also - * serve as the default for the second table, unless other names are explicitly provided. + * @param table1 the table on the left side of the join. + * @param table2 the table on the right side of the join. + * @param joinType the type of join. 
+ * @param allowDuplicates if {@code false} the join will fail if any columns other than the join + * column have the same name if {@code true} the join will succeed and duplicate columns are + * renamed + * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in + * table1 if {@code true} the join will return all join key columns in both table, which may + * have difference when there are null values + * @param table2JoinColumnNames The names of the columns in table2 to join on. + * @return the joined table */ - public DataFrameJoiner(Table table, String... leftJoinColumnNames) { - this.table = table; - this.leftJoinColumnNames = leftJoinColumnNames; + private Table joinInternal( + Table table1, + Table table2, + JoinType joinType, + boolean allowDuplicates, + boolean keepAllJoinKeyColumns, + String... table2JoinColumnNames) { - // we assume the join columns for both tables have the same names, - // unless names for the right table are explicitly set - this.rightJoinColumnNames = leftJoinColumnNames; + List table2JoinColumnIndexes = getJoinIndexes(table2, table2JoinColumnNames); + List table1Indexes = buildIndexesForJoinColumns(joinColumnIndexes, table1); + List table2Indexes = buildIndexesForJoinColumns(table2JoinColumnIndexes, table2); + + Table result = Table.create(table1.name()); + // A set of column indexes in the result table that can be ignored. They are duplicate join + // keys. 
+ Set resultIgnoreColIndexes = + emptyTableFromColumns( + result, + table1, + table2, + joinType, + allowDuplicates, + table2JoinColumnIndexes, + keepAllJoinKeyColumns); + + validateIndexes(table1Indexes, table2Indexes); + if (table1.rowCount() == 0 && (joinType == JoinType.LEFT_OUTER || joinType == JoinType.INNER)) { + // Handle special case of empty table here so it doesn't fall through to the behavior + // that adds rows for full outer and right outer joins + if (!keepAllJoinKeyColumns) { + result.removeColumns(Ints.toArray(resultIgnoreColIndexes)); + } + return result; + } - this.leftJoinColumnPositions = getJoinIndexes(table, leftJoinColumnNames); + Selection table1DoneRows = Selection.with(); + Selection table2DoneRows = Selection.with(); + // use table 2 for row iteration, which can significantly increase performance + if (table1.rowCount() > table2.rowCount() && joinType == JoinType.INNER) { + for (Row row : table2) { + int ri = row.getRowNumber(); + if (table2DoneRows.contains(ri)) { + // Already processed a selection of table1 that contained this row. + continue; + } + Selection table1Rows = + createMultiColSelection( + table2, ri, table1Indexes, table1.rowCount(), table2JoinColumnIndexes); + Selection table2Rows = + createMultiColSelection( + table2, ri, table2Indexes, table2.rowCount(), table2JoinColumnIndexes); + crossProduct( + result, + table1, + table2, + table1Rows, + table2Rows, + resultIgnoreColIndexes, + keepAllJoinKeyColumns); + + table2DoneRows = table2DoneRows.or(table2Rows); + if (table2DoneRows.size() == table2.rowCount()) { + // Processed all the rows in table1 exit early. + if (!keepAllJoinKeyColumns) { + result.removeColumns(Ints.toArray(resultIgnoreColIndexes)); + } + return result; + } + } + } else { + for (Row row : table1) { + int ri = row.getRowNumber(); + if (table1DoneRows.contains(ri)) { + // Already processed a selection of table1 that contained this row. 
+ continue; + } + Selection table1Rows = + createMultiColSelection( + table1, ri, table1Indexes, table1.rowCount(), joinColumnIndexes); + Selection table2Rows = + createMultiColSelection( + table1, ri, table2Indexes, table2.rowCount(), joinColumnIndexes); + if ((joinType == JoinType.LEFT_OUTER || joinType == JoinType.FULL_OUTER) + && table2Rows.isEmpty()) { + withMissingLeftJoin( + result, table1, table1Rows, resultIgnoreColIndexes, keepAllJoinKeyColumns); + } else { + crossProduct( + result, + table1, + table2, + table1Rows, + table2Rows, + resultIgnoreColIndexes, + keepAllJoinKeyColumns); + } + table1DoneRows = table1DoneRows.or(table1Rows); + if (joinType == JoinType.FULL_OUTER || joinType == JoinType.RIGHT_OUTER) { + // Update done rows in table2 for full Outer. + table2DoneRows = table2DoneRows.or(table2Rows); + } else if (table1DoneRows.size() == table1.rowCount()) { + // Processed all the rows in table1 exit early. + if (!keepAllJoinKeyColumns) { + result.removeColumns(Ints.toArray(resultIgnoreColIndexes)); + } + return result; + } + } + } + + // Add all rows from table2 that were not handled already. 
+ Selection table2Rows = table2DoneRows.flip(0, table2.rowCount()); + withMissingRight( + result, + table1.columnCount(), + table2, + table2Rows, + joinType, + table2JoinColumnIndexes, + resultIgnoreColIndexes, + keepAllJoinKeyColumns); + if (!keepAllJoinKeyColumns) { + result.removeColumns(Ints.toArray(resultIgnoreColIndexes)); + } + return result; + } + + private void validateIndexes(List table1Indexes, List table2Indexes) { + if (table1Indexes.size() != table2Indexes.size()) { + throw new IllegalArgumentException( + "Cannot join using a different number of indices on each table: " + + table1Indexes + + " and " + + table2Indexes); + } + for (int i = 0; i < table1Indexes.size(); i++) { + if (!table1Indexes.get(i).getClass().equals(table2Indexes.get(i).getClass())) { + throw new IllegalArgumentException( + "Cannot join using different index types: " + table1Indexes + " and " + table2Indexes); + } + } + } + + /** Build a reverse index for every join column in the table. */ + private List buildIndexesForJoinColumns(List joinColumnIndexes, Table table) { + return joinColumnIndexes.stream().map(c -> indexFor(table, c)).collect(Collectors.toList()); + } + + /** Create a reverse index for a given column. 
*/ + private Index indexFor(Table table, int colIndex) { + ColumnType type = table.column(colIndex).type(); + if (type instanceof DateColumnType) { + return new IntIndex(table.dateColumn(colIndex)); + } else if (type instanceof DateTimeColumnType) { + return new LongIndex(table.dateTimeColumn(colIndex)); + } else if (type instanceof InstantColumnType) { + return new LongIndex(table.instantColumn(colIndex)); + } else if (type instanceof TimeColumnType) { + return new IntIndex(table.timeColumn(colIndex)); + } else if (type instanceof StringColumnType) { + return new StringIndex(table.stringColumn(colIndex)); + } else if (type instanceof TextColumnType) { + return new StringIndex(table.textColumn(colIndex)); + } else if (type instanceof IntColumnType) { + return new IntIndex(table.intColumn(colIndex)); + } else if (type instanceof LongColumnType) { + return new LongIndex(table.longColumn(colIndex)); + } else if (type instanceof ShortColumnType) { + return new ShortIndex(table.shortColumn(colIndex)); + } else if (type instanceof BooleanColumnType) { + return new ByteIndex(table.booleanColumn(colIndex)); + } else if (type instanceof DoubleColumnType) { + return new DoubleIndex(table.doubleColumn(colIndex)); + } else if (type instanceof FloatColumnType) { + return new FloatIndex(table.floatColumn(colIndex)); + } + throw new IllegalArgumentException("Joining attempted on unsupported column type " + type); } /** - * Sets the type of join, which defaults to INNER if not provided. + * Given a reverse index find a selection of rows that have the same value as the supplied column + * does in the given row index. 
+ */ + private Selection selectionForColumn(Column valueColumn, int rowIndex, Index rawIndex) { + + ColumnType type = valueColumn.type(); + if (type instanceof DateColumnType) { + IntIndex index = (IntIndex) rawIndex; + int value = ((DateColumn) valueColumn).getIntInternal(rowIndex); + return index.get(value); + } else if (type instanceof TimeColumnType) { + IntIndex index = (IntIndex) rawIndex; + int value = ((TimeColumn) valueColumn).getIntInternal(rowIndex); + return index.get(value); + } else if (type instanceof DateTimeColumnType) { + LongIndex index = (LongIndex) rawIndex; + long value = ((DateTimeColumn) valueColumn).getLongInternal(rowIndex); + return index.get(value); + } else if (type instanceof InstantColumnType) { + LongIndex index = (LongIndex) rawIndex; + long value = ((InstantColumn) valueColumn).getLongInternal(rowIndex); + return index.get(value); + } else if (type instanceof StringColumnType) { + StringIndex index = (StringIndex) rawIndex; + String value = ((StringColumn) valueColumn).get(rowIndex); + return index.get(value); + } else if (type instanceof TextColumnType) { + StringIndex index = (StringIndex) rawIndex; + String value = ((TextColumn) valueColumn).get(rowIndex); + return index.get(value); + } else if (type instanceof IntColumnType) { + IntIndex index = (IntIndex) rawIndex; + int value = ((IntColumn) valueColumn).getInt(rowIndex); + return index.get(value); + } else if (type instanceof LongColumnType) { + LongIndex index = (LongIndex) rawIndex; + long value = ((LongColumn) valueColumn).getLong(rowIndex); + return index.get(value); + } else if (type instanceof ShortColumnType) { + ShortIndex index = (ShortIndex) rawIndex; + short value = ((ShortColumn) valueColumn).getShort(rowIndex); + return index.get(value); + } else if (type instanceof BooleanColumnType) { + ByteIndex index = (ByteIndex) rawIndex; + byte value = ((BooleanColumn) valueColumn).getByte(rowIndex); + return index.get(value); + } else if (type instanceof DoubleColumnType) 
{ + DoubleIndex index = (DoubleIndex) rawIndex; + double value = ((DoubleColumn) valueColumn).getDouble(rowIndex); + return index.get(value); + } else if (type instanceof FloatColumnType) { + FloatIndex index = (FloatIndex) rawIndex; + float value = ((FloatColumn) valueColumn).getFloat(rowIndex); + return index.get(value); + } else { + throw new IllegalArgumentException( + "Joining is supported on numeric, string, and date-like columns. Column " + + valueColumn.name() + + " is of type " + + valueColumn.type()); + } + } + /** + * Create a big multicolumn selection for all join columns in the given table. Joins two tables. * - * @param joinType The type of join to perform (INNER, LEFT_OUTER, RIGHT_OUTER, FULL_OUTER) - * @return This joiner object. + * @param table the table that used to generate Selection. + * @param ri row number of row in table. + * @param indexes a reverse index for every join column in the table. + * @param selectionSize max size in table . + * @param joinColumnIndexes the column index of join key in tables + * @return selection created */ - public DataFrameJoiner type(JoinType joinType) { - this.joinType = joinType; - return this; + private Selection createMultiColSelection( + Table table, + int ri, + List indexes, + int selectionSize, + List joinColumnIndexes) { + Selection multiColSelection = Selection.withRange(0, selectionSize); + int i = 0; + for (Integer joinColumnIndex : joinColumnIndexes) { + Column col = table.column(joinColumnIndex); + Selection oneColSelection = selectionForColumn(col, ri, indexes.get(i)); + // and the selections. + multiColSelection = multiColSelection.and(oneColSelection); + i++; + } + return multiColSelection; + } + + private String newName(String table2Alias, String columnName) { + return table2Alias + "." + columnName; } /** - * When the argument is true, the join columns of the second (and subsequent) tables are included - * in the results, even when they're identical in name and data with the first join table. 
When - * false, only one set of join columns is retained in the result. + * Full outer join to the given tables assuming that they have a column of the name we're joining + * on * - *

Note that if the second (or any subsequent) table has the same join column names as the - * first (or any prior) table, the same scheme used for non-join columns is used, and each column - * with a duplicate name gets a prefix of "Tn." where n is the number of the table in the join. + * @param tables The tables to join with + * @return The resulting table + */ + public Table fullOuter(Table... tables) { + return fullOuter(false, tables); + } + + /** + * Full outer join to the given tables assuming that they have a column of the name we're joining + * on * - *

If this method is not called, the default is false + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed* + * @param tables The tables to join with + * @return The resulting table + */ + public Table fullOuter(boolean allowDuplicateColumnNames, Table... tables) { + Table joined = table; + + for (Table currT : tables) { + joined = + joinInternal( + joined, + currT, + JoinType.FULL_OUTER, + allowDuplicateColumnNames, + false, + joinColumnNames); + } + return joined; + } + + /** + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table * - * @param keep true or false - * @return this DataFrameJoiner instance + * @param table2 The table to join with + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed + * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in + * table1 if {@code true} the join will return all join key columns in both table, which may + * have difference when there are null values + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table */ - public DataFrameJoiner keepAllJoinKeyColumns(boolean keep) { - this.keepAllJoinKeyColumns = keep; - return this; + public Table fullOuter( + Table table2, + boolean allowDuplicateColumnNames, + boolean keepAllJoinKeyColumns, + String... 
col2Names) { + return joinInternal( + table, + table2, + JoinType.FULL_OUTER, + allowDuplicateColumnNames, + keepAllJoinKeyColumns, + col2Names); } /** - * if {@code false} the join will fail if any columns other than the join column have the same - * name; if {@code true} the join will succeed and duplicate columns are renamed and included in - * the results. Specifically, the renamed columns are given a are give a prefix and the prefix - * used is "Tn." where n is the number of the table in the join. The second table is - * (T2.column_name), for example. + * Full outer join the joiner to the table2, using the given column for the second table and + * returns the resulting table * - *

See also {@link DataFrameJoiner#keepAllJoinKeyColumns(boolean)} to determine whether to - * retain the join columns from the second table + * @param table2 The table to join with + * @param col2Name The column to join on. If col2Name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table + */ + public Table fullOuter(Table table2, String col2Name) { + return joinInternal(table, table2, JoinType.FULL_OUTER, false, false, col2Name); + } + + /** + * Joins to the given tables assuming that they have a column of the name we're joining on * - * @param allow true, if columns with duplicate names are to be retained; false otherwise. Default - * is false - * @return this DataFrameJoiner instance + * @param tables The tables to join with + * @return The resulting table */ - public DataFrameJoiner allowDuplicateColumnNames(boolean allow) { - this.allowDuplicateColumnNames = allow; - return this; + public Table leftOuter(Table... tables) { + return leftOuter(false, tables); } /** - * The names of the columns to be joined on in the second (right) table. If this method is not - * called, they default to the names used for the left table. + * Joins to the given tables assuming that they have a column of the name we're joining on * - * @param rightJoinColumnNames The names to be used - * @return This DataFrameJoiner instance + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed* + * @param tables The tables to join with + * @return The resulting table */ - public DataFrameJoiner rightJoinColumns(String... rightJoinColumnNames) { - Preconditions.checkNotNull(rightJoinColumnNames); - this.rightJoinColumnNames = rightJoinColumnNames; - return this; + public Table leftOuter(boolean allowDuplicateColumnNames, Table... 
tables) { + Table joined = table; + for (Table table2 : tables) { + joined = + joinInternal( + joined, + table2, + JoinType.LEFT_OUTER, + allowDuplicateColumnNames, + false, + joinColumnNames); + } + return joined; } /** - * The table or tables to be used on the right side of the join. If more than one table is - * provided, the join is executed repeatedly, merging the next right table with the prior results + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table * - * @param tables The table or tables to be used on the right side - * @return This DataFrameJoiner instance + * @param table2 The table to join with + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table */ - public DataFrameJoiner with(Table... tables) { - Preconditions.checkNotNull(tables); - this.rightTables = Arrays.stream(tables).collect(Collectors.toList()); - return this; + public Table leftOuter(Table table2, String[] col2Names) { + return leftOuter(table2, false, col2Names); } /** - * Performs the actual join and returns the results + * Joins the joiner to the table2, using the given column for the second table and returns the + * resulting table * - * @return The combined table + * @param table2 The table to join with + * @param col2Name The column to join on. If col2Name refers to a double column, the join is + * performed after rounding to integers. 
+ * @return The resulting table */ - public Table join() { + public Table leftOuter(Table table2, String col2Name) { + return leftOuter(table2, false, col2Name); + } - selectJoinStrategy(); + /** + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table + */ + public Table leftOuter(Table table2, boolean allowDuplicateColumnNames, String... col2Names) { + return joinInternal( + table, table2, JoinType.LEFT_OUTER, allowDuplicateColumnNames, false, col2Names); + } - if (!allowDuplicateColumnNames) { - Set rightJoinColumns = Set.of(rightJoinColumnNames); - Set leftJoinColumns = Set.of(leftJoinColumnNames); - Set nonJoinColumns = - table.columnNames().stream() - .filter(e -> !leftJoinColumns.contains(e)) - .collect(Collectors.toSet()); + /** + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed + * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in + * table1 if {@code true} the join will return all join key columns in both table, which may + * have difference when there are null values + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. 
+ * @return The resulting table + */ + public Table leftOuter( + Table table2, + boolean allowDuplicateColumnNames, + boolean keepAllJoinKeyColumns, + String... col2Names) { + return joinInternal( + table, + table2, + JoinType.LEFT_OUTER, + allowDuplicateColumnNames, + keepAllJoinKeyColumns, + col2Names); + } - for (Table t : rightTables) { - List names = - t.columnNames().stream() - .filter(e -> !rightJoinColumns.contains(e)) - .collect(Collectors.toList()); - for (String nm : names) { - if (!nonJoinColumns.contains(nm)) { - nonJoinColumns.add(nm); - } else { - throw new IllegalArgumentException( - "Attempting to join tables containing non-join columns with at least one name: " - + nm - + " appears in more than one table. " - + "If you would like to join tables containing columns with duplicate names, " - + " the value of 'allowDuplicateColumnNames' must be true"); - } - } - } + /** + * Joins to the given tables assuming that they have a column of the name we're joining on + * + * @param tables The tables to join with + * @return The resulting table + */ + public Table rightOuter(Table... tables) { + return rightOuter(false, tables); + } + + /** + * Joins to the given tables assuming that they have a column of the name we're joining on + * + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed + * @param tables The tables to join with + * @return The resulting table + */ + public Table rightOuter(boolean allowDuplicateColumnNames, Table... 
tables) { + Table joined = table; + for (Table table2 : tables) { + joined = + joinInternal( + joined, + table2, + JoinType.RIGHT_OUTER, + allowDuplicateColumnNames, + false, + joinColumnNames); + joinColumnIndexes.clear(); + joinColumnIndexes.addAll(getJoinIndexes(joined, joinColumnNames)); } - return performJoin(table, rightTables); + return joined; } - private void selectJoinStrategy() { - // System.out.println(table); - // System.out.println(rightTables.get(0)); + /** + * Joins the joiner to the table2, using the given column for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param col2Name The column to join on. If col2Name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table + */ + public Table rightOuter(Table table2, String col2Name) { + return rightOuter(table2, false, col2Name); + } + + /** + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table + */ + public Table rightOuter(Table table2, String[] col2Names) { + return rightOuter(table2, false, col2Names); + } + + /** + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. 
+ * @return The resulting table + */ + public Table rightOuter(Table table2, boolean allowDuplicateColumnNames, String... col2Names) { + return joinInternal( + table, table2, JoinType.RIGHT_OUTER, allowDuplicateColumnNames, false, col2Names); + } - int leftRowCount = table.rowCount(); - int rightRowCount = rightTables.get(0).rowCount(); + /** + * Joins the joiner to the table2, using the given columns for the second table and returns the + * resulting table + * + * @param table2 The table to join with + * @param allowDuplicateColumnNames if {@code false} the join will fail if any columns other than + * the join column have the same name if {@code true} the join will succeed and duplicate + * columns are renamed + * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in + * table1 if {@code true} the join will return all join key columns in both table, which may + * have difference when there are null values + * @param col2Names The columns to join on. If a name refers to a double column, the join is + * performed after rounding to integers. + * @return The resulting table + */ + public Table rightOuter( + Table table2, + boolean allowDuplicateColumnNames, + boolean keepAllJoinKeyColumns, + String... col2Names) { + return joinInternal( + table, + table2, + JoinType.RIGHT_OUTER, + allowDuplicateColumnNames, + keepAllJoinKeyColumns, + col2Names); + } + + /** + * Adds empty columns to the destination table with the same type as columns in table1 and table2. + * + *

For inner, left and full outer join types the join columns in table2 are not needed and will + * be marked as placeholders. The indexes of those columns will be returned. The downstream logic + * is easier if we wait to remove the redundant columns until the last step. + * + * @param destination the table to fill up with columns. Will be mutated in place. + * @param table1 the table on left side of the join. + * @param table2 the table on the right side of the join. + * @param joinType the type of join. + * @param allowDuplicates whether to allow duplicates. If yes rename columns in table2 that have + * the same name as columns in table1 with the exception of join columns in table2 when + * performing a right join. + * @param table2JoinColumnIndexes the index locations of the table2 join columns. + * @return A + */ + private Set emptyTableFromColumns( + Table destination, + Table table1, + Table table2, + JoinType joinType, + boolean allowDuplicates, + List table2JoinColumnIndexes, + boolean keepTable2JoinKeyColumns) { - int minCardinalityLeft = Integer.MAX_VALUE; - int minCardinalityRight = Integer.MAX_VALUE; + Column[] cols = + Streams.concat(table1.columns().stream(), table2.columns().stream()) + .map(Column::emptyCopy) + .toArray(Column[]::new); - for (int i = 0; i < rightJoinColumnNames.length; i++) { - int cardinality = table.column(leftJoinColumnNames[i]).countUnique(); - if (cardinality < minCardinalityLeft) { - minCardinalityLeft = cardinality; + // For inner join, left join and full outer join mark the join columns in table2 as + // placeholders. + // For right join mark the join columns in table1 as placeholders. + // Keep track of which join columns are placeholders so they can be ignored. 
+ Set ignoreColumns = new HashSet<>(); + for (int c = 0; c < cols.length; c++) { + if (joinType == JoinType.RIGHT_OUTER) { + if (c < table1.columnCount() && joinColumnIndexes.contains(c)) { + if (!keepTable2JoinKeyColumns) { + cols[c].setName("Placeholder_" + ignoreColumns.size()); + } + ignoreColumns.add(c); + } + } else { + int table2Index = c - table1.columnCount(); + if (c >= table1.columnCount() && table2JoinColumnIndexes.contains(table2Index)) { + if (!keepTable2JoinKeyColumns) { + cols[c].setName("Placeholder_" + ignoreColumns.size()); + } + ignoreColumns.add(c); + } } } - for (String rightJoinColumnName : rightJoinColumnNames) { - int cardinality = rightTables.get(0).column(rightJoinColumnName).countUnique(); - if (cardinality < minCardinalityRight) { - minCardinalityRight = cardinality; + + // Rename duplicate columns in second table + if (allowDuplicates) { + Set table1ColNames = + Arrays.stream(cols) + .map(Column::name) + .map(String::toLowerCase) + .limit(table1.columnCount()) + .collect(Collectors.toSet()); + + String table2Alias = TABLE_ALIAS + joinTableId.getAndIncrement(); + for (int c = table1.columnCount(); c < cols.length; c++) { + String columnName = cols[c].name(); + if (table1ColNames.contains(columnName.toLowerCase())) { + cols[c].setName(newName(table2Alias, columnName)); + } } } - // System.out.println("min cardinality left " + minCardinalityLeft); - // System.out.println("min cardinality right " + minCardinalityRight); - // System.out.println("Avg values left " + (leftRowCount/(minCardinalityLeft * 1.0))); - // System.out.println("Avg values right " + rightRowCount / minCardinalityRight); - if ((leftRowCount / (minCardinalityLeft * 1.0)) > 1000 - || (rightRowCount / (minCardinalityRight * 1.0) > 1000)) { - this.strategy = new SortMergeJoin(table, leftJoinColumnNames); - } else { - this.strategy = new CrossProductJoin(table, leftJoinColumnNames); - } - // System.out.println(strategy + " selected."); + destination.addColumns(cols); + return 
ignoreColumns; } /** - * Finds the position of the columns corresponding to the columnNames. E.G. The column named "ID" - * is located at position 5 (0-based) in the table. + * Creates cross product for the selection of two tables. * - * @param table the table that contains the columns. - * @param columnNames the column names to find position of. - * @return a list of column indexes within the table. + * @param destination the destination table. + * @param table1 the table on left of join. + * @param table2 the table on right of join. + * @param table1Rows the selection of rows in table1. + * @param table2Rows the selection of rows in table2. + * @param ignoreColumns a set of column indexes in the result to ignore. They are redundant join + * columns. */ - private int[] getJoinIndexes(Table table, String[] columnNames) { - int[] results = new int[columnNames.length]; - for (int i = 0; i < columnNames.length; i++) { - String nm = columnNames[i]; - results[i] = table.columnIndex(nm); + @SuppressWarnings({"rawtypes", "unchecked"}) + private void crossProduct( + Table destination, + Table table1, + Table table2, + Selection table1Rows, + Selection table2Rows, + Set ignoreColumns, + boolean keepTable2JoinKeyColumns) { + for (int c = 0; c < table1.columnCount() + table2.columnCount(); c++) { + if (!keepTable2JoinKeyColumns && ignoreColumns.contains(c)) { + continue; + } + int table2Index = c - table1.columnCount(); + for (int r1 : table1Rows) { + for (int r2 : table2Rows) { + if (c < table1.columnCount()) { + Column t1Col = table1.column(c); + destination.column(c).append(t1Col, r1); + } else { + Column t2Col = table2.column(table2Index); + destination.column(c).append(t2Col, r2); + } + } + } } - return results; } /** - * Recursively joins the table on the left with each of the tables on the right, substituting the - * result of the nth join as the left table for the nth + 1 join - * - * @param left The first (left) table participating in the join - * @param rightTables 
One or more tables to be joined with the first (left) table - * @return this DataFrameJoiner instance - */ - private Table performJoin(Table left, List

rightTables) { - Table result = - joinInternal( - left, - rightTables.remove(0), - joinType, - allowDuplicateColumnNames, - keepAllJoinKeyColumns, - rightJoinColumnNames); - if (rightTables.isEmpty()) { - return result; - } else { - // on subsequent calls, the left column may have a new structure - this.leftJoinColumnPositions = getJoinIndexes(result, leftJoinColumnNames); - return performJoin(result, rightTables); + * Adds rows to destination for each row in table1 with the columns from table2 added as missing + * values. + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + private void withMissingLeftJoin( + Table destination, + Table table1, + Selection table1Rows, + Set ignoreColumns, + boolean keepTable2JoinKeyColumns) { + for (int c = 0; c < destination.columnCount(); c++) { + if (!keepTable2JoinKeyColumns && ignoreColumns.contains(c)) { + continue; + } + if (c < table1.columnCount()) { + Column t1Col = table1.column(c); + for (int index : table1Rows) { + destination.column(c).append(t1Col, index); + } + } else { + for (int r1 = 0; r1 < table1Rows.size(); r1++) { + destination.column(c).appendMissing(); + } + } } } - @Override - Table joinInternal( - Table table1, + /** + * Adds rows to destination for each row in table2 with the columns from table1 added as missing + * values. + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + private void withMissingRight( + Table destination, + int table1ColCount, Table table2, + Selection table2Rows, JoinType joinType, - boolean allowDuplicates, - boolean keepAllJoinKeyColumns, - String[] rightJoinColumnPositions) { - return strategy.performJoin( - table1, - table2, - joinType, - allowDuplicates, - keepAllJoinKeyColumns, - leftJoinColumnPositions, - rightJoinColumnPositions); + List col2Indexes, + Set ignoreColumns, + boolean keepTable2JoinKeyColumns) { + + // Add index data from table2 into join column positions in table one. 
+ if (joinType == JoinType.FULL_OUTER) { + for (int i = 0; i < col2Indexes.size(); i++) { + Column t2Col = table2.column(col2Indexes.get(i)); + for (int index : table2Rows) { + destination.column(joinColumnIndexes.get(i)).append(t2Col, index); + } + } + } + + for (int c = 0; c < destination.columnCount(); c++) { + if (!keepTable2JoinKeyColumns) { + if (ignoreColumns.contains(c) || joinColumnIndexes.contains(c)) { + continue; + } + } + if (c < table1ColCount) { + for (int r1 = 0; r1 < table2Rows.size(); r1++) { + destination.column(c).appendMissing(); + } + } else { + Column t2Col = table2.column(c - table1ColCount); + for (int index : table2Rows) { + destination.column(c).append(t2Col, index); + } + } + } } } diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/JoinStrategy.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/JoinStrategy.java deleted file mode 100644 index ecdb92b18e..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/JoinStrategy.java +++ /dev/null @@ -1,15 +0,0 @@ -package tech.tablesaw.joining; - -import tech.tablesaw.api.Table; - -interface JoinStrategy { - - Table performJoin( - Table table1, - Table table2, - JoinType joinType, - boolean allowDuplicates, - boolean keepAllJoinKeyColumns, - int[] leftJoinColumnIndexes, - String... 
table2JoinColumnNames); -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/JoinType.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/JoinType.java deleted file mode 100644 index c0cd3788b7..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/JoinType.java +++ /dev/null @@ -1,9 +0,0 @@ -package tech.tablesaw.joining; - -/** The types of joins that are supported */ -public enum JoinType { - INNER, - LEFT_OUTER, - RIGHT_OUTER, - FULL_OUTER -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/RowComparatorChain.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/RowComparatorChain.java deleted file mode 100644 index 808a5c219e..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/RowComparatorChain.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package tech.tablesaw.joining; - -import java.util.*; -import tech.tablesaw.api.Row; - -/** - * A comparator for comparing two Row objects on one or more Columns. 
The two rows are expected to - * be from different tables, but they should have the same number and type of Columns being used for - * their sorts The implementation is based on Apache Commons Collections - */ -class RowComparatorChain implements Comparator { - - private final List> comparatorChain; - private BitSet orderingBits; - private boolean isLocked; - - /** Constructs a comparator chain with the argument as the first node in the chain */ - public RowComparatorChain(Comparator comparator) { - this(comparator, false); - } - - private RowComparatorChain(Comparator comparator, boolean reverse) { - this.orderingBits = null; - this.isLocked = false; - this.comparatorChain = new ArrayList<>(1); - this.comparatorChain.add(comparator); - this.orderingBits = new BitSet(1); - if (reverse) { - this.orderingBits.set(0); - } - } - - /** Appends the comparator to the end of the chain */ - public void addComparator(Comparator comparator) { - this.comparatorChain.add(comparator); - } - - /** Returns the number of comparators in the chain */ - public int size() { - return this.comparatorChain.size(); - } - - private void checkChainIntegrity() { - if (this.comparatorChain.isEmpty()) { - throw new UnsupportedOperationException( - "ComparatorChains must contain at least one Comparator"); - } - } - - /** - * {@inheritDoc} - * - * @throws UnsupportedOperationException - */ - @Override - public int compare(Row o1, Row o2) throws UnsupportedOperationException { - if (!this.isLocked) { - this.checkChainIntegrity(); - this.isLocked = true; - } - - Iterator> comparators = this.comparatorChain.iterator(); - - for (int comparatorIndex = 0; comparators.hasNext(); ++comparatorIndex) { - Comparator comparator = comparators.next(); - int retval = comparator.compare(o1, o2); - if (retval != 0) { - if (this.orderingBits.get(comparatorIndex)) { - if (retval > 0) { - retval = -1; - } else { - retval = 1; - } - } - return retval; - } - } - return 0; - } - - @Override - public int hashCode() { - 
int hash = 0; - if (null != this.comparatorChain) { - hash ^= this.comparatorChain.hashCode(); - } - - if (null != this.orderingBits) { - hash ^= this.orderingBits.hashCode(); - } - return hash; - } - - @Override - public boolean equals(Object object) { - if (this == object) { - return true; - } else if (null == object) { - return false; - } else if (!object.getClass().equals(this.getClass())) { - return false; - } else { - label48: - { - label32: - { - RowComparatorChain chain = (RowComparatorChain) object; - if (null == this.orderingBits) { - if (null != chain.orderingBits) { - break label32; - } - } else if (!this.orderingBits.equals(chain.orderingBits)) { - break label32; - } - - if (null == this.comparatorChain) { - if (null == chain.comparatorChain) { - break label48; - } - } else if (this.comparatorChain.equals(chain.comparatorChain)) { - break label48; - } - } - return false; - } - return true; - } - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("RowComparatorChain{"); - sb.append("comparatorChain=").append(comparatorChain); - sb.append('}'); - return sb.toString(); - } -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/SortKey.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/SortKey.java deleted file mode 100644 index 4f5e1a241f..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/SortKey.java +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package tech.tablesaw.joining; - -import com.google.common.base.MoreObjects; -import java.util.*; -import tech.tablesaw.api.ColumnType; -import tech.tablesaw.api.Row; - -/** - * SortKey is basically a specification for a sort. It defines the sort required for a - * merge-sort-join. The sort order is defined such that the tables being joined are both sorted - * independently on the join columns. All columns being sorted are sorted in ascending order - */ -class SortKey implements Iterable { - - /** Describes how the tables are to be sorted */ - private final ArrayList sortOrder = new ArrayList<>(); - - private SortKey(ColumnIndexPair pair) { - next(pair); - } - - /** - * Returns a new SortKey defining the first sort (for the first join column) - * - * @param pair The details of the sort, i.e. what type of column and the index of the columns in - * the respective tables. - */ - public static SortKey on(ColumnIndexPair pair) { - return new SortKey(pair); - } - - /** - * Returns a new SortKey defining an additional sort clause - * - * @param pair The details of the sort, i.e. what type of column and the index of the columns in - * the respective tables. - */ - public SortKey next(ColumnIndexPair pair) { - sortOrder.add(pair); - return this; - } - - /** Returns true if no order has been set */ - public boolean isEmpty() { - return sortOrder.isEmpty(); - } - - /** Returns the number of columns used in this sort */ - public int size() { - return sortOrder.size(); - } - - /** - * Returns a new SortKey for the given ColumnIndexPairs. 
A table being sorted on three columns, - * will have three pairs in the SortKey - */ - public static SortKey create(List pairs) { - SortKey key = null; - - for (ColumnIndexPair pair : pairs) { - if (key == null) { // key will be null the first time through - key = new SortKey(pair); - } else { - key.next(pair); - } - } - return key; - } - - /** - * Returns a ComparatorChain consisting of one or more comparators as specified in the given - * SortKey - */ - static RowComparatorChain getChain(SortKey key) { - Iterator entries = key.iterator(); - ColumnIndexPair sort = entries.next(); - Comparator comparator = comparator(sort); - - RowComparatorChain chain = new RowComparatorChain(comparator); - while (entries.hasNext()) { - sort = entries.next(); - chain.addComparator(comparator(sort)); - } - return chain; - } - - /** Returns a comparator for a given ColumnIndexPair */ - private static Comparator comparator(ColumnIndexPair pair) { - if (pair.type.equals(ColumnType.INTEGER)) { - return (r11, r21) -> { - int b1 = r11.getInt(pair.left); - int b2 = r21.getInt(pair.right); - return Integer.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.LOCAL_DATE)) { - return (r11, r21) -> { - int b1 = r11.getPackedDate(pair.left); - int b2 = r21.getPackedDate(pair.right); - return Integer.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.LOCAL_TIME)) { - return (r11, r21) -> { - int b1 = r11.getPackedTime(pair.left); - int b2 = r21.getPackedTime(pair.right); - return Integer.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.LONG)) { - return (r11, r21) -> { - long b1 = r11.getLong(pair.left); - long b2 = r21.getLong(pair.right); - return Long.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.LOCAL_DATE_TIME)) { - return (r11, r21) -> { - long b1 = r11.getPackedDateTime(pair.left); - long b2 = r21.getPackedDateTime(pair.right); - return Long.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.INSTANT)) { - return (r11, r21) 
-> { - long b1 = r11.getPackedInstant(pair.left); - long b2 = r21.getPackedInstant(pair.right); - return Long.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.DOUBLE)) { - return (r11, r21) -> { - double b1 = r11.getDouble(pair.left); - double b2 = r21.getDouble(pair.right); - return Double.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.FLOAT)) { - return (r11, r21) -> { - float b1 = r11.getFloat(pair.left); - float b2 = r21.getFloat(pair.right); - return Float.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.BOOLEAN)) { - return (r11, r21) -> { - byte b1 = r11.getBooleanAsByte(pair.left); - byte b2 = r21.getBooleanAsByte(pair.right); - return Byte.compare(b1, b2); - }; - } else if (pair.type.equals(ColumnType.STRING)) { - return (r11, r21) -> { - String b1 = r11.getString(pair.left); - String b2 = r21.getString(pair.right); - return b1.compareTo(b2); - }; - } - throw new RuntimeException("Unhandled ColumnType in SortKey."); - } - - /** Returns the iterator for the SortKey */ - @Override - public Iterator iterator() { - return sortOrder.iterator(); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this).add("order", sortOrder).toString(); - } -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/SortMergeJoin.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/SortMergeJoin.java deleted file mode 100644 index 3890554fac..0000000000 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/joining/SortMergeJoin.java +++ /dev/null @@ -1,458 +0,0 @@ -package tech.tablesaw.joining; - -import com.google.common.collect.Streams; -import java.util.*; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import tech.tablesaw.api.ColumnType; -import tech.tablesaw.api.IntColumn; -import tech.tablesaw.api.Row; -import tech.tablesaw.api.Table; -import tech.tablesaw.columns.Column; -import 
tech.tablesaw.selection.Selection; - -/** Implements joins between two or more Tables */ -class SortMergeJoin implements JoinStrategy { - - private static final String LEFT_RECORD_ID_NAME = "_left_record_id_"; - private static final String RIGHT_RECORD_ID_NAME = "_right_record_id_"; - - private static final String TABLE_ALIAS = "T"; - public static final String PLACEHOLDER_COL_PREFIX = "Placeholder_"; - - private final String[] leftjoinColumnNames; - private int[] leftJoinColumnPositions; - private int[] rightJoinColumnPositions; - - private final AtomicInteger joinTableId = new AtomicInteger(1); - - /** - * Constructor. - * - * @param table The table to join on. - * @param joinColumnNames The join column names to join on. - */ - public SortMergeJoin(Table table, String... joinColumnNames) { - this.leftJoinColumnPositions = getJoinIndexes(table, joinColumnNames); - this.leftjoinColumnNames = joinColumnNames; - } - - /** - * Finds the index of the columns corresponding to the columnNames. E.G. The column named "ID" is - * located at index 5 in table. - * - * @param table the table that contains the columns. - * @param columnNames the column names to find indexes of. - * @return a list of column indexes within the table. - */ - private int[] getJoinIndexes(Table table, String[] columnNames) { - int[] results = new int[columnNames.length]; - for (int i = 0; i < columnNames.length; i++) { - String nm = columnNames[i]; - results[i] = table.columnIndex(nm); - } - return results; - } - - /** - * Joins two tables. - * - * @param t1 the table on the left side of the join. - * @param t2 the table on the right side of the join. - * @param joinType the type of join. 
- * @param allowDuplicates if {@code false} the join will fail if any columns other than the join - * column have the same name if {@code true} the join will succeed and duplicate columns are - * renamed - * @param keepAllJoinKeyColumns if {@code false} the join will only keep join key columns in - * table1 if {@code true} the join will return all join key columns in both table, which may - * have difference when there are null values - * @param table2JoinColumnNames The names of the columns in table2 to join on. - * @return the joined table - */ - public Table performJoin( - Table t1, - Table t2, - JoinType joinType, - boolean allowDuplicates, - boolean keepAllJoinKeyColumns, - int[] leftJoinColumnIndexes, - String... table2JoinColumnNames) { - - this.leftJoinColumnPositions = leftJoinColumnIndexes; - rightJoinColumnPositions = getJoinIndexes(t2, table2JoinColumnNames); - - Table table1 = t1.sortAscendingOn(leftjoinColumnNames); - Table table2 = t2.sortAscendingOn(table2JoinColumnNames); - - Column[] cols = - Streams.concat(table1.columns().stream(), table2.columns().stream()) - .map(Column::emptyCopy) - .toArray(Column[]::new); - - // A set of column indexes in the result table that can be ignored. They are duplicate join - // keys. - int[] resultIgnoreColIndexes = - keepAllJoinKeyColumns ? 
new int[0] : getIgnoredColumns(table1, joinType, cols); - - Table result = emptyTableFromColumns(table1, allowDuplicates, cols); - - // add indexes for outer join processing - IntColumn indexLeft = IntColumn.indexColumn(LEFT_RECORD_ID_NAME, table1.rowCount(), 0); - table1.addColumns(indexLeft); - result.addColumns(IntColumn.create(LEFT_RECORD_ID_NAME)); - - IntColumn indexRight = IntColumn.indexColumn(RIGHT_RECORD_ID_NAME, table2.rowCount(), 0); - table2.addColumns(indexRight); - result.addColumns(IntColumn.create(RIGHT_RECORD_ID_NAME)); - - validateJoinColumns(table1, table2); - - if (table1.rowCount() == 0 && (joinType == JoinType.LEFT_OUTER || joinType == JoinType.INNER)) { - // Handle special case of empty table here so it doesn't fall through to the behavior - // that adds rows for full outer and right outer joins - if (!keepAllJoinKeyColumns) { - result.removeColumns(resultIgnoreColIndexes); - } - return result; - } - if (joinType == JoinType.INNER) { - joinInner(result, table1, table2, resultIgnoreColIndexes); - } else if (joinType == JoinType.LEFT_OUTER) { - joinLeft(result, table1, table2, resultIgnoreColIndexes); - } else if (joinType == JoinType.RIGHT_OUTER) { - joinRight(result, table1, table2, resultIgnoreColIndexes); - } else if (joinType == JoinType.FULL_OUTER) { - joinFull(result, table1, table2, resultIgnoreColIndexes); - } - result.removeColumns(LEFT_RECORD_ID_NAME, RIGHT_RECORD_ID_NAME); - - if (!keepAllJoinKeyColumns) { - result = result.removeColumns(resultIgnoreColIndexes); - } else { - renameJoinColumns(result, table1, resultIgnoreColIndexes); - } - return result; - } - - /** - * Renames the column indexes for the second table from Placeholder_X to their original names - * - * @param resultIgnoreColIndexes The positions of the secondary join columns - */ - private void renameJoinColumns(Table result, Table left, int[] resultIgnoreColIndexes) { - - String table2Alias = TABLE_ALIAS + joinTableId.get(); - - for (int position : 
resultIgnoreColIndexes) { - String realName = result.column(position).name().replace(PLACEHOLDER_COL_PREFIX, ""); - if (position >= left.columnCount()) { - if (result.containsColumn(realName.toLowerCase())) { - result.column(position).setName(newName(table2Alias, realName)); - } else { - result.column(position).setName(realName); - } - } else { - result.column(position).setName(realName); - } - } - } - - private String newName(String table2Alias, String columnName) { - return table2Alias + "." + columnName; - } - - /** - * Adds empty columns to the destination table with the same type as columns in table1 and table2. - * - *

For inner, left and full outer join types the join columns in table2 are not needed and will - * be marked as placeholders. The indexes of those columns will be returned. The downstream logic - * is easier if we wait to remove the redundant columns until the last step. - * - * @param table1 the table on left side of the join. - * @param allowDuplicates whether to allow duplicates. If yes rename columns in table2 that have - * the same name as columns in table1, with the exception of join columns in table2 when - * performing a right join. - * @param cols An array of columns from both join tables - * @return the table to use for the join results - */ - Table emptyTableFromColumns(Table table1, boolean allowDuplicates, Column[] cols) { - - Table destination = Table.create(table1.name()); - - // Rename duplicate columns in second table - if (allowDuplicates) { - Set table1ColNames = - Arrays.stream(cols) - .map(Column::name) - .map(String::toLowerCase) - .limit(table1.columnCount()) - .collect(Collectors.toSet()); - - String table2Alias = TABLE_ALIAS + joinTableId.incrementAndGet(); - for (int c = table1.columnCount(); c < cols.length; c++) { - String columnName = cols[c].name(); - if (table1ColNames.contains(columnName.toLowerCase())) { - cols[c].setName(newName(table2Alias, columnName)); - } - } - } - destination.addColumns(cols); - return destination; - } - - /** - * Returns the positions of columns that can be ignored in the result table - * - *

For inner join, left join and full outer join mark the join columns in table2 as - * placeholders. - * - *

For right join, mark the join columns in table1 as placeholders. Keep track of which join - * columns are placeholders so they can be ignored. - */ - private int[] getIgnoredColumns(Table table1, JoinType joinType, Column[] cols) { - int[] ignoreColumns = new int[leftJoinColumnPositions.length]; - int ignoreIndex = 0; - for (int c = 0; c < cols.length; c++) { - if (joinType == JoinType.RIGHT_OUTER) { - if (c < table1.columnCount() && indexesContainsValue(leftJoinColumnPositions, c)) { - ignoreColumns[ignoreIndex] = c; - cols[c].setName(PLACEHOLDER_COL_PREFIX + cols[c].name()); - ignoreIndex++; - } - } else { // JoinType is LEFT, INNER, or FULL - int table2Index = c + table1.columnCount(); - if (indexesContainsValue(rightJoinColumnPositions, c)) { - ignoreColumns[ignoreIndex] = table2Index; - cols[table2Index].setName(PLACEHOLDER_COL_PREFIX + cols[table2Index].name()); - ignoreIndex++; - } - } - } - return ignoreColumns; - } - - private void joinInner(Table destination, Table left, Table right, int[] ignoreColumns) { - - Comparator comparator = getRowComparator(left, rightJoinColumnPositions); - - Row leftRow = left.row(0); - Row rightRow = right.row(0); - - // Marks the position of the first record in right table that matches a specific join value - int mark = -1; - - while (leftRow.hasNext() || rightRow.hasNext()) { - if (mark == -1) { - while (comparator.compare(leftRow, rightRow) < 0 && leftRow.hasNext()) leftRow.next(); - while (comparator.compare(leftRow, rightRow) > 0 && rightRow.hasNext()) rightRow.next(); - // set the position of the first matching record on the right side - mark = rightRow.getRowNumber(); - } - if (comparator.compare(leftRow, rightRow) == 0 && (leftRow.hasNext() || rightRow.hasNext())) { - addValues(destination, leftRow, rightRow); - if (rightRow.hasNext()) { - rightRow.next(); - } else { - rightRow.at(mark); - if (leftRow.hasNext()) { - leftRow.next(); - } - mark = -1; - } - } else { - if (rightRow.hasNext() && leftRow.hasNext()) { - 
rightRow.at(mark); - leftRow.next(); - mark = -1; - } else { - if (leftRow.hasNext()) leftRow.next(); - if (!leftRow.hasNext()) { - break; - } - } - } - } - // add the last value if you end on a match - if (comparator.compare(leftRow, rightRow) == 0) { - addValues(destination, leftRow, rightRow); - } - } - - private void joinLeft(Table destination, Table left, Table right, int[] ignoreColumns) { - - joinInner(destination, left, right, ignoreColumns); - Selection unmatched = - left.intColumn(LEFT_RECORD_ID_NAME) - .isNotIn(destination.intColumn(LEFT_RECORD_ID_NAME).unique()); - addLeftOnlyValues(destination, left, unmatched); - } - - private void joinRight(Table destination, Table left, Table right, int[] ignoreColumns) { - joinInner(destination, left, right, ignoreColumns); - Selection unmatched = - right - .intColumn(RIGHT_RECORD_ID_NAME) - .isNotIn(destination.intColumn(RIGHT_RECORD_ID_NAME).unique()); - addRightOnlyValues(destination, left, right, unmatched); - } - - private void joinFull(Table destination, Table left, Table right, int[] ignoreColumns) { - - Table tempDestination = destination.emptyCopy(); - - joinInner(destination, left, right, ignoreColumns); - - Selection unmatchedLeft = - left.intColumn(LEFT_RECORD_ID_NAME) - .isNotIn(destination.intColumn(LEFT_RECORD_ID_NAME).unique()); - addLeftOnlyValues(destination, left, unmatchedLeft); - - Selection unmatchedRight = - right - .intColumn(RIGHT_RECORD_ID_NAME) - .isNotIn(destination.intColumn(RIGHT_RECORD_ID_NAME).unique()); - addRightOnlyValues(tempDestination, left, right, unmatchedRight); - for (int i = 0; i < ignoreColumns.length; i++) { - String name = tempDestination.columnNames().get(leftJoinColumnPositions[i]); - tempDestination.replaceColumn( - leftJoinColumnPositions[i], - tempDestination.column(ignoreColumns[i]).copy().setName(name)); - } - destination.append(tempDestination); - } - - private void addLeftOnlyValues(Table destination, Table left, Selection unmatched) { - for (Row leftRow : 
left.where(unmatched)) { - Row destRow = destination.appendRow(); - for (int c = 0; c < leftRow.columnCount() - 1; c++) { - updateDestinationRow(destRow, leftRow, c, c); - } - // update the index column putting it at the end of the destination table - updateDestinationRow(destRow, leftRow, destRow.columnCount() - 2, leftRow.columnCount() - 1); - } - } - - private void addRightOnlyValues(Table destination, Table left, Table right, Selection unmatched) { - int leftColumnCount = left.columnCount(); - for (Row rightRow : right.where(unmatched)) { - Row destRow = destination.appendRow(); - for (int c = 0; c < rightRow.columnCount() - 1; c++) { - updateDestinationRow(destRow, rightRow, c + leftColumnCount - 1, c); - } - // update the index column putting it at the end of the destination table - updateDestinationRow( - destRow, rightRow, destRow.columnCount() - 1, rightRow.columnCount() - 1); - } - } - - private Comparator getRowComparator(Table left, int[] rightJoinColumnIndexes) { - List pairs = createJoinColumnPairs(left, rightJoinColumnIndexes); - return SortKey.getChain(SortKey.create(pairs)); - } - - private List createJoinColumnPairs(Table left, int[] rightJoinColumnIndexes) { - List pairs = new ArrayList<>(); - for (int i = 0; i < leftJoinColumnPositions.length; i++) { - ColumnIndexPair columnIndexPair = - new ColumnIndexPair( - left.column(leftJoinColumnPositions[i]).type(), - leftJoinColumnPositions[i], - rightJoinColumnIndexes[i]); - pairs.add(columnIndexPair); - } - return pairs; - } - - private void updateDestinationRow( - Row destRow, Row sourceRow, int destColumnPosition, int sourceColumnPosition) { - ColumnType type = destRow.getColumnType(destColumnPosition); - if (type.equals(ColumnType.INTEGER)) { - destRow.setInt(destColumnPosition, sourceRow.getInt(sourceColumnPosition)); - } else if (type.equals(ColumnType.LONG)) { - destRow.setLong(destColumnPosition, sourceRow.getLong(sourceColumnPosition)); - } else if (type.equals(ColumnType.SHORT)) { - 
destRow.setShort(destColumnPosition, sourceRow.getShort(sourceColumnPosition)); - } else if (type.equals(ColumnType.STRING)) { - destRow.setString(destColumnPosition, sourceRow.getString(sourceColumnPosition)); - } else if (type.equals(ColumnType.LOCAL_DATE)) { - destRow.setPackedDate(destColumnPosition, sourceRow.getPackedDate(sourceColumnPosition)); - } else if (type.equals(ColumnType.LOCAL_TIME)) { - destRow.setPackedTime(destColumnPosition, sourceRow.getPackedTime(sourceColumnPosition)); - } else if (type.equals(ColumnType.LOCAL_DATE_TIME)) { - destRow.setPackedDateTime( - destColumnPosition, sourceRow.getPackedDateTime(sourceColumnPosition)); - } else if (type.equals(ColumnType.INSTANT)) { - destRow.setPackedInstant( - destColumnPosition, sourceRow.getPackedInstant(sourceColumnPosition)); - } else if (type.equals(ColumnType.DOUBLE)) { - destRow.setDouble(destColumnPosition, sourceRow.getDouble(sourceColumnPosition)); - } else if (type.equals(ColumnType.FLOAT)) { - destRow.setFloat(destColumnPosition, sourceRow.getFloat(sourceColumnPosition)); - } else if (type.equals(ColumnType.BOOLEAN)) { - destRow.setBooleanAsByte( - destColumnPosition, sourceRow.getBooleanAsByte(sourceColumnPosition)); - } - } - - private void addValues(Table destination, Row leftRow, Row rightRow) { - - Row destRow = destination.appendRow(); - - // update positionally, but take into account the RECORD_ID COLUMNS at the end of the dest table - int leftColumnCount = leftRow.columnCount(); - int rightColumnCount = rightRow.columnCount(); - - // update from the left table first (everythint but the RECORD_ID column) - for (int destIdx1 = 0; destIdx1 < leftColumnCount - 1; destIdx1++) { - updateDestinationRow(destRow, leftRow, destIdx1, destIdx1); - } - - // update from the right table (everythint but the RECORD_ID column) - for (int destIdx2 = (leftColumnCount - 1); - destIdx2 < (leftColumnCount + rightColumnCount) - 2; - destIdx2++) { - int rightIndex = destIdx2 - (leftColumnCount - 1); - 
updateDestinationRow(destRow, rightRow, destIdx2, rightIndex); - } - - // update the RECORD_ID columns - updateDestinationRow(destRow, leftRow, destRow.columnCount() - 2, leftColumnCount - 1); - updateDestinationRow(destRow, rightRow, destRow.columnCount() - 1, rightColumnCount - 1); - } - - private boolean indexesContainsValue(int[] joinColumnIndexes, int columnIndex) { - for (int i : joinColumnIndexes) { - if (columnIndex == i) { - return true; - } - } - return false; - } - - private void validateJoinColumns(Table table1, Table table2) { - if (leftJoinColumnPositions.length != rightJoinColumnPositions.length) { - throw new IllegalArgumentException( - "Cannot join using a different number of indices on each table: " - + Arrays.toString(leftJoinColumnPositions) - + " and " - + Arrays.toString(rightJoinColumnPositions)); - } - for (int i = 0; i < leftJoinColumnPositions.length; i++) { - if (!table1 - .column(leftJoinColumnPositions[i]) - .getClass() - .equals(table2.column(rightJoinColumnPositions[i]).getClass())) { - throw new IllegalArgumentException( - "Cannot join using different index types: " - + Arrays.toString(leftJoinColumnPositions) - + " and " - + Arrays.toString(rightJoinColumnPositions)); - } - } - } - - @Override - public String toString() { - return "SortMergeJoin"; - } -} diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/selection/BitmapBackedSelection.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/selection/BitmapBackedSelection.java index 2a4ff1ae21..2a05f24d34 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/selection/BitmapBackedSelection.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/selection/BitmapBackedSelection.java @@ -209,11 +209,6 @@ protected static Selection with(int... 
rows) { return selection; } - /** Returns a Selection containing all indexes in the array */ - protected static Selection fromBitmap(RoaringBitmap bitmap) { - return new BitmapBackedSelection(bitmap); - } - /** * Returns a Selection containing all indexes in the range start (inclusive) to end (exclusive), */ diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/selection/Selection.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/selection/Selection.java index 1be5df79c0..6998d60367 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/selection/Selection.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/selection/Selection.java @@ -15,7 +15,6 @@ package tech.tablesaw.selection; import it.unimi.dsi.fastutil.ints.IntIterable; -import org.roaringbitmap.RoaringBitmap; /** * A selection maintains an ordered set of ints that can be used to filter rows from a table or @@ -99,11 +98,6 @@ static Selection with(int... 
rows) { return BitmapBackedSelection.with(rows); } - /** */ - static Selection fromBitmap(RoaringBitmap bitmap) { - return BitmapBackedSelection.fromBitmap(bitmap); - } - /** * Returns a Selection containing all indexes in the range start (inclusive) to end (exclusive), */ diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/Relation.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/Relation.java index d83a220563..f5302eef17 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/Relation.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/Relation.java @@ -14,7 +14,6 @@ package tech.tablesaw.table; -import static tech.tablesaw.joining.JoinType.FULL_OUTER; import java.io.ByteArrayOutputStream; import java.util.ArrayList; import java.util.Arrays; @@ -37,6 +36,7 @@ import tech.tablesaw.api.ShortColumn; import tech.tablesaw.api.StringColumn; import tech.tablesaw.api.Table; +import tech.tablesaw.api.TextColumn; import tech.tablesaw.api.TimeColumn; import tech.tablesaw.columns.Column; import tech.tablesaw.conversion.TableConverter; @@ -199,6 +199,21 @@ public Object get(int r, int c) { /** Returns a list containing the names of all the columns in this relation */ public abstract List columnNames(); + /** + * Returns an array of the column types of all columns in the relation, including duplicates as + * appropriate, and maintaining order + * + * @deprecated for API name consistency. Use {@link #typeArray()} instead. 
+ */ + @Deprecated + public ColumnType[] columnTypes() { + ColumnType[] columnTypes = new ColumnType[columnCount()]; + for (int i = 0; i < columnCount(); i++) { + columnTypes[i] = columns().get(i).type(); + } + return columnTypes; + } + /** * Returns an array of the column types of all columns in the relation, including duplicates as * appropriate, and maintaining order @@ -298,12 +313,7 @@ public Table summary() { Table columnSummary = this.column(i).summary(); columnSummary.column(1).setName(this.column(i).name()); summaryTable = - summaryTable - .joinOn("Measure") - .with(columnSummary) - .rightJoinColumns(columnSummary.column(0).name()) - .type(FULL_OUTER) - .join(); + summaryTable.joinOn("Measure").fullOuter(columnSummary, columnSummary.column(0).name()); } summaryTable.column(0).setName("Summary"); return summaryTable; @@ -604,6 +614,22 @@ public StringColumn stringColumn(int columnIndex) { return (StringColumn) column(columnIndex); } + /** + * Returns a TextColumn with the given name if it is present in this Relation. If the column has a + * different type, a ClassCastException is thrown. + */ + public TextColumn textColumn(String columnName) { + return (TextColumn) column(columnName); + } + + /** + * Returns the TextColumn at the given 0-based index if present. A ClassCastException is the + * column is of a different type. + */ + public TextColumn textColumn(int columnIndex) { + return (TextColumn) column(columnIndex); + } + /** * Returns the DateTimeColumn at the given 0-based index if present. A ClassCastException is the * column is of a different type. 
diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/Rows.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/Rows.java index 3f9d5e9ef3..b618a9adf7 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/Rows.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/Rows.java @@ -16,13 +16,20 @@ import javax.annotation.concurrent.Immutable; import tech.tablesaw.api.ColumnType; +import tech.tablesaw.api.Row; import tech.tablesaw.api.Table; import tech.tablesaw.columns.Column; import tech.tablesaw.selection.BitmapBackedSelection; import tech.tablesaw.selection.Selection; -/** A static utility class for row operations */ +/** + * A static utility class for row operations + * + * @deprecated Functionality provided by this class is methods in the {@link + * tech.tablesaw.api.Table} class hierarchy, and/or by methods in {@link tech.tablesaw.api.Row} + */ @Immutable +@Deprecated public final class Rows { // Don't instantiate @@ -31,7 +38,10 @@ private Rows() {} /** * Copies the rows indicated by the row index values in the given selection from oldTable to * newTable + * + * @deprecated Use the instance method {Table:where(Selection} instead */ + @Deprecated @SuppressWarnings({"rawtypes", "unchecked"}) public static void copyRowsToTable(Selection rows, Table oldTable, Table newTable) { for (int columnIndex = 0; columnIndex < oldTable.columnCount(); columnIndex++) { @@ -46,7 +56,11 @@ public static void copyRowsToTable(Selection rows, Table oldTable, Table newTabl /** * Copies the rows indicated by the row index values in the given array from oldTable to newTable + * + * @deprecated Use the instance method {@link tech.tablesaw.api.Table#copyRowsToTable(int[], + * Table)} instead */ + @Deprecated @SuppressWarnings({"rawtypes", "unchecked"}) public static void copyRowsToTable(int[] rows, Table oldTable, Table newTable) { for (int columnIndex = 0; columnIndex < 
oldTable.columnCount(); columnIndex++) { @@ -59,6 +73,12 @@ public static void copyRowsToTable(int[] rows, Table oldTable, Table newTable) { } } + /** + * Appends a row from oldTable to newTable + * + * @deprecated Use the instance method {@link tech.tablesaw.api.Table#append(Row)} instead + */ + @Deprecated @SuppressWarnings({"rawtypes", "unchecked"}) public static void appendRowToTable(int row, Table oldTable, Table newTable) { int[] rows = new int[] {row}; @@ -70,6 +90,11 @@ public static void appendRowToTable(int row, Table oldTable, Table newTable) { } } + /** + * @deprecated Use the static method {@link tech.tablesaw.api.Table#compareRows(int, Table, + * Table)} instead + */ + @Deprecated public static boolean compareRows(int rowInOriginal, Table original, Table tempTable) { int columnCount = original.columnCount(); boolean result; @@ -85,6 +110,12 @@ public static boolean compareRows(int rowInOriginal, Table original, Table tempT return true; } + /** + * Copies the first n rows to a new table + * + * @deprecated Use {@link tech.tablesaw.api.Table#first(int)} instead + */ + @Deprecated public static void head(int rowCount, Table oldTable, Table newTable) { Selection rows = new BitmapBackedSelection(rowCount); for (int i = 0; i < rowCount; i++) { @@ -93,6 +124,12 @@ public static void head(int rowCount, Table oldTable, Table newTable) { copyRowsToTable(rows, oldTable, newTable); } + /** + * Copies the last n rows to a new table + * + * @deprecated Use {@link tech.tablesaw.api.Table#last(int)} instead + */ + @Deprecated public static void tail(int rowsToInclude, Table oldTable, Table newTable) { int oldTableSize = oldTable.rowCount(); int start = oldTableSize - rowsToInclude; diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/StandardTableSliceGroup.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/StandardTableSliceGroup.java index b0e8816595..f649b5268f 100644 --- 
a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/StandardTableSliceGroup.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/StandardTableSliceGroup.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Map.Entry; import tech.tablesaw.api.CategoricalColumn; +import tech.tablesaw.api.ColumnType; import tech.tablesaw.api.Table; import tech.tablesaw.columns.Column; import tech.tablesaw.selection.BitmapBackedSelection; @@ -99,7 +100,6 @@ private void splitOn(String... splitColumnNames) { } else { // handle the case where split is on non-text-columns int byteSize = getByteSize(splitColumns); for (int i = 0; i < getSourceTable().rowCount(); i++) { - // TODO: instead of splitting on column type, have a function that returns the byte size? StringBuilder stringKey = new StringBuilder(); ByteBuffer byteBuffer = ByteBuffer.allocate(byteSize); int count = 0; @@ -129,8 +129,7 @@ private void splitOn(String... splitColumnNames) { } private boolean containsTextColumn(List> splitColumns) { - return false; - // return splitColumns.stream().anyMatch(objects -> objects.type().equals(ColumnType.TEXT)); + return splitColumns.stream().anyMatch(objects -> objects.type().equals(ColumnType.TEXT)); } /** Wrapper class for a byte[] that implements equals and hashcode. 
*/ diff --git a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/TableSlice.java b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/TableSlice.java index 9ab1e2b39e..8830ee7e08 100644 --- a/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/TableSlice.java +++ b/symja_android_library/matheclipse-io/src/main/java/tech/tablesaw/table/TableSlice.java @@ -188,7 +188,7 @@ public Table first(int nRows) { Table copy = table.emptyCopy(); while (it.hasNext() && count < nRows) { int row = it.nextInt(); - copy.append(table.row(row)); + copy.addRow(table.row(row)); count++; } return copy; diff --git a/symja_android_library/matheclipse-io/src/test/java/org/matheclipse/io/test/FunctionsTestCase.java b/symja_android_library/matheclipse-io/src/test/java/org/matheclipse/io/test/SemanticImportTestCase.java similarity index 99% rename from symja_android_library/matheclipse-io/src/test/java/org/matheclipse/io/test/FunctionsTestCase.java rename to symja_android_library/matheclipse-io/src/test/java/org/matheclipse/io/test/SemanticImportTestCase.java index f42b5942e9..76e4874acb 100644 --- a/symja_android_library/matheclipse-io/src/test/java/org/matheclipse/io/test/FunctionsTestCase.java +++ b/symja_android_library/matheclipse-io/src/test/java/org/matheclipse/io/test/SemanticImportTestCase.java @@ -3,7 +3,7 @@ import org.junit.Test; import org.matheclipse.core.basic.Config; -public class FunctionsTestCase extends AbstractTestCase { +public class SemanticImportTestCase extends AbstractTestCase { @Test public void testSemanticImport() {