diff --git a/any2json-commons/pom.xml b/any2json-commons/pom.xml
new file mode 100644
index 00000000..6bd3eec3
--- /dev/null
+++ b/any2json-commons/pom.xml
@@ -0,0 +1,58 @@
+
+ 4.0.0
+
+
+ com.github.romualdrousseau
+ any2json-monorepo
+ 2.45-SNAPSHOT
+
+
+ com.github.romualdrousseau
+ any2json-commons
+ 2.45-SNAPSHOT
+ jar
+
+ any2json-commons
+
+ Any2Json plugin to tag tabular output implementing embeddings.
+
+ https://github.com/romualdrousseau/any2json-monorepo
+
+
+
+
+ org.furyio
+ fury-core
+ ${fury.version}
+
+
+ org.furyio
+ fury-format
+ ${fury.version}
+
+
+ org.xerial.snappy
+ snappy-java
+ ${snappy.version}
+
+
+
+ org.reflections
+ reflections
+ ${reflections.version}
+
+
+
+ org.apache.logging.log4j
+ log4j-core
+ ${log4j.version}
+ test
+
+
+ org.apache.logging.log4j
+ log4j-slf4j2-impl
+ ${log4j.version}
+ test
+
+
+
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/Chunk.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/Chunk.java
new file mode 100644
index 00000000..3f72e015
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/Chunk.java
@@ -0,0 +1,78 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Working state of a chunked row store: the fixed batch size, the on-disk
+ * location of every flushed batch, and the batch of rows currently in memory.
+ */
+public class Chunk {
+
+    private final int batchSize;
+    private final List<ChunkMetaData> batches;
+
+    private Row[] rows;
+
+    /**
+     * Creates a chunk with an empty batch index and a row buffer of
+     * {@code batchSize} slots.
+     *
+     * @param batchSize number of rows per batch; must be strictly positive
+     *                  because callers divide and take remainders by it
+     * @throws IllegalArgumentException if {@code batchSize} is not positive
+     */
+    public Chunk(final int batchSize) {
+        if (batchSize <= 0) {
+            throw new IllegalArgumentException("batchSize must be positive: " + batchSize);
+        }
+        this.batchSize = batchSize;
+        this.batches = new ArrayList<>();
+        this.rows = new Row[batchSize];
+    }
+
+    /** @return the number of rows per batch */
+    public int getBatchSize() {
+        return this.batchSize;
+    }
+
+    /**
+     * @return the live, mutable list of batch descriptors, in the order the
+     *         batches were registered
+     */
+    public List<ChunkMetaData> getBatches() {
+        return this.batches;
+    }
+
+    /** @return the in-memory row buffer itself (not a copy) */
+    public Row[] getRows() {
+        return this.rows;
+    }
+
+    /**
+     * Replaces the in-memory row buffer.
+     *
+     * @param rows the new buffer; stored as-is, without copying
+     */
+    public void setRows(final Row[] rows) {
+        this.rows = rows;
+    }
+
+    /**
+     * Stores a row in the in-memory buffer.
+     *
+     * @param idx slot index inside the current buffer
+     * @param row the row to store
+     */
+    public void setRow(final int idx, final Row row) {
+        this.rows[idx] = row;
+    }
+
+    /**
+     * @param idx slot index inside the current buffer
+     * @return the row stored in that slot
+     */
+    public Row getRow(final int idx) {
+        return this.rows[idx];
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkMetaData.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkMetaData.java
new file mode 100644
index 00000000..bc7d5499
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkMetaData.java
@@ -0,0 +1,37 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+/**
+ * Immutable descriptor of one stored chunk: a {@code long} position and an
+ * {@code int} length.
+ */
+public class ChunkMetaData {
+
+    private final long position;
+    private final int length;
+
+    /**
+     * Factory method; the only way to obtain an instance.
+     *
+     * @param position value later returned by {@link #position()}
+     * @param length   value later returned by {@link #length()}
+     * @return a new descriptor holding both values
+     */
+    public static ChunkMetaData of(final long position, final int length) {
+        return new ChunkMetaData(position, length);
+    }
+
+    private ChunkMetaData(final long position, final int length) {
+        this.position = position;
+        this.length = length;
+    }
+
+    /** @return the position given at creation */
+    public long position() {
+        return this.position;
+    }
+
+    /** @return the length given at creation */
+    public int length() {
+        return this.length;
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkSerializer.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkSerializer.java
new file mode 100644
index 00000000..543b2fdf
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkSerializer.java
@@ -0,0 +1,10 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.io.IOException;
+/** Converts a batch of rows to a byte array and back. */
+public interface ChunkSerializer {
+ /** Encodes {@code batch} as bytes; may fail with an {@link IOException}. */
+ byte[] serialize(Row[] batch) throws IOException;
+ /** Decodes bytes into rows; presumably the inverse of {@code serialize} - confirm per implementation. */
+ Row[] deserialize(byte[] bytes) throws IOException;
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkSerializerFactory.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkSerializerFactory.java
new file mode 100644
index 00000000..abc057c4
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/ChunkSerializerFactory.java
@@ -0,0 +1,155 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Properties;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.github.romualdrousseau.any2json.commons.bigdata.serializer.ChunkSerializerFury;
+import com.github.romualdrousseau.any2json.commons.bigdata.serializer.ChunkSerializerJava;
+
+/**
+ * Creates {@link ChunkSerializer} instances and caches one per thread.
+ *
+ * <p>
+ * When {@link SerializerType#DEFAULT} is requested, the concrete type is read
+ * from the {@code serializer} key of {@code chunk-serializer.properties} or,
+ * failing that, {@code batch-serializer.properties}. Each file is looked up in
+ * {@code user.dir}, then {@code user.dir/classes}, then on the classpath. When
+ * nothing is configured, Fury is used.
+ */
+public class ChunkSerializerFactory {
+
+    /** Available serializer implementations. */
+    public enum SerializerType {
+        DEFAULT, // DEFAULT IS FURY
+        JAVA,
+        FURY
+    }
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(ChunkSerializerFactory.class);
+
+    // One serializer per thread, created by the first newInstance call made on that thread.
+    private static final ThreadLocal<ChunkSerializer> CONTEXT = new ThreadLocal<>();
+
+    /**
+     * Returns the calling thread's serializer, creating it with
+     * {@link SerializerType#DEFAULT} if the thread has none yet.
+     *
+     * @return the serializer bound to the current thread
+     */
+    public static ChunkSerializer newInstance() {
+        return ChunkSerializerFactory.newInstance(SerializerType.DEFAULT);
+    }
+
+    /**
+     * Returns the calling thread's serializer, creating it on first use.
+     *
+     * <p>
+     * {@code type} is only honoured by the first call made on a thread; later
+     * calls return the cached instance whatever type they ask for.
+     *
+     * @param type the implementation to create when the thread has none yet
+     * @return the serializer bound to the current thread
+     * @throws UncheckedIOException     if the configuration file cannot be read
+     * @throws IllegalArgumentException if the configured name is not a
+     *                                  {@link SerializerType} constant
+     */
+    public static ChunkSerializer newInstance(final SerializerType type) {
+        ChunkSerializer serializer = CONTEXT.get();
+        if (serializer == null) {
+            serializer = new ChunkSerializerFactory(type).createSerializerInstance();
+            CONTEXT.set(serializer);
+        }
+        return serializer;
+    }
+
+    private final SerializerType type;
+
+    private ChunkSerializerFactory(final SerializerType type) {
+        Objects.requireNonNull(type, "type");
+        this.type = (type == SerializerType.DEFAULT) ? this.loadConfiguredType() : type;
+        LOGGER.info("ChunkSerializerFactory set to {}", this.type);
+    }
+
+    /** Reads the configured type, closing the properties stream afterwards. */
+    private SerializerType loadConfiguredType() {
+        final var prop = new Properties();
+        try (InputStream in = this.openDefaultPropertiesInputStream()) {
+            prop.load(in);
+        } catch (final IOException x) {
+            LOGGER.error("Error during ChunkSerializerFactory initialization: {}", x.getMessage());
+            throw new UncheckedIOException(x);
+        }
+        final var typeVal = prop.getProperty("serializer");
+        if (typeVal == null || typeVal.isBlank()) {
+            return SerializerType.DEFAULT;
+        }
+        // Accept any letter case and surrounding whitespace in the configured value.
+        return SerializerType.valueOf(typeVal.strip().toUpperCase(Locale.ROOT));
+    }
+
+    private ChunkSerializer createSerializerInstance() {
+        switch (this.type) {
+            case JAVA:
+                return new ChunkSerializerJava();
+            case FURY:
+            case DEFAULT:
+            default:
+                return new ChunkSerializerFury();
+        }
+    }
+
+    private InputStream openDefaultPropertiesInputStream() throws IOException {
+        return this.openPropertiesInputStream("chunk-serializer.properties")
+                .or(() -> this.openPropertiesInputStream("batch-serializer.properties"))
+                .orElseGet(InputStream::nullInputStream);
+    }
+
+    /** Looks for {@code fileName} on the file system first, then on the classpath. */
+    private Optional<InputStream> openPropertiesInputStream(final String fileName) {
+        final var userDir = System.getProperty("user.dir");
+        return this.getPathIfExists(Path.of(userDir, fileName))
+                .or(() -> this.getPathIfExists(Path.of(userDir, "classes", fileName)))
+                .flatMap(this::pathToStream)
+                .or(() -> this.resolveResourceAsStream(fileName));
+    }
+
+    private Optional<InputStream> pathToStream(final Path x) {
+        try {
+            return Optional.of(Files.newInputStream(x));
+        } catch (final IOException e) {
+            // Best effort: an unreadable file falls through to the classpath lookup.
+            LOGGER.debug("module: {} not readable: {}", x, e.getMessage());
+            return Optional.empty();
+        }
+    }
+
+    private Optional<InputStream> resolveResourceAsStream(final String resourceName) {
+        final var classLoader = this.getClass().getClassLoader();
+        final InputStream resource = classLoader.getResourceAsStream(resourceName);
+        if (resource == null) {
+            LOGGER.debug("module: {} not found", resourceName);
+            return Optional.empty();
+        }
+        LOGGER.debug("module: {} found at {}", resourceName, classLoader.getResource(resourceName));
+        return Optional.of(resource);
+    }
+
+    private Optional<Path> getPathIfExists(final Path path) {
+        if (!Files.exists(path)) {
+            LOGGER.debug("module: {} not found at {}", path.getFileName(), path);
+            return Optional.empty();
+        }
+        LOGGER.debug("module: {} found at {}", path.getFileName(), path);
+        return Optional.of(path);
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrame.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrame.java
new file mode 100644
index 00000000..2ea58d69
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrame.java
@@ -0,0 +1,200 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.Objects;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Read-only table of {@link Row}s stored on disk as serialized batches.
+ *
+ * <p>
+ * Only one batch is kept in memory at a time; reading a row from another batch
+ * reloads it from the store. The store file is opened with
+ * {@code DELETE_ON_CLOSE}, so it disappears when this frame is closed.
+ */
+public class DataFrame implements Closeable, Iterable<Row> {
+    private static final Logger LOGGER = LoggerFactory.getLogger(DataFrame.class);
+
+    private final ChunkSerializer serializer = ChunkSerializerFactory.newInstance();
+
+    private final Chunk chunk;
+    private final Path storePath;
+    private final int rowCount;
+    private final int columnCount;
+    private final FileChannel fileChannel;
+    private final MappedByteBuffer mappedBuffer;
+
+    private int currentChunkIdx;
+    private boolean isClosed;
+
+    /**
+     * Opens the store and, when it is small enough, maps it into memory.
+     *
+     * @param chunk       batch size, batch index and row buffer of the store
+     * @param storePath   file holding the serialized batches
+     * @param rowCount    total number of rows
+     * @param columnCount total number of columns
+     * @throws IOException if the store cannot be opened or mapped
+     */
+    public DataFrame(final Chunk chunk, final Path storePath, final int rowCount, final int columnCount)
+            throws IOException {
+        this.chunk = chunk;
+        this.storePath = storePath;
+        this.rowCount = rowCount;
+        this.columnCount = columnCount;
+        this.fileChannel = (FileChannel) Files.newByteChannel(this.storePath,
+                EnumSet.of(StandardOpenOption.READ, StandardOpenOption.DELETE_ON_CLOSE));
+
+        MappedByteBuffer buffer = null;
+        try {
+            // A single mapping cannot exceed Integer.MAX_VALUE bytes; larger stores use channel reads.
+            final long size = this.fileChannel.size();
+            if (size <= Integer.MAX_VALUE) {
+                buffer = this.fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, size);
+            }
+        } catch (final IOException | RuntimeException x) {
+            // Do not leak the channel when the frame cannot be constructed.
+            try {
+                this.fileChannel.close();
+            } catch (final IOException suppressed) {
+                x.addSuppressed(suppressed);
+            }
+            throw x;
+        }
+        this.mappedBuffer = buffer;
+
+        this.currentChunkIdx = -1;
+        this.isClosed = false;
+
+        LOGGER.info("DataFrame initialized with Mapped Buffer: {}", this.isMappedBuffer());
+    }
+
+    /** Closes the underlying channel; calling it again has no effect. */
+    @Override
+    public void close() throws IOException {
+        if (this.isClosed) {
+            return;
+        }
+        this.fileChannel.close();
+        this.isClosed = true;
+    }
+
+    /**
+     * Creates a rectangular view over this frame.
+     *
+     * @param rowStart    first row of the view
+     * @param columnStart first column of the view
+     * @param rowCount    number of rows in the view
+     * @param columnCount number of columns in the view
+     * @return the view
+     * @throws IndexOutOfBoundsException if the rectangle does not fit in this frame
+     */
+    public DataView view(final int rowStart, final int columnStart, final int rowCount, final int columnCount) {
+        Objects.checkFromIndexSize(rowStart, rowCount, this.rowCount);
+        Objects.checkFromIndexSize(columnStart, columnCount, this.columnCount);
+        return new DataView(this, rowStart, columnStart, rowCount, columnCount);
+    }
+
+    public int getRowCount() {
+        return this.rowCount;
+    }
+
+    public int getColumnCount() {
+        return this.columnCount;
+    }
+
+    /**
+     * @param row row index
+     * @return the size reported by that row, which may differ from {@link #getColumnCount()}
+     * @throws IndexOutOfBoundsException if {@code row} is out of range
+     */
+    public int getColumnCount(final int row) {
+        return this.getRow(row).size();
+    }
+
+    /**
+     * Returns a row, loading its batch from the store when it is not the one in memory.
+     *
+     * @param row row index
+     * @return the row
+     * @throws IndexOutOfBoundsException if {@code row} is out of range
+     * @throws UncheckedIOException      if the batch cannot be read or decoded
+     */
+    public Row getRow(final int row) {
+        Objects.checkIndex(row, this.rowCount);
+        final int batchSize = this.chunk.getBatchSize();
+        final int idx = row / batchSize;
+        if (this.currentChunkIdx != idx) {
+            this.chunk.setRows(this.loadOneBatch(this.chunk.getBatches().get(idx)));
+            this.currentChunkIdx = idx;
+        }
+        return this.chunk.getRow(row % batchSize);
+    }
+
+    /**
+     * @param row    row index
+     * @param column column index
+     * @return the cell value
+     * @throws IndexOutOfBoundsException if either index is out of range
+     */
+    public String getCell(final int row, final int column) {
+        Objects.checkIndex(row, this.rowCount);
+        Objects.checkIndex(column, this.columnCount);
+        return this.getRow(row).get(column);
+    }
+
+    @Override
+    public Iterator<Row> iterator() {
+        return new DataFrameIterator(this);
+    }
+
+    private Row[] loadOneBatch(final ChunkMetaData batch) {
+        final long startTime = System.currentTimeMillis();
+        try {
+            final var bytes = new byte[batch.length()];
+            if (this.isMappedBuffer()) {
+                this.mappedBuffer.position((int) batch.position());
+                this.mappedBuffer.get(bytes);
+            } else {
+                this.readFully(ByteBuffer.wrap(bytes), batch.position());
+            }
+            return this.serializer.deserialize(bytes);
+        } catch (final IOException x) {
+            throw new UncheckedIOException(x);
+        } finally {
+            final var stopTime = System.currentTimeMillis();
+            final var executionTimeInMS = (int) (stopTime - startTime);
+            LOGGER.debug("Load a chunk in memory offset: {}, length: {}. Took {}ms", batch.position(),
+                    batch.length(), executionTimeInMS);
+        }
+    }
+
+    /** Fills {@code buffer} from the channel, looping because a single read may return fewer bytes. */
+    private void readFully(final ByteBuffer buffer, final long position) throws IOException {
+        long offset = position;
+        while (buffer.hasRemaining()) {
+            final int read = this.fileChannel.read(buffer, offset);
+            if (read < 0) {
+                throw new EOFException("Unexpected end of store while reading the chunk at offset " + position);
+            }
+            offset += read;
+        }
+    }
+
+    private boolean isMappedBuffer() {
+        return this.mappedBuffer != null;
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrameIterator.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrameIterator.java
new file mode 100644
index 00000000..eea533fd
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrameIterator.java
@@ -0,0 +1,46 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Spliterator;
+import java.util.Spliterators;
+
+/** Iterates over the rows of a {@link DataFrame}, starting at row 0. */
+public class DataFrameIterator implements Iterator<Row> {
+    private final DataFrame dataFrame;
+
+    private int curr;
+
+    public DataFrameIterator(final DataFrame dataFrame) {
+        this.dataFrame = dataFrame;
+        this.curr = 0;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return this.curr < this.dataFrame.getRowCount();
+    }
+
+    /**
+     * @return the next row
+     * @throws NoSuchElementException if every row has already been returned
+     */
+    @Override
+    public Row next() {
+        if (!this.hasNext()) {
+            throw new NoSuchElementException();
+        }
+        return this.dataFrame.getRow(this.curr++);
+    }
+
+    /**
+     * Wraps this iterator in a spliterator; rows taken from one are no longer
+     * available from the other.
+     *
+     * @return a spliterator sized to the rows not yet returned
+     */
+    public Spliterator<Row> spliterator() {
+        final int remaining = this.dataFrame.getRowCount() - this.curr;
+        return Spliterators.spliterator(this, remaining, Spliterator.IMMUTABLE);
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrameWriter.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrameWriter.java
new file mode 100644
index 00000000..8a8d14af
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataFrameWriter.java
@@ -0,0 +1,135 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.ClosedChannelException;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Arrays;
+import java.util.EnumSet;
+
+/**
+ * Appends {@link Row}s to a temporary file in serialized batches and hands the
+ * result over as a {@link DataFrame}.
+ */
+public class DataFrameWriter implements Closeable {
+
+    private final ChunkSerializer serializer = ChunkSerializerFactory.newInstance();
+
+    private final Chunk chunk;
+    private final Path storePath;
+    private final FileChannel fileChannel;
+
+    private int columnCount;
+    private int rowCount;
+    private boolean isClosed;
+
+    public DataFrameWriter(final int chunkSize) throws IOException {
+        this(chunkSize, 0, null);
+    }
+
+    public DataFrameWriter(final int chunkSize, final int columnCount) throws IOException {
+        this(chunkSize, columnCount, null);
+    }
+
+    public DataFrameWriter(final int batchSize, final Path path) throws IOException {
+        this(batchSize, 0, path);
+    }
+
+    /**
+     * Creates the temporary store and opens it for writing.
+     *
+     * @param chunkSize   number of rows per batch
+     * @param columnCount initial column count; grows to the widest row written
+     * @param path        directory for the temporary file, or {@code null} for
+     *                    the default temporary directory
+     * @throws IOException if the file cannot be created or opened
+     */
+    public DataFrameWriter(final int chunkSize, final int columnCount, final Path path) throws IOException {
+        this.chunk = new Chunk(chunkSize);
+        this.storePath = (path == null) ? Files.createTempFile(null, null) : Files.createTempFile(path, null, null);
+        this.storePath.toFile().deleteOnExit();
+        this.fileChannel = (FileChannel) Files.newByteChannel(this.storePath,
+                EnumSet.of(StandardOpenOption.CREATE, StandardOpenOption.WRITE));
+        this.columnCount = columnCount;
+        this.rowCount = 0;
+        this.isClosed = false;
+    }
+
+    /**
+     * Flushes the rows of the last, partially filled batch and closes the file.
+     * Calling it again has no effect.
+     */
+    @Override
+    public void close() throws IOException {
+        if (this.isClosed) {
+            return;
+        }
+        this.isClosed = true;
+        try {
+            if ((this.rowCount % this.chunk.getBatchSize()) > 0) {
+                this.flushCurrentChunk();
+            }
+        } finally {
+            // Release the channel even when the final flush fails.
+            this.fileChannel.close();
+        }
+    }
+
+    public int getRowCount() {
+        return this.rowCount;
+    }
+
+    public int getColumnCount() {
+        return this.columnCount;
+    }
+
+    /**
+     * Closes this writer and opens the written store for reading.
+     *
+     * @return a frame over the rows written so far
+     * @throws IOException if closing the writer or opening the store fails
+     */
+    public DataFrame getDataFrame() throws IOException {
+        this.close();
+        return new DataFrame(this.chunk, this.storePath, this.rowCount, this.columnCount);
+    }
+
+    /**
+     * Appends a row, flushing the batch to the store once it is full.
+     *
+     * @param data the row to append
+     * @throws ClosedChannelException if the writer is already closed
+     * @throws IOException            if the batch cannot be serialized or written
+     */
+    public void write(final Row data) throws IOException {
+        if (this.isClosed) {
+            // Fail now rather than buffer a row that could never reach the store.
+            throw new ClosedChannelException();
+        }
+        this.chunk.setRow(this.rowCount % this.chunk.getBatchSize(), data);
+        this.columnCount = Math.max(this.columnCount, data.size());
+        this.rowCount++;
+        if ((this.rowCount % this.chunk.getBatchSize()) == 0) {
+            this.flushCurrentChunk();
+        }
+    }
+
+    private void flushCurrentChunk() throws IOException {
+        final Row[] rows = this.chunk.getRows();
+        // A partial batch only owns its first slots; the remaining slots hold stale rows or nulls.
+        final int pending = this.rowCount % this.chunk.getBatchSize();
+        final var bytes = this.serializer.serialize((pending == 0) ? rows : Arrays.copyOf(rows, pending));
+        final long position = this.fileChannel.position();
+        final var buffer = ByteBuffer.wrap(bytes);
+        // A single write is allowed to consume only part of the buffer.
+        while (buffer.hasRemaining()) {
+            this.fileChannel.write(buffer);
+        }
+        // Index the batch only once it is fully on disk.
+        this.chunk.getBatches().add(ChunkMetaData.of(position, bytes.length));
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataView.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataView.java
new file mode 100644
index 00000000..07a4d6c3
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataView.java
@@ -0,0 +1,85 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.util.Iterator;
+
+/**
+ * Rectangular window over a {@link DataFrame}; row and column indices are
+ * relative to the window's top-left corner.
+ */
+public class DataView implements Iterable<Row> {
+    private final DataFrame dataFrame;
+    private final int rowStart;
+    private final int columnStart;
+    private final int rowCount;
+    private final int columnCount;
+
+    /**
+     * @param dataFrame   frame the window reads from
+     * @param rowStart    frame row mapped to view row 0
+     * @param columnStart frame column mapped to view column 0
+     * @param rowCount    number of rows in the window
+     * @param columnCount number of columns in the window
+     */
+    public DataView(final DataFrame dataFrame, final int rowStart, final int columnStart, final int rowCount,
+            final int columnCount) {
+        this.dataFrame = dataFrame;
+        this.rowStart = rowStart;
+        this.columnStart = columnStart;
+        this.rowCount = rowCount;
+        this.columnCount = columnCount;
+    }
+
+    public DataFrame getDataFrame() {
+        return this.dataFrame;
+    }
+
+    public int getRowStart() {
+        return this.rowStart;
+    }
+
+    public int getRowCount() {
+        return this.rowCount;
+    }
+
+    public int getColumnStart() {
+        return this.columnStart;
+    }
+
+    public int getColumnCount() {
+        return this.columnCount;
+    }
+
+    /**
+     * @param row row index relative to the window
+     * @return the frame row, restricted to the window's columns
+     * @throws IndexOutOfBoundsException if {@code row} is outside the window
+     */
+    public Row getRow(int row) {
+        checkIndex(row, this.rowCount);
+        return this.dataFrame.getRow(this.rowStart + row).view(this.columnStart, this.columnCount);
+    }
+
+    /**
+     * @param row    row index relative to the window
+     * @param column column index relative to the window
+     * @return the cell value read from the frame
+     * @throws IndexOutOfBoundsException if either index is outside the window
+     */
+    public String getCell(final int row, final int column) {
+        checkIndex(row, this.rowCount);
+        checkIndex(column, this.columnCount);
+        return this.dataFrame.getCell(this.rowStart + row, this.columnStart + column);
+    }
+
+    @Override
+    public Iterator<Row> iterator() {
+        return new DataViewIterator(this);
+    }
+
+    // Shared bounds check; keeps the "Index: i, Size: n" message for rows and columns alike.
+    private static void checkIndex(final int index, final int count) {
+        if (index < 0 || index >= count) {
+            throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + count);
+        }
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataViewIterator.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataViewIterator.java
new file mode 100644
index 00000000..586e9622
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/DataViewIterator.java
@@ -0,0 +1,46 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Spliterator;
+import java.util.Spliterators;
+
+/** Iterates over the rows of a {@link DataView}, starting at view row 0. */
+public class DataViewIterator implements Iterator<Row> {
+    private final DataView view;
+
+    private int curr;
+
+    public DataViewIterator(final DataView view) {
+        this.view = view;
+        this.curr = 0;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return this.curr < this.view.getRowCount();
+    }
+
+    /**
+     * @return the next row of the view
+     * @throws NoSuchElementException if every row has already been returned
+     */
+    @Override
+    public Row next() {
+        if (!this.hasNext()) {
+            throw new NoSuchElementException();
+        }
+        return this.view.getRow(this.curr++);
+    }
+
+    /**
+     * Wraps this iterator in a spliterator; rows taken from one are no longer
+     * available from the other.
+     *
+     * @return a spliterator sized to the rows not yet returned
+     */
+    public Spliterator<Row> spliterator() {
+        final int remaining = this.view.getRowCount() - this.curr;
+        return Spliterators.spliterator(this, remaining, Spliterator.IMMUTABLE);
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/Row.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/Row.java
new file mode 100644
index 00000000..9ffaabf2
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/Row.java
@@ -0,0 +1,123 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.io.Serializable;
+import java.util.Iterator;
+import java.util.Objects;
+
+/**
+ * Sequence of string cells, or a window ("view") over the cells of another row.
+ *
+ * <p>
+ * A view shares the backing array of the row it was created from, so writes
+ * through either are visible to both.
+ */
+public class Row implements Iterable<String>, Serializable {
+
+    private final int columnStart;
+    private final int columnCount;
+    private final String[] data;
+
+    public static final Row Empty = new Row(0);
+
+    /**
+     * Creates a row of {@code columnCount} cells, all {@code null}.
+     *
+     * @param columnCount number of cells
+     */
+    public Row(final int columnCount) {
+        this(0, columnCount, new String[columnCount]);
+    }
+
+    private Row(final String[] data) {
+        this(0, data.length, data);
+    }
+
+    private Row(final int columnStart, final int columnCount, final String[] data) {
+        this.columnStart = columnStart;
+        this.columnCount = columnCount;
+        this.data = data;
+    }
+
+    /**
+     * Wraps the given cells in a row.
+     *
+     * @param data the cells; the array is used as-is, not copied
+     * @return a row over {@code data}
+     */
+    public static Row of(String... data) {
+        return new Row(data);
+    }
+
+    /**
+     * Creates a view over a range of this row's columns.
+     *
+     * @param columnStart first column of the view, relative to this row
+     * @param columnCount number of columns in the view
+     * @return a view sharing this row's cells
+     * @throws IndexOutOfBoundsException if the range is rejected by the bounds check
+     */
+    public Row view(final int columnStart, final int columnCount) {
+        // NOTE(review): this check accepts a range ending one column past this row
+        // and rejects empty ranges; get() returns null for cells past the data.
+        // Confirm whether that leniency is relied upon before tightening it.
+        Objects.checkFromToIndex(columnStart, columnStart + columnCount - 1, this.columnCount);
+        // Offsets are relative to this row, which may itself be a view.
+        return new Row(this.columnStart + columnStart, columnCount, this.data);
+    }
+
+    /** @return the number of columns visible through this row or view */
+    public int getColumnCount() {
+        return this.columnCount;
+    }
+
+    /** @return the length of the backing array, which for a view may differ from {@link #getColumnCount()} */
+    public int size() {
+        return this.data.length;
+    }
+
+    /**
+     * @param index column index relative to this row or view
+     * @return the cell, or {@code null} when it lies past the end of the data
+     * @throws IndexOutOfBoundsException if {@code index} is out of range
+     */
+    public String get(final int index) {
+        Objects.checkIndex(index, this.columnCount);
+        final int position = this.columnStart + index;
+        return (position < this.data.length) ? this.data[position] : null;
+    }
+
+    /**
+     * Sets a cell; a write past the end of the data is silently ignored.
+     *
+     * @param index   column index relative to this row or view
+     * @param element the new value
+     * @return this row
+     * @throws IndexOutOfBoundsException if {@code index} is out of range
+     */
+    public Row set(final int index, final String element) {
+        assert this != Row.Empty : "Row.Empty is not mutable";
+        Objects.checkIndex(index, this.columnCount);
+        final int position = this.columnStart + index;
+        if (position < this.data.length) {
+            this.data[position] = element;
+        }
+        return this;
+    }
+
+    /**
+     * Iterates over exactly the cells {@link #get(int)} exposes: the window of
+     * a view, with {@code null} for cells past the end of the data.
+     */
+    @Override
+    public Iterator<String> iterator() {
+        if (this.columnStart == 0 && this.columnCount == this.data.length) {
+            return new RowIterator(this.data);
+        }
+        final var window = new String[this.columnCount];
+        final int available = Math.min(this.columnCount, this.data.length - this.columnStart);
+        if (available > 0) {
+            System.arraycopy(this.data, this.columnStart, window, 0, available);
+        }
+        return new RowIterator(window);
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/RowIterator.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/RowIterator.java
new file mode 100644
index 00000000..d5991394
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/RowIterator.java
@@ -0,0 +1,46 @@
+package com.github.romualdrousseau.any2json.commons.bigdata;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Spliterator;
+import java.util.Spliterators;
+
+/** Iterates over the elements of a string array, in index order. */
+public class RowIterator implements Iterator<String> {
+    private final String[] row;
+
+    private int curr;
+
+    public RowIterator(final String[] row) {
+        this.row = row;
+        this.curr = 0;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return this.curr < this.row.length;
+    }
+
+    /**
+     * @return the next element, which may be {@code null}
+     * @throws NoSuchElementException if every element has already been returned
+     */
+    @Override
+    public String next() {
+        if (!this.hasNext()) {
+            throw new NoSuchElementException();
+        }
+        return this.row[this.curr++];
+    }
+
+    /**
+     * Wraps this iterator in a spliterator; elements taken from one are no
+     * longer available from the other.
+     *
+     * @return a spliterator sized to the elements not yet returned
+     */
+    public Spliterator<String> spliterator() {
+        final int remaining = this.row.length - this.curr;
+        return Spliterators.spliterator(this, remaining, Spliterator.IMMUTABLE);
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/serializer/ChunkSerializerFury.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/serializer/ChunkSerializerFury.java
new file mode 100644
index 00000000..888be015
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/serializer/ChunkSerializerFury.java
@@ -0,0 +1,47 @@
+package com.github.romualdrousseau.any2json.commons.bigdata.serializer;
+
+import java.io.IOException;
+
+import org.xerial.snappy.Snappy;
+
+import com.github.romualdrousseau.any2json.commons.bigdata.ChunkSerializer;
+import com.github.romualdrousseau.any2json.commons.bigdata.Row;
+
+import io.fury.Fury;
+import io.fury.config.Language;
+
+/**
+ * {@link ChunkSerializer} that encodes batches with Fury and compresses the
+ * result with Snappy.
+ */
+public class ChunkSerializerFury implements ChunkSerializer {
+
+    private final Fury fury;
+
+    /** Builds a Java-language Fury instance and registers the classes found in a batch. */
+    public ChunkSerializerFury() {
+        final Fury engine = Fury.builder()
+                .withLanguage(Language.JAVA)
+                .build();
+        // Registered in a fixed order: String, String[], Row, Row[].
+        engine.register(String.class);
+        engine.register(String[].class);
+        engine.register(Row.class);
+        engine.register(Row[].class);
+        this.fury = engine;
+    }
+
+    /** Serializes {@code batch} with Fury, then compresses the bytes with Snappy. */
+    @Override
+    public byte[] serialize(Row[] batch) throws IOException {
+        final byte[] encoded = this.fury.serializeJavaObject(batch);
+        return Snappy.compress(encoded);
+    }
+
+    /** Uncompresses {@code bytes} with Snappy, then decodes them with Fury. */
+    @Override
+    public Row[] deserialize(byte[] bytes) throws IOException {
+        final byte[] encoded = Snappy.uncompress(bytes);
+        return this.fury.deserializeJavaObject(encoded, Row[].class);
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/serializer/ChunkSerializerJava.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/serializer/ChunkSerializerJava.java
new file mode 100644
index 00000000..39dcfe4d
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/bigdata/serializer/ChunkSerializerJava.java
@@ -0,0 +1,46 @@
+package com.github.romualdrousseau.any2json.commons.bigdata.serializer;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+import com.github.romualdrousseau.any2json.commons.bigdata.ChunkSerializer;
+import com.github.romualdrousseau.any2json.commons.bigdata.Row;
+
+/** {@link ChunkSerializer} based on standard Java object serialization. */
+public class ChunkSerializerJava implements ChunkSerializer {
+
+    /** Writes {@code batch} with an {@link ObjectOutputStream} and returns the bytes. */
+    @Override
+    public byte[] serialize(Row[] batch) throws IOException {
+        try (
+                final var byteArrayOutputStream = new ByteArrayOutputStream();
+                final var objectOutputStream = new ObjectOutputStream(byteArrayOutputStream)) {
+            objectOutputStream.writeObject(batch);
+            // Push any buffered bytes into the byte array before it is snapshotted.
+            objectOutputStream.flush();
+            return byteArrayOutputStream.toByteArray();
+        }
+    }
+
+    /**
+     * Reads a batch back with an {@link ObjectInputStream}.
+     *
+     * <p>
+     * NOTE(review): this is native Java deserialization; it should only be fed
+     * bytes produced by {@link #serialize(Row[])}, never untrusted input.
+     *
+     * @throws IOException if the bytes cannot be read, or name a class that
+     *                     cannot be found (reported with the original cause)
+     */
+    @Override
+    public Row[] deserialize(byte[] bytes) throws IOException {
+        try (var objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
+            return (Row[]) objectInputStream.readObject();
+        } catch (final ClassNotFoundException e) {
+            throw new IOException("Cannot deserialize chunk: " + e.getMessage(), e);
+        }
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/Filter.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/Filter.java
new file mode 100644
index 00000000..db976713
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/Filter.java
@@ -0,0 +1,72 @@
+package com.github.romualdrousseau.any2json.commons.cv;
+
+/**
+ * Thresholds a bitmap against the response computed by a {@link Template}.
+ *
+ * <p>
+ * Clip arrays are read as {@code {x0, y0, x1, y1}}: x runs from {@code clip[0]}
+ * up to, but excluding, {@code clip[2]} and y from {@code clip[1]} up to, but
+ * excluding, {@code clip[3]}. The variants without a clip cover the whole bitmap.
+ */
+public class Filter {
+
+    private final Template filter;
+
+    public Filter(Template filter) {
+        this.filter = filter;
+    }
+
+    /** Sets to 0, in place, every pixel whose response is below {@code threshold}. */
+    public void apply(ISearchBitmap searchBitmap, double threshold) {
+        this.apply(searchBitmap, fullClip(searchBitmap), threshold);
+    }
+
+    /** Sets to 0, in place, every pixel inside {@code clip} whose response is below {@code threshold}. */
+    public void apply(ISearchBitmap searchBitmap, int[] clip, double threshold) {
+        for (int y = clip[1]; y < clip[3]; y++) {
+            for (int x = clip[0]; x < clip[2]; x++) {
+                final float acc = this.filter.sobel(searchBitmap, x, y);
+                if (acc < threshold) {
+                    searchBitmap.set(x, y, 0);
+                }
+            }
+        }
+    }
+
+    /** Sets to 1, in place, every pixel whose response is at least {@code threshold}. */
+    public void applyNeg(ISearchBitmap searchBitmap, double threshold) {
+        this.applyNeg(searchBitmap, fullClip(searchBitmap), threshold);
+    }
+
+    /** Sets to 1, in place, every pixel inside {@code clip} whose response is at least {@code threshold}. */
+    public void applyNeg(ISearchBitmap searchBitmap, int[] clip, double threshold) {
+        for (int y = clip[1]; y < clip[3]; y++) {
+            for (int x = clip[0]; x < clip[2]; x++) {
+                final float acc = this.filter.sobel(searchBitmap, x, y);
+                if (acc >= threshold) {
+                    searchBitmap.set(x, y, 1);
+                }
+            }
+        }
+    }
+
+    /** Writes into {@code destBitmap} 0 where the source response is below {@code threshold}, 1 elsewhere. */
+    public void apply(ISearchBitmap sourceBitmap, ISearchBitmap destBitmap, double threshold) {
+        this.apply(sourceBitmap, destBitmap, fullClip(sourceBitmap), threshold);
+    }
+
+    /** Same as the unclipped source/destination variant, restricted to {@code clip}. */
+    public void apply(ISearchBitmap sourceBitmap, ISearchBitmap destBitmap, int[] clip, double threshold) {
+        for (int y = clip[1]; y < clip[3]; y++) {
+            for (int x = clip[0]; x < clip[2]; x++) {
+                final float acc = this.filter.sobel(sourceBitmap, x, y);
+                destBitmap.set(x, y, (acc < threshold) ? 0 : 1);
+            }
+        }
+    }
+
+    // Clip covering the whole bitmap: {0, 0, width, height}.
+    private static int[] fullClip(ISearchBitmap bitmap) {
+        return new int[] { 0, 0, bitmap.getWidth(), bitmap.getHeight() };
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/ISearchBitmap.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/ISearchBitmap.java
new file mode 100644
index 00000000..af07063f
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/ISearchBitmap.java
@@ -0,0 +1,14 @@
+package com.github.romualdrousseau.any2json.commons.cv;
+/** Two-dimensional grid of int values addressed by (x, y). */
+public interface ISearchBitmap {
+ /** Width of the grid; Filter iterates x from 0 up to, but excluding, this value. */
+ int getWidth();
+ /** Height of the grid; Filter iterates y from 0 up to, but excluding, this value. */
+ int getHeight();
+ /** Returns the value stored at (x, y). */
+ int get(int x, int y);
+ /** Stores v at (x, y). */
+ void set(int x, int y, int v);
+ /** Returns a bitmap copy - presumably independent of this one; confirm per implementation. */
+ ISearchBitmap clone();
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/IShapeExtractor.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/IShapeExtractor.java
new file mode 100644
index 00000000..46dcce5b
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/IShapeExtractor.java
@@ -0,0 +1,10 @@
+package com.github.romualdrousseau.any2json.commons.cv;
+
+import java.util.List;
+/** Base class for strategies that extract shapes from an {@link ISearchBitmap}. */
+public abstract class IShapeExtractor {
+ /** Extracts shapes from the bitmap. NOTE(review): raw List - elements are presumably SearchPoint[]; confirm. */
+ public abstract List extractAll(ISearchBitmap bitmap);
+ /** Extracts one shape as a SearchPoint array; presumably the best candidate, per the name - confirm the rule. */
+ public abstract SearchPoint[] extractBest(ISearchBitmap bitmap);
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/SearchPoint.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/SearchPoint.java
new file mode 100644
index 00000000..6f87181e
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/SearchPoint.java
@@ -0,0 +1,324 @@
+package com.github.romualdrousseau.any2json.commons.cv;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Integer 2D point carrying an optional SAD (sum of absolute differences) score, plus
+ * static helpers working on rectangles.
+ *
+ * <p>Throughout the static helpers a rectangle ("shape") is a two-element
+ * {@code SearchPoint[]}: index 0 is the minimum (top-left) corner and index 1 the maximum
+ * (bottom-right) corner, both inclusive. Several helpers mutate the points or the arrays
+ * they are given.
+ */
+public class SearchPoint {
+
+    /** Creates a point at (x, y) with a SAD score of 0. */
+    public SearchPoint(int x, int y) {
+        this.x = x;
+        this.y = y;
+        this.sad = 0;
+    }
+
+    /** Creates a point at (x, y) with the given SAD score. */
+    public SearchPoint(int x, int y, float sad) {
+        this.x = x;
+        this.y = y;
+        this.sad = sad;
+    }
+
+    public int getX() {
+        return this.x;
+    }
+
+    public void setX(int x) {
+        this.x = x;
+    }
+
+    public int getY() {
+        return this.y;
+    }
+
+    public void setY(int y) {
+        this.y = y;
+    }
+
+    public float getSAD() {
+        return this.sad;
+    }
+
+    public void setSAD(int sad) {
+        this.sad = sad;
+    }
+
+    /** Float variant of {@link #setSAD(int)} so that a value read from {@link #getSAD()} can be stored back. */
+    public void setSAD(float sad) {
+        this.sad = sad;
+    }
+
+    /**
+     * Compares coordinates only; the SAD score is ignored.
+     *
+     * <p>NOTE(review): this is an overload, not an override of {@link Object#equals(Object)},
+     * so collections ({@code contains}, {@code distinct}, hash sets) still use identity.
+     *
+     * @param o point to compare with; {@code null} yields {@code false}
+     * @return {@code true} when both points have the same x and y
+     */
+    public boolean equals(SearchPoint o) {
+        return o != null && this.x == o.x && this.y == o.y;
+    }
+
+    /** Returns {@code true} when the maximum corner is not left of or above the minimum corner. */
+    public static boolean isValid(SearchPoint[] s) {
+        return s[1].getX() >= s[0].getX() && s[1].getY() >= s[0].getY();
+    }
+
+    /** Returns (x1 - x0) * (y1 - y0); a rectangle one cell wide or high therefore has area 0. */
+    public static int GetArea(SearchPoint[] s) {
+        return (s[1].getX() - s[0].getX()) * (s[1].getY() - s[0].getY());
+    }
+
+    /** Returns {@code true} when the two rectangles share at least one cell (borders inclusive). */
+    public static boolean IsOverlap(SearchPoint[] s1, SearchPoint[] s2) {
+        return s1[1].getX() >= s2[0].getX() && s1[0].getX() <= s2[1].getX()
+                && s1[1].getY() >= s2[0].getY() && s1[0].getY() <= s2[1].getY();
+    }
+
+    /** Returns {@code true} when (x, y) lies inside the rectangle, borders inclusive. */
+    public static boolean IsInside(SearchPoint[] points, int x, int y) {
+        return points[0].getX() <= x && x <= points[1].getX()
+                && points[0].getY() <= y && y <= points[1].getY();
+    }
+
+    /** Returns {@code true} when {@code shapes} already holds a rectangle with the same two corners. */
+    public static boolean IsDuplicate(SearchPoint[] shape1, List<SearchPoint[]> shapes) {
+        for (SearchPoint[] shape2 : shapes) {
+            if (shape1[0].equals(shape2[0]) && shape1[1].equals(shape2[1])) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Returns a new list keeping the first occurrence of each rectangle, in encounter order. */
+    public static List<SearchPoint[]> RemoveDuplicates(List<SearchPoint[]> shapes) {
+        List<SearchPoint[]> result = new ArrayList<>();
+        for (SearchPoint[] shape1 : shapes) {
+            if (!SearchPoint.IsDuplicate(shape1, result)) {
+                result.add(shape1);
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Splits rectangles so that none of them overlap any more.
+     *
+     * <p>Side effect: {@code shapes} is sorted in place by ascending area. Each rectangle is
+     * then used in turn as the clipping master against the current result.
+     *
+     * @param shapes rectangles to process; returned as-is when it holds fewer than two
+     * @return the clipped, de-duplicated rectangles
+     */
+    public static List<SearchPoint[]> RemoveOverlaps(List<SearchPoint[]> shapes) {
+        if (shapes.size() < 2) {
+            return shapes;
+        }
+
+        // comparingInt avoids the overflow a subtraction-based comparator can hit.
+        Collections.sort(shapes, Comparator.<SearchPoint[]>comparingInt(SearchPoint::GetArea));
+
+        List<SearchPoint[]> result = new ArrayList<>(shapes);
+        for (SearchPoint[] shape1 : shapes) {
+            List<SearchPoint[]> tmp = new ArrayList<>();
+            for (SearchPoint[] shape2 : result) {
+                tmp.addAll(SearchPoint.Clipping(shape1, shape2));
+            }
+            result = SearchPoint.RemoveDuplicates(tmp);
+        }
+
+        return SearchPoint.RemoveDuplicates(result);
+    }
+
+    /**
+     * Merges rectangles that span exactly the same rows into one rectangle covering their
+     * combined x range.
+     *
+     * <p>Side effects: the surviving rectangle's points are widened in place, and the absorbed
+     * rectangle's array entries are set to {@code null} inside the caller's list.
+     */
+    public static List<SearchPoint[]> MergeInX(List<SearchPoint[]> shapes) {
+        if (shapes.size() < 2) {
+            return shapes;
+        }
+
+        List<SearchPoint[]> result = new ArrayList<>();
+        for (SearchPoint[] shape1 : shapes) {
+            if (shape1[0] == null || shape1[1] == null) {
+                continue; // already absorbed by an earlier rectangle
+            }
+            for (SearchPoint[] shape2 : shapes) {
+                if (shape1 == shape2 || shape2[0] == null || shape2[1] == null) {
+                    continue;
+                }
+                if (shape1[0].getY() == shape2[0].getY() && shape1[1].getY() == shape2[1].getY()) {
+                    shape1[0].setX(Math.min(shape1[0].getX(), shape2[0].getX()));
+                    shape1[1].setX(Math.max(shape1[1].getX(), shape2[1].getX()));
+                    shape2[0] = null;
+                    shape2[1] = null;
+                }
+            }
+            result.add(shape1);
+        }
+        return result;
+    }
+
+    /**
+     * Merges rectangles that span exactly the same columns into one rectangle covering their
+     * combined y range. Same in-place side effects as {@link #MergeInX(List)}.
+     */
+    public static List<SearchPoint[]> MergeInY(List<SearchPoint[]> shapes) {
+        if (shapes.size() < 2) {
+            return shapes;
+        }
+
+        List<SearchPoint[]> result = new ArrayList<>();
+        for (SearchPoint[] shape1 : shapes) {
+            if (shape1[0] == null || shape1[1] == null) {
+                continue; // already absorbed by an earlier rectangle
+            }
+            for (SearchPoint[] shape2 : shapes) {
+                if (shape1 == shape2 || shape2[0] == null || shape2[1] == null) {
+                    continue;
+                }
+                if (shape1[0].getX() == shape2[0].getX() && shape1[1].getX() == shape2[1].getX()) {
+                    shape1[0].setY(Math.min(shape1[0].getY(), shape2[0].getY()));
+                    shape1[1].setY(Math.max(shape1[1].getY(), shape2[1].getY()));
+                    shape2[0] = null;
+                    shape2[1] = null;
+                }
+            }
+            result.add(shape1);
+        }
+        return result;
+    }
+
+    /** Shrinks each rectangle in place by dropping leading and trailing columns with no cell &gt; 0. */
+    public static List<SearchPoint[]> TrimInX(List<SearchPoint[]> shapes, ISearchBitmap bitmap) {
+        for (SearchPoint[] shape : shapes) {
+            for (int i = shape[0].getX(); i <= shape[1].getX(); i++) {
+                if (!SearchPoint.columnIsEmpty(shape, i, bitmap)) {
+                    break;
+                }
+                shape[0].setX(i + 1);
+            }
+
+            for (int i = shape[1].getX(); i >= shape[0].getX(); i--) {
+                if (!SearchPoint.columnIsEmpty(shape, i, bitmap)) {
+                    break;
+                }
+                shape[1].setX(i - 1);
+            }
+        }
+
+        return shapes;
+    }
+
+    /** Shrinks each rectangle in place by dropping leading and trailing rows with no cell &gt; 0. */
+    public static List<SearchPoint[]> TrimInY(List<SearchPoint[]> shapes, ISearchBitmap bitmap) {
+        for (SearchPoint[] shape : shapes) {
+            for (int i = shape[0].getY(); i <= shape[1].getY(); i++) {
+                if (!SearchPoint.rowIsEmpty(shape, i, bitmap)) {
+                    break;
+                }
+                shape[0].setY(i + 1);
+            }
+
+            for (int i = shape[1].getY(); i >= shape[0].getY(); i--) {
+                if (!SearchPoint.rowIsEmpty(shape, i, bitmap)) {
+                    break;
+                }
+                shape[1].setY(i - 1);
+            }
+        }
+
+        return shapes;
+    }
+
+    /**
+     * Grows each rectangle in place along x while the neighbouring column holds a cell &gt; 0.
+     *
+     * <p>NOTE(review): the left scan stops before column 0 and moves the edge to {@code i - 1};
+     * the right scan starts on the current edge and may move it to {@code getWidth()}.
+     * Behaviour is kept as found - confirm the intended bounds.
+     */
+    public static List<SearchPoint[]> ExpandInX(List<SearchPoint[]> shapes, ISearchBitmap bitmap) {
+        for (SearchPoint[] shape : shapes) {
+            for (int i = shape[0].getX() - 1; i > 0; i--) {
+                if (SearchPoint.columnIsEmpty(shape, i, bitmap)) {
+                    break;
+                }
+                shape[0].setX(i - 1);
+            }
+
+            for (int i = shape[1].getX(); i < bitmap.getWidth(); i++) {
+                if (SearchPoint.columnIsEmpty(shape, i, bitmap)) {
+                    break;
+                }
+                shape[1].setX(i + 1);
+            }
+        }
+
+        return shapes;
+    }
+
+    /**
+     * Grows each rectangle in place along y while the neighbouring row holds a cell &gt; 0.
+     * Same bound caveats as {@link #ExpandInX(List, ISearchBitmap)}.
+     */
+    public static List<SearchPoint[]> ExpandInY(List<SearchPoint[]> shapes, ISearchBitmap bitmap) {
+        for (SearchPoint[] shape : shapes) {
+            for (int i = shape[0].getY() - 1; i > 0; i--) {
+                if (SearchPoint.rowIsEmpty(shape, i, bitmap)) {
+                    break;
+                }
+                shape[0].setY(i - 1);
+            }
+
+            for (int i = shape[1].getY(); i < bitmap.getHeight(); i++) {
+                if (SearchPoint.rowIsEmpty(shape, i, bitmap)) {
+                    break;
+                }
+                shape[1].setY(i + 1);
+            }
+        }
+
+        return shapes;
+    }
+
+    /**
+     * Cuts {@code slave} against {@code master}.
+     *
+     * <p>The slave is first sliced horizontally along the master's top and bottom edges, then
+     * vertically along its left and right edges. Pieces that end up inside the master are
+     * dropped, and the master itself is appended last.
+     *
+     * @param master rectangle that wins the overlapping area
+     * @param slave  rectangle to cut
+     * @return the remaining slave pieces followed by {@code master}
+     */
+    public static List<SearchPoint[]> Clipping(SearchPoint[] master, SearchPoint[] slave) {
+        List<SearchPoint[]> result = new ArrayList<>();
+        result.add(slave);
+
+        // Top to bottom: two passes, because a piece may need a cut on each side.
+        for (int i = 0; i < 2; i++) {
+            List<SearchPoint[]> tmp = new ArrayList<>();
+            for (SearchPoint[] r : result) {
+                if (!SearchPoint.IsOverlap(r, master)) {
+                    tmp.add(r);
+                } else if (r[0].getY() < master[0].getY()) {
+                    tmp.add(rect(r[0].getX(), r[0].getY(), r[1].getX(), master[0].getY() - 1));
+                    tmp.add(rect(r[0].getX(), master[0].getY(), r[1].getX(), r[1].getY()));
+                } else if (r[1].getY() > master[1].getY()) {
+                    tmp.add(rect(r[0].getX(), r[0].getY(), r[1].getX(), master[1].getY()));
+                    tmp.add(rect(r[0].getX(), master[1].getY() + 1, r[1].getX(), r[1].getY()));
+                } else {
+                    tmp.add(r);
+                }
+            }
+            result = tmp;
+        }
+
+        // Left to right: an overlapping piece within the master's x range is dropped here.
+        for (int i = 0; i < 2; i++) {
+            List<SearchPoint[]> tmp = new ArrayList<>();
+            for (SearchPoint[] r : result) {
+                if (!SearchPoint.IsOverlap(r, master)) {
+                    tmp.add(r);
+                } else if (r[0].getX() < master[0].getX()) {
+                    tmp.add(rect(r[0].getX(), r[0].getY(), master[0].getX() - 1, r[1].getY()));
+                    tmp.add(rect(master[0].getX(), r[0].getY(), r[1].getX(), r[1].getY()));
+                } else if (r[1].getX() > master[1].getX()) {
+                    tmp.add(rect(r[0].getX(), r[0].getY(), master[1].getX(), r[1].getY()));
+                    tmp.add(rect(master[1].getX() + 1, r[0].getY(), r[1].getX(), r[1].getY()));
+                }
+            }
+            result = tmp;
+        }
+
+        result.add(master);
+
+        return result;
+    }
+
+    /** Builds a rectangle from freshly allocated corner points. */
+    private static SearchPoint[] rect(int x0, int y0, int x1, int y1) {
+        return new SearchPoint[] { new SearchPoint(x0, y0), new SearchPoint(x1, y1) };
+    }
+
+    /** Returns {@code true} when column {@code colIndex} has no cell &gt; 0 within the table's rows. */
+    private static boolean columnIsEmpty(SearchPoint[] table, int colIndex, ISearchBitmap bitmap) {
+        for (int i = table[0].getY(); i <= table[1].getY(); i++) {
+            if (bitmap.get(colIndex, i) > 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /** Returns {@code true} when row {@code rowIndex} has no cell &gt; 0 within the table's columns. */
+    private static boolean rowIsEmpty(SearchPoint[] table, int rowIndex, ISearchBitmap bitmap) {
+        for (int i = table[0].getX(); i <= table[1].getX(); i++) {
+            if (bitmap.get(i, rowIndex) > 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private int x;
+    private int y;
+    private float sad;
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/Template.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/Template.java
new file mode 100644
index 00000000..0aa1c8e3
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/Template.java
@@ -0,0 +1,54 @@
+package com.github.romualdrousseau.any2json.commons.cv;
+
+/**
+ * Small rectangular kernel of float weights, stored row-major ({@code data[y][x]}), that can
+ * be convolved with or compared against an {@link ISearchBitmap}.
+ */
+public class Template {
+
+    /**
+     * @param data kernel rows; must hold at least one row, and the first row's length is
+     *             taken as the kernel width
+     */
+    public Template(float[][] data) {
+        this.data = data;
+        this.inv_area = 1.0f / (data[0].length * data.length);
+    }
+
+    public int getWidth() {
+        return this.data[0].length;
+    }
+
+    public int getHeight() {
+        return this.data.length;
+    }
+
+    public float get(int x, int y) {
+        return this.data[y][x];
+    }
+
+    /**
+     * Computes the weighted sum of the bitmap cells under the kernel centred on (x, y).
+     *
+     * @return sum over the kernel of {@code weight * bitmap value}
+     */
+    public float sobel(ISearchBitmap searchBitmap, int x, int y) {
+        final int w = (this.data[0].length - 1) / 2;
+        final int h = (this.data.length - 1) / 2;
+        float acc = 0;
+        for (int i = 0; i < this.data.length; i++) {
+            for (int j = 0; j < this.data[i].length; j++) {
+                // The int cell value widens to float; no boxing needed.
+                acc += this.data[i][j] * searchBitmap.get(x - w + j, y - h + i);
+            }
+        }
+        return acc;
+    }
+
+    /**
+     * Computes the sum of absolute differences between the kernel and the bitmap cells under
+     * it when centred on (x, y); 0 means a perfect match.
+     */
+    public float sad(ISearchBitmap searchBitmap, int x, int y) {
+        final int hw = this.data[0].length / 2;
+        final int hh = this.data.length / 2;
+        float acc = 0.0f;
+        for (int i = 0; i < this.data.length; i++) {
+            for (int j = 0; j < this.data[i].length; j++) {
+                final float searchPixel = searchBitmap.get(x + j - hw, y + i - hh);
+                final float templatePixel = this.data[i][j];
+                acc += Math.abs(searchPixel - templatePixel);
+            }
+        }
+        return acc;
+    }
+
+    /** Maps a SAD value to a score: {@code 1 - v / (width * height)}, so 1 is a perfect match. */
+    public float normalize(float v) {
+        return 1.0f - v * this.inv_area;
+    }
+
+    private final float[][] data;
+    private final float inv_area;
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/TemplateMatcher.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/TemplateMatcher.java
new file mode 100644
index 00000000..51295805
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/TemplateMatcher.java
@@ -0,0 +1,56 @@
+package com.github.romualdrousseau.any2json.commons.cv;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Slides a {@link Template} over a window of an {@link ISearchBitmap} and reports the
+ * positions whose normalized SAD score passes a criterion.
+ */
+public class TemplateMatcher {
+
+    public TemplateMatcher(Template template) {
+        this.template = template;
+    }
+
+    /**
+     * Returns every position in the window [x, x + w) x [y, y + h) whose score is strictly
+     * above {@code threshold}, scanned row by row. Each point carries its raw SAD value.
+     */
+    public List<SearchPoint> matchAll(ISearchBitmap searchBitmap, int x, int y, int w, int h, double threshold) {
+        final List<SearchPoint> result = new ArrayList<>();
+        for (int i = y; i < y + h; i++) {
+            for (int j = x; j < x + w; j++) {
+                final float sad = this.template.sad(searchBitmap, j, i);
+                if (scoreOf(sad) > threshold) {
+                    result.add(new SearchPoint(j, i, sad));
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Returns the first position (row-major scan) whose score is strictly above
+     * {@code threshold}, or {@code null} when there is none.
+     */
+    public SearchPoint matchFirst(ISearchBitmap searchBitmap, int x, int y, int w, int h, double threshold) {
+        for (int i = y; i < y + h; i++) {
+            for (int j = x; j < x + w; j++) {
+                final float sad = this.template.sad(searchBitmap, j, i);
+                if (scoreOf(sad) > threshold) {
+                    return new SearchPoint(j, i, sad);
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the position with the highest score in the window; ties keep the earliest
+     * position. Returns {@code null} when no position scores above 0.
+     */
+    public SearchPoint matchBest(ISearchBitmap searchBitmap, int x, int y, int w, int h) {
+        SearchPoint result = null;
+        double maxScore = 0.0;
+        for (int i = y; i < y + h; i++) {
+            for (int j = x; j < x + w; j++) {
+                final float sad = this.template.sad(searchBitmap, j, i);
+                final float score = scoreOf(sad);
+                if (score > maxScore) {
+                    maxScore = score;
+                    result = new SearchPoint(j, i, sad);
+                }
+            }
+        }
+        return result;
+    }
+
+    /** Converts a raw SAD value into the template's normalized score. */
+    private float scoreOf(float sad) {
+        return this.template.normalize(sad);
+    }
+
+    private final Template template;
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/filter/EdgeFilter.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/filter/EdgeFilter.java
new file mode 100644
index 00000000..00a240d1
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/filter/EdgeFilter.java
@@ -0,0 +1,48 @@
+package com.github.romualdrousseau.any2json.commons.cv.filter;
+
+import com.github.romualdrousseau.any2json.commons.cv.Filter;
+import com.github.romualdrousseau.any2json.commons.cv.ISearchBitmap;
+import com.github.romualdrousseau.any2json.commons.cv.Template;
+
+/**
+ * Edge detector: combines a horizontal and a vertical 3x3 Sobel kernel and writes 1 where
+ * the gradient magnitude reaches the threshold, 0 elsewhere.
+ */
+public class EdgeFilter extends Filter {
+
+    public EdgeFilter() {
+        super(null);
+    }
+
+    /** Processes every pixel of {@code sourceBitmap} and writes the 0/1 result to {@code destBitmap}. */
+    public void apply(ISearchBitmap sourceBitmap, ISearchBitmap destBitmap, double threshold) {
+        for (int row = 0; row < sourceBitmap.getHeight(); row++) {
+            for (int col = 0; col < sourceBitmap.getWidth(); col++) {
+                destBitmap.set(col, row, sobelMagnitudeBit(sourceBitmap, col, row, threshold));
+            }
+        }
+    }
+
+    /**
+     * Processes only the pixels inside {@code clip}, read as {x0, y0, x1, y1} with exclusive
+     * upper bounds.
+     */
+    public void apply(ISearchBitmap sourceBitmap, ISearchBitmap destBitmap, int[] clip, double threshold) {
+        for (int row = clip[1]; row < clip[3]; row++) {
+            for (int col = clip[0]; col < clip[2]; col++) {
+                destBitmap.set(col, row, sobelMagnitudeBit(sourceBitmap, col, row, threshold));
+            }
+        }
+    }
+
+    /** Returns 0 when the gradient magnitude at (x, y) is below {@code threshold}, else 1. */
+    private int sobelMagnitudeBit(ISearchBitmap bitmap, int x, int y, double threshold) {
+        final float gx = this.edgeX.sobel(bitmap, x, y);
+        final float gy = this.edgeY.sobel(bitmap, x, y);
+        final double magnitude = Math.sqrt(gx * gx + gy * gy);
+        return (magnitude < threshold) ? 0 : 1;
+    }
+
+    private Template edgeX = new Template(new float[][] { { 1, 0, -1 }, { 2, 0, -2 }, { 1, 0, -1 } });
+
+    private Template edgeY = new Template(new float[][] { { 1, 2, 1 }, { 0, 0, 0 }, { -1, -2, -1 } });
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/shapeextractor/RectangleExtractor.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/shapeextractor/RectangleExtractor.java
new file mode 100644
index 00000000..52998865
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/cv/shapeextractor/RectangleExtractor.java
@@ -0,0 +1,170 @@
+package com.github.romualdrousseau.any2json.commons.cv.shapeextractor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.github.romualdrousseau.any2json.commons.cv.ISearchBitmap;
+import com.github.romualdrousseau.any2json.commons.cv.IShapeExtractor;
+import com.github.romualdrousseau.any2json.commons.cv.SearchPoint;
+import com.github.romualdrousseau.any2json.commons.cv.Template;
+import com.github.romualdrousseau.any2json.commons.cv.TemplateMatcher;
+
+/**
+ * Finds axis-aligned rectangles in a bitmap by matching four 3x3 corner templates and
+ * pairing the matches that face each other.
+ */
+public class RectangleExtractor extends IShapeExtractor {
+
+    /**
+     * Returns all distinct candidate rectangles; when more than one is found they are passed
+     * through {@link SearchPoint#RemoveOverlaps(List)}.
+     */
+    @Override
+    public List<SearchPoint[]> extractAll(ISearchBitmap searchBitmap) {
+        final List<SearchPoint[]> result = new ArrayList<>();
+        for (final SearchPoint[] bbox : candidateBoxes(searchBitmap)) {
+            if (SearchPoint.isValid(bbox) && !SearchPoint.IsDuplicate(bbox, result)) {
+                result.add(bbox);
+            }
+        }
+        if (result.size() > 1) {
+            return SearchPoint.RemoveOverlaps(result);
+        } else {
+            return result;
+        }
+    }
+
+    /**
+     * Returns the candidate rectangle with the largest area, or {@code null} when no
+     * candidate has an area above 0.
+     */
+    @Override
+    public SearchPoint[] extractBest(ISearchBitmap searchBitmap) {
+        SearchPoint[] result = null;
+        int maxArea = 0;
+        for (final SearchPoint[] bbox : candidateBoxes(searchBitmap)) {
+            final int area = SearchPoint.GetArea(bbox);
+            if (area > maxArea) {
+                maxArea = area;
+                result = bbox;
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Builds the bounding box of every corner group with at least three corners and keeps
+     * those whose four corner cells are set in the bitmap.
+     */
+    private List<SearchPoint[]> candidateBoxes(ISearchBitmap searchBitmap) {
+        final List<List<SearchPoint>> allCorners = findCorners(searchBitmap);
+        final List<SearchPoint[]> boxes = new ArrayList<>();
+
+        // Simple version of Hough transformation with 4 pre-defined rotations
+        for (int phi = 0; phi < allCorners.size(); phi++) {
+            for (SearchPoint corner : allCorners.get(phi)) {
+                final SearchPoint[] a = houghTransform(phi, corner, allCorners);
+                if (count(a) < (a.length - 1)) {
+                    continue;
+                }
+
+                final SearchPoint[] bbox = minmax(phi, a);
+                if (cornersAreSet(searchBitmap, bbox)) {
+                    boxes.add(bbox);
+                }
+            }
+        }
+        return boxes;
+    }
+
+    /**
+     * Matches the four corner templates over the whole bitmap. The list order (top-left,
+     * top-right, bottom-right, bottom-left) is the index space of {@link #R}.
+     */
+    private List<List<SearchPoint>> findCorners(ISearchBitmap searchBitmap) {
+        final int w = searchBitmap.getWidth();
+        final int h = searchBitmap.getHeight();
+        final List<List<SearchPoint>> allCorners = new ArrayList<>();
+        allCorners.add(cornerTopLeft.matchAll(searchBitmap, 0, 0, w, h, 0.8));
+        allCorners.add(cornerTopRight.matchAll(searchBitmap, 0, 0, w, h, 0.8));
+        allCorners.add(cornerBottomRight.matchAll(searchBitmap, 0, 0, w, h, 0.8));
+        allCorners.add(cornerBottomLeft.matchAll(searchBitmap, 0, 0, w, h, 0.8));
+        return allCorners;
+    }
+
+    /** Returns {@code true} when all four corner cells of the box hold a value above 0. */
+    private boolean cornersAreSet(ISearchBitmap searchBitmap, SearchPoint[] bbox) {
+        return searchBitmap.get(bbox[0].getX(), bbox[0].getY()) > 0
+                && searchBitmap.get(bbox[1].getX(), bbox[0].getY()) > 0
+                && searchBitmap.get(bbox[1].getX(), bbox[1].getY()) > 0
+                && searchBitmap.get(bbox[0].getX(), bbox[1].getY()) > 0;
+    }
+
+    /**
+     * For a corner of kind {@code phi}, picks for each other corner kind the closest match
+     * lying in the direction {@link #R} expects; missing kinds stay {@code null}.
+     */
+    private SearchPoint[] houghTransform(int phi, SearchPoint locus, List<List<SearchPoint>> points) {
+        final SearchPoint[] a = { null, null, null, null };
+
+        a[phi] = locus;
+
+        for (int j = 0; j < points.size(); j++) {
+            if (j == phi) {
+                continue;
+            }
+            for (SearchPoint point : points.get(j)) {
+                final int[] g = gradient(locus, point);
+                if (g[0] == R[phi][j][0] && g[1] == R[phi][j][1]) {
+                    if (a[j] == null || distance(locus, point) < distance(locus, a[j])) {
+                        a[j] = point;
+                    }
+                }
+            }
+        }
+
+        return a;
+    }
+
+    /** Returns the sign (-1, 0 or 1) of the x and y offsets from {@code p1} to {@code p2}. */
+    private int[] gradient(SearchPoint p1, SearchPoint p2) {
+        final int vx = Integer.signum(p2.getX() - p1.getX());
+        final int vy = Integer.signum(p2.getY() - p1.getY());
+        return new int[] { vx, vy };
+    }
+
+    /** Euclidean distance between two points. */
+    private double distance(SearchPoint p1, SearchPoint p2) {
+        final double vx = p1.getX() - p2.getX();
+        final double vy = p1.getY() - p2.getY();
+        return Math.sqrt(vx * vx + vy * vy);
+    }
+
+    /** Counts the non-null entries among the four corner slots. */
+    private int count(SearchPoint[] points) {
+        int count = 0;
+        for (int k = 0; k < 4; k++) {
+            if (points[k] != null) {
+                count++;
+            }
+        }
+        return count;
+    }
+
+    /** Returns the bounding box {min corner, max corner} of the non-null corner slots. */
+    private SearchPoint[] minmax(int phi, SearchPoint[] points) {
+        int minX = points[phi].getX();
+        int minY = points[phi].getY();
+        int maxX = points[phi].getX();
+        int maxY = points[phi].getY();
+        for (int k = 0; k < 4; k++) {
+            if (k != phi && points[k] != null) {
+                minX = Math.min(minX, points[k].getX());
+                minY = Math.min(minY, points[k].getY());
+                maxX = Math.max(maxX, points[k].getX());
+                maxY = Math.max(maxY, points[k].getY());
+            }
+        }
+        return new SearchPoint[] { new SearchPoint(minX, minY), new SearchPoint(maxX, maxY) };
+    }
+
+    // R[from][to] is the (sign dx, sign dy) expected when going from corner kind "from" to
+    // corner kind "to". Entries holding 2 or -2 can never equal a sign, so the diagonally
+    // opposite corner is never paired directly.
+    private int R[][][] = { { { 0, 0 }, { 1, 0 }, { 2, 2 }, { 0, 1 } }, { { -1, 0 }, { 0, 0 }, { 0, 1 }, { -2, 2 } },
+            { { -2, -2 }, { 0, -1 }, { 0, 0 }, { -1, 0 } }, { { 0, -1 }, { 2, -2 }, { 1, 0 }, { 0, 0 } } };
+
+    private TemplateMatcher cornerTopLeft = new TemplateMatcher(
+            new Template(new float[][] { { 0, 0, 0 }, { 0, 1, 1 }, { 0, 1, 1 } }));
+
+    private TemplateMatcher cornerTopRight = new TemplateMatcher(
+            new Template(new float[][] { { 0, 0, 0 }, { 1, 1, 0 }, { 1, 1, 0 } }));
+
+    private TemplateMatcher cornerBottomLeft = new TemplateMatcher(
+            new Template(new float[][] { { 0, 1, 1 }, { 0, 1, 1 }, { 0, 0, 0 } }));
+
+    private TemplateMatcher cornerBottomRight = new TemplateMatcher(
+            new Template(new float[][] { { 1, 1, 0 }, { 1, 1, 0 }, { 0, 0, 0 } }));
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSON.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSON.java
new file mode 100644
index 00000000..ec6a0f23
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSON.java
@@ -0,0 +1,134 @@
+package com.github.romualdrousseau.any2json.commons.json;
+
+import java.lang.reflect.InvocationTargetException;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+import org.reflections.Reflections;
+
+/**
+ * Static facade over a {@link JSONFactory} implementation discovered by classpath scanning
+ * when this class is initialised.
+ */
+public class JSON {
+    // NOTE(review): this prefix still names the "shuju" package while this class lives under
+    // any2json.commons - confirm that the factory implementations are really located there.
+    public final static String PACKAGE_LOADER_PREFIX = "com.github.romualdrousseau.shuju.json";
+
+    private static JSONFactory Factory;
+    static {
+        final Reflections reflections = new Reflections(PACKAGE_LOADER_PREFIX);
+        // Fail with an explicit message instead of a bare "No value present".
+        JSON.Factory = reflections.getSubTypesOf(JSONFactory.class).stream()
+                .map(JSON::newFactoryInstance)
+                .findFirst()
+                .orElseThrow(() -> new IllegalStateException(
+                        "No JSONFactory implementation found under package " + PACKAGE_LOADER_PREFIX));
+    }
+
+    /** Instantiates a factory through its public no-argument constructor. */
+    private static JSONFactory newFactoryInstance(Class<? extends JSONFactory> clazz) {
+        try {
+            return clazz.getConstructor().newInstance();
+        } catch (InstantiationException | IllegalAccessException
+                | IllegalArgumentException | InvocationTargetException
+                | NoSuchMethodException | SecurityException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public static JSONArray newArray() {
+        return JSON.Factory.newArray();
+    }
+
+    public static JSONArray arrayOf(String data) {
+        return JSON.Factory.parseArray(data);
+    }
+
+    public static JSONArray arrayOf(Object object) {
+        return JSON.Factory.parseArray(object);
+    }
+
+    /** Builds an array holding the list elements in iteration order. */
+    public static JSONArray arrayOf(final List<?> l) {
+        final JSONArray array = JSON.newArray();
+        l.forEach(s -> array.append(s));
+        return array;
+    }
+
+    /** Builds an array holding the stream elements in encounter order; consumes the stream. */
+    public static JSONArray arrayOf(final Stream<?> l) {
+        final JSONArray array = JSON.newArray();
+        l.forEach(s -> array.append(s));
+        return array;
+    }
+
+    /** Builds an array of {@code {"key": k, "value": v}} objects, one per map entry. */
+    public static JSONArray arrayOf(final Map<?, ?> m) {
+        final JSONArray array = JSON.newArray();
+        m.forEach((k, v) -> {
+            final JSONObject pair = JSON.newObject();
+            pair.set("key", k);
+            pair.set("value", v);
+            array.append(pair);
+        });
+        return array;
+    }
+
+    public static JSONArray loadArray(Path filePath) {
+        return JSON.Factory.loadArray(filePath);
+    }
+
+    public static void saveArray(JSONArray a, Path filePath) {
+        JSON.Factory.saveArray(a, filePath, false);
+    }
+
+    public static void saveArray(JSONArray a, Path filePath, final boolean pretty) {
+        JSON.Factory.saveArray(a, filePath, pretty);
+    }
+
+    public static JSONObject newObject() {
+        return JSON.Factory.newObject();
+    }
+
+    public static JSONObject objectOf(String data) {
+        return JSON.Factory.parseObject(data);
+    }
+
+    public static JSONObject objectOf(Object object) {
+        return JSON.Factory.parseObject(object);
+    }
+
+    /** Builds an object with one member per map entry. */
+    public static JSONObject objectOf(final Map<String, ?> m) {
+        final JSONObject object = JSON.newObject();
+        m.forEach((k, v) -> object.set(k, v));
+        return object;
+    }
+
+    public static JSONObject loadObject(Path filePath) {
+        return JSON.Factory.loadObject(filePath);
+    }
+
+    public static void saveObject(JSONObject o, Path filePath) {
+        JSON.Factory.saveObject(o, filePath, false);
+    }
+
+    public static void saveObject(JSONObject o, Path filePath, final boolean pretty) {
+        JSON.Factory.saveObject(o, filePath, pretty);
+    }
+
+    /**
+     * Walks a dot-separated path through nested arrays and objects.
+     *
+     * <p>Each token is used as an index when the current node is a {@link JSONArray} and as
+     * a member name when it is a {@link JSONObject}. Any other node, or a missing element,
+     * ends the walk with an empty result. The cast to {@code T} is unchecked.
+     *
+     * @param a root node, normally a JSONArray or JSONObject
+     * @param q path such as {@code "items.0.name"}
+     * @return the value found, or empty when the path cannot be resolved
+     * @throws NumberFormatException when a token applied to an array is not an integer
+     */
+    @SuppressWarnings("unchecked")
+    public static <T> Optional<T> query(final Object a, final String q) {
+        Object curr = a;
+        for (final String token : q.split("\\.")) {
+            if (curr instanceof JSONArray) {
+                final int i = Integer.parseInt(token);
+                curr = ((JSONArray) curr).get(i).orElse(null);
+            } else if (curr instanceof JSONObject) {
+                curr = ((JSONObject) curr).get(token).orElse(null);
+            } else {
+                // Nothing left to descend into; the remaining tokens cannot match.
+                return Optional.empty();
+            }
+        }
+        return Optional.ofNullable((T) curr);
+    }
+
+    /**
+     * Resolves {@code q} like {@link #query(Object, String)} and streams the elements of the
+     * array found there; yields an empty stream when the path does not lead to an array.
+     */
+    public static <T> Stream<T> queryStream(final Object a, final String q) {
+        return JSON.<Object>query(a, q)
+                .filter(o -> o instanceof JSONArray)
+                .map(o -> ((JSONArray) o).<T>stream())
+                .orElse(Stream.empty());
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONArray.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONArray.java
new file mode 100644
index 00000000..4f099dd9
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONArray.java
@@ -0,0 +1,45 @@
+package com.github.romualdrousseau.any2json.commons.json;
+
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+/**
+ * Ordered, index-addressed JSON array. Element access is generic: the caller chooses the
+ * expected element type at each call and the conversion is not checked here.
+ */
+public interface JSONArray {
+
+    /** Returns the number of elements. */
+    int size();
+
+    /** Returns the element at index {@code i} wrapped in an Optional. */
+    <T> Optional<T> get(int i);
+
+    /** Replaces the element at index {@code i}; returns an array for chaining. */
+    <T> JSONArray set(int i, T o);
+
+    /** Adds {@code o} at the end; returns an array for chaining. */
+    <T> JSONArray append(T o);
+
+    /** Removes the element at index {@code i}; returns an array for chaining. */
+    JSONArray remove(int i);
+
+    String toString(final boolean pretty);
+
+    String toString();
+
+    /**
+     * Streams the elements in index order. The size is re-read before every element, and an
+     * element whose Optional is empty makes the stream fail with
+     * {@link java.util.NoSuchElementException}.
+     */
+    default <T> Stream<T> stream() {
+        final Iterable<T> it = () -> new Iterator<T>() {
+            private int idx = 0;
+
+            @Override
+            public boolean hasNext() {
+                return idx < JSONArray.this.size();
+            }
+
+            @Override
+            public T next() {
+                return JSONArray.this.<T>get(idx++).get();
+            }
+        };
+        return StreamSupport.stream(it.spliterator(), false);
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONCollector.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONCollector.java
new file mode 100644
index 00000000..376fac80
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONCollector.java
@@ -0,0 +1,16 @@
+package com.github.romualdrousseau.any2json.commons.json;
+
+import java.util.Map;
+import java.util.stream.Collector;
+import java.util.stream.Collectors;
+
+/**
+ * Collectors turning a stream of {@link JSONObject} into a map, reading the map key and the
+ * map value from two named members of each object.
+ */
+public class JSONCollector {
+
+    /**
+     * Collects into a map; each object contributes {@code object[key] -> object[value]}.
+     * Fails when a member is absent (empty Optional) or when two objects yield the same key.
+     *
+     * @param key   name of the member used as map key
+     * @param value name of the member used as map value
+     */
+    public static <T> Collector<JSONObject, ?, Map<String, T>> toMap(final String key, final String value) {
+        return Collectors.toMap(x -> x.<String>get(key).get(), x -> x.<T>get(value).get());
+    }
+
+    /** Same as {@link #toMap(String, String)} but the resulting map cannot be modified. */
+    public static <T> Collector<JSONObject, ?, Map<String, T>> toUnmodifiableMap(final String key, final String value) {
+        return Collectors.toUnmodifiableMap(x -> x.<String>get(key).get(), x -> x.<T>get(value).get());
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONFactory.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONFactory.java
new file mode 100644
index 00000000..a94334b0
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONFactory.java
@@ -0,0 +1,26 @@
+package com.github.romualdrousseau.any2json.commons.json;
+
+import java.nio.file.Path;
+
+/**
+ * Provider of JSON arrays and objects. The static helpers of {@code JSON} forward to the
+ * single implementation of this interface that they instantiate.
+ */
+public interface JSONFactory {
+
+ /** Creates a new array. */
+ JSONArray newArray();
+
+ /** Builds an array from its textual form. */
+ JSONArray parseArray(String data);
+
+ /** Builds an array from an arbitrary object; the accepted types depend on the implementation. */
+ JSONArray parseArray(Object object);
+
+ /** Reads an array from the given file. */
+ JSONArray loadArray(Path filePath);
+
+ /** Writes an array to the given file; {@code pretty} selects the pretty-printed form. */
+ void saveArray(JSONArray a, Path filePath, boolean pretty);
+
+ /** Creates a new object. */
+ JSONObject newObject();
+
+ /** Builds an object from its textual form. */
+ JSONObject parseObject(String data);
+
+ /** Builds an object from an arbitrary object; the accepted types depend on the implementation. */
+ JSONObject parseObject(Object object);
+
+ /** Reads an object from the given file. */
+ JSONObject loadObject(Path filePath);
+
+ /** Writes an object to the given file; {@code pretty} selects the pretty-printed form. */
+ void saveObject(JSONObject o, Path filePath, boolean pretty);
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONObject.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONObject.java
new file mode 100644
index 00000000..925873d0
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/json/JSONObject.java
@@ -0,0 +1,18 @@
+package com.github.romualdrousseau.any2json.commons.json;
+
+import java.util.Optional;
+
+/**
+ * JSON object addressed by member name. Member access is generic: the caller chooses the
+ * expected value type at each call and the conversion is not checked here.
+ */
+public interface JSONObject {
+
+    /** Returns the member names of this object. */
+    Iterable<String> keys();
+
+    /** Returns the member named {@code k} wrapped in an Optional. */
+    <T> Optional<T> get(String k);
+
+    /** Sets the member named {@code k}; returns an object for chaining. */
+    <T> JSONObject set(String k, T o);
+
+    /** Removes the member named {@code k}; returns an object for chaining. */
+    JSONObject remove(String k);
+
+    String toString(final boolean pretty);
+
+    String toString();
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/Text.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/Text.java
new file mode 100644
index 00000000..490a1cff
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/Text.java
@@ -0,0 +1,149 @@
+package com.github.romualdrousseau.any2json.commons.preprocessing;
+
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import com.github.romualdrousseau.any2json.commons.types.CollectionUtils;
+import com.github.romualdrousseau.any2json.commons.json.JSON;
+import com.github.romualdrousseau.any2json.commons.json.JSONArray;
+import com.github.romualdrousseau.any2json.commons.preprocessing.hasher.DefaultHasher;
+import com.github.romualdrousseau.any2json.commons.preprocessing.tokenizer.DefaultTokenizer;
+import com.github.romualdrousseau.any2json.commons.preprocessing.comparer.DefaultComparer;
+
+/**
+ * Text preprocessing helpers: tokenising, hashing words to integers, encoding labels against
+ * a class list, and padding or mutating integer sequences.
+ */
+public class Text {
+
+    /** Splits a text into words. */
+    public interface ITokenizer extends Function<String, List<String>> {
+    }
+
+    /** Maps a word to an integer. */
+    public interface IHasher extends Function<String, Integer> {
+    }
+
+    /** Decides whether a class (first argument) matches a list of labels (second argument). */
+    public interface IComparer extends BiFunction<String, List<String>, Boolean> {
+        String anonymize(String v);
+        String anonymize(final String v, final String pattern);
+        Optional<String> find(String v);
+        Optional<String> find(final String v, final String pattern);
+    }
+
+    public static ITokenizer DefaultTokenizer = new DefaultTokenizer();
+
+    public static IHasher DefaultHasher = new DefaultHasher();
+
+    public static IComparer DefaultComparer = new DefaultComparer();
+
+    // Regular expressions whose matches are replaced by a space before tokenising.
+    public static List<String> DefaultFilters = List.of("[\\\\!\"#$%&()*+,-./:;<=>?@\\[\\]^_`{|}~\\t\\n]");
+
+    // Longest strings first.
+    public static Comparator<String> ComparatorByLength = (a, b) -> b.length() - a.length();
+
+    /**
+     * Parses lexicon lines of comma-separated variants. The first variant of a line is the
+     * map key; the value is the line's distinct variants, longest first. Two lines with the
+     * same first variant make the collector throw.
+     */
+    public static Map<String, List<String>> get_lexicon(List<String> lexicon) {
+        return lexicon.stream()
+                .map(w -> List.of(w.split(",")))
+                .collect(Collectors.toMap(
+                        w -> w.get(0),
+                        w -> w.stream().distinct().sorted(Text.ComparatorByLength).toList()));
+    }
+
+    public static List<String> all_words(final List<String> documents) {
+        return Text.all_words(documents, Text.DefaultFilters);
+    }
+
+    public static List<String> all_words(final List<String> documents, final List<String> filters) {
+        return Text.all_words(documents, filters, Text.DefaultTokenizer);
+    }
+
+    /** Returns the sorted, distinct words of all non-null documents. */
+    public static List<String> all_words(final List<String> documents, final List<String> filters,
+            final ITokenizer tokenizer) {
+        return documents.stream()
+                .flatMap(d -> d != null ? Text.to_words(d, filters, tokenizer).stream() : Stream.empty())
+                .distinct().sorted().toList();
+    }
+
+    public static List<String> to_words(final String text) {
+        return Text.to_words(text, Text.DefaultFilters);
+    }
+
+    public static List<String> to_words(final String text, final List<String> filters) {
+        return Text.to_words(text, filters, Text.DefaultTokenizer);
+    }
+
+    /**
+     * Replaces every match of each filter (applied case-insensitively, in list order) by a
+     * space, then tokenises the result.
+     */
+    public static List<String> to_words(final String text, final List<String> filters, final ITokenizer tokenizer) {
+        return tokenizer.apply(filters.stream().reduce(text, (a, x) -> a.replaceAll("(?i)" + x, " ")));
+    }
+
+    public static List<Integer> to_categorical(final String label, final List<String> classes) {
+        return Text.to_categorical(label, classes, Text.DefaultComparer);
+    }
+
+    public static List<Integer> to_categorical(final String label, final List<String> classes,
+            final IComparer comparer) {
+        return Text.to_categorical(List.of(label), classes, comparer);
+    }
+
+    public static List<Integer> to_categorical(final List<String> labels, final List<String> classes) {
+        return Text.to_categorical(labels, classes, Text.DefaultComparer);
+    }
+
+    /** Returns one entry per class: 1 when the comparer matches the class against the labels, else 0. */
+    public static List<Integer> to_categorical(final List<String> labels, final List<String> classes,
+            final IComparer comparer) {
+        return classes.stream().map(c -> comparer.apply(c, labels) ? 1 : 0).toList();
+    }
+
+    public static String anonymize(final String label, final IComparer comparer) {
+        return Text.anonymize(List.of(label), comparer).get(0);
+    }
+
+    public static List<String> anonymize(final List<String> labels, final IComparer comparer) {
+        return labels.stream().map(l -> comparer.anonymize(l)).toList();
+    }
+
+    public static List<Integer> one_hot(final String text) {
+        return Text.one_hot(text, Text.DefaultFilters, Text.DefaultTokenizer, Text.DefaultHasher);
+    }
+
+    public static List<Integer> one_hot(final String text, final List<String> filters) {
+        return Text.one_hot(text, filters, Text.DefaultTokenizer, Text.DefaultHasher);
+    }
+
+    public static List<Integer> one_hot(final String text, final List<String> filters, final ITokenizer tokenizer) {
+        return Text.one_hot(text, filters, tokenizer, Text.DefaultHasher);
+    }
+
+    /** Tokenises the text and maps every word through the hasher, keeping word order. */
+    public static List<Integer> one_hot(final String text, final List<String> filters, final ITokenizer tokenizer,
+            IHasher hasher) {
+        return Text.to_words(text, filters, tokenizer).stream().map(hasher).toList();
+    }
+
+    public static List<Integer> pad_sequence(final List<Integer> sequence, final int maxLen) {
+        return Text.pad_sequence(sequence, maxLen, 0);
+    }
+
+    /**
+     * Appends {@code value} until the sequence has {@code maxLen} entries. A sequence that is
+     * already that long or longer is copied unchanged (it is not truncated).
+     */
+    public static List<Integer> pad_sequence(final List<Integer> sequence, final int maxLen, final int value) {
+        final IntStream padding = IntStream.range(sequence.size(), maxLen).map(x -> value);
+        return Stream.concat(sequence.stream(), padding.boxed()).toList();
+    }
+
+    public static List<Integer> mutate_sequence(final List<Integer> sequence) {
+        return Text.mutate_sequence(sequence, 0.1f, 0);
+    }
+
+    public static List<Integer> mutate_sequence(final List<Integer> sequence, final float p) {
+        return Text.mutate_sequence(sequence, p, 0);
+    }
+
+    /**
+     * Returns the sequence in the order produced by {@code CollectionUtils.shuffle}, with each
+     * entry replaced by {@code value} with probability {@code p}; every entry equal to
+     * {@code value} (replaced or original) is then removed. The result is random.
+     */
+    public static List<Integer> mutate_sequence(final List<Integer> sequence, final float p, final int value) {
+        final var shuffler = CollectionUtils.shuffle(CollectionUtils.mutableRange(0, sequence.size()));
+        final Function<Integer, Integer> mutator = x -> Math.random() < p ? value : sequence.get(x);
+        return shuffler.stream().map(mutator).filter(x -> x != value).toList();
+    }
+
+    /** Copies the sequence into a new JSON array. */
+    public static JSONArray json_sequence(final List<Integer> sequence) {
+        final JSONArray result = JSON.newArray();
+        sequence.forEach(x -> result.append(x));
+        return result;
+    }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/comparer/DefaultComparer.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/comparer/DefaultComparer.java
new file mode 100644
index 00000000..55df0523
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/comparer/DefaultComparer.java
@@ -0,0 +1,34 @@
+package com.github.romualdrousseau.any2json.commons.preprocessing.comparer;
+
+import java.util.List;
+import java.util.Optional;
+
+import com.github.romualdrousseau.any2json.commons.preprocessing.Text;
+
+ /**
+  * Pass-through {@code Text.IComparer}: comparison is plain list membership, anonymization
+  * returns its input untouched and the find operations never report a match.
+  */
+ public class DefaultComparer implements Text.IComparer {
+
+ /** Returns whether {@code b} contains {@code a} according to {@code List.contains}. */
+ @Override
+ public Boolean apply(final String a, final List b) {
+ return b.contains(a);
+ }
+
+ /** Returns {@code v} unchanged. */
+ @Override
+ public String anonymize(final String v) {
+ return v;
+ }
+
+ /** Returns {@code v} unchanged; {@code pattern} is ignored. */
+ @Override
+ public String anonymize(final String v, final String pattern) {
+ return v;
+ }
+
+ /** Always returns an empty {@code Optional}. */
+ @Override
+ public Optional find(final String v) {
+ return Optional.empty();
+ }
+
+ /** Always returns an empty {@code Optional}; {@code pattern} is ignored. */
+ @Override
+ public Optional find(final String v, final String pattern) {
+ return Optional.empty();
+ }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/comparer/RegexComparer.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/comparer/RegexComparer.java
new file mode 100644
index 00000000..8100580a
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/comparer/RegexComparer.java
@@ -0,0 +1,72 @@
+package com.github.romualdrousseau.any2json.commons.preprocessing.comparer;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import com.github.romualdrousseau.any2json.commons.preprocessing.Text;
+
+ /**
+  * {@code Text.IComparer} driven by a map whose keys are regular expressions and whose values
+  * are the labels associated with them.
+  * Every key is compiled once, case-insensitively (Unicode aware), when the comparer is built.
+  * The map passed to the constructor is kept by reference and must not gain new keys afterwards.
+  */
+ public class RegexComparer implements Text.IComparer {
+
+     private final Map patterns;
+     private final Map compiledPatterns;
+
+     /**
+      * @param patterns regular expression to label; its iteration order defines the order in
+      *                 which the expressions are tried
+      */
+     public RegexComparer(final Map patterns) {
+         this.patterns = patterns;
+         this.compiledPatterns = patterns.keySet().stream()
+                 .collect(Collectors.toUnmodifiableMap(r -> r, this::compileRegex));
+     }
+
+     /**
+      * Returns true when at least one expression labelled {@code a} is found in at least one
+      * non-null element of {@code b}; a null {@code a} never matches.
+      */
+     @Override
+     public Boolean apply(final String a, final List b) {
+         return (a == null) ? false
+                 : this.patterns.entrySet().stream()
+                         .filter(p -> p.getValue().equals(a))
+                         .map(p -> this.compiledPatterns.get(p.getKey()).matcher(""))
+                         .anyMatch(m -> b.stream().anyMatch(v -> v != null && m.reset(v).find()));
+     }
+
+     /**
+      * Replaces every match of every expression in {@code v} by the label of that expression.
+      * The expressions are applied one after the other, each on the output of the previous one.
+      * The label is used as a regex replacement string, so {@code $} and {@code \} in it are
+      * interpreted by {@code Matcher.replaceAll}.
+      */
+     @Override
+     public String anonymize(final String v) {
+         return (v == null) ? null
+                 : this.patterns.entrySet().stream()
+                         .reduce(v, (r, e) -> this.compiledPatterns.get(e.getKey()).matcher(r).replaceAll(e.getValue()),
+                                 (res1, res2) -> res1);
+     }
+
+     /**
+      * Same as {@link #anonymize(String)} but restricted to the expressions labelled {@code filter}.
+      */
+     @Override
+     public String anonymize(final String v, final String filter) {
+         return (v == null) ? null
+                 : this.patterns.entrySet().stream()
+                         .filter(e -> e.getValue().equals(filter))
+                         .reduce(v, (r, e) -> this.compiledPatterns.get(e.getKey()).matcher(r).replaceAll(e.getValue()),
+                                 (res1, res2) -> res1);
+     }
+
+     /**
+      * Returns the text matched by the first expression found in {@code v}, if any.
+      * Expressions are tried in the iteration order of the map given to the constructor.
+      */
+     @Override
+     public Optional find(final String v) {
+         // Walk the caller's map rather than compiledPatterns: the latter is an unmodifiable map
+         // whose iteration order is unspecified, which made "first match" vary between runs.
+         return (v == null) ? Optional.empty()
+                 : this.patterns.keySet().stream()
+                         .map(r -> this.compiledPatterns.get(r).matcher(v))
+                         .filter(m -> m.find())
+                         .map(m -> m.group())
+                         .findFirst();
+     }
+
+     /**
+      * Same as {@link #find(String)} but restricted to the expressions labelled {@code filter}.
+      */
+     @Override
+     public Optional find(final String v, final String filter) {
+         return (v == null) ? Optional.empty()
+                 : this.patterns.entrySet().stream()
+                         .filter(p -> p.getValue().equals(filter))
+                         .map(p -> this.compiledPatterns.get(p.getKey()).matcher(v))
+                         .filter(m -> m.find())
+                         .map(m -> m.group())
+                         .findFirst();
+     }
+
+     /** Compiles {@code r} as a case-insensitive, Unicode-aware pattern. */
+     private Pattern compileRegex(final String r) {
+         return Pattern.compile(r, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
+     }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/hasher/DefaultHasher.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/hasher/DefaultHasher.java
new file mode 100644
index 00000000..08121d84
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/hasher/DefaultHasher.java
@@ -0,0 +1,11 @@
+package com.github.romualdrousseau.any2json.commons.preprocessing.hasher;
+
+import com.github.romualdrousseau.any2json.commons.preprocessing.Text;
+
+ /**
+  * {@code Text.IHasher} that maps a word to its {@code String.hashCode()}.
+  */
+ public class DefaultHasher implements Text.IHasher {
+
+ /** Returns {@code w.hashCode()}; {@code w} must not be null. */
+ @Override
+ public Integer apply(final String w) {
+ return w.hashCode();
+ }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/hasher/VocabularyHasher.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/hasher/VocabularyHasher.java
new file mode 100644
index 00000000..c051e5e1
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/hasher/VocabularyHasher.java
@@ -0,0 +1,20 @@
+package com.github.romualdrousseau.any2json.commons.preprocessing.hasher;
+
+import java.util.Collections;
+import java.util.List;
+
+import com.github.romualdrousseau.any2json.commons.preprocessing.Text;
+
+ /**
+  * {@code Text.IHasher} that maps a word to its 1-based rank in a vocabulary, 0 meaning
+  * "not in the vocabulary".
+  * The lookup is a binary search, so the vocabulary is expected to be sorted in natural order.
+  */
+ public class VocabularyHasher implements Text.IHasher {
+
+     private final List vocabulary;
+
+     /**
+      * @param vocabulary the known words; kept by reference, not copied
+      */
+     public VocabularyHasher(final List vocabulary) {
+         this.vocabulary = vocabulary;
+     }
+
+     /**
+      * Returns the position of {@code w} in the vocabulary plus one, or 0 when it is absent.
+      */
+     @Override
+     public Integer apply(final String w) {
+         final int position = Collections.binarySearch(this.vocabulary, w);
+         // A negative position encodes the insertion point of a missing word
+         return position < 0 ? 0 : position + 1;
+     }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/DefaultTokenizer.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/DefaultTokenizer.java
new file mode 100644
index 00000000..19e06c94
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/DefaultTokenizer.java
@@ -0,0 +1,15 @@
+package com.github.romualdrousseau.any2json.commons.preprocessing.tokenizer;
+
+import java.util.Arrays;
+import java.util.List;
+
+import com.github.romualdrousseau.any2json.commons.preprocessing.Text;
+import com.github.romualdrousseau.any2json.commons.strings.StringUtils;
+
+ /**
+  * Default {@code Text.ITokenizer}: lower-cases the input and splits it on runs of whitespace.
+  */
+ public class DefaultTokenizer implements Text.ITokenizer {
+
+     /**
+      * Splits {@code w} into lower-case, whitespace-separated tokens.
+      *
+      * @param w the text to tokenize; may be null
+      * @return the tokens in order of appearance; empty when {@code w} is null or contains only
+      *         whitespace. No token is ever empty.
+      */
+     @Override
+     public List apply(final String w) {
+         final String normalized = StringUtils.normalizeWhiteSpaces(w);
+         if (normalized == null) {
+             return Arrays.asList();
+         }
+
+         // Trim first: String.split keeps a leading empty string when the input starts with a
+         // separator, which used to surface as an empty first token.
+         final String s = normalized.trim().toLowerCase();
+         if (s.isEmpty()) {
+             return Arrays.asList();
+         }
+
+         // "\\s+" is the regex whitespace class. The former "\s+" was the Java string escape for
+         // a single space and only worked because whitespace had been normalized beforehand.
+         return Arrays.asList(s.split("\\s+"));
+     }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/NgramTokenizer.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/NgramTokenizer.java
new file mode 100644
index 00000000..b1e8529b
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/NgramTokenizer.java
@@ -0,0 +1,37 @@
+package com.github.romualdrousseau.any2json.commons.preprocessing.tokenizer;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.github.romualdrousseau.any2json.commons.preprocessing.Text;
+import com.github.romualdrousseau.any2json.commons.strings.StringUtils;
+
+ /**
+  * {@code Text.ITokenizer} producing the lower-case character n-grams of its input once every
+  * whitespace and underscore has been removed.
+  */
+ public class NgramTokenizer implements Text.ITokenizer {
+
+     private final int n;
+
+     /**
+      * @param n the number of characters per token
+      */
+     public NgramTokenizer(final int n) {
+         this.n = n;
+     }
+
+     /**
+      * Returns every window of {@code n} consecutive characters of {@code w}, left to right.
+      * Inputs shorter than {@code n} are right-padded with '?' so that one token is produced.
+      */
+     @Override
+     public List apply(final String w) {
+         // Glue the words together: whitespace and underscores do not take part in the n-grams
+         String compact = StringUtils.normalizeWhiteSpaces(w).replaceAll("[\\s_]+", "").trim();
+
+         // Pad with ? to have at least one token
+         final int missing = this.n - compact.length();
+         if (missing > 0) {
+             compact = compact + "?".repeat(missing);
+         }
+
+         final ArrayList grams = new ArrayList();
+         final int lastStart = compact.length() - this.n;
+         for (int start = 0; start <= lastStart; start++) {
+             grams.add(compact.substring(start, start + this.n).toLowerCase());
+         }
+         return grams;
+     }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/ShingleTokenizer.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/ShingleTokenizer.java
new file mode 100644
index 00000000..fb7d90aa
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/preprocessing/tokenizer/ShingleTokenizer.java
@@ -0,0 +1,87 @@
+package com.github.romualdrousseau.any2json.commons.preprocessing.tokenizer;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import com.github.romualdrousseau.any2json.commons.preprocessing.Text;
+import com.github.romualdrousseau.any2json.commons.strings.StringUtils;
+
+public class ShingleTokenizer implements Text.ITokenizer {
+
+ private static final int MIN_SIZE = 2;
+
+ private static final ThreadLocal CAMEL_PATTERN = new ThreadLocal() {
+ @Override
+ protected Pattern initialValue() {
+ return Pattern.compile("(?>> variants;
+ private final int minSize;
+
+ private boolean lemmatization;
+
+ /**
+  * Creates a tokenizer with the default minimum token size ({@code MIN_SIZE}) and
+  * lemmatization enabled.
+  */
+ public ShingleTokenizer(final List lexicon) {
+ this(lexicon, MIN_SIZE);
+ }
+
+ /**
+  * Creates a tokenizer with the given minimum token size and lemmatization enabled.
+  */
+ public ShingleTokenizer(final List lexicon, final int minSize) {
+ this(lexicon, minSize, true);
+ }
+
+ /**
+  * Creates a tokenizer.
+  * The entries returned by {@code Text.get_lexicon(lexicon)} are stored sorted by decreasing
+  * key length, so that longer keys are tried first when tokenizing.
+  *
+  * @param lexicon       the known words, in the format expected by {@code Text.get_lexicon}
+  * @param minSize       minimum length of an alphabetic token
+  * @param lemmatization whether a matched variant is replaced by its lexicon key
+  */
+ public ShingleTokenizer(final List lexicon, final int minSize, final boolean lemmatization) {
+ this.variants = Text.get_lexicon(lexicon).entrySet().stream()
+ .sorted((a, b) -> b.getKey().length() - a.getKey().length()).toList();
+ this.minSize = minSize;
+ this.lemmatization = lemmatization;
+ }
+
+ /** Turns lemmatization on: matched variants are replaced by their lexicon key. */
+ public void enableLemmatization() {
+ this.lemmatization = true;
+ }
+
+ /** Turns lemmatization off: matched variants are kept as they are. */
+ public void disableLemmatization() {
+ this.lemmatization = false;
+ }
+
+ /**
+  * Splits {@code w} into lower-case tokens.
+  * Known lexicon variants are isolated first (longest key first), then the text is split on
+  * whitespace and underscores, and finally on camel-case boundaries. Alphabetic tokens shorter
+  * than {@code minSize} are dropped.
+  *
+  * @param w the text to tokenize; may be null
+  * @return the tokens in order of appearance; empty when {@code w} is null
+  */
+ @Override
+ public List apply(final String w) {
+     var s = StringUtils.normalizeWhiteSpaces(w);
+     if (s == null) {
+         return new ArrayList();
+     }
+
+     // Split using a lexicon of known words if any and prioritize longest variant
+
+     final var lexems = this.variants.stream().collect(Collectors.toList());
+     while (lexems.size() > 0) {
+         final var lexem = lexems.remove(0);
+         for (final String variant : lexem.getValue()) {
+             if (s.toLowerCase().contains(variant)) {
+                 final var replacement = this.lemmatization ? lexem.getKey() : variant;
+                 // The variant is plain text (it was just matched with contains), so it must
+                 // not be interpreted as a regex, nor the replacement as a group template.
+                 s = Pattern.compile(variant, Pattern.LITERAL | Pattern.CASE_INSENSITIVE)
+                         .matcher(s)
+                         .replaceAll(java.util.regex.Matcher.quoteReplacement(" " + replacement + " "));
+                 // Drop the lexems already covered by what has just been inserted
+                 lexems.removeIf(x -> x.getValue().stream().anyMatch(y -> replacement.contains(y)));
+                 break;
+             }
+         }
+     }
+
+     // Clean by space and underscore
+
+     s = s.replaceAll("[\\s_]+", " ").trim();
+
+     // Split by space and then by Camel notation words
+
+     final ArrayList result = new ArrayList();
+     for (final String ss : s.split(" ")) {
+         for (final String sss : CAMEL_PATTERN.get().split(ss)) {
+             // Non-alphabetic tokens (digits, symbols) are kept whatever their length
+             if (sss.length() > 0 && (sss.length() > (minSize - 1) || !Character.isAlphabetic(sss.charAt(0)))) {
+                 result.add(sss.toLowerCase());
+             }
+         }
+     }
+
+     return result;
+ }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/python/PythonManager.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/python/PythonManager.java
new file mode 100644
index 00000000..d974cb93
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/python/PythonManager.java
@@ -0,0 +1,201 @@
+package com.github.romualdrousseau.any2json.commons.python;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.stream.Stream;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+ /**
+  * Launches a Python module as an external process.
+  * The module is described in {@code python4j.properties} by the keys
+  * {@code <module>.module-path}, {@code <module>.module-main} (default {@code main.py}),
+  * {@code <module>.virtual-env} (default {@code false}), {@code <module>.virtual-env-path}
+  * (default {@code .venv}) and {@code <module>.dependencies} (default {@code false}).
+  */
+ public class PythonManager {
+     private static final Logger LOGGER = LoggerFactory.getLogger(PythonManager.class);
+
+     /**
+      * Reads the configuration of {@code moduleName} from {@code python4j.properties}.
+      *
+      * @throws IOException      if the properties file cannot be read
+      * @throws RuntimeException if the properties file or the module directory is not found
+      */
+     public PythonManager(final String moduleName) throws IOException {
+         final var prop = new Properties();
+         // Close the stream once loaded; it used to be left open
+         try (final InputStream in = this.findPropertiesFile()) {
+             prop.load(in);
+         }
+
+         this.modulePath = this.getModulePath(prop.getProperty(moduleName + ".module-path"));
+         this.mainEntry = prop.getProperty(moduleName + ".module-main", "main.py");
+         this.hasVirtualEnv = prop.getProperty(moduleName + ".virtual-env", "false").equals("true");
+         this.virtualEnvPath = this.getVirtualEnvPath(prop.getProperty(moduleName + ".virtual-env-path", ".venv"));
+         this.hasDependencies = prop.getProperty(moduleName + ".dependencies", "false").equals("true");
+     }
+
+     /**
+      * Creates the virtual environment with {@code python -m venv} unless its directory already
+      * exists. Blocks until the command has finished.
+      */
+     public PythonManager enableVirtualEnv() throws IOException, InterruptedException {
+         if (this.virtualEnvPath.toFile().exists()) {
+             return this;
+         }
+
+         LOGGER.info("venv: Create a new virtual environment");
+
+         final ProcessBuilder processBuilder = new ProcessBuilder("python", "-m", "venv", this.virtualEnvPath.toString());
+         processBuilder.directory(this.modulePath.toFile());
+         processBuilder.inheritIO();
+         processBuilder.redirectErrorStream(true);
+         processBuilder.start().waitFor();
+         return this;
+     }
+
+     /**
+      * Runs {@code pip install -r requirements.txt} in the module directory unless a
+      * {@code requirements.lock} newer than {@code requirements.txt} is present.
+      * The lock file is written only when pip exits with code 0, so that a failed installation
+      * is attempted again on the next call.
+      */
+     public PythonManager installDependencies() throws IOException, InterruptedException {
+         if (this.isRequirementsInstalled()) {
+             return this;
+         }
+
+         LOGGER.info("pip: Install and update all dependencies");
+
+         final ProcessBuilder processBuilder = new ProcessBuilder(this.getPipScript(), "install", "-r",
+                 "requirements.txt");
+         processBuilder.directory(this.modulePath.toFile());
+         processBuilder.inheritIO();
+         processBuilder.redirectErrorStream(true);
+         final int exitCode = processBuilder.start().waitFor();
+         if (exitCode != 0) {
+             LOGGER.warn("pip: Installation failed with exit code {}, it will be retried on next run", exitCode);
+             return this;
+         }
+
+         final var lockFile = this.modulePath.resolve("requirements.lock").toFile();
+         lockFile.createNewFile();
+
+         return this;
+     }
+
+     /**
+      * Sets the variables added to the environment of the Python process; may be null.
+      */
+     public PythonManager setEnviroment(final Map environment) {
+         this.environment = environment;
+         return this;
+     }
+
+     /**
+      * Starts the main entry of the module with {@code args}, after having prepared the virtual
+      * environment and the dependencies when they are enabled.
+      *
+      * @return the started process; its error stream is merged into its output stream
+      */
+     public Process run(final String... args) throws IOException, InterruptedException {
+         if (this.hasVirtualEnv) {
+             this.enableVirtualEnv();
+         }
+
+         if (this.hasDependencies) {
+             this.installDependencies();
+         }
+
+         final var command = Stream.of(List.of(this.getPythonScript(), this.mainEntry), List.of(args))
+                 .flatMap(Collection::stream).toList();
+         final ProcessBuilder processBuilder = new ProcessBuilder(command);
+         processBuilder.directory(this.modulePath.toFile());
+         processBuilder.redirectErrorStream(true);
+
+         // Must be "&&": with "||" a null environment (the default) was dereferenced
+         if (this.environment != null && !this.environment.isEmpty()) {
+             processBuilder.environment().putAll(this.environment);
+         }
+
+         LOGGER.info("python: Call {} with args: {}", this.mainEntry, args);
+
+         return processBuilder.start();
+     }
+
+     /**
+      * Opens {@code python4j.properties}, looked up in the working directory, then in its
+      * {@code classes} sub-directory, then on the classpath.
+      */
+     private InputStream findPropertiesFile() throws IOException {
+         final var userDir = System.getProperty("user.dir");
+         return this.getPathIfExists(Path.of(userDir, "python4j.properties"))
+                 .or(() -> this.getPathIfExists(Path.of(userDir, "classes", "python4j.properties")))
+                 .flatMap(this::pathToStream)
+                 .or(() -> this.resolveResourceAsStream("python4j.properties"))
+                 .orElseThrow(() -> PythonManager.panicAndAbort("python4j.properties"));
+     }
+
+     /**
+      * Tells whether the dependencies are up to date, i.e. {@code requirements.lock} exists and
+      * is newer than {@code requirements.txt}. A stale lock file is deleted.
+      */
+     private boolean isRequirementsInstalled() throws IOException {
+         final var requireFile = this.modulePath.resolve("requirements.txt").toFile();
+         if (!requireFile.exists()) {
+             return false;
+         }
+
+         final var lockFile = this.modulePath.resolve("requirements.lock").toFile();
+         if (lockFile.exists()) {
+             if (requireFile.lastModified() < lockFile.lastModified()) {
+                 return true;
+             }
+             lockFile.delete();
+         }
+
+         return false;
+     }
+
+     /** Returns the interpreter of the virtual environment when enabled, else {@code python}. */
+     private String getPythonScript() {
+         if (this.hasVirtualEnv) {
+             return this.getScriptPath("bin/python")
+                     .or(() -> this.getScriptPath("Scripts/python.exe"))
+                     .orElseThrow(() -> PythonManager.panicAndAbort("python"))
+                     .toString();
+         } else {
+             return "python";
+         }
+     }
+
+     /** Returns the pip of the virtual environment when enabled, else {@code pip}. */
+     private String getPipScript() {
+         if (this.hasVirtualEnv) {
+             return this.getScriptPath("bin/pip")
+                     .or(() -> this.getScriptPath("Scripts/pip.exe"))
+                     .orElseThrow(() -> PythonManager.panicAndAbort("pip"))
+                     .toString();
+         } else {
+             return "pip";
+         }
+     }
+
+     /** Resolves {@code pathName} inside the virtual environment, if it exists. */
+     private Optional getScriptPath(final String pathName) {
+         return this.getPathIfExists(this.virtualEnvPath.resolve(pathName));
+     }
+
+     /**
+      * Locates the module directory in the working directory, then in its {@code classes}
+      * sub-directory.
+      */
+     private Path getModulePath(final String moduleName) {
+         final var userDir = System.getProperty("user.dir");
+         return this.getPathIfExists(Path.of(userDir, moduleName))
+                 .or(() -> this.getPathIfExists(Path.of(userDir, "classes", moduleName)))
+                 .orElseThrow(() -> PythonManager.panicAndAbort(moduleName));
+     }
+
+     /** Returns {@code virtualEnvPath} as is when absolute, else resolved against the module. */
+     private Path getVirtualEnvPath(String virtualEnvPath) {
+         if (Path.of(virtualEnvPath).isAbsolute()) {
+             return Path.of(virtualEnvPath);
+         } else {
+             return this.modulePath.resolve(virtualEnvPath);
+         }
+     }
+
+     /** Opens {@code x} for reading; an unreadable file is reported as an empty result. */
+     private Optional pathToStream(final Path x) {
+         try {
+             return Optional.of(Files.newInputStream(x));
+         } catch (final IOException e) {
+             return Optional.empty();
+         }
+     }
+
+     /** Opens {@code resourceName} from the classpath, if present. */
+     private Optional resolveResourceAsStream(final String resourceName) {
+         final InputStream resource = this.getClass().getClassLoader().getResourceAsStream(resourceName);
+         if (resource == null) {
+             LOGGER.debug("module: {} not found", resourceName);
+             return Optional.empty();
+         }
+         LOGGER.debug("module: {} found at {}", resourceName, resource);
+         return Optional.of(resource);
+     }
+
+     /** Returns {@code path} when it exists on disk, logging the outcome at debug level. */
+     private Optional getPathIfExists(final Path path) {
+         if (!path.toFile().exists()) {
+             LOGGER.debug("module: {} not found at {}", path.getFileName(), path);
+             return Optional.empty();
+         }
+         LOGGER.debug("module: {} found at {}", path.getFileName(), path);
+         return Optional.of(path);
+     }
+
+     /** Logs that {@code name} is missing and builds the exception to throw. */
+     private static RuntimeException panicAndAbort(final String name) {
+         LOGGER.error("module: {} not found, abort ...", name);
+         return new RuntimeException(String.format("%s not found, abort ...", name));
+     }
+
+     private final Path modulePath;
+     private final String mainEntry;
+     private final boolean hasVirtualEnv;
+     private final Path virtualEnvPath;
+     private final boolean hasDependencies;
+     private Map environment = null;
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/python/PythonSimpleDateFormat.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/python/PythonSimpleDateFormat.java
new file mode 100644
index 00000000..15dd4a26
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/python/PythonSimpleDateFormat.java
@@ -0,0 +1,81 @@
+package com.github.romualdrousseau.any2json.commons.python;
+
+import java.text.DateFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+
+ /**
+  * {@link SimpleDateFormat} that is configured with a Python {@code strftime} pattern, plus
+  * helpers translating patterns between the Java and the Python notations.
+  * Only the directives listed in the two tables below are translated; any other character is
+  * copied as is. Some mappings are approximations (for instance {@code %w}, {@code %U} and
+  * {@code %V} share the Java letters of {@code %u} and {@code %W}).
+  */
+ public class PythonSimpleDateFormat extends SimpleDateFormat {
+
+     // Within one letter, longer tokens come first so that they win over their own prefixes
+     private static final String[][] JAVA_TO_PYTHON = {
+             { "YYYY", "%G" },
+             { "yyyy", "%Y" },
+             { "yy", "%y" },
+             { "y", "%-y" },
+             { "MMMMM", "%B" },
+             { "MMM", "%b" },
+             { "MM", "%m" },
+             { "M", "%-m" },
+             { "DDD", "%j" },
+             { "dd", "%d" },
+             { "d", "%-d" },
+             { "EEEEE", "%A" },
+             { "EEE", "%a" },
+             { "ww", "%W" },
+             { "u", "%u" },
+             { "HH", "%H" },
+             { "H", "%-H" },
+             { "hh", "%I" },
+             { "h", "%-I" },
+             { "mm", "%M" },
+             { "m", "%-M" },
+             { "ss", "%S" },
+             { "s", "%-S" },
+     };
+
+     private static final String[][] PYTHON_TO_JAVA = {
+             { "%G", "YYYY" },
+             { "%Y", "yyyy" },
+             { "%y", "yy" },
+             { "%-y", "y" },
+             { "%B", "MMMMM" },
+             { "%b", "MMM" },
+             { "%m", "MM" },
+             { "%-m", "M" },
+             { "%j", "DDD" },
+             { "%d", "dd" },
+             { "%-d", "d" },
+             { "%A", "EEEEE" },
+             { "%a", "EEE" },
+             { "%W", "ww" },
+             { "%w", "u" },
+             { "%u", "u" },
+             { "%U", "ww" },
+             { "%V", "ww" },
+             { "%H", "HH" },
+             { "%-H", "H" },
+             { "%I", "hh" },
+             { "%-I", "h" },
+             { "%M", "mm" },
+             { "%-M", "m" },
+             { "%S", "ss" },
+             { "%-S", "s" },
+     };
+
+     public PythonSimpleDateFormat() {
+         super();
+     }
+
+     /**
+      * @param pattern a Python {@code strftime} pattern
+      */
+     public PythonSimpleDateFormat(final String pattern) {
+         super(PythonSimpleDateFormat.toJava(pattern));
+     }
+
+     /**
+      * @param pattern       a Python {@code strftime} pattern
+      * @param formatSymbols the date format symbols to use
+      */
+     public PythonSimpleDateFormat(final String pattern, DateFormatSymbols formatSymbols) {
+         super(PythonSimpleDateFormat.toJava(pattern), formatSymbols);
+     }
+
+     /**
+      * @param pattern a Python {@code strftime} pattern
+      * @param locale  the locale whose date format symbols are used
+      */
+     public PythonSimpleDateFormat(final String pattern, Locale locale) {
+         super(PythonSimpleDateFormat.toJava(pattern), locale);
+     }
+
+     /**
+      * Translates a Java date pattern into a Python {@code strftime} pattern,
+      * e.g. {@code yyyy-MM-dd} into {@code %Y-%m-%d}.
+      */
+     public static String toPython(final String javaPattern) {
+         return translate(javaPattern, JAVA_TO_PYTHON);
+     }
+
+     /**
+      * Translates a Python {@code strftime} pattern into a Java date pattern,
+      * e.g. {@code %Y-%m-%d} into {@code yyyy-MM-dd}.
+      */
+     public static String toJava(final String pythonPattern) {
+         return translate(pythonPattern, PYTHON_TO_JAVA);
+     }
+
+     /**
+      * Rewrites {@code pattern} in a single left-to-right pass: at each position the first token
+      * of {@code table} found there is replaced, otherwise the character is copied.
+      * A single pass is required because the output of one rule contains letters that are
+      * tokens of other rules; chaining replacements turned "dd" into "%d" and then "%%-d".
+      */
+     private static String translate(final String pattern, final String[][] table) {
+         final StringBuilder out = new StringBuilder(pattern.length() * 2);
+         int i = 0;
+         while (i < pattern.length()) {
+             String[] hit = null;
+             for (final String[] rule : table) {
+                 if (pattern.startsWith(rule[0], i)) {
+                     hit = rule;
+                     break;
+                 }
+             }
+             if (hit == null) {
+                 out.append(pattern.charAt(i));
+                 i++;
+             } else {
+                 out.append(hit[1]);
+                 i += hit[0].length();
+             }
+         }
+         return out.toString();
+     }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Action.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Action.java
new file mode 100644
index 00000000..6930e07e
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Action.java
@@ -0,0 +1,33 @@
+package com.github.romualdrousseau.any2json.commons.redux;
+
+import java.util.function.Supplier;
+
+ /**
+  * Action identified by a type string: two actions are equal when their types are equal.
+  * As a {@code Supplier}, an action supplies itself.
+  */
+ public class Action implements Supplier {
+
+     private final String type;
+
+     /**
+      * @param type the identifier of the action; must not be null for equals and hashCode to work
+      */
+     public Action(final String type) {
+         this.type = type;
+     }
+
+     /** Returns the identifier given at construction. */
+     public String getType() {
+         return this.type;
+     }
+
+     /** Hash of the type, consistent with {@link #equals(Object)}. */
+     @Override
+     public int hashCode() {
+         return this.type.hashCode();
+     }
+
+     /** Two actions are equal when they carry the same type. */
+     @Override
+     public boolean equals(final Object obj) {
+         return obj instanceof Action && this.type.equals(((Action) obj).type);
+     }
+
+     /** Returns this action. */
+     @Override
+     public Action get() {
+         return this;
+     }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Reducer.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Reducer.java
new file mode 100644
index 00000000..a4aaf65c
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Reducer.java
@@ -0,0 +1,5 @@
+package com.github.romualdrousseau.any2json.commons.redux;
+
+import java.util.function.BiFunction;
+
+ /**
+  * Function applied by {@code Store.dispatch} to the current state and the dispatched action;
+  * the value it returns becomes the new state.
+  */
+ public interface Reducer extends BiFunction {}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Store.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Store.java
new file mode 100644
index 00000000..c92a1658
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Store.java
@@ -0,0 +1,36 @@
+package com.github.romualdrousseau.any2json.commons.redux;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+ /**
+  * Holder of a state that is changed only by dispatching actions through the registered
+  * reducers, with subscribers notified after each dispatch.
+  * NOTE(review): nothing here synchronizes access to the state or the registrations; confirm
+  * whether a store is meant to be shared between threads.
+  */
+ public class Store {
+
+ // Subscribers to notify, keyed by the action they listen to
+ private final Map>> subscribers = new HashMap<>();
+ // Reducers, applied in registration order
+ private final List> reducers = new ArrayList<>();
+ private S state;
+
+ /**
+  * @param state the initial state
+  */
+ public Store(final S state) {
+ this.state = state;
+ }
+
+ /** Returns the current state. */
+ public S getState() {
+ return this.state;
+ }
+
+ /**
+  * Registers {@code subscriber} to be called whenever an action equal to {@code action} is
+  * dispatched.
+  */
+ public void addSubscriber(final A action, final Subscriber subscriber) {
+ this.subscribers.computeIfAbsent(action, x -> new ArrayList<>()).add(subscriber);
+ }
+
+ /** Appends {@code reducer} to the chain applied on every dispatch. */
+ public void addReducer(final Reducer reducer) {
+ this.reducers.add(reducer);
+ }
+
+ /**
+  * Resolves {@code action} through its {@code get()}, feeds the state through every reducer
+  * in registration order, then notifies the subscribers registered for the resolved action.
+  */
+ public void dispatch(final A action) {
+ @SuppressWarnings("unchecked") final var result = (A) action.get();
+ // The combiner (x, y) -> y only matters for parallel streams; this stream is sequential
+ this.state = reducers.stream().reduce(this.state, (x, y) -> y.apply(x, result), (x, y) -> y);
+ this.subscribers.getOrDefault(result, Collections.emptyList()).forEach(x -> x.accept(this, result));
+ }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Subscriber.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Subscriber.java
new file mode 100644
index 00000000..fffa47af
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/redux/Subscriber.java
@@ -0,0 +1,5 @@
+package com.github.romualdrousseau.any2json.commons.redux;
+
+import java.util.function.BiConsumer;
+
+ /**
+  * Callback invoked by {@code Store.dispatch} with the store and the dispatched action, once
+  * the reducers have been applied.
+  */
+ public interface Subscriber extends BiConsumer, A> {}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/strings/StringFuzzy.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/strings/StringFuzzy.java
new file mode 100644
index 00000000..b18f969a
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/strings/StringFuzzy.java
@@ -0,0 +1,141 @@
+package com.github.romualdrousseau.any2json.commons.strings;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+
+ /**
+  * Similarity measures between strings, all returning a score where 1 means identical, plus
+  * the set-like helpers they rely on.
+  */
+ public class StringFuzzy {
+
+     /**
+      * Position-wise similarity of two strings of the same length, computed as
+      * {@code exp(matches - length)}: 1 when every character matches, decaying exponentially
+      * with each mismatch.
+      *
+      * @return the score, or 0 when the lengths differ or both strings are empty
+      */
+     public static float Hamming(String s, String t) {
+         if (s.length() != t.length()) {
+             return 0.0f;
+         }
+
+         if (s.length() == 0) {
+             return 0.0f;
+         }
+
+         int n = 0;
+         for (int i = 0; i < s.length(); i++) {
+             if (s.charAt(i) == t.charAt(i)) {
+                 n++;
+             }
+         }
+         // exp(n) / exp(len) overflowed to Infinity / Infinity = NaN once cast to float for
+         // strings longer than about 88 characters; exp(n - len) is the same value and is safe.
+         return (float) Math.exp(n - s.length());
+     }
+
+     /**
+      * Jaro similarity of two strings.
+      * Despite its name, this method does not apply the Winkler common-prefix bonus.
+      *
+      * @return a score in [0, 1]; 1 when both strings are empty, 0 when nothing matches
+      */
+     public static float JaroWinkler(String s, String t) {
+         int s_len = s.length();
+         int t_len = t.length();
+
+         if (s_len == 0 && t_len == 0) {
+             return 1.0f;
+         }
+
+         // Clamp to 0: for single-character strings the raw value is -1, which emptied the
+         // search window and scored "a" against "a" as 0.
+         int match_distance = Integer.max(0, Integer.max(s_len, t_len) / 2 - 1);
+
+         boolean[] s_matches = new boolean[s_len];
+         boolean[] t_matches = new boolean[t_len];
+
+         int matches = 0;
+         int transpositions = 0;
+
+         // Pair each character of s with the first free, equal character of t within the window
+         for (int i = 0; i < s_len; i++) {
+             int start = Integer.max(0, i - match_distance);
+             int end = Integer.min(i + match_distance + 1, t_len);
+
+             for (int j = start; j < end; j++) {
+                 if (t_matches[j]) {
+                     continue;
+                 }
+                 if (s.charAt(i) != t.charAt(j)) {
+                     continue;
+                 }
+                 s_matches[i] = true;
+                 t_matches[j] = true;
+                 matches++;
+                 break;
+             }
+         }
+
+         if (matches == 0) {
+             return 0;
+         }
+
+         // Count the matched characters that do not appear in the same order in both strings
+         int k = 0;
+         for (int i = 0; i < s_len; i++) {
+             if (!s_matches[i]) {
+                 continue;
+             }
+             while (!t_matches[k]) {
+                 k++;
+             }
+             if (s.charAt(i) != t.charAt(k)) {
+                 transpositions++;
+             }
+             k++;
+         }
+
+         return ((((float) matches / (float) s_len) + ((float) matches / (float) t_len)
+                 + (((float) matches - (float) transpositions / 2.0f) / (float) matches)) / 3.0f);
+     }
+
+     /**
+      * Jaccard index of the sets of characters of the two strings: size of the intersection
+      * divided by size of the union.
+      *
+      * @return a score in [0, 1]; 1 when both strings are empty (formerly NaN)
+      */
+     public static float Jaccard(String s1, String s2) {
+         final int unionSize = StringFuzzy.union(s1, s2).length();
+         if (unionSize == 0) {
+             return 1.0f;
+         }
+         return (float) StringFuzzy.intersect(s1, s2).length() / (float) unionSize;
+     }
+
+     /**
+      * Returns the distinct characters of {@code s1} then of {@code s2}, in order of first
+      * appearance.
+      */
+     public static String union(String s1, String s2) {
+         final StringBuilder result = new StringBuilder();
+
+         for (char c : s1.toCharArray()) {
+             if (result.indexOf(String.valueOf(c)) < 0) {
+                 result.append(c);
+             }
+         }
+
+         for (char c : s2.toCharArray()) {
+             if (result.indexOf(String.valueOf(c)) < 0) {
+                 result.append(c);
+             }
+         }
+
+         return result.toString();
+     }
+
+     /**
+      * Returns the distinct elements of {@code s1} then of {@code s2}, in order of first
+      * appearance.
+      */
+     public static String[] union(String[] s1, String[] s2) {
+         final List<String> result = new ArrayList<String>(s1.length + s2.length);
+
+         for (String v : s1) {
+             if (!result.contains(v)) {
+                 result.add(v);
+             }
+         }
+
+         for (String v : s2) {
+             if (!result.contains(v)) {
+                 result.add(v);
+             }
+         }
+
+         return result.toArray(new String[0]);
+     }
+
+     /**
+      * Returns the distinct characters of {@code s1} that also occur in {@code s2}, in order of
+      * first appearance in {@code s1}.
+      */
+     public static String intersect(String s1, String s2) {
+         final StringBuilder result = new StringBuilder();
+
+         for (char c : s1.toCharArray()) {
+             final String cs = String.valueOf(c);
+             if (result.indexOf(cs) < 0 && s2.contains(cs)) {
+                 result.append(c);
+             }
+         }
+
+         return result.toString();
+     }
+
+     /**
+      * Returns the distinct elements of {@code s1} that also occur in {@code s2}, in order of
+      * first appearance in {@code s1}.
+      */
+     public static String[] intersect(String[] s1, String[] s2) {
+         final List<String> result = new ArrayList<String>(s1.length);
+         final List<String> tmp = Arrays.asList(s2);
+
+         for (String v : s1) {
+             if (!result.contains(v) && tmp.contains(v)) {
+                 result.add(v);
+             }
+         }
+
+         return result.toArray(new String[0]);
+     }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/strings/StringUtils.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/strings/StringUtils.java
new file mode 100644
index 00000000..02981951
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/strings/StringUtils.java
@@ -0,0 +1,153 @@
+package com.github.romualdrousseau.any2json.commons.strings;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import com.github.romualdrousseau.any2json.commons.preprocessing.Text;
+
+ /**
+  * Null-tolerant string helpers: whitespace handling (including the non-breaking and the
+  * ideographic space), case conversion, token cleaning and identifier building.
+  */
+ public class StringUtils {
+     /** Content of a regex character class matching what is considered whitespace here. */
+     public static final String WHITE_SPACES = "\\s\\u00A0\\u3000";
+     /** Regex for the Unicode replacement character left by a failed decoding. */
+     public static final String WRONG_UNICODE = "\\uFFFD";
+     public static final char WRONG_UNICODE_CHAR = '\uFFFD';
+     /** Regex for the byte order mark. */
+     public static final String BOM = "\\uFEFF";
+     public static final char BOM_CHAR = '\uFEFF';
+
+     /** Regex of a symbol to the word that replaces it in {@link #encodeSymbols(String)}. */
+     public static final Map symbols = Map.of(
+             "%+", "percent",
+             "\\$+", "dollar"
+     );
+
+     // Pattern instances are immutable and thread-safe, so they can be shared as plain constants
+     private static final Pattern CLEAN_TOKEN_REGEX1 = Pattern.compile("[" + StringUtils.WHITE_SPACES + "]+");
+     private static final Pattern CLEAN_TOKEN_REGEX2 = Pattern.compile("^[\" ]+|[\" ]+$");
+
+     /** Tells whether {@code s} is null or made only of {@link #WHITE_SPACES}. */
+     public static boolean isBlank(final String s) {
+         return s == null || StringUtils.trim(s).equals("");
+     }
+
+     /** Tells whether {@code s} is null or blank according to {@code String.isBlank}. */
+     public static boolean isFastBlank(final String s) {
+         return s == null || s.isBlank();
+     }
+
+     /** Removes the leading and trailing {@link #WHITE_SPACES} of {@code s}; null gives null. */
+     public static String trim(final String s) {
+         return trim(s, StringUtils.WHITE_SPACES);
+     }
+
+     /**
+      * Removes the leading and trailing characters of {@code s} that belong to the regex
+      * character class content {@code whiteSpaces}; null gives null.
+      */
+     public static String trim(final String s, final String whiteSpaces) {
+         if (s == null) {
+             return null;
+         }
+         return s.replaceAll("^[" + whiteSpaces + "]+", "").replaceAll("[" + whiteSpaces + "]+$", "");
+     }
+
+     /** Replaces each of the {@link #WHITE_SPACES} of {@code s} by a plain space; null gives null. */
+     public static String normalizeWhiteSpaces(final String s) {
+         return normalizeWhiteSpaces(s, StringUtils.WHITE_SPACES);
+     }
+
+     /** Replaces each character of {@code s} in {@code whiteSpaces} by a plain space; null gives null. */
+     public static String normalizeWhiteSpaces(final String s, final String whiteSpaces) {
+         if (s == null) {
+             return null;
+         }
+         return s.replaceAll("[" + whiteSpaces + "]", " ");
+     }
+
+     /** Deletes all the {@link #WHITE_SPACES} of {@code s}; null gives null. */
+     public static String removeWhiteSpaces(final String s) {
+         return removeWhiteSpaces(s, StringUtils.WHITE_SPACES);
+     }
+
+     /** Deletes each character of {@code s} in {@code whiteSpaces}; null gives null. */
+     public static String removeWhiteSpaces(final String s, final String whiteSpaces) {
+         if (s == null) {
+             return null;
+         }
+         return s.replaceAll("[" + whiteSpaces + "]", "");
+     }
+
+     /** Collapses each run of {@link #WHITE_SPACES} in {@code s} to one plain space; null gives null. */
+     public static String singleWhiteSpaces(final String s) {
+         return singleWhiteSpaces(s, StringUtils.WHITE_SPACES);
+     }
+
+     /** Collapses each run of characters in {@code whiteSpaces} to one plain space; null gives null. */
+     public static String singleWhiteSpaces(final String s, final String whiteSpaces) {
+         if (s == null) {
+             return null;
+         }
+         return s.replaceAll("[" + whiteSpaces + "]+", " ");
+     }
+
+     /**
+      * Returns {@code s} with its first character in upper case and the rest in lower case;
+      * null gives null and the empty string is returned as is.
+      */
+     public static String capitalize(final String s) {
+         if (s == null) {
+             return null;
+         }
+         if (s.isEmpty()) {
+             return s;
+         }
+         // Single-character strings used to be lower-cased instead of capitalized
+         return Character.toUpperCase(s.charAt(0)) + s.substring(1).toLowerCase();
+     }
+
+     /**
+      * Returns {@code s} with its first character in lower case, the rest being untouched;
+      * null gives null.
+      */
+     public static String uncapitalize(final String s) {
+         if (s == null) {
+             return null;
+         }
+         if (s.length() <= 1) {
+             return s.toLowerCase();
+         } else {
+             return Character.toLowerCase(s.charAt(0)) + s.substring(1);
+         }
+     }
+
+     /**
+      * Tells whether {@code s} is free of the Unicode replacement character; null gives false.
+      */
+     public static boolean checkIfGoodEncoding(final String s) {
+         if (s == null) {
+             return false;
+         }
+         return s.indexOf(StringUtils.WRONG_UNICODE_CHAR) < 0;
+     }
+
+     /**
+      * Collapses the runs of whitespace of {@code s} to one space, trims it and, when the result
+      * is wrapped in double quotes, strips the quotes and spaces at both ends; null gives null.
+      */
+     public static String cleanToken(final String s) {
+         if (s == null) {
+             return null;
+         }
+         var ss = CLEAN_TOKEN_REGEX1.matcher(s).replaceAll(" ").trim();
+         if (ss.startsWith("\"") && ss.endsWith("\"")) {
+             ss = CLEAN_TOKEN_REGEX2.matcher(ss).replaceAll("");
+         }
+         return ss;
+     }
+
+     /**
+      * Builds a snake_case identifier: symbols are spelled out, non-word characters become
+      * separators, and the tokens given by {@code tokenizer} are joined with underscores.
+      */
+     public static String toSnake(final String w, final Text.ITokenizer tokenizer) {
+         return String.join("_", tokenizer.apply(StringUtils.encodeSymbols(w).replaceAll("\\W+", " "))).toLowerCase();
+     }
+
+     /**
+      * Builds a camelCase identifier: symbols are spelled out, non-word characters become
+      * separators, and the capitalized tokens given by {@code tokenizer} are concatenated.
+      */
+     public static String toCamel(final String w, final Text.ITokenizer tokenizer) {
+         return uncapitalize(
+                 String.join("", tokenizer.apply(StringUtils.encodeSymbols(w).replaceAll("\\W+", " ")).stream()
+                         .map(StringUtils::capitalize).toArray(String[]::new)));
+     }
+
+     /** Replaces each run of a symbol listed in {@link #symbols} by its word. */
+     public static String encodeSymbols(final String s) {
+         var tmp = s;
+         for (var e : symbols.entrySet()) {
+             tmp = tmp.replaceAll(e.getKey(), e.getValue());
+         }
+         return tmp;
+     }
+
+     /** Returns the regexes of the symbols handled by {@link #encodeSymbols(String)}. */
+     public static Set getSymbols() {
+         return symbols.keySet();
+     }
+
+     /**
+      * Joins {@code values} with {@code sep}, skipping any value that is already a substring of
+      * what has been joined so far; empty when {@code values} is empty.
+      */
+     public static Optional merge(final String sep, final List values) {
+         return values.stream().reduce((a, x) -> !a.contains(x) ? String.join(sep, a, x) : a);
+     }
+ }
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/CollectionUtils.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/CollectionUtils.java
new file mode 100644
index 00000000..e469889b
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/CollectionUtils.java
@@ -0,0 +1,21 @@
+package com.github.romualdrousseau.any2json.commons.types;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+public class CollectionUtils {
+ /** Builds a new ArrayList holding the integers from {@code a} (inclusive) up to {@code b} (exclusive); empty when {@code a >= b}. */
+ public static List mutableRange(int a, int b) {
+ List result = new ArrayList();
+ for (int i = a; i < b; i++) {
+ result.add(i);
+ }
+ return result;
+ }
+ /** Shuffles {@code l} in place with {@code Collections.shuffle} and returns that same list instance. */
+ public static List shuffle(List l) {
+ Collections.shuffle(l);
+ return l;
+ }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/Pair.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/Pair.java
new file mode 100644
index 00000000..1d2328a3
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/Pair.java
@@ -0,0 +1,28 @@
+package com.github.romualdrousseau.any2json.commons.types;
+
+import java.util.Map;
+
+public class Pair implements Map.Entry { // read-only pair of two strings exposed as a Map.Entry
+ private final String left;
+ private final String right;
+ // NOTE(review): equals()/hashCode() are not overridden although Map.Entry specifies them — confirm this is intended.
+ public Pair(final String left, final String right) {
+ this.left = left;
+ this.right = right;
+ }
+ /** Returns the left element, which acts as the entry key. */
+ @Override
+ public String getKey() {
+ return this.left;
+ }
+ /** Returns the right element, which acts as the entry value. */
+ @Override
+ public String getValue() {
+ return this.right;
+ }
+ /** Always throws UnsupportedOperationException: the value cannot be replaced. */
+ @Override
+ public String setValue(String arg0) {
+ throw new UnsupportedOperationException("Unimplemented method 'setValue'");
+ }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/Tensor.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/Tensor.java
new file mode 100644
index 00000000..3f0a74dc
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/types/Tensor.java
@@ -0,0 +1,54 @@
+package com.github.romualdrousseau.any2json.commons.types;
+
+public class Tensor {
+
+ public static final Tensor Null = Tensor.zeros(0);
+
+ public static Tensor of(final double... data) {
+ final var floats = new float[data.length];
+ for (int i = 0; i < data.length; i++) {
+ floats[i] = (float) data[i];
+ }
+ return new Tensor(floats);
+ }
+
+ public static Tensor of(final float... data) {
+ return new Tensor(data);
+ }
+
+ public static Tensor zeros(final int size) {
+ final var zeros = new float[size];
+ for (int i = 0; i < size; i++) {
+ zeros[i] = 0.0f;
+ }
+ return new Tensor(zeros);
+ }
+
+ public final int size;
+ public final float[] data;
+
+ public Tensor(final float[] data) {
+ this.data = data;
+ this.size = data.length;
+ }
+
+ public Tensor iadd(final Tensor t) {
+ assert this.size == t.size;
+ for (int i = 0; i < this.size; i++) {
+ this.data[i] += t.data[i];
+ }
+ return this;
+ }
+
+ public Tensor if_lt_then(final float n, final float f, final float g) {
+ for (int i = 0; i < this.data.length; i++) {
+ this.data[i] = (this.data[i] < n) ? f : g;
+ }
+ return this;
+ }
+
+ public int argmax() {
+ // TODO implement this
+ return 0;
+ }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAML.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAML.java
new file mode 100644
index 00000000..b4ddd0f7
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAML.java
@@ -0,0 +1,134 @@
+package com.github.romualdrousseau.any2json.commons.yaml;
+
+import java.lang.reflect.InvocationTargetException;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+import org.reflections.Reflections;
+
+public class YAML {
+ public final static String PACKAGE_LOADER_PREFIX = "com.github.romualdrousseau.shuju.yaml";
+
+ private static YAMLFactory Factory;
+ static {
+ final var reflections = new Reflections(PACKAGE_LOADER_PREFIX);
+ YAML.Factory = reflections.getSubTypesOf(YAMLFactory.class).stream()
+ .map(YAML::newFactoryInstance)
+ .findFirst()
+ .get();
+ }
+
+ private static YAMLFactory newFactoryInstance(final Class clazz) {
+ try {
+ return (YAMLFactory) clazz.getConstructor().newInstance();
+ } catch (InstantiationException | IllegalAccessException
+ | IllegalArgumentException | InvocationTargetException
+ | NoSuchMethodException | SecurityException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public static YAMLArray newArray() {
+ return YAML.Factory.newArray();
+ }
+
+ public static YAMLArray arrayOf(final String data) {
+ return YAML.Factory.parseArray(data);
+ }
+
+ public static YAMLArray arrayOf(final Object object) {
+ return YAML.Factory.parseArray(object);
+ }
+
+ public static YAMLArray arrayOf(final List l) {
+ final var array = YAML.newArray();
+ l.forEach(s -> array.append(s));
+ return array;
+ }
+
+ public static YAMLArray arrayOf(final Stream l) {
+ final var array = YAML.newArray();
+ l.forEach(s -> array.append(s));
+ return array;
+ }
+
+ public static YAMLArray arrayOf(final Map m) {
+ final var array = YAML.newArray();
+ m.forEach((k, v) -> {
+ final var pair = YAML.newObject();
+ pair.set("key", k);
+ pair.set("value", v);
+ array.append(pair);
+ });
+ return array;
+ }
+
+ public static YAMLArray loadArray(final Path filePath) {
+ return YAML.Factory.loadArray(filePath);
+ }
+
+ public static void saveArray(final YAMLArray a, final Path filePath) {
+ YAML.Factory.saveArray(a, filePath, false);
+ }
+
+ public static void saveArray(final YAMLArray a, final Path filePath, final boolean pretty) {
+ YAML.Factory.saveArray(a, filePath, pretty);
+ }
+
+ public static YAMLObject newObject() {
+ return YAML.Factory.newObject();
+ }
+
+ public static YAMLObject objectOf(final String data) {
+ return YAML.Factory.parseObject(data);
+ }
+
+ public static YAMLObject objectOf(final Object object) {
+ return YAML.Factory.parseObject(object);
+ }
+
+ public static YAMLObject objectOf(final Map m) {
+ final YAMLObject object = YAML.newObject();
+ m.forEach((k, v) -> object.set(k, v));
+ return object;
+ }
+
+ public static YAMLObject loadObject(final Path filePath) {
+ return YAML.Factory.loadObject(filePath);
+ }
+
+ public static void saveObject(final YAMLObject o, final Path filePath) {
+ YAML.Factory.saveObject(o, filePath, false);
+ }
+
+ public static void saveObject(final YAMLObject o, final Path filePath, final boolean pretty) {
+ YAML.Factory.saveObject(o, filePath, pretty);
+ }
+
+ @SuppressWarnings("unchecked")
+ public static Optional query(final Object a, final String q) {
+ Object curr = a;
+ for(final var token: Arrays.asList(q.split("\\."))) {
+ if (curr instanceof YAMLArray) {
+ final int i = Integer.parseInt(token);
+ curr = ((YAMLArray) curr).get(i).orElse(null);
+ } else if (curr instanceof YAMLObject) {
+ curr = ((YAMLObject) curr).get(token).orElse(null);
+ } else {
+ curr = null;
+ }
+ }
+ return Optional.ofNullable((T) curr);
+ }
+
+ public static Stream queryStream(final Object a, final String q) {
+ return YAML.query(a, q)
+ .filter(o -> o instanceof YAMLArray)
+ .map(o -> ((YAMLArray) o).stream())
+ .orElse(Stream.empty());
+ }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLArray.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLArray.java
new file mode 100644
index 00000000..4b477754
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLArray.java
@@ -0,0 +1,45 @@
+package com.github.romualdrousseau.any2json.commons.yaml;
+
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+public interface YAMLArray {
+ /** Number of elements; the default stream() iterates indices 0 to size() - 1. */
+ int size();
+ /** Returns the element at index {@code i} wrapped in an Optional. */
+ Optional get(int i);
+ /** Stores {@code o} at index {@code i}; returns a YAMLArray (presumably this one, for chaining — confirm with implementations). */
+ YAMLArray set(int i, T o);
+ /** Appends {@code o} (presumably at the end — confirm); returns a YAMLArray, presumably for chaining. */
+ YAMLArray append(T o);
+ /** Removes the element at index {@code i}; returns a YAMLArray, presumably for chaining — confirm with implementations. */
+ YAMLArray remove(int i);
+ /** String form of the array; presumably {@code pretty} selects a human-readable layout — confirm with implementations. */
+ String toString(final boolean pretty);
+
+ String toString();
+ /** Sequential stream over the elements, reading {@code get(idx).get()} for each index below {@code size()}. */
+ default Stream stream() {
+ Iterable it = new Iterable() {
+ @Override
+ public Iterator iterator() {
+ return new Iterator() {
+ private int idx = 0;
+
+ @Override
+ public boolean hasNext() {
+ return idx < YAMLArray.this.size();
+ }
+ // NOTE(review): Optional.get() throws NoSuchElementException when get(idx) is empty — confirm elements are never absent.
+ @Override
+ public T next() {
+ return YAMLArray.this.get(idx++).get();
+ }
+ };
+ }
+ };
+ return StreamSupport.stream(it.spliterator(), false);
+ }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLCollector.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLCollector.java
new file mode 100644
index 00000000..3241793f
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLCollector.java
@@ -0,0 +1,16 @@
+package com.github.romualdrousseau.any2json.commons.yaml;
+
+import java.util.Map;
+import java.util.stream.Collector;
+import java.util.stream.Collectors;
+
+public class YAMLCollector {
+ /** Collector mapping each element's {@code get(key)} value to its {@code get(value)} value; Optional.get() throws if either is absent. */
+ public static Collector> toMap(final String key, final String value) {
+ return Collectors.toMap(x -> x.get(key).get(), x -> x.get(value).get());
+ }
+ /** Same key/value extraction as toMap, but collected with {@code Collectors.toUnmodifiableMap}. */
+ public static Collector> toUnmodifiableMap(final String key, final String value) {
+ return Collectors.toUnmodifiableMap(x -> x.get(key).get(), x -> x.get(value).get());
+ }
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLFactory.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLFactory.java
new file mode 100644
index 00000000..5e8ddd29
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLFactory.java
@@ -0,0 +1,26 @@
+package com.github.romualdrousseau.any2json.commons.yaml;
+
+import java.nio.file.Path;
+
+public interface YAMLFactory { // backend contract; YAML's static initializer instantiates one implementation found by reflection
+ /** Creates a new YAMLArray; YAML.newArray() delegates here. */
+ YAMLArray newArray();
+ /** Builds a YAMLArray from the text {@code data}; backs YAML.arrayOf(String). */
+ YAMLArray parseArray(String data);
+ /** Builds a YAMLArray from {@code object}; backs YAML.arrayOf(Object). Accepted object types are not specified here. */
+ YAMLArray parseArray(Object object);
+ /** Returns the YAMLArray for {@code filePath} (presumably read from that file — confirm with implementations). */
+ YAMLArray loadArray(Path filePath);
+ /** Saves {@code a} to {@code filePath}; YAML.saveArray passes {@code pretty = false} unless the caller supplies the flag. */
+ void saveArray(YAMLArray a, Path filePath, boolean pretty);
+ /** Creates a new YAMLObject; YAML.newObject() delegates here. */
+ YAMLObject newObject();
+ /** Builds a YAMLObject from the text {@code data}; backs YAML.objectOf(String). */
+ YAMLObject parseObject(String data);
+ /** Builds a YAMLObject from {@code object}; backs YAML.objectOf(Object). Accepted object types are not specified here. */
+ YAMLObject parseObject(Object object);
+ /** Returns the YAMLObject for {@code filePath} (presumably read from that file — confirm with implementations). */
+ YAMLObject loadObject(Path filePath);
+ /** Saves {@code o} to {@code filePath}; YAML.saveObject passes {@code pretty = false} unless the caller supplies the flag. */
+ void saveObject(YAMLObject o, Path filePath, boolean pretty);
+}
diff --git a/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLObject.java b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLObject.java
new file mode 100644
index 00000000..2cf73c4c
--- /dev/null
+++ b/any2json-commons/src/main/java/com/github/romualdrousseau/any2json/commons/yaml/YAMLObject.java
@@ -0,0 +1,18 @@
+package com.github.romualdrousseau.any2json.commons.yaml;
+
+import java.util.Optional;
+
+public interface YAMLObject {
+ /** Returns an Iterable over this object's keys. */
+ Iterable keys();
+ /** Returns the value stored under {@code k} wrapped in an Optional; YAML.query treats an empty result as "no match". */
+ Optional get(String k);
+ /** Stores {@code o} under {@code k}; returns a YAMLObject (presumably this one, for chaining — confirm with implementations). */
+ YAMLObject set(String k, T o);
+ /** Removes the entry for {@code k}; returns a YAMLObject, presumably for chaining — confirm with implementations. */
+ YAMLObject remove(String k);
+ /** String form of the object; presumably {@code pretty} selects a human-readable layout — confirm with implementations. */
+ String toString(final boolean pretty);
+
+ String toString();
+}
diff --git a/any2json-commons/src/site/markdown/index.md b/any2json-commons/src/site/markdown/index.md
new file mode 100644
index 00000000..5bf7a78d
--- /dev/null
+++ b/any2json-commons/src/site/markdown/index.md
@@ -0,0 +1,20 @@
+# About Any2Json Commons
+
+Any2Json Commons.
+
+## Description
+
+In today's data-driven landscape, navigating the complexities of semi-structured documents poses a significant challenge
+for organizations. These documents, characterized by diverse formats and a lack of standardization, often require
+specialized skills for effective manipulation and analysis. However, we propose a novel framework to address this
+challenge. By leveraging innovative algorithms and machine learning techniques, [Any2Json](https://github.com/RomualdRousseau/Any2Json)
+offers a solution that transcends manual coding, providing enhanced accessibility to users across diverse skill levels.
+Moreover, by automating the extraction process, it not only saves time but also minimizes errors, particularly beneficial
+for industries dealing with large volumes of such documents. Crucially, this framework integrates seamlessly with machine
+learning workflows, unlocking new possibilities for data enrichment and predictive modeling. Aligned with the paradigm of
+data as a service, it offers a scalable and efficient means of managing semi-structured data, thereby expanding the toolkit
+of data services available to organizations.
+
+## Getting Started
+
+You will find articles and tutorials [here](https://romualdrousseau.github.io/Any2Json-Documents/).
diff --git a/any2json-commons/src/site/resources/css/site.css b/any2json-commons/src/site/resources/css/site.css
new file mode 100644
index 00000000..c48367c3
--- /dev/null
+++ b/any2json-commons/src/site/resources/css/site.css
@@ -0,0 +1,3 @@
+#bodyColumn {
+ max-width: 1000px; /* upper bound on the rendered width of the #bodyColumn element */
+}
\ No newline at end of file
diff --git a/any2json-commons/src/site/resources/images/any2json-logo.png b/any2json-commons/src/site/resources/images/any2json-logo.png
new file mode 100644
index 00000000..bc971a5f
Binary files /dev/null and b/any2json-commons/src/site/resources/images/any2json-logo.png differ
diff --git a/any2json-commons/src/site/site.xml b/any2json-commons/src/site/site.xml
new file mode 100644
index 00000000..14a3aded
--- /dev/null
+++ b/any2json-commons/src/site/site.xml
@@ -0,0 +1,34 @@
+