-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Romuald Rousseau
committed
Oct 1, 2024
1 parent
7f0ceb3
commit e92bc5d
Showing
8 changed files
with
602 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
155 changes: 155 additions & 0 deletions
155
any2json-commons/src/test/java/com/github/romualdrousseau/any2json/commons/Test_BigData.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
package com.github.romualdrousseau.any2json.commons; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertThrows; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Path; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.stream.IntStream; | ||
|
||
import org.junit.Test; | ||
|
||
import com.github.romualdrousseau.any2json.commons.bigdata.ChunkSerializer; | ||
import com.github.romualdrousseau.any2json.commons.bigdata.ChunkSerializerFactory; | ||
import com.github.romualdrousseau.any2json.commons.bigdata.ChunkSerializerFactory.SerializerType; | ||
import com.github.romualdrousseau.any2json.commons.bigdata.DataFrameWriter; | ||
import com.github.romualdrousseau.any2json.commons.bigdata.Row; | ||
|
||
public class Test_BigData { | ||
|
||
private final ChunkSerializer serializer = ChunkSerializerFactory.newInstance(SerializerType.FURY); | ||
|
||
@Test | ||
public void testSerialize() throws IOException { | ||
final var rows = IntStream.range(0, 10000) | ||
.mapToObj(i -> Row.of(IntStream.range(0, 1000) | ||
.mapToObj(j -> "nisl purus in mollis nunc") | ||
.toArray(String[]::new))) | ||
.toArray(Row[]::new); | ||
final var bytes = serializer.serialize(rows); | ||
System.out.println(bytes.length >> 20); | ||
|
||
final var rows2 = serializer.deserialize(bytes); | ||
assertEquals(rows.length, rows2.length); | ||
Arrays.stream(rows2).forEach(row -> { | ||
row.forEach(x -> { | ||
assertEquals("nisl purus in mollis nunc", x); | ||
}); | ||
}); | ||
} | ||
|
||
@Test | ||
public void testDataFrameWhole() throws IOException { | ||
try (final var writer = new DataFrameWriter(100, 10)) { | ||
for (int i = 0; i < 10; i++) { | ||
writer.write(Row.of(IntStream.range(0, writer.getColumnCount()) | ||
.mapToObj(j -> "nisl purus in mollis nunc") | ||
.toArray(String[]::new))); | ||
} | ||
try (final var df = writer.getDataFrame()) { | ||
df.forEach(y -> { | ||
y.forEach(x -> { | ||
assertEquals("nisl purus in mollis nunc", x); | ||
}); | ||
}); | ||
} | ||
} | ||
} | ||
|
||
@Test | ||
public void testDataFrameFullView() throws IOException { | ||
try (final var writer = new DataFrameWriter(100, 10)) { | ||
for (int i = 0; i < 10; i++) { | ||
writer.write(Row.of(IntStream.range(0, writer.getColumnCount()) | ||
.mapToObj(j -> "nisl purus in mollis nunc") | ||
.toArray(String[]::new))); | ||
} | ||
try (final var df = writer.getDataFrame()) { | ||
final var view = df.view(0, 0, 10, 10); | ||
view.forEach(y -> { | ||
y.forEach(x -> { | ||
assertEquals("nisl purus in mollis nunc", x); | ||
}); | ||
}); | ||
} | ||
} | ||
} | ||
|
||
@Test | ||
public void testDataFramePartialView() throws IOException { | ||
try (final var writer = new DataFrameWriter(100, 100)) { | ||
for (int i = 0; i < 5000; i++) { | ||
writer.write(Row.of(IntStream.range(0, writer.getColumnCount()) | ||
.mapToObj(j -> "nisl purus in mollis nunc") | ||
.toArray(String[]::new))); | ||
} | ||
try (final var df = writer.getDataFrame()) { | ||
final var view = df.view(2000, 50, 10, 10); | ||
view.forEach(y -> { | ||
y.forEach(x -> { | ||
assertEquals("nisl purus in mollis nunc", x); | ||
}); | ||
}); | ||
} | ||
} | ||
} | ||
|
||
@Test | ||
public void testDataFrameRandom() throws IOException { | ||
try (final var writer = new DataFrameWriter(100, 100)) { | ||
for (int i = 0; i < 5000; i++) { | ||
writer.write(Row.of(IntStream.range(0, writer.getColumnCount()) | ||
.mapToObj(j -> "nisl purus in mollis nunc") | ||
.toArray(String[]::new))); | ||
} | ||
try (final var df = writer.getDataFrame()) { | ||
assertEquals("nisl purus in mollis nunc", df.getCell(0, 5)); | ||
assertEquals("nisl purus in mollis nunc", df.getCell(5, 8)); | ||
} | ||
} | ||
} | ||
|
||
@Test | ||
public void testDataFrameMassive() throws IOException { | ||
if (!Path.of("/mnt/media2").toFile().exists()) { | ||
return; | ||
} | ||
try (final var writer = new DataFrameWriter(10000, 1000)) { | ||
for (int i = 0; i < 10000000; i++) { | ||
writer.write(Row.of(IntStream.range(0, writer.getColumnCount()) | ||
.mapToObj(j -> "nisl purus in mollis nunc") | ||
.toArray(String[]::new))); | ||
} | ||
try (final var df = writer.getDataFrame()) { | ||
df.forEach(y -> { | ||
y.forEach(x -> { | ||
assertEquals("nisl purus in mollis nunc", x); | ||
}); | ||
}); | ||
} | ||
} | ||
} | ||
|
||
@Test | ||
public void testArrayListMassive() { | ||
if (!Path.of("/mnt/media2").toFile().exists()) { | ||
return; | ||
} | ||
assertThrows(OutOfMemoryError.class, () -> { | ||
final var list = new ArrayList<String[]>(); | ||
for (int i = 0; i < 10000000; i++) { | ||
list.add(IntStream.range(0, 1000) | ||
.mapToObj(j -> "nisl purus in mollis nunc") | ||
.toArray(String[]::new)); | ||
} | ||
list.forEach(y -> { | ||
List.of(y).forEach(x -> { | ||
assertEquals("nisl purus in mollis nunc", x); | ||
}); | ||
}); | ||
}); | ||
} | ||
} |
22 changes: 22 additions & 0 deletions
22
any2json-commons/src/test/java/com/github/romualdrousseau/any2json/commons/Test_Python.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package com.github.romualdrousseau.any2json.commons; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
|
||
import java.text.ParseException; | ||
import java.time.LocalDate; | ||
import java.time.ZoneId; | ||
import java.util.Date; | ||
|
||
import org.junit.Test; | ||
|
||
import com.github.romualdrousseau.any2json.commons.python.PythonSimpleDateFormat; | ||
|
||
public class Test_Python { | ||
|
||
@Test | ||
public void testPythonSimpleDateformat() throws ParseException { | ||
final PythonSimpleDateFormat formatter = new PythonSimpleDateFormat("%a,%d/%m/%y"); | ||
assertEquals("Sun,24/09/23", formatter.format(Date.from(LocalDate.of(2023, 9, 24).atStartOfDay(ZoneId.systemDefault()).toInstant()))); | ||
assertEquals("Sun,05/12/99", formatter.format(formatter.parse("Sun,05/12/99"))); | ||
} | ||
} |
55 changes: 55 additions & 0 deletions
55
any2json-commons/src/test/java/com/github/romualdrousseau/any2json/commons/Test_Redux.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package com.github.romualdrousseau.any2json.commons; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
|
||
import java.util.HashMap; | ||
|
||
import org.junit.Test; | ||
|
||
import com.github.romualdrousseau.any2json.commons.redux.Action; | ||
import com.github.romualdrousseau.any2json.commons.redux.Store; | ||
|
||
public class Test_Redux { | ||
|
||
@Test | ||
public void testStoreNoReducerNoSbuscriber() { | ||
final var state = new HashMap<String, Integer>(); | ||
final var store = new Store<HashMap<String, Integer>, Action>(state); | ||
final var testAction = new Action("test"); | ||
store.dispatch(testAction); | ||
} | ||
|
||
@Test | ||
public void testStoreNoReducerWithSubscriber() { | ||
final var state = new HashMap<String, Integer>(); | ||
final var store = new Store<HashMap<String, Integer>, Action>(state); | ||
final var testAction = new Action("test"); | ||
store.addSubscriber(testAction, (s, a) -> assertEquals("test", a.getType())); | ||
store.dispatch(testAction); | ||
} | ||
|
||
@Test | ||
public void testStoreWithReducerAndSubscribers() { | ||
final var state = new HashMap<String, Integer>(); | ||
state.put("counter", 0); | ||
|
||
final var store = new Store<HashMap<String, Integer>, Action>(state); | ||
store.addReducer((s, a) -> { | ||
if (a.getType().equals("inc")) { | ||
s.computeIfPresent("counter", (x, y) -> y + 1); | ||
} | ||
if (a.getType().equals("dec")) { | ||
s.computeIfPresent("counter", (x, y) -> y - 1); | ||
} | ||
return s; | ||
}); | ||
|
||
final var incAction = new Action("inc"); | ||
final var decAction = new Action("dec"); | ||
|
||
store.addSubscriber(incAction, (s, a) -> assertEquals(Integer.valueOf(1), s.getState().get("counter"))); | ||
store.addSubscriber(decAction, (s, a) -> assertEquals(Integer.valueOf(0), s.getState().get("counter"))); | ||
store.dispatch(incAction); | ||
store.dispatch(decAction); | ||
} | ||
} |
115 changes: 115 additions & 0 deletions
115
...commons/src/test/java/com/github/romualdrousseau/any2json/commons/Test_RegexComparer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
package com.github.romualdrousseau.any2json.commons; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertFalse; | ||
import static org.junit.Assert.assertTrue; | ||
|
||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Stream; | ||
|
||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import com.github.romualdrousseau.any2json.commons.preprocessing.comparer.RegexComparer; | ||
|
||
|
||
public class Test_RegexComparer { | ||
|
||
private Map<String, String> patterns; | ||
private List<String> bagOfWords; | ||
|
||
@Before | ||
public void setUp() { | ||
this.patterns = new HashMap<>(); | ||
patterns.put("\\d{1,4}[/|.|-]\\d{1,2}[/|.|-]\\d{1,4}", "DATE"); | ||
patterns.put("^-?[\\d,]+(\\.\\d+)?([e|E]-?\\d+)?$", "NUMBER"); | ||
|
||
this.bagOfWords = Stream.of("1000.25", "Date: 2024-01-01", "some words", "2 words: word1 and word2", null).toList(); | ||
} | ||
|
||
@Test | ||
public void testApplyNullValuesInList() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertFalse(rc.apply(null, this.bagOfWords)); | ||
} | ||
|
||
@Test | ||
public void testApplyEmptyList() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertFalse(rc.apply("NUMBER", Collections.emptyList())); | ||
} | ||
|
||
@Test | ||
public void testApplyMatchingPattern() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertTrue(rc.apply("NUMBER", this.bagOfWords)); | ||
assertTrue(rc.apply("DATE", this.bagOfWords)); | ||
} | ||
|
||
@Test | ||
public void testApplyNonMatchingPattern() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertFalse(rc.apply("NUMBER", this.bagOfWords.stream().skip(2).toList())); | ||
assertFalse(rc.apply("DATE", this.bagOfWords.stream().skip(2).toList())); | ||
} | ||
|
||
@Test | ||
public void testAnonymizeMatchingPattern() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertEquals("NUMBER", rc.anonymize(this.bagOfWords.get(0))); | ||
assertEquals("Date: DATE", rc.anonymize(this.bagOfWords.get(1))); | ||
} | ||
|
||
@Test | ||
public void testAnonymizeNonMatchingPattern() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertEquals(this.bagOfWords.get(2), rc.anonymize(this.bagOfWords.get(2))); | ||
assertEquals(this.bagOfWords.get(3), rc.anonymize(this.bagOfWords.get(3))); | ||
} | ||
|
||
@Test | ||
public void testAnonymizeMatchingPatternWithFilter() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertEquals("NUMBER", rc.anonymize(this.bagOfWords.get(0), "NUMBER")); | ||
assertEquals("Date: DATE", rc.anonymize(this.bagOfWords.get(1), "DATE")); | ||
} | ||
|
||
@Test | ||
public void testAnonymizeNonMatchingPatternWithFilter() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertEquals(this.bagOfWords.get(0), rc.anonymize(this.bagOfWords.get(0), "DATE")); | ||
assertEquals(this.bagOfWords.get(1), rc.anonymize(this.bagOfWords.get(1), "NUMBER")); | ||
} | ||
|
||
@Test | ||
public void testFindMatchingPattern() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertEquals("1000.25", rc.find(this.bagOfWords.get(0)).get()); | ||
assertEquals("2024-01-01", rc.find(this.bagOfWords.get(1)).get()); | ||
} | ||
|
||
@Test | ||
public void testFindNonMatchingPattern() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertTrue(rc.find(this.bagOfWords.get(2)).isEmpty()); | ||
assertTrue(rc.find(this.bagOfWords.get(3)).isEmpty()); | ||
} | ||
|
||
@Test | ||
public void testFindMatchingPatternWithFilter() { | ||
final var rc = new RegexComparer(this.patterns); | ||
final var expected = List.of("1000.25", "2024-01-01"); | ||
assertEquals(expected.get(0), rc.find(this.bagOfWords.get(0), "NUMBER").get()); | ||
assertEquals(expected.get(1), rc.find(this.bagOfWords.get(1), "DATE").get()); | ||
} | ||
|
||
@Test | ||
public void testFindNonMatchingPatternWithFilter() { | ||
final var rc = new RegexComparer(this.patterns); | ||
assertTrue(rc.find(this.bagOfWords.get(0), "DATE").isEmpty()); | ||
assertTrue(rc.find(this.bagOfWords.get(1), "NUMBER").isEmpty()); | ||
} | ||
} |
Oops, something went wrong.