Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-42030: [Java] Update Unit Tests for Adapter Module #42038

Merged
merged 21 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
eb39e2d
update org.junit.Assert to org.junit.jupiter.api.Assertions for avro
llama90 Jun 7, 2024
41203ec
update org.junit.Assert to org.junit.jupiter.api.Assertions for jdbc
llama90 Jun 7, 2024
bb991d2
update annotations such as Before, ClassRule, TemporaryFolder, Test f…
llama90 Jun 8, 2024
2e3a62c
update annotations such as After, Before, Test for jdbc
llama90 Jun 8, 2024
7ce2273
update parameterized test (step 1) for jdbc
llama90 Jun 8, 2024
297e5b7
update parameterized test (step 2) for jdbc
llama90 Jun 8, 2024
536f97a
update org.junit.Assert to org.junit.jupiter.api.Assertions for orc
llama90 Jun 8, 2024
803a7d8
update annotations such as BeforeClass, Rule, TemporaryFolder
llama90 Jun 9, 2024
d5f1375
perform self-review to add missing access modifier for orc
llama90 Jun 9, 2024
4e83d20
perform self-review to update temporary folder for avro
llama90 Jun 9, 2024
37f868c
perform self-review for jdbc
llama90 Jun 9, 2024
dfd023f
perform mvn spotless:apply
llama90 Jun 10, 2024
b4e8ef0
reflect the review feedback
llama90 Jun 10, 2024
76d62b8
remove incorrect code
llama90 Jun 12, 2024
0c50f7f
update code for consistency
llama90 Jun 12, 2024
e563b45
test try-with-resources
llama90 Jun 13, 2024
14138d5
apply lint
llama90 Jun 13, 2024
93b80b6
test try-with-resources
llama90 Jun 13, 2024
58a81ab
test try-with-resources for TestWriteReadAvroRecord
llama90 Jun 13, 2024
993e4da
test try-with-resources for AvroToArrowIteratorTest
llama90 Jun 13, 2024
1d4c4d1
enhance duplicate
llama90 Jun 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import org.apache.avro.Conversions;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericFixed;
import org.junit.Test;
import org.junit.jupiter.api.Test;

public class AvroLogicalTypesTest extends AvroTestBase {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
package org.apache.arrow.adapter.avro;

import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
Expand All @@ -31,7 +31,7 @@
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.junit.Test;
import org.junit.jupiter.api.Test;

public class AvroSkipFieldTest extends AvroTestBase {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
*/
package org.apache.arrow.adapter.avro;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.File;
import java.io.FileInputStream;
Expand All @@ -43,17 +43,16 @@
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.rules.TemporaryFolder;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.io.TempDir;

public class AvroTestBase {

@ClassRule public static final TemporaryFolder TMP = new TemporaryFolder();
@TempDir public File TMP;

protected AvroToArrowConfig config;

@Before
@BeforeEach
public void init() {
BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
config = new AvroToArrowConfigBuilder(allocator).build();
Expand Down Expand Up @@ -82,19 +81,21 @@ public static Schema getSchema(String schemaName) throws Exception {
}

protected VectorSchemaRoot writeAndRead(Schema schema, List data) throws Exception {
File dataFile = TMP.newFile();
File dataFile = new File(TMP, "test.avro");

BinaryEncoder encoder =
new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null);
DatumWriter writer = new GenericDatumWriter(schema);
BinaryDecoder decoder =
new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null);
try (FileOutputStream fos = new FileOutputStream(dataFile);
FileInputStream fis = new FileInputStream(dataFile)) {

for (Object value : data) {
writer.write(value, encoder);
}
BinaryEncoder encoder = new EncoderFactory().directBinaryEncoder(fos, null);
DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
BinaryDecoder decoder = new DecoderFactory().directBinaryDecoder(fis, null);

for (Object value : data) {
writer.write(value, encoder);
}

return AvroToArrow.avroToArrow(schema, decoder, config);
return AvroToArrow.avroToArrow(schema, decoder, config);
}
}

protected void checkArrayResult(List<List<?>> expected, ListVector vector) {
Expand Down
Copy link
Contributor Author

@llama90 llama90 Jun 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was getting an error on Windows saying "failed to delete temp directory" because "The process cannot access the file because it is being used by another process."

To fix this, I found out that using try-with-resources to properly close resources would help.

But for the convert method, the FileInputStream (fis) needs to stay open for the iterator after the convert method returns. Closing fis inside convert caused a "Stream Closed" error, while leaving it unclosed caused the "failed to delete temp directory" error. So, I made a writeDataToFile method for the FileOutputStream and used try-with-resources for fis in the same method where the iterator is used.

Now, the tests run fine on Windows too.

Here are the different approaches I tried:

Attempt 1:

convert method:

  • FileOutputStream with try-with-resources
  • FileInputStream with try-with-resources

Error: Stream Closed - because the fis was closed while the iterator still needed it.

Attempt 2:

convert method:

  • FileOutputStream with try-with-resources
  • FileInputStream

Error: failed to delete temp directory - because the fis wasn't closed.

Attempt 3:

convert method:

  • FileOutputStream with try-with-resources
  • Added FileInputStream as a parameter to the convert method and explicitly closed it in the test method.

Didn't work as expected.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pushing this one and evaluating diverse approaches.

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
package org.apache.arrow.adapter.avro;

import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.EOFException;
import java.io.File;
Expand Down Expand Up @@ -44,29 +44,31 @@
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.util.Utf8;
import org.junit.Test;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class AvroToArrowIteratorTest extends AvroTestBase {

@Override
@BeforeEach
public void init() {
final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
this.config = new AvroToArrowConfigBuilder(allocator).setTargetBatchSize(3).build();
}

private AvroToArrowVectorIterator convert(Schema schema, List data) throws Exception {
File dataFile = TMP.newFile();
private void writeDataToFile(Schema schema, List<?> data, File dataFile) throws Exception {
try (FileOutputStream fos = new FileOutputStream(dataFile)) {
BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(fos, null);
DatumWriter<Object> writer = new GenericDatumWriter<>(schema);

BinaryEncoder encoder =
new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null);
DatumWriter writer = new GenericDatumWriter(schema);
BinaryDecoder decoder =
new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null);

for (Object value : data) {
writer.write(value, encoder);
for (Object value : data) {
writer.write(value, encoder);
}
encoder.flush();
}
}

private AvroToArrowVectorIterator convert(Schema schema, FileInputStream fis) throws Exception {
BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(fis, null);
return AvroToArrow.avroToArrowIterator(schema, decoder, config);
}

Expand All @@ -75,9 +77,13 @@ public void testStringType() throws Exception {
Schema schema = getSchema("test_primitive_string.avsc");
List<String> data = Arrays.asList("v1", "v2", "v3", "v4", "v5");

File dataFile = new File(TMP, "test.avro");
writeDataToFile(schema, data, dataFile);

List<VectorSchemaRoot> roots = new ArrayList<>();
List<FieldVector> vectors = new ArrayList<>();
try (AvroToArrowVectorIterator iterator = convert(schema, data)) {
try (FileInputStream fis = new FileInputStream(dataFile);
AvroToArrowVectorIterator iterator = convert(schema, fis)) {
while (iterator.hasNext()) {
VectorSchemaRoot root = iterator.next();
FieldVector vector = root.getFieldVectors().get(0);
Expand All @@ -103,9 +109,13 @@ public void testNullableStringType() throws Exception {
data.add(record);
}

File dataFile = new File(TMP, "test.avro");
writeDataToFile(schema, data, dataFile);

List<VectorSchemaRoot> roots = new ArrayList<>();
List<FieldVector> vectors = new ArrayList<>();
try (AvroToArrowVectorIterator iterator = convert(schema, data); ) {
try (FileInputStream fis = new FileInputStream(dataFile);
AvroToArrowVectorIterator iterator = convert(schema, fis)) {
while (iterator.hasNext()) {
VectorSchemaRoot root = iterator.next();
FieldVector vector = root.getFieldVectors().get(0);
Expand All @@ -129,8 +139,12 @@ public void testRecordType() throws Exception {
data.add(record);
}

File dataFile = new File(TMP, "test.avro");
writeDataToFile(schema, data, dataFile);

List<VectorSchemaRoot> roots = new ArrayList<>();
try (AvroToArrowVectorIterator iterator = convert(schema, data)) {
try (FileInputStream fis = new FileInputStream(dataFile);
AvroToArrowVectorIterator iterator = convert(schema, fis)) {
while (iterator.hasNext()) {
roots.add(iterator.next());
}
Expand All @@ -150,9 +164,13 @@ public void testArrayType() throws Exception {
Arrays.asList("1vvv", "2bbb"),
Arrays.asList("1fff", "2"));

File dataFile = new File(TMP, "test.avro");
writeDataToFile(schema, data, dataFile);

List<VectorSchemaRoot> roots = new ArrayList<>();
List<ListVector> vectors = new ArrayList<>();
try (AvroToArrowVectorIterator iterator = convert(schema, data)) {
try (FileInputStream fis = new FileInputStream(dataFile);
AvroToArrowVectorIterator iterator = convert(schema, fis)) {
while (iterator.hasNext()) {
VectorSchemaRoot root = iterator.next();
roots.add(root);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
package org.apache.arrow.adapter.avro;

import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
Expand All @@ -34,7 +34,7 @@
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.junit.Test;
import org.junit.jupiter.api.Test;

public class AvroToArrowTest extends AvroTestBase {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
package org.apache.arrow.adapter.avro;

import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.File;
import java.util.ArrayList;
Expand All @@ -30,18 +30,16 @@
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

public class TestWriteReadAvroRecord {

@ClassRule public static final TemporaryFolder TMP = new TemporaryFolder();
@TempDir public static File TMP;

@Test
public void testWriteAndRead() throws Exception {

File dataFile = TMP.newFile();
File dataFile = new File(TMP, "test.avro");
Schema schema = AvroTestBase.getSchema("test.avsc");

// write data to disk
Expand All @@ -55,20 +53,22 @@ public void testWriteAndRead() throws Exception {
user2.put("favorite_color", "red");

DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
dataFileWriter.create(schema, dataFile);
dataFileWriter.append(user1);
dataFileWriter.append(user2);
dataFileWriter.close();
try (DataFileWriter<GenericRecord> dataFileWriter =
new DataFileWriter<GenericRecord>(datumWriter)) {
dataFileWriter.create(schema, dataFile);
dataFileWriter.append(user1);
dataFileWriter.append(user2);
}

// read data from disk
DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
DataFileReader<GenericRecord> dataFileReader =
new DataFileReader<GenericRecord>(dataFile, datumReader);
List<GenericRecord> result = new ArrayList<>();
while (dataFileReader.hasNext()) {
GenericRecord user = dataFileReader.next();
result.add(user);
try (DataFileReader<GenericRecord> dataFileReader =
new DataFileReader<GenericRecord>(dataFile, datumReader)) {
while (dataFileReader.hasNext()) {
GenericRecord user = dataFileReader.next();
result.add(user);
}
}

assertEquals(2, result.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.util.ValueVectorUtility;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;

/** Class to abstract out some common test functionality for testing JDBC to Arrow. */
public abstract class AbstractJdbcToArrowTest {
Expand Down Expand Up @@ -94,8 +92,9 @@ protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes"
* @throws SQLException on error
* @throws ClassNotFoundException on error
*/
@Before
public void setUp() throws SQLException, ClassNotFoundException {
protected void initializeDatabase(Table table) throws SQLException, ClassNotFoundException {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vibhatha I'll look into it more, but if you have any suggestions for a better structure, I would really appreciate your advice.

cc @lidavidm

classDiagram
    direction LR
    class AbstractJdbcToArrowTest {
    }

    class JdbcToArrowCharSetTest {
    }

    class JdbcToArrowDataTypesTest {
    }

    class JdbcToArrowMapDataTypeTest {
    }

    class JdbcToArrowNullTest {
    }

    class JdbcToArrowOptionalColumnsTest {
    }

    class JdbcToArrowTest {
    }

    class JdbcToArrowTimeZoneTest {
    }

    class JdbcToArrowVectorIteratorTest {
    }

    AbstractJdbcToArrowTest <|-- JdbcToArrowCharSetTest
    AbstractJdbcToArrowTest <|-- JdbcToArrowDataTypesTest
    AbstractJdbcToArrowTest <|-- JdbcToArrowMapDataTypeTest
    AbstractJdbcToArrowTest <|-- JdbcToArrowNullTest
    AbstractJdbcToArrowTest <|-- JdbcToArrowOptionalColumnsTest
    AbstractJdbcToArrowTest <|-- JdbcToArrowTest
    AbstractJdbcToArrowTest <|-- JdbcToArrowTimeZoneTest
    JdbcToArrowTest <|-- JdbcToArrowVectorIteratorTest
Loading

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@llama90 I will review this tomorrow. Thanks.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess what you are concerned about is making the database connection for each test case with a @BeforeEach setup method?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we need to create a database connection with a different .yml for each test, but a @BeforeEach method cannot receive the arguments of a @ParameterizedTest.

I described something similar here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fine

this.table = table;

TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
String url = "jdbc:h2:mem:JdbcToArrowTest";
String driver = "org.h2.Driver";
Expand All @@ -114,7 +113,7 @@ public void setUp() throws SQLException, ClassNotFoundException {
*
* @throws SQLException on error
*/
@After
@AfterEach
public void destroy() throws SQLException {
if (conn != null) {
conn.close();
Expand Down Expand Up @@ -146,11 +145,12 @@ public static Object[][] prepareTestData(
/**
* Abstract method to implement test Functionality to test JdbcToArrow methods.
*
* @param table Table object
* @throws SQLException on error
* @throws IOException on error
*/
@Test
public abstract void testJdbcToArrowValues() throws SQLException, IOException;
public abstract void testJdbcToArrowValues(Table table)
throws SQLException, IOException, ClassNotFoundException;

/**
* Abstract method to implement logic to assert test various datatype values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
*/
package org.apache.arrow.adapter.jdbc;

import static org.junit.Assert.*;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.sql.Types;
import org.junit.Test;
import org.junit.jupiter.api.Test;

public class JdbcFieldInfoTest {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class JdbcToArrowCommentMetadataTest {

Expand All @@ -53,7 +53,7 @@ public class JdbcToArrowCommentMetadataTest {
* @throws SQLException on error
* @throws ClassNotFoundException on error
*/
@Before
@BeforeEach
public void setUp() throws SQLException, ClassNotFoundException {
String url =
"jdbc:h2:mem:JdbcToArrowTest?characterEncoding=UTF-8;INIT=runscript from 'classpath:/h2/comment.sql'";
Expand All @@ -62,7 +62,7 @@ public void setUp() throws SQLException, ClassNotFoundException {
conn = DriverManager.getConnection(url);
}

@After
@AfterEach
public void tearDown() throws SQLException {
if (conn != null) {
conn.close();
Expand Down
Loading
Loading