Skip to content

Commit

Permalink
added test
Browse files Browse the repository at this point in the history
  • Loading branch information
shrshi committed Nov 4, 2024
1 parent 0872f7c commit 4dbf16a
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 13 deletions.
2 changes: 1 addition & 1 deletion cpp/src/io/comp/io_uncomp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

using cudf::host_span;

namespace cudf {
namespace CUDF_EXPORT cudf {
namespace io {

/**
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/json/read_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ device_span<char> ingest_raw_input(device_span<char> buffer,
compression_type compression,
std::size_t range_offset,
std::size_t range_size,
char delimiter,
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
Expand Down Expand Up @@ -459,7 +460,7 @@ device_span<char> ingest_raw_input(device_span<char> buffer,
if (sources.size() > 1 && !delimiter_map.empty()) {
static_assert(num_delimiter_chars == 1,
"Currently only single-character delimiters are supported");
auto const delimiter_source = thrust::make_constant_iterator('\n');
auto const delimiter_source = thrust::make_constant_iterator(delimiter);
auto const d_delimiter_map = cudf::detail::make_device_uvector_async(
delimiter_map, stream, cudf::get_current_device_resource_ref());
thrust::scatter(rmm::exec_policy_nosync(stream),
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/json/read_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ constexpr int max_subchunks_prealloced = 3;
* @param compression Compression format of source
* @param range_offset Number of bytes to skip from source start
* @param range_size Number of bytes to read from source
* @param delimiter Delimiter character for JSONL inputs
* @param stream CUDA stream used for device memory operations and kernel launches
* @returns A subspan of the input device span containing data read
*/
Expand Down
17 changes: 17 additions & 0 deletions cpp/tests/io/json/json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2991,4 +2991,21 @@ TEST_F(JsonReaderTest, LastRecordInvalid)
CUDF_TEST_EXPECT_TABLES_EQUAL(result.tbl->view(), cudf::table_view{{expected}});
}

// Parses a three-record JSON Lines buffer held in host memory, with the
// RECOVER_WITH_NULL recovery mode selected, and checks the dimensions of the
// resulting table.
TEST_F(JsonReaderTest, Debug)
{
  // Every record carries the same five keys; "col5" is null in the first and
  // last records.
  std::string jsonl_input = R"({"col1":"a","col2":"d","col3":1,"col4":1.5,"col5":null}
{"col1":"b","col2":"e","col3":2,"col4":2.5,"col5":2}
{"col1":"c","col2":"f","col3":3,"col4":3.5,"col5":null}
)";

  // Configure the reader step by step instead of through one chained expression.
  auto options_builder = cudf::io::json_reader_options::builder(
    cudf::io::source_info{jsonl_input.data(), jsonl_input.size()});
  options_builder.lines(true);
  options_builder.recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL);
  auto reader_options = options_builder.build();

  auto const parsed = cudf::io::read_json(reader_options);

  // Five keys per record and three records in the input.
  EXPECT_EQ(parsed.tbl->num_columns(), 5);
  EXPECT_EQ(parsed.tbl->num_rows(), 3);
}

CUDF_TEST_PROGRAM_MAIN()
40 changes: 29 additions & 11 deletions cpp/tests/io/json/json_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/

#include "io/comp/io_uncomp.hpp"

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/default_stream.hpp>
Expand Down Expand Up @@ -185,12 +187,12 @@ TEST_P(JsonCompressedWriterTest, PlainTable)
cudf::io::compression_type const comptype = GetParam();
cudf::test::strings_column_wrapper col1{"a", "b", "c"};
cudf::test::strings_column_wrapper col2{"d", "e", "f"};
cudf::test::fixed_width_column_wrapper<int> col3{1, 2, 3};
cudf::test::fixed_width_column_wrapper<float> col4{1.5, 2.5, 3.5};
cudf::test::fixed_width_column_wrapper<int16_t> col5{{1, 2, 3},
cudf::test::fixed_width_column_wrapper<int64_t> col3{1, 2, 3};
cudf::test::fixed_width_column_wrapper<double> col4{1.5, 2.5, 3.5};
cudf::test::fixed_width_column_wrapper<int64_t> col5{{1, 2, 3},
cudf::test::iterators::nulls_at({0, 2})};
cudf::table_view tbl_view{{col1, col2, col3, col4, col5}};
cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"int"}, {"float"}, {"int16"}}};
cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"col3"}, {"col4"}, {"col5"}}};

std::vector<char> out_buffer;
auto destination = cudf::io::sink_info(&out_buffer);
Expand All @@ -203,12 +205,22 @@ TEST_P(JsonCompressedWriterTest, PlainTable)

cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());

if (comptype == cudf::io::compression_type::GZIP) {
auto decomp_out_buffer = cudf::io::decompress(comptype,
cudf::host_span<uint8_t const>(reinterpret_cast<uint8_t*>(out_buffer.data()), out_buffer.size()));
std::string const expected =
R"([{"col1":"a","col2":"d","col3":1,"col4":1.5,"col5":null},{"col1":"b","col2":"e","col3":2,"col4":2.5,"col5":2},{"col1":"c","col2":"f","col3":3,"col4":3.5,"col5":null}])";
EXPECT_EQ(expected, std::string(reinterpret_cast<char*>(decomp_out_buffer.data()), decomp_out_buffer.size()));
}

cudf::io::json_reader_options json_parser_options =
cudf::io::json_reader_options::builder(cudf::io::source_info{out_buffer.data(), out_buffer.size()})
.lines(false)
.compression(comptype);

auto result = cudf::io::read_json(json_parser_options);

/*
cudf::test::print(tbl_view.column(0));
cudf::test::print(tbl_view.column(1));
cudf::test::print(tbl_view.column(2));
Expand All @@ -220,22 +232,28 @@ TEST_P(JsonCompressedWriterTest, PlainTable)
cudf::test::print(result.tbl->get_column(2));
cudf::test::print(result.tbl->get_column(3));
cudf::test::print(result.tbl->get_column(4));
*/

EXPECT_EQ(result.tbl->num_columns(), 5);
EXPECT_EQ(result.tbl->num_rows(), 3);

EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRING);
EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::STRING);
EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::INT32);
EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::INT64);
EXPECT_EQ(result.tbl->get_column(3).type().id(), cudf::type_id::FLOAT64);
EXPECT_EQ(result.tbl->get_column(4).type().id(), cudf::type_id::INT16);
EXPECT_EQ(result.tbl->get_column(4).type().id(), cudf::type_id::INT64);

EXPECT_EQ(tbl_view.column(0).type().id(), cudf::type_id::STRING);
EXPECT_EQ(tbl_view.column(1).type().id(), cudf::type_id::STRING);
EXPECT_EQ(tbl_view.column(2).type().id(), cudf::type_id::INT32);
EXPECT_EQ(tbl_view.column(2).type().id(), cudf::type_id::INT64);
EXPECT_EQ(tbl_view.column(3).type().id(), cudf::type_id::FLOAT64);
EXPECT_EQ(tbl_view.column(4).type().id(), cudf::type_id::INT16);
EXPECT_EQ(tbl_view.column(4).type().id(), cudf::type_id::INT64);

EXPECT_EQ(result.metadata.schema_info[0].name, "a");
EXPECT_EQ(result.metadata.schema_info[1].name, "b");
EXPECT_EQ(result.metadata.schema_info[2].name, "c");
EXPECT_EQ(result.metadata.schema_info[0].name, "col1");
EXPECT_EQ(result.metadata.schema_info[1].name, "col2");
EXPECT_EQ(result.metadata.schema_info[2].name, "col3");
EXPECT_EQ(result.metadata.schema_info[3].name, "col4");
EXPECT_EQ(result.metadata.schema_info[4].name, "col5");

CUDF_TEST_EXPECT_TABLES_EQUAL(tbl_view, result.tbl->view());
}
Expand Down

0 comments on commit 4dbf16a

Please sign in to comment.