From 790f892a2a533e3bdceb5e6e84168f2c1ea34e86 Mon Sep 17 00:00:00 2001 From: Crystal <45134936+CrystalZhou0529@users.noreply.github.com> Date: Thu, 5 Sep 2024 04:55:37 -0400 Subject: [PATCH] GH-43967: [C++] Enhance error message for URI parsing (#43938) ### Rationale for this change We want to enhance error message for URI parsing error to provide more information for the syntax error scenario. When error message is generated from `uriParseSingleUriExA`, the return value might indicate a `URI_ERROR_SYNTAX` error, and `error_pos` would be set to the position causing syntax error. ([uriparser/Uri.h](https://github.com/apache/arrow/blob/c455d6b8c4ae2cb22baceb4c27e1325b973d39e1/cpp/src/arrow/vendored/uriparser/Uri.h#L288)) In the new error message, it includes the character causing syntax error and its position, so users can have a better idea why the error happens. ### What changes are included in this PR? - Error message change in URI parsing function. ### Are these changes tested? PR includes unit tests. ### Are there any user-facing changes? Yes, but only for error message. * GitHub Issue: #41365 * GitHub Issue: #43967 Authored-by: Crystal Zhou Signed-off-by: Joris Van den Bossche --- cpp/src/arrow/filesystem/filesystem_test.cc | 8 ++++++++ cpp/src/arrow/util/uri.cc | 13 ++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem_test.cc b/cpp/src/arrow/filesystem/filesystem_test.cc index 8477647b2cd73..5072c3a8c25b1 100644 --- a/cpp/src/arrow/filesystem/filesystem_test.cc +++ b/cpp/src/arrow/filesystem/filesystem_test.cc @@ -20,6 +20,7 @@ #include #include +#include #include #include "arrow/filesystem/filesystem.h" @@ -632,6 +633,13 @@ TEST_F(TestMockFS, FileSystemFromUri) { ASSERT_OK_AND_ASSIGN(fs_, FileSystemFromUri("mock:///foo/bar?q=zzz", &path)); ASSERT_EQ(path, "foo/bar"); CheckDirs({}); + ASSERT_OK_AND_ASSIGN(fs_, FileSystemFromUri("mock:/folder+name/bar?q=zzz", &path)); + ASSERT_EQ(path, "folder+name/bar"); + CheckDirs({}); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("syntax error at character ' ' (position 12)"), + FileSystemFromUri("mock:/folder name/bar", &path)); + CheckDirs({}); } //////////////////////////////////////////////////////////////////////////// diff --git a/cpp/src/arrow/util/uri.cc b/cpp/src/arrow/util/uri.cc index 9c0f7f9a59630..6c0787a87e046 100644 --- a/cpp/src/arrow/util/uri.cc +++ b/cpp/src/arrow/util/uri.cc @@ -250,9 +250,16 @@ Status Uri::Parse(const std::string& uri_string) { const auto& s = impl_->KeepString(uri_string); impl_->string_rep_ = s; const char* error_pos; - if (uriParseSingleUriExA(&impl_->uri_, s.data(), s.data() + s.size(), &error_pos) != - URI_SUCCESS) { - return Status::Invalid("Cannot parse URI: '", uri_string, "'"); + int retval = + uriParseSingleUriExA(&impl_->uri_, s.data(), s.data() + s.size(), &error_pos); + if (retval != URI_SUCCESS) { + if (retval == URI_ERROR_SYNTAX) { + return Status::Invalid("Cannot parse URI: '", uri_string, + "' due to syntax error at character '", *error_pos, + "' (position ", error_pos - s.data(), ")"); + } else { + return Status::Invalid("Cannot parse URI: '", uri_string, "'"); + } } const auto scheme = TextRangeToView(impl_->uri_.scheme);