forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-50031][SQL] Add the `TryParseUrl` expression
### What changes were proposed in this pull request? This PR adds the `try_parse_url` expression, which sets `failOnError` to false by default. ### Why are the changes needed? The INVALID_URL error message contains a suggested fix of turning off ANSI mode. Now that ANSI mode is on by default in Spark 4.0.0, we want to keep suggestions of this kind to a minimum. The existing `try_*` functions provide a safe way to get the same behavior as with ANSI mode off, so suggesting them should be sufficient. In this case, the corresponding try expressions were missing, so new expressions were added to fill the gap. ### Does this PR introduce _any_ user-facing change? Yes, a new expression is added. ### How was this patch tested? Tests added. ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#48500 from jovanm-db/invalidUrl. Authored-by: Jovan Markovic <[email protected]> Signed-off-by: Max Gekk <[email protected]>
- Loading branch information
Showing
10 changed files
with
238 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -587,6 +587,7 @@ URL Functions | |
:toctree: api/ | ||
|
||
parse_url | ||
try_parse_url | ||
url_decode | ||
url_encode | ||
try_url_decode | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -84,6 +84,50 @@ class UrlFunctionsSuite extends QueryTest with SharedSparkSession { | |
} | ||
} | ||
|
||
// Exercises try_parse_url for each part key over several URLs, including one with a space in the scheme that is expected to yield null in every column. | ||
test("url try_parse_url function") { | ||
|
||
// Builds a single-row DataFrame holding `url`, selects try_parse_url for HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO and the 'query' key of QUERY (in that order), and compares the row against `expected`. | ||
def testUrl(url: String, expected: Row): Unit = { | ||
checkAnswer(Seq[String]((url)).toDF("url").selectExpr( | ||
"try_parse_url(url, 'HOST')", "try_parse_url(url, 'PATH')", | ||
"try_parse_url(url, 'QUERY')", "try_parse_url(url, 'REF')", | ||
"try_parse_url(url, 'PROTOCOL')", "try_parse_url(url, 'FILE')", | ||
"try_parse_url(url, 'AUTHORITY')", "try_parse_url(url, 'USERINFO')", | ||
"try_parse_url(url, 'QUERY', 'query')"), expected) | ||
} | ||
|
||
testUrl( | ||
"http://[email protected]/path?query=1#Ref", | ||
Row("spark.apache.org", "/path", "query=1", "Ref", | ||
"http", "/path?query=1", "[email protected]", "userinfo", "1")) | ||
|
||
// Percent-encoded sequences are expected to come back unchanged (not decoded) in every part. | ||
testUrl( | ||
"https://use%20r:pas%[email protected]/dir%20/pa%20th.HTML?query=x%20y&q2=2#Ref%20two", | ||
Row("example.com", "/dir%20/pa%20th.HTML", "query=x%20y&q2=2", "Ref%20two", | ||
"https", "/dir%20/pa%20th.HTML?query=x%20y&q2=2", "use%20r:pas%[email protected]", | ||
"use%20r:pas%20s", "x%20y")) | ||
|
||
// No path, query or fragment: PATH and FILE are expected as empty strings, QUERY and REF as null. | ||
testUrl( | ||
"http://user:pass@host", | ||
Row("host", "", null, null, "http", "", "user:pass@host", "user:pass", null)) | ||
|
||
testUrl( | ||
"http://user:pass@host/", | ||
Row("host", "/", null, null, "http", "/", "user:pass@host", "user:pass", null)) | ||
|
||
// A bare "?#" is expected to give empty-string QUERY and REF rather than null. | ||
testUrl( | ||
"http://user:pass@host/?#", | ||
Row("host", "/", "", "", "http", "/?", "user:pass@host", "user:pass", null)) | ||
|
||
testUrl( | ||
"http://user:pass@host/file;param?query;p2", | ||
Row("host", "/file;param", "query;p2", null, "http", "/file;param?query;p2", | ||
"user:pass@host", "user:pass", null)) | ||
|
||
// Space inside the scheme: every requested part is expected to be null. | ||
testUrl( | ||
"inva lid://user:pass@host/file;param?query;p2", | ||
Row(null, null, null, null, null, null, null, null, null)) | ||
} | ||
|
||
test("url encode/decode function") { | ||
def testUrl(url: String, fn: String, expected: Row): Unit = { | ||
checkAnswer(Seq[String]((url)).toDF("url") | ||
|