[SPARK-49240][BUILD] Add scalastyle and checkstyle rules to avoid `URL` constructors

### What changes were proposed in this pull request?

This PR aims to add `scalastyle` and `checkstyle` rules to avoid `URL` constructors.

### Why are the changes needed?

The `java.net.URL` class does not itself encode or decode any URL components according to the escaping mechanism defined in RFC 2396.

Because of this, all `URL` constructors are deprecated as of Java 20, and we should use the `URI` class instead.
- https://bugs.openjdk.org/browse/JDK-8295949
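
As a rough illustration (not code from this patch; the URL below is only a placeholder), the migration pattern that the new rules push toward looks like this:

```scala
import java.net.{URI, URL}

// What the new rules flag: the deprecated URL constructor performs no
// validation or escaping of its input (JDK-8295949).
val legacy: URL = new URL("https://spark.apache.org/docs/latest/")

// The preferred replacement: build a URI, which is validated against
// RFC 2396, then convert it to a URL.
val preferred: URL = new URI("https://spark.apache.org/docs/latest/").toURL
```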

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs with newly added rules.

After this PR, there are only two exceptional instances, in `JettyUtils.scala` and `UISuite.scala`.
- `JettyUtils` is a tricky instance.
- The `UISuite` test case deliberately constructs a bad URL, which `URI` would reject with `java.net.URISyntaxException`. This is an example of why `URI` is better. In this PR, we keep the old `URL` class there to preserve the test coverage.
```
$ git grep -C1 'new URL('
core/src/main/scala/org/apache/spark/ui/JettyUtils.scala-        // scalastyle:off URLConstructor
core/src/main/scala/org/apache/spark/ui/JettyUtils.scala:        val newUrl = new URL(requestURL, prefixedDestPath).toString
core/src/main/scala/org/apache/spark/ui/JettyUtils.scala-        // scalastyle:on URLConstructor
--
core/src/test/scala/org/apache/spark/ui/UISuite.scala-      // scalastyle:off URLConstructor
core/src/test/scala/org/apache/spark/ui/UISuite.scala:      val badRequest = new URL(
core/src/test/scala/org/apache/spark/ui/UISuite.scala-        s"http://$localhost:${serverInfo.boundPort}$path/root?bypass&invalid<=foo")
```
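
As a minimal sketch of that difference, assuming a hypothetical local host and port rather than the test's real server, `URI` rejects the string up front while the deprecated `URL` constructor accepts it unchecked:

```scala
import java.net.{URI, URISyntaxException, URL}

val bad = "http://localhost:8080/root?bypass&invalid<=foo"

// URI validates its input and rejects the illegal '<' in the query string.
try {
  new URI(bad)
} catch {
  case e: URISyntaxException => println(s"URI rejected it: ${e.getMessage}")
}

// scalastyle:off URLConstructor
// The deprecated URL constructor accepts the same string unchecked, which is
// exactly what the test needs in order to send a deliberately malformed request.
val badRequest: URL = new URL(bad)
// scalastyle:on URLConstructor
```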

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#47762 from dongjoon-hyun/SPARK-49240.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
dongjoon-hyun authored and LuciferYang committed Aug 15, 2024
1 parent a0fc398 commit 0c24ae1
Showing 20 changed files with 95 additions and 80 deletions.
@@ -18,7 +18,7 @@
package org.apache.spark.sql.avro

import java.io._
-import java.net.URL
+import java.net.URI
import java.nio.file.{Files, Paths, StandardCopyOption}
import java.sql.{Date, Timestamp}
import java.util.UUID
@@ -648,7 +648,7 @@ abstract class AvroSuite
assert(message.contains("No Avro files found."))

Files.copy(
-Paths.get(new URL(episodesAvro).toURI),
+Paths.get(new URI(episodesAvro)),
Paths.get(dir.getCanonicalPath, "episodes.avro"))

val result = spark.read.format("avro").load(episodesAvro).collect()
@@ -2139,7 +2139,7 @@ abstract class AvroSuite
test("SPARK-24805: do not ignore files without .avro extension by default") {
withTempDir { dir =>
Files.copy(
-Paths.get(new URL(episodesAvro).toURI),
+Paths.get(new URI(episodesAvro)),
Paths.get(dir.getCanonicalPath, "episodes"))

val fileWithoutExtension = s"${dir.getCanonicalPath}/episodes"
@@ -2178,7 +2178,7 @@ abstract class AvroSuite
test("SPARK-24836: ignoreExtension must override hadoop's config") {
withTempDir { dir =>
Files.copy(
-Paths.get(new URL(episodesAvro).toURI),
+Paths.get(new URI(episodesAvro)),
Paths.get(dir.getCanonicalPath, "episodes"))

val hadoopConf = spark.sessionState.newHadoopConf()
@@ -18,7 +18,7 @@
package org.apache.spark.deploy

import java.io._
-import java.net.URL
+import java.net.URI
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeoutException

@@ -351,7 +351,7 @@ private class TestMasterInfo(val ip: String, val dockerId: DockerId, val logFile
def readState(): Unit = {
try {
val masterStream = new InputStreamReader(
-new URL("http://%s:8080/json".format(ip)).openStream, StandardCharsets.UTF_8)
+new URI("http://%s:8080/json".format(ip)).toURL.openStream, StandardCharsets.UTF_8)
val json = JsonMethods.parse(masterStream)

val workers = json \ "workers"
@@ -18,7 +18,7 @@
package org.apache.spark.deploy.rest

import java.io.{DataOutputStream, FileNotFoundException}
-import java.net.{ConnectException, HttpURLConnection, SocketException, URL}
+import java.net.{ConnectException, HttpURLConnection, SocketException, URI, URL}
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeoutException

@@ -383,37 +383,37 @@ private[spark] class RestSubmissionClient(master: String) extends Logging {
/** Return the REST URL for creating a new submission. */
private def getSubmitUrl(master: String): URL = {
val baseUrl = getBaseUrl(master)
-new URL(s"$baseUrl/create")
+new URI(s"$baseUrl/create").toURL
}

/** Return the REST URL for killing an existing submission. */
private def getKillUrl(master: String, submissionId: String): URL = {
val baseUrl = getBaseUrl(master)
-new URL(s"$baseUrl/kill/$submissionId")
+new URI(s"$baseUrl/kill/$submissionId").toURL
}

/** Return the REST URL for killing all submissions. */
private def getKillAllUrl(master: String): URL = {
val baseUrl = getBaseUrl(master)
-new URL(s"$baseUrl/killall")
+new URI(s"$baseUrl/killall").toURL
}

/** Return the REST URL for clear all existing submissions and applications. */
private def getClearUrl(master: String): URL = {
val baseUrl = getBaseUrl(master)
-new URL(s"$baseUrl/clear")
+new URI(s"$baseUrl/clear").toURL
}

/** Return the REST URL for requesting the readyz API. */
private def getReadyzUrl(master: String): URL = {
val baseUrl = getBaseUrl(master)
-new URL(s"$baseUrl/readyz")
+new URI(s"$baseUrl/readyz").toURL
}

/** Return the REST URL for requesting the status of an existing submission. */
private def getStatusUrl(master: String, submissionId: String): URL = {
val baseUrl = getBaseUrl(master)
-new URL(s"$baseUrl/status/$submissionId")
+new URI(s"$baseUrl/status/$submissionId").toURL
}

/** Return the base URL for communicating with the server, including the protocol version. */
5 changes: 4 additions & 1 deletion core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -150,7 +150,10 @@ private[spark] object JettyUtils extends Logging {
private def doRequest(request: HttpServletRequest, response: HttpServletResponse): Unit = {
beforeRedirect(request)
// Make sure we don't end up with "//" in the middle
-val newUrl = new URL(new URL(request.getRequestURL.toString), prefixedDestPath).toString
+val requestURL = new URI(request.getRequestURL.toString).toURL
+// scalastyle:off URLConstructor
+val newUrl = new URL(requestURL, prefixedDestPath).toString
+// scalastyle:on URLConstructor
response.sendRedirect(newUrl)
}
// SPARK-5983 ensure TRACE is not supported
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -638,7 +638,7 @@ private[spark] object Utils
val is = Channels.newInputStream(source)
downloadFile(url, is, targetFile, fileOverwrite)
case "http" | "https" | "ftp" =>
-val uc = new URL(url).openConnection()
+val uc = new URI(url).toURL.openConnection()
val timeoutMs =
conf.getTimeAsSeconds("spark.files.fetchTimeout", "60s").toInt * 1000
uc.setConnectTimeout(timeoutMs)
@@ -17,7 +17,7 @@

package org.apache.spark.deploy

-import java.net.URL
+import java.net.URI

import scala.collection.mutable
import scala.io.Source
@@ -65,7 +65,7 @@ class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {
listener.addedExecutorInfos.values.foreach { info =>
assert(info.logUrlMap.nonEmpty)
info.logUrlMap.values.foreach { logUrl =>
-assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
+assert(new URI(logUrl).toURL.getHost === SPARK_PUBLIC_DNS)
}
}
}
@@ -17,7 +17,7 @@

package org.apache.spark.deploy.history

-import java.net.URL
+import java.net.URI

import jakarta.servlet.http.HttpServletResponse
import org.json4s.DefaultFormats
@@ -72,7 +72,7 @@ class HistoryServerPageSuite extends SparkFunSuite with BeforeAndAfter {
ApplicationStatus.COMPLETED.toString.toLowerCase()
}
val (code, jsonOpt, errOpt) = HistoryServerSuite.getContentAndCode(
-new URL(s"http://$localhost:$port/api/v1/applications?status=$param")
+new URI(s"http://$localhost:$port/api/v1/applications?status=$param").toURL
)
assert(code == HttpServletResponse.SC_OK)
assert(jsonOpt.isDefined)
@@ -107,7 +107,7 @@ class HistoryServerPageSuite extends SparkFunSuite with BeforeAndAfter {
startHistoryServer(logDirs.head, title)
val page = new HistoryPage(server.get)
val (code, htmlOpt, errOpt) = HistoryServerSuite.getContentAndCode(
-new URL(s"http://$localhost:$port/")
+new URI(s"http://$localhost:$port/").toURL
)
assert(code == HttpServletResponse.SC_OK)
val expected = title.getOrElse("History Server")
@@ -17,7 +17,7 @@
package org.apache.spark.deploy.history

import java.io.{File, FileInputStream, FileWriter, InputStream, IOException}
-import java.net.{HttpURLConnection, URL}
+import java.net.{HttpURLConnection, URI, URL}
import java.nio.charset.StandardCharsets
import java.util.zip.ZipInputStream

@@ -261,9 +261,9 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with

val url = attemptId match {
case Some(id) =>
-new URL(s"${generateURL(s"applications/$appId")}/$id/logs")
+new URI(s"${generateURL(s"applications/$appId")}/$id/logs").toURL
case None =>
-new URL(s"${generateURL(s"applications/$appId")}/logs")
+new URI(s"${generateURL(s"applications/$appId")}/logs").toURL
}

val (code, inputStream, error) = HistoryServerSuite.connectAndGetInputStream(url)
@@ -433,12 +433,12 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with

// build a URL for an app or app/attempt plus a page underneath
def buildURL(appId: String, suffix: String): URL = {
-new URL(s"http://$localhost:$port/history/$appId$suffix")
+new URI(s"http://$localhost:$port/history/$appId$suffix").toURL
}

// build a rest URL for the application and suffix.
def applications(appId: String, suffix: String): URL = {
-new URL(s"http://$localhost:$port/api/v1/applications/$appId$suffix")
+new URI(s"http://$localhost:$port/api/v1/applications/$appId$suffix").toURL
}

// start initial job
@@ -601,7 +601,7 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with
tests.foreach { case (user, expectedCode) =>
testUrls.foreach { url =>
val headers = if (user != null) Seq(FakeAuthFilter.FAKE_HTTP_USER -> user) else Nil
-val sc = TestUtils.httpResponseCode(new URL(url), headers = headers)
+val sc = TestUtils.httpResponseCode(new URI(url).toURL, headers = headers)
assert(sc === expectedCode, s"Unexpected status code $sc for $url (user = $user)")
}
}
@@ -620,7 +620,7 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with
s"http://$localhost:$port/api/v1/applications/$appId/2/logs")

testUrls.foreach { url =>
-TestUtils.httpResponseCode(new URL(url))
+TestUtils.httpResponseCode(new URI(url).toURL)
}
assert(server.cacheMetrics.loadCount.getCount === 0, "downloading event log shouldn't load ui")
}
@@ -642,7 +642,7 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with
case _ =>
assert(location.stripSuffix("/") === url.toString)
}
-HistoryServerSuite.getUrl(new URL(location))
+HistoryServerSuite.getUrl(new URI(location).toURL)
}
}

@@ -676,22 +676,22 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with
def buildPageAttemptUrl(appId: String, attemptId: Option[Int]): URL = {
attemptId match {
case Some(id) =>
-new URL(s"http://$localhost:$port/history/$appId/$id")
+new URI(s"http://$localhost:$port/history/$appId/$id").toURL
case None =>
-new URL(s"http://$localhost:$port/history/$appId")
+new URI(s"http://$localhost:$port/history/$appId").toURL
}
}

def getContentAndCode(path: String, port: Int = port): (Int, Option[String], Option[String]) = {
-HistoryServerSuite.getContentAndCode(new URL(s"http://$localhost:$port/api/v1/$path"))
+HistoryServerSuite.getContentAndCode(new URI(s"http://$localhost:$port/api/v1/$path").toURL)
}

def getUrl(path: String): String = {
HistoryServerSuite.getUrl(generateURL(path))
}

def generateURL(path: String): URL = {
-new URL(s"http://$localhost:$port/api/v1/$path")
+new URI(s"http://$localhost:$port/api/v1/$path").toURL
}

def generateExpectation(name: String, path: String): Unit = {
@@ -706,7 +706,7 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with
test("SPARK-31697: HistoryServer should set Content-Type") {
val port = server.boundPort
val nonExistenceAppId = "local-non-existence"
-val url = new URL(s"http://$localhost:$port/history/$nonExistenceAppId")
+val url = new URI(s"http://$localhost:$port/history/$nonExistenceAppId").toURL
val conn = url.openConnection().asInstanceOf[HttpURLConnection]
conn.setRequestMethod("GET")
conn.connect()
@@ -717,7 +717,7 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with

test("Redirect to the root page when accessed to /history/") {
val port = server.boundPort
-val url = new URL(s"http://$localhost:$port/history/")
+val url = new URI(s"http://$localhost:$port/history/").toURL
val conn = url.openConnection().asInstanceOf[HttpURLConnection]
conn.setRequestMethod("GET")
conn.setUseCaches(false)
@@ -17,7 +17,7 @@

package org.apache.spark.deploy.master

-import java.net.{HttpURLConnection, URL}
+import java.net.{HttpURLConnection, URI}

import scala.concurrent.duration._

@@ -38,7 +38,7 @@ class MasterDecommisionSuite extends MasterSuiteBase {
val masterUrl = s"http://${Utils.localHostNameForURI()}:${localCluster.masterWebUIPort}"
try {
eventually(timeout(30.seconds), interval(100.milliseconds)) {
-val url = new URL(s"$masterUrl/workers/kill/?host=${Utils.localHostNameForURI()}")
+val url = new URI(s"$masterUrl/workers/kill/?host=${Utils.localHostNameForURI()}").toURL
val conn = url.openConnection().asInstanceOf[HttpURLConnection]
conn.setRequestMethod("POST")
assert(conn.getResponseCode === 405)
@@ -18,7 +18,7 @@
package org.apache.spark.deploy.master.ui

import java.io.DataOutputStream
-import java.net.{HttpURLConnection, URL}
+import java.net.{HttpURLConnection, URI}
import java.nio.charset.StandardCharsets
import java.util.Date

@@ -125,7 +125,7 @@ object MasterWebUISuite {
url: String,
method: String,
body: String = ""): HttpURLConnection = {
-val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
+val conn = new URI(url).toURL.openConnection().asInstanceOf[HttpURLConnection]
conn.setRequestMethod(method)
if (body.nonEmpty) {
conn.setDoOutput(true)
@@ -18,7 +18,7 @@
package org.apache.spark.deploy.rest

import java.io.DataOutputStream
-import java.net.{HttpURLConnection, URL}
+import java.net.{HttpURLConnection, URI}
import java.nio.charset.StandardCharsets
import java.util.Base64

@@ -674,7 +674,7 @@ class StandaloneRestSubmitSuite extends SparkFunSuite {
url: String,
method: String,
body: String = ""): HttpURLConnection = {
-val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
+val conn = new URI(url).toURL.openConnection().asInstanceOf[HttpURLConnection]
conn.setRequestMethod(method)
if (body.nonEmpty) {
conn.setDoOutput(true)
16 changes: 8 additions & 8 deletions core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala
@@ -17,7 +17,7 @@

package org.apache.spark.ui

-import java.net.URL
+import java.net.{URI, URL}
import java.util.Locale

import scala.io.Source
@@ -544,8 +544,8 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers {
withSpark(newSparkContext(killEnabled = true)) { sc =>
sc.parallelize(1 to 10).map{x => Thread.sleep(10000); x}.countAsync()
eventually(timeout(5.seconds), interval(50.milliseconds)) {
-val url = new URL(
-sc.ui.get.webUrl.stripSuffix("/") + "/stages/stage/kill/?id=0")
+val url = new URI(
+sc.ui.get.webUrl.stripSuffix("/") + "/stages/stage/kill/?id=0").toURL
// SPARK-6846: should be POST only but YARN AM doesn't proxy POST
TestUtils.httpResponseCode(url, "GET") should be (200)
TestUtils.httpResponseCode(url, "POST") should be (200)
@@ -557,8 +557,8 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers {
withSpark(newSparkContext(killEnabled = true)) { sc =>
sc.parallelize(1 to 10).map{x => Thread.sleep(10000); x}.countAsync()
eventually(timeout(5.seconds), interval(50.milliseconds)) {
-val url = new URL(
-sc.ui.get.webUrl.stripSuffix("/") + "/jobs/job/kill/?id=0")
+val url = new URI(
+sc.ui.get.webUrl.stripSuffix("/") + "/jobs/job/kill/?id=0").toURL
// SPARK-6846: should be POST only but YARN AM doesn't proxy POST
TestUtils.httpResponseCode(url, "GET") should be (200)
TestUtils.httpResponseCode(url, "POST") should be (200)
@@ -692,8 +692,8 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers {

test("live UI json application list") {
withSpark(newSparkContext()) { sc =>
-val appListRawJson = HistoryServerSuite.getUrl(new URL(
-sc.ui.get.webUrl + "/api/v1/applications"))
+val appListRawJson = HistoryServerSuite.getUrl(new URI(
+sc.ui.get.webUrl + "/api/v1/applications").toURL)
val appListJsonAst = JsonMethods.parse(appListRawJson)
appListJsonAst.children.length should be (1)
val attempts = (appListJsonAst.children.head \ "attempts").children
@@ -918,6 +918,6 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers {
}

def apiUrl(ui: SparkUI, path: String): URL = {
-new URL(ui.webUrl + "/api/v1/applications/" + ui.sc.get.applicationId + "/" + path)
+new URI(ui.webUrl + "/api/v1/applications/" + ui.sc.get.applicationId + "/" + path).toURL
}
}