From 089659e53eb62be93e334a467b713bccd057125a Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 22 Apr 2024 17:04:54 -0700 Subject: [PATCH] Don't depend on spark-packages anymore --- README.md | 35 +++------------- .../app/EventHistoryToSparklensJson.scala | 40 ------------------- 2 files changed, 5 insertions(+), 70 deletions(-) delete mode 100644 src/main/scala/com/qubole/sparklens/app/EventHistoryToSparklensJson.scala diff --git a/README.md b/README.md index 19975fb..c119814 100644 --- a/README.md +++ b/README.md @@ -83,54 +83,29 @@ Spark users can help us in finding what is missing here by raising challenging t #### 1. Using the Sparklens package while running your app #### -Note: Apart from the console based report, you can also get an UI based report similar to -[this](http://sparklens.qubole.com/report_view/1b3868a49388e7ab6a16) in your email. You have to pass +You can also get a UI-based report in your email. You have to pass `--conf spark.sparklens.report.email=` along with other relevant confs mentioned below. This functionality is available in Sparklens 0.3.2 and above. Use the following arguments to `spark-submit` or `spark-shell`: ``` ---packages qubole:sparklens:0.3.2-s_2.11 +--jars your_packaged_jar.jar --conf spark.extraListeners=com.qubole.sparklens.QuboleJobListener ``` -#### 2. Run from Sparklens offline data #### - -You can choose not to run sparklens inside the app, but at a later time. Run your app as above -with additional configuration parameters: -``` ---packages qubole:sparklens:0.3.2-s_2.11 ---conf spark.extraListeners=com.qubole.sparklens.QuboleJobListener ---conf spark.sparklens.reporting.disabled=true -``` - -This will not run reporting, but instead create a Sparklens JSON file for the application which is -stored in the **spark.sparklens.data.dir** directory (by default, **/tmp/sparklens/**). Note that this will be stored on HDFS by default. To save this file to s3, please set **spark.sparklens.data.dir** to s3 path. 
This data file can now be used to run Sparklens reporting independently, using `spark-submit` command as follows: - -`./bin/spark-submit --packages qubole:sparklens:0.3.2-s_2.11 --class com.qubole.sparklens.app.ReporterApp qubole-dummy-arg ` - -`` should be replaced by the full path of sparklens json file. If the file is on s3 use the full s3 path. For files on local file system, use file:// prefix with the local file location. HDFS is supported as well. - -You can also upload a Sparklens JSON data file to http://sparklens.qubole.com to see this report as an HTML page. - -#### 3. Run from Spark event-history #### +#### 2. Run from Spark event-history #### You can also run Sparklens on a previously run spark-app using an event history. Note the extra `source=history` parameter in this example: -`./bin/spark-submit --packages qubole:sparklens:0.3.2-s_2.11 --class com.qubole.sparklens.app.ReporterApp qubole-dummy-arg source=history appId=` +`./bin/spark-submit --jars your_packaged_jar.jar --class com.qubole.sparklens.app.ReporterApp qubole-dummy-arg source=history appId=` And optionally you can also provide the parameter `attemptId=` Another option is to directly specify the event history file. This file can be in any of the formats the event history files supports, i.e. **text, snappy, lz4 or lzf**. -`./bin/spark-submit --packages qubole:sparklens:0.3.2-s_2.11 --class com.qubole.sparklens.app.ReporterApp qubole-dummy-arg source=history` - -It is also possible to convert an event history file to a Sparklens json file using the following command: - -`./bin/spark-submit --packages qubole:sparklens:0.3.2-s_2.11 --class com.qubole.sparklens.app.EventHistoryToSparklensJson qubole-dummy-arg ` +`./bin/spark-submit --jars your_packaged_jar.jar --class com.qubole.sparklens.app.ReporterApp qubole-dummy-arg source=history` -EventHistoryToSparklensJson is designed to work on local file system only. Please make sure that the source and target directories are on local file system. 
#### 4. Checkout the code and use the normal sbt commands: #### diff --git a/src/main/scala/com/qubole/sparklens/app/EventHistoryToSparklensJson.scala b/src/main/scala/com/qubole/sparklens/app/EventHistoryToSparklensJson.scala deleted file mode 100644 index 4fa52ba..0000000 --- a/src/main/scala/com/qubole/sparklens/app/EventHistoryToSparklensJson.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.qubole.sparklens.app - -import java.io.File - -object EventHistoryToSparklensJson { - - def main(args:Array[String]):Unit = { - val defaultDestination = new File("/tmp/sparklens/") - - val dirs = args.length match { - case 0 => (new File("."), defaultDestination) - case 1 => (new File(args(0)), defaultDestination) - case _ => (new File(args(0)), new File(args(1))) - } - println("Converting Event History files to Sparklens Json files") - println(s"src: ${dirs._1.getAbsolutePath} destination: ${dirs._2.getAbsolutePath}") - convert(dirs._1, dirs._2) - } - - private def convert(srcLoc:File, destLoc:File): Unit = { - if (srcLoc.isFile) { - try { - new EventHistoryReporter(srcLoc.getAbsolutePath, List( - ("spark.sparklens.reporting.disabled", "true"), - ("spark.sparklens.save.data", "true"), - ("spark.sparklens.data.dir", destLoc.getAbsolutePath) - )) - } catch { - case e: Exception => { - println(s"Failed to process file: ${srcLoc} error: ${e.getMessage}") - } - } - } else { - //This is a directory. Process all files - srcLoc.listFiles().foreach( f => { - convert(f, destLoc) - }) - } - } -} \ No newline at end of file