diff --git a/SparkConnector/sparkconnector/configuration.py b/SparkConnector/sparkconnector/configuration.py
index 221a26fb..050a9a56 100644
--- a/SparkConnector/sparkconnector/configuration.py
+++ b/SparkConnector/sparkconnector/configuration.py
@@ -131,7 +131,7 @@ def configure(self, opts, ports):
             conf.set(name, value)
 
         # Extend conf adding logging of log4j to java options
-        base_extra_java_options = "-Dlog4j.configuration=file:%s" % self.connector.log4j_file
+        base_extra_java_options = "-Dlog4j2.configurationFile=%s" % self.connector.log4j_file
         extra_java_options = conf.get("spark.driver.extraJavaOptions")
         if extra_java_options:
             extra_java_options = base_extra_java_options + " " + extra_java_options
diff --git a/SparkConnector/sparkconnector/connector.py b/SparkConnector/sparkconnector/connector.py
index dffa2314..ee213092 100644
--- a/SparkConnector/sparkconnector/connector.py
+++ b/SparkConnector/sparkconnector/connector.py
@@ -175,7 +175,7 @@ def _recv(msg):
 
     def create_properties_file(self, log_path):
         """ Creates a configuration file for Spark log4j """
-        fd, path = tempfile.mkstemp()
+        fd, path = tempfile.mkstemp(suffix='.properties')
         os.close(fd)
         # Reopen tempfile because mkstemp opens it in binary format
         f = open(path, 'w')
@@ -186,7 +186,7 @@ def create_properties_file(self, log_path):
         for line in f_configs:
             f.write(line)
 
-        f.write(u'log4j.appender.file.File=%s\n' % log_path)
+        f.write(u'appender.file.fileName=%s\n' % log_path)
 
         f_configs.close()
         f.close()
diff --git a/SparkConnector/sparkconnector/log4j_conf b/SparkConnector/sparkconnector/log4j_conf
index 17912e3b..c98dee5c 100644
--- a/SparkConnector/sparkconnector/log4j_conf
+++ b/SparkConnector/sparkconnector/log4j_conf
@@ -1,21 +1,79 @@
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.target=System.err
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+# Root Logger
+rootLogger.level = WARN
+rootLogger.appenderRef.stdout.ref = STDOUT
+rootLogger.appenderRef.file.ref = LOGFILE
 
-log4j.rootCategory=WARN, console
+# Log to stdout
+appender.console.type = Console
+appender.console.name = STDOUT
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex
 
-log4j.logger.org.eclipse.jetty=WARN
-log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
-log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+# Log to file
+appender.file.type = File
+appender.file.name = LOGFILE
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex
 
+# SparkMonitor logger
+logger.sparkmonitor.name = MONITOR
+logger.sparkmonitor.level = WARN
+logger.sparkmonitor.appenderRef.stdout.ref = STDOUT
 
-log4j.appender.file=org.apache.log4j.RollingFileAppender
-log4j.appender.file.layout=org.apache.log4j.PatternLayout
-log4j.appender.file.layout.ConversionPattern=%m%n
-log4j.logger.org.apache.spark.api.python.PythonGatewayServer=WARN
-log4j.logger.org.apache.spark=WARN, file
+# Spark Default Template
+# Set the default spark-shell/spark-sql log level to WARN. When running the
+# spark-shell/spark-sql, the log level for these classes is used to overwrite
+# the root logger's log level, so that the user can have different defaults
+# for the shell and regular Spark apps.
+logger.repl.name = org.apache.spark.repl.Main
+logger.repl.level = info
 
-log4j.logger.sparkmonitor.listener.JupyterSparkMonitorListener=WARN
-log4j.logger.sparkmonitor.listener=WARN, console
+# Settings to quiet third party logs that are too verbose
+logger.jetty1.name = org.sparkproject.jetty
+logger.jetty1.level = info
+logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
+logger.jetty2.level = error
+logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
+logger.replexprTyper.level = info
+logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
+logger.replSparkILoopInterpreter.level = info
+logger.parquet1.name = org.apache.parquet
+logger.parquet1.level = error
+logger.parquet2.name = parquet
+logger.parquet2.level = error
+
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
+logger.RetryingHMSHandler.level = fatal
+logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
+logger.FunctionRegistry.level = error
+
+
+# NXCALS
+# Silencing: WARN URLConfigurationSource: No URLs will be polled as dynamic configuration sources.
+logger.urlConf.name = com.netflix.config.sources.URLConfigurationSource
+logger.urlConf.level = error
+
+# Silencing: WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+logger.nativeCodeLoader.name = org.apache.hadoop.util.NativeCodeLoader
+logger.nativeCodeLoader.level = error
+
+# Silencing: WARN DomainSocketFactory: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
+logger.domainSocketFactory.name = org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory
+logger.domainSocketFactory.level = error
+
+# Silencing: WARN Client: Same name resource added multiple times to distributed cache
+logger.sparkYarnClient.name = org.apache.spark.deploy.yarn.Client
+logger.sparkYarnClient.level = error
+
+# Silencing: WARN Client: Exception encountered while connecting to the server
+# org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby.
+logger.hadoopIpcClient.name = org.apache.hadoop.ipc.Client
+logger.hadoopIpcClient.level = error
+
+
+# Silencing: WARN Client: Exception encountered while connecting to the server
+# org.apache.zookeeper.ClientCnxnSocketNIO.doIO(ClientCnxnSocketNIO.java:77): Unable to read additional data from server sessionid, likely server has closed socket
+logger.zookeeper.name = org.apache.zookeeper.ClientCnxn
+logger.zookeeper.level = ERROR
 
diff --git a/SparkConnector/sparkconnector/logreader.py b/SparkConnector/sparkconnector/logreader.py
index fd2a36d2..8463b119 100644
--- a/SparkConnector/sparkconnector/logreader.py
+++ b/SparkConnector/sparkconnector/logreader.py
@@ -12,7 +12,7 @@ def __init__(self, connector, log):
         Thread.__init__(self)
 
     def format_log_line(self, line):
-        return line.strip() + "\n\n"
+        return line.strip() + "\n"
 
     def tail(self, max_size=10*1024*1024):
         # Use rb mode to be able to seek backwards
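
Note: the sketch below is a standalone editor's illustration of how the Python-side
changes cooperate, not code from this patch. TEMPLATE_PATH and LOG_PATH are
hypothetical placeholders; the real connector keeps this logic in
create_properties_file() and configure() as shown above.

    # Minimal sketch: render the bundled log4j2 template into a temp
    # .properties file and point the Spark driver at it.
    import os
    import tempfile

    from pyspark.conf import SparkConf

    TEMPLATE_PATH = '/path/to/sparkconnector/log4j_conf'  # hypothetical
    LOG_PATH = '/tmp/spark_connector.log'                 # hypothetical

    def create_properties_file(log_path, template_path):
        # The '.properties' suffix matters: log4j2 selects its
        # configuration factory based on the file extension.
        fd, path = tempfile.mkstemp(suffix='.properties')
        os.close(fd)
        with open(path, 'w') as out, open(template_path) as template:
            for line in template:  # copy the static template verbatim
                out.write(line)
            # log4j2 key for the file appender target
            # (log4j 1.x used log4j.appender.file.File)
            out.write('appender.file.fileName=%s\n' % log_path)
        return path

    conf = SparkConf()
    log4j_file = create_properties_file(LOG_PATH, TEMPLATE_PATH)
    # log4j2 reads -Dlog4j2.configurationFile and accepts a bare path;
    # log4j 1.x needed -Dlog4j.configuration with a file: URI.
    conf.set('spark.driver.extraJavaOptions',
             '-Dlog4j2.configurationFile=%s' % log4j_file)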
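
Two details worth calling out: the trailing %ex in the new layout patterns makes
the placement of exception stack traces explicit in log4j2 pattern syntax, and
the logreader change from "\n\n" to "\n" presumably removes the blank line that
previously double-spaced every tailed log line in the notebook UI.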