diff --git a/Dockerfile b/Dockerfile
index e888b51..e4763ba 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -94,6 +94,9 @@ EXPOSE 22 4040 7077 8020 8030 8031 8032 8042 8088 9000 10020 19888 50010 50020 5
 
 RUN mkdir -p /var/run/sshd ${HADOOP_HOME}/hdfs ${HADOOP_HOME}/hdfs/data ${HADOOP_HOME}/hdfs/data/dfs ${HADOOP_HOME}/hdfs/data/dfs/namenode ${HADOOP_HOME}/logs
 
+COPY spark/base/confs/log4j.properties /spark/conf/log4j.properties
+COPY spark/base/confs/log4j2.properties /spark/conf/log4j2.properties
+
 COPY . /gen3spark
 WORKDIR /gen3spark
 
diff --git a/spark/base/confs/log4j.properties b/spark/base/confs/log4j.properties
new file mode 100644
index 0000000..f526d88
--- /dev/null
+++ b/spark/base/confs/log4j.properties
@@ -0,0 +1,15 @@
+# Log4j 1.x console logging for Spark.
+# NOTE: the root level is DEBUG, which is very verbose; this file is meant
+# for troubleshooting.
+log4j.rootCategory=DEBUG, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+# %L (caller line number) is expensive to compute; remove it if overhead matters.
+log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p %c{1}:%L - %m%n
+# Redundant while the root level is DEBUG; these keep the components below at
+# DEBUG if the root level is later raised.
+log4j.logger.org.apache.spark.executor.CoarseGrainedExecutorBackend=DEBUG
+log4j.logger.org.apache.spark.scheduler.TaskSetManager=DEBUG
+log4j.logger.org.apache.hadoop.fs=DEBUG
+log4j.logger.org.apache.spark.scheduler.DAGScheduler=DEBUG
diff --git a/spark/base/confs/log4j2.properties b/spark/base/confs/log4j2.properties
new file mode 100644
index 0000000..a256c30
--- /dev/null
+++ b/spark/base/confs/log4j2.properties
@@ -0,0 +1,95 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the console.
+# NOTE: the root level is debug, which is very verbose; this file is meant
+# for troubleshooting. Every logger below inherits the root console
+# appender. Do not add a per-logger appenderRef to "console": with the
+# default additivity each event would then be written once per ancestor.
+rootLogger.level = debug
+rootLogger.appenderRef.stdout.ref = console
+
+# In the pattern layout configuration below, we specify an explicit `%ex` conversion
+# pattern for logging Throwables. If this was omitted, then (by default) Log4J would
+# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional
+# class packaging information. That extra information can sometimes add a substantial
+# performance overhead, so we disable it in our default logging config.
+# For more information, see SPARK-39361.
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex
+
+# Set the default spark-shell/spark-sql log level to WARN. When running the
+# spark-shell/spark-sql, the log level for these classes is used to overwrite
+# the root logger's log level, so that the user can have different defaults
+# for the shell and regular Spark apps.
+logger.repl.name = org.apache.spark.repl.Main
+logger.repl.level = warn
+
+logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver
+logger.thriftserver.level = warn
+
+# Levels for third-party loggers. These entries are normally used to quiet
+# verbose libraries; here Jetty is left at debug, so expect a lot of output.
+logger.jetty1.name = org.sparkproject.jetty
+logger.jetty1.level = debug
+
+logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
+logger.jetty2.level = info
+
+logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
+logger.replexprTyper.level = info
+
+logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
+logger.replSparkILoopInterpreter.level = info
+
+logger.parquet1.name = org.apache.parquet
+logger.parquet1.level = info
+
+logger.parquet2.name = parquet
+logger.parquet2.level = info
+
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
+logger.RetryingHMSHandler.level = fatal
+logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
+logger.FunctionRegistry.level = info
+
+# For deploying Spark ThriftServer
+# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
+appender.console.filter.1.type = RegexFilter
+appender.console.filter.1.regex = .*Thrift error occurred during processing of message.*
+appender.console.filter.1.onMatch = deny
+appender.console.filter.1.onMismatch = neutral
+
+# Logger configuration for org.apache package
+logger.org.name = org.apache
+logger.org.level = debug
+
+# Logger configuration for org.apache.spark package
+logger.spark.name = org.apache.spark
+logger.spark.level = debug
+
+# Logger configuration for org.apache.spark.sql package
+logger.sql.name = org.apache.spark.sql
+logger.sql.level = info
+
+# Logger configuration for py4j package
+logger.py4j.name = py4j
+logger.py4j.level = debug