Skip to content

Commit

Permalink
add log4j properties files to base image
Browse files Browse the repository at this point in the history
  • Loading branch information
thanh-nguyen-dang committed Jul 15, 2024
1 parent 2bf8908 commit 3838bb8
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 0 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ EXPOSE 22 4040 7077 8020 8030 8031 8032 8042 8088 9000 10020 19888 50010 50020 5

# Create runtime directories: sshd's privilege-separation dir, the HDFS
# namenode data tree, and Hadoop's log dir.
# `mkdir -p` creates all intermediate directories, so only the deepest path
# (.../dfs/namenode) needs listing — the original also spelled out hdfs,
# hdfs/data and hdfs/data/dfs, which were redundant.
RUN mkdir -p /var/run/sshd ${HADOOP_HOME}/hdfs/data/dfs/namenode ${HADOOP_HOME}/logs

# Bake the log4j 1.x and log4j2 configs into Spark's conf dir so the base
# image controls Spark/Hadoop logging verbosity for derived images.
COPY spark/base/confs/log4j.properties /spark/conf/log4j.properties
COPY spark/base/confs/log4j2.properties /spark/conf/log4j2.properties

# NOTE(review): `COPY . ...` copies the whole build context; confirm a
# .dockerignore excludes .git, local logs, and any credentials.
COPY . /gen3spark
WORKDIR /gen3spark

Expand Down
9 changes: 9 additions & 0 deletions spark/base/confs/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Spark log4j 1.x configuration: route everything to a console appender on
# stderr with an ISO-8601 timestamped pattern.
# NOTE(review): DEBUG on the root category is extremely verbose — presumably
# intentional for troubleshooting; confirm before using in production.
log4j.rootCategory=DEBUG, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
# Log to stderr so stdout stays clean for application/job output.
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# e.g. "2024-07-15 12:00:00,000 DEBUG TaskSetManager:123 - message"
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p %c{1}:%L - %m%n
# Explicit DEBUG on the scheduler/executor/FS classes of interest (redundant
# while the root is DEBUG, but keeps them verbose if the root is later raised).
log4j.logger.org.apache.spark.executor.CoarseGrainedExecutorBackend=DEBUG
log4j.logger.org.apache.spark.scheduler.TaskSetManager=DEBUG
log4j.logger.org.apache.hadoop.fs=DEBUG
log4j.logger.org.apache.spark.scheduler.DAGScheduler=DEBUG
99 changes: 99 additions & 0 deletions spark/base/confs/log4j2.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
rootLogger.level = debug
rootLogger.appenderRef.stdout.ref = console

# In the pattern layout configuration below, we specify an explicit `%ex` conversion
# pattern for logging Throwables. If this was omitted, then (by default) Log4J would
# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional
# class packaging information. That extra information can sometimes add a substantial
# performance overhead, so we disable it in our default logging config.
# For more information, see SPARK-39361.
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex

# Set the default spark-shell/spark-sql log level to WARN. When running the
# spark-shell/spark-sql, the log level for these classes is used to overwrite
# the root logger's log level, so that the user can have different defaults
# for the shell and regular Spark apps.
logger.repl.name = org.apache.spark.repl.Main
logger.repl.level = warn

logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver
logger.thriftserver.level = warn

# Per-package log levels. Each logger below carries its own console appenderRef,
# so additivity is disabled to stop events from ALSO propagating to the root
# logger's console ref — with the default additivity=true every line from these
# loggers was printed twice.
# NOTE(review): jetty raised to DEBUG here (the stock Spark template uses warn),
# presumably for troubleshooting — confirm this is intentional.
logger.jetty1.name = org.sparkproject.jetty
logger.jetty1.level = debug
logger.jetty1.appenderRef.stdout.ref = console
logger.jetty1.additivity = false

logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
logger.jetty2.level = info
logger.jetty2.appenderRef.stdout.ref = console
logger.jetty2.additivity = false

logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.replexprTyper.level = info
logger.replexprTyper.appenderRef.stdout.ref = console
logger.replexprTyper.additivity = false

logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.replSparkILoopInterpreter.level = info
logger.replSparkILoopInterpreter.appenderRef.stdout.ref = console
logger.replSparkILoopInterpreter.additivity = false

logger.parquet1.name = org.apache.parquet
logger.parquet1.level = info
logger.parquet1.appenderRef.stdout.ref = console
logger.parquet1.additivity = false

logger.parquet2.name = parquet
logger.parquet2.level = info
logger.parquet2.appenderRef.stdout.ref = console
logger.parquet2.additivity = false

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.RetryingHMSHandler.level = fatal
logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.FunctionRegistry.level = info

# For deploying Spark ThriftServer
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
appender.console.filter.1.type = RegexFilter
appender.console.filter.1.regex = .*Thrift error occurred during processing of message.*
appender.console.filter.1.onMatch = deny
appender.console.filter.1.onMismatch = neutral

# Broad org.apache catch-all at debug; more-specific loggers above (e.g.
# org.apache.parquet at info) still take precedence for their packages.
logger.org.name=org.apache
logger.org.level=debug
logger.org.appenderRef.stdout.ref = console
logger.org.additivity = false

# Logger configuration for org.apache.spark package
logger.spark.name = org.apache.spark
logger.spark.level = debug
logger.spark.appenderRef.stdout.ref = console
logger.spark.additivity = false

# Logger configuration for org.apache.spark.sql package
logger.sql.name = org.apache.spark.sql
logger.sql.level = info
logger.sql.appenderRef.stdout.ref = console
logger.sql.additivity = false

# Logger configuration for py4j package
logger.py4j.name = py4j
logger.py4j.level = debug
logger.py4j.appenderRef.stdout.ref = console
logger.py4j.additivity = false

0 comments on commit 3838bb8

Please sign in to comment.