From d8499d66f6b2bdab037193b6a21f813eaeb93f4c Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 6 Jun 2023 13:56:46 +0800 Subject: [PATCH 1/4] hadoop upgrade to 3.2.1 hive upgrade to 3.1.2 --- Dockerfile | 12 ++++++++++-- README.md | 5 +++-- docker-compose.yml | 28 ++++++++++++++++++---------- entrypoint.sh | 2 ++ hadoop-hive.env | 2 ++ 5 files changed, 35 insertions(+), 14 deletions(-) diff --git a/Dockerfile b/Dockerfile index 037fe91..55a6abf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,15 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.2.1-java8 MAINTAINER Yiannis Mouchakis MAINTAINER Ivan Ermilov +MAINTAINER Jian Shen # Allow buildtime config of HIVE_VERSION ARG HIVE_VERSION # Set HIVE_VERSION from arg if provided at build, env if provided at run, or default # https://docs.docker.com/engine/reference/builder/#using-arg-variables # https://docs.docker.com/engine/reference/builder/#environment-replacement -ENV HIVE_VERSION=${HIVE_VERSION:-2.3.2} +ENV HIVE_VERSION=${HIVE_VERSION:-3.1.2} ENV HIVE_HOME /opt/hive ENV PATH $HIVE_HOME/bin:$PATH @@ -16,6 +17,9 @@ ENV HADOOP_HOME /opt/hadoop-$HADOOP_VERSION WORKDIR /opt +RUN sed -i 's/^.*$/deb http:\/\/deb.debian.org\/debian\/ buster main\ndeb-src http:\/\/deb.debian.org\/debian\/ buster main\ndeb http:\/\/deb.debian.org\/debian-security\/ buster\/updates main\ndeb-src http:\/\/deb.debian.org\/debian-security\/ buster\/updates main/g' /etc/apt/sources.list + +#Install Hive and PostgreSQL JDBC #Install Hive and PostgreSQL JDBC RUN apt-get update && apt-get install -y wget procps && \ wget https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz && \ @@ -46,6 +50,10 @@ RUN chmod +x /usr/local/bin/startup.sh COPY entrypoint.sh /usr/local/bin/ RUN chmod +x /usr/local/bin/entrypoint.sh +# solve guava version conflict between hadoop and hive +RUN cp /opt/hadoop-3.2.1/share/hadoop/common/lib/guava-27.0-jre.jar /opt/hive/lib/ +RUN rm -rf 
/opt/hive/lib/guava-19.0.jar + EXPOSE 10000 EXPOSE 10002 diff --git a/README.md b/README.md index e64d2da..c367b45 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # docker-hive -This is a docker container for Apache Hive 2.3.2. It is based on https://github.com/big-data-europe/docker-hadoop so check there for Hadoop configurations. +This is a docker container for Apache Hive 3.1.2. It is based on https://github.com/big-data-europe/docker-hadoop so check there for Hadoop configurations. This deploys Hive and starts a hiveserver2 on port 10000. Metastore is running with a connection to postgresql database. The hive configuration is performed with HIVE_SITE_CONF_ variables (see hadoop-hive.env for an example). @@ -29,7 +29,7 @@ This deploys a Presto server listens on port `8080` Load data into Hive: ``` $ docker-compose exec hive-server bash - # /opt/hive/bin/beeline -u jdbc:hive2://localhost:10000 + # /opt/hive/bin/beeline -u jdbc:hive2:// > CREATE TABLE pokes (foo INT, bar STRING); > LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE pokes; ``` @@ -47,3 +47,4 @@ Then query it from PrestoDB. 
You can get [presto.jar](https://prestosql.io/docs/ * Ivan Ermilov [@earthquakesan](https://github.com/earthquakesan) (maintainer) * Yiannis Mouchakis [@gmouchakis](https://github.com/gmouchakis) * Ke Zhu [@shawnzhu](https://github.com/shawnzhu) +* Jian Shen [@SJshenjian](https://github.com/SJshenjian) diff --git a/docker-compose.yml b/docker-compose.yml index a1bcc3a..5811e10 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,8 @@ -version: "3" +version: "3.5" services: namenode: - image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8 volumes: - namenode:/hadoop/dfs/name environment: @@ -10,19 +10,20 @@ services: env_file: - ./hadoop-hive.env ports: - - "50070:50070" + - "9870:9870" + - "9000:9000" datanode: - image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8 volumes: - datanode:/hadoop/dfs/data env_file: - ./hadoop-hive.env environment: - SERVICE_PRECONDITION: "namenode:50070" + SERVICE_PRECONDITION: "namenode:9870" ports: - - "50075:50075" + - "9864:9864" hive-server: - image: bde2020/hive:2.3.2-postgresql-metastore + image: bde2020/hive:3.1.2-postgresql-metastore env_file: - ./hadoop-hive.env environment: @@ -31,16 +32,18 @@ services: ports: - "10000:10000" hive-metastore: - image: bde2020/hive:2.3.2-postgresql-metastore + image: bde2020/hive:3.1.2-postgresql-metastore env_file: - ./hadoop-hive.env command: /opt/hive/bin/hive --service metastore environment: - SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432" + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 hive-metastore-postgresql:5432" ports: - "9083:9083" hive-metastore-postgresql: - image: bde2020/hive-metastore-postgresql:2.3.0 + image: bde2020/hive-metastore-postgresql:3.1.0 + ports: + - "5432:5432" presto-coordinator: image: shawnzhu/prestodb:0.181 ports: @@ -49,3 +52,8 @@ services: volumes: namenode: datanode: + +# solve 
java.net.URISyntaxException Illegal character in hostname at index 49: thrift://docker-hive-hive-metastore-1.docker-hive_default:9083 +networks: + default: + name: docker-hive-default \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index baefba5..a8d2bd9 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -110,9 +110,11 @@ function wait_for_it() echo "[$i/$max_try] $service:${port} is available." } +# shellcheck disable=SC2068 for i in ${SERVICE_PRECONDITION[@]} do wait_for_it ${i} done +# shellcheck disable=SC2068 exec $@ diff --git a/hadoop-hive.env b/hadoop-hive.env index 3da87a9..f63a0ea 100644 --- a/hadoop-hive.env +++ b/hadoop-hive.env @@ -4,6 +4,8 @@ HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive HIVE_SITE_CONF_datanucleus_autoCreateSchema=false HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083 +HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0 +HIVE_SITE_CONF_hive_server2_thrift_port=10000 HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false CORE_CONF_fs_defaultFS=hdfs://namenode:8020 From 2ad28b30c85bab5312b53742e56ed0a644b280ca Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 6 Jun 2023 14:32:22 +0800 Subject: [PATCH 2/4] hadoop upgrade to 3.2.1 hive upgrade to 3.1.2 --- startup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/startup.sh b/startup.sh index f73e066..323eb54 100644 --- a/startup.sh +++ b/startup.sh @@ -6,4 +6,4 @@ hadoop fs -chmod g+w /tmp hadoop fs -chmod g+w /user/hive/warehouse cd $HIVE_HOME/bin -./hiveserver2 --hiveconf hive.server2.enable.doAs=false +./hiveserver2 --hiveconf hive.server2.enable.doAs=false \ No newline at end of file From c54042d0036b7ff194cdeaf894e4feaed5f4b67b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=AE=97=E6=B3=95=E5=B0=8F=E7=94=9F?= Date: Sat, 17 Jun 2023 19:30:00 +0800 Subject: [PATCH 3/4] Update hive-site.xml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit 修复hive-server报错Caused by: org.apache.thrift.TApplicationException: Internal error processing get_current_notificationEventId --- conf/hive-site.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/hive-site.xml b/conf/hive-site.xml index 60f3935..3082b4c 100644 --- a/conf/hive-site.xml +++ b/conf/hive-site.xml @@ -15,4 +15,8 @@ See the License for the specific language governing permissions and limitations under the License. --> + + hive.metastore.event.db.notification.api.auth + false + From 80ba9e647db547513c25d8aaed2ee8133a69a6f2 Mon Sep 17 00:00:00 2001 From: f2u12 Date: Wed, 15 May 2024 21:13:07 +0800 Subject: [PATCH 4/4] =?UTF-8?q?apt,hive=20=E5=88=87=E6=8D=A2=E4=B8=BA?= =?UTF-8?q?=E6=B8=85=E5=8D=8E=E6=BA=90=20=E5=9B=BA=E5=AE=9Ahive=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E4=B8=BA3.1.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 55a6abf..6faabcd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ ARG HIVE_VERSION # Set HIVE_VERSION from arg if provided at build, env if provided at run, or default # https://docs.docker.com/engine/reference/builder/#using-arg-variables # https://docs.docker.com/engine/reference/builder/#environment-replacement -ENV HIVE_VERSION=${HIVE_VERSION:-3.1.2} +ENV HIVE_VERSION=${HIVE_VERSION:-3.1.3} ENV HIVE_HOME /opt/hive ENV PATH $HIVE_HOME/bin:$PATH @@ -17,16 +17,16 @@ ENV HADOOP_HOME /opt/hadoop-$HADOOP_VERSION WORKDIR /opt -RUN sed -i 's/^.*$/deb http:\/\/deb.debian.org\/debian\/ buster main\ndeb-src http:\/\/deb.debian.org\/debian\/ buster main\ndeb http:\/\/deb.debian.org\/debian-security\/ buster\/updates main\ndeb-src http:\/\/deb.debian.org\/debian-security\/ buster\/updates main/g' /etc/apt/sources.list +RUN sed -i 's/^.*$/deb http:\/\/mirrors.tuna.tsinghua.edu.cn\/debian\/ buster main\ndeb-src 
http:\/\/mirrors.tuna.tsinghua.edu.cn\/debian\/ buster main\ndeb http:\/\/mirrors.tuna.tsinghua.edu.cn\/debian-security\/ buster\/updates main\ndeb-src http:\/\/mirrors.tuna.tsinghua.edu.cn\/debian-security\/ buster\/updates main/g' /etc/apt/sources.list #Install Hive and PostgreSQL JDBC #Install Hive and PostgreSQL JDBC RUN apt-get update && apt-get install -y wget procps && \ - wget https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz && \ - tar -xzvf apache-hive-$HIVE_VERSION-bin.tar.gz && \ - mv apache-hive-$HIVE_VERSION-bin hive && \ - wget https://jdbc.postgresql.org/download/postgresql-9.4.1212.jar -O $HIVE_HOME/lib/postgresql-jdbc.jar && \ - rm apache-hive-$HIVE_VERSION-bin.tar.gz && \ + wget https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz && \ + tar -xzvf apache-hive-3.1.3-bin.tar.gz && \ + mv apache-hive-3.1.3-bin hive && \ + wget https://jdbc.postgresql.org/download/postgresql-9.4.1212.jar -O $HIVE_HOME/lib/postgresql-jdbc.jar && \ + rm apache-hive-3.1.3-bin.tar.gz && \ apt-get --purge remove -y wget && \ apt-get clean && \ rm -rf /var/lib/apt/lists/*