diff --git a/tools/hive/README.md b/tools/hive/README.md index 97d816f1..e6cb0d6a 100644 --- a/tools/hive/README.md +++ b/tools/hive/README.md @@ -1,7 +1,10 @@ -# Cloudera Hadoop and Hive Docker Image with Kerberos +# Cloudera Hadoop and Hive Docker Image with Kerberos, Sentry -This is a Hadoop Docker image running CDH5 versions of Hadoop and Hive, all in one container. There is a separate Kerberos image in which Hadoop and Hive use Kerberos for authentication. Adapted from https://github.com/tilakpatidar/cdh5_hive_postgres and based on Ubuntu (trusty). +This is a Hadoop Docker image running CDH5 versions of Hadoop and Hive, all in one container. +There is a separate Kerberos image in which Hadoop and Hive use Kerberos for authentication, +and Sentry for authorization. +Adapted from https://github.com/tilakpatidar/cdh5_hive_postgres and based on Ubuntu (trusty). Postgres is also installed so that Hive can use it for its Metastore backend and run in remote mode. diff --git a/tools/hive/hadoop-hive/scripts/hive-bootstrap.sh b/tools/hive/hadoop-hive/scripts/hive-bootstrap.sh index 5f4f2d58..d1b04467 100644 --- a/tools/hive/hadoop-hive/scripts/hive-bootstrap.sh +++ b/tools/hive/hadoop-hive/scripts/hive-bootstrap.sh @@ -6,6 +6,16 @@ printenv | cat >> /root/.bashrc # hadoop bootstrap /etc/hadoop-bootstrap.sh +# init and start sentry +SENTRY_CONF_TEMPLATE=$SENTRY_HOME/conf/sentry-site.xml.template +SENTRY_CONF_FILE=$SENTRY_HOME/conf/sentry-site.xml +if [ -f "$SENTRY_CONF_TEMPLATE" ]; then + sed s/{{HOSTNAME}}/$HOSTNAME/ $SENTRY_HOME/conf/sentry-site.xml.template > $SENTRY_HOME/conf/sentry-site.xml + sed s/{{HOSTNAME}}/$HOSTNAME/ $HIVE_CONF/sentry-site.xml.template > $HIVE_CONF/sentry-site.xml + $SENTRY_HOME/bin/sentry --command schema-tool --conffile $SENTRY_CONF_FILE --dbType derby --initSchema + $SENTRY_HOME/bin/sentry --command service --conffile $SENTRY_CONF_FILE & +fi + # restart postgresql /etc/init.d/postgresql restart @@ -19,16 +29,22 @@ do echo "waiting for hdfs to be ready"; sleep 10; done +# create hive user +useradd hive + # create hdfs directories -$HADOOP_PREFIX/bin/hdfs dfs -mkdir -p /user/root +hdfs dfs -mkdir -p /user/root hdfs dfs -chown -R hdfs:supergroup /user -$HADOOP_PREFIX/bin/hdfs dfs -mkdir -p /apps/hive/warehouse +hdfs dfs -mkdir -p /apps/hive/warehouse hdfs dfs -chown -R hive:supergroup /apps/hive hdfs dfs -chmod 777 /apps/hive/warehouse +hdfs dfs -mkdir -p /tmp/hive +hdfs dfs -chmod 777 /tmp/hive + # altering the hive-site configuration -sed s/{{HOSTNAME}}/$HOSTNAME/ /usr/local/hive/conf/hive-site.xml.template > /usr/local/hive/conf/hive-site.xml +sed s/{{HOSTNAME}}/$HOSTNAME/ $HIVE_CONF/hive-site.xml.template > $HIVE_CONF/hive-site.xml sed s/{{HOSTNAME}}/$HOSTNAME/ /opt/files/hive-site.xml.template > /opt/files/hive-site.xml # start hive metastore server diff --git a/tools/hive/kerberos/Dockerfile b/tools/hive/kerberos/Dockerfile index 19559f65..8fa96b69 100644 --- a/tools/hive/kerberos/Dockerfile +++ b/tools/hive/kerberos/Dockerfile @@ -1,5 +1,14 @@ FROM cdh5-hive +ENV SENTRY_VERSION 1.5.1 +ENV SENTRY_HOME /usr/local/sentry + +# download sentry +RUN curl -L http://archive.cloudera.com/cdh${CDH_VERSION}/cdh/${CDH_VERSION}/sentry-${SENTRY_VERSION}-cdh${CDH_EXACT_VERSION}.tar.gz \ + | tar -xzC /usr/local && \ + cd /usr/local && \ + ln -s apache-sentry-${SENTRY_VERSION}-cdh${CDH_EXACT_VERSION}-bin/ sentry + # copy kerberized hadoop config files ADD templates/core-site.xml.template $HADOOP_PREFIX/etc/hadoop/core-site.xml.template ADD templates/hdfs-site.xml.template $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml.template @@ -9,6 +18,14 @@ ADD templates/yarn-site.xml.template $HADOOP_PREFIX/etc/hadoop/yarn-site.xml.tem ADD templates/hive-site.xml.template /opt/files/ ADD templates/hive-site.xml.template $HIVE_CONF/hive-site.xml.template +# sentry config files +ADD templates/sentry-site.xml.hive-client.template /usr/local/hive/conf/sentry-site.xml.template +ADD templates/sentry-site.xml.server.template /usr/local/sentry/conf/sentry-site.xml.template + +# hive / sentry test script +ADD scripts/grant-hive-privileges.sh /etc/grant-hive-privileges.sh +RUN chmod 700 /etc/grant-hive-privileges.sh + # krb5.conf ADD conf/krb5.conf /etc/ diff --git a/tools/hive/kerberos/marathon/hdfs-hive-kerberos.json b/tools/hive/kerberos/marathon/hdfs-hive-kerberos.json index 8cfa91ac..e1650a04 100644 --- a/tools/hive/kerberos/marathon/hdfs-hive-kerberos.json +++ b/tools/hive/kerberos/marathon/hdfs-hive-kerberos.json @@ -33,7 +33,7 @@ [ "hostname", "IS", - "10.0.0.114" + "1.2.3.4" ] ] } diff --git a/tools/hive/kerberos/scripts/grant-hive-privileges.sh b/tools/hive/kerberos/scripts/grant-hive-privileges.sh new file mode 100644 index 00000000..3e442b33 --- /dev/null +++ b/tools/hive/kerberos/scripts/grant-hive-privileges.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -x + +export HADOOP_HOME=/usr/local/hadoop + +# Create a user "alice" since Sentry authorization relies on the Linux user and group information +/usr/sbin/useradd alice + +# Grant permissions to user “alice” +echo "Grant permissions to user alice ..." +kdestroy +kinit -k -t /usr/local/hadoop/etc/hadoop/hdfs.keytab hive/${HOSTNAME}@LOCAL +cat <grant_alice.sql +CREATE ROLE test_role; +GRANT ROLE test_role to GROUP alice; +GRANT ROLE test_role to GROUP root; +GRANT ALL on DATABASE default to ROLE test_role WITH GRANT OPTION; +EOF +/usr/local/hive/bin/beeline -u "jdbc:hive2://localhost:10000/default;principal=hive/${HOSTNAME}@LOCAL" -f grant_alice.sql + +# Log back in as hdfs +kdestroy +kinit -k -t /usr/local/hadoop/etc/hadoop/hdfs.keytab hdfs@LOCAL diff --git a/tools/hive/kerberos/templates/hive-site-kerberos.xml.template b/tools/hive/kerberos/templates/hive-site-kerberos.xml.template index 5dad8f1f..c25d5d52 100755 --- a/tools/hive/kerberos/templates/hive-site-kerberos.xml.template +++ b/tools/hive/kerberos/templates/hive-site-kerberos.xml.template @@ -38,11 +38,38 @@ hive.users.in.admin.role - hdfs,hive + hive + - hive.security.authorization.manager - org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider + hive.server2.enable.doAs + false + + + + hive.security.authorization.task.factory + org.apache.sentry.binding.hive.SentryHiveAuthorizationTaskFactoryImpl + + + + hive.server2.session.hook + org.apache.sentry.binding.hive.HiveAuthzBindingSessionHook + + + + + hive.metastore.filter.hook + org.apache.sentry.binding.metastore.SentryMetaStoreFilterHook + + + + hive.metastore.pre.event.listeners + org.apache.sentry.binding.metastore.MetastoreAuthzBinding + + + + hive.metastore.event.listeners + org.apache.sentry.binding.metastore.SentryMetastorePostEventListener diff --git a/tools/hive/kerberos/templates/sentry-site.xml.hive-client.template b/tools/hive/kerberos/templates/sentry-site.xml.hive-client.template new file mode 100644 index 00000000..92a8cda2 --- /dev/null +++ b/tools/hive/kerberos/templates/sentry-site.xml.hive-client.template @@ -0,0 +1,32 @@ + + + sentry.hive.provider + org.apache.sentry.provider.file.HadoopGroupResourceAuthorizationProvider + + + sentry.hive.server + server1 + + + sentry.service.client.server.rpc-port + 8038 + + + sentry.service.client.server.rpc-address + localhost + + + + + sentry.hive.provider.backend + org.apache.sentry.provider.db.SimpleDBProviderBackend + + + sentry.service.server.principal + sentry/{{HOSTNAME}}@LOCAL + + + sentry.metastore.service.users + hive + + diff --git a/tools/hive/kerberos/templates/sentry-site.xml.server.template b/tools/hive/kerberos/templates/sentry-site.xml.server.template new file mode 100644 index 00000000..9c9c8f6f --- /dev/null +++ b/tools/hive/kerberos/templates/sentry-site.xml.server.template @@ -0,0 +1,34 @@ + + + sentry.hive.server + server1 + + + sentry.store.jdbc.url + jdbc:derby:;databaseName=metastore_db;create=true + + + sentry.service.server.principal + sentry/{{HOSTNAME}}@LOCAL + + + sentry.service.server.keytab + /usr/local/hadoop/etc/hadoop/hdfs.keytab + + + sentry.service.admin.group + hive + + + sentry.service.allow.connect + hive + + + sentry.store.jdbc.user + sentry + + + sentry.store.jdbc.password + test + +