diff --git a/Dockerfile b/Dockerfile index ec34b5d22b..cf2b8f382f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,6 +34,7 @@ RUN apt-get update \ netcat \ curl \ && cd / \ + && mkdir /opt/ranger-atlas-plugin \ && export MAVEN_OPTS="-Xms2g -Xmx2g" \ && export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64" \ && tar -xzvf /apache-atlas-3.0.0-SNAPSHOT-server.tar.gz -C /opt \ @@ -56,12 +57,22 @@ COPY atlas-hub/repair_index.py /opt/apache-atlas/bin/ RUN chmod +x /opt/apache-atlas/bin/repair_index.py COPY atlas-hub/atlas_start.py.patch atlas-hub/atlas_config.py.patch /opt/apache-atlas/bin/ +COPY atlas-hub/pre-conf/ranger/lib/ /opt/apache-atlas/libext/ +COPY atlas-hub/pre-conf/ranger/install/conf.templates/enable/ /opt/apache-atlas/conf/ COPY atlas-hub/pre-conf/atlas-log4j.xml /opt/apache-atlas/conf/ COPY atlas-hub/pre-conf/atlas-log4j2.xml /opt/apache-atlas/conf/ -COPY atlas-hub/pre-conf/atlas-auth/ /opt/apache-atlas/conf/ +COPY atlas-hub/pre-conf/ranger/ /opt/ranger-atlas-plugin/ +COPY atlas-hub/env_change.sh / RUN curl https://repo1.maven.org/maven2/org/jolokia/jolokia-jvm/1.6.2/jolokia-jvm-1.6.2-agent.jar -o /opt/apache-atlas/libext/jolokia-jvm-agent.jar +RUN cd /opt/apache-atlas/bin \ + && sed "s~ATLAS_INSTALL_DIR~/opt/apache-atlas~g" /opt/ranger-atlas-plugin/install.properties > /tmp/install.properties \ + && cp /tmp/install.properties /opt/ranger-atlas-plugin/install.properties \ + && chmod +x /env_change.sh +# && patch -b -f < atlas_start.py.patch \ +# && patch -b -f < atlas_config.py.patch \ + RUN cd /opt/apache-atlas/bin \ && ./atlas_start.py -setup || true diff --git a/addons/falcon-bridge-shim/pom.xml b/addons/falcon-bridge-shim/pom.xml new file mode 100755 index 0000000000..c554e890a7 --- /dev/null +++ b/addons/falcon-bridge-shim/pom.xml @@ -0,0 +1,77 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + falcon-bridge-shim + Apache Atlas Falcon Bridge Shim Module + Apache Atlas Falcon Bridge Shim + jar + + + + + org.apache.atlas + atlas-plugin-classloader + + + + org.apache.falcon + falcon-common + ${falcon.version} + provided + + + org.apache.spark + * + + + javax.servlet + servlet-api + + + org.mortbay.jetty + servlet-api + + + org.springframework + spring-beans + + + org.springframework + spring-jms + + + org.springframework + spring-tx + + + org.springframework + spring-context + + + + + diff --git a/addons/falcon-bridge-shim/src/main/java/org/apache/atlas/falcon/service/AtlasService.java b/addons/falcon-bridge-shim/src/main/java/org/apache/atlas/falcon/service/AtlasService.java new file mode 100755 index 0000000000..2b756de0e2 --- /dev/null +++ b/addons/falcon-bridge-shim/src/main/java/org/apache/atlas/falcon/service/AtlasService.java @@ -0,0 +1,222 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.falcon.service; + + +import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader; +import org.apache.falcon.FalconException; +import org.apache.falcon.entity.store.ConfigurationStore; +import org.apache.falcon.entity.v0.Entity; +import org.apache.falcon.service.ConfigurationChangeListener; +import org.apache.falcon.service.FalconService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Falcon hook used for atlas entity registration. + */ +public class AtlasService implements FalconService, ConfigurationChangeListener { + private static final Logger LOG = LoggerFactory.getLogger(AtlasService.class); + + private static final String ATLAS_PLUGIN_TYPE = "falcon"; + private static final String ATLAS_FALCON_HOOK_IMPL_CLASSNAME = "org.apache.atlas.falcon.service.AtlasService"; + + private AtlasPluginClassLoader atlasPluginClassLoader = null; + private FalconService falconServiceImpl = null; + private ConfigurationChangeListener configChangeListenerImpl = null; + + public AtlasService() { + this.initialize(); + } + + @Override + public String getName() { + if (LOG.isDebugEnabled()) { + LOG.debug("==> AtlasService.getName()"); + } + + String ret = null; + + try { + activatePluginClassLoader(); + ret = falconServiceImpl.getName(); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== AtlasService.getName()"); + } + + return ret; + } + + @Override + public void init() throws FalconException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> AtlasService.init()"); + } + + try { + activatePluginClassLoader(); + + ConfigurationStore.get().registerListener(this); + + falconServiceImpl.init(); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== AtlasService.init()"); + } + } + + @Override + public void destroy() throws FalconException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> AtlasService.destroy()"); + } + + try { + activatePluginClassLoader(); + + ConfigurationStore.get().unregisterListener(this); + + falconServiceImpl.destroy(); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== AtlasService.destroy()"); + } + } + + @Override + public void onAdd(Entity entity) throws FalconException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> AtlasService.onAdd({})", entity); + } + + try { + activatePluginClassLoader(); + configChangeListenerImpl.onAdd(entity); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== AtlasService.onAdd({})", entity); + } + } + + @Override + public void onRemove(Entity entity) throws FalconException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> AtlasService.onRemove({})", entity); + } + + try { + activatePluginClassLoader(); + configChangeListenerImpl.onRemove(entity); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== AtlasService.onRemove({})", entity); + } + } + + @Override + public void onChange(Entity entity, Entity entity1) throws FalconException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> AtlasService.onChange({}, {})", entity, entity1); + } + + try { + activatePluginClassLoader(); + configChangeListenerImpl.onChange(entity, entity1); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== AtlasService.onChange({}, {})", entity, entity1); + } + } + + @Override + public void onReload(Entity entity) throws FalconException { + if 
(LOG.isDebugEnabled()) { + LOG.debug("==> AtlasService.onReload({})", entity); + } + + try { + activatePluginClassLoader(); + configChangeListenerImpl.onReload(entity); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== AtlasService.onReload({})", entity); + } + } + + private void initialize() { + if (LOG.isDebugEnabled()) { + LOG.debug("==> AtlasService.initialize()"); + } + + try { + atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass()); + + Class cls = Class.forName(ATLAS_FALCON_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader); + + activatePluginClassLoader(); + + Object atlasService = cls.newInstance(); + + falconServiceImpl = (FalconService) atlasService; + configChangeListenerImpl = (ConfigurationChangeListener) atlasService; + } catch (Exception excp) { + LOG.error("Error instantiating Atlas hook implementation", excp); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== AtlasService.initialize()"); + } + } + + private void activatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.activate(); + } + } + + private void deactivatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.deactivate(); + } + } +} diff --git a/addons/falcon-bridge/pom.xml b/addons/falcon-bridge/pom.xml new file mode 100644 index 0000000000..1e2ce7c81b --- /dev/null +++ b/addons/falcon-bridge/pom.xml @@ -0,0 +1,431 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + falcon-bridge + Apache Atlas Falcon Bridge Module + Apache Atlas Falcon Bridge + jar + + + + + org.slf4j + slf4j-api + + + + org.slf4j + slf4j-log4j12 + + + + org.apache.atlas + atlas-client-v1 + + + + org.apache.atlas + atlas-notification + + + + org.apache.falcon + falcon-common + ${falcon.version} + provided + + + org.apache.spark + * + + + javax.servlet + servlet-api + + + org.mortbay.jetty + servlet-api + + + org.springframework + spring-beans + + + org.springframework + spring-jms + + + org.springframework + spring-tx + + + + + + org.apache.atlas + hive-bridge + + + + org.testng + testng + + + + org.eclipse.jetty + jetty-server + test + + + + org.apache.atlas + atlas-graphdb-impls + pom + test + + + + + + dist + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-hook + package + + copy + + + ${project.build.directory}/dependency/hook/falcon/atlas-falcon-plugin-impl + false + false + true + + + ${project.groupId} + ${project.artifactId} + ${project.version} + + + ${project.groupId} + hive-bridge + ${project.version} + + + ${project.groupId} + atlas-client-common + ${project.version} + + + ${project.groupId} + atlas-client-v1 + ${project.version} + + + ${project.groupId} + atlas-client-v2 + ${project.version} + + + ${project.groupId} + atlas-intg + ${project.version} + + + ${project.groupId} + atlas-notification + ${project.version} + + + ${project.groupId} + atlas-common + ${project.version} + + + org.apache.kafka + kafka_${kafka.scala.binary.version} + ${kafka.version} + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + com.sun.jersey + jersey-json + ${jersey.version} + + + javax.ws.rs + jsr311-api + ${jsr.version} + + + + + + copy-hook-shim + package + + copy + + + ${project.build.directory}/dependency/hook/falcon + false + false + true + + + ${project.groupId} + falcon-bridge-shim + ${project.version} + + + ${project.groupId} + atlas-plugin-classloader + ${project.version} 
+ + + + + + + + + + + + + + org.eclipse.jetty + jetty-maven-plugin + ${jetty.version} + + ${skipTests} + + 31000 + 60000 + + ../../webapp/target/atlas-webapp-${project.version}.war + true + ../../webapp/src/main/webapp + + / + ${project.basedir}/../../webapp/src/main/webapp/WEB-INF/web.xml + + true + + true + + atlas.home + ${project.build.directory} + + + atlas.conf + ${project.build.directory}/test-classes + + + atlas.data + ${project.build.directory}/data + + + atlas.log.dir + ${project.build.directory}/logs + + + atlas.log.file + application.log + + + log4j.configuration + file:///${project.build.directory}/test-classes/atlas-log4j.xml + + + atlas.graphdb.backend + ${graphdb.backend.impl} + + + embedded.solr.directory + ${project.build.directory} + + + solr.log.dir + ${project.build.directory}/logs + + + org.eclipse.jetty.annotations.maxWait + 5000 + + + atlas-stop + 31001 + ${jetty-maven-plugin.stopWait} + jar + + + + org.apache.curator + curator-client + ${curator.version} + + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + + + start-jetty + pre-integration-test + + + stop + deploy-war + + + true + + + + stop-jetty + post-integration-test + + stop + + + + + + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.doxia + doxia-module-twiki + ${doxia.version} + + + org.apache.maven.doxia + doxia-core + ${doxia.version} + + + + + + site + + prepare-package + + + + false + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + false + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-resources + validate + + copy-resources + + + ${basedir}/target/models + + + ${basedir}/../models + + 0000-Area0/0010-base_model.json + 1000-Hadoop/** + + + + + + + copy-solr-resources + validate + + copy-resources + + + ${project.build.directory}/solr + + + ${basedir}/../../test-tools/src/main/resources/solr + + + + + + + + + org.apache.maven.plugins + maven-antrun-plugin + + + post-integration-test + + run + + + + + + + + + + + + + + diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/Util/EventUtil.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/Util/EventUtil.java new file mode 100644 index 0000000000..ef5634009d --- /dev/null +++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/Util/EventUtil.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.falcon.Util; + +import org.apache.commons.lang3.StringUtils; +import org.apache.falcon.FalconException; +import org.apache.falcon.security.CurrentUser; + +import java.util.HashMap; +import java.util.Map; + +/** + * Falcon event util + */ +public final class EventUtil { + + private EventUtil() {} + + + public static Map convertKeyValueStringToMap(final String keyValueString) { + if (StringUtils.isBlank(keyValueString)) { + return null; + } + + Map keyValueMap = new HashMap<>(); + + String[] tags = keyValueString.split(","); + for (String tag : tags) { + int index = tag.indexOf("="); + String tagKey = tag.substring(0, index).trim(); + String tagValue = tag.substring(index + 1, tag.length()).trim(); + keyValueMap.put(tagKey, tagValue); + } + return keyValueMap; + } + + public static String getUser() throws FalconException { + try { + return CurrentUser.getAuthenticatedUGI().getShortUserName(); + } catch (Exception ioe) { + //Ignore is failed to get user, uses login user + } + return null; + } +} diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/bridge/FalconBridge.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/bridge/FalconBridge.java new file mode 100644 index 0000000000..cbf002f4fa --- /dev/null +++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/bridge/FalconBridge.java @@ -0,0 +1,416 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.falcon.bridge; + +import org.apache.atlas.AtlasClient; +import org.apache.atlas.AtlasConstants; +import org.apache.atlas.falcon.Util.EventUtil; +import org.apache.atlas.falcon.model.FalconDataTypes; +import org.apache.atlas.hive.bridge.HiveMetaStoreBridge; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.v1.model.instance.Referenceable; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.falcon.FalconException; +import org.apache.falcon.entity.CatalogStorage; +import org.apache.falcon.entity.FeedHelper; +import org.apache.falcon.entity.FileSystemStorage; +import org.apache.falcon.entity.ProcessHelper; +import org.apache.falcon.entity.store.ConfigurationStore; +import org.apache.falcon.entity.v0.EntityType; +import org.apache.falcon.entity.v0.feed.CatalogTable; +import org.apache.falcon.entity.v0.feed.ClusterType; +import org.apache.falcon.entity.v0.feed.Feed; +import org.apache.falcon.entity.v0.feed.Location; +import org.apache.falcon.entity.v0.feed.LocationType; +import org.apache.falcon.entity.v0.process.Cluster; +import org.apache.falcon.entity.v0.process.Input; +import org.apache.falcon.entity.v0.process.Output; +import org.apache.falcon.entity.v0.process.Workflow; +import org.apache.falcon.workflow.WorkflowExecutionArgs; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A Bridge Utility to register Falcon entities metadata to Atlas. 
+ */ +public class FalconBridge { + private static final Logger LOG = LoggerFactory.getLogger(FalconBridge.class); + + public static final String COLO = "colo"; + public static final String TAGS = "tags"; + public static final String GROUPS = "groups"; + public static final String PIPELINES = "pipelines"; + public static final String WFPROPERTIES = "workflow-properties"; + public static final String RUNSON = "runs-on"; + public static final String STOREDIN = "stored-in"; + public static final String FREQUENCY = "frequency"; + public static final String ATTRIBUTE_DB = "db"; + + /** + * Creates cluster entity + * + * @param cluster ClusterEntity + * @return cluster instance reference + */ + public static Referenceable createClusterEntity(final org.apache.falcon.entity.v0.cluster.Cluster cluster) { + LOG.info("Creating cluster Entity : {}", cluster.getName()); + + Referenceable clusterRef = new Referenceable(FalconDataTypes.FALCON_CLUSTER.getName()); + + clusterRef.set(AtlasClient.NAME, cluster.getName()); + clusterRef.set(AtlasClient.DESCRIPTION, cluster.getDescription()); + clusterRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, cluster.getName()); + + clusterRef.set(FalconBridge.COLO, cluster.getColo()); + + if (cluster.getACL() != null) { + clusterRef.set(AtlasClient.OWNER, cluster.getACL().getGroup()); + } + + if (StringUtils.isNotEmpty(cluster.getTags())) { + clusterRef.set(FalconBridge.TAGS, + EventUtil.convertKeyValueStringToMap(cluster.getTags())); + } + + return clusterRef; + } + + private static Referenceable createFeedEntity(Feed feed, Referenceable clusterReferenceable) { + LOG.info("Creating feed dataset: {}", feed.getName()); + + Referenceable feedEntity = new Referenceable(FalconDataTypes.FALCON_FEED.getName()); + feedEntity.set(AtlasClient.NAME, feed.getName()); + feedEntity.set(AtlasClient.DESCRIPTION, feed.getDescription()); + String feedQualifiedName = + getFeedQualifiedName(feed.getName(), (String) clusterReferenceable.get(AtlasClient.NAME)); + feedEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, feedQualifiedName); + feedEntity.set(FalconBridge.FREQUENCY, feed.getFrequency().toString()); + feedEntity.set(FalconBridge.STOREDIN, clusterReferenceable); + if (feed.getACL() != null) { + feedEntity.set(AtlasClient.OWNER, feed.getACL().getOwner()); + } + + if (StringUtils.isNotEmpty(feed.getTags())) { + feedEntity.set(FalconBridge.TAGS, + EventUtil.convertKeyValueStringToMap(feed.getTags())); + } + + if (feed.getGroups() != null) { + feedEntity.set(FalconBridge.GROUPS, feed.getGroups()); + } + + return feedEntity; + } + + public static List createFeedCreationEntity(Feed feed, ConfigurationStore falconStore) throws FalconException, URISyntaxException { + LOG.info("Creating feed : {}", feed.getName()); + + List entities = new ArrayList<>(); + + if (feed.getClusters() != null) { + List replicationInputs = new ArrayList<>(); + List replicationOutputs = new ArrayList<>(); + + for (org.apache.falcon.entity.v0.feed.Cluster feedCluster : feed.getClusters().getClusters()) { + org.apache.falcon.entity.v0.cluster.Cluster cluster = falconStore.get(EntityType.CLUSTER, + feedCluster.getName()); + + // set cluster + Referenceable clusterReferenceable = getClusterEntityReference(cluster.getName(), cluster.getColo()); + entities.add(clusterReferenceable); + + // input as hive_table or hdfs_path, output as falcon_feed dataset + List inputs = new ArrayList<>(); + List inputReferenceables = getInputEntities(cluster, feed); + if (inputReferenceables != null) { + 
entities.addAll(inputReferenceables); + inputs.add(inputReferenceables.get(inputReferenceables.size() - 1)); + } + + List outputs = new ArrayList<>(); + Referenceable feedEntity = createFeedEntity(feed, clusterReferenceable); + if (feedEntity != null) { + entities.add(feedEntity); + outputs.add(feedEntity); + } + + if (!inputs.isEmpty() || !outputs.isEmpty()) { + Referenceable feedCreateEntity = new Referenceable(FalconDataTypes.FALCON_FEED_CREATION.getName()); + String feedQualifiedName = getFeedQualifiedName(feed.getName(), cluster.getName()); + + feedCreateEntity.set(AtlasClient.NAME, feed.getName()); + feedCreateEntity.set(AtlasClient.DESCRIPTION, "Feed creation - " + feed.getName()); + feedCreateEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, feedQualifiedName); + + if (!inputs.isEmpty()) { + feedCreateEntity.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, inputs); + } + if (!outputs.isEmpty()) { + feedCreateEntity.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, outputs); + } + + feedCreateEntity.set(FalconBridge.STOREDIN, clusterReferenceable); + entities.add(feedCreateEntity); + } + + if (ClusterType.SOURCE == feedCluster.getType()) { + replicationInputs.add(feedEntity); + } else if (ClusterType.TARGET == feedCluster.getType()) { + replicationOutputs.add(feedEntity); + } + } + + if (!replicationInputs.isEmpty() && !replicationInputs.isEmpty()) { + Referenceable feedReplicationEntity = new Referenceable(FalconDataTypes + .FALCON_FEED_REPLICATION.getName()); + + feedReplicationEntity.set(AtlasClient.NAME, feed.getName()); + feedReplicationEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, feed.getName()); + + feedReplicationEntity.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, replicationInputs); + feedReplicationEntity.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, replicationOutputs); + entities.add(feedReplicationEntity); + } + + } + return entities; + } + + /** + * Creates process entity + * + * @param process process entity + * @param falconStore config store + * @return process instance reference + * + * @throws FalconException if retrieving from the configuration store fail + */ + public static List createProcessEntity(org.apache.falcon.entity.v0.process.Process process, + ConfigurationStore falconStore) throws FalconException { + LOG.info("Creating process Entity : {}", process.getName()); + + // The requirement is for each cluster, create a process entity with name + // clustername.processname + List entities = new ArrayList<>(); + + if (process.getClusters() != null) { + + for (Cluster processCluster : process.getClusters().getClusters()) { + org.apache.falcon.entity.v0.cluster.Cluster cluster = + falconStore.get(EntityType.CLUSTER, processCluster.getName()); + Referenceable clusterReferenceable = getClusterEntityReference(cluster.getName(), cluster.getColo()); + entities.add(clusterReferenceable); + + List inputs = new ArrayList<>(); + if (process.getInputs() != null) { + for (Input input : process.getInputs().getInputs()) { + Feed feed = falconStore.get(EntityType.FEED, input.getFeed()); + Referenceable inputReferenceable = getFeedDataSetReference(feed, clusterReferenceable); + entities.add(inputReferenceable); + inputs.add(inputReferenceable); + } + } + + List outputs = new ArrayList<>(); + if (process.getOutputs() != null) { + for (Output output : process.getOutputs().getOutputs()) { + Feed feed = falconStore.get(EntityType.FEED, output.getFeed()); + Referenceable outputReferenceable = getFeedDataSetReference(feed, clusterReferenceable); + entities.add(outputReferenceable); + 
outputs.add(outputReferenceable); + } + } + + if (!inputs.isEmpty() || !outputs.isEmpty()) { + + Referenceable processEntity = new Referenceable(FalconDataTypes.FALCON_PROCESS.getName()); + processEntity.set(AtlasClient.NAME, process.getName()); + processEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + getProcessQualifiedName(process.getName(), cluster.getName())); + processEntity.set(FalconBridge.FREQUENCY, process.getFrequency().toString()); + + if (!inputs.isEmpty()) { + processEntity.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, inputs); + } + if (!outputs.isEmpty()) { + processEntity.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, outputs); + } + + // set cluster + processEntity.set(FalconBridge.RUNSON, clusterReferenceable); + + // Set user + if (process.getACL() != null) { + processEntity.set(AtlasClient.OWNER, process.getACL().getOwner()); + } + + if (StringUtils.isNotEmpty(process.getTags())) { + processEntity.set(FalconBridge.TAGS, + EventUtil.convertKeyValueStringToMap(process.getTags())); + } + + if (process.getPipelines() != null) { + processEntity.set(FalconBridge.PIPELINES, process.getPipelines()); + } + + processEntity.set(FalconBridge.WFPROPERTIES, + getProcessEntityWFProperties(process.getWorkflow(), + process.getName())); + + entities.add(processEntity); + } + + } + } + return entities; + } + + private static List getInputEntities(org.apache.falcon.entity.v0.cluster.Cluster cluster, + Feed feed) throws URISyntaxException { + org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName()); + + if(feedCluster != null) { + final CatalogTable table = getTable(feedCluster, feed); + if (table != null) { + CatalogStorage storage = new CatalogStorage(cluster, table); + return createHiveTableInstance(cluster.getName(), storage.getDatabase().toLowerCase(), + storage.getTable().toLowerCase()); + } else { + List locations = FeedHelper.getLocations(feedCluster, feed); + if (CollectionUtils.isNotEmpty(locations)) { + Location dataLocation = FileSystemStorage.getLocation(locations, LocationType.DATA); + if (dataLocation != null) { + final String pathUri = normalize(dataLocation.getPath()); + LOG.info("Registering DFS Path {} ", pathUri); + return fillHDFSDataSet(pathUri, cluster.getName()); + } + } + } + } + + return null; + } + + private static CatalogTable getTable(org.apache.falcon.entity.v0.feed.Cluster cluster, Feed feed) { + // check if table is overridden in cluster + if (cluster.getTable() != null) { + return cluster.getTable(); + } + + return feed.getTable(); + } + + private static List fillHDFSDataSet(final String pathUri, final String clusterName) { + List entities = new ArrayList<>(); + Referenceable ref = new Referenceable(HiveMetaStoreBridge.HDFS_PATH); + ref.set("path", pathUri); + // Path path = new Path(pathUri); + // ref.set("name", path.getName()); + //TODO - Fix after ATLAS-542 to shorter Name + Path path = new Path(pathUri); + ref.set(AtlasClient.NAME, Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase()); + ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri); + ref.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName); + entities.add(ref); + return entities; + } + + private static Referenceable createHiveDatabaseInstance(String clusterName, String dbName) { + Referenceable dbRef = new Referenceable(HiveDataTypes.HIVE_DB.getName()); + dbRef.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName); + dbRef.set(AtlasClient.NAME, dbName); + dbRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + 
HiveMetaStoreBridge.getDBQualifiedName(clusterName, dbName)); + return dbRef; + } + + private static List createHiveTableInstance(String clusterName, String dbName, + String tableName) { + List entities = new ArrayList<>(); + Referenceable dbRef = createHiveDatabaseInstance(clusterName, dbName); + entities.add(dbRef); + + Referenceable tableRef = new Referenceable(HiveDataTypes.HIVE_TABLE.getName()); + tableRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + HiveMetaStoreBridge.getTableQualifiedName(clusterName, dbName, tableName)); + tableRef.set(AtlasClient.NAME, tableName.toLowerCase()); + tableRef.set(ATTRIBUTE_DB, dbRef); + entities.add(tableRef); + + return entities; + } + + private static Referenceable getClusterEntityReference(final String clusterName, + final String colo) { + LOG.info("Getting reference for entity {}", clusterName); + Referenceable clusterRef = new Referenceable(FalconDataTypes.FALCON_CLUSTER.getName()); + clusterRef.set(AtlasClient.NAME, String.format("%s", clusterName)); + clusterRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, clusterName); + clusterRef.set(FalconBridge.COLO, colo); + return clusterRef; + } + + + private static Referenceable getFeedDataSetReference(Feed feed, Referenceable clusterReference) { + LOG.info("Getting reference for entity {}", feed.getName()); + Referenceable feedDatasetRef = new Referenceable(FalconDataTypes.FALCON_FEED.getName()); + feedDatasetRef.set(AtlasClient.NAME, feed.getName()); + feedDatasetRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getFeedQualifiedName(feed.getName(), + (String) clusterReference.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME))); + feedDatasetRef.set(FalconBridge.STOREDIN, clusterReference); + feedDatasetRef.set(FalconBridge.FREQUENCY, feed.getFrequency()); + return feedDatasetRef; + } + + private static Map getProcessEntityWFProperties(final Workflow workflow, + final String processName) { + Map wfProperties = new HashMap<>(); + wfProperties.put(WorkflowExecutionArgs.USER_WORKFLOW_NAME.getName(), + ProcessHelper.getProcessWorkflowName(workflow.getName(), processName)); + wfProperties.put(WorkflowExecutionArgs.USER_WORKFLOW_VERSION.getName(), + workflow.getVersion()); + wfProperties.put(WorkflowExecutionArgs.USER_WORKFLOW_ENGINE.getName(), + workflow.getEngine().value()); + + return wfProperties; + } + + public static String getFeedQualifiedName(final String feedName, final String clusterName) { + return String.format("%s@%s", feedName, clusterName); + } + + public static String getProcessQualifiedName(final String processName, final String clusterName) { + return String.format("%s@%s", processName, clusterName); + } + + public static String normalize(final String str) { + if (StringUtils.isBlank(str)) { + return null; + } + return str.toLowerCase().trim(); + } +} diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/event/FalconEvent.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/event/FalconEvent.java new file mode 100644 index 0000000000..51db894ab6 --- /dev/null +++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/event/FalconEvent.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.falcon.event; + +import org.apache.falcon.entity.v0.Entity; + +/** + * Falcon event to interface with Atlas Service. + */ +public class FalconEvent { + protected String user; + protected OPERATION operation; + protected Entity entity; + + public FalconEvent(String doAsUser, OPERATION falconOperation, Entity entity) { + this.user = doAsUser; + this.operation = falconOperation; + this.entity = entity; + } + + public enum OPERATION { + ADD_CLUSTER, + UPDATE_CLUSTER, + ADD_FEED, + UPDATE_FEED, + ADD_PROCESS, + UPDATE_PROCESS, + } + + public String getUser() { + return user; + } + + public OPERATION getOperation() { + return operation; + } + + public Entity getEntity() { + return entity; + } +} diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/hook/FalconHook.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/hook/FalconHook.java new file mode 100644 index 0000000000..b8a73cbe63 --- /dev/null +++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/hook/FalconHook.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.falcon.hook; + +import org.apache.atlas.falcon.bridge.FalconBridge; +import org.apache.atlas.falcon.event.FalconEvent; +import org.apache.atlas.falcon.publisher.FalconEventPublisher; +import org.apache.atlas.hook.AtlasHook; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.v1.model.instance.Referenceable; +import org.apache.atlas.v1.model.notification.HookNotificationV1.EntityCreateRequest; +import org.apache.falcon.FalconException; +import org.apache.falcon.entity.store.ConfigurationStore; +import org.apache.falcon.entity.v0.feed.Feed; +import org.apache.falcon.entity.v0.process.Process; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; +import static org.apache.atlas.repository.Constants.FALCON_SOURCE; + +/** + * Falcon hook sends lineage information to the Atlas Service. + */ +public class FalconHook extends AtlasHook implements FalconEventPublisher { + private static final Logger LOG = LoggerFactory.getLogger(FalconHook.class); + + private static ConfigurationStore STORE; + + @Override + public String getMessageSource() { + return FALCON_SOURCE; + } + + private enum Operation { + ADD, + UPDATE + } + + static { + try { + STORE = ConfigurationStore.get(); + } catch (Exception e) { + LOG.error("Caught exception initializing the falcon hook.", e); + } + + LOG.info("Created Atlas Hook for Falcon"); + } + + @Override + public void publish(final Data data) { + final FalconEvent event = data.getEvent(); + try { + fireAndForget(event); + } catch (Throwable t) { + LOG.warn("Error in processing data {}", data, t); + } + } + + private void fireAndForget(FalconEvent event) throws FalconException, URISyntaxException { + LOG.info("Entered Atlas hook for Falcon hook operation {}", event.getOperation()); + List messages = new ArrayList<>(); + + Operation op = getOperation(event.getOperation()); + String user = getUser(event.getUser()); + LOG.info("fireAndForget user:{}", user); + switch (op) { + case ADD: + messages.add(new EntityCreateRequest(user, createEntities(event, user))); + break; + + } + notifyEntities(messages, null); + } + + private List createEntities(FalconEvent event, String user) throws FalconException, URISyntaxException { + List entities = new ArrayList<>(); + + switch (event.getOperation()) { + case ADD_CLUSTER: + entities.add(FalconBridge + .createClusterEntity((org.apache.falcon.entity.v0.cluster.Cluster) event.getEntity())); + break; + + case ADD_PROCESS: + entities.addAll(FalconBridge.createProcessEntity((Process) event.getEntity(), STORE)); + break; + + case ADD_FEED: + entities.addAll(FalconBridge.createFeedCreationEntity((Feed) event.getEntity(), STORE)); + break; + + case UPDATE_CLUSTER: + case UPDATE_FEED: + case UPDATE_PROCESS: + default: + LOG.info("Falcon operation {} is not valid or supported", event.getOperation()); + } + + return entities; + } + + private static Operation getOperation(final FalconEvent.OPERATION op) throws FalconException { + switch (op) { + case ADD_CLUSTER: + case ADD_FEED: + case ADD_PROCESS: + return Operation.ADD; + + case UPDATE_CLUSTER: + case UPDATE_FEED: + 
case UPDATE_PROCESS: + return Operation.UPDATE; + + default: + throw new FalconException("Falcon operation " + op + " is not valid or supported"); + } + } +} + diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/model/FalconDataTypes.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/model/FalconDataTypes.java new file mode 100644 index 0000000000..e36ff23aff --- /dev/null +++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/model/FalconDataTypes.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.falcon.model; + +/** + * Falcon Data Types for model and bridge. + */ +public enum FalconDataTypes { + // Classes + FALCON_CLUSTER, + FALCON_FEED_CREATION, + FALCON_FEED, + FALCON_FEED_REPLICATION, + FALCON_PROCESS; + + public String getName() { + return name().toLowerCase(); + } + +} diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/publisher/FalconEventPublisher.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/publisher/FalconEventPublisher.java new file mode 100644 index 0000000000..a01ec14beb --- /dev/null +++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/publisher/FalconEventPublisher.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.falcon.publisher; + + +import org.apache.atlas.falcon.event.FalconEvent; + +/** + * Falcon publisher for Atlas + */ +public interface FalconEventPublisher { + class Data { + private FalconEvent event; + + public Data(FalconEvent event) { + this.event = event; + } + + public FalconEvent getEvent() { + return event; + } + } + + void publish(final Data data); +} diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/service/AtlasService.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/service/AtlasService.java new file mode 100644 index 0000000000..7482ba7b82 --- /dev/null +++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/service/AtlasService.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.falcon.service; + +import org.apache.atlas.falcon.Util.EventUtil; +import org.apache.atlas.falcon.event.FalconEvent; +import org.apache.atlas.falcon.hook.FalconHook; +import org.apache.atlas.falcon.publisher.FalconEventPublisher; +import org.apache.falcon.FalconException; +import org.apache.falcon.entity.v0.Entity; +import org.apache.falcon.entity.v0.EntityType; +import org.apache.falcon.service.ConfigurationChangeListener; +import org.apache.falcon.service.FalconService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Atlas service to publish Falcon events + */ +public class AtlasService implements FalconService, ConfigurationChangeListener { + + private static final Logger LOG = LoggerFactory.getLogger(AtlasService.class); + private FalconEventPublisher publisher; + + /** + * Constant for the service name. + */ + public static final String SERVICE_NAME = AtlasService.class.getSimpleName(); + + @Override + public String getName() { + return SERVICE_NAME; + } + + @Override + public void init() throws FalconException { + publisher = new FalconHook(); + } + + @Override + public void destroy() throws FalconException { + } + + @Override + public void onAdd(Entity entity) throws FalconException { + try { + EntityType entityType = entity.getEntityType(); + switch (entityType) { + case CLUSTER: + addEntity(entity, FalconEvent.OPERATION.ADD_CLUSTER); + break; + + case PROCESS: + addEntity(entity, FalconEvent.OPERATION.ADD_PROCESS); + break; + + case FEED: + addEntity(entity, FalconEvent.OPERATION.ADD_FEED); + break; + + default: + LOG.debug("Entity type not processed {}", entityType); + } + } catch(Throwable t) { + LOG.warn("Error handling entity {}", entity, t); + } + } + + @Override + public void onRemove(Entity entity) throws FalconException { + } + + @Override + public void onChange(Entity oldEntity, Entity newEntity) throws FalconException { + /** + * Skipping update for now - update uses full update currently and this might result in all attributes wiped for hive entities + EntityType entityType = newEntity.getEntityType(); + switch (entityType) { + case CLUSTER: + addEntity(newEntity, FalconEvent.OPERATION.UPDATE_CLUSTER); + break; + + case PROCESS: + addEntity(newEntity, FalconEvent.OPERATION.UPDATE_PROCESS); + break; + + case FEED: + FalconEvent.OPERATION operation = isReplicationFeed((Feed) newEntity) ? 
+ FalconEvent.OPERATION.UPDATE_REPLICATION_FEED : + FalconEvent.OPERATION.UPDATE_FEED; + addEntity(newEntity, operation); + break; + + default: + LOG.debug("Entity type not processed {}", entityType); + } + **/ + } + + @Override + public void onReload(Entity entity) throws FalconException { + //Since there is no import script that can import existing falcon entities to atlas, adding on falcon service start + onAdd(entity); + } + + private void addEntity(Entity entity, FalconEvent.OPERATION operation) throws FalconException { + LOG.info("Adding {} entity to Atlas: {}", entity.getEntityType().name(), entity.getName()); + + try { + FalconEvent event = + new FalconEvent(EventUtil.getUser(), operation, entity); + FalconEventPublisher.Data data = new FalconEventPublisher.Data(event); + publisher.publish(data); + } catch (Exception ex) { + throw new FalconException("Unable to publish data to publisher " + ex.getMessage(), ex); + } + } +} diff --git a/addons/falcon-bridge/src/test/java/org/apache/atlas/falcon/hook/FalconHookIT.java b/addons/falcon-bridge/src/test/java/org/apache/atlas/falcon/hook/FalconHookIT.java new file mode 100644 index 0000000000..24f36168c9 --- /dev/null +++ b/addons/falcon-bridge/src/test/java/org/apache/atlas/falcon/hook/FalconHookIT.java @@ -0,0 +1,351 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.falcon.hook; + +import com.sun.jersey.api.client.ClientResponse; +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.falcon.bridge.FalconBridge; +import org.apache.atlas.falcon.model.FalconDataTypes; +import org.apache.atlas.hive.bridge.HiveMetaStoreBridge; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.v1.model.instance.Id; +import org.apache.atlas.v1.model.instance.Referenceable; +import org.apache.atlas.v1.typesystem.types.utils.TypesUtil; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.atlas.utils.ParamChecker; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.RandomStringUtils; +import org.apache.atlas.falcon.service.AtlasService; +import org.apache.falcon.entity.FeedHelper; +import org.apache.falcon.entity.FileSystemStorage; +import org.apache.falcon.entity.store.ConfigurationStore; +import org.apache.falcon.entity.v0.Entity; +import org.apache.falcon.entity.v0.EntityType; +import org.apache.falcon.entity.v0.cluster.Cluster; +import org.apache.falcon.entity.v0.feed.Feed; +import org.apache.falcon.entity.v0.feed.Location; +import org.apache.falcon.entity.v0.feed.LocationType; +import org.apache.falcon.entity.v0.process.Process; +import org.apache.falcon.security.CurrentUser; +import org.slf4j.Logger; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import javax.xml.bind.JAXBException; +import java.util.List; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.fail; + +public class FalconHookIT { + public static final Logger LOG = org.slf4j.LoggerFactory.getLogger(FalconHookIT.class); + + public static final String CLUSTER_RESOURCE = "/cluster.xml"; + public static final String FEED_RESOURCE = "/feed.xml"; + public static final String FEED_HDFS_RESOURCE = "/feed-hdfs.xml"; + public static final String FEED_REPLICATION_RESOURCE = "/feed-replication.xml"; + public static final String PROCESS_RESOURCE = "/process.xml"; + + private AtlasClient atlasClient; + + private static final ConfigurationStore STORE = ConfigurationStore.get(); + + @BeforeClass + public void setUp() throws Exception { + Configuration atlasProperties = ApplicationProperties.get(); + if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) { + atlasClient = new AtlasClient(atlasProperties.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT), new String[]{"admin", "admin"}); + } else { + atlasClient = new AtlasClient(atlasProperties.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT)); + } + + AtlasService service = new AtlasService(); + service.init(); + STORE.registerListener(service); + CurrentUser.authenticate(System.getProperty("user.name")); + } + + private boolean isDataModelAlreadyRegistered() throws Exception { + try { + atlasClient.getType(FalconDataTypes.FALCON_PROCESS.getName()); + LOG.info("Hive data model is already registered!"); + return true; + } catch(AtlasServiceException ase) { + if (ase.getStatus() == ClientResponse.Status.NOT_FOUND) { + return false; + } + throw ase; + } + } + + private T loadEntity(EntityType type, String resource, String name) throws JAXBException { + Entity entity = (Entity) type.getUnmarshaller().unmarshal(this.getClass().getResourceAsStream(resource)); + switch (entity.getEntityType()) { + case CLUSTER: + ((Cluster) entity).setName(name); + 
break; + + case FEED: + ((Feed) entity).setName(name); + break; + + case PROCESS: + ((Process) entity).setName(name); + break; + } + return (T)entity; + } + + private String random() { + return RandomStringUtils.randomAlphanumeric(10); + } + + private String getTableUri(String dbName, String tableName) { + return String.format("catalog:%s:%s#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}", dbName, tableName); + } + + @Test + public void testCreateProcess() throws Exception { + Cluster cluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random()); + STORE.publish(EntityType.CLUSTER, cluster); + assertClusterIsRegistered(cluster); + + Feed infeed = getTableFeed(FEED_RESOURCE, cluster.getName(), null); + String infeedId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getFeedQualifiedName(infeed.getName(), cluster.getName())).getId()._getId(); + + Feed outfeed = getTableFeed(FEED_RESOURCE, cluster.getName()); + String outFeedId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getFeedQualifiedName(outfeed.getName(), cluster.getName())).getId()._getId(); + + Process process = loadEntity(EntityType.PROCESS, PROCESS_RESOURCE, "process" + random()); + process.getClusters().getClusters().get(0).setName(cluster.getName()); + process.getInputs().getInputs().get(0).setFeed(infeed.getName()); + process.getOutputs().getOutputs().get(0).setFeed(outfeed.getName()); + STORE.publish(EntityType.PROCESS, process); + + String pid = assertProcessIsRegistered(process, cluster.getName()); + Referenceable processEntity = atlasClient.getEntity(pid); + assertNotNull(processEntity); + assertEquals(processEntity.get(AtlasClient.NAME), process.getName()); + assertEquals(((List)processEntity.get("inputs")).get(0)._getId(), infeedId); + assertEquals(((List)processEntity.get("outputs")).get(0)._getId(), outFeedId); + } + + private String assertProcessIsRegistered(Process process, String clusterName) throws Exception { + return assertEntityIsRegistered(FalconDataTypes.FALCON_PROCESS.getName(), + AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getProcessQualifiedName(process.getName(), clusterName)); + } + + private String assertClusterIsRegistered(Cluster cluster) throws Exception { + return assertEntityIsRegistered(FalconDataTypes.FALCON_CLUSTER.getName(), + AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, cluster.getName()); + } + + private TypesUtil.Pair getHDFSFeed(String feedResource, String clusterName) throws Exception { + Feed feed = loadEntity(EntityType.FEED, feedResource, "feed" + random()); + org.apache.falcon.entity.v0.feed.Cluster feedCluster = feed.getClusters().getClusters().get(0); + feedCluster.setName(clusterName); + STORE.publish(EntityType.FEED, feed); + String feedId = assertFeedIsRegistered(feed, clusterName); + assertFeedAttributes(feedId); + + String processId = assertEntityIsRegistered(FalconDataTypes.FALCON_FEED_CREATION.getName(), + AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getFeedQualifiedName(feed.getName(), clusterName)); + Referenceable processEntity = atlasClient.getEntity(processId); + assertEquals(((List)processEntity.get("outputs")).get(0).getId(), feedId); + + String inputId = ((List) processEntity.get("inputs")).get(0).getId(); + Referenceable pathEntity = atlasClient.getEntity(inputId); + assertEquals(pathEntity.getTypeName(), HiveMetaStoreBridge.HDFS_PATH); + + List locations = FeedHelper.getLocations(feedCluster, feed); 
+ Location dataLocation = FileSystemStorage.getLocation(locations, LocationType.DATA); + assertEquals(pathEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), + FalconBridge.normalize(dataLocation.getPath())); + + return TypesUtil.Pair.of(feedId, feed); + } + + private Feed getTableFeed(String feedResource, String clusterName) throws Exception { + return getTableFeed(feedResource, clusterName, null); + } + + private Feed getTableFeed(String feedResource, String clusterName, String secondClusterName) throws Exception { + Feed feed = loadEntity(EntityType.FEED, feedResource, "feed" + random()); + org.apache.falcon.entity.v0.feed.Cluster feedCluster = feed.getClusters().getClusters().get(0); + feedCluster.setName(clusterName); + String dbName = "db" + random(); + String tableName = "table" + random(); + feedCluster.getTable().setUri(getTableUri(dbName, tableName)); + + String dbName2 = "db" + random(); + String tableName2 = "table" + random(); + + if (secondClusterName != null) { + org.apache.falcon.entity.v0.feed.Cluster feedCluster2 = feed.getClusters().getClusters().get(1); + feedCluster2.setName(secondClusterName); + feedCluster2.getTable().setUri(getTableUri(dbName2, tableName2)); + } + + STORE.publish(EntityType.FEED, feed); + String feedId = assertFeedIsRegistered(feed, clusterName); + assertFeedAttributes(feedId); + verifyFeedLineage(feed.getName(), clusterName, feedId, dbName, tableName); + + if (secondClusterName != null) { + String feedId2 = assertFeedIsRegistered(feed, secondClusterName); + assertFeedAttributes(feedId2); + verifyFeedLineage(feed.getName(), secondClusterName, feedId2, dbName2, tableName2); + } + return feed; + } + + private void assertFeedAttributes(String feedId) throws Exception { + Referenceable feedEntity = atlasClient.getEntity(feedId); + assertEquals(feedEntity.get(AtlasClient.OWNER), "testuser"); + assertEquals(feedEntity.get(FalconBridge.FREQUENCY), "hours(1)"); + assertEquals(feedEntity.get(AtlasClient.DESCRIPTION), "test input"); + } + + private void verifyFeedLineage(String feedName, String clusterName, String feedId, String dbName, String tableName) + throws Exception{ + //verify that lineage from hive table to falcon feed is created + String processId = assertEntityIsRegistered(FalconDataTypes.FALCON_FEED_CREATION.getName(), + AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getFeedQualifiedName(feedName, clusterName)); + Referenceable processEntity = atlasClient.getEntity(processId); + assertEquals(((List)processEntity.get("outputs")).get(0).getId(), feedId); + + String inputId = ((List) processEntity.get("inputs")).get(0).getId(); + Referenceable tableEntity = atlasClient.getEntity(inputId); + assertEquals(tableEntity.getTypeName(), HiveDataTypes.HIVE_TABLE.getName()); + assertEquals(tableEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), + HiveMetaStoreBridge.getTableQualifiedName(clusterName, dbName, tableName)); + + } + + private String assertFeedIsRegistered(Feed feed, String clusterName) throws Exception { + return assertEntityIsRegistered(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getFeedQualifiedName(feed.getName(), clusterName)); + } + + @Test + public void testReplicationFeed() throws Exception { + Cluster srcCluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random()); + STORE.publish(EntityType.CLUSTER, srcCluster); + assertClusterIsRegistered(srcCluster); + + Cluster targetCluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random()); + 
STORE.publish(EntityType.CLUSTER, targetCluster); + assertClusterIsRegistered(targetCluster); + + Feed feed = getTableFeed(FEED_REPLICATION_RESOURCE, srcCluster.getName(), targetCluster.getName()); + String inId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getFeedQualifiedName(feed.getName(), srcCluster.getName())).getId()._getId(); + String outId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getFeedQualifiedName(feed.getName(), targetCluster.getName())).getId()._getId(); + + + String processId = assertEntityIsRegistered(FalconDataTypes.FALCON_FEED_REPLICATION.getName(), + AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, feed.getName()); + Referenceable process = atlasClient.getEntity(processId); + assertEquals(((List)process.get("inputs")).get(0)._getId(), inId); + assertEquals(((List)process.get("outputs")).get(0)._getId(), outId); + } + + @Test + public void testCreateProcessWithHDFSFeed() throws Exception { + Cluster cluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random()); + STORE.publish(EntityType.CLUSTER, cluster); + + TypesUtil.Pair result = getHDFSFeed(FEED_HDFS_RESOURCE, cluster.getName()); + Feed infeed = result.right; + String infeedId = result.left; + + Feed outfeed = getTableFeed(FEED_RESOURCE, cluster.getName()); + String outfeedId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + FalconBridge.getFeedQualifiedName(outfeed.getName(), cluster.getName())).getId()._getId(); + + Process process = loadEntity(EntityType.PROCESS, PROCESS_RESOURCE, "process" + random()); + process.getClusters().getClusters().get(0).setName(cluster.getName()); + process.getInputs().getInputs().get(0).setFeed(infeed.getName()); + process.getOutputs().getOutputs().get(0).setFeed(outfeed.getName()); + STORE.publish(EntityType.PROCESS, process); + + String pid = assertProcessIsRegistered(process, cluster.getName()); + Referenceable processEntity = atlasClient.getEntity(pid); + assertEquals(processEntity.get(AtlasClient.NAME), process.getName()); + assertEquals(processEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), + FalconBridge.getProcessQualifiedName(process.getName(), cluster.getName())); + assertEquals(((List)processEntity.get("inputs")).get(0)._getId(), infeedId); + assertEquals(((List)processEntity.get("outputs")).get(0)._getId(), outfeedId); + } + + private String assertEntityIsRegistered(final String typeName, final String property, final String value) throws Exception { + waitFor(80000, new Predicate() { + @Override + public void evaluate() throws Exception { + Referenceable entity = atlasClient.getEntity(typeName, property, value); + assertNotNull(entity); + } + }); + Referenceable entity = atlasClient.getEntity(typeName, property, value); + return entity.getId()._getId(); + } + + public interface Predicate { + /** + * Perform a predicate evaluation. + * + * @return the boolean result of the evaluation. + * @throws Exception thrown if the predicate evaluation could not evaluate. + */ + void evaluate() throws Exception; + } + + /** + * Wait for a condition, expressed via a {@link Predicate} to become true. + * + * @param timeout maximum time in milliseconds to wait for the predicate to become true. + * @param predicate predicate waiting on. 
+ */ + protected void waitFor(int timeout, Predicate predicate) throws Exception { + ParamChecker.notNull(predicate, "predicate"); + long mustEnd = System.currentTimeMillis() + timeout; + + while (true) { + try { + predicate.evaluate(); + return; + } catch(Error | Exception e) { + if (System.currentTimeMillis() >= mustEnd) { + fail("Assertions failed. Failing after waiting for timeout " + timeout + " msecs", e); + } + LOG.debug("Waiting up to {} msec as assertion failed", mustEnd - System.currentTimeMillis(), e); + Thread.sleep(400); + } + } + } +} diff --git a/addons/falcon-bridge/src/test/resources/atlas-application.properties b/addons/falcon-bridge/src/test/resources/atlas-application.properties new file mode 100644 index 0000000000..3b12e5fb33 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/atlas-application.properties @@ -0,0 +1,125 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +######### Atlas Server Configs ######### +atlas.rest.address=http://localhost:31000 + +######### Graph Database Configs ######### + + +# Graph database implementation. Value inserted by maven. 
+atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase +atlas.graph.index.search.solr.wait-searcher=true + +# Graph Storage +atlas.graph.storage.backend=berkeleyje + +# Entity repository implementation +atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.InMemoryEntityAuditRepository + +# Graph Search Index Backend +atlas.graph.index.search.backend=solr + +#Berkeley storage directory +atlas.graph.storage.directory=${sys:atlas.data}/berkley + +#hbase +#For standalone mode , specify localhost +#for distributed mode, specify zookeeper quorum here + +atlas.graph.storage.hostname=${graph.storage.hostname} +atlas.graph.storage.hbase.regions-per-server=1 +atlas.graph.storage.lock.wait-time=10000 + +#ElasticSearch +atlas.graph.index.search.directory=${sys:atlas.data}/es +atlas.graph.index.search.elasticsearch.client-only=false +atlas.graph.index.search.elasticsearch.local-mode=true +atlas.graph.index.search.elasticsearch.create.sleep=2000 + +# Solr cloud mode properties +atlas.graph.index.search.solr.mode=cloud +atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address} +atlas.graph.index.search.solr.embedded=true +atlas.graph.index.search.max-result-set-size=150 + + +######### Notification Configs ######### +atlas.notification.embedded=true + +atlas.kafka.zookeeper.connect=localhost:19026 +atlas.kafka.bootstrap.servers=localhost:19027 +atlas.kafka.data=${sys:atlas.data}/kafka +atlas.kafka.zookeeper.session.timeout.ms=4000 +atlas.kafka.zookeeper.sync.time.ms=20 +atlas.kafka.consumer.timeout.ms=4000 +atlas.kafka.auto.commit.interval.ms=100 +atlas.kafka.hook.group.id=atlas +atlas.kafka.entities.group.id=atlas_entities +#atlas.kafka.auto.commit.enable=false + +atlas.kafka.enable.auto.commit=false +atlas.kafka.auto.offset.reset=earliest +atlas.kafka.session.timeout.ms=30000 +atlas.kafka.offsets.topic.replication.factor=1 + + + +######### Entity Audit Configs ######### +atlas.audit.hbase.tablename=ATLAS_ENTITY_AUDIT_EVENTS +atlas.audit.zookeeper.session.timeout.ms=1000 +atlas.audit.hbase.zookeeper.quorum=localhost +atlas.audit.hbase.zookeeper.property.clientPort=19026 + +######### Security Properties ######### + +# SSL config +atlas.enableTLS=false +atlas.server.https.port=31443 + +######### Security Properties ######### + +hbase.security.authentication=simple + +atlas.hook.falcon.synchronous=true + +######### JAAS Configuration ######## + +atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule +atlas.jaas.KafkaClient.loginModuleControlFlag = required +atlas.jaas.KafkaClient.option.useKeyTab = true +atlas.jaas.KafkaClient.option.storeKey = true +atlas.jaas.KafkaClient.option.serviceName = kafka +atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab +atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM + +######### High Availability Configuration ######## +atlas.server.ha.enabled=false +#atlas.server.ids=id1 +#atlas.server.address.id1=localhost:21000 + +######### Atlas Authorization ######### +atlas.authorizer.impl=none +# atlas.authorizer.impl=simple +# atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json + +######### Atlas Authentication ######### +atlas.authentication.method.file=true +atlas.authentication.method.ldap.type=none +atlas.authentication.method.kerberos=false +# atlas.authentication.method.file.filename=users-credentials.properties diff --git a/addons/falcon-bridge/src/test/resources/atlas-log4j.xml 
b/addons/falcon-bridge/src/test/resources/atlas-log4j.xml new file mode 100755 index 0000000000..262a710f7a --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/atlas-log4j.xml @@ -0,0 +1,137 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/falcon-bridge/src/test/resources/cluster.xml b/addons/falcon-bridge/src/test/resources/cluster.xml new file mode 100644 index 0000000000..b183847db3 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/cluster.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/falcon-bridge/src/test/resources/feed-hdfs.xml b/addons/falcon-bridge/src/test/resources/feed-hdfs.xml new file mode 100644 index 0000000000..435db07451 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/feed-hdfs.xml @@ -0,0 +1,39 @@ + + + + online,bi + + hours(1) + UTC + + + + + + + + + + + + + + + + diff --git a/addons/falcon-bridge/src/test/resources/feed-replication.xml b/addons/falcon-bridge/src/test/resources/feed-replication.xml new file mode 100644 index 0000000000..dcd427b180 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/feed-replication.xml @@ -0,0 +1,43 @@ + + + + online,bi + + hours(1) + UTC + + + + + + + + + + + +
+ + + +
+ + + + diff --git a/addons/falcon-bridge/src/test/resources/feed.xml b/addons/falcon-bridge/src/test/resources/feed.xml new file mode 100644 index 0000000000..473c745ce8 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/feed.xml @@ -0,0 +1,38 @@ + + + + online,bi + + hours(1) + UTC + + + + + + +
+ + + +
+ + + + diff --git a/addons/falcon-bridge/src/test/resources/hive-site.xml b/addons/falcon-bridge/src/test/resources/hive-site.xml new file mode 100644 index 0000000000..f058c2edc2 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/hive-site.xml @@ -0,0 +1,48 @@ + + + + + + + hive.exec.post.hooks + org.apache.atlas.hive.hook.HiveHook + + + + hive.support.concurrency + false + + + + hive.metastore.warehouse.dir + ${user.dir}/target/metastore + + + + javax.jdo.option.ConnectionURL + jdbc:derby:${user.dir}/target/metastore_db;create=true + + + + atlas.hook.hive.synchronous + true + + + + fs.pfile.impl + org.apache.hadoop.fs.ProxyLocalFileSystem + + \ No newline at end of file diff --git a/addons/falcon-bridge/src/test/resources/process.xml b/addons/falcon-bridge/src/test/resources/process.xml new file mode 100644 index 0000000000..b94d0a8470 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/process.xml @@ -0,0 +1,53 @@ + + + + + consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting + + + + + + + + 1 + FIFO + days(1) + UTC + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/falcon-bridge/src/test/resources/startup.properties b/addons/falcon-bridge/src/test/resources/startup.properties new file mode 100644 index 0000000000..9623470396 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/startup.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +*.domain=debug +*.config.store.persist=false +*.config.store.uri=target/config_store \ No newline at end of file diff --git a/addons/falcon-bridge/src/test/resources/users-credentials.properties b/addons/falcon-bridge/src/test/resources/users-credentials.properties new file mode 100644 index 0000000000..da69923502 --- /dev/null +++ b/addons/falcon-bridge/src/test/resources/users-credentials.properties @@ -0,0 +1,3 @@ +#username=group::sha256+salt-password +admin=ADMIN::a4a88c0872bf652bb9ed803ece5fd6e82354838a9bf59ab4babb1dab322154e1 +rangertagsync=RANGER_TAG_SYNC::0afe7a1968b07d4c3ff4ed8c2d809a32ffea706c66cd795ead9048e81cfaf034 diff --git a/addons/hbase-bridge-shim/pom.xml b/addons/hbase-bridge-shim/pom.xml new file mode 100644 index 0000000000..eb1b2e9493 --- /dev/null +++ b/addons/hbase-bridge-shim/pom.xml @@ -0,0 +1,60 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + hbase-bridge-shim + Apache Atlas Hbase Bridge Shim Module + Apache Atlas Hbase Bridge Shim + jar + + + + + org.apache.atlas + atlas-plugin-classloader + + + org.apache.hbase + hbase-server + ${hbase.version} + provided + + + javax.servlet + servlet-api + + + com.github.stephenc.findbugs + findbugs-annotations + + + javax.ws.rs + * + + + + + diff --git a/addons/hbase-bridge-shim/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java b/addons/hbase-bridge-shim/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java new file mode 100755 index 0000000000..0b69104b17 --- /dev/null +++ b/addons/hbase-bridge-shim/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java @@ -0,0 +1,277 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.atlas.hbase.hook; + + +import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.NamespaceDescriptor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.CoprocessorEnvironment; +import org.apache.hadoop.hbase.client.SnapshotDescription; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; +import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; +import org.apache.hadoop.hbase.coprocessor.MasterObserver; +import org.apache.hadoop.hbase.coprocessor.ObserverContext; +import org.apache.hadoop.hbase.coprocessor.RegionObserver; +import org.apache.hadoop.hbase.coprocessor.RegionServerObserver; + +import java.io.IOException; +import java.util.Optional; + + +public class HBaseAtlasCoprocessor implements MasterCoprocessor, MasterObserver, RegionObserver, RegionServerObserver { + public static final Log LOG = LogFactory.getLog(HBaseAtlasCoprocessor.class); + + private static final String ATLAS_PLUGIN_TYPE = "hbase"; + private static final String ATLAS_HBASE_HOOK_IMPL_CLASSNAME = "org.apache.atlas.hbase.hook.HBaseAtlasCoprocessor"; + + private AtlasPluginClassLoader atlasPluginClassLoader = null; + private Object impl = null; + private MasterObserver implMasterObserver = null; + private RegionObserver implRegionObserver = null; + private RegionServerObserver implRegionServerObserver = null; + private MasterCoprocessor implMasterCoprocessor = null; + + public HBaseAtlasCoprocessor() { + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.HBaseAtlasCoprocessor()"); + } + + this.init(); + + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.HBaseAtlasCoprocessor()"); + } + } + + private void init(){ + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.init()"); + } + + try { + atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass()); + + @SuppressWarnings("unchecked") + Class cls = Class.forName(ATLAS_HBASE_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader); + + activatePluginClassLoader(); + + impl = cls.newInstance(); + implMasterObserver = (MasterObserver)impl; + implRegionObserver = (RegionObserver)impl; + implRegionServerObserver = (RegionServerObserver)impl; + implMasterCoprocessor = (MasterCoprocessor)impl; + + } catch (Exception e) { + // check what need to be done + LOG.error("Error Enabling RangerHbasePlugin", e); + } finally { + deactivatePluginClassLoader(); + } + + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.init()"); + } + } + + @Override + public Optional getMasterObserver() { + return Optional.of(this); + } + + @Override + public void start(CoprocessorEnvironment env) throws IOException { + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.start()"); + } + + try { + activatePluginClassLoader(); + if (env instanceof MasterCoprocessorEnvironment) { + implMasterCoprocessor.start(env); + } + } finally { + deactivatePluginClassLoader(); + } + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.start()"); + } + } + + @Override + public void postCreateTable(ObserverContext ctx, TableDescriptor desc, RegionInfo[] regions) throws IOException { + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.postCreateTable()"); + } + + try { + 
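+ // switch to the plugin classloader and delegate to the real hook implementation; the finally block restores the original classloader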
activatePluginClassLoader(); + implMasterObserver.postCreateTable(ctx, desc, regions); + } finally { + deactivatePluginClassLoader(); + } + + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postCreateTable()"); + } + } + + @Override + public void postModifyTable(ObserverContext ctx, TableName tableName, TableDescriptor htd) throws IOException { + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.postModifyTable()"); + } + + try { + activatePluginClassLoader(); + implMasterObserver.postModifyTable(ctx, tableName, htd); + } finally { + deactivatePluginClassLoader(); + } + + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postModifyTable()"); + } + } + + @Override + public void postDeleteTable(ObserverContext ctx, TableName tableName) throws IOException { + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.postDeleteTable()"); + } + + try { + activatePluginClassLoader(); + implMasterObserver.postDeleteTable(ctx, tableName); + } finally { + deactivatePluginClassLoader(); + } + + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postDeleteTable()"); + } + } + + @Override + public void postCreateNamespace(ObserverContext ctx, NamespaceDescriptor ns) throws IOException { + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.preCreateNamespace()"); + } + + try { + activatePluginClassLoader(); + implMasterObserver.postCreateNamespace(ctx, ns); + } finally { + deactivatePluginClassLoader(); + } + + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.preCreateNamespace()"); + } + } + + @Override + public void postDeleteNamespace(ObserverContext ctx, String ns) throws IOException { + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.preDeleteNamespace()"); + } + + try { + activatePluginClassLoader(); + implMasterObserver.postDeleteNamespace(ctx, ns); + } finally { + deactivatePluginClassLoader(); + } + + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.preDeleteNamespace()"); + } + } + @Override + public void postModifyNamespace(ObserverContext ctx, NamespaceDescriptor ns) throws IOException { + if(LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.preModifyNamespace()"); + } + + try { + activatePluginClassLoader(); + implMasterObserver.preModifyNamespace(ctx, ns); + } finally { + deactivatePluginClassLoader(); + } + + if(LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.preModifyNamespace()"); + } + } + + @Override + public void postCloneSnapshot(ObserverContext observerContext, SnapshotDescription snapshot, TableDescriptor tableDescriptor) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.postCloneSnapshot()"); + } + + try { + activatePluginClassLoader(); + implMasterObserver.postCloneSnapshot(observerContext,snapshot,tableDescriptor); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postCloneSnapshot()"); + } + } + + @Override + public void postRestoreSnapshot(ObserverContext observerContext, SnapshotDescription snapshot, TableDescriptor tableDescriptor) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.postRestoreSnapshot()"); + } + + try { + activatePluginClassLoader(); + implMasterObserver.postRestoreSnapshot(observerContext,snapshot,tableDescriptor); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== 
HBaseAtlasCoprocessor.postRestoreSnapshot()"); + } + } + + private void activatePluginClassLoader() { + if(atlasPluginClassLoader != null) { + atlasPluginClassLoader.activate(); + } + } + + private void deactivatePluginClassLoader() { + if(atlasPluginClassLoader != null) { + atlasPluginClassLoader.deactivate(); + } + } + +} diff --git a/addons/hbase-bridge/pom.xml b/addons/hbase-bridge/pom.xml new file mode 100644 index 0000000000..a6ed51421f --- /dev/null +++ b/addons/hbase-bridge/pom.xml @@ -0,0 +1,568 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + hbase-bridge + Apache Atlas Hbase Bridge Module + Apache Atlas Hbase Bridge + jar + + + 3.0.3 + 9.3.14.v20161028 + + + + + + org.apache.hbase + hbase-server + ${hbase.version} + provided + + + javax.servlet + servlet-api + + + org.mortbay.jetty + servlet-api-2.5 + + + + + + org.apache.atlas + atlas-notification + + + + + com.sun.jersey + jersey-bundle + 1.19 + test + + + + org.apache.atlas + atlas-webapp + war + test + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + + + javax.servlet + servlet-api + + + + + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.version} + + + + org.apache.hadoop + hadoop-annotations + + + + org.apache.hadoop + hadoop-minicluster + ${hadoop.version} + + + javax.servlet + servlet-api + + + + + + org.testng + testng + + + + org.mockito + mockito-all + + + + org.apache.httpcomponents + httpcore + ${httpcomponents-httpcore.version} + + + + org.eclipse.jetty + jetty-webapp + ${jetty.version} + compile + + + + org.eclipse.jetty + jetty-server + test + + + + org.apache.hbase + hbase-server + ${hbase.version} + test-jar + test + + + org.mortbay.jetty + servlet-api-2.5 + + + org.eclipse.jetty + jetty-server + + + + + + junit + junit + test + 4.12 + + + + org.apache.hbase + hbase-client + ${hbase.version} + + + org.apache.hbase + hbase-common + ${hbase.version} + + + com.github.stephenc.findbugs + findbugs-annotations + + + + + org.apache.hbase + hbase-hadoop2-compat + ${hbase.version} + test-jar + test + + + org.apache.hbase + hbase-hadoop-compat + ${hbase.version} + test-jar + test + + + com.google.guava + guava + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + compile + + + javax.servlet + servlet-api + + + + + org.apache.hadoop + hadoop-auth + ${hadoop.version} + compile + + + org.apache.atlas + atlas-client-v2 + ${project.version} + + + org.apache.hbase + hbase-zookeeper + test-jar + test + ${hbase.version} + + + org.apache.hbase + hbase-common + test-jar + ${hbase.version} + test + + + + + org.apache.hbase + hbase-testing-util + ${hbase.version} + + + + + + + dist + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-hook + package + + copy + + + ${project.build.directory}/dependency/hook/hbase/atlas-hbase-plugin-impl + false + false + true + + + ${project.groupId} + ${project.artifactId} + ${project.version} + + + ${project.groupId} + atlas-client-common + ${project.version} + + + ${project.groupId} + atlas-client-v2 + ${project.version} + + + ${project.groupId} + atlas-intg + ${project.version} + + + ${project.groupId} + atlas-notification + ${project.version} + + + ${project.groupId} + atlas-common + ${project.version} + + + org.apache.kafka + kafka_${kafka.scala.binary.version} + ${kafka.version} + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + com.sun.jersey.contribs + jersey-multipart + ${jersey.version} + + + com.fasterxml.jackson.core + jackson-databind + 
${jackson.databind.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + + + commons-configuration + commons-configuration + ${commons-conf.version} + + + com.sun.jersey + jersey-json + ${jersey.version} + + + javax.ws.rs + jsr311-api + ${jsr.version} + + + + + + copy-hook-shim + package + + copy + + + ${project.build.directory}/dependency/hook/hbase + false + false + true + + + ${project.groupId} + hbase-bridge-shim + ${project.version} + + + ${project.groupId} + atlas-plugin-classloader + ${project.version} + + + + + + + + + + + + + + org.eclipse.jetty + jetty-maven-plugin + ${jetty.version} + + ${skipTests} + + + 31000 + 60000 + + ../../webapp/target/atlas-webapp-${project.version}.war + true + ../../webapp/src/main/webapp + + / + ${project.basedir}/../../webapp/src/main/webapp/WEB-INF/web.xml + + true + + true + + atlas.home + ${project.build.directory} + + + atlas.conf + ${project.build.directory}/test-classes + + + atlas.data + ${project.build.directory}/data + + + atlas.log.dir + ${project.build.directory}/logs + + + atlas.log.file + application.log + + + log4j.configuration + file:///${project.build.directory}/test-classes/atlas-log4j.xml + + + atlas.graphdb.backend + ${graphdb.backend.impl} + + + embedded.solr.directory + ${project.build.directory} + + + solr.log.dir + ${project.build.directory}/logs + + + org.eclipse.jetty.annotations.maxWait + 5000 + + + atlas-stop + 31001 + ${jetty-maven-plugin.stopWait} + jar + + + + org.apache.curator + curator-client + ${curator.version} + + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + + + start-jetty + pre-integration-test + + + stop + deploy-war + + + true + + + + stop-jetty + post-integration-test + + stop + + + + + + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.doxia + doxia-module-twiki + ${doxia.version} + + + org.apache.maven.doxia + doxia-core + ${doxia.version} + + + + + + site + + prepare-package + + + + false + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + false + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-resources + validate + + copy-resources + + + ${basedir}/target/models + + + ${basedir}/../models + + 0000-Area0/** + 1000-Hadoop/** + + + + + + + copy-solr-resources + validate + + copy-resources + + + ${project.build.directory}/solr + + + ${basedir}/../../test-tools/src/main/resources/solr + + + + + + + + + + diff --git a/addons/hbase-bridge/src/bin/import-hbase.sh b/addons/hbase-bridge/src/bin/import-hbase.sh new file mode 100644 index 0000000000..a343036faf --- /dev/null +++ b/addons/hbase-bridge/src/bin/import-hbase.sh @@ -0,0 +1,162 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. 
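+#
+# Imports HBase metadata into Atlas by invoking org.apache.atlas.hbase.bridge.HBaseBridge.
+# Options forwarded to the bridge (see the argument parsing below): -n/--namespace, -t/--table, -f/--filename;
+# any other arguments are appended to the JVM options.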
+# + +# resolve links - $0 may be a softlink +PRG="${0}" + +[[ `uname -s` == *"CYGWIN"* ]] && CYGWIN=true + +while [ -h "${PRG}" ]; do + ls=`ls -ld "${PRG}"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "${PRG}"`/"$link" + fi +done + +echo ">>>>> $PRG" + +BASEDIR=`dirname ${PRG}` +BASEDIR=`cd ${BASEDIR}/..;pwd` + +echo ">>>>> $BASEDIR" + +if test -z "${JAVA_HOME}" +then + JAVA_BIN=`which java` + JAR_BIN=`which jar` +else + JAVA_BIN="${JAVA_HOME}/bin/java" + JAR_BIN="${JAVA_HOME}/bin/jar" +fi +export JAVA_BIN + +if [ ! -e "${JAVA_BIN}" ] || [ ! -e "${JAR_BIN}" ]; then + echo "$JAVA_BIN and/or $JAR_BIN not found on the system. Please make sure java and jar commands are available." + exit 1 +fi + +# Construct Atlas classpath using jars from hook/hbase/atlas-hbase-plugin-impl/ directory. +for i in "${BASEDIR}/hook/hbase/atlas-hbase-plugin-impl/"*.jar; do + ATLASCPPATH="${ATLASCPPATH}:$i" +done + +if [ -z "${ATLAS_CONF_DIR}" ] && [ -e /etc/atlas/conf ];then + ATLAS_CONF_DIR=/etc/atlas/conf +fi +ATLASCPPATH=${ATLASCPPATH}:${ATLAS_CONF_DIR} + +# log dir for applications +ATLAS_LOG_DIR="${ATLAS_LOG_DIR:-/var/log/atlas}" +export ATLAS_LOG_DIR +LOGFILE="$ATLAS_LOG_DIR/import-hbase.log" + +TIME=`date +%Y%m%d%H%M%s` + +#Add HBase conf in classpath +if [ ! -z "$HBASE_CONF_DIR" ]; then + HBASE_CONF=$HBASE_CONF_DIR +elif [ ! -z "$HBASE_HOME" ]; then + HBASE_CONF="$HBASE_HOME/conf" +elif [ -e /etc/hbase/conf ]; then + HBASE_CONF="/etc/hbase/conf" +else + echo "Could not find a valid HBASE configuration" + exit 1 +fi + +echo Using HBase configuration directory "[$HBASE_CONF]" + + +if [ -f "${HBASE_CONF}/hbase-env.sh" ]; then + . "${HBASE_CONF}/hbase-env.sh" +fi + +if [ -z "$HBASE_HOME" ]; then + if [ -d "${BASEDIR}/../hbase" ]; then + HBASE_HOME=${BASEDIR}/../hbase + else + echo "Please set HBASE_HOME to the root of HBase installation" + exit 1 + fi +fi + +HBASE_CP="${HBASE_CONF}" + +for i in "${HBASE_HOME}/lib/"*.jar; do + HBASE_CP="${HBASE_CP}:$i" +done + +#Add hadoop conf in classpath +if [ ! -z "$HADOOP_CLASSPATH" ]; then + HADOOP_CP=$HADOOP_CLASSPATH +elif [ ! -z "$HADOOP_HOME" ]; then + HADOOP_CP=`$HADOOP_HOME/bin/hadoop classpath` +elif [ $(command -v hadoop) ]; then + HADOOP_CP=`hadoop classpath` + echo $HADOOP_CP +else + echo "Environment variable HADOOP_CLASSPATH or HADOOP_HOME need to be set" + exit 1 +fi + +CP="${HBASE_CP}:${HADOOP_CP}:${ATLASCPPATH}" + +# If running in cygwin, convert pathnames and classpath to Windows format. 
+if [ "${CYGWIN}" == "true" ] +then + ATLAS_LOG_DIR=`cygpath -w ${ATLAS_LOG_DIR}` + LOGFILE=`cygpath -w ${LOGFILE}` + HBASE_CP=`cygpath -w ${HBASE_CP}` + HADOOP_CP=`cygpath -w ${HADOOP_CP}` + CP=`cygpath -w -p ${CP}` +fi + +JAVA_PROPERTIES="$ATLAS_OPTS -Datlas.log.dir=$ATLAS_LOG_DIR -Datlas.log.file=import-hbase.log +-Dlog4j.configuration=atlas-hbase-import-log4j.xml" + +IMPORT_ARGS= +JVM_ARGS= + +while true +do + option=$1 + shift + + case "$option" in + -n) IMPORT_ARGS="$IMPORT_ARGS -n $1"; shift;; + -t) IMPORT_ARGS="$IMPORT_ARGS -t $1"; shift;; + -f) IMPORT_ARGS="$IMPORT_ARGS -f $1"; shift;; + --namespace) IMPORT_ARGS="$IMPORT_ARGS --namespace $1"; shift;; + --table) IMPORT_ARGS="$IMPORT_ARGS --table $1"; shift;; + --filename) IMPORT_ARGS="$IMPORT_ARGS --filename $1"; shift;; + "") break;; + *) JVM_ARGS="$JVM_ARGS $option" + esac +done + +JAVA_PROPERTIES="${JAVA_PROPERTIES} ${JVM_ARGS}" + +echo "Log file for import is $LOGFILE" + +"${JAVA_BIN}" ${JAVA_PROPERTIES} -cp "${CP}" org.apache.atlas.hbase.bridge.HBaseBridge $IMPORT_ARGS + +RETVAL=$? +[ $RETVAL -eq 0 ] && echo HBase Data Model imported successfully!!! +[ $RETVAL -ne 0 ] && echo Failed to import HBase Data Model!!! + +exit $RETVAL diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseAtlasHook.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseAtlasHook.java new file mode 100644 index 0000000000..8e6c57dba3 --- /dev/null +++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseAtlasHook.java @@ -0,0 +1,678 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hbase.bridge; + +import org.apache.atlas.AtlasConstants; +import org.apache.atlas.hbase.model.HBaseOperationContext; +import org.apache.atlas.hbase.model.HBaseDataTypes; +import org.apache.atlas.hook.AtlasHook; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2; +import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2; +import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.NamespaceDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; +import org.apache.hadoop.hbase.coprocessor.ObserverContext; +import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.apache.atlas.repository.Constants.HBASE_SOURCE; + +// This will register Hbase entities into Atlas +public class HBaseAtlasHook extends AtlasHook { + private static final Logger LOG = LoggerFactory.getLogger(HBaseAtlasHook.class); + + + public static final String ATTR_DESCRIPTION = "description"; + public static final String ATTR_ATLAS_ENDPOINT = "atlas.rest.address"; + public static final String ATTR_PARAMETERS = "parameters"; + public static final String ATTR_URI = "uri"; + public static final String ATTR_NAMESPACE = "namespace"; + public static final String ATTR_TABLE = "table"; + public static final String ATTR_COLUMNFAMILIES = "column_families"; + public static final String ATTR_CREATE_TIME = "createTime"; + public static final String ATTR_MODIFIED_TIME = "modifiedTime"; + public static final String ATTR_OWNER = "owner"; + public static final String ATTR_NAME = "name"; + + // column addition metadata + public static final String ATTR_TABLE_MAX_FILESIZE = "maxFileSize"; + public static final String ATTR_TABLE_ISREADONLY = "isReadOnly"; + public static final String ATTR_TABLE_ISCOMPACTION_ENABLED = "isCompactionEnabled"; + public static final String ATTR_TABLE_ISNORMALIZATION_ENABLED = "isNormalizationEnabled"; + public static final String ATTR_TABLE_REPLICATION_PER_REGION = "replicasPerRegion"; + public static final String ATTR_TABLE_DURABLILITY = "durability"; + public static final String ATTR_TABLE_NORMALIZATION_ENABLED = "isNormalizationEnabled"; + + // column family additional metadata + public static final String ATTR_CF_BLOOMFILTER_TYPE = "bloomFilterType"; + public static final String ATTR_CF_COMPRESSION_TYPE = "compressionType"; + public static final String 
ATTR_CF_COMPACTION_COMPRESSION_TYPE = "compactionCompressionType"; + public static final String ATTR_CF_ENCRYPTION_TYPE = "encryptionType"; + public static final String ATTR_CF_INMEMORY_COMPACTION_POLICY = "inMemoryCompactionPolicy"; + public static final String ATTR_CF_KEEP_DELETE_CELLS = "keepDeletedCells"; + public static final String ATTR_CF_MAX_VERSIONS = "maxVersions"; + public static final String ATTR_CF_MIN_VERSIONS = "minVersions"; + public static final String ATTR_CF_DATA_BLOCK_ENCODING = "dataBlockEncoding"; + public static final String ATTR_CF_STORAGE_POLICY = "StoragePolicy"; + public static final String ATTR_CF_TTL = "ttl"; + public static final String ATTR_CF_BLOCK_CACHE_ENABLED = "blockCacheEnabled"; + public static final String ATTR_CF_CACHED_BLOOM_ON_WRITE = "cacheBloomsOnWrite"; + public static final String ATTR_CF_CACHED_DATA_ON_WRITE = "cacheDataOnWrite"; + public static final String ATTR_CF_CACHED_INDEXES_ON_WRITE = "cacheIndexesOnWrite"; + public static final String ATTR_CF_EVICT_BLOCK_ONCLOSE = "evictBlocksOnClose"; + public static final String ATTR_CF_PREFETCH_BLOCK_ONOPEN = "prefetchBlocksOnOpen"; + public static final String ATTR_CF_NEW_VERSION_BEHAVIOR = "newVersionBehavior"; + public static final String ATTR_CF_MOB_ENABLED = "isMobEnabled"; + public static final String ATTR_CF_MOB_COMPATCTPARTITION_POLICY = "mobCompactPartitionPolicy"; + + public static final String HBASE_NAMESPACE_QUALIFIED_NAME = "%s@%s"; + public static final String HBASE_TABLE_QUALIFIED_NAME_FORMAT = "%s:%s@%s"; + public static final String HBASE_COLUMN_FAMILY_QUALIFIED_NAME_FORMAT = "%s:%s.%s@%s"; + + private static final String REFERENCEABLE_ATTRIBUTE_NAME = "qualifiedName"; + + public static final String RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES = "hbase_table_column_families"; + public static final String RELATIONSHIP_HBASE_TABLE_NAMESPACE = "hbase_table_namespace"; + + private static volatile HBaseAtlasHook me; + + public enum OPERATION { + CREATE_NAMESPACE("create_namespace"), + ALTER_NAMESPACE("alter_namespace"), + DELETE_NAMESPACE("delete_namespace"), + CREATE_TABLE("create_table"), + ALTER_TABLE("alter_table"), + DELETE_TABLE("delete_table"), + CREATE_COLUMN_FAMILY("create_column_Family"), + ALTER_COLUMN_FAMILY("alter_column_Family"), + DELETE_COLUMN_FAMILY("delete_column_Family"); + + private final String name; + + OPERATION(String s) { + name = s; + } + + public String getName() { + return name; + } + } + + public static HBaseAtlasHook getInstance() { + HBaseAtlasHook ret = me; + + if (ret == null) { + try { + synchronized (HBaseAtlasHook.class) { + ret = me; + + if (ret == null) { + me = ret = new HBaseAtlasHook(); + } + } + } catch (Exception e) { + LOG.error("Caught exception instantiating the Atlas HBase hook.", e); + } + } + + return ret; + } + + public HBaseAtlasHook() { + } + + public void createAtlasInstances(HBaseOperationContext hbaseOperationContext) { + OPERATION operation = hbaseOperationContext.getOperation(); + + LOG.info("HBaseAtlasHook(operation={})", operation); + + switch (operation) { + case CREATE_NAMESPACE: + case ALTER_NAMESPACE: + createOrUpdateNamespaceInstance(hbaseOperationContext); + break; + case DELETE_NAMESPACE: + deleteNameSpaceInstance(hbaseOperationContext); + break; + case CREATE_TABLE: + case ALTER_TABLE: + createOrUpdateTableInstance(hbaseOperationContext); + break; + case DELETE_TABLE: + deleteTableInstance(hbaseOperationContext); + break; + case CREATE_COLUMN_FAMILY: + case ALTER_COLUMN_FAMILY: + 
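+ // create and alter share the same entity payload; createOrUpdateColumnFamilyInstance picks a create or an update notification based on the operation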
createOrUpdateColumnFamilyInstance(hbaseOperationContext); + break; + case DELETE_COLUMN_FAMILY: + deleteColumnFamilyInstance(hbaseOperationContext); + break; + } + } + + private void createOrUpdateNamespaceInstance(HBaseOperationContext hbaseOperationContext) { + AtlasEntity nameSpace = buildNameSpace(hbaseOperationContext); + + switch (hbaseOperationContext.getOperation()) { + case CREATE_NAMESPACE: + LOG.info("Create NameSpace {}", nameSpace.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME)); + + hbaseOperationContext.addMessage(new EntityCreateRequestV2(hbaseOperationContext.getUser(), new AtlasEntitiesWithExtInfo(nameSpace))); + break; + + case ALTER_NAMESPACE: + LOG.info("Modify NameSpace {}", nameSpace.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME)); + + hbaseOperationContext.addMessage(new EntityUpdateRequestV2(hbaseOperationContext.getUser(), new AtlasEntitiesWithExtInfo(nameSpace))); + break; + } + } + + private void deleteNameSpaceInstance(HBaseOperationContext hbaseOperationContext) { + String nameSpaceQName = getNameSpaceQualifiedName(getMetadataNamespace(), hbaseOperationContext.getNameSpace()); + AtlasObjectId nameSpaceId = new AtlasObjectId(HBaseDataTypes.HBASE_NAMESPACE.getName(), REFERENCEABLE_ATTRIBUTE_NAME, nameSpaceQName); + + LOG.info("Delete NameSpace {}", nameSpaceQName); + + hbaseOperationContext.addMessage(new EntityDeleteRequestV2(hbaseOperationContext.getUser(), Collections.singletonList(nameSpaceId))); + } + + private void createOrUpdateTableInstance(HBaseOperationContext hbaseOperationContext) { + AtlasEntity nameSpace = buildNameSpace(hbaseOperationContext); + AtlasEntity table = buildTable(hbaseOperationContext, nameSpace); + List columnFamilies = buildColumnFamilies(hbaseOperationContext, nameSpace, table); + + table.setRelationshipAttribute(ATTR_COLUMNFAMILIES, AtlasTypeUtil.getAtlasRelatedObjectIds(columnFamilies, RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES)); + + AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo(table); + + entities.addReferredEntity(nameSpace); + + if (CollectionUtils.isNotEmpty(columnFamilies)) { + for (AtlasEntity columnFamily : columnFamilies) { + entities.addReferredEntity(columnFamily); + } + } + + switch (hbaseOperationContext.getOperation()) { + case CREATE_TABLE: + LOG.info("Create Table {}", table.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME)); + + hbaseOperationContext.addMessage(new EntityCreateRequestV2(hbaseOperationContext.getUser(), entities)); + break; + + case ALTER_TABLE: + LOG.info("Modify Table {}", table.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME)); + + hbaseOperationContext.addMessage(new EntityUpdateRequestV2(hbaseOperationContext.getUser(), entities)); + break; + } + } + + private void deleteTableInstance(HBaseOperationContext hbaseOperationContext) { + TableName tableName = hbaseOperationContext.getTableName(); + String nameSpaceName = tableName.getNamespaceAsString(); + + if (nameSpaceName == null) { + nameSpaceName = tableName.getNameWithNamespaceInclAsString(); + } + + String tableNameStr = tableName.getNameAsString(); + String tableQName = getTableQualifiedName(getMetadataNamespace(), nameSpaceName, tableNameStr); + AtlasObjectId tableId = new AtlasObjectId(HBaseDataTypes.HBASE_TABLE.getName(), REFERENCEABLE_ATTRIBUTE_NAME, tableQName); + + LOG.info("Delete Table {}", tableQName); + + hbaseOperationContext.addMessage(new EntityDeleteRequestV2(hbaseOperationContext.getUser(), Collections.singletonList(tableId))); + } + + private void createOrUpdateColumnFamilyInstance(HBaseOperationContext hbaseOperationContext) 
{ + AtlasEntity nameSpace = buildNameSpace(hbaseOperationContext); + AtlasEntity table = buildTable(hbaseOperationContext, nameSpace); + AtlasEntity columnFamily = buildColumnFamily(hbaseOperationContext, hbaseOperationContext.gethColumnDescriptor(), nameSpace, table); + + AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo(columnFamily); + + entities.addReferredEntity(nameSpace); + entities.addReferredEntity(table); + + switch (hbaseOperationContext.getOperation()) { + case CREATE_COLUMN_FAMILY: + LOG.info("Create ColumnFamily {}", columnFamily.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME)); + + hbaseOperationContext.addMessage(new EntityCreateRequestV2(hbaseOperationContext.getUser(), entities)); + break; + + case ALTER_COLUMN_FAMILY: + LOG.info("Alter ColumnFamily {}", columnFamily.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME)); + + hbaseOperationContext.addMessage(new EntityUpdateRequestV2(hbaseOperationContext.getUser(), entities)); + break; + } + } + + private void deleteColumnFamilyInstance(HBaseOperationContext hbaseOperationContext) { + TableName tableName = hbaseOperationContext.getTableName(); + String nameSpaceName = tableName.getNamespaceAsString(); + + if (nameSpaceName == null) { + nameSpaceName = tableName.getNameWithNamespaceInclAsString(); + } + + String tableNameStr = tableName.getNameAsString(); + String columnFamilyName = hbaseOperationContext.getColummFamily(); + String columnFamilyQName = getColumnFamilyQualifiedName(getMetadataNamespace(), nameSpaceName, tableNameStr, columnFamilyName); + AtlasObjectId columnFamilyId = new AtlasObjectId(HBaseDataTypes.HBASE_COLUMN_FAMILY.getName(), REFERENCEABLE_ATTRIBUTE_NAME, columnFamilyQName); + + LOG.info("Delete ColumnFamily {}", columnFamilyQName); + + hbaseOperationContext.addMessage(new EntityDeleteRequestV2(hbaseOperationContext.getUser(), Collections.singletonList(columnFamilyId))); + } + + + /** + * Construct the qualified name used to uniquely identify a ColumnFamily instance in Atlas. + * + * @param metadataNamespace Metadata namespace of the cluster to which the HBase component belongs + * @param nameSpace Name of the HBase database to which the Table belongs + * @param tableName Name of the HBase table + * @param columnFamily Name of the ColumnFamily + * @return Unique qualified name to identify the Table instance in Atlas. + */ + public static String getColumnFamilyQualifiedName(String metadataNamespace, String nameSpace, String tableName, String columnFamily) { + if (metadataNamespace == null || nameSpace == null || tableName == null || columnFamily == null) { + return null; + } else { + return String.format(HBASE_COLUMN_FAMILY_QUALIFIED_NAME_FORMAT, nameSpace, stripNameSpace(tableName), columnFamily, metadataNamespace); + } + } + + /** + * Construct the qualified name used to uniquely identify a Table instance in Atlas. + * + * @param metadataNamespace Metadata namespace of the cluster to which the HBase component belongs + * @param nameSpace Name of the HBase database to which the Table belongs + * @param tableName Name of the HBase table + * @return Unique qualified name to identify the Table instance in Atlas. 
+ */ + public static String getTableQualifiedName(String metadataNamespace, String nameSpace, String tableName) { + if (metadataNamespace == null || nameSpace == null || tableName == null) { + return null; + } else { + return String.format(HBASE_TABLE_QUALIFIED_NAME_FORMAT, nameSpace, stripNameSpace(tableName), metadataNamespace); + } + } + + /** + * Construct the qualified name used to uniquely identify a HBase NameSpace instance in Atlas. + * + * @param metadataNamespace Metadata namespace of the cluster to which the HBase component belongs + * @param nameSpace + * @return Unique qualified name to identify the HBase NameSpace instance in Atlas. + */ + public static String getNameSpaceQualifiedName(String metadataNamespace, String nameSpace) { + if (metadataNamespace == null || nameSpace == null) { + return null; + } else { + return String.format(HBASE_NAMESPACE_QUALIFIED_NAME, nameSpace, metadataNamespace); + } + } + + private static String stripNameSpace(String tableName) { + return tableName.substring(tableName.indexOf(":") + 1); + } + + private AtlasEntity buildNameSpace(HBaseOperationContext hbaseOperationContext) { + AtlasEntity nameSpace = new AtlasEntity(HBaseDataTypes.HBASE_NAMESPACE.getName()); + NamespaceDescriptor nameSpaceDesc = hbaseOperationContext.getNamespaceDescriptor(); + String nameSpaceName = nameSpaceDesc == null ? null : hbaseOperationContext.getNamespaceDescriptor().getName(); + + if (nameSpaceName == null) { + nameSpaceName = hbaseOperationContext.getNameSpace(); + } + + Date now = new Date(System.currentTimeMillis()); + + nameSpace.setAttribute(ATTR_NAME, nameSpaceName); + nameSpace.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, getNameSpaceQualifiedName(getMetadataNamespace(), nameSpaceName)); + nameSpace.setAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, getMetadataNamespace()); + nameSpace.setAttribute(ATTR_DESCRIPTION, nameSpaceName); + nameSpace.setAttribute(ATTR_PARAMETERS, hbaseOperationContext.getHbaseConf()); + nameSpace.setAttribute(ATTR_OWNER, hbaseOperationContext.getOwner()); + nameSpace.setAttribute(ATTR_MODIFIED_TIME, now); + + if (OPERATION.CREATE_NAMESPACE.equals(hbaseOperationContext.getOperation())) { + nameSpace.setAttribute(ATTR_CREATE_TIME, now); + } + + return nameSpace; + } + + private AtlasEntity buildTable(HBaseOperationContext hbaseOperationContext, AtlasEntity nameSpace) { + AtlasEntity table = new AtlasEntity(HBaseDataTypes.HBASE_TABLE.getName()); + String tableName = getTableName(hbaseOperationContext); + String nameSpaceName = (String) nameSpace.getAttribute(ATTR_NAME); + String tableQName = getTableQualifiedName(getMetadataNamespace(), nameSpaceName, tableName); + OPERATION operation = hbaseOperationContext.getOperation(); + Date now = new Date(System.currentTimeMillis()); + + table.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, tableQName); + table.setAttribute(ATTR_NAME, tableName); + table.setAttribute(ATTR_URI, tableName); + table.setAttribute(ATTR_OWNER, hbaseOperationContext.getOwner()); + table.setAttribute(ATTR_DESCRIPTION, tableName); + table.setAttribute(ATTR_PARAMETERS, hbaseOperationContext.getHbaseConf()); + table.setRelationshipAttribute(ATTR_NAMESPACE, AtlasTypeUtil.getAtlasRelatedObjectId(nameSpace, RELATIONSHIP_HBASE_TABLE_NAMESPACE)); + + TableDescriptor tableDescriptor = hbaseOperationContext.gethTableDescriptor(); + if (tableDescriptor != null) { + table.setAttribute(ATTR_TABLE_MAX_FILESIZE, tableDescriptor.getMaxFileSize()); + table.setAttribute(ATTR_TABLE_REPLICATION_PER_REGION, 
tableDescriptor.getRegionReplication()); + table.setAttribute(ATTR_TABLE_ISREADONLY, tableDescriptor.isReadOnly()); + table.setAttribute(ATTR_TABLE_ISNORMALIZATION_ENABLED, tableDescriptor.isNormalizationEnabled()); + table.setAttribute(ATTR_TABLE_ISCOMPACTION_ENABLED, tableDescriptor.isCompactionEnabled()); + table.setAttribute(ATTR_TABLE_DURABLILITY, (tableDescriptor.getDurability() != null ? tableDescriptor.getDurability().name() : null)); + table.setAttribute(ATTR_TABLE_NORMALIZATION_ENABLED, tableDescriptor.isNormalizationEnabled()); + } + + switch (operation) { + case CREATE_TABLE: + table.setAttribute(ATTR_CREATE_TIME, now); + table.setAttribute(ATTR_MODIFIED_TIME, now); + break; + case CREATE_COLUMN_FAMILY: + table.setAttribute(ATTR_MODIFIED_TIME, now); + break; + case ALTER_TABLE: + case ALTER_COLUMN_FAMILY: + table.setAttribute(ATTR_MODIFIED_TIME, now); + break; + default: + break; + } + + return table; + } + + private List buildColumnFamilies(HBaseOperationContext hbaseOperationContext, AtlasEntity nameSpace, AtlasEntity table) { + List columnFamilies = new ArrayList<>(); + ColumnFamilyDescriptor[] columnFamilyDescriptors = hbaseOperationContext.gethColumnDescriptors(); + + if (columnFamilyDescriptors != null) { + for (ColumnFamilyDescriptor columnFamilyDescriptor : columnFamilyDescriptors) { + AtlasEntity columnFamily = buildColumnFamily(hbaseOperationContext, columnFamilyDescriptor, nameSpace, table); + + columnFamilies.add(columnFamily); + } + } + + return columnFamilies; + } + + private AtlasEntity buildColumnFamily(HBaseOperationContext hbaseOperationContext, ColumnFamilyDescriptor columnFamilyDescriptor, AtlasEntity nameSpace, AtlasEntity table) { + AtlasEntity columnFamily = new AtlasEntity(HBaseDataTypes.HBASE_COLUMN_FAMILY.getName()); + String columnFamilyName = columnFamilyDescriptor.getNameAsString(); + String tableName = (String) table.getAttribute(ATTR_NAME); + String nameSpaceName = (String) nameSpace.getAttribute(ATTR_NAME); + String columnFamilyQName = getColumnFamilyQualifiedName(getMetadataNamespace(), nameSpaceName, tableName, columnFamilyName); + Date now = new Date(System.currentTimeMillis()); + + columnFamily.setAttribute(ATTR_NAME, columnFamilyName); + columnFamily.setAttribute(ATTR_DESCRIPTION, columnFamilyName); + columnFamily.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, columnFamilyQName); + columnFamily.setAttribute(ATTR_OWNER, hbaseOperationContext.getOwner()); + columnFamily.setRelationshipAttribute(ATTR_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(table, RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES)); + + if (columnFamilyDescriptor!= null) { + columnFamily.setAttribute(ATTR_CF_BLOCK_CACHE_ENABLED, columnFamilyDescriptor.isBlockCacheEnabled()); + columnFamily.setAttribute(ATTR_CF_BLOOMFILTER_TYPE, (columnFamilyDescriptor.getBloomFilterType() != null ? columnFamilyDescriptor.getBloomFilterType().name():null)); + columnFamily.setAttribute(ATTR_CF_CACHED_BLOOM_ON_WRITE, columnFamilyDescriptor.isCacheBloomsOnWrite()); + columnFamily.setAttribute(ATTR_CF_CACHED_DATA_ON_WRITE, columnFamilyDescriptor.isCacheDataOnWrite()); + columnFamily.setAttribute(ATTR_CF_CACHED_INDEXES_ON_WRITE, columnFamilyDescriptor.isCacheIndexesOnWrite()); + columnFamily.setAttribute(ATTR_CF_COMPACTION_COMPRESSION_TYPE, (columnFamilyDescriptor.getCompactionCompressionType() != null ? columnFamilyDescriptor.getCompactionCompressionType().name():null)); + columnFamily.setAttribute(ATTR_CF_COMPRESSION_TYPE, (columnFamilyDescriptor.getCompressionType() != null ? 
columnFamilyDescriptor.getCompressionType().name():null)); + columnFamily.setAttribute(ATTR_CF_DATA_BLOCK_ENCODING, (columnFamilyDescriptor.getDataBlockEncoding() != null ? columnFamilyDescriptor.getDataBlockEncoding().name():null)); + columnFamily.setAttribute(ATTR_CF_ENCRYPTION_TYPE, columnFamilyDescriptor.getEncryptionType()); + columnFamily.setAttribute(ATTR_CF_EVICT_BLOCK_ONCLOSE, columnFamilyDescriptor.isEvictBlocksOnClose()); + columnFamily.setAttribute(ATTR_CF_INMEMORY_COMPACTION_POLICY, (columnFamilyDescriptor.getInMemoryCompaction() != null ? columnFamilyDescriptor.getInMemoryCompaction().name():null)); + columnFamily.setAttribute(ATTR_CF_KEEP_DELETE_CELLS, ( columnFamilyDescriptor.getKeepDeletedCells() != null ? columnFamilyDescriptor.getKeepDeletedCells().name():null)); + columnFamily.setAttribute(ATTR_CF_MAX_VERSIONS, columnFamilyDescriptor.getMaxVersions()); + columnFamily.setAttribute(ATTR_CF_MIN_VERSIONS, columnFamilyDescriptor.getMinVersions()); + columnFamily.setAttribute(ATTR_CF_NEW_VERSION_BEHAVIOR, columnFamilyDescriptor.isNewVersionBehavior()); + columnFamily.setAttribute(ATTR_CF_MOB_ENABLED, columnFamilyDescriptor.isMobEnabled()); + columnFamily.setAttribute(ATTR_CF_MOB_COMPATCTPARTITION_POLICY, ( columnFamilyDescriptor.getMobCompactPartitionPolicy() != null ? columnFamilyDescriptor.getMobCompactPartitionPolicy().name():null)); + columnFamily.setAttribute(ATTR_CF_PREFETCH_BLOCK_ONOPEN, columnFamilyDescriptor.isPrefetchBlocksOnOpen()); + columnFamily.setAttribute(ATTR_CF_STORAGE_POLICY, columnFamilyDescriptor.getStoragePolicy()); + columnFamily.setAttribute(ATTR_CF_TTL, columnFamilyDescriptor.getTimeToLive()); + } + + switch (hbaseOperationContext.getOperation()) { + case CREATE_COLUMN_FAMILY: + case CREATE_TABLE: + columnFamily.setAttribute(ATTR_CREATE_TIME, now); + columnFamily.setAttribute(ATTR_MODIFIED_TIME, now); + break; + + case ALTER_COLUMN_FAMILY: + columnFamily.setAttribute(ATTR_MODIFIED_TIME, now); + break; + + default: + break; + } + + return columnFamily; + } + + public String getMessageSource() { + return HBASE_SOURCE; + } + + private String getTableName(HBaseOperationContext hbaseOperationContext) { + final String ret; + + TableName tableName = hbaseOperationContext.getTableName(); + + if (tableName != null) { + ret = tableName.getNameAsString(); + } else { + TableDescriptor tableDescriptor = hbaseOperationContext.gethTableDescriptor(); + + ret = (tableDescriptor != null) ? tableDescriptor.getTableName().getNameAsString() : null; + } + + return ret; + } + + public void sendHBaseNameSpaceOperation(final NamespaceDescriptor namespaceDescriptor, final String nameSpace, final OPERATION operation, ObserverContext ctx) { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasHook.sendHBaseNameSpaceOperation()"); + } + + try { + final UserGroupInformation ugi = getUGI(ctx); + final User user = getActiveUser(ctx); + final String userName = (user != null) ? 
user.getShortName() : null; + HBaseOperationContext hbaseOperationContext = handleHBaseNameSpaceOperation(namespaceDescriptor, nameSpace, operation, ugi, userName); + + sendNotification(hbaseOperationContext); + } catch (Throwable t) { + LOG.error("HBaseAtlasHook.sendHBaseNameSpaceOperation(): failed to send notification", t); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasHook.sendHBaseNameSpaceOperation()"); + } + } + + public void sendHBaseTableOperation(TableDescriptor tableDescriptor, final TableName tableName, final OPERATION operation, ObserverContext ctx) { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasHook.sendHBaseTableOperation()"); + } + + try { + final UserGroupInformation ugi = getUGI(ctx); + final User user = getActiveUser(ctx); + final String userName = (user != null) ? user.getShortName() : null; + HBaseOperationContext hbaseOperationContext = handleHBaseTableOperation(tableDescriptor, tableName, operation, ugi, userName); + + sendNotification(hbaseOperationContext); + } catch (Throwable t) { + LOG.error("<== HBaseAtlasHook.sendHBaseTableOperation(): failed to send notification", t); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasHook.sendHBaseTableOperation()"); + } + } + + private void sendNotification(HBaseOperationContext hbaseOperationContext) { + UserGroupInformation ugi = hbaseOperationContext.getUgi(); + + if (ugi != null && ugi.getRealUser() != null) { + ugi = ugi.getRealUser(); + } + + notifyEntities(hbaseOperationContext.getMessages(), ugi); + } + + private HBaseOperationContext handleHBaseNameSpaceOperation(NamespaceDescriptor namespaceDescriptor, String nameSpace, OPERATION operation, UserGroupInformation ugi, String userName) { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasHook.handleHBaseNameSpaceOperation()"); + } + + HBaseOperationContext hbaseOperationContext = new HBaseOperationContext(namespaceDescriptor, nameSpace, operation, ugi, userName, userName); + createAtlasInstances(hbaseOperationContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasHook.handleHBaseNameSpaceOperation(): {}", hbaseOperationContext); + } + + return hbaseOperationContext; + } + + private HBaseOperationContext handleHBaseTableOperation(TableDescriptor tableDescriptor, TableName tableName, OPERATION operation, UserGroupInformation ugi, String userName) { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasHook.handleHBaseTableOperation()"); + } + + Map hbaseConf = null; + String owner = null; + String tableNameSpace = null; + TableName hbaseTableName = null; + ColumnFamilyDescriptor[] columnFamilyDescriptors = null; + + if (tableDescriptor != null) { + owner = tableDescriptor.getOwnerString(); + hbaseConf = null; + hbaseTableName = tableDescriptor.getTableName(); + if (hbaseTableName != null) { + tableNameSpace = hbaseTableName.getNamespaceAsString(); + if (tableNameSpace == null) { + tableNameSpace = hbaseTableName.getNameWithNamespaceInclAsString(); + } + } + } + + if (owner == null) { + owner = userName; + } + + if (tableDescriptor != null) { + columnFamilyDescriptors = tableDescriptor.getColumnFamilies(); + } + + HBaseOperationContext hbaseOperationContext = new HBaseOperationContext(tableNameSpace, tableDescriptor, tableName, columnFamilyDescriptors, operation, ugi, userName, owner, hbaseConf); + createAtlasInstances(hbaseOperationContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasHook.handleHBaseTableOperation(): {}", hbaseOperationContext); + } + return hbaseOperationContext; + } + + 
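The send* entry points above are driven by the HBase master coprocessor added later in this patch (HBaseAtlasCoprocessor), which forwards namespace/table DDL callbacks into these handlers. As a minimal sketch of how that hook gets wired into HBase, assuming the standard hbase.coprocessor.master.classes property and mirroring the setting the integration test below applies to its mini-cluster (the AtlasCoprocessorWiring class name here is purely illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class AtlasCoprocessorWiring {
        public static void main(String[] args) {
            // Illustrative only: on a real cluster this property is normally set in
            // hbase-site.xml on the HBase master, not in code.
            Configuration conf = HBaseConfiguration.create();

            // Register the Atlas master observer so DDL events reach
            // HBaseAtlasHook.sendHBaseTableOperation() / sendHBaseNameSpaceOperation().
            conf.set("hbase.coprocessor.master.classes",
                     "org.apache.atlas.hbase.hook.HBaseAtlasCoprocessor");

            System.out.println("master coprocessors = " + conf.get("hbase.coprocessor.master.classes"));
        }
    }

The programmatic form simply mirrors what HBaseAtlasHookIT does when it configures its HBaseTestingUtility; a deployment would carry the same key/value pair in hbase-site.xml.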
private HBaseOperationContext handleHBaseColumnFamilyOperation(ColumnFamilyDescriptor columnFamilyDescriptor, TableName tableName, String columnFamily, OPERATION operation, UserGroupInformation ugi, String userName) { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasHook.handleHBaseColumnFamilyOperation()"); + } + + String owner = userName; + Map hbaseConf = new HashMap<>(); + + String tableNameSpace = tableName.getNamespaceAsString(); + if (tableNameSpace == null) { + tableNameSpace = tableName.getNameWithNamespaceInclAsString(); + } + + if (columnFamilyDescriptor != null) { + hbaseConf = columnFamilyDescriptor.getConfiguration(); + } + + HBaseOperationContext hbaseOperationContext = new HBaseOperationContext(tableNameSpace, tableName, columnFamilyDescriptor, columnFamily, operation, ugi, userName, owner, hbaseConf); + createAtlasInstances(hbaseOperationContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasHook.handleHBaseColumnFamilyOperation(): {}", hbaseOperationContext); + } + return hbaseOperationContext; + } + + private UserGroupInformation getUGI(ObserverContext ctx) { + UserGroupInformation ugi = null; + User user = null; + try { + user = getActiveUser(ctx); + ugi = UserGroupInformation.getLoginUser(); + } catch (Exception e) { + // not setting the UGI here + } + + if (ugi == null) { + if (user != null) { + ugi = user.getUGI(); + } + } + + LOG.info("HBaseAtlasHook: UGI: {}", ugi); + return ugi; + } + + private User getActiveUser(ObserverContext ctx) throws IOException { + return (User)ctx.getCaller().orElse(User.getCurrent()); + } +} diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseBridge.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseBridge.java new file mode 100644 index 0000000000..f5a1d2ad51 --- /dev/null +++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseBridge.java @@ -0,0 +1,720 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hbase.bridge; + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClientV2; +import org.apache.atlas.hbase.model.HBaseDataTypes; +import org.apache.atlas.hook.AtlasHookException; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.model.instance.AtlasEntityHeader; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.instance.EntityMutationResponse; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.atlas.utils.AtlasConfigurationUtil; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.commons.cli.BasicParser; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.NamespaceDescriptor; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class HBaseBridge { + private static final Logger LOG = LoggerFactory.getLogger(HBaseBridge.class); + + private static final int EXIT_CODE_SUCCESS = 0; + private static final int EXIT_CODE_FAILED = 1; + private static final String ATLAS_ENDPOINT = "atlas.rest.address"; + private static final String DEFAULT_ATLAS_URL = "http://localhost:21000/"; + private static final String CLUSTER_NAME_KEY = "atlas.cluster.name"; + private static final String DEFAULT_CLUSTER_NAME = "primary"; + private static final String HBASE_METADATA_NAMESPACE = "atlas.metadata.namespace"; + private static final String QUALIFIED_NAME = "qualifiedName"; + private static final String NAME = "name"; + private static final String URI = "uri"; + private static final String OWNER = "owner"; + private static final String DESCRIPTION_ATTR = "description"; + private static final String CLUSTERNAME = "clusterName"; + private static final String NAMESPACE = "namespace"; + private static final String TABLE = "table"; + private static final String COLUMN_FAMILIES = "column_families"; + + // table metadata + private static final String ATTR_TABLE_MAX_FILESIZE = "maxFileSize"; + private static final String ATTR_TABLE_ISREADONLY = "isReadOnly"; + private static 
final String ATTR_TABLE_ISCOMPACTION_ENABLED = "isCompactionEnabled"; + private static final String ATTR_TABLE_REPLICATION_PER_REGION = "replicasPerRegion"; + private static final String ATTR_TABLE_DURABLILITY = "durability"; + private static final String ATTR_TABLE_NORMALIZATION_ENABLED = "isNormalizationEnabled"; + + // column family metadata + private static final String ATTR_CF_BLOOMFILTER_TYPE = "bloomFilterType"; + private static final String ATTR_CF_COMPRESSION_TYPE = "compressionType"; + private static final String ATTR_CF_COMPACTION_COMPRESSION_TYPE = "compactionCompressionType"; + private static final String ATTR_CF_ENCRYPTION_TYPE = "encryptionType"; + private static final String ATTR_CF_KEEP_DELETE_CELLS = "keepDeletedCells"; + private static final String ATTR_CF_MAX_VERSIONS = "maxVersions"; + private static final String ATTR_CF_MIN_VERSIONS = "minVersions"; + private static final String ATTR_CF_DATA_BLOCK_ENCODING = "dataBlockEncoding"; + private static final String ATTR_CF_TTL = "ttl"; + private static final String ATTR_CF_BLOCK_CACHE_ENABLED = "blockCacheEnabled"; + private static final String ATTR_CF_CACHED_BLOOM_ON_WRITE = "cacheBloomsOnWrite"; + private static final String ATTR_CF_CACHED_DATA_ON_WRITE = "cacheDataOnWrite"; + private static final String ATTR_CF_CACHED_INDEXES_ON_WRITE = "cacheIndexesOnWrite"; + private static final String ATTR_CF_EVICT_BLOCK_ONCLOSE = "evictBlocksOnClose"; + private static final String ATTR_CF_PREFETCH_BLOCK_ONOPEN = "prefetchBlocksOnOpen"; + private static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName"; + private static final String ATTR_CF_INMEMORY_COMPACTION_POLICY = "inMemoryCompactionPolicy"; + private static final String ATTR_CF_MOB_COMPATCTPARTITION_POLICY = "mobCompactPartitionPolicy"; + private static final String ATTR_CF_MOB_ENABLED = "isMobEnabled"; + private static final String ATTR_CF_NEW_VERSION_BEHAVIOR = "newVersionBehavior"; + + private static final String HBASE_NAMESPACE_QUALIFIED_NAME = "%s@%s"; + private static final String HBASE_TABLE_QUALIFIED_NAME_FORMAT = "%s:%s@%s"; + private static final String HBASE_COLUMN_FAMILY_QUALIFIED_NAME_FORMAT = "%s:%s.%s@%s"; + + private final String metadataNamespace; + private final AtlasClientV2 atlasClientV2; + private final Admin hbaseAdmin; + + + public static void main(String[] args) { + int exitCode = EXIT_CODE_FAILED; + AtlasClientV2 atlasClientV2 =null; + + try { + Options options = new Options(); + options.addOption("n","namespace", true, "namespace"); + options.addOption("t", "table", true, "tablename"); + options.addOption("f", "filename", true, "filename"); + + CommandLineParser parser = new BasicParser(); + CommandLine cmd = parser.parse(options, args); + String namespaceToImport = cmd.getOptionValue("n"); + String tableToImport = cmd.getOptionValue("t"); + String fileToImport = cmd.getOptionValue("f"); + Configuration atlasConf = ApplicationProperties.get(); + String[] urls = atlasConf.getStringArray(ATLAS_ENDPOINT); + + if (urls == null || urls.length == 0) { + urls = new String[] { DEFAULT_ATLAS_URL }; + } + + + if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) { + String[] basicAuthUsernamePassword = AuthenticationUtil.getBasicAuthenticationInput(); + + atlasClientV2 = new AtlasClientV2(urls, basicAuthUsernamePassword); + } else { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + + atlasClientV2 = new AtlasClientV2(ugi, ugi.getShortUserName(), urls); + } + + HBaseBridge importer = new HBaseBridge(atlasConf, atlasClientV2); + + if 
(StringUtils.isNotEmpty(fileToImport)) { + File f = new File(fileToImport); + + if (f.exists() && f.canRead()) { + BufferedReader br = new BufferedReader(new FileReader(f)); + String line = null; + + while((line = br.readLine()) != null) { + String val[] = line.split(":"); + + if (ArrayUtils.isNotEmpty(val)) { + namespaceToImport = val[0]; + + if (val.length > 1) { + tableToImport = val[1]; + } else { + tableToImport = ""; + } + + importer.importHBaseEntities(namespaceToImport, tableToImport); + } + } + + exitCode = EXIT_CODE_SUCCESS; + } else { + LOG.error("Failed to read the file"); + } + } else { + importer.importHBaseEntities(namespaceToImport, tableToImport); + + exitCode = EXIT_CODE_SUCCESS; + } + } catch(ParseException e) { + LOG.error("Failed to parse arguments. Error: ", e.getMessage()); + printUsage(); + } catch(Exception e) { + System.out.println("ImportHBaseEntities failed. Please check the log file for the detailed error message"); + + LOG.error("ImportHBaseEntities failed", e); + }finally { + if(atlasClientV2!=null) { + atlasClientV2.close(); + } + } + + System.exit(exitCode); + } + + public HBaseBridge(Configuration atlasConf, AtlasClientV2 atlasClientV2) throws Exception { + this.atlasClientV2 = atlasClientV2; + this.metadataNamespace = getMetadataNamespace(atlasConf); + + org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create(); + + LOG.info("checking HBase availability.."); + + HBaseAdmin.available(conf); + + LOG.info("HBase is available"); + + Connection conn = ConnectionFactory.createConnection(conf); + + hbaseAdmin = conn.getAdmin(); + } + + private String getMetadataNamespace(Configuration config) { + return AtlasConfigurationUtil.getRecentString(config, HBASE_METADATA_NAMESPACE, getClusterName(config)); + } + + private String getClusterName(Configuration config) { + return config.getString(CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME); + } + + private boolean importHBaseEntities(String namespaceToImport, String tableToImport) throws Exception { + boolean ret = false; + + if (StringUtils.isEmpty(namespaceToImport) && StringUtils.isEmpty(tableToImport)) { + // when both NameSpace and Table options are not present + importNameSpaceAndTable(); + ret = true; + } else if (StringUtils.isNotEmpty(namespaceToImport)) { + // When Namespace option is present or both namespace and table options are present + importNameSpaceWithTable(namespaceToImport, tableToImport); + ret = true; + } else if (StringUtils.isNotEmpty(tableToImport)) { + importTable(tableToImport); + ret = true; + } + + return ret; + } + + public void importNameSpace(final String nameSpace) throws Exception { + List matchingNameSpaceDescriptors = getMatchingNameSpaces(nameSpace); + + if (CollectionUtils.isNotEmpty(matchingNameSpaceDescriptors)) { + for (NamespaceDescriptor namespaceDescriptor : matchingNameSpaceDescriptors) { + createOrUpdateNameSpace(namespaceDescriptor); + } + } else { + throw new AtlasHookException("No NameSpace found for the given criteria. 
NameSpace = " + nameSpace); + } + } + + public void importTable(final String tableName) throws Exception { + String tableNameStr = null; + TableDescriptor[] htds = hbaseAdmin.listTables(Pattern.compile(tableName)); + + if (ArrayUtils.isNotEmpty(htds)) { + for (TableDescriptor htd : htds) { + String tblNameWithNameSpace = htd.getTableName().getNameWithNamespaceInclAsString(); + String tblNameWithOutNameSpace = htd.getTableName().getNameAsString(); + + if (tableName.equals(tblNameWithNameSpace)) { + tableNameStr = tblNameWithNameSpace; + } else if (tableName.equals(tblNameWithOutNameSpace)) { + tableNameStr = tblNameWithOutNameSpace; + } else { + // when wild cards are used in table name + if (tblNameWithNameSpace != null) { + tableNameStr = tblNameWithNameSpace; + } else if (tblNameWithOutNameSpace != null) { + tableNameStr = tblNameWithOutNameSpace; + } + } + + byte[] nsByte = htd.getTableName().getNamespace(); + String nsName = new String(nsByte); + NamespaceDescriptor nsDescriptor = hbaseAdmin.getNamespaceDescriptor(nsName); + AtlasEntityWithExtInfo entity = createOrUpdateNameSpace(nsDescriptor); + ColumnFamilyDescriptor[] hcdts = htd.getColumnFamilies(); + + createOrUpdateTable(nsName, tableNameStr, entity.getEntity(), htd, hcdts); + } + } else { + throw new AtlasHookException("No Table found for the given criteria. Table = " + tableName); + } + } + + private void importNameSpaceAndTable() throws Exception { + NamespaceDescriptor[] namespaceDescriptors = hbaseAdmin.listNamespaceDescriptors(); + + if (ArrayUtils.isNotEmpty(namespaceDescriptors)) { + for (NamespaceDescriptor namespaceDescriptor : namespaceDescriptors) { + String namespace = namespaceDescriptor.getName(); + + importNameSpace(namespace); + } + } + + TableDescriptor[] htds = hbaseAdmin.listTables(); + + if (ArrayUtils.isNotEmpty(htds)) { + for (TableDescriptor htd : htds) { + String tableName = htd.getTableName().getNameAsString(); + + importTable(tableName); + } + } + } + + private void importNameSpaceWithTable(String namespaceToImport, String tableToImport) throws Exception { + importNameSpace(namespaceToImport); + + List hTableDescriptors = new ArrayList<>(); + + if (StringUtils.isEmpty(tableToImport)) { + List matchingNameSpaceDescriptors = getMatchingNameSpaces(namespaceToImport); + + if (CollectionUtils.isNotEmpty(matchingNameSpaceDescriptors)) { + hTableDescriptors = getTableDescriptors(matchingNameSpaceDescriptors); + } + } else { + tableToImport = namespaceToImport +":" + tableToImport; + + TableDescriptor[] htds = hbaseAdmin.listTables(Pattern.compile(tableToImport)); + + hTableDescriptors.addAll(Arrays.asList(htds)); + } + + if (CollectionUtils.isNotEmpty(hTableDescriptors)) { + for (TableDescriptor htd : hTableDescriptors) { + String tblName = htd.getTableName().getNameAsString(); + + importTable(tblName); + } + } + } + + private List getMatchingNameSpaces(String nameSpace) throws Exception { + List ret = new ArrayList<>(); + NamespaceDescriptor[] namespaceDescriptors = hbaseAdmin.listNamespaceDescriptors(); + Pattern pattern = Pattern.compile(nameSpace); + + for (NamespaceDescriptor namespaceDescriptor:namespaceDescriptors){ + String nmSpace = namespaceDescriptor.getName(); + Matcher matcher = pattern.matcher(nmSpace); + + if (matcher.find()){ + ret.add(namespaceDescriptor); + } + } + + return ret; + } + + private List getTableDescriptors(List namespaceDescriptors) throws Exception { + List ret = new ArrayList<>(); + + for(NamespaceDescriptor namespaceDescriptor:namespaceDescriptors) { + TableDescriptor[] 
tableDescriptors = hbaseAdmin.listTableDescriptorsByNamespace(namespaceDescriptor.getName()); + + ret.addAll(Arrays.asList(tableDescriptors)); + } + + return ret; + } + + protected AtlasEntityWithExtInfo createOrUpdateNameSpace(NamespaceDescriptor namespaceDescriptor) throws Exception { + String nsName = namespaceDescriptor.getName(); + String nsQualifiedName = getNameSpaceQualifiedName(metadataNamespace, nsName); + AtlasEntityWithExtInfo nsEntity = findNameSpaceEntityInAtlas(nsQualifiedName); + + if (nsEntity == null) { + LOG.info("Importing NameSpace: " + nsQualifiedName); + + AtlasEntity entity = getNameSpaceEntity(nsName, null); + + nsEntity = createEntityInAtlas(new AtlasEntityWithExtInfo(entity)); + } else { + LOG.info("NameSpace already present in Atlas. Updating it..: " + nsQualifiedName); + + AtlasEntity entity = getNameSpaceEntity(nsName, nsEntity.getEntity()); + + nsEntity.setEntity(entity); + + nsEntity = updateEntityInAtlas(nsEntity); + } + return nsEntity; + } + + protected AtlasEntityWithExtInfo createOrUpdateTable(String nameSpace, String tableName, AtlasEntity nameSapceEntity, TableDescriptor htd, ColumnFamilyDescriptor[] hcdts) throws Exception { + String owner = htd.getOwnerString(); + String tblQualifiedName = getTableQualifiedName(metadataNamespace, nameSpace, tableName); + AtlasEntityWithExtInfo ret = findTableEntityInAtlas(tblQualifiedName); + + if (ret == null) { + LOG.info("Importing Table: " + tblQualifiedName); + + AtlasEntity entity = getTableEntity(nameSpace, tableName, owner, nameSapceEntity, htd, null); + + ret = createEntityInAtlas(new AtlasEntityWithExtInfo(entity)); + } else { + LOG.info("Table already present in Atlas. Updating it..: " + tblQualifiedName); + + AtlasEntity entity = getTableEntity(nameSpace, tableName, owner, nameSapceEntity, htd, ret.getEntity()); + + ret.setEntity(entity); + + ret = updateEntityInAtlas(ret); + } + + AtlasEntity tableEntity = ret.getEntity(); + + if (tableEntity != null) { + List cfEntities = createOrUpdateColumnFamilies(nameSpace, tableName, owner, hcdts, tableEntity); + + List cfIDs = new ArrayList<>(); + + if (CollectionUtils.isNotEmpty(cfEntities)) { + for (AtlasEntityWithExtInfo cfEntity : cfEntities) { + cfIDs.add(AtlasTypeUtil.getAtlasObjectId(cfEntity.getEntity())); + } + } + tableEntity.setRelationshipAttribute(COLUMN_FAMILIES, AtlasTypeUtil.getAtlasRelatedObjectIdList(cfIDs, HBaseAtlasHook.RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES)); + } + + return ret; + } + + protected List createOrUpdateColumnFamilies(String nameSpace, String tableName, String owner, ColumnFamilyDescriptor[] hcdts , AtlasEntity tableEntity) throws Exception { + List ret = new ArrayList<>(); + + if (hcdts != null) { + AtlasObjectId tableId = AtlasTypeUtil.getAtlasObjectId(tableEntity); + + for (ColumnFamilyDescriptor columnFamilyDescriptor : hcdts) { + String cfName = columnFamilyDescriptor.getNameAsString(); + String cfQualifiedName = getColumnFamilyQualifiedName(metadataNamespace, nameSpace, tableName, cfName); + AtlasEntityWithExtInfo cfEntity = findColumnFamiltyEntityInAtlas(cfQualifiedName); + + if (cfEntity == null) { + LOG.info("Importing Column-family: " + cfQualifiedName); + + AtlasEntity entity = getColumnFamilyEntity(nameSpace, tableName, owner, columnFamilyDescriptor, tableId, null); + + cfEntity = createEntityInAtlas(new AtlasEntityWithExtInfo(entity)); + } else { + LOG.info("ColumnFamily already present in Atlas. 
Updating it..: " + cfQualifiedName); + + AtlasEntity entity = getColumnFamilyEntity(nameSpace, tableName, owner, columnFamilyDescriptor, tableId, cfEntity.getEntity()); + + cfEntity.setEntity(entity); + + cfEntity = updateEntityInAtlas(cfEntity); + } + + ret.add(cfEntity); + } + } + + return ret; + } + + private AtlasEntityWithExtInfo findNameSpaceEntityInAtlas(String nsQualifiedName) { + AtlasEntityWithExtInfo ret = null; + + try { + ret = findEntityInAtlas(HBaseDataTypes.HBASE_NAMESPACE.getName(), nsQualifiedName); + clearRelationshipAttributes(ret); + } catch (Exception e) { + ret = null; // entity doesn't exist in Atlas + } + + return ret; + } + + private AtlasEntityWithExtInfo findTableEntityInAtlas(String tableQualifiedName) { + AtlasEntityWithExtInfo ret = null; + + try { + ret = findEntityInAtlas(HBaseDataTypes.HBASE_TABLE.getName(), tableQualifiedName); + clearRelationshipAttributes(ret); + } catch (Exception e) { + ret = null; // entity doesn't exist in Atlas + } + + return ret; + } + + private AtlasEntityWithExtInfo findColumnFamiltyEntityInAtlas(String columnFamilyQualifiedName) { + AtlasEntityWithExtInfo ret = null; + + try { + ret = findEntityInAtlas(HBaseDataTypes.HBASE_COLUMN_FAMILY.getName(), columnFamilyQualifiedName); + clearRelationshipAttributes(ret); + } catch (Exception e) { + ret = null; // entity doesn't exist in Atlas + } + + return ret; + } + + private AtlasEntityWithExtInfo findEntityInAtlas(String typeName, String qualifiedName) throws Exception { + Map attributes = Collections.singletonMap(QUALIFIED_NAME, qualifiedName); + + return atlasClientV2.getEntityByAttribute(typeName, attributes); + } + + private AtlasEntity getNameSpaceEntity(String nameSpace, AtlasEntity nsEtity) { + AtlasEntity ret = null ; + + if (nsEtity == null) { + ret = new AtlasEntity(HBaseDataTypes.HBASE_NAMESPACE.getName()); + } else { + ret = nsEtity; + } + + String qualifiedName = getNameSpaceQualifiedName(metadataNamespace, nameSpace); + + ret.setAttribute(QUALIFIED_NAME, qualifiedName); + ret.setAttribute(CLUSTERNAME, metadataNamespace); + ret.setAttribute(NAME, nameSpace); + ret.setAttribute(DESCRIPTION_ATTR, nameSpace); + + return ret; + } + + private AtlasEntity getTableEntity(String nameSpace, String tableName, String owner, AtlasEntity nameSpaceEntity, TableDescriptor htd, AtlasEntity atlasEntity) { + AtlasEntity ret = null; + + if (atlasEntity == null) { + ret = new AtlasEntity(HBaseDataTypes.HBASE_TABLE.getName()); + } else { + ret = atlasEntity; + } + + String tableQualifiedName = getTableQualifiedName(metadataNamespace, nameSpace, tableName); + + ret.setAttribute(QUALIFIED_NAME, tableQualifiedName); + ret.setAttribute(CLUSTERNAME, metadataNamespace); + ret.setRelationshipAttribute(NAMESPACE, AtlasTypeUtil.getAtlasRelatedObjectId(nameSpaceEntity, HBaseAtlasHook.RELATIONSHIP_HBASE_TABLE_NAMESPACE)); + ret.setAttribute(NAME, tableName); + ret.setAttribute(DESCRIPTION_ATTR, tableName); + ret.setAttribute(OWNER, owner); + ret.setAttribute(URI, tableName); + ret.setAttribute(ATTR_TABLE_MAX_FILESIZE, htd.getMaxFileSize()); + ret.setAttribute(ATTR_TABLE_REPLICATION_PER_REGION, htd.getRegionReplication()); + ret.setAttribute(ATTR_TABLE_ISREADONLY, htd.isReadOnly()); + ret.setAttribute(ATTR_TABLE_ISCOMPACTION_ENABLED, htd.isCompactionEnabled()); + ret.setAttribute(ATTR_TABLE_DURABLILITY, (htd.getDurability() != null ? 
htd.getDurability().name() : null)); + ret.setAttribute(ATTR_TABLE_NORMALIZATION_ENABLED, htd.isNormalizationEnabled()); + + return ret; + } + + private AtlasEntity getColumnFamilyEntity(String nameSpace, String tableName, String owner, ColumnFamilyDescriptor hcdt, AtlasObjectId tableId, AtlasEntity atlasEntity){ + AtlasEntity ret = null; + + if (atlasEntity == null) { + ret = new AtlasEntity(HBaseDataTypes.HBASE_COLUMN_FAMILY.getName()); + } else { + ret = atlasEntity; + } + + String cfName = hcdt.getNameAsString(); + String cfQualifiedName = getColumnFamilyQualifiedName(metadataNamespace, nameSpace, tableName, cfName); + + ret.setAttribute(QUALIFIED_NAME, cfQualifiedName); + ret.setAttribute(CLUSTERNAME, metadataNamespace); + ret.setRelationshipAttribute(TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(tableId, HBaseAtlasHook.RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES)); + ret.setAttribute(NAME, cfName); + ret.setAttribute(DESCRIPTION_ATTR, cfName); + ret.setAttribute(OWNER, owner); + ret.setAttribute(ATTR_CF_BLOCK_CACHE_ENABLED, hcdt.isBlockCacheEnabled()); + ret.setAttribute(ATTR_CF_BLOOMFILTER_TYPE, (hcdt.getBloomFilterType() != null ? hcdt.getBloomFilterType().name():null)); + ret.setAttribute(ATTR_CF_CACHED_BLOOM_ON_WRITE, hcdt.isCacheBloomsOnWrite()); + ret.setAttribute(ATTR_CF_CACHED_DATA_ON_WRITE, hcdt.isCacheDataOnWrite()); + ret.setAttribute(ATTR_CF_CACHED_INDEXES_ON_WRITE, hcdt.isCacheIndexesOnWrite()); + ret.setAttribute(ATTR_CF_COMPACTION_COMPRESSION_TYPE, (hcdt.getCompactionCompressionType() != null ? hcdt.getCompactionCompressionType().name():null)); + ret.setAttribute(ATTR_CF_COMPRESSION_TYPE, (hcdt.getCompressionType() != null ? hcdt.getCompressionType().name():null)); + ret.setAttribute(ATTR_CF_DATA_BLOCK_ENCODING, (hcdt.getDataBlockEncoding() != null ? hcdt.getDataBlockEncoding().name():null)); + ret.setAttribute(ATTR_CF_ENCRYPTION_TYPE, hcdt.getEncryptionType()); + ret.setAttribute(ATTR_CF_EVICT_BLOCK_ONCLOSE, hcdt.isEvictBlocksOnClose()); + ret.setAttribute(ATTR_CF_KEEP_DELETE_CELLS, ( hcdt.getKeepDeletedCells() != null ? hcdt.getKeepDeletedCells().name():null)); + ret.setAttribute(ATTR_CF_MAX_VERSIONS, hcdt.getMaxVersions()); + ret.setAttribute(ATTR_CF_MIN_VERSIONS, hcdt.getMinVersions()); + ret.setAttribute(ATTR_CF_PREFETCH_BLOCK_ONOPEN, hcdt.isPrefetchBlocksOnOpen()); + ret.setAttribute(ATTR_CF_TTL, hcdt.getTimeToLive()); + ret.setAttribute(ATTR_CF_INMEMORY_COMPACTION_POLICY, (hcdt.getInMemoryCompaction() != null ? hcdt.getInMemoryCompaction().name():null)); + ret.setAttribute(ATTR_CF_MOB_COMPATCTPARTITION_POLICY, ( hcdt.getMobCompactPartitionPolicy() != null ? 
hcdt.getMobCompactPartitionPolicy().name():null)); + ret.setAttribute(ATTR_CF_MOB_ENABLED,hcdt.isMobEnabled()); + ret.setAttribute(ATTR_CF_NEW_VERSION_BEHAVIOR,hcdt.isNewVersionBehavior()); + + return ret; + } + + private AtlasEntityWithExtInfo createEntityInAtlas(AtlasEntityWithExtInfo entity) throws Exception { + AtlasEntityWithExtInfo ret = null; + EntityMutationResponse response = atlasClientV2.createEntity(entity); + List entities = response.getCreatedEntities(); + + if (CollectionUtils.isNotEmpty(entities)) { + AtlasEntityWithExtInfo getByGuidResponse = atlasClientV2.getEntityByGuid(entities.get(0).getGuid()); + + ret = getByGuidResponse; + + LOG.info("Created {} entity: name={}, guid={}", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid()); + } + return ret; + } + + private AtlasEntityWithExtInfo updateEntityInAtlas(AtlasEntityWithExtInfo entity) throws Exception { + AtlasEntityWithExtInfo ret = null; + EntityMutationResponse response = atlasClientV2.updateEntity(entity); + + if (response != null) { + List entities = response.getUpdatedEntities(); + + if (CollectionUtils.isNotEmpty(entities)) { + AtlasEntityWithExtInfo getByGuidResponse = atlasClientV2.getEntityByGuid(entities.get(0).getGuid()); + + ret = getByGuidResponse; + + LOG.info("Updated {} entity: name={}, guid={} ", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid()); + } else { + LOG.info("Entity: name={} ", entity.toString() + " not updated as it is unchanged from what is in Atlas" ); + ret = entity; + } + } else { + LOG.info("Entity: name={} ", entity.toString() + " not updated as it is unchanged from what is in Atlas" ); + ret = entity; + } + + return ret; + } + + /** + * Construct the qualified name used to uniquely identify a ColumnFamily instance in Atlas. + * @param metadataNamespace Metadata namespace of the cluster to which the Hbase component belongs + * @param nameSpace Name of the Hbase database to which the Table belongs + * @param tableName Name of the Hbase table + * @param columnFamily Name of the ColumnFamily + * @return Unique qualified name to identify the Table instance in Atlas. + */ + private static String getColumnFamilyQualifiedName(String metadataNamespace, String nameSpace, String tableName, String columnFamily) { + tableName = stripNameSpace(tableName); + return String.format(HBASE_COLUMN_FAMILY_QUALIFIED_NAME_FORMAT, nameSpace, tableName, columnFamily, metadataNamespace); + } + + /** + * Construct the qualified name used to uniquely identify a Table instance in Atlas. + * @param metadataNamespace Metadata namespace of the cluster to which the Hbase component belongs + * @param nameSpace Name of the Hbase database to which the Table belongs + * @param tableName Name of the Hbase table + * @return Unique qualified name to identify the Table instance in Atlas. + */ + private static String getTableQualifiedName(String metadataNamespace, String nameSpace, String tableName) { + tableName = stripNameSpace(tableName); + return String.format(HBASE_TABLE_QUALIFIED_NAME_FORMAT, nameSpace, tableName, metadataNamespace); + } + + /** + * Construct the qualified name used to uniquely identify a Hbase NameSpace instance in Atlas. + * @param metadataNamespace Metadata namespace of the cluster to which the Hbase component belongs + * @param nameSpace Name of the NameSpace + * @return Unique qualified name to identify the HBase NameSpace instance in Atlas. 
+ */ + private static String getNameSpaceQualifiedName(String metadataNamespace, String nameSpace) { + return String.format(HBASE_NAMESPACE_QUALIFIED_NAME, nameSpace, metadataNamespace); + } + + private static String stripNameSpace(String tableName){ + tableName = tableName.substring(tableName.indexOf(":")+1); + + return tableName; + } + + private static void printUsage() { + System.out.println("Usage 1: import-hbase.sh [-n OR --namespace ] [-t
<table regex> OR --table <table regex>
]"); + System.out.println("Usage 2: import-hbase.sh [-f ]" ); + System.out.println(" Format:"); + System.out.println(" namespace1:tbl1"); + System.out.println(" namespace1:tbl2"); + System.out.println(" namespace2:tbl1"); + } + + private void clearRelationshipAttributes(AtlasEntityWithExtInfo entity) { + if (entity != null) { + clearRelationshipAttributes(entity.getEntity()); + + if (entity.getReferredEntities() != null) { + clearRelationshipAttributes(entity.getReferredEntities().values()); + } + } + } + + private void clearRelationshipAttributes(Collection entities) { + if (entities != null) { + for (AtlasEntity entity : entities) { + clearRelationshipAttributes(entity); + } + } + } + + private void clearRelationshipAttributes(AtlasEntity entity) { + if (entity != null && entity.getRelationshipAttributes() != null) { + entity.getRelationshipAttributes().clear(); + } + } +} diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java new file mode 100644 index 0000000000..313132de6c --- /dev/null +++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hbase.hook; + + +import org.apache.atlas.hbase.bridge.HBaseAtlasHook; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.NamespaceDescriptor; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.SnapshotDescription; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; +import org.apache.hadoop.hbase.coprocessor.BulkLoadObserver; +import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; +import org.apache.hadoop.hbase.coprocessor.MasterObserver; +import org.apache.hadoop.hbase.coprocessor.ObserverContext; +import org.apache.hadoop.hbase.coprocessor.RegionObserver; +import org.apache.hadoop.hbase.coprocessor.RegionServerObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +public class HBaseAtlasCoprocessor implements MasterCoprocessor, MasterObserver, RegionObserver, RegionServerObserver { + private static final Logger LOG = LoggerFactory.getLogger(HBaseAtlasCoprocessor.class); + + final HBaseAtlasHook hbaseAtlasHook; + + public HBaseAtlasCoprocessor() { + hbaseAtlasHook = HBaseAtlasHook.getInstance(); + } + + @Override + public void postCreateTable(ObserverContext observerContext, TableDescriptor tableDescriptor, RegionInfo[] hRegionInfos) throws IOException { + LOG.info("==> HBaseAtlasCoprocessor.postCreateTable()"); + + hbaseAtlasHook.sendHBaseTableOperation(tableDescriptor, null, HBaseAtlasHook.OPERATION.CREATE_TABLE, observerContext); + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postCreateTable()"); + } + } + + @Override + public void postDeleteTable(ObserverContext observerContext, TableName tableName) throws IOException { + LOG.info("==> HBaseAtlasCoprocessor.postDeleteTable()"); + hbaseAtlasHook.sendHBaseTableOperation(null, tableName, HBaseAtlasHook.OPERATION.DELETE_TABLE, observerContext); + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postDeleteTable()"); + } + } + + @Override + public void postModifyTable(ObserverContext observerContext, TableName tableName, TableDescriptor tableDescriptor) throws IOException { + LOG.info("==> HBaseAtlasCoprocessor.postModifyTable()"); + hbaseAtlasHook.sendHBaseTableOperation(tableDescriptor, tableName, HBaseAtlasHook.OPERATION.ALTER_TABLE, observerContext); + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postModifyTable()"); + } + } + + @Override + public void postCreateNamespace(ObserverContext observerContext, NamespaceDescriptor namespaceDescriptor) throws IOException { + LOG.info("==> HBaseAtlasCoprocessor.postCreateNamespace()"); + + hbaseAtlasHook.sendHBaseNameSpaceOperation(namespaceDescriptor, null, HBaseAtlasHook.OPERATION.CREATE_NAMESPACE, observerContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postCreateNamespace()"); + } + } + + @Override + public void postDeleteNamespace(ObserverContext observerContext, String s) throws IOException { + LOG.info("==> HBaseAtlasCoprocessor.postDeleteNamespace()"); + + hbaseAtlasHook.sendHBaseNameSpaceOperation(null, s, HBaseAtlasHook.OPERATION.DELETE_NAMESPACE, 
observerContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("==> HBaseAtlasCoprocessor.postDeleteNamespace()"); + } + } + + @Override + public void postModifyNamespace(ObserverContext observerContext, NamespaceDescriptor namespaceDescriptor) throws IOException { + LOG.info("==> HBaseAtlasCoprocessor.postModifyNamespace()"); + + hbaseAtlasHook.sendHBaseNameSpaceOperation(namespaceDescriptor, null, HBaseAtlasHook.OPERATION.ALTER_NAMESPACE, observerContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postModifyNamespace()"); + } + } + + @Override + public void postCloneSnapshot(ObserverContext observerContext, SnapshotDescription snapshot, TableDescriptor tableDescriptor) throws IOException { + LOG.info("==> HBaseAtlasCoprocessor.postCloneSnapshot()"); + + hbaseAtlasHook.sendHBaseTableOperation(tableDescriptor, null, HBaseAtlasHook.OPERATION.CREATE_TABLE, observerContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postCloneSnapshot()"); + } + } + + @Override + public void postRestoreSnapshot(ObserverContext observerContext, SnapshotDescription snapshot, TableDescriptor tableDescriptor) throws IOException { + LOG.info("==> HBaseAtlasCoprocessor.postRestoreSnapshot()"); + + hbaseAtlasHook.sendHBaseTableOperation(tableDescriptor, snapshot.getTableName(), HBaseAtlasHook.OPERATION.ALTER_TABLE, observerContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HBaseAtlasCoprocessor.postRestoreSnapshot()"); + } + } + +} + + diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseDataTypes.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseDataTypes.java new file mode 100644 index 0000000000..b83e1b54ba --- /dev/null +++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseDataTypes.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hbase.model; + +/** + * HBASE Data Types for model and bridge. + */ +public enum HBaseDataTypes { + // Classes + HBASE_NAMESPACE, + HBASE_TABLE, + HBASE_COLUMN_FAMILY, + HBASE_COLUMN; + + public String getName() { + return name().toLowerCase(); + } +} diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseOperationContext.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseOperationContext.java new file mode 100644 index 0000000000..1ef7c07dec --- /dev/null +++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseOperationContext.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hbase.model; + +import org.apache.atlas.hbase.bridge.HBaseAtlasHook; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.hadoop.hbase.NamespaceDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.security.UserGroupInformation; + +import java.util.ArrayList; +import java.util.List; + +import java.util.Map; + +public class HBaseOperationContext { + private final UserGroupInformation ugi; + private final Map hbaseConf; + private final HBaseAtlasHook.OPERATION operation; + private final String user; + private final NamespaceDescriptor namespaceDescriptor; + private final TableDescriptor tableDescriptor; + private final ColumnFamilyDescriptor[] columnFamilyDescriptors; + private final TableName tableName; + private final String nameSpace; + private final String columnFamily; + private final String owner; + private final ColumnFamilyDescriptor columnFamilyDescriptor; + + public HBaseOperationContext(NamespaceDescriptor namespaceDescriptor, String nameSpace, TableDescriptor tableDescriptor, TableName tableName, ColumnFamilyDescriptor[] columnFamilyDescriptors, + ColumnFamilyDescriptor columnFamilyDescriptor, String columnFamily, HBaseAtlasHook.OPERATION operation, UserGroupInformation ugi , String user, String owner, + Map hbaseConf) { + this.namespaceDescriptor = namespaceDescriptor; + this.nameSpace = nameSpace; + this.tableDescriptor = tableDescriptor; + this.tableName = tableName; + this.columnFamilyDescriptors = columnFamilyDescriptors; + this.columnFamilyDescriptor = columnFamilyDescriptor; + this.columnFamily = columnFamily; + this.operation = operation; + this.ugi = ugi; + this.user = user; + this.owner = owner; + this.hbaseConf = hbaseConf; + } + + public HBaseOperationContext(NamespaceDescriptor namespaceDescriptor, String nameSpace, HBaseAtlasHook.OPERATION operation, UserGroupInformation ugi , String user, String owner) { + this(namespaceDescriptor, nameSpace, null, null, null, null, null, operation, ugi, user, owner, null); + } + + public HBaseOperationContext(String nameSpace, TableDescriptor tableDescriptor, TableName tableName, ColumnFamilyDescriptor[] columnFamilyDescriptors, HBaseAtlasHook.OPERATION operation, UserGroupInformation ugi, String user, String owner, Map hbaseConf) { + this(null, nameSpace, tableDescriptor, tableName, columnFamilyDescriptors, null, null, operation, ugi, user, owner, hbaseConf); + } + + public HBaseOperationContext(String nameSpace, TableName tableName, ColumnFamilyDescriptor columnFamilyDescriptor, String columnFamily, HBaseAtlasHook.OPERATION operation, UserGroupInformation ugi, String user, String owner, Map hbaseConf) { + this(null, nameSpace, null, tableName, null, columnFamilyDescriptor, columnFamily, operation, ugi, user, owner, hbaseConf); + } + + private List messages = new ArrayList<>(); + + public UserGroupInformation getUgi() { + return ugi; + } + + public Map getHbaseConf() { + return hbaseConf; + } + + public 
String getUser() { + return user; + } + + public HBaseAtlasHook.OPERATION getOperation() { + return operation; + } + + public NamespaceDescriptor getNamespaceDescriptor() { + return namespaceDescriptor; + } + + public TableDescriptor gethTableDescriptor() { + return tableDescriptor; + } + + public ColumnFamilyDescriptor[] gethColumnDescriptors() { + return columnFamilyDescriptors; + } + + public TableName getTableName() { + return tableName; + } + + public String getNameSpace() { + return nameSpace; + } + + public ColumnFamilyDescriptor gethColumnDescriptor() { + return columnFamilyDescriptor; + } + + public String getColummFamily() { + return columnFamily; + } + + public void addMessage(HookNotification message) { + messages.add(message); + } + + public String getOwner() { + return owner; + } + + public List getMessages() { + return messages; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + toString(sb); + return sb.toString(); + } + + public StringBuilder toString(StringBuilder sb) { + sb.append("HBaseOperationContext={"); + sb.append("Operation={").append(operation).append("} "); + sb.append("User ={").append(user).append("} "); + if (nameSpace != null ) { + sb.append("NameSpace={").append(nameSpace).append("}"); + } else { + if (namespaceDescriptor != null) { + sb.append("NameSpace={").append(namespaceDescriptor.toString()).append("}"); + } + } + if (tableName != null ) { + sb.append("Table={").append(tableName).append("}"); + } else { + if ( columnFamilyDescriptor != null) { + sb.append("Table={").append(tableDescriptor.toString()).append("}"); + } + } + if (columnFamily != null ) { + sb.append("Columm Family={").append(columnFamily).append("}"); + } else { + if ( columnFamilyDescriptor != null) { + sb.append("Columm Family={").append(columnFamilyDescriptor.toString()).append("}"); + } + } + sb.append("Message ={").append(getMessages()).append("} "); + sb.append(" }"); + return sb; + } + +} diff --git a/addons/hbase-bridge/src/main/resources/atlas-hbase-import-log4j.xml b/addons/hbase-bridge/src/main/resources/atlas-hbase-import-log4j.xml new file mode 100644 index 0000000000..3fc2dcf9c3 --- /dev/null +++ b/addons/hbase-bridge/src/main/resources/atlas-hbase-import-log4j.xml @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/hbase-bridge/src/test/java/org/apache/atlas/hbase/HBaseAtlasHookIT.java b/addons/hbase-bridge/src/test/java/org/apache/atlas/hbase/HBaseAtlasHookIT.java new file mode 100644 index 0000000000..e346788713 --- /dev/null +++ b/addons/hbase-bridge/src/test/java/org/apache/atlas/hbase/HBaseAtlasHookIT.java @@ -0,0 +1,307 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hbase; + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.AtlasClientV2; +import org.apache.atlas.hbase.bridge.HBaseAtlasHook; +import org.apache.atlas.hbase.model.HBaseDataTypes; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.atlas.utils.ParamChecker; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.net.ServerSocket; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.fail; +import static org.testng.AssertJUnit.assertFalse; + + +public class HBaseAtlasHookIT { + private static final Logger LOG = LoggerFactory.getLogger(HBaseAtlasHookIT.class); + protected static final String ATLAS_URL = "http://localhost:31000/"; + protected static final String CLUSTER_NAME = "primary"; + + private HBaseTestingUtility utility; + private int port; + private AtlasClientV2 atlasClient; + + + @BeforeClass + public void setUp() { + try { + createHBaseCluster(); + createAtlasClient(); + } catch (Exception e) { + LOG.error("Unable to create Hbase Admin for Testing ", e); + } + } + + @AfterClass + public void cleanup() throws Exception { + LOG.info("Stopping mini cluster.. 
"); + utility.shutdownMiniCluster(); + } + + @Test + public void testGetMetaTableRows() throws Exception { + List results = utility.getMetaTableRows(); + assertFalse("results should have some entries and is empty.", results.isEmpty()); + } + + @Test (enabled = false) + public void testCreateNamesapce() throws Exception { + final Configuration conf = HBaseConfiguration.create(); + + conf.set("hbase.zookeeper.quorum", "localhost"); + conf.set("hbase.zookeeper.property.clientPort", String.valueOf(port)); + conf.set("zookeeper.znode.parent", "/hbase-unsecure"); + + Connection conn = ConnectionFactory.createConnection(conf); + Admin admin = conn.getAdmin(); + NamespaceDescriptor ns = NamespaceDescriptor.create("test_namespace").build(); + + admin.createNamespace(ns); + + //assert on qualified name + String nameSpace = assertNameSpaceIsRegistered(ns.getName()); + AtlasClientV2 atlasClient = getAtlasClient(); + + if (atlasClient != null) { + AtlasEntityWithExtInfo nameSpaceRef = atlasClient.getEntityByGuid(nameSpace); + String nameSpaceQualifiedName = HBaseAtlasHook.getNameSpaceQualifiedName(CLUSTER_NAME, ns.getName()); + + Assert.assertEquals(nameSpaceRef.getEntity().getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), nameSpaceQualifiedName); + } else { + Assert.fail("Unable to create AtlasClient for Testing"); + } + } + + @Test (enabled = false) + public void testCreateTable() throws Exception { + final Configuration conf = HBaseConfiguration.create(); + + conf.set("hbase.zookeeper.quorum", "localhost"); + conf.set("hbase.zookeeper.property.clientPort", String.valueOf(port)); + conf.set("zookeeper.znode.parent", "/hbase-unsecure"); + + Connection conn = ConnectionFactory.createConnection(conf); + Admin admin = conn.getAdmin(); + String namespace = "test_namespace1"; + String tablename = "test_table"; + + // Create a table + if (!admin.tableExists(TableName.valueOf(namespace, tablename))) { + NamespaceDescriptor ns = NamespaceDescriptor.create(namespace).build(); + + admin.createNamespace(ns); + + HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf(namespace, tablename)); + + tableDescriptor.addFamily(new HColumnDescriptor("colfam1")); + + admin.createTable(tableDescriptor); + } + + //assert on qualified name + String table = assertTableIsRegistered(namespace, tablename); + AtlasClientV2 atlasClient = getAtlasClient(); + + if (atlasClient != null) { + AtlasEntityWithExtInfo tableRef = atlasClient.getEntityByGuid(table); + String entityName = HBaseAtlasHook.getTableQualifiedName(CLUSTER_NAME, namespace, tablename); + + Assert.assertEquals(tableRef.getEntity().getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), entityName); + } else { + Assert.fail("Unable to create AtlasClient for Testing"); + } + } + + // Methods for creating HBase + + private void createAtlasClient() { + try { + org.apache.commons.configuration.Configuration configuration = ApplicationProperties.get(); + String[] atlasEndPoint = configuration.getStringArray(HBaseAtlasHook.ATTR_ATLAS_ENDPOINT); + + configuration.setProperty("atlas.cluster.name", CLUSTER_NAME); + + if (atlasEndPoint == null || atlasEndPoint.length == 0) { + atlasEndPoint = new String[]{ATLAS_URL}; + } + + Iterator keys = configuration.getKeys(); + while (keys.hasNext()) { + String key = keys.next(); + LOG.info("{} = {} ", key, configuration.getString(key)); + } + + if (AuthenticationUtil.isKerberosAuthenticationEnabled()) { + atlasClient = new AtlasClientV2(atlasEndPoint); + } else { + atlasClient = new AtlasClientV2(configuration, 
atlasEndPoint, new String[]{"admin", "admin"}); + } + } catch (Exception e) { + LOG.error("Unable to create AtlasClient for Testing ", e); + } + } + + private static int getFreePort() throws IOException { + ServerSocket serverSocket = new ServerSocket(0); + int port = serverSocket.getLocalPort(); + + serverSocket.close(); + + return port; + } + + private void createHBaseCluster() throws Exception { + LOG.info("Creating Hbase Admin..."); + + port = getFreePort(); + utility = new HBaseTestingUtility(); + + utility.getConfiguration().set("test.hbase.zookeeper.property.clientPort", String.valueOf(port)); + utility.getConfiguration().set("hbase.master.port", String.valueOf(getFreePort())); + utility.getConfiguration().set("hbase.master.info.port", String.valueOf(getFreePort())); + utility.getConfiguration().set("hbase.regionserver.port", String.valueOf(getFreePort())); + utility.getConfiguration().set("hbase.regionserver.info.port", String.valueOf(getFreePort())); + utility.getConfiguration().set("zookeeper.znode.parent", "/hbase-unsecure"); + utility.getConfiguration().set("hbase.table.sanity.checks", "false"); + utility.getConfiguration().set("hbase.coprocessor.master.classes", "org.apache.atlas.hbase.hook.HBaseAtlasCoprocessor"); + + utility.startMiniCluster(); + } + + + public AtlasClientV2 getAtlasClient() { + AtlasClientV2 ret = null; + if (atlasClient != null) { + ret = atlasClient; + } + return ret; + } + + protected String assertNameSpaceIsRegistered(String nameSpace) throws Exception { + return assertNameSpaceIsRegistered(nameSpace, null); + } + + protected String assertNameSpaceIsRegistered(String nameSpace, HBaseAtlasHookIT.AssertPredicate assertPredicate) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("Searching for nameSpace {}", nameSpace); + } + String nameSpaceQualifiedName = HBaseAtlasHook.getNameSpaceQualifiedName(CLUSTER_NAME, nameSpace); + return assertEntityIsRegistered(HBaseDataTypes.HBASE_NAMESPACE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + nameSpaceQualifiedName, assertPredicate); + } + + protected String assertTableIsRegistered(String nameSpace, String tableName) throws Exception { + return assertTableIsRegistered(nameSpace, tableName, null); + } + + protected String assertTableIsRegistered(String nameSpace, String tableName, HBaseAtlasHookIT.AssertPredicate assertPredicate) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("Searching for nameSpace:Table {} {}", nameSpace, tableName); + } + String tableQualifiedName = HBaseAtlasHook.getTableQualifiedName(CLUSTER_NAME, nameSpace, tableName); + return assertEntityIsRegistered(HBaseDataTypes.HBASE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName, + assertPredicate); + } + + public interface AssertPredicate { + void assertOnEntity(AtlasEntity entity) throws Exception; + } + + public interface Predicate { + /** + * Perform a predicate evaluation. + * + * @return the boolean result of the evaluation. + * @throws Exception thrown if the predicate evaluation could not evaluate. 
+ */ + void evaluate() throws Exception; + } + + + protected String assertEntityIsRegistered(final String typeName, final String property, final String value, + final HBaseAtlasHookIT.AssertPredicate assertPredicate) throws Exception { + waitFor(30000, new HBaseAtlasHookIT.Predicate() { + @Override + public void evaluate() throws Exception { + AtlasEntityWithExtInfo entity = atlasClient.getEntityByAttribute(typeName, Collections.singletonMap(property, value)); + + assertNotNull(entity); + + if (assertPredicate != null) { + assertPredicate.assertOnEntity(entity.getEntity()); + } + } + }); + + AtlasEntityWithExtInfo entity = atlasClient.getEntityByAttribute(typeName, Collections.singletonMap(property, value)); + + return entity.getEntity().getGuid(); + } + + /** + * Wait for a condition, expressed via a {@link HBaseAtlasHookIT.Predicate} to become true. + * + * @param timeout maximum time in milliseconds to wait for the predicate to become true. + * @param predicate predicate waiting on. + */ + protected void waitFor(int timeout, HBaseAtlasHookIT.Predicate predicate) throws Exception { + ParamChecker.notNull(predicate, "predicate"); + long mustEnd = System.currentTimeMillis() + timeout; + + while (true) { + try { + predicate.evaluate(); + return; + } catch (Error | Exception e) { + if (System.currentTimeMillis() >= mustEnd) { + fail("Assertions failed. Failing after waiting for timeout " + timeout + " msecs", e); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting up to {} msec as assertion failed", mustEnd - System.currentTimeMillis(), e); + } + Thread.sleep(5000); + } + } + } + + +} diff --git a/addons/hbase-bridge/src/test/resources/atlas-application.properties b/addons/hbase-bridge/src/test/resources/atlas-application.properties new file mode 100644 index 0000000000..3b12e5fb33 --- /dev/null +++ b/addons/hbase-bridge/src/test/resources/atlas-application.properties @@ -0,0 +1,125 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +######### Atlas Server Configs ######### +atlas.rest.address=http://localhost:31000 + +######### Graph Database Configs ######### + + +# Graph database implementation. Value inserted by maven. 
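The atlas-application.properties test resource introduced here is resolved through ApplicationProperties, the same mechanism createAtlasClient() uses earlier in this hunk. A small self-contained sketch of reading a couple of these keys (assuming the file is on the test classpath; the key names are the ones defined in this properties file):

    import org.apache.atlas.ApplicationProperties;
    import org.apache.commons.configuration.Configuration;

    public final class ReadAtlasTestConfig {
        public static void main(String[] args) throws Exception {
            // Loads atlas-application.properties from the classpath (or from the location pointed to by atlas.conf).
            Configuration conf = ApplicationProperties.get();

            String restAddress  = conf.getString("atlas.rest.address");     // http://localhost:31000 in this test config
            String graphBackend = conf.getString("atlas.graphdb.backend");  // the JanusGraph backend set in this file

            System.out.println(restAddress + " -> " + graphBackend);
        }
    }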
+atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase +atlas.graph.index.search.solr.wait-searcher=true + +# Graph Storage +atlas.graph.storage.backend=berkeleyje + +# Entity repository implementation +atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.InMemoryEntityAuditRepository + +# Graph Search Index Backend +atlas.graph.index.search.backend=solr + +#Berkeley storage directory +atlas.graph.storage.directory=${sys:atlas.data}/berkley + +#hbase +#For standalone mode , specify localhost +#for distributed mode, specify zookeeper quorum here + +atlas.graph.storage.hostname=${graph.storage.hostname} +atlas.graph.storage.hbase.regions-per-server=1 +atlas.graph.storage.lock.wait-time=10000 + +#ElasticSearch +atlas.graph.index.search.directory=${sys:atlas.data}/es +atlas.graph.index.search.elasticsearch.client-only=false +atlas.graph.index.search.elasticsearch.local-mode=true +atlas.graph.index.search.elasticsearch.create.sleep=2000 + +# Solr cloud mode properties +atlas.graph.index.search.solr.mode=cloud +atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address} +atlas.graph.index.search.solr.embedded=true +atlas.graph.index.search.max-result-set-size=150 + + +######### Notification Configs ######### +atlas.notification.embedded=true + +atlas.kafka.zookeeper.connect=localhost:19026 +atlas.kafka.bootstrap.servers=localhost:19027 +atlas.kafka.data=${sys:atlas.data}/kafka +atlas.kafka.zookeeper.session.timeout.ms=4000 +atlas.kafka.zookeeper.sync.time.ms=20 +atlas.kafka.consumer.timeout.ms=4000 +atlas.kafka.auto.commit.interval.ms=100 +atlas.kafka.hook.group.id=atlas +atlas.kafka.entities.group.id=atlas_entities +#atlas.kafka.auto.commit.enable=false + +atlas.kafka.enable.auto.commit=false +atlas.kafka.auto.offset.reset=earliest +atlas.kafka.session.timeout.ms=30000 +atlas.kafka.offsets.topic.replication.factor=1 + + + +######### Entity Audit Configs ######### +atlas.audit.hbase.tablename=ATLAS_ENTITY_AUDIT_EVENTS +atlas.audit.zookeeper.session.timeout.ms=1000 +atlas.audit.hbase.zookeeper.quorum=localhost +atlas.audit.hbase.zookeeper.property.clientPort=19026 + +######### Security Properties ######### + +# SSL config +atlas.enableTLS=false +atlas.server.https.port=31443 + +######### Security Properties ######### + +hbase.security.authentication=simple + +atlas.hook.falcon.synchronous=true + +######### JAAS Configuration ######## + +atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule +atlas.jaas.KafkaClient.loginModuleControlFlag = required +atlas.jaas.KafkaClient.option.useKeyTab = true +atlas.jaas.KafkaClient.option.storeKey = true +atlas.jaas.KafkaClient.option.serviceName = kafka +atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab +atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM + +######### High Availability Configuration ######## +atlas.server.ha.enabled=false +#atlas.server.ids=id1 +#atlas.server.address.id1=localhost:21000 + +######### Atlas Authorization ######### +atlas.authorizer.impl=none +# atlas.authorizer.impl=simple +# atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json + +######### Atlas Authentication ######### +atlas.authentication.method.file=true +atlas.authentication.method.ldap.type=none +atlas.authentication.method.kerberos=false +# atlas.authentication.method.file.filename=users-credentials.properties diff --git a/addons/hbase-bridge/src/test/resources/atlas-log4j.xml 
b/addons/hbase-bridge/src/test/resources/atlas-log4j.xml new file mode 100755 index 0000000000..2c9815ff54 --- /dev/null +++ b/addons/hbase-bridge/src/test/resources/atlas-log4j.xml @@ -0,0 +1,143 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/hbase-bridge/src/test/resources/users-credentials.properties b/addons/hbase-bridge/src/test/resources/users-credentials.properties new file mode 100644 index 0000000000..5046dbaf64 --- /dev/null +++ b/addons/hbase-bridge/src/test/resources/users-credentials.properties @@ -0,0 +1,3 @@ +#username=group::sha256-password +admin=ADMIN::a4a88c0872bf652bb9ed803ece5fd6e82354838a9bf59ab4babb1dab322154e1 +rangertagsync=RANGER_TAG_SYNC::0afe7a1968b07d4c3ff4ed8c2d809a32ffea706c66cd795ead9048e81cfaf034 diff --git a/addons/hbase-testing-util/pom.xml b/addons/hbase-testing-util/pom.xml new file mode 100644 index 0000000000..982e9c85ea --- /dev/null +++ b/addons/hbase-testing-util/pom.xml @@ -0,0 +1,228 @@ + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + hbase-testing-util + Apache HBase - Testing Util + HBase Testing Utilities. + jar + + + 3.0.3 + 9.3.14.v20161028 + + + + + org.testng + testng + + + + org.apache.hbase + hbase-server + ${hbase.version} + compile + + + + org.apache.hbase + hbase-server + ${hbase.version} + test-jar + compile + + + + org.apache.hbase + hbase-zookeeper + ${hbase.version} + jar + compile + + + + org.apache.hbase + hbase-zookeeper + ${hbase.version} + test-jar + compile + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + compile + + + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + test-jar + compile + + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + junit + junit + + + + + + junit + junit + ${junit.version} + + + + org.apache.hadoop + hadoop-minicluster + ${hadoop.version} + compile + + + org.apache.htrace + htrace-core + + + + + org.apache.hadoop + hadoop-minikdc + ${hadoop.version} + + + + org.apache.hbase + hbase-hadoop-compat + ${hbase.version} + jar + compile + + + + org.apache.hbase + hbase-hadoop-compat + ${hbase.version} + test-jar + compile + + + + org.apache.hbase + hbase-hadoop2-compat + ${hbase.version} + jar + compile + + + com.github.stephenc.findbugs + findbugs-annotations + + + + + + org.apache.hbase + hbase-hadoop2-compat + ${hbase.version} + test-jar + compile + + + + + org.slf4j + slf4j-log4j12 + + + + org.apache.hbase + hbase-common + ${hbase.version} + jar + compile + + + com.github.stephenc.findbugs + findbugs-annotations + + + + + + org.apache.hbase + hbase-common + ${hbase.version} + test-jar + compile + + + + org.apache.hbase + hbase-annotations + ${hbase.version} + test-jar + compile + + + jdk.tools + jdk.tools + + + + + + org.apache.hbase + hbase-protocol + ${hbase.version} + jar + compile + + + + org.apache.hbase + hbase-client + ${hbase.version} + jar + compile + + + + + + + + diff --git a/addons/hbase-testing-util/src/test/java/org/apache/atlas/hbase/TestHBaseTestingUtilSpinup.java b/addons/hbase-testing-util/src/test/java/org/apache/atlas/hbase/TestHBaseTestingUtilSpinup.java new file mode 100644 index 0000000000..0beb035521 --- /dev/null +++ b/addons/hbase-testing-util/src/test/java/org/apache/atlas/hbase/TestHBaseTestingUtilSpinup.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.hbase; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +import static org.testng.AssertJUnit.assertFalse; + + +/** + * Make sure we can spin up a HBTU without a hbase-site.xml + */ +public class TestHBaseTestingUtilSpinup { + private static final Logger LOG = LoggerFactory.getLogger(TestHBaseTestingUtilSpinup.class); + private final static HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + @BeforeClass + public static void beforeClass() throws Exception { + UTIL.startMiniCluster(); + if (!UTIL.getHBaseCluster().waitForActiveAndReadyMaster(30000)) { + throw new RuntimeException("Active master not ready"); + } + } + + @AfterClass + public static void afterClass() throws Exception { + UTIL.shutdownMiniCluster(); + } + + @Test + public void testGetMetaTableRows() throws Exception { + List results = UTIL.getMetaTableRows(); + assertFalse("results should have some entries and is empty.", results.isEmpty()); + } + +} diff --git a/addons/hbase-testing-util/src/test/resources/atlas-log4j.xml b/addons/hbase-testing-util/src/test/resources/atlas-log4j.xml new file mode 100755 index 0000000000..47d4d5c7ce --- /dev/null +++ b/addons/hbase-testing-util/src/test/resources/atlas-log4j.xml @@ -0,0 +1,130 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/hdfs-model/pom.xml b/addons/hdfs-model/pom.xml new file mode 100644 index 0000000000..05ba173684 --- /dev/null +++ b/addons/hdfs-model/pom.xml @@ -0,0 +1,127 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + hdfs-model + Apache Atlas FileSystem Model + Apache Atlas FileSystem Model + jar + + + + + org.slf4j + slf4j-api + + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.atlas + atlas-client-v1 + + + + org.apache.atlas + atlas-notification + + + + org.mockito + mockito-all + + + + + + + org.testng + testng + + + + org.apache.atlas + atlas-graphdb-impls + pom + test + + + + org.apache.atlas + atlas-repository + test + + + + + + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.doxia + doxia-module-twiki + ${doxia.version} + + + org.apache.maven.doxia + doxia-core + ${doxia.version} + + + + + + site + + prepare-package + + + + false + false + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + false + + + + + + + + diff --git a/addons/hive-bridge-shim/pom.xml b/addons/hive-bridge-shim/pom.xml new file mode 100755 index 0000000000..849ca2a8f6 --- /dev/null 
+++ b/addons/hive-bridge-shim/pom.xml @@ -0,0 +1,47 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + hive-bridge-shim + Apache Atlas Hive Bridge Shim Module + Apache Atlas Hive Bridge Shim + jar + + + + + org.apache.atlas + atlas-plugin-classloader + + + + org.apache.hive + hive-exec + ${hive.version} + provided + + + diff --git a/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveHook.java new file mode 100755 index 0000000000..2a4d067e59 --- /dev/null +++ b/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveHook.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook; + + +import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader; +import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext; +import org.apache.hadoop.hive.ql.hooks.HookContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Hive hook used for atlas entity registration. 
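This shim class name is what gets registered with Hive as a post-execution hook, conventionally via hive.exec.post.hooks in hive-site.xml. A minimal programmatic sketch of that wiring (assuming the Hive client libraries and the Atlas hook jars are on the classpath):

    import org.apache.hadoop.hive.conf.HiveConf;

    public final class RegisterAtlasHiveHook {
        public static void main(String[] args) {
            HiveConf hiveConf = new HiveConf();

            // Equivalent to adding hive.exec.post.hooks to hive-site.xml.
            hiveConf.set("hive.exec.post.hooks", "org.apache.atlas.hive.hook.HiveHook");

            System.out.println(hiveConf.get("hive.exec.post.hooks"));
        }
    }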
+ */ +public class HiveHook implements ExecuteWithHookContext { + private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class); + + private static final String ATLAS_PLUGIN_TYPE = "hive"; + private static final String ATLAS_HIVE_HOOK_IMPL_CLASSNAME = "org.apache.atlas.hive.hook.HiveHook"; + + private AtlasPluginClassLoader atlasPluginClassLoader = null; + private ExecuteWithHookContext hiveHookImpl = null; + + public HiveHook() { + this.initialize(); + } + + @Override + public void run(final HookContext hookContext) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveHook.run({})", hookContext); + } + + try { + activatePluginClassLoader(); + hiveHookImpl.run(hookContext); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveHook.run({})", hookContext); + } + } + + private void initialize() { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveHook.initialize()"); + } + + try { + atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass()); + + @SuppressWarnings("unchecked") + Class cls = (Class) Class + .forName(ATLAS_HIVE_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader); + + activatePluginClassLoader(); + + hiveHookImpl = cls.newInstance(); + } catch (Exception excp) { + LOG.error("Error instantiating Atlas hook implementation", excp); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveHook.initialize()"); + } + } + + private void activatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.activate(); + } + } + + private void deactivatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.deactivate(); + } + } +} diff --git a/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHook.java b/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHook.java new file mode 100644 index 0000000000..2894e99bdd --- /dev/null +++ b/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHook.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.hive.hook; + +import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.MetaStoreEventListener; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.events.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Hive Metastore hook to capture DDL operations for atlas entity registration. 
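The metastore-side listener below follows the same delegation idiom as the HiveHook shim above: switch to the plugin classloader, call into the real implementation, then restore the previous classloader. A conceptual sketch of that idiom, assuming activate()/deactivate() essentially swap the thread context classloader (the actual AtlasPluginClassLoader may do more than this):

    /** Conceptual sketch only; not the AtlasPluginClassLoader source. */
    final class PluginDelegation {
        static void runWithPluginClassLoader(ClassLoader pluginClassLoader, Runnable delegate) {
            ClassLoader previous = Thread.currentThread().getContextClassLoader();

            Thread.currentThread().setContextClassLoader(pluginClassLoader);
            try {
                delegate.run();                                          // call the impl loaded by the plugin classloader
            } finally {
                Thread.currentThread().setContextClassLoader(previous);  // always restore, even on failure
            }
        }
    }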
+ */ +public class HiveMetastoreHook extends MetaStoreEventListener { + private static final String ATLAS_PLUGIN_TYPE = "hive"; + private static final String ATLAS_HIVE_METASTORE_HOOK_IMPL_CLASSNAME = "org.apache.atlas.hive.hook.HiveMetastoreHookImpl"; + public static final Logger LOG = LoggerFactory.getLogger(HiveMetastoreHook.class); + + private AtlasPluginClassLoader atlasPluginClassLoader = null; + private MetaStoreEventListener atlasMetastoreHookImpl = null; + private Configuration config; + + public HiveMetastoreHook(Configuration config) { + super(config); + + this.config = config; + + this.initialize(); + } + + private void initialize() { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveMetastoreHook.initialize()"); + } + + try { + atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass()); + + @SuppressWarnings("unchecked") + Class cls = (Class) + Class.forName(ATLAS_HIVE_METASTORE_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader); + + activatePluginClassLoader(); + + atlasMetastoreHookImpl = cls.getDeclaredConstructor(Configuration.class).newInstance(config); + } catch (Exception ex) { + LOG.error("Error instantiating Atlas hook implementation", ex); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveMetastoreHook.initialize()"); + } + } + + @Override + public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveMetastoreHook.onCreateTable()"); + } + + try { + activatePluginClassLoader(); + + atlasMetastoreHookImpl.onCreateTable(tableEvent); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveMetastoreHook.onCreateTable()"); + } + } + + @Override + public void onDropTable(DropTableEvent tableEvent) throws MetaException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveMetastoreHook.onDropTable()"); + } + + try { + activatePluginClassLoader(); + + atlasMetastoreHookImpl.onDropTable(tableEvent); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveMetastoreHook.onDropTable()"); + } + } + + @Override + public void onAlterTable(AlterTableEvent tableEvent) throws MetaException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveMetastoreHook.onAlterTable()"); + } + + try { + activatePluginClassLoader(); + + atlasMetastoreHookImpl.onAlterTable(tableEvent); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveMetastoreHook.onAlterTable()"); + } + } + + @Override + public void onCreateDatabase(CreateDatabaseEvent dbEvent) throws MetaException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveMetastoreHook.onCreateDatabase()"); + } + + try { + activatePluginClassLoader(); + + atlasMetastoreHookImpl.onCreateDatabase(dbEvent); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveMetastoreHook.onCreateDatabase()"); + } + } + + @Override + public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveMetastoreHook.onDropDatabase()"); + } + + try { + activatePluginClassLoader(); + + atlasMetastoreHookImpl.onDropDatabase(dbEvent); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveMetastoreHook.onDropDatabase()"); + } + } + + @Override + public void onAlterDatabase(AlterDatabaseEvent dbEvent) throws 
MetaException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveMetastoreHook.onAlterDatabase()"); + } + + try { + activatePluginClassLoader(); + + atlasMetastoreHookImpl.onAlterDatabase(dbEvent); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveMetastoreHook.onAlterDatabase()"); + } + } + + private void activatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.activate(); + } + } + + private void deactivatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.deactivate(); + } + } +} \ No newline at end of file diff --git a/addons/hive-bridge/pom.xml b/addons/hive-bridge/pom.xml new file mode 100755 index 0000000000..8c3636e5d6 --- /dev/null +++ b/addons/hive-bridge/pom.xml @@ -0,0 +1,535 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + hive-bridge + Apache Atlas Hive Bridge Module + Apache Atlas Hive Bridge + jar + + + + + org.slf4j + slf4j-api + + + + org.slf4j + slf4j-log4j12 + + + + org.apache.hive + hive-metastore + ${hive.version} + provided + + + org.mortbay.jetty + * + + + org.eclipse.jetty + * + + + com.github.stephenc.findbugs + findbugs-annotations + + + + + + + org.apache.hive + hive-exec + ${hive.version} + provided + + + javax.servlet + * + + + + + + org.apache.hive + hive-jdbc + ${hive.version} + test + + + javax.servlet + * + + + javax.ws.rs + * + + + org.eclipse.jetty + * + + + + + + org.apache.hive + hive-cli + ${hive.version} + test + + + javax.servlet + * + + + org.eclipse.jetty.aggregate + * + + + + + + + org.apache.atlas + atlas-client-v1 + + + + org.apache.atlas + atlas-client-v2 + ${project.version} + + + + org.apache.atlas + atlas-notification + + + + + org.apache.atlas + atlas-webapp + war + test + + + + org.apache.hadoop + hadoop-client + + + javax.servlet + servlet-api + + + org.eclipse.jetty + * + + + + + + org.apache.hadoop + hadoop-annotations + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.databind.version} + + + + org.testng + testng + + + + org.mockito + mockito-all + + + + org.eclipse.jetty + jetty-server + test + + + + org.apache.atlas + atlas-graphdb-impls + pom + test + + + + org.apache.atlas + atlas-intg + tests + test + + + + org.apache.atlas + atlas-repository + tests + test + + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + test + + + + + + dist + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-hook + package + + copy + + + ${project.build.directory}/dependency/hook/hive/atlas-hive-plugin-impl + false + false + true + + + ${project.groupId} + ${project.artifactId} + ${project.version} + + + ${project.groupId} + atlas-client-common + ${project.version} + + + ${project.groupId} + atlas-client-v1 + ${project.version} + + + ${project.groupId} + atlas-client-v2 + ${project.version} + + + ${project.groupId} + atlas-intg + ${project.version} + + + ${project.groupId} + atlas-notification + ${project.version} + + + ${project.groupId} + atlas-common + ${project.version} + + + org.apache.kafka + kafka_${kafka.scala.binary.version} + ${kafka.version} + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + com.sun.jersey.contribs + jersey-multipart + ${jersey.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.databind.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + + + commons-configuration + 
commons-configuration + ${commons-conf.version} + + + com.sun.jersey + jersey-json + ${jersey.version} + + + javax.ws.rs + jsr311-api + ${jsr.version} + + + + + + copy-hook-shim + package + + copy + + + ${project.build.directory}/dependency/hook/hive + false + false + true + + + ${project.groupId} + hive-bridge-shim + ${project.version} + + + ${project.groupId} + atlas-plugin-classloader + ${project.version} + + + + + + + + + + + + + + org.eclipse.jetty + jetty-maven-plugin + ${jetty.version} + + ${skipTests} + + + 31000 + 60000 + + ../../webapp/target/atlas-webapp-${project.version}.war + true + ../../webapp/src/main/webapp + + / + ${project.basedir}/../../webapp/src/main/webapp/WEB-INF/web.xml + + true + + true + + atlas.home + ${project.build.directory} + + + atlas.conf + ${project.build.directory}/test-classes + + + atlas.data + ${project.build.directory}/data + + + atlas.log.dir + ${project.build.directory}/logs + + + atlas.log.file + application.log + + + log4j.configuration + file:///${project.build.directory}/test-classes/atlas-log4j.xml + + + atlas.graphdb.backend + ${graphdb.backend.impl} + + + embedded.solr.directory + ${project.build.directory} + + + solr.log.dir + ${project.build.directory}/logs + + + org.eclipse.jetty.annotations.maxWait + 5000 + + + atlas-stop + 31001 + ${jetty-maven-plugin.stopWait} + ${debug.jetty.daemon} + ${project.build.testOutputDirectory} + true + jar + + + + org.apache.curator + curator-client + ${curator.version} + + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + + + start-jetty + pre-integration-test + + + stop + deploy-war + + + + stop-jetty + post-integration-test + + stop + + + + + + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.doxia + doxia-module-twiki + ${doxia.version} + + + org.apache.maven.doxia + doxia-core + ${doxia.version} + + + + + + site + + prepare-package + + + + false + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + false + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-resources + validate + + copy-resources + + + ${basedir}/target/models + + + ${basedir}/../models + + 0000-Area0/0010-base_model.json + 1000-Hadoop/** + + + + + + + copy-solr-resources + validate + + copy-resources + + + ${project.build.directory}/solr + + + ${basedir}/../../test-tools/src/main/resources/solr + + + + + + + + + + diff --git a/addons/hive-bridge/src/bin/import-hive.sh b/addons/hive-bridge/src/bin/import-hive.sh new file mode 100755 index 0000000000..ebe6976f0e --- /dev/null +++ b/addons/hive-bridge/src/bin/import-hive.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. 
+# +# resolve links - $0 may be a softlink +PRG="${0}" + +[[ `uname -s` == *"CYGWIN"* ]] && CYGWIN=true + +while [ -h "${PRG}" ]; do + ls=`ls -ld "${PRG}"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "${PRG}"`/"$link" + fi +done + +BASEDIR=`dirname ${PRG}` +BASEDIR=`cd ${BASEDIR}/..;pwd` + +if test -z "${JAVA_HOME}" +then + JAVA_BIN=`which java` + JAR_BIN=`which jar` +else + JAVA_BIN="${JAVA_HOME}/bin/java" + JAR_BIN="${JAVA_HOME}/bin/jar" +fi +export JAVA_BIN + +if [ ! -e "${JAVA_BIN}" ] || [ ! -e "${JAR_BIN}" ]; then + echo "$JAVA_BIN and/or $JAR_BIN not found on the system. Please make sure java and jar commands are available." + exit 1 +fi + +# Construct Atlas classpath using jars from hook/hive/atlas-hive-plugin-impl/ directory. +for i in "${BASEDIR}/hook/hive/atlas-hive-plugin-impl/"*.jar; do + ATLASCPPATH="${ATLASCPPATH}:$i" +done + +if [ -z "${ATLAS_CONF_DIR}" ] && [ -e /etc/atlas/conf ];then + ATLAS_CONF_DIR=/etc/atlas/conf +fi +ATLASCPPATH=${ATLASCPPATH}:${ATLAS_CONF_DIR} + +# log dir for applications +ATLAS_LOG_DIR="${ATLAS_LOG_DIR:-/var/log/atlas}" +export ATLAS_LOG_DIR +LOGFILE="$ATLAS_LOG_DIR/import-hive.log" + +TIME=`date +%Y%m%d%H%M%s` + +#Add hive conf in classpath +if [ ! -z "$HIVE_CONF_DIR" ]; then + HIVE_CONF=$HIVE_CONF_DIR +elif [ ! -z "$HIVE_HOME" ]; then + HIVE_CONF="$HIVE_HOME/conf" +elif [ -e /etc/hive/conf ]; then + HIVE_CONF="/etc/hive/conf" +else + echo "Could not find a valid HIVE configuration" + exit 1 +fi + +echo Using Hive configuration directory ["$HIVE_CONF"] + + +if [ -f "${HIVE_CONF}/hive-env.sh" ]; then + . "${HIVE_CONF}/hive-env.sh" +fi + +if [ -z "$HIVE_HOME" ]; then + if [ -d "${BASEDIR}/../hive" ]; then + HIVE_HOME=${BASEDIR}/../hive + else + echo "Please set HIVE_HOME to the root of Hive installation" + exit 1 + fi +fi + +HIVE_CP="${HIVE_CONF}" +# Multiple jars in HIVE_CP_EXCLUDE_LIST can be added using "\|" separator +# Ex: HIVE_CP_EXCLUDE_LIST="javax.ws.rs-api\|jersey-multipart" +# exclude log4j libs from hive classpath to avoid conflict +HIVE_CP_EXCLUDE_LIST="javax.ws.rs-api\|log4j-slf4j-impl\|log4j-1.2-api\|log4j-api\|log4j-core\|log4j-web" + +for i in $(find "${HIVE_HOME}/lib/" -name "*.jar" | grep -v "$HIVE_CP_EXCLUDE_LIST"); do + HIVE_CP="${HIVE_CP}:$i" +done + +#Add hadoop conf in classpath +if [ ! -z "$HADOOP_CLASSPATH" ]; then + HADOOP_CP=$HADOOP_CLASSPATH +elif [ ! -z "$HADOOP_HOME" ]; then + HADOOP_CP=`$HADOOP_HOME/bin/hadoop classpath` +elif [ $(command -v hadoop) ]; then + HADOOP_CP=`hadoop classpath` + echo $HADOOP_CP +else + echo "Environment variable HADOOP_CLASSPATH or HADOOP_HOME need to be set" + exit 1 +fi + +CP="${HIVE_CP}:${HADOOP_CP}:${ATLASCPPATH}" + +# If running in cygwin, convert pathnames and classpath to Windows format. 
+if [ "${CYGWIN}" == "true" ] +then + ATLAS_LOG_DIR=`cygpath -w ${ATLAS_LOG_DIR}` + LOGFILE=`cygpath -w ${LOGFILE}` + HIVE_CP=`cygpath -w ${HIVE_CP}` + HADOOP_CP=`cygpath -w ${HADOOP_CP}` + CP=`cygpath -w -p ${CP}` +fi + +JAVA_PROPERTIES="$ATLAS_OPTS -Datlas.log.dir=$ATLAS_LOG_DIR -Datlas.log.file=import-hive.log +-Dlog4j.configuration=atlas-hive-import-log4j.xml" + +IMPORT_ARGS= +JVM_ARGS= + +while true +do + option=$1 + shift + + case "$option" in + -d) IMPORT_ARGS="$IMPORT_ARGS -d $1"; shift;; + -t) IMPORT_ARGS="$IMPORT_ARGS -t $1"; shift;; + -f) IMPORT_ARGS="$IMPORT_ARGS -f $1"; shift;; + -o) IMPORT_ARGS="$IMPORT_ARGS -o $1"; shift;; + -i) IMPORT_ARGS="$IMPORT_ARGS -i";; + -h) export HELP_OPTION="true"; IMPORT_ARGS="$IMPORT_ARGS -h";; + --database) IMPORT_ARGS="$IMPORT_ARGS --database $1"; shift;; + --table) IMPORT_ARGS="$IMPORT_ARGS --table $1"; shift;; + --filename) IMPORT_ARGS="$IMPORT_ARGS --filename $1"; shift;; + --output) IMPORT_ARGS="$IMPORT_ARGS --output $1"; shift;; + --ignoreBulkImport) IMPORT_ARGS="$IMPORT_ARGS --ignoreBulkImport";; + --help) export HELP_OPTION="true"; IMPORT_ARGS="$IMPORT_ARGS --help";; + -deleteNonExisting) IMPORT_ARGS="$IMPORT_ARGS -deleteNonExisting";; + "") break;; + *) IMPORT_ARGS="$IMPORT_ARGS $option" + esac +done + +JAVA_PROPERTIES="${JAVA_PROPERTIES} ${JVM_ARGS}" + +if [ -z ${HELP_OPTION} ]; then + echo "Log file for import is $LOGFILE" +fi + +"${JAVA_BIN}" ${JAVA_PROPERTIES} -cp "${CP}" org.apache.atlas.hive.bridge.HiveMetaStoreBridge $IMPORT_ARGS + +RETVAL=$? +if [ -z ${HELP_OPTION} ]; then + [ $RETVAL -eq 0 ] && echo Hive Meta Data imported successfully! + [ $RETVAL -eq 1 ] && echo Failed to import Hive Meta Data! Check logs at: $LOGFILE for details. +fi + +exit $RETVAL + diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java new file mode 100755 index 0000000000..28365bc5c3 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java @@ -0,0 +1,1264 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.hive.bridge; + +import com.google.common.annotations.VisibleForTesting; +import com.sun.jersey.api.client.ClientResponse; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClientV2; +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.hive.hook.events.BaseHiveEvent; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.hook.AtlasHookException; +import org.apache.atlas.model.discovery.AtlasSearchResult; +import org.apache.atlas.model.discovery.SearchParameters; +import org.apache.atlas.model.instance.AtlasEntityHeader; +import org.apache.atlas.model.instance.EntityMutationResponse; +import org.apache.atlas.model.instance.EntityMutations; +import org.apache.atlas.utils.AtlasPathExtractorUtil; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.atlas.utils.HdfsNameServiceResolver; +import org.apache.atlas.utils.AtlasConfigurationUtil; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.instance.AtlasStruct; +import org.apache.atlas.utils.PathExtractorContext; + +import org.apache.commons.cli.BasicParser; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.MissingArgumentException; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.InvalidTableException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.*; + +/** + * A Bridge Utility that imports metadata from the Hive Meta Store + * and registers them in Atlas. 
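Besides the CLI entry point below, the bridge can be driven directly from code. A minimal sketch (the Atlas URL, credentials and database name are placeholders, and it assumes a hive-site.xml visible to new HiveConf()):

    import org.apache.atlas.ApplicationProperties;
    import org.apache.atlas.AtlasClientV2;
    import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
    import org.apache.commons.configuration.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;

    public final class ImportOneDatabase {
        public static void main(String[] args) throws Exception {
            Configuration atlasConf = ApplicationProperties.get();

            // Placeholder endpoint/credentials; the CLI resolves these from configuration and auth settings instead.
            AtlasClientV2 atlasClient = new AtlasClientV2(new String[] { "http://localhost:21000" },
                                                          new String[] { "admin", "admin" });

            HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(atlasConf, new HiveConf(), atlasClient);

            // Import a single database and all of its tables, failing fast on the first error.
            bridge.importHiveMetadata("sales", null, true);

            atlasClient.close();
        }
    }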
+ */ + +public class HiveMetaStoreBridge { + private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class); + + public static final String CONF_PREFIX = "atlas.hook.hive."; + public static final String CLUSTER_NAME_KEY = "atlas.cluster.name"; + public static final String HIVE_USERNAME = "atlas.hook.hive.default.username"; + public static final String HIVE_METADATA_NAMESPACE = "atlas.metadata.namespace"; + public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase"; + public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION = CONF_PREFIX + "aws_s3.atlas.model.version"; + public static final String DEFAULT_CLUSTER_NAME = "primary"; + public static final String TEMP_TABLE_PREFIX = "_temp-"; + public static final String ATLAS_ENDPOINT = "atlas.rest.address"; + public static final String SEP = ":".intern(); + public static final String HDFS_PATH = "hdfs_path"; + public static final String DEFAULT_METASTORE_CATALOG = "hive"; + public static final String HIVE_TABLE_DB_EDGE_LABEL = "__hive_table.db"; + public static final String HOOK_HIVE_PAGE_LIMIT = CONF_PREFIX + "page.limit"; + + static final String OPTION_OUTPUT_FILEPATH_SHORT = "o"; + static final String OPTION_OUTPUT_FILEPATH_LONG = "output"; + static final String OPTION_IGNORE_BULK_IMPORT_SHORT = "i"; + static final String OPTION_IGNORE_BULK_IMPORT_LONG = "ignoreBulkImport"; + static final String OPTION_DATABASE_SHORT = "d"; + static final String OPTION_DATABASE_LONG = "database"; + static final String OPTION_TABLE_SHORT = "t"; + static final String OPTION_TABLE_LONG = "table"; + static final String OPTION_IMPORT_DATA_FILE_SHORT = "f"; + static final String OPTION_IMPORT_DATA_FILE_LONG = "filename"; + static final String OPTION_FAIL_ON_ERROR = "failOnError"; + static final String OPTION_DELETE_NON_EXISTING = "deleteNonExisting"; + static final String OPTION_HELP_SHORT = "h"; + static final String OPTION_HELP_LONG = "help"; + + public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2 = "v2"; + + private static final int EXIT_CODE_SUCCESS = 0; + private static final int EXIT_CODE_FAILED = 1; + private static final int EXIT_CODE_INVALID_ARG = 2; + + private static final String DEFAULT_ATLAS_URL = "http://localhost:21000/"; + private static int pageLimit = 10000; + + private final String metadataNamespace; + private final Hive hiveClient; + private final AtlasClientV2 atlasClientV2; + private final boolean convertHdfsPathToLowerCase; + + private String awsS3AtlasModelVersion = null; + + public static void main(String[] args) { + int exitCode = EXIT_CODE_FAILED; + AtlasClientV2 atlasClientV2 = null; + Options acceptedCliOptions = prepareCommandLineOptions(); + + try { + CommandLine cmd = new BasicParser().parse(acceptedCliOptions, args); + List argsNotProcessed = cmd.getArgList(); + + if (argsNotProcessed != null && argsNotProcessed.size() > 0) { + throw new ParseException("Unrecognized arguments."); + } + + if (cmd.hasOption(OPTION_HELP_SHORT)) { + printUsage(acceptedCliOptions); + exitCode = EXIT_CODE_SUCCESS; + } else { + Configuration atlasConf = ApplicationProperties.get(); + String[] atlasEndpoint = atlasConf.getStringArray(ATLAS_ENDPOINT); + + if (atlasEndpoint == null || atlasEndpoint.length == 0) { + atlasEndpoint = new String[] { DEFAULT_ATLAS_URL }; + } + + if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) { + String[] basicAuthUsernamePassword = AuthenticationUtil.getBasicAuthenticationInput(); + + atlasClientV2 = new AtlasClientV2(atlasEndpoint, 
basicAuthUsernamePassword); + } else { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + + atlasClientV2 = new AtlasClientV2(ugi, ugi.getShortUserName(), atlasEndpoint); + } + + boolean createZip = cmd.hasOption(OPTION_OUTPUT_FILEPATH_LONG); + + if (createZip) { + HiveMetaStoreBridgeV2 hiveMetaStoreBridgeV2 = new HiveMetaStoreBridgeV2(atlasConf, new HiveConf(), atlasClientV2); + + if (hiveMetaStoreBridgeV2.exportDataToZipAndRunAtlasImport(cmd)) { + exitCode = EXIT_CODE_SUCCESS; + } + } else { + HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(atlasConf, new HiveConf(), atlasClientV2); + + if (hiveMetaStoreBridge.importDataDirectlyToAtlas(cmd)) { + exitCode = EXIT_CODE_SUCCESS; + } + } + } + } catch(ParseException e) { + LOG.error("Invalid argument. Error: {}", e.getMessage()); + System.out.println("Invalid argument. Error: " + e.getMessage()); + exitCode = EXIT_CODE_INVALID_ARG; + + if (!(e instanceof MissingArgumentException)) { + printUsage(acceptedCliOptions); + } + } catch(Exception e) { + LOG.error("Import Failed", e); + } finally { + if( atlasClientV2 !=null) { + atlasClientV2.close(); + } + } + + System.exit(exitCode); + } + + private static Options prepareCommandLineOptions() { + Options acceptedCliOptions = new Options(); + + return acceptedCliOptions.addOption(OPTION_OUTPUT_FILEPATH_SHORT, OPTION_OUTPUT_FILEPATH_LONG, true, "Output path or file for Zip import") + .addOption(OPTION_IGNORE_BULK_IMPORT_SHORT, OPTION_IGNORE_BULK_IMPORT_LONG, false, "Ignore bulk Import for Zip import") + .addOption(OPTION_DATABASE_SHORT, OPTION_DATABASE_LONG, true, "Database name") + .addOption(OPTION_TABLE_SHORT, OPTION_TABLE_LONG, true, "Table name") + .addOption(OPTION_IMPORT_DATA_FILE_SHORT, OPTION_IMPORT_DATA_FILE_LONG, true, "Filename") + .addOption(OPTION_FAIL_ON_ERROR, false, "failOnError") + .addOption(OPTION_DELETE_NON_EXISTING, false, "Delete database and table entities in Atlas if not present in Hive") + .addOption(OPTION_HELP_SHORT, OPTION_HELP_LONG, false, "Print this help message"); + } + + private static void printUsage(Options options) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("import-hive.sh", options); + System.out.println(); + System.out.println("Usage options:"); + System.out.println(" Usage 1: import-hive.sh [-d OR --database ] " ); + System.out.println(" Imports specified database and its tables ..."); + System.out.println(); + System.out.println(" Usage 2: import-hive.sh [-d OR --database ] [-t

<table> OR --table <table>
]"); + System.out.println(" Imports specified table within that database ..."); + System.out.println(); + System.out.println(" Usage 3: import-hive.sh"); + System.out.println(" Imports all databases and tables..."); + System.out.println(); + System.out.println(" Usage 4: import-hive.sh -f "); + System.out.println(" Imports all databases and tables in the file..."); + System.out.println(" Format:"); + System.out.println(" database1:tbl1"); + System.out.println(" database1:tbl2"); + System.out.println(" database2:tbl2"); + System.out.println(); + System.out.println(" Usage 5: import-hive.sh [-deleteNonExisting] " ); + System.out.println(" Deletes databases and tables which are not in Hive ..."); + System.out.println(); + System.out.println(" Usage 6: import-hive.sh -o [-f ] [-d OR --database ] [-t
<table> OR --table <table>
]"); + System.out.println(" To create zip file with exported data and import the zip file at Atlas ..."); + System.out.println(); + System.out.println(" Usage 7: import-hive.sh -i -o [-f ] [-d OR --database ] [-t
<table> OR --table <table>
]"); + System.out.println(" To create zip file with exported data without importing to Atlas which can be imported later ..."); + System.out.println(); + } + + /** + * Construct a HiveMetaStoreBridge. + * @param hiveConf {@link HiveConf} for Hive component in the cluster + */ + public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf, AtlasClientV2 atlasClientV2) throws Exception { + this.metadataNamespace = getMetadataNamespace(atlasProperties); + this.hiveClient = Hive.get(hiveConf); + this.atlasClientV2 = atlasClientV2; + this.convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false); + this.awsS3AtlasModelVersion = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2); + if (atlasProperties != null) { + pageLimit = atlasProperties.getInteger(HOOK_HIVE_PAGE_LIMIT, 10000); + } + } + + /** + * Construct a HiveMetaStoreBridge. + * @param hiveConf {@link HiveConf} for Hive component in the cluster + */ + public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf) throws Exception { + this(atlasProperties, hiveConf, null); + } + + HiveMetaStoreBridge(String metadataNamespace, Hive hiveClient, AtlasClientV2 atlasClientV2) { + this(metadataNamespace, hiveClient, atlasClientV2, true); + } + + HiveMetaStoreBridge(String metadataNamespace, Hive hiveClient, AtlasClientV2 atlasClientV2, boolean convertHdfsPathToLowerCase) { + this.metadataNamespace = metadataNamespace; + this.hiveClient = hiveClient; + this.atlasClientV2 = atlasClientV2; + this.convertHdfsPathToLowerCase = convertHdfsPathToLowerCase; + } + + public String getMetadataNamespace(Configuration config) { + return AtlasConfigurationUtil.getRecentString(config, HIVE_METADATA_NAMESPACE, getClusterName(config)); + } + + private String getClusterName(Configuration config) { + return config.getString(CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME); + } + + public String getMetadataNamespace() { + return metadataNamespace; + } + + public Hive getHiveClient() { + return hiveClient; + } + + public boolean isConvertHdfsPathToLowerCase() { + return convertHdfsPathToLowerCase; + } + + public boolean importDataDirectlyToAtlas(CommandLine cmd) throws Exception { + LOG.info("Importing Hive metadata"); + boolean ret = false; + + String databaseToImport = cmd.getOptionValue(OPTION_DATABASE_SHORT); + String tableToImport = cmd.getOptionValue(OPTION_TABLE_SHORT); + String fileToImport = cmd.getOptionValue(OPTION_IMPORT_DATA_FILE_SHORT); + + boolean failOnError = cmd.hasOption(OPTION_FAIL_ON_ERROR); + boolean deleteNonExisting = cmd.hasOption(OPTION_DELETE_NON_EXISTING); + + LOG.info("delete non existing flag : {} ", deleteNonExisting); + + if (deleteNonExisting) { + deleteEntitiesForNonExistingHiveMetadata(failOnError); + ret = true; + } else if (StringUtils.isNotEmpty(fileToImport)) { + File f = new File(fileToImport); + + if (f.exists() && f.canRead()) { + BufferedReader br = new BufferedReader(new FileReader(f)); + String line = null; + + while((line = br.readLine()) != null) { + String val[] = line.split(":"); + + if (ArrayUtils.isNotEmpty(val)) { + databaseToImport = val[0]; + + if (val.length > 1) { + tableToImport = val[1]; + } else { + tableToImport = ""; + } + + importDatabases(failOnError, databaseToImport, tableToImport); + } + } + ret = true; + } else { + LOG.error("Failed to read the input file: " + fileToImport); + } + } else { + importDatabases(failOnError, databaseToImport, tableToImport); + ret = true; + } + return ret; + } + + 
@VisibleForTesting + public void importHiveMetadata(String databaseToImport, String tableToImport, boolean failOnError) throws Exception { + LOG.info("Importing Hive metadata"); + + importDatabases(failOnError, databaseToImport, tableToImport); + } + + private void importDatabases(boolean failOnError, String databaseToImport, String tableToImport) throws Exception { + List databaseNames = null; + + if (StringUtils.isEmpty(databaseToImport) && StringUtils.isEmpty(tableToImport)) { + //when both database and table to import are empty, import all + databaseNames = hiveClient.getAllDatabases(); + } else if (StringUtils.isEmpty(databaseToImport) && StringUtils.isNotEmpty(tableToImport)) { + //when database is empty and table is not, then check table has database name in it and import that db and table + if (isTableWithDatabaseName(tableToImport)) { + String val[] = tableToImport.split("\\."); + if (val.length > 1) { + databaseToImport = val[0]; + tableToImport = val[1]; + } + databaseNames = hiveClient.getDatabasesByPattern(databaseToImport); + } else { + databaseNames = hiveClient.getAllDatabases(); + } + } else { + //when database to import has some value then, import that db and all table under it. + databaseNames = hiveClient.getDatabasesByPattern(databaseToImport); + } + + if(!CollectionUtils.isEmpty(databaseNames)) { + LOG.info("Found {} databases", databaseNames.size()); + + for (String databaseName : databaseNames) { + AtlasEntityWithExtInfo dbEntity = registerDatabase(databaseName); + + if (dbEntity != null) { + importTables(dbEntity.getEntity(), databaseName, tableToImport, failOnError); + } + } + } else { + LOG.error("No database found"); + System.exit(EXIT_CODE_FAILED); + } + } + + /** + * Imports all tables for the given db + * @param dbEntity + * @param databaseName + * @param failOnError + * @throws Exception + */ + private int importTables(AtlasEntity dbEntity, String databaseName, String tblName, final boolean failOnError) throws Exception { + int tablesImported = 0; + + final List tableNames; + + if (StringUtils.isEmpty(tblName)) { + tableNames = hiveClient.getAllTables(databaseName); + } else { + tableNames = hiveClient.getTablesByPattern(databaseName, tblName); + } + + if(!CollectionUtils.isEmpty(tableNames)) { + LOG.info("Found {} tables to import in database {}", tableNames.size(), databaseName); + + try { + for (String tableName : tableNames) { + int imported = importTable(dbEntity, databaseName, tableName, failOnError); + + tablesImported += imported; + } + } finally { + if (tablesImported == tableNames.size()) { + LOG.info("Successfully imported {} tables from database {}", tablesImported, databaseName); + } else { + LOG.error("Imported {} of {} tables from database {}. 
Please check logs for errors during import", tablesImported, tableNames.size(), databaseName); + } + } + } else { + LOG.error("No tables to import in database {}", databaseName); + } + + return tablesImported; + } + + @VisibleForTesting + public int importTable(AtlasEntity dbEntity, String databaseName, String tableName, final boolean failOnError) throws Exception { + try { + Table table = hiveClient.getTable(databaseName, tableName); + AtlasEntityWithExtInfo tableEntity = registerTable(dbEntity, table); + + if (table.getTableType() == TableType.EXTERNAL_TABLE) { + String processQualifiedName = getTableProcessQualifiedName(metadataNamespace, table); + AtlasEntityWithExtInfo processEntity = findProcessEntity(processQualifiedName); + + if (processEntity == null) { + String tableLocationString = isConvertHdfsPathToLowerCase() ? lower(table.getDataLocation().toString()) : table.getDataLocation().toString(); + Path location = table.getDataLocation(); + String query = getCreateTableString(table, tableLocationString); + + PathExtractorContext pathExtractorCtx = new PathExtractorContext(getMetadataNamespace(), isConvertHdfsPathToLowerCase(), awsS3AtlasModelVersion); + AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(location, pathExtractorCtx); + AtlasEntity pathInst = entityWithExtInfo.getEntity(); + AtlasEntity tableInst = tableEntity.getEntity(); + AtlasEntity processInst = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName()); + + long now = System.currentTimeMillis(); + + processInst.setAttribute(ATTRIBUTE_QUALIFIED_NAME, processQualifiedName); + processInst.setAttribute(ATTRIBUTE_NAME, query); + processInst.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace); + processInst.setRelationshipAttribute(ATTRIBUTE_INPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(pathInst, RELATIONSHIP_DATASET_PROCESS_INPUTS))); + processInst.setRelationshipAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(tableInst, RELATIONSHIP_PROCESS_DATASET_OUTPUTS))); + String userName = table.getOwner(); + if (StringUtils.isEmpty(userName)) { + userName = ApplicationProperties.get().getString(HIVE_USERNAME, "hive"); + } + processInst.setAttribute(ATTRIBUTE_USER_NAME, userName); + processInst.setAttribute(ATTRIBUTE_START_TIME, now); + processInst.setAttribute(ATTRIBUTE_END_TIME, now); + processInst.setAttribute(ATTRIBUTE_OPERATION_TYPE, "CREATETABLE"); + processInst.setAttribute(ATTRIBUTE_QUERY_TEXT, query); + processInst.setAttribute(ATTRIBUTE_QUERY_ID, query); + processInst.setAttribute(ATTRIBUTE_QUERY_PLAN, "{}"); + processInst.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(query)); + + AtlasEntitiesWithExtInfo createTableProcess = new AtlasEntitiesWithExtInfo(); + + createTableProcess.addEntity(processInst); + + if (pathExtractorCtx.getKnownEntities() != null) { + pathExtractorCtx.getKnownEntities().values().forEach(entity -> createTableProcess.addEntity(entity)); + } else { + createTableProcess.addEntity(pathInst); + } + + registerInstances(createTableProcess); + } else { + LOG.info("Process {} is already registered", processQualifiedName); + } + } + + return 1; + } catch (Exception e) { + LOG.error("Import failed for hive_table {}", tableName, e); + + if (failOnError) { + throw e; + } + + return 0; + } + } + + /** + * Checks if db is already registered, else creates and registers db entity + * @param databaseName + * @return + * @throws Exception + */ + private AtlasEntityWithExtInfo 
registerDatabase(String databaseName) throws Exception { + AtlasEntityWithExtInfo ret = null; + Database db = hiveClient.getDatabase(databaseName); + + if (db != null) { + ret = findDatabase(metadataNamespace, databaseName); + + if (ret == null) { + ret = registerInstance(new AtlasEntityWithExtInfo(toDbEntity(db))); + } else { + LOG.info("Database {} is already registered - id={}. Updating it.", databaseName, ret.getEntity().getGuid()); + + ret.setEntity(toDbEntity(db, ret.getEntity())); + + updateInstance(ret); + } + } + + return ret; + } + + private AtlasEntityWithExtInfo registerTable(AtlasEntity dbEntity, Table table) throws AtlasHookException { + try { + AtlasEntityWithExtInfo ret; + AtlasEntityWithExtInfo tableEntity = findTableEntity(table); + + if (tableEntity == null) { + tableEntity = toTableEntity(dbEntity, table); + + ret = registerInstance(tableEntity); + } else { + LOG.info("Table {}.{} is already registered with id {}. Updating entity.", table.getDbName(), table.getTableName(), tableEntity.getEntity().getGuid()); + + ret = toTableEntity(dbEntity, table, tableEntity); + + updateInstance(ret); + } + + return ret; + } catch (Exception e) { + throw new AtlasHookException("HiveMetaStoreBridge.registerTable() failed.", e); + } + } + + /** + * Registers an entity in atlas + * @param entity + * @return + * @throws Exception + */ + private AtlasEntityWithExtInfo registerInstance(AtlasEntityWithExtInfo entity) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("creating {} entity: {}", entity.getEntity().getTypeName(), entity); + } + + AtlasEntityWithExtInfo ret = null; + EntityMutationResponse response = atlasClientV2.createEntity(entity); + List createdEntities = response.getEntitiesByOperation(EntityMutations.EntityOperation.CREATE); + + if (CollectionUtils.isNotEmpty(createdEntities)) { + for (AtlasEntityHeader createdEntity : createdEntities) { + if (ret == null) { + ret = atlasClientV2.getEntityByGuid(createdEntity.getGuid()); + + LOG.info("Created {} entity: name={}, guid={}", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid()); + } else if (ret.getEntity(createdEntity.getGuid()) == null) { + AtlasEntityWithExtInfo newEntity = atlasClientV2.getEntityByGuid(createdEntity.getGuid()); + + ret.addReferredEntity(newEntity.getEntity()); + + if (MapUtils.isNotEmpty(newEntity.getReferredEntities())) { + for (Map.Entry entry : newEntity.getReferredEntities().entrySet()) { + ret.addReferredEntity(entry.getKey(), entry.getValue()); + } + } + + LOG.info("Created {} entity: name={}, guid={}", newEntity.getEntity().getTypeName(), newEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), newEntity.getEntity().getGuid()); + } + } + } + + clearRelationshipAttributes(ret); + + return ret; + } + + /** + * Registers an entity in atlas + * @param entities + * @return + * @throws Exception + */ + private AtlasEntitiesWithExtInfo registerInstances(AtlasEntitiesWithExtInfo entities) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("creating {} entities: {}", entities.getEntities().size(), entities); + } + + AtlasEntitiesWithExtInfo ret = null; + EntityMutationResponse response = atlasClientV2.createEntities(entities); + List createdEntities = response.getEntitiesByOperation(EntityMutations.EntityOperation.CREATE); + + if (CollectionUtils.isNotEmpty(createdEntities)) { + ret = new AtlasEntitiesWithExtInfo(); + + for (AtlasEntityHeader createdEntity : createdEntities) { + AtlasEntityWithExtInfo entity = 
atlasClientV2.getEntityByGuid(createdEntity.getGuid()); + + ret.addEntity(entity.getEntity()); + + if (MapUtils.isNotEmpty(entity.getReferredEntities())) { + for (Map.Entry entry : entity.getReferredEntities().entrySet()) { + ret.addReferredEntity(entry.getKey(), entry.getValue()); + } + } + + LOG.info("Created {} entity: name={}, guid={}", entity.getEntity().getTypeName(), entity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), entity.getEntity().getGuid()); + } + } + + clearRelationshipAttributes(ret); + + return ret; + } + + private void updateInstance(AtlasEntityWithExtInfo entity) throws AtlasServiceException { + if (LOG.isDebugEnabled()) { + LOG.debug("updating {} entity: {}", entity.getEntity().getTypeName(), entity); + } + + atlasClientV2.updateEntity(entity); + + LOG.info("Updated {} entity: name={}, guid={}", entity.getEntity().getTypeName(), entity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), entity.getEntity().getGuid()); + } + + /** + * Create a Hive Database entity + * @param hiveDB The Hive {@link Database} object from which to map properties + * @return new Hive Database AtlasEntity + * @throws HiveException + */ + private AtlasEntity toDbEntity(Database hiveDB) throws HiveException { + return toDbEntity(hiveDB, null); + } + + private AtlasEntity toDbEntity(Database hiveDB, AtlasEntity dbEntity) { + if (dbEntity == null) { + dbEntity = new AtlasEntity(HiveDataTypes.HIVE_DB.getName()); + } + + String dbName = getDatabaseName(hiveDB); + + dbEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getDBQualifiedName(metadataNamespace, dbName)); + dbEntity.setAttribute(ATTRIBUTE_NAME, dbName); + dbEntity.setAttribute(ATTRIBUTE_DESCRIPTION, hiveDB.getDescription()); + dbEntity.setAttribute(ATTRIBUTE_OWNER, hiveDB.getOwnerName()); + + dbEntity.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace); + dbEntity.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(hiveDB.getLocationUri())); + dbEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveDB.getParameters()); + + if (hiveDB.getOwnerType() != null) { + dbEntity.setAttribute(ATTRIBUTE_OWNER_TYPE, OWNER_TYPE_TO_ENUM_VALUE.get(hiveDB.getOwnerType().getValue())); + } + + return dbEntity; + } + + public static String getDatabaseName(Database hiveDB) { + String dbName = hiveDB.getName().toLowerCase(); + String catalogName = hiveDB.getCatalogName() != null ? 
hiveDB.getCatalogName().toLowerCase() : null; + + if (StringUtils.isNotEmpty(catalogName) && !StringUtils.equals(catalogName, DEFAULT_METASTORE_CATALOG)) { + dbName = catalogName + SEP + dbName; + } + + return dbName; + } + + /** + * Create a new table instance in Atlas + * @param database AtlasEntity for Hive {@link AtlasEntity} to which this table belongs + * @param hiveTable reference to the Hive {@link Table} from which to map properties + * @return Newly created Hive AtlasEntity + * @throws Exception + */ + private AtlasEntityWithExtInfo toTableEntity(AtlasEntity database, Table hiveTable) throws AtlasHookException { + return toTableEntity(database, hiveTable, null); + } + + private AtlasEntityWithExtInfo toTableEntity(AtlasEntity database, final Table hiveTable, AtlasEntityWithExtInfo table) throws AtlasHookException { + if (table == null) { + table = new AtlasEntityWithExtInfo(new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName())); + } + + AtlasEntity tableEntity = table.getEntity(); + String tableQualifiedName = getTableQualifiedName(metadataNamespace, hiveTable); + long createTime = BaseHiveEvent.getTableCreateTime(hiveTable); + long lastAccessTime = hiveTable.getLastAccessTime() > 0 ? hiveTable.getLastAccessTime() : createTime; + + tableEntity.setRelationshipAttribute(ATTRIBUTE_DB, AtlasTypeUtil.getAtlasRelatedObjectId(database, RELATIONSHIP_HIVE_TABLE_DB)); + tableEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName); + tableEntity.setAttribute(ATTRIBUTE_NAME, hiveTable.getTableName().toLowerCase()); + tableEntity.setAttribute(ATTRIBUTE_OWNER, hiveTable.getOwner()); + + tableEntity.setAttribute(ATTRIBUTE_CREATE_TIME, createTime); + tableEntity.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime); + tableEntity.setAttribute(ATTRIBUTE_RETENTION, hiveTable.getRetention()); + tableEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveTable.getParameters()); + tableEntity.setAttribute(ATTRIBUTE_COMMENT, hiveTable.getParameters().get(ATTRIBUTE_COMMENT)); + tableEntity.setAttribute(ATTRIBUTE_TABLE_TYPE, hiveTable.getTableType().name()); + tableEntity.setAttribute(ATTRIBUTE_TEMPORARY, hiveTable.isTemporary()); + + if (hiveTable.getViewOriginalText() != null) { + tableEntity.setAttribute(ATTRIBUTE_VIEW_ORIGINAL_TEXT, hiveTable.getViewOriginalText()); + } + + if (hiveTable.getViewExpandedText() != null) { + tableEntity.setAttribute(ATTRIBUTE_VIEW_EXPANDED_TEXT, hiveTable.getViewExpandedText()); + } + + AtlasEntity sdEntity = toStorageDescEntity(hiveTable.getSd(), tableQualifiedName, getStorageDescQFName(tableQualifiedName), AtlasTypeUtil.getObjectId(tableEntity)); + List partKeys = toColumns(hiveTable.getPartitionKeys(), tableEntity, RELATIONSHIP_HIVE_TABLE_PART_KEYS); + List columns = toColumns(hiveTable.getCols(), tableEntity, RELATIONSHIP_HIVE_TABLE_COLUMNS); + + tableEntity.setRelationshipAttribute(ATTRIBUTE_STORAGEDESC, AtlasTypeUtil.getAtlasRelatedObjectId(sdEntity, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC)); + tableEntity.setRelationshipAttribute(ATTRIBUTE_PARTITION_KEYS, AtlasTypeUtil.getAtlasRelatedObjectIds(partKeys, RELATIONSHIP_HIVE_TABLE_PART_KEYS)); + tableEntity.setRelationshipAttribute(ATTRIBUTE_COLUMNS, AtlasTypeUtil.getAtlasRelatedObjectIds(columns, RELATIONSHIP_HIVE_TABLE_COLUMNS)); + + table.addReferredEntity(database); + table.addReferredEntity(sdEntity); + + if (partKeys != null) { + for (AtlasEntity partKey : partKeys) { + table.addReferredEntity(partKey); + } + } + + if (columns != null) { + for (AtlasEntity column : columns) { + table.addReferredEntity(column); 
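+                // each column is added as a referred entity so it is created together with the table in one mutation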
+ } + } + + table.setEntity(tableEntity); + + return table; + } + + private AtlasEntity toStorageDescEntity(StorageDescriptor storageDesc, String tableQualifiedName, String sdQualifiedName, AtlasObjectId tableId ) throws AtlasHookException { + AtlasEntity ret = new AtlasEntity(HiveDataTypes.HIVE_STORAGEDESC.getName()); + + ret.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(tableId, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC)); + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName); + ret.setAttribute(ATTRIBUTE_PARAMETERS, storageDesc.getParameters()); + ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(storageDesc.getLocation())); + ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, storageDesc.getInputFormat()); + ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, storageDesc.getOutputFormat()); + ret.setAttribute(ATTRIBUTE_COMPRESSED, storageDesc.isCompressed()); + ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, storageDesc.getNumBuckets()); + ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, storageDesc.isStoredAsSubDirectories()); + + if (storageDesc.getBucketCols().size() > 0) { + ret.setAttribute(ATTRIBUTE_BUCKET_COLS, storageDesc.getBucketCols()); + } + + if (storageDesc.getSerdeInfo() != null) { + SerDeInfo serdeInfo = storageDesc.getSerdeInfo(); + + LOG.debug("serdeInfo = {}", serdeInfo); + // SkewedInfo skewedInfo = storageDesc.getSkewedInfo(); + + AtlasStruct serdeInfoStruct = new AtlasStruct(HiveDataTypes.HIVE_SERDE.getName()); + + serdeInfoStruct.setAttribute(ATTRIBUTE_NAME, serdeInfo.getName()); + serdeInfoStruct.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, serdeInfo.getSerializationLib()); + serdeInfoStruct.setAttribute(ATTRIBUTE_PARAMETERS, serdeInfo.getParameters()); + + ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfoStruct); + } + + if (CollectionUtils.isNotEmpty(storageDesc.getSortCols())) { + List sortColsStruct = new ArrayList<>(); + + for (Order sortcol : storageDesc.getSortCols()) { + String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName(); + AtlasStruct colStruct = new AtlasStruct(hiveOrderName); + colStruct.setAttribute("col", sortcol.getCol()); + colStruct.setAttribute("order", sortcol.getOrder()); + + sortColsStruct.add(colStruct); + } + + ret.setAttribute(ATTRIBUTE_SORT_COLS, sortColsStruct); + } + + return ret; + } + + private List toColumns(List schemaList, AtlasEntity table, String relationshipType) throws AtlasHookException { + List ret = new ArrayList<>(); + + int columnPosition = 0; + for (FieldSchema fs : schemaList) { + LOG.debug("Processing field {}", fs); + + AtlasEntity column = new AtlasEntity(HiveDataTypes.HIVE_COLUMN.getName()); + + column.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(table, relationshipType)); + column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getColumnQualifiedName((String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), fs.getName())); + column.setAttribute(ATTRIBUTE_NAME, fs.getName()); + column.setAttribute(ATTRIBUTE_OWNER, table.getAttribute(ATTRIBUTE_OWNER)); + column.setAttribute(ATTRIBUTE_COL_TYPE, fs.getType()); + column.setAttribute(ATTRIBUTE_COL_POSITION, columnPosition++); + column.setAttribute(ATTRIBUTE_COMMENT, fs.getComment()); + + ret.add(column); + } + return ret; + } + + /** + * Gets the atlas entity for the database + * @param databaseName database Name + * @param metadataNamespace cluster name + * @return AtlasEntity for database if exists, else null + * @throws Exception + */ + private AtlasEntityWithExtInfo findDatabase(String 
metadataNamespace, String databaseName) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("Searching Atlas for database {}", databaseName); + } + + String typeName = HiveDataTypes.HIVE_DB.getName(); + + return findEntity(typeName, getDBQualifiedName(metadataNamespace, databaseName), true, true); + } + + /** + * Gets Atlas Entity for the table + * + * @param hiveTable + * @return table entity from Atlas if exists, else null + * @throws Exception + */ + private AtlasEntityWithExtInfo findTableEntity(Table hiveTable) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("Searching Atlas for table {}.{}", hiveTable.getDbName(), hiveTable.getTableName()); + } + + String typeName = HiveDataTypes.HIVE_TABLE.getName(); + String tblQualifiedName = getTableQualifiedName(getMetadataNamespace(), hiveTable.getDbName(), hiveTable.getTableName()); + + return findEntity(typeName, tblQualifiedName, true, true); + } + + private AtlasEntityWithExtInfo findProcessEntity(String qualifiedName) throws Exception{ + if (LOG.isDebugEnabled()) { + LOG.debug("Searching Atlas for process {}", qualifiedName); + } + + String typeName = HiveDataTypes.HIVE_PROCESS.getName(); + + return findEntity(typeName, qualifiedName , true , true); + } + + private AtlasEntityWithExtInfo findEntity(final String typeName, final String qualifiedName , boolean minExtInfo, boolean ignoreRelationship) throws AtlasServiceException { + AtlasEntityWithExtInfo ret = null; + + try { + ret = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, qualifiedName), minExtInfo, ignoreRelationship); + } catch (AtlasServiceException e) { + if(e.getStatus() == ClientResponse.Status.NOT_FOUND) { + return null; + } + + throw e; + } + + return ret; + } + + private String getCreateTableString(Table table, String location){ + String colString = ""; + List colList = table.getAllCols(); + + if (colList != null) { + for (FieldSchema col : colList) { + colString += col.getName() + " " + col.getType() + ","; + } + + if (colList.size() > 0) { + colString = colString.substring(0, colString.length() - 1); + colString = "(" + colString + ")"; + } + } + + String query = "create external table " + table.getTableName() + colString + " location '" + location + "'"; + + return query; + } + + private String lower(String str) { + if (StringUtils.isEmpty(str)) { + return ""; + } + + return str.toLowerCase().trim(); + } + + + /** + * Construct the qualified name used to uniquely identify a Table instance in Atlas. + * @param metadataNamespace Metadata namespace of the cluster to which the Hive component belongs + * @param table hive table for which the qualified name is needed + * @return Unique qualified name to identify the Table instance in Atlas. + */ + private static String getTableQualifiedName(String metadataNamespace, Table table) { + return getTableQualifiedName(metadataNamespace, table.getDbName(), table.getTableName(), table.isTemporary()); + } + + private String getHdfsPathQualifiedName(String hdfsPath) { + return String.format("%s@%s", hdfsPath, metadataNamespace); + } + + /** + * Construct the qualified name used to uniquely identify a Database instance in Atlas. + * @param metadataNamespace Name of the cluster to which the Hive component belongs + * @param dbName Name of the Hive database + * @return Unique qualified name to identify the Database instance in Atlas. 
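+     * For example, a database named "sales" in metadata namespace "primary" maps to "sales@primary" (illustrative names).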
+ */ + public static String getDBQualifiedName(String metadataNamespace, String dbName) { + return String.format("%s@%s", dbName.toLowerCase(), metadataNamespace); + } + + /** + * Construct the qualified name used to uniquely identify a Table instance in Atlas. + * @param metadataNamespace Name of the cluster to which the Hive component belongs + * @param dbName Name of the Hive database to which the Table belongs + * @param tableName Name of the Hive table + * @param isTemporaryTable is this a temporary table + * @return Unique qualified name to identify the Table instance in Atlas. + */ + public static String getTableQualifiedName(String metadataNamespace, String dbName, String tableName, boolean isTemporaryTable) { + String tableTempName = tableName; + + if (isTemporaryTable) { + if (SessionState.get() != null && SessionState.get().getSessionId() != null) { + tableTempName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId(); + } else { + tableTempName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10); + } + } + + return String.format("%s.%s@%s", dbName.toLowerCase(), tableTempName.toLowerCase(), metadataNamespace); + } + + public static String getTableProcessQualifiedName(String metadataNamespace, Table table) { + String tableQualifiedName = getTableQualifiedName(metadataNamespace, table); + long createdTime = getTableCreatedTime(table); + + return tableQualifiedName + SEP + createdTime; + } + + + /** + * Construct the qualified name used to uniquely identify a Table instance in Atlas. + * @param metadataNamespace Metadata namespace of the cluster to which the Hive component belongs + * @param dbName Name of the Hive database to which the Table belongs + * @param tableName Name of the Hive table + * @return Unique qualified name to identify the Table instance in Atlas. 
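+     * For example, table "orders" in database "sales" and namespace "primary" maps to "sales.orders@primary" (illustrative names).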
+ */ + public static String getTableQualifiedName(String metadataNamespace, String dbName, String tableName) { + return getTableQualifiedName(metadataNamespace, dbName, tableName, false); + } + public static String getStorageDescQFName(String tableQualifiedName) { + return tableQualifiedName + "_storage"; + } + + public static String getColumnQualifiedName(final String tableQualifiedName, final String colName) { + final String[] parts = tableQualifiedName.split("@"); + final String tableName = parts[0]; + final String metadataNamespace = parts[1]; + + return String.format("%s.%s@%s", tableName, colName.toLowerCase(), metadataNamespace); + } + + public static long getTableCreatedTime(Table table) { + return table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR; + } + + private void clearRelationshipAttributes(AtlasEntitiesWithExtInfo entities) { + if (entities != null) { + if (entities.getEntities() != null) { + for (AtlasEntity entity : entities.getEntities()) { + clearRelationshipAttributes(entity);; + } + } + + if (entities.getReferredEntities() != null) { + clearRelationshipAttributes(entities.getReferredEntities().values()); + } + } + } + + private void clearRelationshipAttributes(AtlasEntityWithExtInfo entity) { + if (entity != null) { + clearRelationshipAttributes(entity.getEntity()); + + if (entity.getReferredEntities() != null) { + clearRelationshipAttributes(entity.getReferredEntities().values()); + } + } + } + + private void clearRelationshipAttributes(Collection entities) { + if (entities != null) { + for (AtlasEntity entity : entities) { + clearRelationshipAttributes(entity); + } + } + } + + private void clearRelationshipAttributes(AtlasEntity entity) { + if (entity != null && entity.getRelationshipAttributes() != null) { + entity.getRelationshipAttributes().clear(); + } + } + + private boolean isTableWithDatabaseName(String tableName) { + boolean ret = false; + if (tableName.contains(".")) { + ret = true; + } + return ret; + } + + private List getAllDatabaseInCluster() throws AtlasServiceException { + + List entities = new ArrayList<>(); + final int pageSize = pageLimit; + + SearchParameters.FilterCriteria fc = new SearchParameters.FilterCriteria(); + fc.setAttributeName(ATTRIBUTE_CLUSTER_NAME); + fc.setAttributeValue(metadataNamespace); + fc.setOperator(SearchParameters.Operator.EQ); + + for (int i = 0; ; i++) { + int offset = pageSize * i; + LOG.info("Retrieving databases: offset={}, pageSize={}", offset, pageSize); + + AtlasSearchResult searchResult = atlasClientV2.basicSearch(HIVE_TYPE_DB, fc,null, null, true, pageSize, offset); + + List entityHeaders = searchResult == null ? null : searchResult.getEntities(); + int dbCount = entityHeaders == null ? 0 : entityHeaders.size(); + + LOG.info("Retrieved {} databases of {} cluster", dbCount, metadataNamespace); + + if (dbCount > 0) { + entities.addAll(entityHeaders); + } + + if (dbCount < pageSize) { // last page + break; + } + } + + return entities; + } + + private List getAllTablesInDb(String databaseGuid) throws AtlasServiceException { + + List entities = new ArrayList<>(); + final int pageSize = pageLimit; + + for (int i = 0; ; i++) { + int offset = pageSize * i; + LOG.info("Retrieving tables: offset={}, pageSize={}", offset, pageSize); + + AtlasSearchResult searchResult = atlasClientV2.relationshipSearch(databaseGuid, HIVE_TABLE_DB_EDGE_LABEL, null, null, true, pageSize, offset); + + List entityHeaders = searchResult == null ? null : searchResult.getEntities(); + int tableCount = entityHeaders == null ? 
0 : entityHeaders.size(); + + LOG.info("Retrieved {} tables of {} database", tableCount, databaseGuid); + + if (tableCount > 0) { + entities.addAll(entityHeaders); + } + + if (tableCount < pageSize) { // last page + break; + } + } + + return entities; + } + + public String getHiveDatabaseName(String qualifiedName) { + + if (StringUtils.isNotEmpty(qualifiedName)) { + String[] split = qualifiedName.split("@"); + if (split.length > 0) { + return split[0]; + } + } + return null; + } + + + public String getHiveTableName(String qualifiedName, boolean isTemporary) { + + if (StringUtils.isNotEmpty(qualifiedName)) { + String tableName = StringUtils.substringBetween(qualifiedName, ".", "@"); + if (!isTemporary) { + return tableName; + } else { + if (StringUtils.isNotEmpty(tableName)) { + String[] splitTemp = tableName.split(TEMP_TABLE_PREFIX); + if (splitTemp.length > 0) { + return splitTemp[0]; + } + } + } + } + return null; + } + + private void deleteByGuid(List guidTodelete) throws AtlasServiceException { + + if (CollectionUtils.isNotEmpty(guidTodelete)) { + + for (String guid : guidTodelete) { + EntityMutationResponse response = atlasClientV2.deleteEntityByGuid(guid); + + if (response.getDeletedEntities().size() < 1) { + LOG.info("Entity with guid : {} is not deleted", guid); + } else { + LOG.info("Entity with guid : {} is deleted", guid); + } + } + } else { + LOG.info("No Entity to delete from Atlas"); + } + } + + public void deleteEntitiesForNonExistingHiveMetadata(boolean failOnError) throws Exception { + + //fetch databases from Atlas + List dbs = null; + try { + dbs = getAllDatabaseInCluster(); + LOG.info("Total Databases in cluster {} : {} ", metadataNamespace, dbs.size()); + } catch (AtlasServiceException e) { + LOG.error("Failed to retrieve database entities for cluster {} from Atlas", metadataNamespace, e); + if (failOnError) { + throw e; + } + } + + if (CollectionUtils.isNotEmpty(dbs)) { + //iterate all dbs to check if exists in hive + for (AtlasEntityHeader db : dbs) { + + String dbGuid = db.getGuid(); + String hiveDbName = getHiveDatabaseName((String) db.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + + if (StringUtils.isEmpty(hiveDbName)) { + LOG.error("Failed to get database from qualifiedName: {}, guid: {} ", db.getAttribute(ATTRIBUTE_QUALIFIED_NAME), dbGuid); + continue; + } + + List tables; + try { + tables = getAllTablesInDb(dbGuid); + LOG.info("Total Tables in database {} : {} ", hiveDbName, tables.size()); + } catch (AtlasServiceException e) { + LOG.error("Failed to retrieve table entities for database {} from Atlas", hiveDbName, e); + if (failOnError) { + throw e; + } + continue; + } + + List guidsToDelete = new ArrayList<>(); + if (!hiveClient.databaseExists(hiveDbName)) { + + //table guids + if (CollectionUtils.isNotEmpty(tables)) { + for (AtlasEntityHeader table : tables) { + guidsToDelete.add(table.getGuid()); + } + } + + //db guid + guidsToDelete.add(db.getGuid()); + LOG.info("Added database {}.{} and its {} tables to delete", metadataNamespace, hiveDbName, tables.size()); + + } else { + //iterate all table of db to check if it exists + if (CollectionUtils.isNotEmpty(tables)) { + for (AtlasEntityHeader table : tables) { + String hiveTableName = getHiveTableName((String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), true); + + if (StringUtils.isEmpty(hiveTableName)) { + LOG.error("Failed to get table from qualifiedName: {}, guid: {} ", table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), table.getGuid()); + continue; + } + + try { + hiveClient.getTable(hiveDbName, hiveTableName, 
true); + } catch (InvalidTableException e) { //table doesn't exists + LOG.info("Added table {}.{} to delete", hiveDbName, hiveTableName); + + guidsToDelete.add(table.getGuid()); + } catch (HiveException e) { + LOG.error("Failed to get table {}.{} from Hive", hiveDbName, hiveTableName, e); + + if (failOnError) { + throw e; + } + } + } + } + } + + //delete entities + if (CollectionUtils.isNotEmpty(guidsToDelete)) { + try { + deleteByGuid(guidsToDelete); + } catch (AtlasServiceException e) { + LOG.error("Failed to delete Atlas entities for database {}", hiveDbName, e); + + if (failOnError) { + throw e; + } + } + + } + } + + } else { + LOG.info("No database found in service."); + } + + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeV2.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeV2.java new file mode 100644 index 0000000000..0627c0e095 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeV2.java @@ -0,0 +1,1036 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.hive.bridge; + +import org.apache.atlas.AtlasClientV2; +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.exception.AtlasBaseException; +import org.apache.atlas.model.impexp.AtlasImportRequest; +import org.apache.atlas.model.impexp.AtlasImportResult; +import org.apache.atlas.model.typedef.AtlasTypesDef; +import org.apache.atlas.type.AtlasType; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.hive.hook.events.BaseHiveEvent; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.hook.AtlasHookException; +import org.apache.atlas.utils.AtlasPathExtractorUtil; +import org.apache.atlas.utils.HdfsNameServiceResolver; +import org.apache.atlas.utils.AtlasConfigurationUtil; +import org.apache.atlas.utils.PathExtractorContext; +import org.apache.atlas.utils.LruCache; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.instance.AtlasStruct; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.commons.cli.MissingArgumentException; +import org.apache.commons.collections.CollectionUtils; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.OutputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.*; + +/** + * A Bridge Utility that imports metadata into zip file from the Hive Meta Store + * which can be exported at Atlas + */ +public class HiveMetaStoreBridgeV2 { + private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridgeV2.class); + + private static final String OPTION_DATABASE_SHORT = "d"; + private static final String OPTION_TABLE_SHORT = "t"; + private static final String OPTION_IMPORT_DATA_FILE_SHORT = "f"; + private static final String OPTION_OUTPUT_FILEPATH_SHORT = "o"; + private static final String OPTION_IGNORE_BULK_IMPORT_SHORT = "i"; + + public static final String CONF_PREFIX = "atlas.hook.hive."; + public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase"; + public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION = CONF_PREFIX + 
"aws_s3.atlas.model.version"; + + public static final String CLUSTER_NAME_KEY = "atlas.cluster.name"; + public static final String HIVE_USERNAME = "atlas.hook.hive.default.username"; + public static final String HIVE_METADATA_NAMESPACE = "atlas.metadata.namespace"; + public static final String DEFAULT_CLUSTER_NAME = "primary"; + public static final String TEMP_TABLE_PREFIX = "_temp-"; + public static final String SEP = ":".intern(); + public static final String DEFAULT_METASTORE_CATALOG = "hive"; + public static final String HOOK_HIVE_PAGE_LIMIT = CONF_PREFIX + "page.limit"; + + private static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2 = "v2"; + private static final String ZIP_FILE_COMMENT_FORMAT = "{\"entitiesCount\":%d, \"total\":%d}"; + private static final int DEFAULT_PAGE_LIMIT = 10000; + private static final String DEFAULT_ZIP_FILE_NAME = "import-hive-output.zip"; + private static final String ZIP_ENTRY_ENTITIES = "entities.json"; + private static final String TYPES_DEF_JSON = "atlas-typesdef.json"; + + private static final String JSON_ARRAY_START = "["; + private static final String JSON_COMMA = ","; + private static final String JSON_EMPTY_OBJECT = "{}"; + private static final String JSON_ARRAY_END = "]"; + + private static int pageLimit = DEFAULT_PAGE_LIMIT; + private String awsS3AtlasModelVersion = null; + + private final String metadataNamespace; + private final Hive hiveClient; + private final AtlasClientV2 atlasClientV2; + private final boolean convertHdfsPathToLowerCase; + + private ZipOutputStream zipOutputStream; + private String outZipFileName; + private int totalProcessedEntities = 0; + + private final Map entityLRUCache = new LruCache<>(10000, 0); + private final Map hiveTablesAndAtlasEntity = new HashMap<>(); + private final Map dbEntities = new HashMap<>(); + private final List> databaseAndTableListToImport = new ArrayList<>(); + private final Map qualifiedNameGuidMap = new HashMap<>(); + + /** + * Construct a HiveMetaStoreBridgeV2. 
+ * @param hiveConf {@link HiveConf} for Hive component in the cluster + */ + public HiveMetaStoreBridgeV2(Configuration atlasProperties, HiveConf hiveConf, AtlasClientV2 atlasClientV2) throws Exception { + this.metadataNamespace = getMetadataNamespace(atlasProperties); + this.hiveClient = Hive.get(hiveConf); + this.atlasClientV2 = atlasClientV2; + this.convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false); + this.awsS3AtlasModelVersion = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2); + + if (atlasProperties != null) { + pageLimit = atlasProperties.getInteger(HOOK_HIVE_PAGE_LIMIT, DEFAULT_PAGE_LIMIT); + } + } + + public boolean exportDataToZipAndRunAtlasImport(CommandLine cmd) throws MissingArgumentException, IOException, HiveException, AtlasBaseException { + boolean ret = true; + boolean failOnError = cmd.hasOption("failOnError"); + + String databaseToImport = cmd.getOptionValue(OPTION_DATABASE_SHORT); + String tableToImport = cmd.getOptionValue(OPTION_TABLE_SHORT); + String importDataFile = cmd.getOptionValue(OPTION_IMPORT_DATA_FILE_SHORT); + String outputFileOrPath = cmd.getOptionValue(OPTION_OUTPUT_FILEPATH_SHORT); + + boolean ignoreBulkImport = cmd.hasOption(OPTION_IGNORE_BULK_IMPORT_SHORT); + + validateOutputFileOrPath(outputFileOrPath); + + try { + initializeZipStream(); + + if (isValidImportDataFile(importDataFile)) { + File f = new File(importDataFile); + + BufferedReader br = new BufferedReader(new FileReader(f)); + String line = null; + + while ((line = br.readLine()) != null) { + String val[] = line.split(":"); + + if (ArrayUtils.isNotEmpty(val)) { + databaseToImport = val[0]; + + if (val.length > 1) { + tableToImport = val[1]; + } else { + tableToImport = ""; + } + + importHiveDatabases(databaseToImport, tableToImport, failOnError); + } + } + } else { + importHiveDatabases(databaseToImport, tableToImport, failOnError); + } + + importHiveTables(failOnError); + importHiveColumns(failOnError); + } finally { + endWritingAndZipStream(); + } + + if (!ignoreBulkImport) { + runAtlasImport(); + } + + return ret; + } + + private void validateOutputFileOrPath(String outputFileOrPath) throws MissingArgumentException { + if (StringUtils.isBlank(outputFileOrPath)) { + throw new MissingArgumentException("Output Path/File can't be empty"); + } + + File fileOrDirToImport = new File(outputFileOrPath); + if (fileOrDirToImport.exists()) { + if (fileOrDirToImport.isDirectory()) { + this.outZipFileName = outputFileOrPath + File.separator + DEFAULT_ZIP_FILE_NAME; + LOG.info("The default output zip file {} will be created at {}", DEFAULT_ZIP_FILE_NAME, outputFileOrPath); + } else { + throw new MissingArgumentException("output file: " + outputFileOrPath + " already present"); + } + } else if (fileOrDirToImport.getParentFile().isDirectory() && outputFileOrPath.endsWith(".zip")) { + LOG.info("The mentioned output zip file {} will be created", outputFileOrPath); + this.outZipFileName = outputFileOrPath; + } else { + throw new MissingArgumentException("Invalid File/Path"); + } + } + + private boolean isValidImportDataFile(String importDataFile) throws MissingArgumentException { + boolean ret = false; + if (StringUtils.isNotBlank(importDataFile)) { + File dataFile = new File(importDataFile); + + if (!dataFile.exists() || !dataFile.canRead()) { + throw new MissingArgumentException("Invalid import data file"); + } + ret = true; + } + + return ret; + } + + private void initializeZipStream() throws IOException, 
AtlasBaseException { + this.zipOutputStream = new ZipOutputStream(getOutputStream(this.outZipFileName)); + + storeTypesDefToZip(new AtlasTypesDef()); + + startWritingEntitiesToZip(); + } + + private void storeTypesDefToZip(AtlasTypesDef typesDef) throws AtlasBaseException { + String jsonData = AtlasType.toJson(typesDef); + saveToZip(TYPES_DEF_JSON, jsonData); + } + + private void saveToZip(String fileName, String jsonData) throws AtlasBaseException { + try { + ZipEntry e = new ZipEntry(fileName); + zipOutputStream.putNextEntry(e); + writeBytes(jsonData); + zipOutputStream.closeEntry(); + } catch (IOException e) { + throw new AtlasBaseException(String.format("Error writing file %s.", fileName), e); + } + } + + private void startWritingEntitiesToZip() throws IOException { + zipOutputStream.putNextEntry(new ZipEntry(ZIP_ENTRY_ENTITIES)); + writeBytes(JSON_ARRAY_START); + } + + private String getDatabaseToImport(String TableWithDatabase) { + String ret = null; + String val[] = TableWithDatabase.split("\\."); + if (val.length > 1) { + ret = val[0]; + } + return ret; + } + + private String getTableToImport(String TableWithDatabase) { + String ret = null; + String val[] = TableWithDatabase.split("\\."); + if (val.length > 1) { + ret = val[1]; + } + return ret; + } + + private void importHiveDatabases(String databaseToImport, String tableWithDatabaseToImport, boolean failOnError) throws HiveException, AtlasBaseException { + LOG.info("Importing Hive Databases"); + + List databaseNames = null; + + if (StringUtils.isEmpty(databaseToImport) && StringUtils.isNotEmpty(tableWithDatabaseToImport)) { + if (isTableWithDatabaseName(tableWithDatabaseToImport)) { + databaseToImport = getDatabaseToImport(tableWithDatabaseToImport); + tableWithDatabaseToImport = getTableToImport(tableWithDatabaseToImport); + } + } + + if (StringUtils.isEmpty(databaseToImport)) { + //when database to import is empty, import all + databaseNames = hiveClient.getAllDatabases(); + } else { + //when database to import has some value then, import that db and all table under it. 
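+            //the value is passed to Hive as a database name pattern, so it may resolve to more than one database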
+ databaseNames = hiveClient.getDatabasesByPattern(databaseToImport); + } + + if (!CollectionUtils.isEmpty(databaseNames)) { + LOG.info("Found {} databases", databaseNames.size()); + for (String databaseName : databaseNames) { + try { + if (!dbEntities.containsKey(databaseName)) { + LOG.info("Importing Hive Database {}", databaseName); + AtlasEntityWithExtInfo dbEntity = writeDatabase(databaseName); + if (dbEntity != null) { + dbEntities.put(databaseName, dbEntity.getEntity()); + } + } + databaseAndTableListToImport.add(Collections.singletonMap(databaseName, tableWithDatabaseToImport)); + } catch (IOException e) { + LOG.error("Import failed for hive database {}", databaseName, e); + + if (failOnError) { + throw new AtlasBaseException(e.getMessage(), e); + } + } + } + } else { + LOG.error("No database found"); + if (failOnError) { + throw new AtlasBaseException("No database found"); + } + } + } + + private void writeEntity(AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) throws IOException { + if (MapUtils.isNotEmpty(entityWithExtInfo.getReferredEntities())) { + Iterator> itr = entityWithExtInfo.getReferredEntities().entrySet().iterator(); + while (itr.hasNext()) { + Map.Entry eachEntity = itr.next(); + if (eachEntity.getValue().getTypeName().equalsIgnoreCase(HiveDataTypes.HIVE_DB.getName())) { + itr.remove(); + } + } + } + + if (!entityLRUCache.containsKey(entityWithExtInfo.getEntity().getGuid())) { + entityLRUCache.put(entityWithExtInfo.getEntity().getGuid(), entityWithExtInfo); + writeBytes(AtlasType.toJson(entityWithExtInfo) + JSON_COMMA); + } + totalProcessedEntities++; + } + + private void endWritingAndZipStream() throws IOException { + writeBytes(JSON_EMPTY_OBJECT); + writeBytes(JSON_ARRAY_END); + setStreamSize(totalProcessedEntities); + close(); + } + + private void flush() { + try { + zipOutputStream.flush(); + } catch (IOException e) { + LOG.error("Error: Flush: ", e); + } + } + + private void close() throws IOException { + zipOutputStream.flush(); + zipOutputStream.closeEntry(); + zipOutputStream.close(); + } + + private void writeBytes(String payload) throws IOException { + zipOutputStream.write(payload.getBytes()); + } + + private OutputStream getOutputStream(String fileToWrite) throws IOException { + return FileUtils.openOutputStream(new File(fileToWrite)); + } + + public String getMetadataNamespace(Configuration config) { + return AtlasConfigurationUtil.getRecentString(config, HIVE_METADATA_NAMESPACE, getClusterName(config)); + } + + private String getClusterName(Configuration config) { + return config.getString(CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME); + } + + public String getMetadataNamespace() { + return metadataNamespace; + } + + public boolean isConvertHdfsPathToLowerCase() { + return convertHdfsPathToLowerCase; + } + + /** + * Imports Hive tables if databaseAndTableListToImport is populated + * @param failOnError + * @throws Exception + */ + public void importHiveTables(boolean failOnError) throws HiveException, AtlasBaseException { + LOG.info("Importing Hive Tables"); + + int tablesImported = 0; + + if (CollectionUtils.isNotEmpty(databaseAndTableListToImport) && MapUtils.isNotEmpty(dbEntities)) { + for (Map eachEntry : databaseAndTableListToImport) { + final List
tableObjects; + + String databaseName = eachEntry.keySet().iterator().next(); + + if (StringUtils.isEmpty(eachEntry.values().iterator().next())) { + tableObjects = hiveClient.getAllTableObjects(databaseName); + + populateQualifiedNameGuidMap(HiveDataTypes.HIVE_DB.getName(), (String) dbEntities.get(databaseName).getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + } else { + List tableNames = hiveClient.getTablesByPattern(databaseName, eachEntry.values().iterator().next()); + tableObjects = new ArrayList<>(); + + for (String tableName : tableNames) { + Table table = hiveClient.getTable(databaseName, tableName); + tableObjects.add(table); + populateQualifiedNameGuidMap(HiveDataTypes.HIVE_TABLE.getName(), getTableQualifiedName(metadataNamespace, table)); + } + } + + if (!CollectionUtils.isEmpty(tableObjects)) { + LOG.info("Found {} tables to import in database {}", tableObjects.size(), databaseName); + + try { + for (Table table : tableObjects) { + int imported = importTable(dbEntities.get(databaseName), table, failOnError); + + tablesImported += imported; + } + } finally { + if (tablesImported == tableObjects.size()) { + LOG.info("Successfully imported {} tables from database {}", tablesImported, databaseName); + } else { + LOG.error("Imported {} of {} tables from database {}. Please check logs for errors during import", + tablesImported, tableObjects.size(), databaseName); + } + } + } else { + LOG.error("No tables to import in database {}", databaseName); + if (failOnError) { + throw new AtlasBaseException("No tables to import in database - " + databaseName); + } + } + } + } + + dbEntities.clear(); + } + + private void populateQualifiedNameGuidMap(String typeName, String qualifiedName) { + try { + AtlasEntitiesWithExtInfo entitiesWithExtInfo = atlasClientV2.getEntitiesByAttribute(typeName, Collections.singletonList(Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, qualifiedName)), true, false); + + if (entitiesWithExtInfo != null && entitiesWithExtInfo.getEntities() != null) { + for (AtlasEntity entity : entitiesWithExtInfo.getEntities()) { + qualifiedNameGuidMap.put((String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), entity.getGuid()); + + for(Map.Entry eachEntry : entitiesWithExtInfo.getReferredEntities().entrySet()) { + qualifiedNameGuidMap.put((String) eachEntry.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME), eachEntry.getKey()); + } + + if (typeName.equals(HiveDataTypes.HIVE_DB.getName())) { + for (String eachRelatedGuid : getAllRelatedGuids(entity)) { + AtlasEntityWithExtInfo relatedEntity = atlasClientV2.getEntityByGuid(eachRelatedGuid, true, false); + + qualifiedNameGuidMap.put((String) relatedEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), relatedEntity.getEntity().getGuid()); + for (Map.Entry eachEntry : relatedEntity.getReferredEntities().entrySet()) { + qualifiedNameGuidMap.put((String) eachEntry.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME), eachEntry.getKey()); + } + } + } + } + } + } catch (AtlasServiceException e) { + LOG.info("Unable to load the related entities for type {} and qualified name {} from Atlas", typeName, qualifiedName, e); + } + } + + private Set getAllRelatedGuids(AtlasEntity entity) { + Set relGuidsSet = new HashSet<>(); + + for (Object o : entity.getRelationshipAttributes().values()) { + if (o instanceof AtlasObjectId) { + relGuidsSet.add(((AtlasObjectId) o).getGuid()); + } else if (o instanceof List) { + for (Object id : (List) o) { + if (id instanceof AtlasObjectId) { + relGuidsSet.add(((AtlasObjectId) id).getGuid()); + } + if (id instanceof 
Map) { + relGuidsSet.add((String) ((Map) id).get("guid")); + } + } + } + } + + return relGuidsSet; + } + + public void importHiveColumns(boolean failOnError) throws AtlasBaseException { + LOG.info("Importing Hive Columns"); + + if (MapUtils.isEmpty(hiveTablesAndAtlasEntity)) { + if (LOG.isDebugEnabled()) { + LOG.debug("No hive table present to import columns"); + } + + return; + } + + for (Map.Entry eachTable : hiveTablesAndAtlasEntity.entrySet()) { + int columnsImported = 0; + List columnEntities = new ArrayList<>(); + + try { + List partKeys = toColumns(eachTable.getKey().getPartitionKeys(), eachTable.getValue(), RELATIONSHIP_HIVE_TABLE_PART_KEYS); + List columns = toColumns(eachTable.getKey().getCols(), eachTable.getValue(), RELATIONSHIP_HIVE_TABLE_COLUMNS); + + partKeys.stream().collect(Collectors.toCollection(() -> columnEntities)); + columns.stream().collect(Collectors.toCollection(() -> columnEntities)); + + for (AtlasEntity eachColumnEntity : columnEntities) { + writeEntityToZip(new AtlasEntityWithExtInfo(eachColumnEntity)); + columnsImported++; + } + } catch (IOException e) { + LOG.error("Column Import failed for hive table {}", eachTable.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME), e); + + if (failOnError) { + throw new AtlasBaseException(e.getMessage(), e); + } + } finally { + if (columnsImported == columnEntities.size()) { + LOG.info("Successfully imported {} columns for table {}", columnsImported, eachTable.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + } else { + LOG.error("Imported {} of {} columns for table {}. Please check logs for errors during import", columnsImported, columnEntities.size(), eachTable.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + } + } + } + + } + + private void runAtlasImport() { + AtlasImportRequest request = new AtlasImportRequest(); + request.setOption(AtlasImportRequest.UPDATE_TYPE_DEFINITION_KEY, "false"); + request.setOption(AtlasImportRequest.OPTION_KEY_FORMAT, AtlasImportRequest.OPTION_KEY_FORMAT_ZIP_DIRECT); + + try { + AtlasImportResult importResult = atlasClientV2.importData(request, this.outZipFileName); + + if (importResult.getOperationStatus() == AtlasImportResult.OperationStatus.SUCCESS) { + LOG.info("Successfully imported the zip file {} at Atlas and imported {} entities. Number of entities to be imported {}.", this.outZipFileName, importResult.getProcessedEntities().size(), totalProcessedEntities); + } else { + LOG.error("Failed to import or get the status of import for the zip file {} at Atlas. Number of entities to be imported {}.", this.outZipFileName, totalProcessedEntities); + } + } catch (AtlasServiceException e) { + LOG.error("Failed to import or get the status of import for the zip file {} at Atlas. Number of entities to be imported {}.", this.outZipFileName, totalProcessedEntities, e); + } + } + + public int importTable(AtlasEntity dbEntity, Table table, final boolean failOnError) throws AtlasBaseException { + try { + AtlasEntityWithExtInfo tableEntity = writeTable(dbEntity, table); + + hiveTablesAndAtlasEntity.put(table, tableEntity.getEntity()); + + if (table.getTableType() == TableType.EXTERNAL_TABLE) { + String processQualifiedName = getTableProcessQualifiedName(metadataNamespace, table); + String tableLocationString = isConvertHdfsPathToLowerCase() ? 
lower(table.getDataLocation().toString()) : table.getDataLocation().toString(); + Path location = table.getDataLocation(); + String query = getCreateTableString(table, tableLocationString); + + PathExtractorContext pathExtractorCtx = new PathExtractorContext(getMetadataNamespace(), isConvertHdfsPathToLowerCase(), awsS3AtlasModelVersion); + AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(location, pathExtractorCtx); + AtlasEntity pathInst = entityWithExtInfo.getEntity(); + AtlasEntity tableInst = tableEntity.getEntity(); + AtlasEntity processInst = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName()); + + long now = System.currentTimeMillis(); + + processInst.setGuid(getGuid(processQualifiedName)); + processInst.setAttribute(ATTRIBUTE_QUALIFIED_NAME, processQualifiedName); + processInst.setAttribute(ATTRIBUTE_NAME, query); + processInst.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace); + processInst.setRelationshipAttribute(ATTRIBUTE_INPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(pathInst, RELATIONSHIP_DATASET_PROCESS_INPUTS))); + processInst.setRelationshipAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(tableInst, RELATIONSHIP_PROCESS_DATASET_OUTPUTS))); + String userName = table.getOwner(); + if (StringUtils.isEmpty(userName)) { + userName = ApplicationProperties.get().getString(HIVE_USERNAME, "hive"); + } + processInst.setAttribute(ATTRIBUTE_USER_NAME, userName); + processInst.setAttribute(ATTRIBUTE_START_TIME, now); + processInst.setAttribute(ATTRIBUTE_END_TIME, now); + processInst.setAttribute(ATTRIBUTE_OPERATION_TYPE, "CREATETABLE"); + processInst.setAttribute(ATTRIBUTE_QUERY_TEXT, query); + processInst.setAttribute(ATTRIBUTE_QUERY_ID, query); + processInst.setAttribute(ATTRIBUTE_QUERY_PLAN, "{}"); + processInst.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(query)); + + AtlasEntitiesWithExtInfo createTableProcess = new AtlasEntitiesWithExtInfo(); + + createTableProcess.addEntity(processInst); + + if (pathExtractorCtx.getKnownEntities() != null) { + pathExtractorCtx.getKnownEntities().values().forEach(entity -> createTableProcess.addEntity(entity)); + } else { + createTableProcess.addEntity(pathInst); + } + + writeEntitiesToZip(createTableProcess); + } + + return 1; + } catch (Exception e) { + LOG.error("Import failed for hive_table {}", table.getTableName(), e); + + if (failOnError) { + throw new AtlasBaseException(e.getMessage(), e); + } + + return 0; + } + } + + /** + * Write db entity + * @param databaseName + * @return + * @throws Exception + */ + private AtlasEntityWithExtInfo writeDatabase(String databaseName) throws HiveException, IOException { + AtlasEntityWithExtInfo ret = null; + Database db = hiveClient.getDatabase(databaseName); + + if (db != null) { + ret = new AtlasEntityWithExtInfo(toDbEntity(db)); + writeEntityToZip(ret); + } + + return ret; + } + + private AtlasEntityWithExtInfo writeTable(AtlasEntity dbEntity, Table table) throws AtlasHookException { + try { + AtlasEntityWithExtInfo tableEntity = toTableEntity(dbEntity, table); + writeEntityToZip(tableEntity); + + return tableEntity; + } catch (Exception e) { + throw new AtlasHookException("HiveMetaStoreBridgeV2.registerTable() failed.", e); + } + } + + /** + * Write an entity to Zip file + * @param entity + * @return + * @throws Exception + */ + private void writeEntityToZip(AtlasEntityWithExtInfo entity) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Writing {} entity: 
{}", entity.getEntity().getTypeName(), entity); + } + + writeEntity(entity); + clearRelationshipAttributes(entity.getEntity()); + flush(); + } + + /** + * Registers an entity in atlas + * @param entities + * @return + * @throws Exception + */ + private void writeEntitiesToZip(AtlasEntitiesWithExtInfo entities) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Writing {} entities: {}", entities.getEntities().size(), entities); + } + + for (AtlasEntity entity : entities.getEntities()) { + writeEntity(new AtlasEntityWithExtInfo(entity)); + } + + flush(); + clearRelationshipAttributes(entities); + } + + /** + * Create a Hive Database entity + * @param hiveDB The Hive {@link Database} object from which to map properties + * @return new Hive Database AtlasEntity + * @throws HiveException + */ + private AtlasEntity toDbEntity(Database hiveDB) { + return toDbEntity(hiveDB, null); + } + + private AtlasEntity toDbEntity(Database hiveDB, AtlasEntity dbEntity) { + if (dbEntity == null) { + dbEntity = new AtlasEntity(HiveDataTypes.HIVE_DB.getName()); + } + + String dbName = getDatabaseName(hiveDB); + + String qualifiedName = getDBQualifiedName(metadataNamespace, dbName); + dbEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, qualifiedName); + + dbEntity.setGuid(getGuid(true, qualifiedName)); + + dbEntity.setAttribute(ATTRIBUTE_NAME, dbName); + dbEntity.setAttribute(ATTRIBUTE_DESCRIPTION, hiveDB.getDescription()); + dbEntity.setAttribute(ATTRIBUTE_OWNER, hiveDB.getOwnerName()); + + dbEntity.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace); + dbEntity.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(hiveDB.getLocationUri())); + dbEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveDB.getParameters()); + + if (hiveDB.getOwnerType() != null) { + dbEntity.setAttribute(ATTRIBUTE_OWNER_TYPE, OWNER_TYPE_TO_ENUM_VALUE.get(hiveDB.getOwnerType().getValue())); + } + + return dbEntity; + } + + private String getDBGuidFromAtlas(String dBQualifiedName) { + String guid = null; + try { + guid = atlasClientV2.getEntityHeaderByAttribute(HiveDataTypes.HIVE_DB.getName(), Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, dBQualifiedName)).getGuid(); + } catch (AtlasServiceException e) { + LOG.warn("Failed to get DB guid from Atlas with qualified name {}", dBQualifiedName, e); + } + return guid; + } + + public static String getDatabaseName(Database hiveDB) { + String dbName = hiveDB.getName().toLowerCase(); + String catalogName = hiveDB.getCatalogName() != null ? hiveDB.getCatalogName().toLowerCase() : null; + + if (StringUtils.isNotEmpty(catalogName) && !StringUtils.equals(catalogName, DEFAULT_METASTORE_CATALOG)) { + dbName = catalogName + SEP + dbName; + } + + return dbName; + } + + /** + * Create a new table instance in Atlas + * @param database AtlasEntity for Hive {@link AtlasEntity} to which this table belongs + * @param hiveTable reference to the Hive {@link Table} from which to map properties + * @return Newly created Hive AtlasEntity + * @throws Exception + */ + private AtlasEntityWithExtInfo toTableEntity(AtlasEntity database, final Table hiveTable) throws AtlasHookException { + AtlasEntityWithExtInfo table = new AtlasEntityWithExtInfo(new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName())); + + AtlasEntity tableEntity = table.getEntity(); + String tableQualifiedName = getTableQualifiedName(metadataNamespace, hiveTable); + long createTime = BaseHiveEvent.getTableCreateTime(hiveTable); + long lastAccessTime = hiveTable.getLastAccessTime() > 0 ? 
hiveTable.getLastAccessTime() : createTime; + + tableEntity.setGuid(getGuid(tableQualifiedName)); + tableEntity.setRelationshipAttribute(ATTRIBUTE_DB, AtlasTypeUtil.getAtlasRelatedObjectId(database, RELATIONSHIP_HIVE_TABLE_DB)); + tableEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName); + tableEntity.setAttribute(ATTRIBUTE_NAME, hiveTable.getTableName().toLowerCase()); + tableEntity.setAttribute(ATTRIBUTE_OWNER, hiveTable.getOwner()); + + tableEntity.setAttribute(ATTRIBUTE_CREATE_TIME, createTime); + tableEntity.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime); + tableEntity.setAttribute(ATTRIBUTE_RETENTION, hiveTable.getRetention()); + tableEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveTable.getParameters()); + tableEntity.setAttribute(ATTRIBUTE_COMMENT, hiveTable.getParameters().get(ATTRIBUTE_COMMENT)); + tableEntity.setAttribute(ATTRIBUTE_TABLE_TYPE, hiveTable.getTableType().name()); + tableEntity.setAttribute(ATTRIBUTE_TEMPORARY, hiveTable.isTemporary()); + + if (hiveTable.getViewOriginalText() != null) { + tableEntity.setAttribute(ATTRIBUTE_VIEW_ORIGINAL_TEXT, hiveTable.getViewOriginalText()); + } + + if (hiveTable.getViewExpandedText() != null) { + tableEntity.setAttribute(ATTRIBUTE_VIEW_EXPANDED_TEXT, hiveTable.getViewExpandedText()); + } + + AtlasEntity sdEntity = toStorageDescEntity(hiveTable.getSd(), getStorageDescQFName(tableQualifiedName), AtlasTypeUtil.getObjectId(tableEntity)); + + tableEntity.setRelationshipAttribute(ATTRIBUTE_STORAGEDESC, AtlasTypeUtil.getAtlasRelatedObjectId(sdEntity, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC)); + + table.addReferredEntity(database); + table.addReferredEntity(sdEntity); + table.setEntity(tableEntity); + + return table; + } + + private AtlasEntity toStorageDescEntity(StorageDescriptor storageDesc, String sdQualifiedName, AtlasObjectId tableId) { + AtlasEntity ret = new AtlasEntity(HiveDataTypes.HIVE_STORAGEDESC.getName()); + + ret.setGuid(getGuid(sdQualifiedName)); + ret.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(tableId, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC)); + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName); + ret.setAttribute(ATTRIBUTE_PARAMETERS, storageDesc.getParameters()); + ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(storageDesc.getLocation())); + ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, storageDesc.getInputFormat()); + ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, storageDesc.getOutputFormat()); + ret.setAttribute(ATTRIBUTE_COMPRESSED, storageDesc.isCompressed()); + ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, storageDesc.getNumBuckets()); + ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, storageDesc.isStoredAsSubDirectories()); + + if (storageDesc.getBucketCols().size() > 0) { + ret.setAttribute(ATTRIBUTE_BUCKET_COLS, storageDesc.getBucketCols()); + } + + if (storageDesc.getSerdeInfo() != null) { + SerDeInfo serdeInfo = storageDesc.getSerdeInfo(); + + LOG.info("serdeInfo = {}", serdeInfo); + AtlasStruct serdeInfoStruct = new AtlasStruct(HiveDataTypes.HIVE_SERDE.getName()); + + serdeInfoStruct.setAttribute(ATTRIBUTE_NAME, serdeInfo.getName()); + serdeInfoStruct.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, serdeInfo.getSerializationLib()); + serdeInfoStruct.setAttribute(ATTRIBUTE_PARAMETERS, serdeInfo.getParameters()); + + ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfoStruct); + } + + if (CollectionUtils.isNotEmpty(storageDesc.getSortCols())) { + List sortColsStruct = new ArrayList<>(); + + for (Order sortcol : 
storageDesc.getSortCols()) { + String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName(); + AtlasStruct colStruct = new AtlasStruct(hiveOrderName); + colStruct.setAttribute("col", sortcol.getCol()); + colStruct.setAttribute("order", sortcol.getOrder()); + + sortColsStruct.add(colStruct); + } + + ret.setAttribute(ATTRIBUTE_SORT_COLS, sortColsStruct); + } + + return ret; + } + + private List toColumns(List schemaList, AtlasEntity table, String relationshipType) { + List ret = new ArrayList<>(); + + int columnPosition = 0; + for (FieldSchema fs : schemaList) { + LOG.debug("Processing field {}", fs); + + AtlasEntity column = new AtlasEntity(HiveDataTypes.HIVE_COLUMN.getName()); + + String columnQualifiedName = getColumnQualifiedName((String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), fs.getName()); + + column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, columnQualifiedName); + column.setGuid(getGuid(columnQualifiedName)); + + column.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(table, relationshipType)); + + column.setAttribute(ATTRIBUTE_NAME, fs.getName()); + column.setAttribute(ATTRIBUTE_OWNER, table.getAttribute(ATTRIBUTE_OWNER)); + column.setAttribute(ATTRIBUTE_COL_TYPE, fs.getType()); + column.setAttribute(ATTRIBUTE_COL_POSITION, columnPosition++); + column.setAttribute(ATTRIBUTE_COMMENT, fs.getComment()); + + ret.add(column); + } + return ret; + } + + private String getCreateTableString(Table table, String location){ + String colString = ""; + List colList = table.getAllCols(); + + if (colList != null) { + for (FieldSchema col : colList) { + colString += col.getName() + " " + col.getType() + ","; + } + + if (colList.size() > 0) { + colString = colString.substring(0, colString.length() - 1); + colString = "(" + colString + ")"; + } + } + + String query = "create external table " + table.getTableName() + colString + " location '" + location + "'"; + + return query; + } + + private String lower(String str) { + if (StringUtils.isEmpty(str)) { + return ""; + } + + return str.toLowerCase().trim(); + } + + /** + * Construct the qualified name used to uniquely identify a Table instance in Atlas. + * @param metadataNamespace Metadata namespace of the cluster to which the Hive component belongs + * @param table hive table for which the qualified name is needed + * @return Unique qualified name to identify the Table instance in Atlas. + */ + private static String getTableQualifiedName(String metadataNamespace, Table table) { + return getTableQualifiedName(metadataNamespace, table.getDbName(), table.getTableName(), table.isTemporary()); + } + + /** + * Construct the qualified name used to uniquely identify a Database instance in Atlas. + * @param metadataNamespace Name of the cluster to which the Hive component belongs + * @param dbName Name of the Hive database + * @return Unique qualified name to identify the Database instance in Atlas. + */ + public static String getDBQualifiedName(String metadataNamespace, String dbName) { + return String.format("%s@%s", dbName.toLowerCase(), metadataNamespace); + } + + /** + * Construct the qualified name used to uniquely identify a Table instance in Atlas. + * @param metadataNamespace Name of the cluster to which the Hive component belongs + * @param dbName Name of the Hive database to which the Table belongs + * @param tableName Name of the Hive table + * @param isTemporaryTable is this a temporary table + * @return Unique qualified name to identify the Table instance in Atlas. 
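+     * For example, "sales.orders@primary" (illustrative names); for temporary tables the session id is appended to the table name with the "_temp-" prefix.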
+ */ + public static String getTableQualifiedName(String metadataNamespace, String dbName, String tableName, boolean isTemporaryTable) { + String tableTempName = tableName; + + if (isTemporaryTable) { + if (SessionState.get() != null && SessionState.get().getSessionId() != null) { + tableTempName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId(); + } else { + tableTempName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10); + } + } + + return String.format("%s.%s@%s", dbName.toLowerCase(), tableTempName.toLowerCase(), metadataNamespace); + } + + public static String getTableProcessQualifiedName(String metadataNamespace, Table table) { + String tableQualifiedName = getTableQualifiedName(metadataNamespace, table); + long createdTime = getTableCreatedTime(table); + + return tableQualifiedName + SEP + createdTime; + } + + public static String getStorageDescQFName(String tableQualifiedName) { + return tableQualifiedName + "_storage"; + } + + public static String getColumnQualifiedName(final String tableQualifiedName, final String colName) { + final String[] parts = tableQualifiedName.split("@"); + final String tableName = parts[0]; + final String metadataNamespace = parts[1]; + + return String.format("%s.%s@%s", tableName, colName.toLowerCase(), metadataNamespace); + } + + public static long getTableCreatedTime(Table table) { + return table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR; + } + + private void clearRelationshipAttributes(AtlasEntitiesWithExtInfo entities) { + if (entities != null) { + if (entities.getEntities() != null) { + for (AtlasEntity entity : entities.getEntities()) { + clearRelationshipAttributes(entity);; + } + } + + if (entities.getReferredEntities() != null) { + clearRelationshipAttributes(entities.getReferredEntities().values()); + } + } + } + + private void clearRelationshipAttributes(Collection entities) { + if (entities != null) { + for (AtlasEntity entity : entities) { + clearRelationshipAttributes(entity); + } + } + } + + private void clearRelationshipAttributes(AtlasEntity entity) { + if (entity != null && entity.getRelationshipAttributes() != null) { + entity.getRelationshipAttributes().clear(); + } + } + + private boolean isTableWithDatabaseName(String tableName) { + boolean ret = false; + if (tableName.contains(".")) { + ret = true; + } + return ret; + } + + private String getGuid(String qualifiedName) { + return getGuid(false, qualifiedName); + } + + private String getGuid(boolean isDBType, String qualifiedName) { + String guid = null; + + if (qualifiedNameGuidMap.containsKey(qualifiedName)) { + guid = qualifiedNameGuidMap.get(qualifiedName); + } else if (isDBType) { + guid = getDBGuidFromAtlas(qualifiedName); + } + + if (StringUtils.isBlank(guid)) { + guid = generateGuid(); + } + + return guid; + } + + private String generateGuid() { + return UUID.randomUUID().toString(); + } + + public void setStreamSize(long size) { + zipOutputStream.setComment(String.format(ZIP_FILE_COMMENT_FORMAT, size, -1)); + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java new file mode 100644 index 0000000000..14cc2f2017 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java @@ -0,0 +1,314 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
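The helpers above define the qualifiedName formats Atlas uses to identify Hive objects across exports. A minimal sketch of the resulting strings, assuming static access to these helpers and an invented metadata namespace "cm":

    // Invented names; "cm" stands for the metadata namespace (cluster name).
    String dbQName  = getDBQualifiedName("cm", "Sales");                     // "sales@cm"
    String tblQName = getTableQualifiedName("cm", "Sales", "Orders", false); // "sales.orders@cm"
    String colQName = getColumnQualifiedName(tblQName, "Amount");            // "sales.orders.amount@cm"
    String sdQName  = getStorageDescQFName(tblQName);                        // "sales.orders@cm_storage"
    // A temporary table (isTemporaryTable == true) additionally gets TEMP_TABLE_PREFIX plus the
    // Hive session id appended to the table name before formatting.

getGuid() then reuses any GUID already recorded for the same qualifiedName, so repeated references within one export resolve to a single Atlas entity.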
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook; + +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.hive.hook.HiveMetastoreHookImpl.HiveMetastoreHook; +import org.apache.atlas.hive.hook.HiveHook.PreprocessAction; +import org.apache.atlas.hive.hook.HiveHook.HiveHookObjectNamesCache; +import org.apache.commons.lang.RandomStringUtils; +import org.apache.hadoop.hive.metastore.IHMSHandler; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.events.*; +import org.apache.hadoop.hive.ql.hooks.*; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.session.SessionState; + +import java.util.*; + +import static org.apache.atlas.hive.bridge.HiveMetaStoreBridge.getDatabaseName; +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.toTable; + + +public class AtlasHiveHookContext { + public static final char QNAME_SEP_METADATA_NAMESPACE = '@'; + public static final char QNAME_SEP_ENTITY_NAME = '.'; + public static final char QNAME_SEP_PROCESS = ':'; + public static final String TEMP_TABLE_PREFIX = "_temp-"; + public static final String CREATE_OPERATION = "CREATE"; + public static final String ALTER_OPERATION = "ALTER"; + + private final HiveHook hook; + private final HiveOperation hiveOperation; + private final HookContext hiveContext; + private final Hive hive; + private final Map qNameEntityMap = new HashMap<>(); + private final HiveHookObjectNamesCache knownObjects; + private final HiveMetastoreHook metastoreHook; + private final ListenerEvent metastoreEvent; + private final IHMSHandler metastoreHandler; + + private boolean isSkippedInputEntity; + private boolean isSkippedOutputEntity; + private boolean skipTempTables; + + public AtlasHiveHookContext(HiveHook hook, HiveOperation hiveOperation, HookContext hiveContext, + HiveHookObjectNamesCache knownObjects, boolean skipTempTables) throws Exception { + this(hook, hiveOperation, hiveContext, knownObjects, null, null, skipTempTables); + } + + public AtlasHiveHookContext(HiveHook hook, HiveOperation hiveOperation, HiveHookObjectNamesCache knownObjects, + HiveMetastoreHook metastoreHook, ListenerEvent listenerEvent, boolean skipTempTables) throws Exception { + this(hook, hiveOperation, null, knownObjects, metastoreHook, listenerEvent, skipTempTables); + } + + public AtlasHiveHookContext(HiveHook hook, HiveOperation hiveOperation, HookContext hiveContext, HiveHookObjectNamesCache knownObjects, + HiveMetastoreHook metastoreHook, ListenerEvent listenerEvent, boolean skipTempTables) throws Exception { + this.hook = hook; + this.hiveOperation = hiveOperation; + this.hiveContext = hiveContext; + this.hive = hiveContext != null ? Hive.get(hiveContext.getConf()) : null; + this.knownObjects = knownObjects; + this.metastoreHook = metastoreHook; + this.metastoreEvent = listenerEvent; + this.metastoreHandler = (listenerEvent != null) ? 
metastoreEvent.getIHMSHandler() : null; + this.skipTempTables = skipTempTables; + + init(); + } + + public boolean isMetastoreHook() { + return metastoreHook != null; + } + + public ListenerEvent getMetastoreEvent() { + return metastoreEvent; + } + + public IHMSHandler getMetastoreHandler() { + return metastoreHandler; + } + + public Set getInputs() { + return hiveContext != null ? hiveContext.getInputs() : Collections.emptySet(); + } + + public Set getOutputs() { + return hiveContext != null ? hiveContext.getOutputs() : Collections.emptySet(); + } + + public boolean isSkippedInputEntity() { + return isSkippedInputEntity; + } + + public boolean isSkippedOutputEntity() { + return isSkippedOutputEntity; + } + + public void registerSkippedEntity(Entity entity) { + if (entity instanceof ReadEntity) { + registerSkippedInputEntity(); + } else if (entity instanceof WriteEntity) { + registerSkippedOutputEntity(); + } + } + + public void registerSkippedInputEntity() { + if (!isSkippedInputEntity) { + isSkippedInputEntity = true; + } + } + + public void registerSkippedOutputEntity() { + if (!isSkippedOutputEntity) { + isSkippedOutputEntity = true; + } + } + + public boolean isSkipTempTables() { + return skipTempTables; + } + + public LineageInfo getLineageInfo() { + return hiveContext != null ? hiveContext.getLinfo() : null; + } + + public HookContext getHiveContext() { + return hiveContext; + } + + public Hive getHive() { + return hive; + } + + public HiveOperation getHiveOperation() { + return hiveOperation; + } + + public void putEntity(String qualifiedName, AtlasEntity entity) { + qNameEntityMap.put(qualifiedName, entity); + } + + public AtlasEntity getEntity(String qualifiedName) { + return qNameEntityMap.get(qualifiedName); + } + + public Collection getEntities() { return qNameEntityMap.values(); } + + public Map getQNameToEntityMap() { return qNameEntityMap; } + + public String getMetadataNamespace() { + return hook.getMetadataNamespace(); + } + + public String getHostName() { return hook.getHostName(); } + + public boolean isConvertHdfsPathToLowerCase() { + return hook.isConvertHdfsPathToLowerCase(); + } + + public String getAwsS3AtlasModelVersion() { + return hook.getAwsS3AtlasModelVersion(); + } + + public boolean getSkipHiveColumnLineageHive20633() { + return hook.getSkipHiveColumnLineageHive20633(); + } + + public int getSkipHiveColumnLineageHive20633InputsThreshold() { + return hook.getSkipHiveColumnLineageHive20633InputsThreshold(); + } + + public PreprocessAction getPreprocessActionForHiveTable(String qualifiedName) { + return hook.getPreprocessActionForHiveTable(qualifiedName); + } + + public List getIgnoreDummyDatabaseName() { + return hook.getIgnoreDummyDatabaseName(); + } + + public List getIgnoreDummyTableName() { + return hook.getIgnoreDummyTableName(); + } + + public String getIgnoreValuesTmpTableNamePrefix() { + return hook.getIgnoreValuesTmpTableNamePrefix(); + } + + public String getQualifiedName(Database db) { + return getDatabaseName(db) + QNAME_SEP_METADATA_NAMESPACE + getMetadataNamespace(); + } + + public String getQualifiedName(Table table) { + String tableName = table.getTableName(); + + if (table.isTemporary()) { + if (SessionState.get() != null && SessionState.get().getSessionId() != null) { + tableName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId(); + } else { + tableName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10); + } + } + + return (table.getDbName() + QNAME_SEP_ENTITY_NAME + tableName + 
QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + getMetadataNamespace(); + } + + public boolean isKnownDatabase(String dbQualifiedName) { + return knownObjects != null && dbQualifiedName != null ? knownObjects.isKnownDatabase(dbQualifiedName) : false; + } + + public boolean isKnownTable(String tblQualifiedName) { + return knownObjects != null && tblQualifiedName != null ? knownObjects.isKnownTable(tblQualifiedName) : false; + } + + public void addToKnownEntities(Collection entities) { + if (knownObjects != null && entities != null) { + knownObjects.addToKnownEntities(entities); + } + } + + public void removeFromKnownDatabase(String dbQualifiedName) { + if (knownObjects != null && dbQualifiedName != null) { + knownObjects.removeFromKnownDatabase(dbQualifiedName); + } + } + + public void removeFromKnownTable(String tblQualifiedName) { + if (knownObjects != null && tblQualifiedName != null) { + knownObjects.removeFromKnownTable(tblQualifiedName); + } + } + + public boolean isHiveProcessPopulateDeprecatedAttributes() { + return hook.isHiveProcessPopulateDeprecatedAttributes(); + } + + private void init() { + if (hiveOperation == null) { + return; + } + + String operation = hiveOperation.getOperationName(); + + if (knownObjects == null || !isCreateAlterOperation(operation)) { + return; + } + + List databases = new ArrayList<>(); + List
<Table>
tables = new ArrayList<>(); + + if (isMetastoreHook()) { + switch (hiveOperation) { + case CREATEDATABASE: + databases.add(((CreateDatabaseEvent) metastoreEvent).getDatabase()); + break; + case ALTERDATABASE: + databases.add(((AlterDatabaseEvent) metastoreEvent).getOldDatabase()); + databases.add(((AlterDatabaseEvent) metastoreEvent).getNewDatabase()); + break; + case CREATETABLE: + tables.add(toTable(((CreateTableEvent) metastoreEvent).getTable())); + break; + case ALTERTABLE_PROPERTIES: + case ALTERTABLE_RENAME: + case ALTERTABLE_RENAMECOL: + tables.add(toTable(((AlterTableEvent) metastoreEvent).getOldTable())); + tables.add(toTable(((AlterTableEvent) metastoreEvent).getNewTable())); + break; + } + } else { + if (getOutputs() != null) { + for (WriteEntity output : hiveContext.getOutputs()) { + switch (output.getType()) { + case DATABASE: + databases.add(output.getDatabase()); + break; + case TABLE: + tables.add(output.getTable()); + break; + } + } + } + } + + for (Database database : databases) { + knownObjects.removeFromKnownDatabase(getQualifiedName(database)); + } + + for (Table table : tables) { + knownObjects.removeFromKnownTable(getQualifiedName(table)); + } + } + + private static boolean isCreateAlterOperation(String operationName) { + return operationName != null && operationName.startsWith(CREATE_OPERATION) || operationName.startsWith(ALTER_OPERATION); + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java new file mode 100644 index 0000000000..6ea48482ab --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java @@ -0,0 +1,428 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook; + +import org.apache.atlas.hive.hook.events.*; +import org.apache.atlas.hive.hook.utils.ActiveEntityFilter; +import org.apache.atlas.hook.AtlasHook; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.utils.LruCache; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext; +import org.apache.hadoop.hive.ql.hooks.HookContext; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; + +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME; +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_DB; +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_TABLE; +import static org.apache.atlas.repository.Constants.HS2_SOURCE; + +public class HiveHook extends AtlasHook implements ExecuteWithHookContext { + private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class); + + public enum PreprocessAction { NONE, IGNORE, PRUNE } + + public static final String CONF_PREFIX = "atlas.hook.hive."; + public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase"; + public static final String HOOK_NAME_CACHE_ENABLED = CONF_PREFIX + "name.cache.enabled"; + public static final String HOOK_NAME_CACHE_DATABASE_COUNT = CONF_PREFIX + "name.cache.database.count"; + public static final String HOOK_NAME_CACHE_TABLE_COUNT = CONF_PREFIX + "name.cache.table.count"; + public static final String HOOK_NAME_CACHE_REBUID_INTERVAL_SEC = CONF_PREFIX + "name.cache.rebuild.interval.seconds"; + public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION = CONF_PREFIX + "aws_s3.atlas.model.version"; + public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2 = "v2"; + public static final String HOOK_HIVE_PROCESS_POPULATE_DEPRECATED_ATTRIBUTES = CONF_PREFIX + "hive_process.populate.deprecated.attributes"; + public static final String HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633 = CONF_PREFIX + "skip.hive_column_lineage.hive-20633"; + public static final String HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633_INPUTS_THRESHOLD = CONF_PREFIX + "skip.hive_column_lineage.hive-20633.inputs.threshold"; + public static final String HOOK_HIVE_TABLE_IGNORE_PATTERN = CONF_PREFIX + "hive_table.ignore.pattern"; + public static final String HOOK_HIVE_TABLE_PRUNE_PATTERN = CONF_PREFIX + "hive_table.prune.pattern"; + public static final String HOOK_HIVE_TABLE_CACHE_SIZE = CONF_PREFIX + "hive_table.cache.size"; + public static final String HOOK_HIVE_IGNORE_DDL_OPERATIONS = CONF_PREFIX + "hs2.ignore.ddl.operations"; + 
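All of these keys resolve under the atlas.hook.hive. prefix and are read from the hook's Atlas configuration, typically atlas-application.properties on the Hive classpath. A sketch of the tunables, using the fallback defaults passed to the property getters in the static initializer that follows:

    # Sketch of atlas-application.properties entries; values shown are the code defaults.
    atlas.hook.hive.hdfs_path.convert_to_lowercase=false
    atlas.hook.hive.name.cache.enabled=true
    atlas.hook.hive.name.cache.database.count=10000
    atlas.hook.hive.name.cache.table.count=10000
    atlas.hook.hive.name.cache.rebuild.interval.seconds=3600
    atlas.hook.hive.aws_s3.atlas.model.version=v2
    atlas.hook.hive.skip.hive_column_lineage.hive-20633=false
    atlas.hook.hive.skip.hive_column_lineage.hive-20633.inputs.threshold=15
    atlas.hook.hive.hive_process.populate.deprecated.attributes=false
    atlas.hook.hive.hive_table.cache.size=10000
    atlas.hook.hive.skip.temp.tables=true
    # Optional regex lists, no defaults:
    # atlas.hook.hive.hive_table.ignore.pattern=
    # atlas.hook.hive.hive_table.prune.pattern=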
public static final String HOOK_HIVE_FILTER_ENTITY_ADDITIONAL_TYPES_TO_RETAIN = CONF_PREFIX + "hs2.filter.entity.additional.types.to.retain"; + public static final String HOOK_HIVE_SKIP_TEMP_TABLES = CONF_PREFIX + "skip.temp.tables"; + public static final String DEFAULT_HOST_NAME = "localhost"; + + private static final Map OPERATION_MAP = new HashMap<>(); + + private static final boolean convertHdfsPathToLowerCase; + private static final boolean nameCacheEnabled; + private static final int nameCacheDatabaseMaxCount; + private static final int nameCacheTableMaxCount; + private static final int nameCacheRebuildIntervalSeconds; + private static final String awsS3AtlasModelVersion; + + private static final boolean skipHiveColumnLineageHive20633; + private static final int skipHiveColumnLineageHive20633InputsThreshold; + private static final List hiveTablesToIgnore = new ArrayList<>(); + private static final List hiveTablesToPrune = new ArrayList<>(); + private static final Map hiveTablesCache; + private static final List ignoreDummyDatabaseName; + private static final List ignoreDummyTableName; + private static final String ignoreValuesTmpTableNamePrefix; + private static final boolean hiveProcessPopulateDeprecatedAttributes; + private static HiveHookObjectNamesCache knownObjects = null; + private static String hostName; + private static boolean skipTempTables = true; + + static { + for (HiveOperation hiveOperation : HiveOperation.values()) { + OPERATION_MAP.put(hiveOperation.getOperationName(), hiveOperation); + } + + convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false); + nameCacheEnabled = atlasProperties.getBoolean(HOOK_NAME_CACHE_ENABLED, true); + nameCacheDatabaseMaxCount = atlasProperties.getInt(HOOK_NAME_CACHE_DATABASE_COUNT, 10000); + nameCacheTableMaxCount = atlasProperties.getInt(HOOK_NAME_CACHE_TABLE_COUNT, 10000); + nameCacheRebuildIntervalSeconds = atlasProperties.getInt(HOOK_NAME_CACHE_REBUID_INTERVAL_SEC, 60 * 60); // 60 minutes default + awsS3AtlasModelVersion = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2); + skipHiveColumnLineageHive20633 = atlasProperties.getBoolean(HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633, false); + skipHiveColumnLineageHive20633InputsThreshold = atlasProperties.getInt(HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633_INPUTS_THRESHOLD, 15); // skip if avg # of inputs is > 15 + hiveProcessPopulateDeprecatedAttributes = atlasProperties.getBoolean(HOOK_HIVE_PROCESS_POPULATE_DEPRECATED_ATTRIBUTES, false); + String[] patternHiveTablesToIgnore = atlasProperties.getStringArray(HOOK_HIVE_TABLE_IGNORE_PATTERN); + String[] patternHiveTablesToPrune = atlasProperties.getStringArray(HOOK_HIVE_TABLE_PRUNE_PATTERN); + + if (patternHiveTablesToIgnore != null) { + for (String pattern : patternHiveTablesToIgnore) { + try { + hiveTablesToIgnore.add(Pattern.compile(pattern)); + + LOG.info("{}={}", HOOK_HIVE_TABLE_IGNORE_PATTERN, pattern); + } catch (Throwable t) { + LOG.warn("failed to compile pattern {}", pattern, t); + LOG.warn("Ignoring invalid pattern in configuration {}: {}", HOOK_HIVE_TABLE_IGNORE_PATTERN, pattern); + } + } + } + + if (patternHiveTablesToPrune != null) { + for (String pattern : patternHiveTablesToPrune) { + try { + hiveTablesToPrune.add(Pattern.compile(pattern)); + + LOG.info("{}={}", HOOK_HIVE_TABLE_PRUNE_PATTERN, pattern); + } catch (Throwable t) { + LOG.warn("failed to compile pattern {}", pattern, t); + LOG.warn("Ignoring invalid pattern in configuration {}: {}", 
HOOK_HIVE_TABLE_PRUNE_PATTERN, pattern); + } + } + } + + if (!hiveTablesToIgnore.isEmpty() || !hiveTablesToPrune.isEmpty()) { + hiveTablesCache = new LruCache<>(atlasProperties.getInt(HOOK_HIVE_TABLE_CACHE_SIZE, 10000), 0); + } else { + hiveTablesCache = Collections.emptyMap(); + } + + knownObjects = nameCacheEnabled ? new HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds) : null; + + List defaultDummyDatabase = new ArrayList<>(); + List defaultDummyTable = new ArrayList<>(); + + defaultDummyDatabase.add(SemanticAnalyzer.DUMMY_DATABASE); + defaultDummyTable.add(SemanticAnalyzer.DUMMY_TABLE); + + ignoreDummyDatabaseName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name", defaultDummyDatabase); + ignoreDummyTableName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name", defaultDummyTable); + ignoreValuesTmpTableNamePrefix = atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix", "Values__Tmp__Table__"); + skipTempTables = atlasProperties.getBoolean(HOOK_HIVE_SKIP_TEMP_TABLES, true); + + try { + hostName = InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException e) { + LOG.warn("No hostname found. Setting the hostname to default value {}", DEFAULT_HOST_NAME, e); + hostName = DEFAULT_HOST_NAME; + } + + ActiveEntityFilter.init(atlasProperties); + } + + + public HiveHook() { + } + + public HiveHook(String name) { + super(name); + } + + public String getMessageSource() { + return HS2_SOURCE; + } + + @Override + public void run(HookContext hookContext) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveHook.run({})", hookContext.getOperationName()); + } + + try { + HiveOperation oper = OPERATION_MAP.get(hookContext.getOperationName()); + AtlasHiveHookContext context = new AtlasHiveHookContext(this, oper, hookContext, getKnownObjects(), isSkipTempTables()); + BaseHiveEvent event = null; + + switch (oper) { + case CREATEDATABASE: + event = new CreateDatabase(context); + break; + + case DROPDATABASE: + event = new DropDatabase(context); + break; + + case ALTERDATABASE: + case ALTERDATABASE_OWNER: + case ALTERDATABASE_LOCATION: + event = new AlterDatabase(context); + break; + + case CREATETABLE: + event = new CreateTable(context); + break; + + case DROPTABLE: + case DROPVIEW: + case DROP_MATERIALIZED_VIEW: + event = new DropTable(context); + break; + + case CREATETABLE_AS_SELECT: + case CREATE_MATERIALIZED_VIEW: + case CREATEVIEW: + case ALTERVIEW_AS: + case LOAD: + case EXPORT: + case IMPORT: + case QUERY: + event = new CreateHiveProcess(context); + break; + + case ALTERTABLE_FILEFORMAT: + case ALTERTABLE_CLUSTER_SORT: + case ALTERTABLE_BUCKETNUM: + case ALTERTABLE_PROPERTIES: + case ALTERVIEW_PROPERTIES: + case ALTERTABLE_SERDEPROPERTIES: + case ALTERTABLE_SERIALIZER: + case ALTERTABLE_ADDCOLS: + case ALTERTABLE_REPLACECOLS: + case ALTERTABLE_PARTCOLTYPE: + case ALTERTABLE_LOCATION: + event = new AlterTable(context); + break; + + case ALTERTABLE_RENAME: + case ALTERVIEW_RENAME: + event = new AlterTableRename(context); + break; + + case ALTERTABLE_RENAMECOL: + event = new AlterTableRenameCol(context); + break; + + default: + if (LOG.isDebugEnabled()) { + LOG.debug("HiveHook.run({}): operation ignored", hookContext.getOperationName()); + } + break; + } + + if (event != null) { + final UserGroupInformation ugi = hookContext.getUgi() == null ? 
Utils.getUGI() : hookContext.getUgi(); + + super.notifyEntities(ActiveEntityFilter.apply(event.getNotificationMessages()), ugi); + } + } catch (Throwable t) { + LOG.error("HiveHook.run(): failed to process operation {}", hookContext.getOperationName(), t); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveHook.run({})", hookContext.getOperationName()); + } + } + + public boolean isConvertHdfsPathToLowerCase() { + return convertHdfsPathToLowerCase; + } + + public String getAwsS3AtlasModelVersion() { + return awsS3AtlasModelVersion; + } + + public boolean getSkipHiveColumnLineageHive20633() { + return skipHiveColumnLineageHive20633; + } + + public int getSkipHiveColumnLineageHive20633InputsThreshold() { + return skipHiveColumnLineageHive20633InputsThreshold; + } + + public List getIgnoreDummyDatabaseName() { + return ignoreDummyDatabaseName; + } + + public List getIgnoreDummyTableName() { + return ignoreDummyTableName; + } + + public String getIgnoreValuesTmpTableNamePrefix() { + return ignoreValuesTmpTableNamePrefix; + } + + public boolean isHiveProcessPopulateDeprecatedAttributes() { + return hiveProcessPopulateDeprecatedAttributes; + } + + public static boolean isSkipTempTables() { + return skipTempTables; + } + + public PreprocessAction getPreprocessActionForHiveTable(String qualifiedName) { + PreprocessAction ret = PreprocessAction.NONE; + + if (qualifiedName != null && (CollectionUtils.isNotEmpty(hiveTablesToIgnore) || CollectionUtils.isNotEmpty(hiveTablesToPrune))) { + ret = hiveTablesCache.get(qualifiedName); + + if (ret == null) { + if (isMatch(qualifiedName, hiveTablesToIgnore)) { + ret = PreprocessAction.IGNORE; + } else if (isMatch(qualifiedName, hiveTablesToPrune)) { + ret = PreprocessAction.PRUNE; + } else { + ret = PreprocessAction.NONE; + } + + hiveTablesCache.put(qualifiedName, ret); + } + } + + return ret; + } + + private boolean isMatch(String name, List patterns) { + boolean ret = false; + + for (Pattern p : patterns) { + if (p.matcher(name).matches()) { + ret = true; + + break; + } + } + + return ret; + } + + public static HiveHookObjectNamesCache getKnownObjects() { + if (knownObjects != null && knownObjects.isCacheExpired()) { + LOG.info("HiveHook.run(): purging cached databaseNames ({}) and tableNames ({})", knownObjects.getCachedDbCount(), knownObjects.getCachedTableCount()); + + knownObjects = new HiveHook.HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds); + } + + return knownObjects; + } + + public String getHostName() { + return hostName; + } + + public static class HiveHookObjectNamesCache { + private final int dbMaxCacheCount; + private final int tblMaxCacheCount; + private final long cacheExpiryTimeMs; + private final Set knownDatabases; + private final Set knownTables; + + public HiveHookObjectNamesCache(int dbMaxCacheCount, int tblMaxCacheCount, long nameCacheRebuildIntervalSeconds) { + this.dbMaxCacheCount = dbMaxCacheCount; + this.tblMaxCacheCount = tblMaxCacheCount; + this.cacheExpiryTimeMs = nameCacheRebuildIntervalSeconds <= 0 ? 
Long.MAX_VALUE : (System.currentTimeMillis() + (nameCacheRebuildIntervalSeconds * 1000)); + this.knownDatabases = Collections.synchronizedSet(new HashSet<>()); + this.knownTables = Collections.synchronizedSet(new HashSet<>()); + } + + public int getCachedDbCount() { + return knownDatabases.size(); + } + + public int getCachedTableCount() { + return knownTables.size(); + } + + public boolean isCacheExpired() { + return System.currentTimeMillis() > cacheExpiryTimeMs; + } + + public boolean isKnownDatabase(String dbQualifiedName) { + return knownDatabases.contains(dbQualifiedName); + } + + public boolean isKnownTable(String tblQualifiedName) { + return knownTables.contains(tblQualifiedName); + } + + public void addToKnownEntities(Collection entities) { + for (AtlasEntity entity : entities) { + if (StringUtils.equalsIgnoreCase(entity.getTypeName(), HIVE_TYPE_DB)) { + addToKnownDatabase((String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + } else if (StringUtils.equalsIgnoreCase(entity.getTypeName(), HIVE_TYPE_TABLE)) { + addToKnownTable((String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + } + } + } + + public void addToKnownDatabase(String dbQualifiedName) { + if (knownDatabases.size() < dbMaxCacheCount) { + knownDatabases.add(dbQualifiedName); + } + } + + public void addToKnownTable(String tblQualifiedName) { + if (knownTables.size() < tblMaxCacheCount) { + knownTables.add(tblQualifiedName); + } + } + + public void removeFromKnownDatabase(String dbQualifiedName) { + knownDatabases.remove(dbQualifiedName); + } + + public void removeFromKnownTable(String tblQualifiedName) { + if (tblQualifiedName != null) { + knownTables.remove(tblQualifiedName); + } + } + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHookImpl.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHookImpl.java new file mode 100644 index 0000000000..33266ce0b3 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHookImpl.java @@ -0,0 +1,216 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
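HiveHookObjectNamesCache above keeps bounded, time-boxed sets of already-notified database and table qualifiedNames; it is consulted through AtlasHiveHookContext.isKnownDatabase()/isKnownTable() so events can avoid re-sending entities notified in the recent past, and getKnownObjects() swaps in a fresh instance once the rebuild interval elapses. A minimal usage sketch with invented names and the default sizes:

    // Sketch only: sizes and interval match the configuration defaults.
    HiveHook.HiveHookObjectNamesCache cache = new HiveHook.HiveHookObjectNamesCache(10000, 10000, 3600);

    cache.addToKnownDatabase("sales@cm");
    cache.addToKnownTable("sales.orders@cm");

    boolean dbSeen  = cache.isKnownDatabase("sales@cm");        // true
    boolean tblSeen = cache.isKnownTable("sales.orders_v2@cm"); // false, never added

    // CREATE/ALTER operations evict affected entries (see AtlasHiveHookContext.init()),
    // and once isCacheExpired() returns true the whole cache is discarded by getKnownObjects().
    cache.removeFromKnownTable("sales.orders@cm");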
+ */ +package org.apache.atlas.hive.hook; + +import org.apache.atlas.hive.hook.events.*; +import org.apache.atlas.hook.AtlasHook; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.MetaStoreEventListener; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.events.*; +import org.apache.hadoop.hive.metastore.utils.SecurityUtils; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +import static org.apache.atlas.hive.hook.events.AlterTableRenameCol.findRenamedColumn; +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.toTable; +import static org.apache.atlas.repository.Constants.HMS_SOURCE; +import static org.apache.hadoop.hive.ql.plan.HiveOperation.*; + +public class HiveMetastoreHookImpl extends MetaStoreEventListener { + private static final Logger LOG = LoggerFactory.getLogger(HiveMetastoreHookImpl.class); + private final HiveHook hiveHook; + private final HiveMetastoreHook hook; + + public HiveMetastoreHookImpl(Configuration config) { + super(config); + + this.hiveHook = new HiveHook(this.getClass().getSimpleName()); + this.hook = new HiveMetastoreHook(); + } + + @Override + public void onCreateDatabase(CreateDatabaseEvent dbEvent) { + HiveOperationContext context = new HiveOperationContext(CREATEDATABASE, dbEvent); + + hook.handleEvent(context); + } + + @Override + public void onDropDatabase(DropDatabaseEvent dbEvent) { + HiveOperationContext context = new HiveOperationContext(DROPDATABASE, dbEvent); + + hook.handleEvent(context); + } + + @Override + public void onAlterDatabase(AlterDatabaseEvent dbEvent) { + HiveOperationContext context = new HiveOperationContext(ALTERDATABASE, dbEvent); + + hook.handleEvent(context); + } + + @Override + public void onCreateTable(CreateTableEvent tableEvent) { + HiveOperationContext context = new HiveOperationContext(CREATETABLE, tableEvent); + + hook.handleEvent(context); + } + + @Override + public void onDropTable(DropTableEvent tableEvent) { + HiveOperationContext context = new HiveOperationContext(DROPTABLE, tableEvent); + + hook.handleEvent(context); + } + + @Override + public void onAlterTable(AlterTableEvent tableEvent) { + HiveOperationContext context = new HiveOperationContext(tableEvent); + Table oldTable = toTable(tableEvent.getOldTable()); + Table newTable = toTable(tableEvent.getNewTable()); + + if (isTableRename(oldTable, newTable)) { + context.setOperation(ALTERTABLE_RENAME); + } else if (isColumnRename(oldTable, newTable, context)) { + context.setOperation(ALTERTABLE_RENAMECOL); + } else if(isAlterTableProperty(tableEvent, "last_modified_time") || + isAlterTableProperty(tableEvent, "transient_lastDdlTime")) { + context.setOperation(ALTERTABLE_PROPERTIES); // map other alter table operations to ALTERTABLE_PROPERTIES + } + + hook.handleEvent(context); + } + + public class HiveMetastoreHook extends AtlasHook { + public HiveMetastoreHook() { + } + + @Override + public String getMessageSource() { + return HMS_SOURCE; + } + + public void handleEvent(HiveOperationContext operContext) { + ListenerEvent listenerEvent = operContext.getEvent(); + + if (!listenerEvent.getStatus()) { + return; + } + + try { + HiveOperation oper = operContext.getOperation(); + AtlasHiveHookContext context = new 
AtlasHiveHookContext(hiveHook, oper, hiveHook.getKnownObjects(), this, listenerEvent, hiveHook.isSkipTempTables()); + BaseHiveEvent event = null; + + switch (oper) { + case CREATEDATABASE: + event = new CreateDatabase(context); + break; + + case DROPDATABASE: + event = new DropDatabase(context); + break; + + case ALTERDATABASE: + event = new AlterDatabase(context); + break; + + case CREATETABLE: + event = new CreateTable(context); + break; + + case DROPTABLE: + event = new DropTable(context); + break; + + case ALTERTABLE_PROPERTIES: + event = new AlterTable(context); + break; + + case ALTERTABLE_RENAME: + event = new AlterTableRename(context); + break; + + case ALTERTABLE_RENAMECOL: + FieldSchema columnOld = operContext.getColumnOld(); + FieldSchema columnNew = operContext.getColumnNew(); + + event = new AlterTableRenameCol(columnOld, columnNew, context); + break; + + default: + if (LOG.isDebugEnabled()) { + LOG.debug("HiveMetastoreHook.handleEvent({}): operation ignored.", listenerEvent); + } + break; + } + + if (event != null) { + final UserGroupInformation ugi = SecurityUtils.getUGI() == null ? Utils.getUGI() : SecurityUtils.getUGI(); + + super.notifyEntities(event.getNotificationMessages(), ugi); + } + } catch (Throwable t) { + LOG.error("HiveMetastoreHook.handleEvent({}): failed to process operation {}", listenerEvent, t); + } + } + } + + private static boolean isTableRename(Table oldTable, Table newTable) { + String oldTableName = oldTable.getTableName(); + String newTableName = newTable.getTableName(); + + return !StringUtils.equalsIgnoreCase(oldTableName, newTableName); + } + + private static boolean isColumnRename(Table oldTable, Table newTable, HiveOperationContext context) { + FieldSchema columnOld = findRenamedColumn(oldTable, newTable); + FieldSchema columnNew = findRenamedColumn(newTable, oldTable); + boolean isColumnRename = columnOld != null && columnNew != null; + + if (isColumnRename) { + context.setColumnOld(columnOld); + context.setColumnNew(columnNew); + } + + return isColumnRename; + } + + private boolean isAlterTableProperty(AlterTableEvent tableEvent, String propertyToCheck) { + final boolean ret; + String oldTableModifiedTime = tableEvent.getOldTable().getParameters().get(propertyToCheck); + String newTableModifiedTime = tableEvent.getNewTable().getParameters().get(propertyToCheck); + + + if (oldTableModifiedTime == null) { + ret = newTableModifiedTime != null; + } else { + ret = !oldTableModifiedTime.equals(newTableModifiedTime); + } + + return ret; + + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveOperationContext.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveOperationContext.java new file mode 100644 index 0000000000..23ea4be690 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveOperationContext.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
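Both hook entry points are now complete: HiveHook runs inside HiveServer2 as a post-execution hook, while HiveMetastoreHookImpl listens to metastore events directly. A sketch of the typical Hive-side wiring (hive-site.xml values shown as key=value for brevity; atlas-application.properties is assumed to be on the same classpath):

    hive.exec.post.hooks=org.apache.atlas.hive.hook.HiveHook
    hive.metastore.event.listeners=org.apache.atlas.hive.hook.HiveMetastoreHookImpl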
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.hive.hook; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.events.ListenerEvent; +import org.apache.hadoop.hive.ql.plan.HiveOperation; + +public class HiveOperationContext { + HiveOperation operation; + ListenerEvent event; + FieldSchema columnOld; + FieldSchema columnNew; + + public HiveOperationContext(ListenerEvent event) { + this(null, event); + } + + public HiveOperationContext(HiveOperation operation, ListenerEvent event) { + setOperation(operation); + setEvent(event); + setColumnOld(null); + setColumnNew(null); + } + + public ListenerEvent getEvent() { + return event; + } + + public void setEvent(ListenerEvent event) { + this.event = event; + } + + public HiveOperation getOperation() { + return operation; + } + + public void setOperation(HiveOperation operation) { + this.operation = operation; + } + + public FieldSchema getColumnOld() { + return columnOld; + } + + public void setColumnOld(FieldSchema columnOld) { + this.columnOld = columnOld; + } + + public FieldSchema getColumnNew() { + return columnNew; + } + + public void setColumnNew(FieldSchema columnNew) { + this.columnNew = columnNew; + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterDatabase.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterDatabase.java new file mode 100644 index 0000000000..d2623b3636 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterDatabase.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.events.AlterDatabaseEvent; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.List; + +public class AlterDatabase extends CreateDatabase { + private static final Logger LOG = LoggerFactory.getLogger(AlterDatabase.class); + + public AlterDatabase(AtlasHiveHookContext context) { + super(context); + } + + @Override + public List getNotificationMessages() throws Exception { + List ret = null; + AtlasEntitiesWithExtInfo entities = context.isMetastoreHook() ? getHiveMetastoreEntities() : getHiveEntities(); + + if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) { + ret = Collections.singletonList(new EntityUpdateRequestV2(getUserName(), entities)); + } + + return ret; + } + + public AtlasEntitiesWithExtInfo getHiveMetastoreEntities() throws Exception { + AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo(); + AlterDatabaseEvent dbEvent = (AlterDatabaseEvent) context.getMetastoreEvent(); + Database oldDb = dbEvent.getOldDatabase(); + Database newDb = dbEvent.getNewDatabase(); + + if (newDb != null) { + AtlasEntity dbEntity = toDbEntity(newDb); + + ret.addEntity(dbEntity); + + addLocationEntities(dbEntity, ret); + } else { + LOG.error("AlterDatabase.getEntities(): failed to retrieve db"); + } + + addProcessedEntities(ret); + + return ret; + } + + public AtlasEntitiesWithExtInfo getHiveEntities() throws Exception { + return super.getHiveEntities(); + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTable.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTable.java new file mode 100644 index 0000000000..d2f09cc108 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTable.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
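AlterDatabase rebuilds the hive_db entity from the new Database object and wraps it in a single update request; AlterTable below follows the same pattern for hive_table. A minimal sketch of the expected output, assuming an AtlasHiveHookContext already prepared for an ALTERDATABASE metastore event:

    // Sketch only; construction of the context is omitted.
    List<HookNotification> msgs = new AlterDatabase(context).getNotificationMessages();

    // When the new Database resolves, exactly one EntityUpdateRequestV2 is emitted,
    // carrying the rebuilt hive_db entity plus any location entities.
    assert msgs != null && msgs.size() == 1;
    assert msgs.get(0) instanceof HookNotification.EntityUpdateRequestV2;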
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2; +import org.apache.commons.collections.CollectionUtils; + +import java.util.Collections; +import java.util.List; + +public class AlterTable extends CreateTable { + public AlterTable(AtlasHiveHookContext context) { + super(context); + } + + @Override + public List getNotificationMessages() throws Exception { + List ret = null; + AtlasEntitiesWithExtInfo entities = context.isMetastoreHook() ? getHiveMetastoreEntities() : getHiveEntities(); + + if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) { + ret = Collections.singletonList(new EntityUpdateRequestV2(getUserName(), entities)); + } + + return ret; + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java new file mode 100644 index 0000000000..6961fa7c2c --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityPartialUpdateRequestV2; +import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.metastore.events.AlterTableEvent; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +public class AlterTableRename extends BaseHiveEvent { + private static final Logger LOG = LoggerFactory.getLogger(AlterTableRename.class); + + public AlterTableRename(AtlasHiveHookContext context) { + super(context); + } + + @Override + public List getNotificationMessages() throws Exception { + return context.isMetastoreHook() ? getHiveMetastoreMessages() : getHiveMessages(); + } + + public List getHiveMetastoreMessages() throws Exception { + List ret = new ArrayList<>(); + AlterTableEvent tblEvent = (AlterTableEvent) context.getMetastoreEvent(); + Table oldTable = toTable(tblEvent.getOldTable()); + Table newTable = toTable(tblEvent.getNewTable()); + + if (newTable == null) { + LOG.error("AlterTableRename: renamed table not found in outputs list"); + + return ret; + } + + processTables(oldTable, newTable, ret); + + return ret; + } + + public List getHiveMessages() throws Exception { + List ret = new ArrayList<>(); + Table oldTable; + Table newTable; + + if (CollectionUtils.isEmpty(getInputs())) { + LOG.error("AlterTableRename: old-table not found in inputs list"); + + return ret; + } + + oldTable = getInputs().iterator().next().getTable(); + newTable = null; + + if (CollectionUtils.isNotEmpty(getOutputs())) { + for (WriteEntity entity : getOutputs()) { + if (entity.getType() == Entity.Type.TABLE) { + newTable = entity.getTable(); + + //Hive sends with both old and new table names in the outputs which is weird. 
So skipping that with the below check + if (StringUtils.equalsIgnoreCase(newTable.getDbName(), oldTable.getDbName()) && + StringUtils.equalsIgnoreCase(newTable.getTableName(), oldTable.getTableName())) { + newTable = null; + + continue; + } + + newTable = getHive().getTable(newTable.getDbName(), newTable.getTableName()); + + break; + } + } + } + + if (newTable == null) { + LOG.error("AlterTableRename: renamed table not found in outputs list"); + + return ret; + } + + processTables(oldTable, newTable, ret); + + return ret; + } + + private void processTables(Table oldTable, Table newTable, List ret) throws Exception { + AtlasEntityWithExtInfo oldTableEntity = toTableEntity(oldTable); + AtlasEntityWithExtInfo renamedTableEntity = toTableEntity(newTable); + + if (oldTableEntity == null || renamedTableEntity == null) { + return; + } + + // update qualifiedName for all columns, partitionKeys, storageDesc + String renamedTableQualifiedName = (String) renamedTableEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME); + + renameColumns((List) oldTableEntity.getEntity().getRelationshipAttribute(ATTRIBUTE_COLUMNS), oldTableEntity, renamedTableQualifiedName, ret); + renameColumns((List) oldTableEntity.getEntity().getRelationshipAttribute(ATTRIBUTE_PARTITION_KEYS), oldTableEntity, renamedTableQualifiedName, ret); + renameStorageDesc(oldTableEntity, renamedTableEntity, ret); + + // set previous name as the alias + renamedTableEntity.getEntity().setAttribute(ATTRIBUTE_ALIASES, Collections.singletonList(oldTable.getTableName())); + + // make a copy of renamedTableEntity to send as partial-update with no relationship attributes + AtlasEntity renamedTableEntityForPartialUpdate = new AtlasEntity(renamedTableEntity.getEntity()); + renamedTableEntityForPartialUpdate.setRelationshipAttributes(null); + + String oldTableQualifiedName = (String) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME); + AtlasObjectId oldTableId = new AtlasObjectId(oldTableEntity.getEntity().getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldTableQualifiedName); + + // update qualifiedName and other attributes (like params - which include lastModifiedTime, lastModifiedBy) of the table + ret.add(new EntityPartialUpdateRequestV2(getUserName(), oldTableId, new AtlasEntityWithExtInfo(renamedTableEntityForPartialUpdate))); + + // to handle cases where Atlas didn't have the oldTable, send a full update + ret.add(new EntityUpdateRequestV2(getUserName(), new AtlasEntitiesWithExtInfo(renamedTableEntity))); + + // partial update relationship attribute ddl + if (!context.isMetastoreHook()) { + AtlasEntity ddlEntity = createHiveDDLEntity(renamedTableEntity.getEntity(), true); + + if (ddlEntity != null) { + ret.add(new HookNotification.EntityCreateRequestV2(getUserName(), new AtlasEntitiesWithExtInfo(ddlEntity))); + } + } + + context.removeFromKnownTable(oldTableQualifiedName); + } + + private void renameColumns(List columns, AtlasEntityExtInfo oldEntityExtInfo, String newTableQualifiedName, List notifications) { + if (CollectionUtils.isNotEmpty(columns)) { + for (AtlasObjectId columnId : columns) { + AtlasEntity oldColumn = oldEntityExtInfo.getEntity(columnId.getGuid()); + AtlasObjectId oldColumnId = new AtlasObjectId(oldColumn.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldColumn.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + AtlasEntity newColumn = new AtlasEntity(oldColumn.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, getColumnQualifiedName(newTableQualifiedName, (String) oldColumn.getAttribute(ATTRIBUTE_NAME))); + + notifications.add(new 
EntityPartialUpdateRequestV2(getUserName(), oldColumnId, new AtlasEntityWithExtInfo(newColumn))); + } + } + } + + private void renameStorageDesc(AtlasEntityWithExtInfo oldEntityExtInfo, AtlasEntityWithExtInfo newEntityExtInfo, List notifications) { + AtlasEntity oldSd = getStorageDescEntity(oldEntityExtInfo); + AtlasEntity newSd = new AtlasEntity(getStorageDescEntity(newEntityExtInfo)); // make a copy of newSd, since we will be setting relationshipAttributes to 'null' below + // and we need relationship attributes later during entity full update + + if (oldSd != null && newSd != null) { + AtlasObjectId oldSdId = new AtlasObjectId(oldSd.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldSd.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + + newSd.removeAttribute(ATTRIBUTE_TABLE); + newSd.setRelationshipAttributes(null); + + notifications.add(new EntityPartialUpdateRequestV2(getUserName(), oldSdId, new AtlasEntityWithExtInfo(newSd))); + } + } + + private AtlasEntity getStorageDescEntity(AtlasEntityWithExtInfo tableEntity) { + AtlasEntity ret = null; + + if (tableEntity != null && tableEntity.getEntity() != null) { + Object attrSdId = tableEntity.getEntity().getRelationshipAttribute(ATTRIBUTE_STORAGEDESC); + + if (attrSdId instanceof AtlasObjectId) { + ret = tableEntity.getReferredEntity(((AtlasObjectId) attrSdId).getGuid()); + } + } + + return ret; + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRenameCol.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRenameCol.java new file mode 100644 index 0000000000..29ca920c64 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRenameCol.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
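A table rename keeps the GUIDs of the contained objects but changes every qualifiedName, so renameColumns() and renameStorageDesc() above address each partial update by the old qualifiedName and carry the new one. A sketch of the rewrite using the qualifiedName helpers shown earlier (names invented):

    String oldTableQName = "sales.orders@cm";
    String newTableQName = "sales.orders_v2@cm";

    // column: partial update addressed by the old name, carrying the new name
    String oldColQName = getColumnQualifiedName(oldTableQName, "amount"); // "sales.orders.amount@cm"
    String newColQName = getColumnQualifiedName(newTableQName, "amount"); // "sales.orders_v2.amount@cm"

    // storage descriptor: same pattern with the "_storage" suffix
    String oldSdQName = getStorageDescQFName(oldTableQName); // "sales.orders@cm_storage"
    String newSdQName = getStorageDescQFName(newTableQName); // "sales.orders_v2@cm_storage"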
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityPartialUpdateRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.events.AlterTableEvent; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class AlterTableRenameCol extends AlterTable { + private static final Logger LOG = LoggerFactory.getLogger(AlterTableRenameCol.class); + private final FieldSchema columnOld; + private final FieldSchema columnNew; + + public AlterTableRenameCol(AtlasHiveHookContext context) { + this(null, null, context); + } + + public AlterTableRenameCol(FieldSchema columnOld, FieldSchema columnNew, AtlasHiveHookContext context) { + super(context); + + this.columnOld = columnOld; + this.columnNew = columnNew; + } + + @Override + public List getNotificationMessages() throws Exception { + return context.isMetastoreHook() ? getHiveMetastoreMessages() : getHiveMessages(); + } + + public List getHiveMetastoreMessages() throws Exception { + List baseMsgs = super.getNotificationMessages(); + List ret = new ArrayList<>(baseMsgs); + AlterTableEvent tblEvent = (AlterTableEvent) context.getMetastoreEvent(); + Table oldTable = toTable(tblEvent.getOldTable()); + Table newTable = toTable(tblEvent.getNewTable()); + + processColumns(oldTable, newTable, ret); + + return ret; + } + + public List getHiveMessages() throws Exception { + List baseMsgs = super.getNotificationMessages(); + + if (CollectionUtils.isEmpty(getInputs())) { + LOG.error("AlterTableRenameCol: old-table not found in inputs list"); + + return null; + } + + if (CollectionUtils.isEmpty(getOutputs())) { + LOG.error("AlterTableRenameCol: new-table not found in outputs list"); + + return null; + } + + if (CollectionUtils.isEmpty(baseMsgs)) { + LOG.debug("Skipped processing of column-rename (on a temporary table?)"); + + return null; + } + + List ret = new ArrayList<>(baseMsgs); + Table oldTable = getInputs().iterator().next().getTable(); + Table newTable = getOutputs().iterator().next().getTable(); + + if (newTable != null) { + newTable = getHive().getTable(newTable.getDbName(), newTable.getTableName()); + } + + processColumns(oldTable, newTable, ret); + + return ret; + } + + private void processColumns(Table oldTable, Table newTable, List ret) { + FieldSchema changedColumnOld = (columnOld == null) ? findRenamedColumn(oldTable, newTable) : columnOld; + FieldSchema changedColumnNew = (columnNew == null) ? 
findRenamedColumn(newTable, oldTable) : columnNew; + + if (changedColumnOld != null && changedColumnNew != null) { + AtlasObjectId oldColumnId = new AtlasObjectId(HIVE_TYPE_COLUMN, ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(oldTable, changedColumnOld)); + AtlasEntity newColumn = new AtlasEntity(HIVE_TYPE_COLUMN); + + newColumn.setAttribute(ATTRIBUTE_NAME, changedColumnNew.getName()); + newColumn.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(newTable, changedColumnNew)); + + ret.add(0, new EntityPartialUpdateRequestV2(getUserName(), oldColumnId, new AtlasEntityWithExtInfo(newColumn))); + } else { + LOG.error("AlterTableRenameCol: no renamed column detected"); + } + } + + public static FieldSchema findRenamedColumn(Table inputTable, Table outputTable) { + FieldSchema ret = null; + List inputColumns = inputTable.getCols(); + List outputColumns = outputTable.getCols(); + + for (FieldSchema inputColumn : inputColumns) { + if (!outputColumns.contains(inputColumn)) { + ret = inputColumn; + + break; + } + } + + return ret; + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java new file mode 100644 index 0000000000..3f358139be --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java @@ -0,0 +1,1189 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.hive.hook.HiveHook.PreprocessAction; +import org.apache.atlas.utils.PathExtractorContext; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.instance.AtlasRelatedObjectId; +import org.apache.atlas.model.instance.AtlasStruct; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.atlas.utils.AtlasPathExtractorUtil; +import org.apache.atlas.utils.HdfsNameServiceResolver; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.utils.SecurityUtils; +import org.apache.hadoop.hive.ql.hooks.*; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.util.*; + +import static org.apache.atlas.hive.bridge.HiveMetaStoreBridge.getDatabaseName; +import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_METADATA_NAMESPACE; +import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_ENTITY_NAME; +import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_PROCESS; + +public abstract class BaseHiveEvent { + private static final Logger LOG = LoggerFactory.getLogger(BaseHiveEvent.class); + + public static final String HIVE_TYPE_DB = "hive_db"; + public static final String HIVE_TYPE_TABLE = "hive_table"; + public static final String HIVE_TYPE_STORAGEDESC = "hive_storagedesc"; + public static final String HIVE_TYPE_COLUMN = "hive_column"; + public static final String HIVE_TYPE_PROCESS = "hive_process"; + public static final String HIVE_TYPE_COLUMN_LINEAGE = "hive_column_lineage"; + public static final String HIVE_TYPE_SERDE = "hive_serde"; + public static final String HIVE_TYPE_ORDER = "hive_order"; + public static final String HIVE_TYPE_PROCESS_EXECUTION = "hive_process_execution"; + public static final String HIVE_DB_DDL = "hive_db_ddl"; + public static final String HIVE_TABLE_DDL = "hive_table_ddl"; + public static final String HBASE_TYPE_TABLE = "hbase_table"; + public static final 
String HBASE_TYPE_NAMESPACE = "hbase_namespace"; + public static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName"; + public static final String ATTRIBUTE_NAME = "name"; + public static final String ATTRIBUTE_DESCRIPTION = "description"; + public static final String ATTRIBUTE_OWNER = "owner"; + public static final String ATTRIBUTE_CLUSTER_NAME = "clusterName"; + public static final String ATTRIBUTE_LOCATION = "location"; + public static final String ATTRIBUTE_LOCATION_PATH = "locationPath"; + public static final String ATTRIBUTE_PARAMETERS = "parameters"; + public static final String ATTRIBUTE_OWNER_TYPE = "ownerType"; + public static final String ATTRIBUTE_COMMENT = "comment"; + public static final String ATTRIBUTE_CREATE_TIME = "createTime"; + public static final String ATTRIBUTE_LAST_ACCESS_TIME = "lastAccessTime"; + public static final String ATTRIBUTE_VIEW_ORIGINAL_TEXT = "viewOriginalText"; + public static final String ATTRIBUTE_VIEW_EXPANDED_TEXT = "viewExpandedText"; + public static final String ATTRIBUTE_TABLE_TYPE = "tableType"; + public static final String ATTRIBUTE_TEMPORARY = "temporary"; + public static final String ATTRIBUTE_RETENTION = "retention"; + public static final String ATTRIBUTE_DB = "db"; + public static final String ATTRIBUTE_HIVE_DB = "hiveDb"; + public static final String ATTRIBUTE_STORAGEDESC = "sd"; + public static final String ATTRIBUTE_PARTITION_KEYS = "partitionKeys"; + public static final String ATTRIBUTE_COLUMNS = "columns"; + public static final String ATTRIBUTE_INPUT_FORMAT = "inputFormat"; + public static final String ATTRIBUTE_OUTPUT_FORMAT = "outputFormat"; + public static final String ATTRIBUTE_COMPRESSED = "compressed"; + public static final String ATTRIBUTE_BUCKET_COLS = "bucketCols"; + public static final String ATTRIBUTE_NUM_BUCKETS = "numBuckets"; + public static final String ATTRIBUTE_STORED_AS_SUB_DIRECTORIES = "storedAsSubDirectories"; + public static final String ATTRIBUTE_TABLE = "table"; + public static final String ATTRIBUTE_SERDE_INFO = "serdeInfo"; + public static final String ATTRIBUTE_SERIALIZATION_LIB = "serializationLib"; + public static final String ATTRIBUTE_SORT_COLS = "sortCols"; + public static final String ATTRIBUTE_COL_TYPE = "type"; + public static final String ATTRIBUTE_COL_POSITION = "position"; + public static final String ATTRIBUTE_PATH = "path"; + public static final String ATTRIBUTE_NAMESERVICE_ID = "nameServiceId"; + public static final String ATTRIBUTE_INPUTS = "inputs"; + public static final String ATTRIBUTE_OUTPUTS = "outputs"; + public static final String ATTRIBUTE_OPERATION_TYPE = "operationType"; + public static final String ATTRIBUTE_START_TIME = "startTime"; + public static final String ATTRIBUTE_USER_NAME = "userName"; + public static final String ATTRIBUTE_QUERY_TEXT = "queryText"; + public static final String ATTRIBUTE_PROCESS = "process"; + public static final String ATTRIBUTE_PROCESS_EXECUTIONS = "processExecutions"; + public static final String ATTRIBUTE_QUERY_ID = "queryId"; + public static final String ATTRIBUTE_QUERY_PLAN = "queryPlan"; + public static final String ATTRIBUTE_END_TIME = "endTime"; + public static final String ATTRIBUTE_RECENT_QUERIES = "recentQueries"; + public static final String ATTRIBUTE_QUERY = "query"; + public static final String ATTRIBUTE_DEPENDENCY_TYPE = "depenendencyType"; + public static final String ATTRIBUTE_EXPRESSION = "expression"; + public static final String ATTRIBUTE_ALIASES = "aliases"; + public static final String ATTRIBUTE_URI = "uri"; + public static final 
String ATTRIBUTE_STORAGE_HANDLER = "storage_handler"; + public static final String ATTRIBUTE_NAMESPACE = "namespace"; + public static final String ATTRIBUTE_HOSTNAME = "hostName"; + public static final String ATTRIBUTE_EXEC_TIME = "execTime"; + public static final String ATTRIBUTE_DDL_QUERIES = "ddlQueries"; + public static final String ATTRIBUTE_SERVICE_TYPE = "serviceType"; + public static final String ATTRIBUTE_GUID = "guid"; + public static final String ATTRIBUTE_UNIQUE_ATTRIBUTES = "uniqueAttributes"; + public static final String HBASE_STORAGE_HANDLER_CLASS = "org.apache.hadoop.hive.hbase.HBaseStorageHandler"; + public static final String HBASE_DEFAULT_NAMESPACE = "default"; + public static final String HBASE_NAMESPACE_TABLE_DELIMITER = ":"; + public static final String HBASE_PARAM_TABLE_NAME = "hbase.table.name"; + public static final long MILLIS_CONVERT_FACTOR = 1000; + public static final String HDFS_PATH_PREFIX = "hdfs://"; + public static final String EMPTY_ATTRIBUTE_VALUE = ""; + + public static final String RELATIONSHIP_DATASET_PROCESS_INPUTS = "dataset_process_inputs"; + public static final String RELATIONSHIP_PROCESS_DATASET_OUTPUTS = "process_dataset_outputs"; + public static final String RELATIONSHIP_HIVE_PROCESS_COLUMN_LINEAGE = "hive_process_column_lineage"; + public static final String RELATIONSHIP_HIVE_TABLE_DB = "hive_table_db"; + public static final String RELATIONSHIP_HIVE_TABLE_PART_KEYS = "hive_table_partitionkeys"; + public static final String RELATIONSHIP_HIVE_TABLE_COLUMNS = "hive_table_columns"; + public static final String RELATIONSHIP_HIVE_TABLE_STORAGE_DESC = "hive_table_storagedesc"; + public static final String RELATIONSHIP_HIVE_PROCESS_PROCESS_EXE = "hive_process_process_executions"; + public static final String RELATIONSHIP_HIVE_DB_DDL_QUERIES = "hive_db_ddl_queries"; + public static final String RELATIONSHIP_HIVE_DB_LOCATION = "hive_db_location"; + public static final String RELATIONSHIP_HIVE_TABLE_DDL_QUERIES = "hive_table_ddl_queries"; + public static final String RELATIONSHIP_HBASE_TABLE_NAMESPACE = "hbase_table_namespace"; + + + public static final Map OWNER_TYPE_TO_ENUM_VALUE = new HashMap<>(); + + protected final boolean skipTempTables; + + static { + OWNER_TYPE_TO_ENUM_VALUE.put(1, "USER"); + OWNER_TYPE_TO_ENUM_VALUE.put(2, "ROLE"); + OWNER_TYPE_TO_ENUM_VALUE.put(3, "GROUP"); + } + + protected final AtlasHiveHookContext context; + + + protected BaseHiveEvent(AtlasHiveHookContext context) { + this.context = context; + this.skipTempTables = context.isSkipTempTables(); + } + + public AtlasHiveHookContext getContext() { + return context; + } + + public List getNotificationMessages() throws Exception { + return null; + } + + public static long getTableCreateTime(Table table) { + return table.getTTable() != null ? (table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR) : System.currentTimeMillis(); + } + + public static String getTableOwner(Table table) { + return table.getTTable() != null ? 
(table.getOwner()): ""; + } + + + public static List getObjectIds(List entities) { + final List ret; + + if (CollectionUtils.isNotEmpty(entities)) { + ret = new ArrayList<>(entities.size()); + + for (AtlasEntity entity : entities) { + ret.add(AtlasTypeUtil.getObjectId(entity)); + } + } else { + ret = Collections.emptyList(); + } + + return ret; + } + + + protected void addProcessedEntities(AtlasEntitiesWithExtInfo entitiesWithExtInfo) { + for (AtlasEntity entity : context.getEntities()) { + entitiesWithExtInfo.addReferredEntity(entity); + } + + entitiesWithExtInfo.compact(); + + context.addToKnownEntities(entitiesWithExtInfo.getEntities()); + + if (entitiesWithExtInfo.getReferredEntities() != null) { + context.addToKnownEntities(entitiesWithExtInfo.getReferredEntities().values()); + } + } + + protected AtlasEntity getInputOutputEntity(Entity entity, AtlasEntityExtInfo entityExtInfo, boolean skipTempTables) throws Exception { + AtlasEntity ret = null; + + switch(entity.getType()) { + case TABLE: + case PARTITION: + case DFS_DIR: + case LOCAL_DIR: { + ret = toAtlasEntity(entity, entityExtInfo, skipTempTables); + } + break; + } + + return ret; + } + + protected AtlasEntity toAtlasEntity(Entity entity, AtlasEntityExtInfo entityExtInfo, boolean skipTempTables) throws Exception { + AtlasEntity ret = null; + + switch (entity.getType()) { + case DATABASE: { + String dbName = getDatabaseName(entity.getDatabase()); + + if (!context.getIgnoreDummyDatabaseName().contains(dbName)) { + Database db = getHive().getDatabase(dbName); + + ret = toDbEntity(db); + } + } + break; + + case TABLE: + case PARTITION: { + String dbName = entity.getTable().getDbName(); + String tableName = entity.getTable().getTableName(); + boolean skipTable = StringUtils.isNotEmpty(context.getIgnoreValuesTmpTableNamePrefix()) && tableName.toLowerCase().startsWith(context.getIgnoreValuesTmpTableNamePrefix()); + + if (!skipTable) { + skipTable = context.getIgnoreDummyTableName().contains(tableName) && context.getIgnoreDummyDatabaseName().contains(dbName); + } + + if (!skipTable) { + skipTable = skipTempTables && entity.getTable().isTemporary(); + } + + if (!skipTable) { + Table table = getHive().getTable(dbName, tableName); + + ret = toTableEntity(table, entityExtInfo); + } else { + context.registerSkippedEntity(entity); + } + } + break; + + case DFS_DIR: + case LOCAL_DIR: { + URI location = entity.getLocation(); + + if (location != null) { + ret = getPathEntity(new Path(entity.getLocation()), entityExtInfo); + } + } + break; + + default: + break; + } + + return ret; + } + + protected AtlasEntity toDbEntity(Database db) throws Exception { + String dbName = getDatabaseName(db); + String dbQualifiedName = getQualifiedName(db); + boolean isKnownDatabase = context.isKnownDatabase(dbQualifiedName); + AtlasEntity ret = context.getEntity(dbQualifiedName); + + if (ret == null) { + ret = new AtlasEntity(HIVE_TYPE_DB); + + // if this DB was sent in an earlier notification, set 'guid' to null - which will: + // - result in this entity to be not included in 'referredEntities' + // - cause Atlas server to resolve the entity by its qualifiedName + if (isKnownDatabase) { + ret.setGuid(null); + } + + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, dbQualifiedName); + ret.setAttribute(ATTRIBUTE_NAME, dbName); + + if (StringUtils.isNotEmpty(db.getDescription())) { + ret.setAttribute(ATTRIBUTE_DESCRIPTION, db.getDescription()); + } + + ret.setAttribute(ATTRIBUTE_OWNER, db.getOwnerName()); + + ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, getMetadataNamespace()); 
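+            // descriptive note: the location set below is passed through HdfsNameServiceResolver, which (where an HDFS HA nameservice is configured) replaces the namenode host:port in the URI with the logical nameservice ID, so equivalent HA and non-HA paths map to the same entity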
+ ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(db.getLocationUri())); + ret.setAttribute(ATTRIBUTE_PARAMETERS, db.getParameters()); + + if (db.getOwnerType() != null) { + ret.setAttribute(ATTRIBUTE_OWNER_TYPE, OWNER_TYPE_TO_ENUM_VALUE.get(db.getOwnerType().getValue())); + } + + context.putEntity(dbQualifiedName, ret); + } + + return ret; + } + + protected AtlasEntityWithExtInfo toTableEntity(Table table) throws Exception { + AtlasEntityWithExtInfo ret = new AtlasEntityWithExtInfo(); + + AtlasEntity entity = toTableEntity(table, ret); + + if (entity != null) { + ret.setEntity(entity); + } else { + ret = null; + } + + return ret; + } + + protected AtlasEntity toTableEntity(Table table, AtlasEntitiesWithExtInfo entities) throws Exception { + AtlasEntity ret = toTableEntity(table, (AtlasEntityExtInfo) entities); + + if (ret != null) { + entities.addEntity(ret); + } + + return ret; + } + + protected AtlasEntity toTableEntity(Table table, AtlasEntityExtInfo entityExtInfo) throws Exception { + Database db = getDatabases(table.getDbName()); + AtlasEntity dbEntity = toDbEntity(db); + + if (entityExtInfo != null) { + if (dbEntity != null) { + entityExtInfo.addReferredEntity(dbEntity); + } + } + + AtlasEntity ret = toTableEntity(AtlasTypeUtil.getObjectId(dbEntity), table, entityExtInfo); + + return ret; + } + + protected AtlasEntity toTableEntity(AtlasObjectId dbId, Table table, AtlasEntityExtInfo entityExtInfo) throws Exception { + String tblQualifiedName = getQualifiedName(table); + boolean isKnownTable = context.isKnownTable(tblQualifiedName); + + AtlasEntity ret = context.getEntity(tblQualifiedName); + + if (ret == null) { + PreprocessAction action = context.getPreprocessActionForHiveTable(tblQualifiedName); + + if (action == PreprocessAction.IGNORE) { + LOG.info("ignoring table {}", tblQualifiedName); + } else { + ret = new AtlasEntity(HIVE_TYPE_TABLE); + + // if this table was sent in an earlier notification, set 'guid' to null - which will: + // - result in this entity to be not included in 'referredEntities' + // - cause Atlas server to resolve the entity by its qualifiedName + if (isKnownTable && !isAlterTableOperation()) { + ret.setGuid(null); + } + + long createTime = getTableCreateTime(table); + long lastAccessTime = table.getLastAccessTime() > 0 ? 
(table.getLastAccessTime() * MILLIS_CONVERT_FACTOR) : createTime; + + AtlasRelatedObjectId dbRelatedObject = new AtlasRelatedObjectId(dbId, RELATIONSHIP_HIVE_TABLE_DB); + + ret.setRelationshipAttribute(ATTRIBUTE_DB, dbRelatedObject ); + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tblQualifiedName); + ret.setAttribute(ATTRIBUTE_NAME, table.getTableName().toLowerCase()); + ret.setAttribute(ATTRIBUTE_OWNER, table.getOwner()); + ret.setAttribute(ATTRIBUTE_CREATE_TIME, createTime); + ret.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime); + ret.setAttribute(ATTRIBUTE_RETENTION, table.getRetention()); + ret.setAttribute(ATTRIBUTE_PARAMETERS, table.getParameters()); + ret.setAttribute(ATTRIBUTE_COMMENT, table.getParameters().get(ATTRIBUTE_COMMENT)); + ret.setAttribute(ATTRIBUTE_TABLE_TYPE, table.getTableType().name()); + ret.setAttribute(ATTRIBUTE_TEMPORARY, table.isTemporary()); + + if (table.getViewOriginalText() != null) { + ret.setAttribute(ATTRIBUTE_VIEW_ORIGINAL_TEXT, table.getViewOriginalText()); + } + + if (table.getViewExpandedText() != null) { + ret.setAttribute(ATTRIBUTE_VIEW_EXPANDED_TEXT, table.getViewExpandedText()); + } + + boolean pruneTable = table.isTemporary() || action == PreprocessAction.PRUNE; + + if (pruneTable) { + LOG.info("ignoring details of table {}", tblQualifiedName); + } else { + AtlasObjectId tableId = AtlasTypeUtil.getObjectId(ret); + AtlasEntity sd = getStorageDescEntity(tableId, table); + List partitionKeys = getColumnEntities(tableId, table, table.getPartitionKeys(), RELATIONSHIP_HIVE_TABLE_PART_KEYS); + List columns = getColumnEntities(tableId, table, table.getCols(), RELATIONSHIP_HIVE_TABLE_COLUMNS); + + + + if (entityExtInfo != null) { + entityExtInfo.addReferredEntity(sd); + + if (partitionKeys != null) { + for (AtlasEntity partitionKey : partitionKeys) { + entityExtInfo.addReferredEntity(partitionKey); + } + } + + if (columns != null) { + for (AtlasEntity column : columns) { + entityExtInfo.addReferredEntity(column); + } + } + } + + + ret.setRelationshipAttribute(ATTRIBUTE_STORAGEDESC, AtlasTypeUtil.getAtlasRelatedObjectId(sd, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC)); + ret.setRelationshipAttribute(ATTRIBUTE_PARTITION_KEYS, AtlasTypeUtil.getAtlasRelatedObjectIds(partitionKeys, RELATIONSHIP_HIVE_TABLE_PART_KEYS)); + ret.setRelationshipAttribute(ATTRIBUTE_COLUMNS, AtlasTypeUtil.getAtlasRelatedObjectIds(columns, RELATIONSHIP_HIVE_TABLE_COLUMNS)); + } + + context.putEntity(tblQualifiedName, ret); + } + } + + return ret; + } + + protected AtlasEntity getStorageDescEntity(AtlasObjectId tableId, Table table) { + String sdQualifiedName = getQualifiedName(table, table.getSd()); + boolean isKnownTable = tableId.getGuid() == null; + + AtlasEntity ret = context.getEntity(sdQualifiedName); + + if (ret == null) { + ret = new AtlasEntity(HIVE_TYPE_STORAGEDESC); + + // if sd's table was sent in an earlier notification, set 'guid' to null - which will: + // - result in this entity to be not included in 'referredEntities' + // - cause Atlas server to resolve the entity by its qualifiedName + if (isKnownTable) { + ret.setGuid(null); + } + + StorageDescriptor sd = table.getSd(); + + AtlasRelatedObjectId tableRelatedObject = new AtlasRelatedObjectId(tableId, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC); + + ret.setRelationshipAttribute(ATTRIBUTE_TABLE, tableRelatedObject); + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName); + ret.setAttribute(ATTRIBUTE_PARAMETERS, sd.getParameters()); + ret.setAttribute(ATTRIBUTE_LOCATION, 
HdfsNameServiceResolver.getPathWithNameServiceID(sd.getLocation())); + ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, sd.getInputFormat()); + ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, sd.getOutputFormat()); + ret.setAttribute(ATTRIBUTE_COMPRESSED, sd.isCompressed()); + ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, sd.getNumBuckets()); + ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, sd.isStoredAsSubDirectories()); + + if (sd.getBucketCols() != null && sd.getBucketCols().size() > 0) { + ret.setAttribute(ATTRIBUTE_BUCKET_COLS, sd.getBucketCols()); + } + + if (sd.getSerdeInfo() != null) { + AtlasStruct serdeInfo = new AtlasStruct(HIVE_TYPE_SERDE); + SerDeInfo sdSerDeInfo = sd.getSerdeInfo(); + + serdeInfo.setAttribute(ATTRIBUTE_NAME, sdSerDeInfo.getName()); + serdeInfo.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, sdSerDeInfo.getSerializationLib()); + serdeInfo.setAttribute(ATTRIBUTE_PARAMETERS, sdSerDeInfo.getParameters()); + + ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfo); + } + + if (CollectionUtils.isNotEmpty(sd.getSortCols())) { + List sortCols = new ArrayList<>(sd.getSortCols().size()); + + for (Order sdSortCol : sd.getSortCols()) { + AtlasStruct sortcol = new AtlasStruct(HIVE_TYPE_ORDER); + + sortcol.setAttribute("col", sdSortCol.getCol()); + sortcol.setAttribute("order", sdSortCol.getOrder()); + + sortCols.add(sortcol); + } + + ret.setAttribute(ATTRIBUTE_SORT_COLS, sortCols); + } + + context.putEntity(sdQualifiedName, ret); + } + + return ret; + } + + protected List getColumnEntities(AtlasObjectId tableId, Table table, List fieldSchemas, String relationshipType) { + List ret = new ArrayList<>(); + boolean isKnownTable = tableId.getGuid() == null; + int columnPosition = 0; + + if (CollectionUtils.isNotEmpty(fieldSchemas)) { + for (FieldSchema fieldSchema : fieldSchemas) { + String colQualifiedName = getQualifiedName(table, fieldSchema); + AtlasEntity column = context.getEntity(colQualifiedName); + + if (column == null) { + column = new AtlasEntity(HIVE_TYPE_COLUMN); + + // if column's table was sent in an earlier notification, set 'guid' to null - which will: + // - result in this entity to be not included in 'referredEntities' + // - cause Atlas server to resolve the entity by its qualifiedName + if (isKnownTable) { + column.setGuid(null); + } + AtlasRelatedObjectId relatedObjectId = new AtlasRelatedObjectId(tableId, relationshipType); + column.setRelationshipAttribute(ATTRIBUTE_TABLE, (relatedObjectId)); + column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, colQualifiedName); + column.setAttribute(ATTRIBUTE_NAME, fieldSchema.getName()); + column.setAttribute(ATTRIBUTE_OWNER, table.getOwner()); + column.setAttribute(ATTRIBUTE_COL_TYPE, fieldSchema.getType()); + column.setAttribute(ATTRIBUTE_COL_POSITION, columnPosition++); + column.setAttribute(ATTRIBUTE_COMMENT, fieldSchema.getComment()); + + context.putEntity(colQualifiedName, column); + } + + ret.add(column); + } + } + + return ret; + } + + protected AtlasEntity getPathEntity(Path path, AtlasEntityExtInfo extInfo) { + String strPath = path.toString(); + String metadataNamespace = getMetadataNamespace(); + boolean isConvertPathToLowerCase = strPath.startsWith(HDFS_PATH_PREFIX) && context.isConvertHdfsPathToLowerCase(); + PathExtractorContext pathExtractorContext = new PathExtractorContext(metadataNamespace, context.getQNameToEntityMap(), + isConvertPathToLowerCase, context.getAwsS3AtlasModelVersion()); + + AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, pathExtractorContext); + + if 
(entityWithExtInfo.getReferredEntities() != null){ + for (AtlasEntity entity : entityWithExtInfo.getReferredEntities().values()) { + extInfo.addReferredEntity(entity); + } + } + + return entityWithExtInfo.getEntity(); + } + + protected AtlasEntity getHiveProcessEntity(List inputs, List outputs) throws Exception { + AtlasEntity ret = new AtlasEntity(HIVE_TYPE_PROCESS); + String queryStr = getQueryString(); + String qualifiedName = getQualifiedName(inputs, outputs); + + if (queryStr != null) { + queryStr = queryStr.toLowerCase().trim(); + } + + ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, getOperationName()); + + if (context.isMetastoreHook()) { + HiveOperation operation = context.getHiveOperation(); + + if (operation == HiveOperation.CREATETABLE || operation == HiveOperation.CREATETABLE_AS_SELECT) { + AtlasEntity table = outputs.get(0); + long createTime = Long.valueOf((Long)table.getAttribute(ATTRIBUTE_CREATE_TIME)); + qualifiedName = (String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + QNAME_SEP_PROCESS + createTime; + + ret.setAttribute(ATTRIBUTE_NAME, "dummyProcess:" + UUID.randomUUID()); + ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, operation.getOperationName()); + } + } + + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, qualifiedName); + ret.setAttribute(ATTRIBUTE_NAME, qualifiedName); + ret.setRelationshipAttribute(ATTRIBUTE_INPUTS, AtlasTypeUtil.getAtlasRelatedObjectIds(inputs, RELATIONSHIP_DATASET_PROCESS_INPUTS)); + ret.setRelationshipAttribute(ATTRIBUTE_OUTPUTS, AtlasTypeUtil.getAtlasRelatedObjectIds(outputs, RELATIONSHIP_PROCESS_DATASET_OUTPUTS)); + + // We are setting an empty value to these attributes, since now we have a new entity type called hive process + // execution which captures these values. We have to set empty values here because these attributes are + // mandatory attributes for hive process entity type. 
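+        // start/end time on hive_process are stamped with the current time here for the same reason; the actual per-run values (user, query text, query id, timings) are captured on the hive_process_execution entity created alongside, and are duplicated onto hive_process only when isHiveProcessPopulateDeprecatedAttributes() is enabled below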
+ ret.setAttribute(ATTRIBUTE_START_TIME, System.currentTimeMillis()); + ret.setAttribute(ATTRIBUTE_END_TIME, System.currentTimeMillis()); + + if (context.isHiveProcessPopulateDeprecatedAttributes()) { + ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName()); + ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr); + ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId()); + } else { + ret.setAttribute(ATTRIBUTE_USER_NAME, EMPTY_ATTRIBUTE_VALUE); + ret.setAttribute(ATTRIBUTE_QUERY_TEXT, EMPTY_ATTRIBUTE_VALUE); + ret.setAttribute(ATTRIBUTE_QUERY_ID, EMPTY_ATTRIBUTE_VALUE); + } + + ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported"); + ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr)); + ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, getMetadataNamespace()); + + return ret; + } + + protected AtlasEntity getHiveProcessExecutionEntity(AtlasEntity hiveProcess) throws Exception { + AtlasEntity ret = new AtlasEntity(HIVE_TYPE_PROCESS_EXECUTION); + String queryStr = getQueryString(); + + if (queryStr != null) { + queryStr = queryStr.toLowerCase().trim(); + } + + Long endTime = System.currentTimeMillis(); + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString() + + QNAME_SEP_PROCESS + getQueryStartTime().toString() + + QNAME_SEP_PROCESS + endTime.toString()); + ret.setAttribute(ATTRIBUTE_NAME, ret.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + ret.setAttribute(ATTRIBUTE_START_TIME, getQueryStartTime()); + ret.setAttribute(ATTRIBUTE_END_TIME, endTime); + ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName()); + ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr); + ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId()); + ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported"); + ret.setAttribute(ATTRIBUTE_HOSTNAME, getContext().getHostName()); // + AtlasRelatedObjectId hiveProcessRelationObjectId = AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess, RELATIONSHIP_HIVE_PROCESS_PROCESS_EXE); + ret.setRelationshipAttribute(ATTRIBUTE_PROCESS, hiveProcessRelationObjectId); + return ret; + } + + protected AtlasEntity createHiveDDLEntity(AtlasEntity dbOrTable) { + return createHiveDDLEntity(dbOrTable, false); + } + + protected AtlasEntity createHiveDDLEntity(AtlasEntity dbOrTable, boolean excludeEntityGuid) { + AtlasObjectId objId = AtlasTypeUtil.getObjectId(dbOrTable); + AtlasEntity hiveDDL = null; + + if (excludeEntityGuid) { + objId.setGuid(null); + } + AtlasRelatedObjectId objIdRelatedObject = new AtlasRelatedObjectId(objId); + + if (StringUtils.equals(objId.getTypeName(), HIVE_TYPE_DB)) { + hiveDDL = new AtlasEntity(HIVE_DB_DDL); + objIdRelatedObject.setRelationshipType(RELATIONSHIP_HIVE_DB_DDL_QUERIES); + hiveDDL.setRelationshipAttribute(ATTRIBUTE_DB, objIdRelatedObject); + } else if (StringUtils.equals(objId.getTypeName(), HIVE_TYPE_TABLE)) { + hiveDDL = new AtlasEntity(HIVE_TABLE_DDL); + objIdRelatedObject.setRelationshipType(RELATIONSHIP_HIVE_TABLE_DDL_QUERIES); + hiveDDL.setRelationshipAttribute( ATTRIBUTE_TABLE, objIdRelatedObject); + } + + if (hiveDDL != null) { + hiveDDL.setAttribute(ATTRIBUTE_SERVICE_TYPE, "hive"); + hiveDDL.setAttribute(ATTRIBUTE_EXEC_TIME, getQueryStartTime()); + hiveDDL.setAttribute(ATTRIBUTE_QUERY_TEXT, getQueryString()); + hiveDDL.setAttribute(ATTRIBUTE_USER_NAME, getUserName()); + hiveDDL.setAttribute(ATTRIBUTE_NAME, getQueryString()); + hiveDDL.setAttribute(ATTRIBUTE_QUALIFIED_NAME, dbOrTable.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString() + + QNAME_SEP_PROCESS + getQueryStartTime().toString()); + } + + return 
hiveDDL; + } + + protected AtlasEntity createHiveLocationEntity(AtlasEntity dbEntity, AtlasEntitiesWithExtInfo extInfoEntity) { + AtlasEntity ret = null; + String locationUri = (String)dbEntity.getAttribute(ATTRIBUTE_LOCATION); + + if (StringUtils.isNotEmpty(locationUri)) { + Path path = null; + + try { + path = new Path(locationUri); + } catch (IllegalArgumentException excp) { + LOG.warn("failed to create Path from locationUri {}", locationUri, excp); + } + + if (path != null) { + ret = getPathEntity(path, extInfoEntity); + + if (ret != null) { + AtlasRelatedObjectId dbRelatedObjectId = AtlasTypeUtil.getAtlasRelatedObjectId(dbEntity, RELATIONSHIP_HIVE_DB_LOCATION); + + ret.setRelationshipAttribute(ATTRIBUTE_HIVE_DB, dbRelatedObjectId); + } + } + } + + return ret; + } + + protected String getMetadataNamespace() { + return context.getMetadataNamespace(); + } + + protected Database getDatabases(String dbName) throws Exception { + return context.isMetastoreHook() ? context.getMetastoreHandler().get_database(dbName) : + context.getHive().getDatabase(dbName); + } + + protected Hive getHive() { + return context.getHive(); + } + + protected Set getInputs() { + return context != null ? context.getInputs() : Collections.emptySet(); + } + + protected Set getOutputs() { + return context != null ? context.getOutputs() : Collections.emptySet(); + } + + protected LineageInfo getLineageInfo() { + return context != null ? context.getLineageInfo() : null; + } + + protected String getQueryString() { + return isHiveContextValid() ? context.getHiveContext().getQueryPlan().getQueryStr() : null; + } + + protected String getOperationName() { + return isHiveContextValid() ? context.getHiveContext().getOperationName() : null; + } + + protected String getHiveUserName() { + return isHiveContextValid() ? context.getHiveContext().getUserName() : null; + } + + protected UserGroupInformation getUgi() { + return isHiveContextValid() ? context.getHiveContext().getUgi() : null; + } + + protected Long getQueryStartTime() { + return isHiveContextValid() ? context.getHiveContext().getQueryPlan().getQueryStartTime() : System.currentTimeMillis(); + } + + protected String getQueryId() { + return isHiveContextValid() ? 
context.getHiveContext().getQueryPlan().getQueryId() : null; + } + + private boolean isHiveContextValid() { + return context != null && context.getHiveContext() != null; + } + + protected String getUserName() { + String ret = null; + UserGroupInformation ugi = null; + + if (context.isMetastoreHook()) { + try { + ugi = SecurityUtils.getUGI(); + } catch (Exception e) { + //do nothing + } + } else { + ret = getHiveUserName(); + + if (StringUtils.isEmpty(ret)) { + ugi = getUgi(); + } + } + + if (ugi != null) { + ret = ugi.getShortUserName(); + } + + if (StringUtils.isEmpty(ret)) { + try { + ret = UserGroupInformation.getCurrentUser().getShortUserName(); + } catch (IOException e) { + LOG.warn("Failed for UserGroupInformation.getCurrentUser() ", e); + + ret = System.getProperty("user.name"); + } + } + + return ret; + } + + protected String getQualifiedName(Entity entity) throws Exception { + switch (entity.getType()) { + case DATABASE: + return getQualifiedName(entity.getDatabase()); + + case TABLE: + case PARTITION: + return getQualifiedName(entity.getTable()); + + case DFS_DIR: + case LOCAL_DIR: + return getQualifiedName(entity.getLocation()); + } + + return null; + } + + protected String getQualifiedName(Database db) { + return context.getQualifiedName(db); + } + + protected String getQualifiedName(Table table) { + return context.getQualifiedName(table); + } + + protected String getQualifiedName(Table table, StorageDescriptor sd) { + return getQualifiedName(table) + "_storage"; + } + + protected String getQualifiedName(Table table, FieldSchema column) { + String tblQualifiedName = getQualifiedName(table); + + int sepPos = tblQualifiedName.lastIndexOf(QNAME_SEP_METADATA_NAMESPACE); + + if (sepPos == -1) { + return tblQualifiedName + QNAME_SEP_ENTITY_NAME + column.getName().toLowerCase(); + } else { + return tblQualifiedName.substring(0, sepPos) + QNAME_SEP_ENTITY_NAME + column.getName().toLowerCase() + tblQualifiedName.substring(sepPos); + } + } + + protected String getQualifiedName(DependencyKey column) { + String dbName = column.getDataContainer().getTable().getDbName(); + String tableName = column.getDataContainer().getTable().getTableName(); + String colName = column.getFieldSchema().getName(); + + return getQualifiedName(dbName, tableName, colName); + } + + protected String getQualifiedName(BaseColumnInfo column) { + String dbName = column.getTabAlias().getTable().getDbName(); + String tableName = column.getTabAlias().getTable().getTableName(); + String colName = column.getColumn() != null ? 
column.getColumn().getName() : null; + String metadataNamespace = getMetadataNamespace(); + + if (colName == null) { + return (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + metadataNamespace; + } else { + return (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_ENTITY_NAME + colName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + metadataNamespace; + } + } + + protected String getQualifiedName(String dbName, String tableName, String colName) { + return (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_ENTITY_NAME + colName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + getMetadataNamespace(); + } + + protected String getQualifiedName(URI location) { + String strPath = new Path(location).toString(); + + if (strPath.startsWith(HDFS_PATH_PREFIX) && context.isConvertHdfsPathToLowerCase()) { + strPath = strPath.toLowerCase(); + } + + String nameServiceID = HdfsNameServiceResolver.getNameServiceIDForPath(strPath); + String attrPath = StringUtils.isEmpty(nameServiceID) ? strPath : HdfsNameServiceResolver.getPathWithNameServiceID(strPath); + + return getQualifiedName(attrPath); + } + + protected String getQualifiedName(String path) { + if (path.startsWith(HdfsNameServiceResolver.HDFS_SCHEME)) { + return path + QNAME_SEP_METADATA_NAMESPACE + getMetadataNamespace(); + } + + return path.toLowerCase(); + } + + protected String getColumnQualifiedName(String tblQualifiedName, String columnName) { + int sepPos = tblQualifiedName.lastIndexOf(QNAME_SEP_METADATA_NAMESPACE); + + if (sepPos == -1) { + return tblQualifiedName + QNAME_SEP_ENTITY_NAME + columnName.toLowerCase(); + } else { + return tblQualifiedName.substring(0, sepPos) + QNAME_SEP_ENTITY_NAME + columnName.toLowerCase() + tblQualifiedName.substring(sepPos); + } + + } + + protected String getQualifiedName(List inputs, List outputs) throws Exception { + HiveOperation operation = context.getHiveOperation(); + + if (operation == HiveOperation.CREATETABLE || + operation == HiveOperation.CREATETABLE_AS_SELECT || + operation == HiveOperation.CREATEVIEW || + operation == HiveOperation.ALTERVIEW_AS || + operation == HiveOperation.ALTERTABLE_LOCATION) { + List sortedEntities = new ArrayList<>(getOutputs()); + + Collections.sort(sortedEntities, entityComparator); + + for (Entity entity : sortedEntities) { + if (entity.getType() == Entity.Type.TABLE) { + Table table = entity.getTable(); + + table = getHive().getTable(table.getDbName(), table.getTableName()); + + long createTime = getTableCreateTime(table); + + return getQualifiedName(table) + QNAME_SEP_PROCESS + createTime; + } + } + } + + String qualifiedName = null; + String operationName = getOperationName(); + + if (operationName != null) { + StringBuilder sb = new StringBuilder(operationName); + + boolean ignoreHDFSPaths = ignoreHDFSPathsinProcessQualifiedName(); + + addToProcessQualifiedName(sb, getInputs(), ignoreHDFSPaths); + sb.append("->"); + addToProcessQualifiedName(sb, getOutputs(), ignoreHDFSPaths); + + qualifiedName = sb.toString(); + } + + + return qualifiedName; + } + + protected AtlasEntity toReferencedHBaseTable(Table table, AtlasEntitiesWithExtInfo entities) { + AtlasEntity ret = null; + HBaseTableInfo hBaseTableInfo = new HBaseTableInfo(table); + String hbaseNameSpace = hBaseTableInfo.getHbaseNameSpace(); + String hbaseTableName = hBaseTableInfo.getHbaseTableName(); + String metadataNamespace = getMetadataNamespace(); + + if (hbaseTableName != null) { + AtlasEntity nsEntity = new AtlasEntity(HBASE_TYPE_NAMESPACE); + 
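// the hbase_namespace entity is built first and added only as a referred entity; the hbase_table created next points to it through the hbase_table_namespace relationship +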
nsEntity.setAttribute(ATTRIBUTE_NAME, hbaseNameSpace); + nsEntity.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace); + nsEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHBaseNameSpaceQualifiedName(metadataNamespace, hbaseNameSpace)); + + ret = new AtlasEntity(HBASE_TYPE_TABLE); + + ret.setAttribute(ATTRIBUTE_NAME, hbaseTableName); + ret.setAttribute(ATTRIBUTE_URI, hbaseTableName); + + AtlasRelatedObjectId objIdRelatedObject = new AtlasRelatedObjectId(AtlasTypeUtil.getObjectId(nsEntity), RELATIONSHIP_HBASE_TABLE_NAMESPACE); + + ret.setRelationshipAttribute(ATTRIBUTE_NAMESPACE, objIdRelatedObject); + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHBaseTableQualifiedName(metadataNamespace, hbaseNameSpace, hbaseTableName)); + + entities.addReferredEntity(nsEntity); + entities.addEntity(ret); + } + + return ret; + } + + protected boolean isHBaseStore(Table table) { + boolean ret = false; + Map parameters = table.getParameters(); + + if (MapUtils.isNotEmpty(parameters)) { + String storageHandler = parameters.get(ATTRIBUTE_STORAGE_HANDLER); + + ret = (storageHandler != null && storageHandler.equals(HBASE_STORAGE_HANDLER_CLASS)); + } + + return ret; + } + + private static String getHBaseTableQualifiedName(String metadataNamespace, String nameSpace, String tableName) { + return String.format("%s:%s@%s", nameSpace.toLowerCase(), tableName.toLowerCase(), metadataNamespace); + } + + private static String getHBaseNameSpaceQualifiedName(String metadataNamespace, String nameSpace) { + return String.format("%s@%s", nameSpace.toLowerCase(), metadataNamespace); + } + + private boolean ignoreHDFSPathsinProcessQualifiedName() { + switch (context.getHiveOperation()) { + case LOAD: + case IMPORT: + return hasPartitionEntity(getOutputs()); + case EXPORT: + return hasPartitionEntity(getInputs()); + case QUERY: + return true; + } + + return false; + } + + private boolean hasPartitionEntity(Collection entities) { + if (entities != null) { + for (Entity entity : entities) { + if (entity.getType() == Entity.Type.PARTITION) { + return true; + } + } + } + + return false; + } + + private void addToProcessQualifiedName(StringBuilder processQualifiedName, Collection entities, boolean ignoreHDFSPaths) { + if (entities == null) { + return; + } + + List sortedEntities = new ArrayList<>(entities); + + Collections.sort(sortedEntities, entityComparator); + + Set dataSetsProcessed = new HashSet<>(); + Map tableMap = new HashMap<>(); + + for (Entity entity : sortedEntities) { + if (ignoreHDFSPaths && (Entity.Type.DFS_DIR.equals(entity.getType()) || Entity.Type.LOCAL_DIR.equals(entity.getType()))) { + continue; + } + + String qualifiedName = null; + long createTime = 0; + + try { + if (entity.getType() == Entity.Type.PARTITION || entity.getType() == Entity.Type.TABLE) { + String tableKey = entity.getTable().getDbName() + "." + entity.getTable().getTableName(); + Table table = tableMap.get(tableKey); + + if (table == null) { + table = getHive().getTable(entity.getTable().getDbName(), entity.getTable().getTableName()); + + tableMap.put(tableKey, table); //since there could be several partitions in a table, store it to avoid hive calls. 
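+                        // the resolved Table also supplies createTime, which is appended to the process qualifiedName further below when non-zero, keeping references to a later re-created table of the same name distinct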
+ } + if (table != null) { + createTime = getTableCreateTime(table); + qualifiedName = getQualifiedName(table); + } + } else { + qualifiedName = getQualifiedName(entity); + } + } catch (Exception excp) { + LOG.error("error while computing qualifiedName for process", excp); + } + + if (qualifiedName == null || !dataSetsProcessed.add(qualifiedName)) { + continue; + } + + if (entity instanceof WriteEntity) { // output entity + WriteEntity writeEntity = (WriteEntity) entity; + + if (writeEntity.getWriteType() != null && HiveOperation.QUERY.equals(context.getHiveOperation())) { + boolean addWriteType = false; + + switch (((WriteEntity) entity).getWriteType()) { + case INSERT: + case INSERT_OVERWRITE: + case UPDATE: + case DELETE: + addWriteType = true; + break; + + case PATH_WRITE: + addWriteType = !Entity.Type.LOCAL_DIR.equals(entity.getType()); + break; + } + + if (addWriteType) { + processQualifiedName.append(QNAME_SEP_PROCESS).append(writeEntity.getWriteType().name()); + } + } + } + + processQualifiedName.append(QNAME_SEP_PROCESS).append(qualifiedName.toLowerCase().replaceAll("/", "")); + + if (createTime != 0) { + processQualifiedName.append(QNAME_SEP_PROCESS).append(createTime); + } + } + } + + private boolean isAlterTableOperation() { + switch (context.getHiveOperation()) { + case ALTERTABLE_FILEFORMAT: + case ALTERTABLE_CLUSTER_SORT: + case ALTERTABLE_BUCKETNUM: + case ALTERTABLE_PROPERTIES: + case ALTERTABLE_SERDEPROPERTIES: + case ALTERTABLE_SERIALIZER: + case ALTERTABLE_ADDCOLS: + case ALTERTABLE_REPLACECOLS: + case ALTERTABLE_PARTCOLTYPE: + case ALTERTABLE_LOCATION: + case ALTERTABLE_RENAME: + case ALTERTABLE_RENAMECOL: + case ALTERVIEW_PROPERTIES: + case ALTERVIEW_RENAME: + case ALTERVIEW_AS: + return true; + } + + return false; + } + + static final class EntityComparator implements Comparator { + @Override + public int compare(Entity entity1, Entity entity2) { + String name1 = entity1.getName(); + String name2 = entity2.getName(); + + if (name1 == null || name2 == null) { + name1 = entity1.getD().toString(); + name2 = entity2.getD().toString(); + } + + return name1.toLowerCase().compareTo(name2.toLowerCase()); + } + } + + static final Comparator entityComparator = new EntityComparator(); + + static final class HBaseTableInfo { + String hbaseNameSpace = null; + String hbaseTableName = null; + + HBaseTableInfo(Table table) { + Map parameters = table.getParameters(); + + if (MapUtils.isNotEmpty(parameters)) { + hbaseNameSpace = HBASE_DEFAULT_NAMESPACE; + hbaseTableName = parameters.get(HBASE_PARAM_TABLE_NAME); + + if (hbaseTableName != null) { + if (hbaseTableName.contains(HBASE_NAMESPACE_TABLE_DELIMITER)) { + String[] hbaseTableInfo = hbaseTableName.split(HBASE_NAMESPACE_TABLE_DELIMITER); + + if (hbaseTableInfo.length > 1) { + hbaseNameSpace = hbaseTableInfo[0]; + hbaseTableName = hbaseTableInfo[1]; + } + } + } + } + } + + public String getHbaseNameSpace() { + return hbaseNameSpace; + } + + public String getHbaseTableName() { + return hbaseTableName; + } + } + + public static Table toTable(org.apache.hadoop.hive.metastore.api.Table table) { + return new Table(table); + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java new file mode 100644 index 0000000000..bf5f5620e7 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.List; + +import static org.apache.atlas.hive.bridge.HiveMetaStoreBridge.getDatabaseName; +import static org.apache.hadoop.hive.ql.hooks.Entity.Type.DATABASE; + +public class CreateDatabase extends BaseHiveEvent { + private static final Logger LOG = LoggerFactory.getLogger(CreateDatabase.class); + + public CreateDatabase(AtlasHiveHookContext context) { + super(context); + } + + @Override + public List getNotificationMessages() throws Exception { + List ret = null; + AtlasEntitiesWithExtInfo entities = context.isMetastoreHook() ? getHiveMetastoreEntities() : getHiveEntities(); + + if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) { + ret = Collections.singletonList(new EntityCreateRequestV2(getUserName(), entities)); + } + + return ret; + } + + public AtlasEntitiesWithExtInfo getHiveMetastoreEntities() throws Exception { + AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo(); + CreateDatabaseEvent dbEvent = (CreateDatabaseEvent) context.getMetastoreEvent(); + Database db = dbEvent.getDatabase(); + + if (db != null) { + db = context.getMetastoreHandler().get_database(db.getName()); + } + + if (db != null) { + AtlasEntity dbEntity = toDbEntity(db); + + ret.addEntity(dbEntity); + + addLocationEntities(dbEntity, ret); + } else { + LOG.error("CreateDatabase.getEntities(): failed to retrieve db"); + } + + addProcessedEntities(ret); + + return ret; + } + + public AtlasEntitiesWithExtInfo getHiveEntities() throws Exception { + AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo(); + + for (Entity entity : getOutputs()) { + if (entity.getType() == DATABASE) { + Database db = entity.getDatabase(); + + if (db != null) { + db = getHive().getDatabase(getDatabaseName(db)); + } + + if (db != null) { + AtlasEntity dbEntity = toDbEntity(db); + AtlasEntity dbDDLEntity = createHiveDDLEntity(dbEntity); + + ret.addEntity(dbEntity); + + if (dbDDLEntity != null) { + ret.addEntity(dbDDLEntity); + } + + addLocationEntities(dbEntity, ret); + } else { + LOG.error("CreateDatabase.getEntities(): failed to retrieve db"); + } + } + } + + addProcessedEntities(ret); + + return ret; + } + + public void addLocationEntities(AtlasEntity dbEntity, AtlasEntitiesWithExtInfo ret) { + AtlasEntity dbLocationEntity = createHiveLocationEntity(dbEntity, ret); + + if (dbLocationEntity != null) { + ret.addEntity(dbLocationEntity); + } + } +} \ No newline at end of file diff --git 
a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java new file mode 100644 index 0000000000..5787c9365a --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java @@ -0,0 +1,295 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.hooks.LineageInfo; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + + +public class CreateHiveProcess extends BaseHiveEvent { + private static final Logger LOG = LoggerFactory.getLogger(CreateHiveProcess.class); + + public CreateHiveProcess(AtlasHiveHookContext context) { + super(context); + } + + @Override + public List getNotificationMessages() throws Exception { + List ret = null; + AtlasEntitiesWithExtInfo entities = getEntities(); + + if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) { + ret = Collections.singletonList(new EntityCreateRequestV2(getUserName(), entities)); + } + + return ret; + } + + public AtlasEntitiesWithExtInfo getEntities() throws Exception { + AtlasEntitiesWithExtInfo ret = null; + + if (!skipProcess()) { + List inputs = new ArrayList<>(); + List outputs = new ArrayList<>(); + Set processedNames = new HashSet<>(); + + ret = new AtlasEntitiesWithExtInfo(); + + if (getInputs() != null) { + for (ReadEntity input : getInputs()) { + String qualifiedName = getQualifiedName(input); + + if (qualifiedName == null || !processedNames.add(qualifiedName)) { + continue; + } + + AtlasEntity entity = getInputOutputEntity(input, ret, skipTempTables); + + if (!input.isDirect()) { + continue; + } + + if (entity != null) { + inputs.add(entity); + } + } + } + + if (getOutputs() != null) { + for (WriteEntity output : getOutputs()) { + String qualifiedName = getQualifiedName(output); + + if (qualifiedName == null || !processedNames.add(qualifiedName)) { + continue; + } + + AtlasEntity entity = getInputOutputEntity(output, ret, skipTempTables); + + if (entity != null) { + outputs.add(entity); + } + + if (isDdlOperation(entity)) { + + AtlasEntity ddlEntity = createHiveDDLEntity(entity); + + if (ddlEntity != null) { + ret.addEntity(ddlEntity); + } + } + } + } + + boolean skipProcess = inputs.isEmpty() && outputs.isEmpty(); + + if (!skipProcess) { + if (inputs.isEmpty() && context.isSkippedInputEntity()) { + skipProcess = true; + } else if 
(outputs.isEmpty() && context.isSkippedOutputEntity()) { + skipProcess = true; + } + } + + if (!skipProcess && !context.isMetastoreHook()) { + AtlasEntity process = getHiveProcessEntity(inputs, outputs); + + ret.addEntity(process); + + AtlasEntity processExecution = getHiveProcessExecutionEntity(process); + ret.addEntity(processExecution); + + processColumnLineage(process, ret); + + addProcessedEntities(ret); + } else { + ret = null; + } + } + + return ret; + } + + private void processColumnLineage(AtlasEntity hiveProcess, AtlasEntitiesWithExtInfo entities) { + LineageInfo lineageInfo = getLineageInfo(); + + if (lineageInfo == null || CollectionUtils.isEmpty(lineageInfo.entrySet())) { + return; + } + + final List columnLineages = new ArrayList<>(); + int lineageInputsCount = 0; + final Set processedOutputCols = new HashSet<>(); + + for (Map.Entry entry : lineageInfo.entrySet()) { + String outputColName = getQualifiedName(entry.getKey()); + AtlasEntity outputColumn = context.getEntity(outputColName); + + if (LOG.isDebugEnabled()) { + LOG.debug("processColumnLineage(): DependencyKey={}; Dependency={}", entry.getKey(), entry.getValue()); + } + + if (outputColumn == null) { + LOG.warn("column-lineage: non-existing output-column {}", outputColName); + + continue; + } + + if (processedOutputCols.contains(outputColName)) { + LOG.warn("column-lineage: duplicate for output-column {}", outputColName); + + continue; + } else { + processedOutputCols.add(outputColName); + } + + List inputColumns = new ArrayList<>(); + + for (BaseColumnInfo baseColumn : getBaseCols(entry.getValue())) { + String inputColName = getQualifiedName(baseColumn); + AtlasEntity inputColumn = context.getEntity(inputColName); + + if (inputColumn == null) { + LOG.warn("column-lineage: non-existing input-column {} for output-column={}", inputColName, outputColName); + + continue; + } + + inputColumns.add(inputColumn); + } + + if (inputColumns.isEmpty()) { + continue; + } + + lineageInputsCount += inputColumns.size(); + + AtlasEntity columnLineageProcess = new AtlasEntity(HIVE_TYPE_COLUMN_LINEAGE); + + columnLineageProcess.setAttribute(ATTRIBUTE_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME)); + columnLineageProcess.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME)); + columnLineageProcess.setRelationshipAttribute(ATTRIBUTE_INPUTS, AtlasTypeUtil.getAtlasRelatedObjectIds(inputColumns, BaseHiveEvent.RELATIONSHIP_DATASET_PROCESS_INPUTS)); + columnLineageProcess.setRelationshipAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(outputColumn, BaseHiveEvent.RELATIONSHIP_PROCESS_DATASET_OUTPUTS))); + columnLineageProcess.setRelationshipAttribute(ATTRIBUTE_QUERY, AtlasTypeUtil.getAtlasRelatedObjectId(hiveProcess, BaseHiveEvent.RELATIONSHIP_HIVE_PROCESS_COLUMN_LINEAGE)); + columnLineageProcess.setAttribute(ATTRIBUTE_DEPENDENCY_TYPE, entry.getValue().getType()); + columnLineageProcess.setAttribute(ATTRIBUTE_EXPRESSION, entry.getValue().getExpr()); + + columnLineages.add(columnLineageProcess); + } + + float avgInputsCount = columnLineages.size() > 0 ? 
(((float) lineageInputsCount) / columnLineages.size()) : 0; + boolean skipColumnLineage = context.getSkipHiveColumnLineageHive20633() && avgInputsCount > context.getSkipHiveColumnLineageHive20633InputsThreshold(); + + if (!skipColumnLineage) { + for (AtlasEntity columnLineage : columnLineages) { + entities.addEntity(columnLineage); + } + } else { + LOG.warn("skipped {} hive_column_lineage entities. Average # of inputs={}, threshold={}, total # of inputs={}", columnLineages.size(), avgInputsCount, context.getSkipHiveColumnLineageHive20633InputsThreshold(), lineageInputsCount); + } + } + + private Collection getBaseCols(Dependency lInfoDep) { + Collection ret = Collections.emptyList(); + + if (lInfoDep != null) { + try { + Method getBaseColsMethod = lInfoDep.getClass().getMethod("getBaseCols"); + + Object retGetBaseCols = getBaseColsMethod.invoke(lInfoDep); + + if (retGetBaseCols != null) { + if (retGetBaseCols instanceof Collection) { + ret = (Collection) retGetBaseCols; + } else { + LOG.warn("{}: unexpected return type from LineageInfo.Dependency.getBaseCols(), expected type {}", + retGetBaseCols.getClass().getName(), "Collection"); + } + } + } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException ex) { + LOG.warn("getBaseCols()", ex); + } + } + + return ret; + } + + + private boolean skipProcess() { + Set inputs = getInputs(); + Set outputs = getOutputs(); + + boolean ret = CollectionUtils.isEmpty(inputs) && CollectionUtils.isEmpty(outputs); + + if (!ret) { + if (getContext().getHiveOperation() == HiveOperation.QUERY) { + // Select query has only one output + if (outputs.size() == 1) { + WriteEntity output = outputs.iterator().next(); + + if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR) { + if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE && output.isTempURI()) { + ret = true; + } + } + // DELETE and UPDATE initially have one input and one output. + // Since they do not support sub-query, they won't create a lineage that have one input and one output. (One input only) + // It's safe to filter them out here. + if (output.getWriteType() == WriteEntity.WriteType.DELETE || output.getWriteType() == WriteEntity.WriteType.UPDATE) { + ret = true; + } + } + } + } + + return ret; + } + + private boolean isDdlOperation(AtlasEntity entity) { + return entity != null && !context.isMetastoreHook() + && (context.getHiveOperation().equals(HiveOperation.CREATETABLE_AS_SELECT) + || context.getHiveOperation().equals(HiveOperation.CREATEVIEW) + || context.getHiveOperation().equals(HiveOperation.ALTERVIEW_AS) + || context.getHiveOperation().equals(HiveOperation.CREATE_MATERIALIZED_VIEW)); + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java new file mode 100644 index 0000000000..91611de88e --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.events.AlterTableEvent; +import org.apache.hadoop.hive.metastore.events.CreateTableEvent; +import org.apache.hadoop.hive.metastore.events.ListenerEvent; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.List; + +import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; +import static org.apache.hadoop.hive.ql.plan.HiveOperation.*; + +public class CreateTable extends BaseHiveEvent { + private static final Logger LOG = LoggerFactory.getLogger(CreateTable.class); + + public CreateTable(AtlasHiveHookContext context) { + super(context); + } + + @Override + public List getNotificationMessages() throws Exception { + List ret = null; + AtlasEntitiesWithExtInfo entities = context.isMetastoreHook() ? 
getHiveMetastoreEntities() : getHiveEntities(); + + if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) { + ret = Collections.singletonList(new EntityCreateRequestV2(getUserName(), entities)); + } + + return ret; + } + + public AtlasEntitiesWithExtInfo getHiveMetastoreEntities() throws Exception { + AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo(); + ListenerEvent event = context.getMetastoreEvent(); + HiveOperation oper = context.getHiveOperation(); + Table table; + + if (isAlterTable(oper)) { + table = toTable(((AlterTableEvent) event).getNewTable()); + } else { + table = toTable(((CreateTableEvent) event).getTable()); + } + + if (skipTemporaryTable(table)) { + table = null; + } + + processTable(table, ret); + + addProcessedEntities(ret); + + return ret; + } + + public AtlasEntitiesWithExtInfo getHiveEntities() throws Exception { + AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo(); + Table table = null; + + if (CollectionUtils.isNotEmpty(getOutputs())) { + for (Entity entity : getOutputs()) { + if (entity.getType() == Entity.Type.TABLE) { + table = entity.getTable(); + + if (table != null) { + table = getHive().getTable(table.getDbName(), table.getTableName()); + + if (table != null) { + if (skipTemporaryTable(table)) { + table = null; + } else { + break; + } + } + } + } + } + } + + processTable(table, ret); + + addProcessedEntities(ret); + + return ret; + } + + // create process entities for lineages from HBase/HDFS to hive table + private void processTable(Table table, AtlasEntitiesWithExtInfo ret) throws Exception { + if (table != null) { + AtlasEntity tblEntity = toTableEntity(table, ret); + + if (tblEntity != null) { + if (isHBaseStore(table)) { + if (context.isMetastoreHook()) { + //do nothing + } else { + // This create lineage to HBase table in case of Hive on HBase + AtlasEntity hbaseTableEntity = toReferencedHBaseTable(table, ret); + + //not a hive metastore hook + //it is running in the context of Hbase. 
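+ // lineage direction depends on the table type: for an EXTERNAL table the HBase table is the process input and the Hive table the output; for a managed table the Hive table is the input and the HBase table the output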
+ if (hbaseTableEntity != null) { + final AtlasEntity processEntity; + + if (EXTERNAL_TABLE.equals(table.getTableType())) { + processEntity = getHiveProcessEntity(Collections.singletonList(hbaseTableEntity), Collections.singletonList(tblEntity)); + } else { + processEntity = getHiveProcessEntity(Collections.singletonList(tblEntity), Collections.singletonList(hbaseTableEntity)); + } + ret.addEntity(processEntity); + + AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity); + ret.addEntity(processExecution); + } + } + + } else { + if (context.isMetastoreHook()) { + //it is running in the context of HiveMetastore + //not a hive metastore hook + if (isCreateExtTableOperation(table)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Creating a dummy process with lineage from hdfs path to hive table"); + } + AtlasEntity hdfsPathEntity = getPathEntity(table.getDataLocation(), ret); + AtlasEntity processEntity = getHiveProcessEntity(Collections.singletonList(hdfsPathEntity), Collections.singletonList(tblEntity)); + + ret.addEntity(processEntity); + ret.addReferredEntity(hdfsPathEntity); + } + } else { + //not a hive metastore hook + //it is running in the context of HiveServer2 + if (EXTERNAL_TABLE.equals(table.getTableType())) { + AtlasEntity hdfsPathEntity = getPathEntity(table.getDataLocation(), ret); + AtlasEntity processEntity = getHiveProcessEntity(Collections.singletonList(hdfsPathEntity), Collections.singletonList(tblEntity)); + + ret.addEntity(processEntity); + ret.addReferredEntity(hdfsPathEntity); + + AtlasEntity processExecution = getHiveProcessExecutionEntity(processEntity); + ret.addEntity(processExecution); + } + } + } + + if (!context.isMetastoreHook()) { + AtlasEntity tableDDLEntity = createHiveDDLEntity(tblEntity); + + if (tableDDLEntity != null) { + ret.addEntity(tableDDLEntity); + } + } + } + } + } + + private static boolean isAlterTable(HiveOperation oper) { + return (oper == ALTERTABLE_PROPERTIES || oper == ALTERTABLE_RENAME || oper == ALTERTABLE_RENAMECOL); + } + + private boolean skipTemporaryTable(Table table) { + // If its an external table, even though the temp table skip flag is on, we create the table since we need the HDFS path to temp table lineage. + return table != null && skipTempTables && table.isTemporary() && !EXTERNAL_TABLE.equals(table.getTableType()); + } + + private boolean isCreateExtTableOperation(Table table) { + HiveOperation oper = context.getHiveOperation(); + TableType tableType = table.getTableType(); + + return EXTERNAL_TABLE.equals(tableType) && (oper == CREATETABLE || oper == CREATETABLE_AS_SELECT); + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/DropDatabase.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/DropDatabase.java new file mode 100644 index 0000000000..20019d2ef4 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/DropDatabase.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hive.metastore.events.DropDatabaseEvent; +import org.apache.hadoop.hive.ql.hooks.Entity; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.apache.hadoop.hive.ql.hooks.Entity.Type.DATABASE; +import static org.apache.hadoop.hive.ql.hooks.Entity.Type.TABLE; + +public class DropDatabase extends BaseHiveEvent { + public DropDatabase(AtlasHiveHookContext context) { + super(context); + } + + @Override + public List getNotificationMessages() { + List ret = null; + List entities = context.isMetastoreHook() ? getHiveMetastoreEntities() : getHiveEntities(); + + if (CollectionUtils.isNotEmpty(entities)) { + ret = new ArrayList<>(entities.size()); + + for (AtlasObjectId entity : entities) { + ret.add(new EntityDeleteRequestV2(getUserName(), Collections.singletonList(entity))); + } + } + + return ret; + } + + private List getHiveMetastoreEntities() { + List ret = new ArrayList<>(); + DropDatabaseEvent dbEvent = (DropDatabaseEvent) context.getMetastoreEvent(); + String dbQName = getQualifiedName(dbEvent.getDatabase()); + AtlasObjectId dbId = new AtlasObjectId(HIVE_TYPE_DB, ATTRIBUTE_QUALIFIED_NAME, dbQName); + + context.removeFromKnownDatabase(dbQName); + + ret.add(dbId); + + return ret; + } + + private List getHiveEntities() { + List ret = new ArrayList<>(); + + for (Entity entity : getOutputs()) { + if (entity.getType() == DATABASE) { + String dbQName = getQualifiedName(entity.getDatabase()); + AtlasObjectId dbId = new AtlasObjectId(HIVE_TYPE_DB, ATTRIBUTE_QUALIFIED_NAME, dbQName); + + context.removeFromKnownDatabase(dbQName); + + ret.add(dbId); + } else if (entity.getType() == TABLE) { + String tblQName = getQualifiedName(entity.getTable()); + AtlasObjectId tblId = new AtlasObjectId(HIVE_TYPE_TABLE, ATTRIBUTE_QUALIFIED_NAME, tblQName); + + context.removeFromKnownTable(tblQName); + + ret.add(tblId); + } + } + + return ret; + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/DropTable.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/DropTable.java new file mode 100644 index 0000000000..440c08affa --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/DropTable.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook.events; + +import org.apache.atlas.hive.hook.AtlasHiveHookContext; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.hive.metastore.events.DropTableEvent; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.metadata.Table; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class DropTable extends BaseHiveEvent { + public DropTable(AtlasHiveHookContext context) { + super(context); + } + + @Override + public List getNotificationMessages() { + List ret = null; + List entities = context.isMetastoreHook() ? getHiveMetastoreEntities() : getHiveEntities(); + + if (CollectionUtils.isNotEmpty(entities)) { + ret = new ArrayList<>(entities.size()); + + for (AtlasObjectId entity : entities) { + ret.add(new EntityDeleteRequestV2(getUserName(), Collections.singletonList(entity))); + } + } + + return ret; + } + + public List getHiveMetastoreEntities() { + List ret = new ArrayList<>(); + DropTableEvent tblEvent = (DropTableEvent) context.getMetastoreEvent(); + Table table = new Table(tblEvent.getTable()); + String tblQName = getQualifiedName(table); + AtlasObjectId tblId = new AtlasObjectId(HIVE_TYPE_TABLE, ATTRIBUTE_QUALIFIED_NAME, tblQName); + + context.removeFromKnownTable(tblQName); + + ret.add(tblId); + + return ret; + } + + public List getHiveEntities() { + List ret = new ArrayList<>(); + + for (Entity entity : getOutputs()) { + if (entity.getType() == Entity.Type.TABLE) { + String tblQName = getQualifiedName(entity.getTable()); + AtlasObjectId tblId = new AtlasObjectId(HIVE_TYPE_TABLE, ATTRIBUTE_QUALIFIED_NAME, tblQName); + + context.removeFromKnownTable(tblQName); + + ret.add(tblId); + } + } + + return ret; + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilter.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilter.java new file mode 100644 index 0000000000..0b0d4d6b26 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilter.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.hive.hook.utils; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.atlas.hive.hook.HiveHook; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.commons.configuration.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; +import java.util.List; + +import static org.apache.atlas.hive.hook.HiveHook.HOOK_HIVE_IGNORE_DDL_OPERATIONS; + +public class ActiveEntityFilter { + private static final Logger LOG = LoggerFactory.getLogger(ActiveEntityFilter.class); + + private static EntityFilter entityFilter; + + public static void init(Configuration configuration) { + boolean skipDdlOperations = configuration.getBoolean(HOOK_HIVE_IGNORE_DDL_OPERATIONS, false); + List additionalTypesToRetain = getConfiguredTypesToRetainForDDLEntityFilter(configuration); + + init(skipDdlOperations, additionalTypesToRetain); + LOG.info("atlas.hook.hive.ignore.ddl.operations={} - {}", skipDdlOperations, entityFilter.getClass().getSimpleName()); + } + + @VisibleForTesting + static void init(boolean lineageOnlyFilter, List additionalTypesToRetain) { + entityFilter = lineageOnlyFilter ? new HiveDDLEntityFilter(additionalTypesToRetain) : new PassthroughFilter(); + } + + public static List apply(List incoming) { + return entityFilter.apply(incoming); + } + + private static List getConfiguredTypesToRetainForDDLEntityFilter(Configuration configuration) { + try { + if (configuration.containsKey(HiveHook.HOOK_HIVE_FILTER_ENTITY_ADDITIONAL_TYPES_TO_RETAIN)) { + String[] configuredTypes = configuration.getStringArray(HiveHook.HOOK_HIVE_FILTER_ENTITY_ADDITIONAL_TYPES_TO_RETAIN); + + return Arrays.asList(configuredTypes); + } + } catch (Exception e) { + LOG.error("Failed to load application properties", e); + } + + return null; + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/EntityFilter.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/EntityFilter.java new file mode 100644 index 0000000000..f02ee52dac --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/EntityFilter.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.hive.hook.utils; + +import org.apache.atlas.model.notification.HookNotification; + +import java.util.List; + +public interface EntityFilter { + List apply(List incoming); +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/HiveDDLEntityFilter.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/HiveDDLEntityFilter.java new file mode 100644 index 0000000000..0f9aa458cc --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/HiveDDLEntityFilter.java @@ -0,0 +1,286 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.hive.hook.utils; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.atlas.hive.hook.events.BaseHiveEvent; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.utils.AtlasPathExtractorUtil; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.MapUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +public class HiveDDLEntityFilter implements EntityFilter { + private static final Logger LOG = LoggerFactory.getLogger(HiveDDLEntityFilter.class); + + private static final Set defaultPathTypesToRetain = new HashSet() {{ + add(AtlasPathExtractorUtil.HDFS_TYPE_PATH); + add(AtlasPathExtractorUtil.ADLS_GEN2_ACCOUNT); + add(AtlasPathExtractorUtil.ADLS_GEN2_CONTAINER); + add(AtlasPathExtractorUtil.ADLS_GEN2_DIRECTORY); + add(AtlasPathExtractorUtil.GCS_VIRTUAL_DIR); + add(AtlasPathExtractorUtil.AWS_S3_BUCKET); + add(AtlasPathExtractorUtil.AWS_S3_V2_BUCKET); + add(AtlasPathExtractorUtil.AWS_S3_V2_PSEUDO_DIR); + add(AtlasPathExtractorUtil.AWS_S3_PSEUDO_DIR); + add(AtlasPathExtractorUtil.OZONE_KEY); + add(AtlasPathExtractorUtil.OZONE_BUCKET); + add(AtlasPathExtractorUtil.OZONE_VOLUME); + }}; + + private static final Set typesToRetain = new HashSet() {{ + add(BaseHiveEvent.HIVE_TYPE_PROCESS); + add(BaseHiveEvent.HIVE_TYPE_PROCESS_EXECUTION); + add(BaseHiveEvent.HIVE_TYPE_COLUMN_LINEAGE); + 
add(BaseHiveEvent.HIVE_DB_DDL); + add(BaseHiveEvent.HIVE_TABLE_DDL); + addAll(defaultPathTypesToRetain); + }}; + + public HiveDDLEntityFilter(List additionalTypesToRetain) { + if (CollectionUtils.isEmpty(additionalTypesToRetain)) { + return; + } + + typesToRetain.addAll(additionalTypesToRetain); + LOG.info("Types retained: {}", typesToRetain.toArray()); + } + + public List apply(List incoming) { + if (CollectionUtils.isEmpty(incoming)) { + return incoming; + } + + List ret = new ArrayList<>(); + for (HookNotification notification : incoming) { + HookNotification filteredNotification = apply(notification); + if (filteredNotification == null) { + continue; + } + + ret.add(filteredNotification); + } + + return ret; + } + + @VisibleForTesting + AtlasEntity.AtlasEntityWithExtInfo apply(AtlasEntity.AtlasEntityWithExtInfo incoming) { + AtlasEntity.AtlasEntityWithExtInfo ret = new AtlasEntity.AtlasEntityWithExtInfo(); + + AtlasEntity entity = filter(incoming.getEntity()); + if (entity == null) { + return null; + } + + ret.setEntity(entity); + + Map refEntities = filter(incoming.getReferredEntities()); + if (!MapUtils.isEmpty(refEntities)) { + ret.setReferredEntities(refEntities); + } + + return ret; + } + + @VisibleForTesting + public AtlasEntity.AtlasEntitiesWithExtInfo apply(AtlasEntity.AtlasEntitiesWithExtInfo incoming) { + if (incoming == null) { + return incoming; + } + + AtlasEntity.AtlasEntitiesWithExtInfo ret = new AtlasEntity.AtlasEntitiesWithExtInfo(); + + filterEntities(incoming, ret); + filterReferences(incoming, ret); + + return ret; + } + + @VisibleForTesting + List applyForObjectIds(List incoming) { + if (incoming == null || CollectionUtils.isEmpty(incoming)) { + return null; + } + + List ret = new ArrayList<>(); + for (AtlasObjectId o : incoming) { + if (filterObjectId(o) != null) { + ret.add(o); + } + } + + return ret; + } + + private AtlasObjectId filterObjectId(AtlasObjectId o) { + if (o != null && typesToRetain.contains(o.getTypeName())) { + return o; + } + + return null; + } + + private static void filterEntities(AtlasEntity.AtlasEntitiesWithExtInfo incoming, AtlasEntity.AtlasEntitiesWithExtInfo ret) { + ret.setEntities(filter(incoming.getEntities())); + } + + private static void filterReferences(AtlasEntity.AtlasEntitiesWithExtInfo incoming, AtlasEntity.AtlasEntitiesWithExtInfo ret) { + ret.setReferredEntities(filter(incoming.getReferredEntities())); + } + + private static Map filter(Map incoming) { + if (incoming == null || MapUtils.isEmpty(incoming)) { + return null; + } + + return incoming.values() + .stream() + .filter(x -> typesToRetain.contains(x.getTypeName())) + .collect(Collectors.toMap(AtlasEntity::getGuid, Function.identity())); + } + + private static List filter(List incoming) { + if (incoming == null) { + return null; + } + + List ret = incoming.stream() + .filter(x -> typesToRetain.contains(x.getTypeName())) + .collect(Collectors.toList()); + + for (AtlasEntity e : ret) { + for (Object o : e.getRelationshipAttributes().values()) { + if (o instanceof List) { + List list = (List) o; + for (Object ox : list) { + inferObjectTypeResetGuid(ox); + } + } else { + inferObjectTypeResetGuid(o); + } + } + } + + return ret; + } + + private static void inferObjectTypeResetGuid(Object o) { + if (o instanceof AtlasObjectId) { + AtlasObjectId oid = (AtlasObjectId) o; + String typeName = oid.getTypeName(); + + if (oid.getUniqueAttributes() != null && !typesToRetain.contains(typeName)) { + oid.setGuid(null); + } + } else { + LinkedHashMap hm = (LinkedHashMap) o; + if 
(!hm.containsKey(BaseHiveEvent.ATTRIBUTE_GUID)) { + return; + } + + String typeName = hm.containsKey(AtlasObjectId.KEY_TYPENAME) ? (String) hm.get(AtlasObjectId.KEY_TYPENAME) : null; + + if (hm.containsKey(BaseHiveEvent.ATTRIBUTE_UNIQUE_ATTRIBUTES) && !typesToRetain.contains(typeName)) { + hm.remove(BaseHiveEvent.ATTRIBUTE_GUID); + } + } + } + + private static AtlasEntity filter(AtlasEntity incoming) { + if (incoming == null) { + return null; + } + + return typesToRetain.contains(incoming.getTypeName()) ? incoming : null; + } + + private HookNotification apply(HookNotification notification) { + if (notification instanceof HookNotification.EntityCreateRequestV2) { + return apply((HookNotification.EntityCreateRequestV2) notification); + } + + if (notification instanceof HookNotification.EntityUpdateRequestV2) { + return apply((HookNotification.EntityUpdateRequestV2) notification); + } + + if (notification instanceof HookNotification.EntityPartialUpdateRequestV2) { + return apply((HookNotification.EntityPartialUpdateRequestV2) notification); + } + + if (notification instanceof HookNotification.EntityDeleteRequestV2) { + return apply((HookNotification.EntityDeleteRequestV2) notification); + } + + return null; + } + + private HookNotification.EntityCreateRequestV2 apply(HookNotification.EntityCreateRequestV2 notification) { + AtlasEntity.AtlasEntitiesWithExtInfo entities = apply(notification.getEntities()); + if (entities == null || CollectionUtils.isEmpty(entities.getEntities())) { + return null; + } + + return new HookNotification.EntityCreateRequestV2(notification.getUser(), entities); + } + + private HookNotification.EntityUpdateRequestV2 apply(HookNotification.EntityUpdateRequestV2 notification) { + AtlasEntity.AtlasEntitiesWithExtInfo entities = apply(notification.getEntities()); + if (entities == null || CollectionUtils.isEmpty(entities.getEntities())) { + return null; + } + + return new HookNotification.EntityUpdateRequestV2(notification.getUser(), entities); + } + + private HookNotification.EntityPartialUpdateRequestV2 apply(HookNotification.EntityPartialUpdateRequestV2 notification) { + AtlasObjectId objectId = filterObjectId(notification.getEntityId()); + if (objectId == null) { + return null; + } + + AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo = apply(notification.getEntity()); + if (entityWithExtInfo == null) { + return null; + } + + return new HookNotification.EntityPartialUpdateRequestV2(notification.getUser(), objectId, entityWithExtInfo); + } + + private HookNotification.EntityDeleteRequestV2 apply(HookNotification.EntityDeleteRequestV2 notification) { + List objectIds = applyForObjectIds(notification.getEntities()); + if (CollectionUtils.isEmpty(objectIds)) { + return null; + } + + return new HookNotification.EntityDeleteRequestV2(notification.getUser(), objectIds); + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/PassthroughFilter.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/PassthroughFilter.java new file mode 100644 index 0000000000..f61c120ea2 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/PassthroughFilter.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.hive.hook.utils; + +import org.apache.atlas.model.notification.HookNotification; + +import java.util.List; + +class PassthroughFilter implements EntityFilter { + @Override + public List apply(List incoming) { + return incoming; + } +} diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/model/HiveDataTypes.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/model/HiveDataTypes.java new file mode 100755 index 0000000000..f3691e6259 --- /dev/null +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/model/HiveDataTypes.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.model; + +/** + * Hive Data Types for model and bridge. 
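+ * Each constant maps to an Atlas type name via getName(), i.e. the lower-cased enum name.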
+ */ +public enum HiveDataTypes { + + // Enums + HIVE_OBJECT_TYPE, + HIVE_PRINCIPAL_TYPE, + HIVE_RESOURCE_TYPE, + + // Structs + HIVE_SERDE, + HIVE_ORDER, + HIVE_RESOURCEURI, + + // Classes + HIVE_DB, + HIVE_STORAGEDESC, + HIVE_TABLE, + HIVE_COLUMN, + HIVE_PARTITION, + HIVE_INDEX, + HIVE_ROLE, + HIVE_TYPE, + HIVE_PROCESS, + HIVE_COLUMN_LINEAGE, + HIVE_PROCESS_EXECUTION, + // HIVE_VIEW, + ; + + public String getName() { + return name().toLowerCase(); + } +} diff --git a/addons/hive-bridge/src/main/resources/atlas-hive-import-log4j.xml b/addons/hive-bridge/src/main/resources/atlas-hive-import-log4j.xml new file mode 100644 index 0000000000..22a8cc9d43 --- /dev/null +++ b/addons/hive-bridge/src/main/resources/atlas-hive-import-log4j.xml @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/hive-bridge/src/patches/001-hive_column_add_position.json b/addons/hive-bridge/src/patches/001-hive_column_add_position.json new file mode 100644 index 0000000000..e69de29bb2 diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/HiveITBase.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/HiveITBase.java new file mode 100644 index 0000000000..0875afa392 --- /dev/null +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/HiveITBase.java @@ -0,0 +1,796 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.AtlasClientV2; +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.hive.bridge.ColumnLineageUtils; +import org.apache.atlas.hive.bridge.HiveMetaStoreBridge; +import org.apache.atlas.hive.hook.HiveHookIT; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.instance.AtlasStruct; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.atlas.utils.ParamChecker; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.hooks.HookContext; +import org.apache.hadoop.hive.ql.hooks.LineageInfo; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.security.UserGroupInformation; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.annotations.BeforeClass; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; + +import static com.sun.jersey.api.client.ClientResponse.Status.NOT_FOUND; +import static org.apache.atlas.hive.bridge.HiveMetaStoreBridge.HDFS_PATH; +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME; +import static org.apache.atlas.hive.model.HiveDataTypes.HIVE_DB; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +public class HiveITBase { + private static final Logger LOG = LoggerFactory.getLogger(HiveITBase.class); + + public static final String DEFAULT_DB = "default"; + public static final String SEP = ":".intern(); + public static final String IO_SEP = "->".intern(); + protected static final String DGI_URL = "http://localhost:21000/"; + protected static final String CLUSTER_NAME = "primary"; + protected static final String PART_FILE = "2015-01-01"; + protected static final String INPUTS = "inputs"; + protected static final String OUTPUTS = "outputs"; + + + protected Driver driver; + 
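+ // 'driver' is initialized in setUp() with the HMS event listener disabled and exercises the HiveServer2 post-execution hook; 'driverWithoutContext' (declared below) is the metastore-listener counterpart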
protected AtlasClient atlasClient; + protected AtlasClientV2 atlasClientV2; + protected HiveMetaStoreBridge hiveMetaStoreBridge; + protected SessionState ss; + protected HiveConf conf; + protected Driver driverWithoutContext; + + private static final String REFERENCEABLE_ATTRIBUTE_NAME = "qualifiedName"; + private static final String ATTR_NAME = "name"; + + + @BeforeClass + public void setUp() throws Exception { + //Set-up hive session + conf = new HiveConf(); + conf.setClassLoader(Thread.currentThread().getContextClassLoader()); + conf.set("hive.metastore.event.listeners", ""); + + // 'driver' using this configuration will be used for tests in HiveHookIT + // HiveHookIT will use this driver to test post-execution hooks in HiveServer2. + // initialize 'driver' with HMS hook disabled. + driver = new Driver(conf); + ss = new SessionState(conf); + ss = SessionState.start(ss); + + SessionState.setCurrentSessionState(ss); + + Configuration configuration = ApplicationProperties.get(); + + String[] atlasEndPoint = configuration.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT); + + if (atlasEndPoint == null || atlasEndPoint.length == 0) { + atlasEndPoint = new String[] { DGI_URL }; + } + + if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) { + atlasClientV2 = new AtlasClientV2(atlasEndPoint, new String[]{"admin", "admin"}); + atlasClient = new AtlasClient(atlasEndPoint, new String[]{"admin", "admin"}); + } else { + atlasClientV2 = new AtlasClientV2(atlasEndPoint); + atlasClient = new AtlasClient(atlasEndPoint); + } + + hiveMetaStoreBridge = new HiveMetaStoreBridge(configuration, conf, atlasClientV2); + + HiveConf conf = new HiveConf(); + + conf.set("hive.exec.post.hooks", ""); + + SessionState ss = new SessionState(conf); + ss = SessionState.start(ss); + SessionState.setCurrentSessionState(ss); + + // 'driverWithoutContext' using this configuration will be used for tests in HiveMetastoreHookIT + // HiveMetastoreHookIT will use this driver to test event listeners in HiveMetastore. + // initialize 'driverWithoutContext' with HiveServer2 post execution hook disabled. 
+ driverWithoutContext = new Driver(conf); + } + + protected void runCommand(String cmd) throws Exception { + runCommandWithDelay(cmd, 0); + } + + protected void runCommand(Driver driver, String cmd) throws Exception { + runCommandWithDelay(driver, cmd, 0); + } + + protected void runCommandWithDelay(String cmd, int sleepMs) throws Exception { + runCommandWithDelay(driver, cmd, sleepMs); + } + + protected void runCommandWithDelay(Driver driver, String cmd, int sleepMs) throws Exception { + LOG.debug("Running command '{}'", cmd); + + CommandProcessorResponse response = driver.run(cmd); + + assertEquals(response.getResponseCode(), 0); + + if (sleepMs != 0) { + Thread.sleep(sleepMs); + } + } + + protected String createTestDFSPath(String path) throws Exception { + return "file://" + mkdir(path); + } + + protected String file(String tag) throws Exception { + String filename = System.getProperty("user.dir") + "/target/" + tag + "-data-" + random(); + File file = new File(filename); + file.createNewFile(); + return file.getAbsolutePath(); + } + + protected String mkdir(String tag) throws Exception { + String filename = "./target/" + tag + "-data-" + random(); + File file = new File(filename); + file.mkdirs(); + return file.getAbsolutePath(); + } + + public static String lower(String str) { + if (StringUtils.isEmpty(str)) { + return null; + } + return str.toLowerCase().trim(); + } + + protected String random() { + return RandomStringUtils.randomAlphanumeric(10).toLowerCase(); + } + + protected String tableName() { + return "table_" + random(); + } + + protected String dbName() { + return "db_" + random(); + } + + protected String assertTableIsRegistered(String dbName, String tableName) throws Exception { + return assertTableIsRegistered(dbName, tableName, null, false); + } + + protected String assertTableIsRegistered(String dbName, String tableName, HiveHookIT.AssertPredicate assertPredicate, boolean isTemporary) throws Exception { + LOG.debug("Searching for table {}.{}", dbName, tableName); + String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName, isTemporary); + return assertEntityIsRegistered(HiveDataTypes.HIVE_TABLE.getName(), REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName, + assertPredicate); + } + + protected String assertEntityIsRegistered(final String typeName, final String property, final String value, + final HiveHookIT.AssertPredicate assertPredicate) throws Exception { + waitFor(100000, new HiveHookIT.Predicate() { + @Override + public void evaluate() throws Exception { + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value)); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + assertNotNull(entity); + if (assertPredicate != null) { + assertPredicate.assertOnEntity(entity); + } + } + }); + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value)); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + return (String) entity.getGuid(); + } + + protected String assertEntityIsRegisteredViaGuid(String guid, + final HiveHookIT.AssertPredicate assertPredicate) throws Exception { + waitFor(100000, new HiveHookIT.Predicate() { + @Override + public void evaluate() throws Exception { + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + 
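+ // any assertion failure here is caught by waitFor(), which retries until the entity for this GUID appears or the timeout expires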
assertNotNull(entity); + if (assertPredicate != null) { + assertPredicate.assertOnEntity(entity); + } + + } + }); + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + return (String) entity.getGuid(); + } + + protected AtlasEntity assertEntityIsRegistedViaEntity(final String typeName, final String property, final String value, + final HiveHookIT.AssertPredicate assertPredicate) throws Exception { + waitFor(80000, new HiveHookIT.Predicate() { + @Override + public void evaluate() throws Exception { + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value)); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + assertNotNull(entity); + if (assertPredicate != null) { + assertPredicate.assertOnEntity(entity); + } + } + }); + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value)); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + return entity; + } + + public interface AssertPredicate { + void assertOnEntity(AtlasEntity entity) throws Exception; + } + + public interface Predicate { + /** + * Perform a predicate evaluation. + * + * @return the boolean result of the evaluation. + * @throws Exception thrown if the predicate evaluation could not evaluate. + */ + void evaluate() throws Exception; + } + + /** + * Wait for a condition, expressed via a {@link Predicate} to become true. + * + * @param timeout maximum time in milliseconds to wait for the predicate to become true. + * @param predicate predicate waiting on. + */ + protected void waitFor(int timeout, Predicate predicate) throws Exception { + ParamChecker.notNull(predicate, "predicate"); + long mustEnd = System.currentTimeMillis() + timeout; + + while (true) { + try { + predicate.evaluate(); + return; + } catch(Error | Exception e) { + if (System.currentTimeMillis() >= mustEnd) { + fail("Assertions failed. Failing after waiting for timeout " + timeout + " msecs", e); + } + LOG.debug("Waiting up to {} msec as assertion failed", mustEnd - System.currentTimeMillis(), e); + Thread.sleep(5000); + } + } + } + + protected String getTableProcessQualifiedName(String dbName, String tableName) throws Exception { + return HiveMetaStoreBridge.getTableProcessQualifiedName(CLUSTER_NAME, + hiveMetaStoreBridge.getHiveClient().getTable(dbName, tableName)); + } + + protected void validateHDFSPaths(AtlasEntity processEntity, String attributeName, String... 
testPaths) throws Exception { + List hdfsPathIds = toAtlasObjectIdList(processEntity.getAttribute(attributeName)); + + for (String testPath : testPaths) { + Path path = new Path(testPath); + String testPathNormed = lower(path.toString()); + String hdfsPathId = assertHDFSPathIsRegistered(testPathNormed); + + assertHDFSPathIdsContain(hdfsPathIds, hdfsPathId); + } + } + + private void assertHDFSPathIdsContain(List hdfsPathObjectIds, String hdfsPathId) { + Set hdfsPathGuids = new HashSet<>(); + + for (AtlasObjectId hdfsPathObjectId : hdfsPathObjectIds) { + hdfsPathGuids.add(hdfsPathObjectId.getGuid()); + } + + assertTrue(hdfsPathGuids.contains(hdfsPathId)); + } + + protected String assertHDFSPathIsRegistered(String path) throws Exception { + LOG.debug("Searching for hdfs path {}", path); + // ATLAS-2444 HDFS name node federation adds the cluster name to the qualifiedName + if (path.startsWith("hdfs://")) { + String pathWithCluster = path + "@" + CLUSTER_NAME; + return assertEntityIsRegistered(HDFS_PATH, REFERENCEABLE_ATTRIBUTE_NAME, pathWithCluster, null); + } else { + return assertEntityIsRegistered(HDFS_PATH, REFERENCEABLE_ATTRIBUTE_NAME, path, null); + } + } + + protected String assertDatabaseIsRegistered(String dbName) throws Exception { + return assertDatabaseIsRegistered(dbName, null); + } + + protected String assertDatabaseIsRegistered(String dbName, AssertPredicate assertPredicate) throws Exception { + LOG.debug("Searching for database: {}", dbName); + + String dbQualifiedName = HiveMetaStoreBridge.getDBQualifiedName(CLUSTER_NAME, dbName); + + return assertEntityIsRegistered(HIVE_DB.getName(), REFERENCEABLE_ATTRIBUTE_NAME, dbQualifiedName, assertPredicate); + } + + public void assertDatabaseIsNotRegistered(String dbName) throws Exception { + LOG.debug("Searching for database {}", dbName); + String dbQualifiedName = HiveMetaStoreBridge.getDBQualifiedName(CLUSTER_NAME, dbName); + assertEntityIsNotRegistered(HIVE_DB.getName(), ATTRIBUTE_QUALIFIED_NAME, dbQualifiedName); + } + + protected void assertEntityIsNotRegistered(final String typeName, final String property, final String value) throws Exception { + // wait for sufficient time before checking if entity is not available. 
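+ // hook notifications are processed asynchronously, so allow a fixed delay before asserting that the entity is absent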
+ long waitTime = 10000; + LOG.debug("Waiting for {} msecs, before asserting entity is not registered.", waitTime); + Thread.sleep(waitTime); + + try { + atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property, value)); + + fail(String.format("Entity was not supposed to exist for typeName = %s, attributeName = %s, attributeValue = %s", typeName, property, value)); + } catch (AtlasServiceException e) { + if (e.getStatus() == NOT_FOUND) { + return; + } + } + } + + protected AtlasEntity getAtlasEntityByType(String type, String id) throws Exception { + AtlasEntity atlasEntity = null; + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfoForProcess = atlasClientV2.getEntityByAttribute(type, + Collections.singletonMap(AtlasClient.GUID, id)); + atlasEntity = atlasEntityWithExtInfoForProcess.getEntity(); + return atlasEntity; + } + + + public static class HiveEventContext { + private Set inputs; + private Set outputs; + + private String user; + private UserGroupInformation ugi; + private HiveOperation operation; + private HookContext.HookType hookType; + private JSONObject jsonPlan; + private String queryId; + private String queryStr; + private Long queryStartTime; + + public Map> lineageInfo; + + private List messages = new ArrayList<>(); + + public void setInputs(Set inputs) { + this.inputs = inputs; + } + + public void setOutputs(Set outputs) { + this.outputs = outputs; + } + + public void setUser(String user) { + this.user = user; + } + + public void setUgi(UserGroupInformation ugi) { + this.ugi = ugi; + } + + public void setOperation(HiveOperation operation) { + this.operation = operation; + } + + public void setHookType(HookContext.HookType hookType) { + this.hookType = hookType; + } + + public void setQueryId(String queryId) { + this.queryId = queryId; + } + + public void setQueryStr(String queryStr) { + this.queryStr = queryStr; + } + + public void setQueryStartTime(Long queryStartTime) { + this.queryStartTime = queryStartTime; + } + + public void setLineageInfo(LineageInfo lineageInfo){ + try { + this.lineageInfo = ColumnLineageUtils.buildLineageMap(lineageInfo); + LOG.debug("Column Lineage Map => {} ", this.lineageInfo.entrySet()); + }catch (Throwable e){ + LOG.warn("Column Lineage Map build failed with exception {}", e); + } + } + + public Set getInputs() { + return inputs; + } + + public Set getOutputs() { + return outputs; + } + + public String getUser() { + return user; + } + + public UserGroupInformation getUgi() { + return ugi; + } + + public HiveOperation getOperation() { + return operation; + } + + public HookContext.HookType getHookType() { + return hookType; + } + + public String getQueryId() { + return queryId; + } + + public String getQueryStr() { + return queryStr; + } + + public Long getQueryStartTime() { + return queryStartTime; + } + + public void addMessage(HookNotification message) { + messages.add(message); + } + + public List getMessages() { + return messages; + } + } + + + @VisibleForTesting + protected static String getProcessQualifiedName(HiveMetaStoreBridge dgiBridge, HiveEventContext eventContext, + final SortedSet sortedHiveInputs, + final SortedSet sortedHiveOutputs, + SortedMap hiveInputsMap, + SortedMap hiveOutputsMap) throws HiveException { + HiveOperation op = eventContext.getOperation(); + if (isCreateOp(eventContext)) { + Entity entity = getEntityByType(sortedHiveOutputs, Entity.Type.TABLE); + + if (entity != null) { + Table outTable = entity.getTable(); + //refresh table + outTable = 
dgiBridge.getHiveClient().getTable(outTable.getDbName(), outTable.getTableName()); + return HiveMetaStoreBridge.getTableProcessQualifiedName(dgiBridge.getMetadataNamespace(), outTable); + } + } + + StringBuilder buffer = new StringBuilder(op.getOperationName()); + + boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs); + if ( ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) { + LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr()); + } + + addInputs(dgiBridge, op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName); + buffer.append(IO_SEP); + addOutputs(dgiBridge, op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName); + LOG.info("Setting process qualified name to {}", buffer); + return buffer.toString(); + } + + protected static Entity getEntityByType(Set entities, Entity.Type entityType) { + for (Entity entity : entities) { + if (entity.getType() == entityType) { + return entity; + } + } + return null; + } + + + protected static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set inputs, final Set outputs) { + switch (op) { + case LOAD: + case IMPORT: + return isPartitionBasedQuery(outputs); + case EXPORT: + return isPartitionBasedQuery(inputs); + case QUERY: + return true; + } + return false; + } + + protected static boolean isPartitionBasedQuery(Set entities) { + for (Entity entity : entities) { + if (Entity.Type.PARTITION.equals(entity.getType())) { + return true; + } + } + return false; + } + + protected static boolean isCreateOp(HiveEventContext hiveEvent) { + return HiveOperation.CREATETABLE.equals(hiveEvent.getOperation()) + || HiveOperation.CREATEVIEW.equals(hiveEvent.getOperation()) + || HiveOperation.ALTERVIEW_AS.equals(hiveEvent.getOperation()) + || HiveOperation.ALTERTABLE_LOCATION.equals(hiveEvent.getOperation()) + || HiveOperation.CREATETABLE_AS_SELECT.equals(hiveEvent.getOperation()); + } + + protected static void addInputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet sortedInputs, StringBuilder buffer, final Map refs, final boolean ignoreHDFSPathsInQFName) throws HiveException { + if (refs != null) { + if (sortedInputs != null) { + Set dataSetsProcessed = new LinkedHashSet<>(); + for (Entity input : sortedInputs) { + + if (!dataSetsProcessed.contains(input.getName().toLowerCase())) { + //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations + if (ignoreHDFSPathsInQFName && + (Entity.Type.DFS_DIR.equals(input.getType()) || Entity.Type.LOCAL_DIR.equals(input.getType()))) { + LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName()); + } else if (refs.containsKey(input)) { + if ( input.getType() == Entity.Type.PARTITION || input.getType() == Entity.Type.TABLE) { + Table inputTable = refreshTable(hiveBridge, input.getTable().getDbName(), input.getTable().getTableName()); + + if (inputTable != null) { + addDataset(buffer, refs.get(input), HiveMetaStoreBridge.getTableCreatedTime(inputTable)); + } + } else { + addDataset(buffer, refs.get(input)); + } + } + + dataSetsProcessed.add(input.getName().toLowerCase()); + } + } + + } + } + } + + protected static void addDataset(StringBuilder buffer, AtlasEntity ref, final long createTime) { + addDataset(buffer, ref); + buffer.append(SEP); + buffer.append(createTime); + } + + protected static void addDataset(StringBuilder buffer, AtlasEntity ref) { + buffer.append(SEP); + String dataSetQlfdName = (String) 
ref.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME); + // '/' breaks query parsing on ATLAS + buffer.append(dataSetQlfdName.toLowerCase().replaceAll("/", "")); + } + + protected static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet sortedOutputs, StringBuilder buffer, final Map refs, final boolean ignoreHDFSPathsInQFName) throws HiveException { + if (refs != null) { + Set dataSetsProcessed = new LinkedHashSet<>(); + if (sortedOutputs != null) { + for (WriteEntity output : sortedOutputs) { + final Entity entity = output; + if (!dataSetsProcessed.contains(output.getName().toLowerCase())) { + if (ignoreHDFSPathsInQFName && + (Entity.Type.DFS_DIR.equals(output.getType()) || Entity.Type.LOCAL_DIR.equals(output.getType()))) { + LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName()); + } else if (refs.containsKey(output)) { + //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations + if (addQueryType(op, (WriteEntity) entity)) { + buffer.append(SEP); + buffer.append(((WriteEntity) entity).getWriteType().name()); + } + + if ( output.getType() == Entity.Type.PARTITION || output.getType() == Entity.Type.TABLE) { + Table outputTable = refreshTable(hiveBridge, output.getTable().getDbName(), output.getTable().getTableName()); + + if (outputTable != null) { + addDataset(buffer, refs.get(output), HiveMetaStoreBridge.getTableCreatedTime(outputTable)); + } + } else { + addDataset(buffer, refs.get(output)); + } + } + + dataSetsProcessed.add(output.getName().toLowerCase()); + } + } + } + } + } + + protected static Table refreshTable(HiveMetaStoreBridge dgiBridge, String dbName, String tableName) { + try { + return dgiBridge.getHiveClient().getTable(dbName, tableName); + } catch (HiveException excp) { // this might be the case for temp tables + LOG.warn("failed to get details for table {}.{}. Ignoring. 
{}: {}", dbName, tableName, excp.getClass().getCanonicalName(), excp.getMessage()); + } + + return null; + } + + protected static boolean addQueryType(HiveOperation op, WriteEntity entity) { + if (entity.getWriteType() != null && HiveOperation.QUERY.equals(op)) { + switch (entity.getWriteType()) { + case INSERT: + case INSERT_OVERWRITE: + case UPDATE: + case DELETE: + return true; + case PATH_WRITE: + //Add query type only for DFS paths and ignore local paths since they are not added as outputs + if ( !Entity.Type.LOCAL_DIR.equals(entity.getType())) { + return true; + } + break; + default: + } + } + return false; + } + + + @VisibleForTesting + protected static final class EntityComparator implements Comparator { + @Override + public int compare(Entity o1, Entity o2) { + String s1 = o1.getName(); + String s2 = o2.getName(); + if (s1 == null || s2 == null){ + s1 = o1.getD().toString(); + s2 = o2.getD().toString(); + } + return s1.toLowerCase().compareTo(s2.toLowerCase()); + } + } + + @VisibleForTesting + protected static final Comparator entityComparator = new EntityComparator(); + + protected AtlasObjectId toAtlasObjectId(Object obj) { + final AtlasObjectId ret; + + if (obj instanceof AtlasObjectId) { + ret = (AtlasObjectId) obj; + } else if (obj instanceof Map) { + ret = new AtlasObjectId((Map) obj); + } else if (obj != null) { + ret = new AtlasObjectId(obj.toString()); // guid + } else { + ret = null; + } + + return ret; + } + + protected List toAtlasObjectIdList(Object obj) { + final List ret; + + if (obj instanceof Collection) { + Collection coll = (Collection) obj; + + ret = new ArrayList<>(coll.size()); + + for (Object item : coll) { + AtlasObjectId objId = toAtlasObjectId(item); + + if (objId != null) { + ret.add(objId); + } + } + } else { + AtlasObjectId objId = toAtlasObjectId(obj); + + if (objId != null) { + ret = new ArrayList<>(1); + + ret.add(objId); + } else { + ret = null; + } + } + + return ret; + } + + protected AtlasStruct toAtlasStruct(Object obj) { + final AtlasStruct ret; + + if (obj instanceof AtlasStruct) { + ret = (AtlasStruct) obj; + } else if (obj instanceof Map) { + ret = new AtlasStruct((Map) obj); + } else { + ret = null; + } + + return ret; + } + + protected List toAtlasStructList(Object obj) { + final List ret; + + if (obj instanceof Collection) { + Collection coll = (Collection) obj; + + ret = new ArrayList<>(coll.size()); + + for (Object item : coll) { + AtlasStruct struct = toAtlasStruct(item); + + if (struct != null) { + ret.add(struct); + } + } + } else { + AtlasStruct struct = toAtlasStruct(obj); + + if (struct != null) { + ret = new ArrayList<>(1); + + ret.add(struct); + } else { + ret = null; + } + } + + return ret; + } +} diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/ColumnLineageUtils.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/ColumnLineageUtils.java new file mode 100644 index 0000000000..22397f1e1a --- /dev/null +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/ColumnLineageUtils.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.bridge; + +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.v1.model.instance.Referenceable; +import org.apache.hadoop.hive.ql.hooks.LineageInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_COLUMNS; +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME; + + +public class ColumnLineageUtils { + public static final Logger LOG = LoggerFactory.getLogger(ColumnLineageUtils.class); + public static class HiveColumnLineageInfo { + public final String depenendencyType; + public final String expr; + public final String inputColumn; + + HiveColumnLineageInfo(LineageInfo.Dependency d, String inputCol) { + depenendencyType = d.getType().name(); + expr = d.getExpr(); + inputColumn = inputCol; + } + + @Override + public String toString(){ + return inputColumn; + } + } + + public static String getQualifiedName(LineageInfo.DependencyKey key){ + String db = key.getDataContainer().getTable().getDbName(); + String table = key.getDataContainer().getTable().getTableName(); + String col = key.getFieldSchema().getName(); + return db + "." + table + "." + col; + } + + public static Map> buildLineageMap(LineageInfo lInfo) { + Map> m = new HashMap<>(); + + for (Map.Entry e : lInfo.entrySet()) { + List l = new ArrayList<>(); + String k = getQualifiedName(e.getKey()); + + if (LOG.isDebugEnabled()) { + LOG.debug("buildLineageMap(): key={}; value={}", e.getKey(), e.getValue()); + } + + Collection baseCols = getBaseCols(e.getValue()); + + if (baseCols != null) { + for (LineageInfo.BaseColumnInfo iCol : baseCols) { + String db = iCol.getTabAlias().getTable().getDbName(); + String table = iCol.getTabAlias().getTable().getTableName(); + String colQualifiedName = iCol.getColumn() == null ? db + "." + table : db + "." + table + "." 
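+ // Added example (names assumed for illustration only): for a base column "amount" of table "sales"
+ // in database "default", the qualified name built here is "default.sales.amount"; when the dependency
+ // has no column (iCol.getColumn() == null), it falls back to just "default.sales".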
+ iCol.getColumn().getName(); + l.add(new HiveColumnLineageInfo(e.getValue(), colQualifiedName)); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Setting lineage --> Input: {} ==> Output : {}", l, k); + } + m.put(k, l); + } + } + return m; + } + + static Collection getBaseCols(LineageInfo.Dependency lInfoDep) { + Collection ret = null; + + if (lInfoDep != null) { + try { + Method getBaseColsMethod = lInfoDep.getClass().getMethod("getBaseCols"); + + Object retGetBaseCols = getBaseColsMethod.invoke(lInfoDep); + + if (retGetBaseCols != null) { + if (retGetBaseCols instanceof Collection) { + ret = (Collection) retGetBaseCols; + } else { + LOG.warn("{}: unexpected return type from LineageInfo.Dependency.getBaseCols(), expected type {}", + retGetBaseCols.getClass().getName(), "Collection"); + } + } + } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException ex) { + LOG.warn("getBaseCols()", ex); + } + } + + return ret; + } + + static String[] extractComponents(String qualifiedName) { + String[] comps = qualifiedName.split("\\."); + int lastIdx = comps.length - 1; + int atLoc = comps[lastIdx].indexOf('@'); + if (atLoc > 0) { + comps[lastIdx] = comps[lastIdx].substring(0, atLoc); + } + return comps; + } + + static void populateColumnReferenceableMap(Map m, + Referenceable r) { + if (r.getTypeName().equals(HiveDataTypes.HIVE_TABLE.getName())) { + String qName = (String) r.get(ATTRIBUTE_QUALIFIED_NAME); + String[] qNameComps = extractComponents(qName); + for (Referenceable col : (List) r.get(ATTRIBUTE_COLUMNS)) { + String cName = (String) col.get(ATTRIBUTE_QUALIFIED_NAME); + String[] colQNameComps = extractComponents(cName); + String colQName = colQNameComps[0] + "." + colQNameComps[1] + "." + colQNameComps[2]; + m.put(colQName, col); + } + String tableQName = qNameComps[0] + "." + qNameComps[1]; + m.put(tableQName, r); + } + } + + + public static Map buildColumnReferenceableMap(List inputs, + List outputs) { + Map m = new HashMap<>(); + + for (Referenceable r : inputs) { + populateColumnReferenceableMap(m, r); + } + + for (Referenceable r : outputs) { + populateColumnReferenceableMap(m, r); + } + + return m; + } +} diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeTest.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeTest.java new file mode 100644 index 0000000000..ae7ab1a220 --- /dev/null +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeTest.java @@ -0,0 +1,327 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.hive.bridge; + +import org.apache.atlas.AtlasClient; +import org.apache.atlas.AtlasClientV2; +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.EntityMutationResponse; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.mapred.TextInputFormat; +import org.mockito.ArgumentMatcher; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.*; +import static org.mockito.Matchers.anyObject; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class HiveMetaStoreBridgeTest { + private static final String TEST_DB_NAME = "default"; + public static final String METADATA_NAMESPACE = "primary"; + public static final String TEST_TABLE_NAME = "test_table"; + + @Mock + private Hive hiveClient; + + @Mock + private AtlasClient atlasClient; + + @Mock + private AtlasClientV2 atlasClientV2; + + @Mock + private AtlasEntity atlasEntity; + + @Mock + private AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo; + + @Mock + EntityMutationResponse entityMutationResponse; + + @BeforeMethod + public void initializeMocks() { + MockitoAnnotations.initMocks(this); + } + + @Test + public void testImportThatUpdatesRegisteredDatabase() throws Exception { + // setup database + when(hiveClient.getAllDatabases()).thenReturn(Arrays.asList(new String[]{TEST_DB_NAME})); + String description = "This is a default database"; + Database db = new Database(TEST_DB_NAME, description, "/user/hive/default", null); + when(hiveClient.getDatabase(TEST_DB_NAME)).thenReturn(db); + when(hiveClient.getAllTables(TEST_DB_NAME)).thenReturn(Arrays.asList(new String[]{})); + + returnExistingDatabase(TEST_DB_NAME, atlasClientV2, METADATA_NAMESPACE); + + when(atlasEntityWithExtInfo.getEntity("72e06b34-9151-4023-aa9d-b82103a50e76")) + .thenReturn((new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_DB.getName(), AtlasClient.GUID, "72e06b34-9151-4023-aa9d-b82103a50e76"))).getEntity()); + + HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(METADATA_NAMESPACE, hiveClient, atlasClientV2); + bridge.importHiveMetadata(null, null, true); + + // verify update is called + verify(atlasClientV2).updateEntity(anyObject()); + } + + @Test + public void testImportThatUpdatesRegisteredTable() throws Exception { + setupDB(hiveClient, TEST_DB_NAME); + + List
<Table>
hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME); + + returnExistingDatabase(TEST_DB_NAME, atlasClientV2, METADATA_NAMESPACE); + + // return existing table + + when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")) + .thenReturn((new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))).getEntity()); + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + HiveMetaStoreBridge.getTableQualifiedName(METADATA_NAMESPACE, TEST_DB_NAME, TEST_TABLE_NAME)), true, true )) + .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))); + + when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")) + .thenReturn(createTableReference()); + + Table testTable = hiveTables.get(0); + String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(METADATA_NAMESPACE, testTable); + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + processQualifiedName), true ,true)) + .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))); + + + HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(METADATA_NAMESPACE, hiveClient, atlasClientV2); + bridge.importHiveMetadata(null, null, true); + + // verify update is called on table + verify(atlasClientV2, times(2)).updateEntity(anyObject()); + + } + + private void returnExistingDatabase(String databaseName, AtlasClientV2 atlasClientV2, String metadataNamespace) + throws AtlasServiceException { + //getEntity(HiveDataTypes.HIVE_DB.getName(), AtlasClient.GUID, "72e06b34-9151-4023-aa9d-b82103a50e76"); + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_DB.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + HiveMetaStoreBridge.getDBQualifiedName(METADATA_NAMESPACE, TEST_DB_NAME)), true, true)) + .thenReturn((new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_DB.getName(), AtlasClient.GUID, "72e06b34-9151-4023-aa9d-b82103a50e76")))); + + } + + private List
<Table> setupTables(Hive hiveClient, String databaseName, String... tableNames) throws HiveException { + List<Table>
tables = new ArrayList<>(); + when(hiveClient.getAllTables(databaseName)).thenReturn(Arrays.asList(tableNames)); + for(String tableName : tableNames) { + Table testTable = createTestTable(databaseName, tableName); + when(hiveClient.getTable(databaseName, tableName)).thenReturn(testTable); + tables.add(testTable); + } + return tables; + } + + private void setupDB(Hive hiveClient, String databaseName) throws HiveException { + when(hiveClient.getAllDatabases()).thenReturn(Arrays.asList(new String[]{databaseName})); + when(hiveClient.getDatabase(databaseName)).thenReturn( + new Database(databaseName, "Default database", "/user/hive/default", null)); + } + + @Test + public void testImportWhenPartitionKeysAreNull() throws Exception { + setupDB(hiveClient, TEST_DB_NAME); + List
hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME); + Table hiveTable = hiveTables.get(0); + + returnExistingDatabase(TEST_DB_NAME, atlasClientV2, METADATA_NAMESPACE); + + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + HiveMetaStoreBridge.getTableQualifiedName(METADATA_NAMESPACE, TEST_DB_NAME, TEST_TABLE_NAME)), true, true)) + .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))); + + String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(METADATA_NAMESPACE, hiveTable); + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + processQualifiedName), true, true)) + .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))); + + when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")) + .thenReturn(createTableReference()); + + Partition partition = mock(Partition.class); + when(partition.getTable()).thenReturn(hiveTable); + List partitionValues = Arrays.asList(new String[]{}); + when(partition.getValues()).thenReturn(partitionValues); + + when(hiveClient.getPartitions(hiveTable)).thenReturn(Arrays.asList(new Partition[]{partition})); + + HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(METADATA_NAMESPACE, hiveClient, atlasClientV2); + try { + bridge.importHiveMetadata(null, null, true); + } catch (Exception e) { + Assert.fail("Partition with null key caused import to fail with exception ", e); + } + } + + @Test + public void testImportContinuesWhenTableRegistrationFails() throws Exception { + setupDB(hiveClient, TEST_DB_NAME); + final String table2Name = TEST_TABLE_NAME + "_1"; + List
hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME, table2Name); + + returnExistingDatabase(TEST_DB_NAME, atlasClientV2, METADATA_NAMESPACE); + when(hiveClient.getTable(TEST_DB_NAME, TEST_TABLE_NAME)).thenThrow(new RuntimeException("Timeout while reading data from hive metastore")); + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + HiveMetaStoreBridge.getTableQualifiedName(METADATA_NAMESPACE, TEST_DB_NAME, TEST_TABLE_NAME)))) + .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))); + + when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")) + .thenReturn(createTableReference()); + + Table testTable = hiveTables.get(1); + String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(METADATA_NAMESPACE, testTable); + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + processQualifiedName))) + .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))); + + HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(METADATA_NAMESPACE, hiveClient, atlasClientV2); + try { + bridge.importHiveMetadata(null, null, false); + } catch (Exception e) { + Assert.fail("Table registration failed with exception", e); + } + } + + @Test + public void testImportFailsWhenTableRegistrationFails() throws Exception { + setupDB(hiveClient, TEST_DB_NAME); + final String table2Name = TEST_TABLE_NAME + "_1"; + List
hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME, table2Name); + + returnExistingDatabase(TEST_DB_NAME, atlasClientV2, METADATA_NAMESPACE); + when(hiveClient.getTable(TEST_DB_NAME, TEST_TABLE_NAME)).thenThrow(new RuntimeException("Timeout while reading data from hive metastore")); + + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + HiveMetaStoreBridge.getTableQualifiedName(METADATA_NAMESPACE, TEST_DB_NAME, TEST_TABLE_NAME)))) + .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))); + + + when(atlasEntityWithExtInfo.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")) + .thenReturn(createTableReference()); + + Table testTable = hiveTables.get(1); + String processQualifiedName = HiveMetaStoreBridge.getTableProcessQualifiedName(METADATA_NAMESPACE, testTable); + + when(atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_PROCESS.getName(), + Collections.singletonMap(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + processQualifiedName))) + .thenReturn(new AtlasEntity.AtlasEntityWithExtInfo( + getEntity(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.GUID, "82e06b34-9151-4023-aa9d-b82103a50e77"))); + + HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(METADATA_NAMESPACE, hiveClient, atlasClientV2); + try { + bridge.importHiveMetadata(null, null, true); + Assert.fail("Table registration is supposed to fail"); + } catch (Exception e) { + //Expected + } + } + + private AtlasEntity getEntity(String typeName, String attr, String value) { + return new AtlasEntity(typeName, attr, value); + } + + private AtlasEntity createTableReference() { + AtlasEntity tableEntity = new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName()); + AtlasEntity sdEntity = new AtlasEntity(HiveDataTypes.HIVE_STORAGEDESC.getName()); + tableEntity.setAttribute(ATTRIBUTE_STORAGEDESC, AtlasTypeUtil.getObjectId(sdEntity)); + return tableEntity; + } + + private Table createTestTable(String databaseName, String tableName) throws HiveException { + Table table = new Table(databaseName, tableName); + table.setInputFormatClass(TextInputFormat.class); + table.setFields(new ArrayList() {{ + add(new FieldSchema("col1", "string", "comment1")); + } + }); + table.setTableType(TableType.EXTERNAL_TABLE); + table.setDataLocation(new Path("somehdfspath")); + return table; + } + + private class MatchesReferenceableProperty implements ArgumentMatcher { + private final String attrName; + private final Object attrValue; + + public MatchesReferenceableProperty(String attrName, Object attrValue) { + this.attrName = attrName; + this.attrValue = attrValue; + } + + @Override + public boolean matches(Object o) { + return attrValue.equals(((AtlasEntity) o).getAttribute(attrName)); + } + } +} diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/HiveMetastoreBridgeIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/HiveMetastoreBridgeIT.java new file mode 100644 index 0000000000..981600c4e1 --- /dev/null +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/bridge/HiveMetastoreBridgeIT.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.bridge; + +import org.apache.atlas.AtlasClient; +import org.apache.atlas.hive.HiveITBase; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.testng.annotations.Test; + +import java.util.List; + +import static org.testng.Assert.assertEquals; + +public class HiveMetastoreBridgeIT extends HiveITBase { + + @Test + public void testCreateTableAndImport() throws Exception { + String tableName = tableName(); + String pFile = createTestDFSPath("parentPath"); + String query = String.format("create EXTERNAL table %s(id string, cnt int) location '%s'", tableName, pFile); + + runCommand(query); + + String dbId = assertDatabaseIsRegistered(DEFAULT_DB); + String tableId = assertTableIsRegistered(DEFAULT_DB, tableName); + + //verify lineage is created + String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null); + AtlasEntity processsEntity = atlasClientV2.getEntityByGuid(processId).getEntity(); + + validateHDFSPaths(processsEntity, INPUTS, pFile); + + List outputs = toAtlasObjectIdList(processsEntity.getAttribute(OUTPUTS)); + + assertEquals(outputs.size(), 1); + assertEquals(outputs.get(0).getGuid(), tableId); + + int tableCount = atlasClient.listEntities(HiveDataTypes.HIVE_TABLE.getName()).size(); + + //Now import using import tool - should be no-op. 
This also tests update since table exists + AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity(); + + hiveMetaStoreBridge.importTable(dbEntity, DEFAULT_DB, tableName, true); + + String tableId2 = assertTableIsRegistered(DEFAULT_DB, tableName); + assertEquals(tableId2, tableId); + + String processId2 = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null); + assertEquals(processId2, processId); + + //assert that table is de-duped and no new entity is created + int newTableCount = atlasClient.listEntities(HiveDataTypes.HIVE_TABLE.getName()).size(); + assertEquals(newTableCount, tableCount); + } + + @Test + public void testImportCreatedTable() throws Exception { + String tableName = tableName(); + String pFile = createTestDFSPath("parentPath"); + + runCommandWithDelay(driverWithoutContext, String.format("create EXTERNAL table %s(id string) location '%s'", tableName, pFile), 3000); + + String dbId = assertDatabaseIsRegistered(DEFAULT_DB); + + AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity(); + + hiveMetaStoreBridge.importTable(dbEntity, DEFAULT_DB, tableName, true); + + String tableId = assertTableIsRegistered(DEFAULT_DB, tableName); + + String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null); + AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity(); + List outputs = toAtlasObjectIdList(processEntity.getAttribute(OUTPUTS)); + + assertEquals(outputs.size(), 1); + assertEquals(outputs.get(0).getGuid(), tableId); + } + + //TODO enable this test + //@Test + public void testCreateTableHiveProcessNameAttribute() throws Exception { + //test if \n is trimmed from name attribute of the process entity + String tableName = tableName(); + String processNameQuery = String.format("create table %s (id string)", tableName); + //add \n at the beginning of the query + String query = String.format("%n%n%s", processNameQuery); + + runCommand(query); + + String dbId = assertDatabaseIsRegistered(DEFAULT_DB); + String tableId = assertTableIsRegistered(DEFAULT_DB, tableName); + + //verify lineage is created and the name attribute is the query without \n + String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null); + AtlasEntity processsEntity = atlasClientV2.getEntityByGuid(processId).getEntity(); + + assertEquals(processsEntity.getAttribute("name"), processNameQuery); + } +} diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java new file mode 100755 index 0000000000..1db73e5a7c --- /dev/null +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java @@ -0,0 +1,2554 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.base.Joiner; +import com.sun.jersey.api.client.ClientResponse; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.hive.HiveITBase; +import org.apache.atlas.hive.bridge.HiveMetaStoreBridge; +import org.apache.atlas.hive.hook.events.BaseHiveEvent; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.model.instance.*; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.model.lineage.AtlasLineageInfo; +import org.apache.atlas.model.typedef.AtlasClassificationDef; +import org.apache.atlas.model.typedef.AtlasEntityDef; +import org.apache.atlas.model.typedef.AtlasTypesDef; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.text.ParseException; +import java.util.*; + +import static org.apache.atlas.AtlasClient.NAME; +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.*; +import static org.testng.Assert.*; +import static org.testng.AssertJUnit.assertEquals; + +public class HiveHookIT extends HiveITBase { + private static final Logger LOG = LoggerFactory.getLogger(HiveHookIT.class); + + private static final String PART_FILE = "2015-01-01"; + private static final String PATH_TYPE_NAME = "Path"; + + private Driver driverWithNoHook; + + @BeforeClass + public void setUp() throws Exception { + // initialize 'driverWithNoHook' with HiveServer2 hook and HiveMetastore hook disabled + HiveConf conf = new HiveConf(); + conf.set("hive.exec.post.hooks", ""); + conf.set("hive.metastore.event.listeners", ""); + + SessionState ss = new SessionState(conf); + ss = SessionState.start(ss); + SessionState.setCurrentSessionState(ss); + + // Initialize 'driverWithNoHook' with HS2 hook disabled and HMS hook disabled. 
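+ // For reference (assumed typical wiring, not something this patch configures): a hook-enabled
+ // session would normally carry the Atlas hooks in the same two properties, roughly:
+ //   conf.set("hive.exec.post.hooks", "org.apache.atlas.hive.hook.HiveHook");
+ //   conf.set("hive.metastore.event.listeners", "org.apache.atlas.hive.hook.HiveMetastoreHook");
+ // clearing them above is what lets this Driver run statements without emitting Atlas notifications.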
+ driverWithNoHook = new Driver(conf); + + super.setUp(); + } + + @Test + public void testCreateDatabase() throws Exception { + String dbName = "db" + random(); + + runCommand("create database " + dbName + " WITH DBPROPERTIES ('p1'='v1', 'p2'='v2')"); + + String dbId = assertDatabaseIsRegistered(dbName); + AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity(); + Map params = (Map) dbEntity.getAttribute(ATTRIBUTE_PARAMETERS); + List ddlQueries = (List) dbEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(),1); + + Assert.assertNotNull(params); + Assert.assertEquals(params.size(), 2); + Assert.assertEquals(params.get("p1"), "v1"); + + //There should be just one entity per dbname + runCommandWithDelay("drop database " + dbName, 3000); + assertDatabaseIsNotRegistered(dbName); + + runCommandWithDelay("create database " + dbName, 3000); + dbId = assertDatabaseIsRegistered(dbName); + + //assert on qualified name + dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity(); + + Assert.assertEquals(dbEntity.getAttribute(ATTRIBUTE_QUALIFIED_NAME) , dbName.toLowerCase() + "@" + CLUSTER_NAME); + } + + @Test + public void testPathEntityDefAvailable() throws Exception { + //Check if Path entity definition created or not + AtlasEntityDef pathEntityDef = atlasClientV2.getEntityDefByName("Path"); + assertNotNull(pathEntityDef); + } + + @Test + public void testCreateDatabaseWithLocation() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + + //HDFS Location + String hdfsLocation = "hdfs://localhost:8020/warehouse/tablespace/external/hive/reports.db"; + alterDatabaseLocation(dbName, hdfsLocation); + assertDatabaseLocationRelationship(dbId); + } + + //alter database location + public void alterDatabaseLocation(String dbName, String location) throws Exception { + int timeDelay = 5000; + String query = String.format("ALTER DATABASE %s SET LOCATION \"%s\"", dbName, location); + runCommandWithDelay(query, timeDelay); + } + + public void assertDatabaseLocationRelationship(String dbId) throws Exception { + AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity(); + AtlasEntityDef pathEntityDef = getPathEntityDefWithAllSubTypes(); + + assertTrue(dbEntity.hasAttribute(ATTRIBUTE_LOCATION)); + + assertNotNull(dbEntity.getAttribute(ATTRIBUTE_LOCATION)); + + assertNotNull(dbEntity.getRelationshipAttribute(ATTRIBUTE_LOCATION_PATH)); + + AtlasObjectId locationEntityObject = toAtlasObjectId(dbEntity.getRelationshipAttribute(ATTRIBUTE_LOCATION_PATH)); + assertTrue(pathEntityDef.getSubTypes().contains(locationEntityObject.getTypeName())); + } + + public AtlasEntityDef getPathEntityDefWithAllSubTypes() throws Exception { + Set possiblePathSubTypes = new HashSet<>(Arrays.asList("fs_path", "hdfs_path", "aws_s3_pseudo_dir", "aws_s3_v2_directory", "adls_gen2_directory")); + AtlasEntityDef pathEntityDef = atlasClientV2.getEntityDefByName(PATH_TYPE_NAME); + + if(pathEntityDef == null) { + pathEntityDef = new AtlasEntityDef(PATH_TYPE_NAME); + } + + pathEntityDef.setSubTypes(possiblePathSubTypes); + + return pathEntityDef; + } + + @Test + public void testCreateTable() throws Exception { + String tableName = tableName(); + String dbName = createDatabase(); + String colName = columnName(); + + runCommand("create table " + dbName + "." 
+ tableName + "(" + colName + " int, name string)"); + + String tableId = assertTableIsRegistered(dbName, tableName); + String colId = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName), colName)); //there is only one instance of column registered + AtlasEntity colEntity = atlasClientV2.getEntityByGuid(colId).getEntity(); + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tableId).getEntity(); + + Assert.assertEquals(colEntity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), String.format("%s.%s.%s@%s", dbName.toLowerCase(), tableName.toLowerCase(), colName.toLowerCase(), CLUSTER_NAME)); + Assert.assertNotNull(colEntity.getAttribute(ATTRIBUTE_TABLE)); + + Assert.assertNotNull(tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)); + Assert.assertEquals(((List)tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 1); + + AtlasObjectId tblObjId = toAtlasObjectId(colEntity.getAttribute(ATTRIBUTE_TABLE)); + + Assert.assertEquals(tblObjId.getGuid(), tableId); + + //assert that column.owner = table.owner + AtlasEntity tblEntity1 = atlasClientV2.getEntityByGuid(tableId).getEntity(); + AtlasEntity colEntity1 = atlasClientV2.getEntityByGuid(colId).getEntity(); + + assertEquals(tblEntity1.getAttribute(ATTRIBUTE_OWNER), colEntity1.getAttribute(ATTRIBUTE_OWNER)); + + //create table where db is not registered + tableName = createTable(); + tableId = assertTableIsRegistered(DEFAULT_DB, tableName); + + AtlasEntity tblEntity2 = atlasClientV2.getEntityByGuid(tableId).getEntity(); + + Assert.assertEquals(tblEntity2.getAttribute(ATTRIBUTE_TABLE_TYPE), TableType.MANAGED_TABLE.name()); + Assert.assertEquals(tblEntity2.getAttribute(ATTRIBUTE_COMMENT), "table comment"); + + String entityName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName); + + Assert.assertEquals(tblEntity2.getAttribute(AtlasClient.NAME), tableName.toLowerCase()); + Assert.assertEquals(tblEntity2.getAttribute(ATTRIBUTE_QUALIFIED_NAME), entityName); + + Table t = hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, tableName); + long createTime = Long.parseLong(t.getMetadata().getProperty(hive_metastoreConstants.DDL_TIME)) * MILLIS_CONVERT_FACTOR; + + verifyTimestamps(tblEntity2, ATTRIBUTE_CREATE_TIME, createTime); + verifyTimestamps(tblEntity2, ATTRIBUTE_LAST_ACCESS_TIME, createTime); + + final AtlasObjectId sdEntity = toAtlasObjectId(tblEntity2.getAttribute(ATTRIBUTE_STORAGEDESC)); + + Assert.assertNotNull(sdEntity); + + // Assert.assertEquals(((Id) sdRef.getAttribute(HiveMetaStoreBridge.TABLE))._getId(), tableId); + + //Create table where database doesn't exist, will create database instance as well + assertDatabaseIsRegistered(DEFAULT_DB); + } + + + private void verifyTimestamps(AtlasEntity ref, String property, long expectedTime) throws ParseException { + //Verify timestamps. + Object createTime = ref.getAttribute(property); + + Assert.assertNotNull(createTime); + + if (expectedTime > 0) { + Assert.assertEquals(expectedTime, createTime); + } + } + + private void verifyTimestamps(AtlasEntity ref, String property) throws ParseException { + verifyTimestamps(ref, property, 0); + } + + //ATLAS-1321: Disable problematic tests. 
Need to revisit and fix them later + @Test(enabled = false) + public void testCreateExternalTable() throws Exception { + String tableName = tableName(); + String colName = columnName(); + String pFile = createTestDFSPath("parentPath"); + String query = String.format("create EXTERNAL table %s.%s(%s, %s) location '%s'", DEFAULT_DB , tableName , colName + " int", "name string", pFile); + + runCommand(query); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName, null, true); + AtlasEntity tblEnity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlList = (List) tblEnity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlList); + assertEquals(ddlList.size(), 1); + + String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, getTableProcessQualifiedName(DEFAULT_DB, tableName), null); + + AtlasEntity processsEntity = atlasClientV2.getEntityByGuid(processId).getEntity(); + + assertEquals(processsEntity.getAttribute("userName"), UserGroupInformation.getCurrentUser().getShortUserName()); + + verifyTimestamps(processsEntity, "startTime"); + verifyTimestamps(processsEntity, "endTime"); + + validateHDFSPaths(processsEntity, INPUTS, pFile); + } + + private Set getInputs(String inputName, Entity.Type entityType) throws HiveException { + final ReadEntity entity; + + if (Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) { + entity = new TestReadEntity(lower(new Path(inputName).toString()), entityType); + } else { + entity = new TestReadEntity(getQualifiedTblName(inputName), entityType); + } + + if (entityType == Entity.Type.TABLE) { + entity.setT(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, inputName)); + } + + return new LinkedHashSet() {{ add(entity); }}; + } + + private Set getOutputs(String inputName, Entity.Type entityType) throws HiveException { + final WriteEntity entity; + + if (Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) { + entity = new TestWriteEntity(lower(new Path(inputName).toString()), entityType); + } else { + entity = new TestWriteEntity(getQualifiedTblName(inputName), entityType); + } + + if (entityType == Entity.Type.TABLE) { + entity.setT(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, inputName)); + } + + return new LinkedHashSet() {{ add(entity); }}; + } + + private void validateOutputTables(AtlasEntity processEntity, Set expectedTables) throws Exception { + validateTables(toAtlasObjectIdList(processEntity.getAttribute(ATTRIBUTE_OUTPUTS)), expectedTables); + } + + private void validateInputTables(AtlasEntity processEntity, Set expectedTables) throws Exception { + validateTables(toAtlasObjectIdList(processEntity.getAttribute(ATTRIBUTE_INPUTS)), expectedTables); + } + + private void validateTables(List tableIds, Set expectedTables) throws Exception { + if (tableIds == null) { + Assert.assertTrue(CollectionUtils.isEmpty(expectedTables)); + } else if (expectedTables == null) { + Assert.assertTrue(CollectionUtils.isEmpty(tableIds)); + } else { + Assert.assertEquals(tableIds.size(), expectedTables.size()); + + List entityQualifiedNames = new ArrayList<>(tableIds.size()); + List expectedTableNames = new ArrayList<>(expectedTables.size()); + + for (AtlasObjectId tableId : tableIds) { + AtlasEntity atlasEntity = atlasClientV2.getEntityByGuid(tableId.getGuid()).getEntity(); + + entityQualifiedNames.add((String) atlasEntity.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + } + + for (Iterator iterator = 
expectedTables.iterator(); iterator.hasNext(); ) { + Entity hiveEntity = iterator.next(); + + expectedTableNames.add(hiveEntity.getName()); + } + + for (String entityQualifiedName : entityQualifiedNames) { + boolean found = false; + + for (String expectedTableName : expectedTableNames) { + if (entityQualifiedName.startsWith(expectedTableName)) { + found = true; + + break; + } + } + + assertTrue(found, "Table name '" + entityQualifiedName + "' does not start with any name in the expected list " + expectedTableNames); + } + } + } + + private String assertColumnIsRegistered(String colName) throws Exception { + return assertColumnIsRegistered(colName, null); + } + + private String assertColumnIsRegistered(String colName, AssertPredicate assertPredicate) throws Exception { + LOG.debug("Searching for column {}", colName); + + return assertEntityIsRegistered(HiveDataTypes.HIVE_COLUMN.getName(), ATTRIBUTE_QUALIFIED_NAME, colName, assertPredicate); + } + + private String assertSDIsRegistered(String sdQFName, AssertPredicate assertPredicate) throws Exception { + LOG.debug("Searching for sd {}", sdQFName.toLowerCase()); + + return assertEntityIsRegistered(HiveDataTypes.HIVE_STORAGEDESC.getName(), ATTRIBUTE_QUALIFIED_NAME, sdQFName.toLowerCase(), assertPredicate); + } + + private void assertColumnIsNotRegistered(String colName) throws Exception { + LOG.debug("Searching for column {}", colName); + + assertEntityIsNotRegistered(HiveDataTypes.HIVE_COLUMN.getName(), ATTRIBUTE_QUALIFIED_NAME, colName); + } + + @Test + public void testCTAS() throws Exception { + String tableName = createTable(); + String ctasTableName = "table" + random(); + String query = "create table " + ctasTableName + " as select * from " + tableName; + + runCommand(query); + + final Set readEntities = getInputs(tableName, Entity.Type.TABLE); + final Set writeEntities = getOutputs(ctasTableName, Entity.Type.TABLE); + + HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities, + writeEntities); + AtlasEntity processEntity1 = validateProcess(hiveEventContext); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext); + AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), processEntity1.getGuid()); + + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + assertTableIsRegistered(DEFAULT_DB, ctasTableName); + } + + private HiveEventContext constructEvent(String query, HiveOperation op, Set inputs, Set outputs) { + HiveEventContext event = new HiveEventContext(); + + event.setQueryStr(query); + event.setOperation(op); + event.setInputs(inputs); + event.setOutputs(outputs); + + return event; + } + + @Test + public void testEmptyStringAsValue() throws Exception{ + String tableName = tableName(); + String command = "create table " + tableName + "(id int, name string) row format delimited lines terminated by '\n' null defined as ''"; + + runCommandWithDelay(command, 3000); + + assertTableIsRegistered(DEFAULT_DB, tableName); + } + + @Test + public void testDropAndRecreateCTASOutput() throws Exception { + String tableName = createTable(); + String ctasTableName = "table" + random(); + String query = "create table " + ctasTableName + " as select * from " + tableName; + + runCommand(query); + + assertTableIsRegistered(DEFAULT_DB, ctasTableName); + + Set inputs = getInputs(tableName, Entity.Type.TABLE); + Set outputs = 
getOutputs(ctasTableName, Entity.Type.TABLE); + + HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs); + AtlasEntity processEntity1 = validateProcess(hiveEventContext); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext); + AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), processEntity1.getGuid()); + + String dropQuery = String.format("drop table %s ", ctasTableName); + + runCommandWithDelay(dropQuery, 5000); + + assertTableIsNotRegistered(DEFAULT_DB, ctasTableName); + + runCommand(query); + + String tblId = assertTableIsRegistered(DEFAULT_DB, ctasTableName); + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlList = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlList); + assertEquals(ddlList.size(), 1); + + outputs = getOutputs(ctasTableName, Entity.Type.TABLE); + + AtlasEntity processEntity2 = validateProcess(hiveEventContext); + AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, hiveEventContext); + AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process2.getGuid(), processEntity2.getGuid()); + + assertNotEquals(processEntity1.getGuid(), processEntity2.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1); + + validateOutputTables(processEntity1, outputs); + } + + @Test + public void testCreateView() throws Exception { + String tableName = createTable(); + String viewName = tableName(); + String query = "create view " + viewName + " as select * from " + tableName; + + runCommand(query); + + HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(tableName, + Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)); + AtlasEntity processEntity1 = validateProcess(hiveEventContext); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + assertTableIsRegistered(DEFAULT_DB, viewName); + + String viewId = assertTableIsRegistered(DEFAULT_DB, viewName); + AtlasEntity viewEntity = atlasClientV2.getEntityByGuid(viewId).getEntity(); + List ddlQueries = (List) viewEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + } + + @Test + public void testAlterViewAsSelect() throws Exception { + //Create the view from table1 + String table1Name = createTable(); + String viewName = tableName(); + String query = "create view " + viewName + " as select * from " + table1Name; + + runCommand(query); + + String table1Id = assertTableIsRegistered(DEFAULT_DB, table1Name); + + HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table1Name, + Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)); + String processId1 = assertProcessIsRegistered(hiveEventContext); + AtlasEntity processEntity1 = 
atlasClientV2.getEntityByGuid(processId1).getEntity(); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String viewId = assertTableIsRegistered(DEFAULT_DB, viewName); + + //Check lineage which includes table1 + String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName); + String tableId = assertTableIsRegistered(DEFAULT_DB, viewName); + AtlasLineageInfo inputLineageInfo = atlasClientV2.getLineageInfo(tableId, AtlasLineageInfo.LineageDirection.INPUT, 0); + Map entityMap = inputLineageInfo.getGuidEntityMap(); + + assertTrue(entityMap.containsKey(viewId)); + assertTrue(entityMap.containsKey(table1Id)); + + //Alter the view from table2 + String table2Name = createTable(); + + query = "alter view " + viewName + " as select * from " + table2Name; + + runCommand(query); + + HiveEventContext hiveEventContext2 = constructEvent(query, HiveOperation.CREATEVIEW, getInputs(table2Name, + Entity.Type.TABLE), getOutputs(viewName, Entity.Type.TABLE)); + String processId2 = assertProcessIsRegistered(hiveEventContext2); + AtlasEntity processEntity2 = atlasClientV2.getEntityByGuid(processId2).getEntity(); + AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, hiveEventContext2); + AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process2.getGuid(), processEntity2.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity2), 2); + Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid()); + + String table2Id = assertTableIsRegistered(DEFAULT_DB, table2Name); + String viewId2 = assertTableIsRegistered(DEFAULT_DB, viewName); + + Assert.assertEquals(viewId2, viewId); + + AtlasEntity viewEntity = atlasClientV2.getEntityByGuid(viewId2).getEntity(); + List ddlQueries = (List) viewEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 2); + + datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName); + + String tableId1 = assertTableIsRegistered(DEFAULT_DB, viewName); + AtlasLineageInfo inputLineageInfo1 = atlasClientV2.getLineageInfo(tableId1, AtlasLineageInfo.LineageDirection.INPUT, 0); + Map entityMap1 = inputLineageInfo1.getGuidEntityMap(); + + assertTrue(entityMap1.containsKey(viewId)); + + //This is through the alter view process + assertTrue(entityMap1.containsKey(table2Id)); + + //This is through the Create view process + assertTrue(entityMap1.containsKey(table1Id)); + + //Outputs dont exist + AtlasLineageInfo outputLineageInfo = atlasClientV2.getLineageInfo(tableId1, AtlasLineageInfo.LineageDirection.OUTPUT, 0); + Map entityMap2 = outputLineageInfo.getGuidEntityMap(); + + assertEquals(entityMap2.size(),0); + } + + private String createTestDFSFile(String path) throws Exception { + return "pfile://" + file(path); + } + + @Test + public void testLoadLocalPath() throws Exception { + String tableName = createTable(false); + String loadFile = file("load"); + String query = "load data local inpath 'file://" + loadFile + "' into table " + tableName; + + String tblId = 
assertTableIsRegistered(DEFAULT_DB, tableName); + + runCommand(query); + + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + + assertProcessIsRegistered(constructEvent(query, HiveOperation.LOAD, getInputs("file://" + loadFile, Entity.Type.LOCAL_DIR), getOutputs(tableName, Entity.Type.TABLE))); + } + + @Test + public void testLoadLocalPathIntoPartition() throws Exception { + String tableName = createTable(true); + String loadFile = file("load"); + String query = "load data local inpath 'file://" + loadFile + "' into table " + tableName + " partition(dt = '"+ PART_FILE + "')"; + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName); + + runCommand(query); + + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + + assertProcessIsRegistered(constructEvent(query, HiveOperation.LOAD, null, getOutputs(tableName, Entity.Type.TABLE))); + } + + @Test + public void testLoadDFSPathPartitioned() throws Exception { + String tableName = createTable(true, true, false); + + assertTableIsRegistered(DEFAULT_DB, tableName); + + String loadFile = createTestDFSFile("loadDFSFile"); + String query = "load data inpath '" + loadFile + "' into table " + tableName + " partition(dt = '"+ PART_FILE + "')"; + + runCommand(query); + + Set outputs = getOutputs(tableName, Entity.Type.TABLE); + Set inputs = getInputs(loadFile, Entity.Type.DFS_DIR); + Set partitionOps = new LinkedHashSet<>(outputs); + + partitionOps.addAll(getOutputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION)); + + AtlasEntity processReference = validateProcess(constructEvent(query, HiveOperation.LOAD, inputs, partitionOps), inputs, outputs); + + validateHDFSPaths(processReference, INPUTS, loadFile); + validateOutputTables(processReference, outputs); + + String loadFile2 = createTestDFSFile("loadDFSFile1"); + + query = "load data inpath '" + loadFile2 + "' into table " + tableName + " partition(dt = '"+ PART_FILE + "')"; + + runCommand(query); + + Set process2Inputs = getInputs(loadFile2, Entity.Type.DFS_DIR); + Set expectedInputs = new LinkedHashSet<>(); + + expectedInputs.addAll(process2Inputs); + expectedInputs.addAll(inputs); + + validateProcess(constructEvent(query, HiveOperation.LOAD, expectedInputs, partitionOps), expectedInputs, outputs); + } + + private String getQualifiedTblName(String inputTable) { + String inputtblQlfdName = inputTable; + + if (inputTable != null && !inputTable.contains("@")) { + inputtblQlfdName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, inputTable); + } + return inputtblQlfdName; + } + + private AtlasEntity validateProcess(HiveEventContext event, Set inputTables, Set outputTables) throws Exception { + String processId = assertProcessIsRegistered(event, inputTables, outputTables); + AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity(); + + validateInputTables(processEntity, inputTables); + validateOutputTables(processEntity, outputTables); + + return processEntity; + } + + private AtlasEntity validateProcess(HiveEventContext event) throws Exception { + return validateProcess(event, event.getInputs(), event.getOutputs()); + } + + private AtlasEntity 
validateProcessExecution(AtlasEntity hiveProcess, HiveEventContext event) throws Exception { + String processExecutionId = assertProcessExecutionIsRegistered(hiveProcess, event); + AtlasEntity processExecutionEntity = atlasClientV2.getEntityByGuid(processExecutionId).getEntity(); + return processExecutionEntity; + } + + @Test + public void testInsertIntoTable() throws Exception { + String inputTable1Name = createTable(); + String inputTable2Name = createTable(); + String insertTableName = createTable(); + + assertTableIsRegistered(DEFAULT_DB, inputTable1Name); + assertTableIsRegistered(DEFAULT_DB, insertTableName); + + String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id"; + + runCommand(query); + + Set inputs = getInputs(inputTable1Name, Entity.Type.TABLE); + + inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE)); + + Set outputs = getOutputs(insertTableName, Entity.Type.TABLE); + + (outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT); + + HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs); + + Set expectedInputs = new TreeSet(entityComparator) {{ + addAll(inputs); + }}; + + String tblId = assertTableIsRegistered(DEFAULT_DB, insertTableName); + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + + AtlasEntity processEntity1 = validateProcess(event, expectedInputs, outputs); + + //Test sorting of tbl names + SortedSet sortedTblNames = new TreeSet<>(); + + sortedTblNames.add(inputTable1Name.toLowerCase()); + sortedTblNames.add(inputTable2Name.toLowerCase()); + + //Verify sorted order of inputs in qualified name + Assert.assertEquals(processEntity1.getAttribute(ATTRIBUTE_QUALIFIED_NAME), + Joiner.on(SEP).join("QUERY", + getQualifiedTblName(sortedTblNames.first()), + HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.first())), + getQualifiedTblName(sortedTblNames.last()), + HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.last()))) + + IO_SEP + SEP + + Joiner.on(SEP). + join(WriteEntity.WriteType.INSERT.name(), + getQualifiedTblName(insertTableName), + HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, insertTableName))) + ); + + //Rerun same query. 
Should result in same process + runCommandWithDelay(query, 3000); + + AtlasEntity processEntity2 = validateProcess(event, expectedInputs, outputs); + Assert.assertEquals(numberOfProcessExecutions(processEntity2), 2); + Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid()); + } + + @Test + public void testInsertIntoTableProcessExecution() throws Exception { + String inputTable1Name = createTable(); + String inputTable2Name = createTable(); + String insertTableName = createTable(); + + assertTableIsRegistered(DEFAULT_DB, inputTable1Name); + assertTableIsRegistered(DEFAULT_DB, insertTableName); + + String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id"; + + runCommand(query); + + Set inputs = getInputs(inputTable1Name, Entity.Type.TABLE); + + inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE)); + + Set outputs = getOutputs(insertTableName, Entity.Type.TABLE); + + (outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT); + + HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs); + + Set expectedInputs = new TreeSet(entityComparator) {{ + addAll(inputs); + }}; + + assertTableIsRegistered(DEFAULT_DB, insertTableName); + + AtlasEntity processEntity1 = validateProcess(event, expectedInputs, outputs); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, event); + AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), processEntity1.getGuid()); + + //Test sorting of tbl names + SortedSet sortedTblNames = new TreeSet<>(); + + sortedTblNames.add(inputTable1Name.toLowerCase()); + sortedTblNames.add(inputTable2Name.toLowerCase()); + + //Verify sorted order of inputs in qualified name + Assert.assertEquals(processEntity1.getAttribute(ATTRIBUTE_QUALIFIED_NAME), + Joiner.on(SEP).join("QUERY", + getQualifiedTblName(sortedTblNames.first()), + HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.first())), + getQualifiedTblName(sortedTblNames.last()), + HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, sortedTblNames.last()))) + + IO_SEP + SEP + + Joiner.on(SEP). + join(WriteEntity.WriteType.INSERT.name(), + getQualifiedTblName(insertTableName), + HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, insertTableName))) + ); + + //Rerun same query. 
Should result in same process + runCommandWithDelay(query, 3000); + + AtlasEntity processEntity2 = validateProcess(event, expectedInputs, outputs); + AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity2, event); + process = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), processEntity2.getGuid()); + Assert.assertEquals(processEntity1.getGuid(), processEntity2.getGuid()); + + String queryWithDifferentPredicate = "insert into " + insertTableName + " select t1.id, t1.name from " + + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=100"; + runCommandWithDelay(queryWithDifferentPredicate, 1000); + + HiveEventContext event3 = constructEvent(queryWithDifferentPredicate, HiveOperation.QUERY, inputs, outputs); + AtlasEntity processEntity3 = validateProcess(event3, expectedInputs, outputs); + AtlasEntity processExecutionEntity3 = validateProcessExecution(processEntity3, event3); + process = toAtlasObjectId(processExecutionEntity3.getRelationshipAttribute(BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), processEntity3.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity3), 3); + Assert.assertEquals(processEntity2.getGuid(), processEntity3.getGuid()); + } + + @Test + public void testInsertIntoLocalDir() throws Exception { + String tableName = createTable(); + String randomLocalPath = mkdir("hiverandom.tmp"); + String query = "insert overwrite LOCAL DIRECTORY '" + randomLocalPath + "' select id, name from " + tableName; + + runCommand(query); + + HiveEventContext event = constructEvent(query, HiveOperation.QUERY, + getInputs(tableName, Entity.Type.TABLE), getOutputs(randomLocalPath, Entity.Type.LOCAL_DIR)); + AtlasEntity hiveProcess = validateProcess(event); + AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event); + AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), hiveProcess.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName); + + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + } + + @Test + public void testUpdateProcess() throws Exception { + String tableName = createTable(); + String pFile1 = createTestDFSPath("somedfspath1"); + String query = "insert overwrite DIRECTORY '" + pFile1 + "' select id, name from " + tableName; + + runCommand(query); + + Set inputs = getInputs(tableName, Entity.Type.TABLE); + Set outputs = getOutputs(pFile1, Entity.Type.DFS_DIR); + + outputs.iterator().next().setWriteType(WriteEntity.WriteType.PATH_WRITE); + + HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, outputs); + AtlasEntity processEntity = validateProcess(hiveEventContext); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity, hiveEventContext); + AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), processEntity.getGuid()); + + validateHDFSPaths(processEntity, OUTPUTS, pFile1); + + assertTableIsRegistered(DEFAULT_DB, tableName); + 
+ validateInputTables(processEntity, inputs); + + //Rerun same query with same HDFS path + runCommandWithDelay(query, 3000); + + assertTableIsRegistered(DEFAULT_DB, tableName); + + AtlasEntity process2Entity = validateProcess(hiveEventContext); + AtlasEntity processExecutionEntity2 = validateProcessExecution(processEntity, hiveEventContext); + AtlasObjectId process2 = toAtlasObjectId(processExecutionEntity2.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process2.getGuid(), process2Entity.getGuid()); + + + validateHDFSPaths(process2Entity, OUTPUTS, pFile1); + + Assert.assertEquals(process2Entity.getGuid(), processEntity.getGuid()); + + //Rerun same query with a new HDFS path. Will result in same process since HDFS paths is not part of qualified name for QUERY operations + String pFile2 = createTestDFSPath("somedfspath2"); + + query = "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName; + + runCommandWithDelay(query, 3000); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName); + + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + + Set p3Outputs = new LinkedHashSet() {{ + addAll(getOutputs(pFile2, Entity.Type.DFS_DIR)); + addAll(outputs); + }}; + + AtlasEntity process3Entity = validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, p3Outputs)); + AtlasEntity processExecutionEntity3 = validateProcessExecution(processEntity, hiveEventContext); + AtlasObjectId process3 = toAtlasObjectId(processExecutionEntity3.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process3.getGuid(), process3Entity.getGuid()); + validateHDFSPaths(process3Entity, OUTPUTS, pFile2); + + Assert.assertEquals(numberOfProcessExecutions(process3Entity), 3); + Assert.assertEquals(process3Entity.getGuid(), processEntity.getGuid()); + } + + @Test + public void testInsertIntoDFSDirPartitioned() throws Exception { + //Test with partitioned table + String tableName = createTable(true); + String pFile1 = createTestDFSPath("somedfspath1"); + String query = "insert overwrite DIRECTORY '" + pFile1 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'"; + + runCommand(query); + + Set inputs = getInputs(tableName, Entity.Type.TABLE); + Set outputs = getOutputs(pFile1, Entity.Type.DFS_DIR); + + outputs.iterator().next().setWriteType(WriteEntity.WriteType.PATH_WRITE); + + Set partitionIps = new LinkedHashSet<>(inputs); + + partitionIps.addAll(getInputs(DEFAULT_DB + "@" + tableName + "@dt='" + PART_FILE + "'", Entity.Type.PARTITION)); + + AtlasEntity processEntity = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, outputs), inputs, outputs); + + //Rerun same query with different HDFS path. Should not create another process and should update it. 
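+ //(The DFS output path is not part of the process qualified name for QUERY operations, so the rerun should resolve to the same hive_process GUID, with its outputs updated.)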
+ + String pFile2 = createTestDFSPath("somedfspath2"); + query = "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'"; + + runCommand(query); + + Set pFile2Outputs = getOutputs(pFile2, Entity.Type.DFS_DIR); + + pFile2Outputs.iterator().next().setWriteType(WriteEntity.WriteType.PATH_WRITE); + + //Now the process has 2 paths - one older with deleted reference to partition and another with the the latest partition + Set p2Outputs = new LinkedHashSet() {{ + addAll(pFile2Outputs); + addAll(outputs); + }}; + + AtlasEntity process2Entity = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, pFile2Outputs), inputs, p2Outputs); + + validateHDFSPaths(process2Entity, OUTPUTS, pFile2); + + Assert.assertEquals(process2Entity.getGuid(), processEntity.getGuid()); + } + + //Disabling test as temporary table is not captured by hiveHook(https://issues.apache.org/jira/browse/ATLAS-1274) + @Test(enabled = false) + public void testInsertIntoTempTable() throws Exception { + String tableName = createTable(); + String insertTableName = createTable(false, false, true); + + assertTableIsRegistered(DEFAULT_DB, tableName); + assertTableIsNotRegistered(DEFAULT_DB, insertTableName, true); + + String query = "insert into " + insertTableName + " select id, name from " + tableName; + + runCommand(query); + + Set inputs = getInputs(tableName, Entity.Type.TABLE); + Set outputs = getOutputs(insertTableName, Entity.Type.TABLE); + + outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT); + + HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs); + AtlasEntity hiveProcess = validateProcess(event); + AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event); + AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), hiveProcess.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1); + + assertTableIsRegistered(DEFAULT_DB, tableName); + assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true); + } + + @Test + public void testInsertIntoPartition() throws Exception { + boolean isPartitionedTable = true; + String tableName = createTable(isPartitionedTable); + String insertTableName = createTable(isPartitionedTable); + String query = "insert into " + insertTableName + " partition(dt = '"+ PART_FILE + "') select id, name from " + tableName + " where dt = '"+ PART_FILE + "'"; + + runCommand(query); + + Set inputs = getInputs(tableName, Entity.Type.TABLE); + Set outputs = getOutputs(insertTableName, Entity.Type.TABLE); + + outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT); + + Set partitionIps = new LinkedHashSet() { + { + addAll(inputs); + add(getPartitionInput()); + } + }; + + Set partitionOps = new LinkedHashSet() { + { + addAll(outputs); + add(getPartitionOutput()); + } + }; + + HiveEventContext event = constructEvent(query, HiveOperation.QUERY, partitionIps, partitionOps); + AtlasEntity hiveProcess = validateProcess(event, inputs, outputs); + AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event); + AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), hiveProcess.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1); + assertTableIsRegistered(DEFAULT_DB, tableName); + + 
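+ //The insert target table should be registered as well, with exactly one entry in its ddlQueries relationship (verified below)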
String tblId = assertTableIsRegistered(DEFAULT_DB, insertTableName); + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + + //TODO -Add update test case + } + + @Test + public void testExportImportUnPartitionedTable() throws Exception { + String tableName = createTable(false); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName); + + String filename = "file://" + mkdir("exportUnPartitioned"); + String query = "export table " + tableName + " to \"" + filename + "\""; + + runCommand(query); + + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + + Set inputs = getInputs(tableName, Entity.Type.TABLE); + Set outputs = getOutputs(filename, Entity.Type.DFS_DIR); + + HiveEventContext event = constructEvent(query, HiveOperation.EXPORT, inputs, outputs); + AtlasEntity processEntity = validateProcess(event); + AtlasEntity hiveProcessExecution = validateProcessExecution(processEntity, event); + AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process.getGuid(), processEntity.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity), 1); + validateHDFSPaths(processEntity, OUTPUTS, filename); + validateInputTables(processEntity, inputs); + + //Import + String importTableName = createTable(false); + + String importTblId = assertTableIsRegistered(DEFAULT_DB, importTableName); + + query = "import table " + importTableName + " from '" + filename + "'"; + + runCommand(query); + + AtlasEntity importTblEntity = atlasClientV2.getEntityByGuid(importTblId).getEntity(); + List importTblddlQueries = (List) importTblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(importTblddlQueries); + Assert.assertEquals(importTblddlQueries.size(), 1); + + outputs = getOutputs(importTableName, Entity.Type.TABLE); + + HiveEventContext event2 = constructEvent(query, HiveOperation.IMPORT, + getInputs(filename, Entity.Type.DFS_DIR), outputs); + AtlasEntity processEntity2 = validateProcess(event2); + AtlasEntity hiveProcessExecution2 = validateProcessExecution(processEntity2, event2); + AtlasObjectId process2 = toAtlasObjectId(hiveProcessExecution2.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process2.getGuid(), processEntity2.getGuid()); + + Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1); + Assert.assertNotEquals(processEntity.getGuid(), processEntity2.getGuid()); + + //Should create another process + filename = "file://" + mkdir("export2UnPartitioned"); + query = "export table " + tableName + " to \"" + filename + "\""; + + runCommand(query); + + AtlasEntity tblEntity2 = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries2 = (List) tblEntity2.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries2); + Assert.assertEquals(ddlQueries2.size(), 1); + + inputs = getInputs(tableName, Entity.Type.TABLE); + outputs = getOutputs(filename, Entity.Type.DFS_DIR); + + HiveEventContext event3 = constructEvent(query, HiveOperation.EXPORT, inputs, outputs); + AtlasEntity processEntity3 = 
validateProcess(event3); + AtlasEntity hiveProcessExecution3 = validateProcessExecution(processEntity3, event3); + AtlasObjectId process3 = toAtlasObjectId(hiveProcessExecution3.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process3.getGuid(), processEntity3.getGuid()); + + Assert.assertEquals(numberOfProcessExecutions(processEntity3), 1); + + // Should be a different process compared to the previous ones + Assert.assertNotEquals(processEntity.getGuid(), processEntity3.getGuid()); + Assert.assertNotEquals(processEntity2.getGuid(), processEntity3.getGuid()); + + //import again should create another process + query = "import table " + importTableName + " from '" + filename + "'"; + + runCommand(query); + + AtlasEntity tblEntity3 = atlasClientV2.getEntityByGuid(importTblId).getEntity(); + List ddlQueries3 = (List) tblEntity3.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries3); + Assert.assertEquals(ddlQueries3.size(), 1); + + outputs = getOutputs(importTableName, Entity.Type.TABLE); + + HiveEventContext event4 = constructEvent(query, HiveOperation.IMPORT, getInputs(filename, + Entity.Type.DFS_DIR), outputs); + AtlasEntity processEntity4 = validateProcess(event4); + AtlasEntity hiveProcessExecution4 = validateProcessExecution(processEntity4, event4); + AtlasObjectId process4 = toAtlasObjectId(hiveProcessExecution4.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process4.getGuid(), processEntity4.getGuid()); + + Assert.assertEquals(numberOfProcessExecutions(processEntity4), 1); + + // Should be a different process compared to the previous ones + Assert.assertNotEquals(processEntity.getGuid(), processEntity4.getGuid()); + Assert.assertNotEquals(processEntity2.getGuid(), processEntity4.getGuid()); + Assert.assertNotEquals(processEntity3.getGuid(), processEntity4.getGuid()); + } + + @Test + public void testExportImportPartitionedTable() throws Exception { + boolean isPartitionedTable = true; + String tableName = createTable(isPartitionedTable); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName); + + //Add a partition + String partFile = "file://" + mkdir("partition"); + String query = "alter table " + tableName + " add partition (dt='"+ PART_FILE + "') location '" + partFile + "'"; + + runCommand(query); + + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 1); + + String filename = "pfile://" + mkdir("export"); + + query = "export table " + tableName + " to \"" + filename + "\""; + + runCommand(query); + + AtlasEntity tblEntity2 = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries2 = (List) tblEntity2.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries2); + Assert.assertEquals(ddlQueries2.size(), 1); + + Set expectedExportInputs = getInputs(tableName, Entity.Type.TABLE); + Set outputs = getOutputs(filename, Entity.Type.DFS_DIR); + Set partitionIps = getInputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION); //Note that export has only partition as input in this case + + partitionIps.addAll(expectedExportInputs); + + HiveEventContext event1 = constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs); + AtlasEntity processEntity1 = validateProcess(event1, expectedExportInputs, outputs); + AtlasEntity
hiveProcessExecution1 = validateProcessExecution(processEntity1, event1); + AtlasObjectId process1 = toAtlasObjectId(hiveProcessExecution1.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + validateHDFSPaths(processEntity1, OUTPUTS, filename); + + //Import + String importTableName = createTable(true); + + String tblId2 = assertTableIsRegistered(DEFAULT_DB, tableName); + + query = "import table " + importTableName + " from '" + filename + "'"; + + runCommand(query); + + AtlasEntity tblEntity3 = atlasClientV2.getEntityByGuid(tblId2).getEntity(); + List ddlQueries3 = (List) tblEntity3.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries3); + Assert.assertEquals(ddlQueries3.size(), 1); + + Set expectedImportInputs = getInputs(filename, Entity.Type.DFS_DIR); + Set importOutputs = getOutputs(importTableName, Entity.Type.TABLE); + Set partitionOps = getOutputs(DEFAULT_DB + "@" + importTableName + "@dt=" + PART_FILE, Entity.Type.PARTITION); + + partitionOps.addAll(importOutputs); + + HiveEventContext event2 = constructEvent(query, HiveOperation.IMPORT, expectedImportInputs , partitionOps); + AtlasEntity processEntity2 = validateProcess(event2, expectedImportInputs, importOutputs); + AtlasEntity hiveProcessExecution2 = validateProcessExecution(processEntity2, event2); + AtlasObjectId process2 = toAtlasObjectId(hiveProcessExecution2.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process2.getGuid(), processEntity2.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity2), 1); + Assert.assertNotEquals(processEntity1.getGuid(), processEntity2.getGuid()); + + //Export should update same process + filename = "pfile://" + mkdir("export2"); + query = "export table " + tableName + " to \"" + filename + "\""; + + runCommand(query); + + Set outputs2 = getOutputs(filename, Entity.Type.DFS_DIR); + Set p3Outputs = new LinkedHashSet() {{ + addAll(outputs2); + addAll(outputs); + }}; + + HiveEventContext event3 = constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2); + + // this process entity should return same as the processEntity1 since the inputs and outputs are the same, + // hence the qualifiedName will be the same + AtlasEntity processEntity3 = validateProcess(event3, expectedExportInputs, p3Outputs); + AtlasEntity hiveProcessExecution3 = validateProcessExecution(processEntity3, event3); + AtlasObjectId process3 = toAtlasObjectId(hiveProcessExecution3.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process3.getGuid(), processEntity3.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity3), 2); + Assert.assertEquals(processEntity1.getGuid(), processEntity3.getGuid()); + + query = "alter table " + importTableName + " drop partition (dt='"+ PART_FILE + "')"; + + runCommand(query); + + //Import should update same process + query = "import table " + importTableName + " from '" + filename + "'"; + + runCommandWithDelay(query, 3000); + + Set importInputs = getInputs(filename, Entity.Type.DFS_DIR); + Set expectedImport2Inputs = new LinkedHashSet() {{ + addAll(importInputs); + addAll(expectedImportInputs); + }}; + + HiveEventContext event4 = constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps); + + // This process is going to be same as processEntity2 + AtlasEntity processEntity4 = 
validateProcess(event4, expectedImport2Inputs, importOutputs); + AtlasEntity hiveProcessExecution4 = validateProcessExecution(processEntity4, event4); + AtlasObjectId process4 = toAtlasObjectId(hiveProcessExecution4.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process4.getGuid(), processEntity4.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity4), 2); + Assert.assertEquals(processEntity2.getGuid(), processEntity4.getGuid()); + Assert.assertNotEquals(processEntity1.getGuid(), processEntity4.getGuid()); + } + + @Test + public void testIgnoreSelect() throws Exception { + String tableName = createTable(); + String query = "select * from " + tableName; + + runCommand(query); + + Set inputs = getInputs(tableName, Entity.Type.TABLE); + HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.QUERY, inputs, null); + + assertProcessIsNotRegistered(hiveEventContext); + + //check with uppercase table name + query = "SELECT * from " + tableName.toUpperCase(); + + runCommand(query); + + assertProcessIsNotRegistered(hiveEventContext); + } + + @Test + public void testAlterTableRenameAliasRegistered() throws Exception{ + String tableName = createTable(false); + String tableGuid = assertTableIsRegistered(DEFAULT_DB, tableName); + String newTableName = tableName(); + String query = String.format("alter table %s rename to %s", tableName, newTableName); + + runCommand(query); + + String newTableGuid = assertTableIsRegistered(DEFAULT_DB, newTableName); + + assertEquals(tableGuid, newTableGuid); + + AtlasEntity atlasEntity = atlasClientV2.getEntityByGuid(newTableGuid).getEntity(); + Map valueMap = atlasEntity.getAttributes(); + Iterable aliasList = (Iterable) valueMap.get("aliases"); + String aliasTableName = aliasList.iterator().next(); + + assert tableName.toLowerCase().equals(aliasTableName); + } + + @Test + public void testAlterTableRename() throws Exception { + String tableName = createTable(true); + String newDBName = createDatabase(); + String tableId = assertTableIsRegistered(DEFAULT_DB, tableName); + AtlasEntity tableEntity = atlasClientV2.getEntityByGuid(tableId).getEntity(); + String createTime = String.valueOf(tableEntity.getAttribute(ATTRIBUTE_CREATE_TIME)); + + Assert.assertNotNull(createTime); + + String columnGuid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), NAME)); + String sdGuid = assertSDIsRegistered(HiveMetaStoreBridge.getStorageDescQFName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName)), null); + + assertDatabaseIsRegistered(newDBName); + + String colTraitDetails = createTrait(columnGuid); //Add trait to column + String sdTraitDetails = createTrait(sdGuid); //Add trait to sd + String partColumnGuid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), "dt")); + String partColTraitDetails = createTrait(partColumnGuid); //Add trait to part col keys + String newTableName = tableName(); + String query = String.format("alter table %s rename to %s", DEFAULT_DB + "." + tableName, newDBName + "." 
+ newTableName); + + runCommandWithDelay(query, 3000); + + String newColGuid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, newDBName, newTableName), NAME)); + + Assert.assertEquals(newColGuid, columnGuid); + + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, newDBName, tableName), NAME)); + + assertTrait(columnGuid, colTraitDetails); + + String newSdGuid = assertSDIsRegistered(HiveMetaStoreBridge.getStorageDescQFName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, newDBName, newTableName)), null); + + Assert.assertEquals(newSdGuid, sdGuid); + assertTrait(sdGuid, sdTraitDetails); + assertTrait(partColumnGuid, partColTraitDetails); + assertTableIsNotRegistered(DEFAULT_DB, tableName); + + String renamedTableId = assertTableIsRegistered(newDBName, newTableName, new AssertPredicate() { + @Override + public void assertOnEntity(final AtlasEntity entity) throws Exception { + AtlasObjectId sd = toAtlasObjectId(entity.getAttribute(ATTRIBUTE_STORAGEDESC)); + + assertNotNull(sd); + } + }); + + AtlasEntity renamedTableEntity = atlasClientV2.getEntityByGuid(renamedTableId).getEntity(); + List ddlQueries = (List) renamedTableEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 2); + + } + + private List getColumns(String dbName, String tableName) throws Exception { + String tableId = assertTableIsRegistered(dbName, tableName); + AtlasEntityWithExtInfo tblEntityWithExtInfo = atlasClientV2.getEntityByGuid(tableId); + AtlasEntity tableEntity = tblEntityWithExtInfo.getEntity(); + + //with soft delete, the deleted columns are returned as well. 
So, filter the deleted ones + List columns = toAtlasObjectIdList(tableEntity.getAttribute(ATTRIBUTE_COLUMNS)); + List activeColumns = new ArrayList<>(); + + for (AtlasObjectId col : columns) { + AtlasEntity columnEntity = tblEntityWithExtInfo.getEntity(col.getGuid()); + + if (columnEntity.getStatus() == AtlasEntity.Status.ACTIVE) { + activeColumns.add(columnEntity); + } + } + + return activeColumns; + } + + private String createTrait(String guid) throws AtlasServiceException { + //add trait + //valid type names in v2 must consist of a letter followed by a sequence of letter, number, or _ characters + String traitName = "PII_Trait" + random(); + AtlasClassificationDef piiTrait = AtlasTypeUtil.createTraitTypeDef(traitName, Collections.emptySet()); + + atlasClientV2.createAtlasTypeDefs(new AtlasTypesDef(Collections.emptyList(), Collections.emptyList(), Collections.singletonList(piiTrait), Collections.emptyList())); + atlasClientV2.addClassifications(guid, Collections.singletonList(new AtlasClassification(piiTrait.getName()))); + + return traitName; + } + + private void assertTrait(String guid, String traitName) throws AtlasServiceException { + AtlasClassification.AtlasClassifications classifications = atlasClientV2.getClassifications(guid); + + Assert.assertEquals(classifications.getList().get(0).getTypeName(), traitName); + } + + @Test + public void testAlterTableAddColumn() throws Exception { + String tableName = createTable(); + String column = columnName(); + String query = "alter table " + tableName + " add columns (" + column + " string)"; + + runCommand(query); + + assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), column)); + + //Verify the number of columns present in the table + List columns = getColumns(DEFAULT_DB, tableName); + + Assert.assertEquals(columns.size(), 3); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName); + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 2); + + } + + //ATLAS-1321: Disable problematic tests. 
Need to revisit and fix them later + @Test(enabled = false) + public void testAlterTableDropColumn() throws Exception { + String tableName = createTable(); + String colDropped = "id"; + String query = "alter table " + tableName + " replace columns (name string)"; + + runCommand(query); + + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), colDropped)); + + //Verify the number of columns present in the table + List columns = getColumns(DEFAULT_DB, tableName); + + assertEquals(columns.size(), 1); + assertEquals(columns.get(0).getAttribute(NAME), "name"); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName); + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 2); + } + + @Test + public void testAlterTableChangeColumn() throws Exception { + //Change name + String oldColName = NAME; + String newColName = "name1"; + String tableName = createTable(); + String query = String.format("alter table %s change %s %s string", tableName, oldColName, newColName); + + runCommandWithDelay(query, 3000); + + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), oldColName)); + assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), newColName)); + + //Verify the number of columns present in the table + List columns = getColumns(DEFAULT_DB, tableName); + + Assert.assertEquals(columns.size(), 2); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName); + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 2); + + //Change column type + oldColName = "name1"; + newColName = "name2"; + + String newColType = "int"; + + query = String.format("alter table %s change column %s %s %s", tableName, oldColName, newColName, newColType); + + runCommandWithDelay(query, 3000); + + columns = getColumns(DEFAULT_DB, tableName); + + Assert.assertEquals(columns.size(), 2); + + String newColQualifiedName = HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), newColName); + + assertColumnIsRegistered(newColQualifiedName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity entity) throws Exception { + assertEquals(entity.getAttribute("type"), "int"); + } + }); + + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), oldColName)); + + AtlasEntity tblEntity2 = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries2 = (List) tblEntity2.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries2); + Assert.assertEquals(ddlQueries2.size(), 3); + + //Change name and add comment + oldColName = "name2"; + newColName = "name3"; + + String comment = "added comment"; + + query = String.format("alter table %s change column %s %s %s COMMENT '%s' after id", tableName, oldColName, newColName, newColType, comment); + + runCommandWithDelay(query, 
3000); + + columns = getColumns(DEFAULT_DB, tableName); + + Assert.assertEquals(columns.size(), 2); + + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), oldColName)); + + newColQualifiedName = HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), newColName); + + assertColumnIsRegistered(newColQualifiedName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity entity) throws Exception { + assertEquals(entity.getAttribute(ATTRIBUTE_COMMENT), comment); + } + }); + + //Change column position + oldColName = "name3"; + newColName = "name4"; + query = String.format("alter table %s change column %s %s %s first", tableName, oldColName, newColName, newColType); + + runCommandWithDelay(query, 3000); + + columns = getColumns(DEFAULT_DB, tableName); + + Assert.assertEquals(columns.size(), 2); + + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), oldColName)); + + newColQualifiedName = HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), newColName); + + assertColumnIsRegistered(newColQualifiedName); + + String finalNewColName = newColName; + + String tblId3 = assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity entity) throws Exception { + List columns = toAtlasObjectIdList(entity.getAttribute(ATTRIBUTE_COLUMNS)); + + assertEquals(columns.size(), 2); + } + } + ); + + AtlasEntity tblEntity3 = atlasClientV2.getEntityByGuid(tblId3).getEntity(); + List ddlQueries3 = (List) tblEntity3.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries3); + Assert.assertEquals(ddlQueries3.size(), 5); + + //Change col position again + oldColName = "name4"; + newColName = "name5"; + query = String.format("alter table %s change column %s %s %s after id", tableName, oldColName, newColName, newColType); + + runCommandWithDelay(query, 3000); + + columns = getColumns(DEFAULT_DB, tableName); + + Assert.assertEquals(columns.size(), 2); + + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), oldColName)); + + newColQualifiedName = HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), newColName); + + assertColumnIsRegistered(newColQualifiedName); + + //Check col position + String finalNewColName2 = newColName; + + String tblId4 = assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity entity) throws Exception { + List columns = toAtlasObjectIdList(entity.getAttribute(ATTRIBUTE_COLUMNS)); + + assertEquals(columns.size(), 2); + } + } + ); + + AtlasEntity tblEntity4 = atlasClientV2.getEntityByGuid(tblId4).getEntity(); + List ddlQueries4 = (List) tblEntity4.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries4); + Assert.assertEquals(ddlQueries4.size(), 6); + } + + /** + * Reenabling this test since HIVE-14706 is fixed now and the hive version we are using now sends + * us the column lineage information + * @throws Exception + */ + @Test + public void testColumnLevelLineage() throws Exception { + String 
sourceTable = "table" + random(); + + runCommand("create table " + sourceTable + "(a int, b int)"); + + String sourceTableGUID = assertTableIsRegistered(DEFAULT_DB, sourceTable); + String a_guid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, sourceTable), "a")); + String b_guid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, sourceTable), "b")); + String ctasTableName = "table" + random(); + String query = "create table " + ctasTableName + " as " + "select sum(a+b) as a, count(*) as b from " + sourceTable; + + runCommand(query); + + String dest_a_guid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, ctasTableName), "a")); + String dest_b_guid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, ctasTableName), "b")); + + Set inputs = getInputs(sourceTable, Entity.Type.TABLE); + Set outputs = getOutputs(ctasTableName, Entity.Type.TABLE); + HiveEventContext event = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs); + AtlasEntity processEntity1 = validateProcess(event); + AtlasEntity hiveProcessExecution1 = validateProcessExecution(processEntity1, event); + AtlasObjectId process1 = toAtlasObjectId(hiveProcessExecution1.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + Assert.assertEquals(processEntity1.getGuid(), processEntity1.getGuid()); + + assertTableIsRegistered(DEFAULT_DB, ctasTableName); + + String processQName = sortEventsAndGetProcessQualifiedName(event); + List aLineageInputs = Arrays.asList(a_guid, b_guid); + String aLineageProcessName = processQName + ":" + "a"; + + LOG.debug("Searching for column lineage process {} ", aLineageProcessName); + String guid = assertEntityIsRegistered(HiveDataTypes.HIVE_COLUMN_LINEAGE.getName(), ATTRIBUTE_QUALIFIED_NAME, aLineageProcessName, null); + + AtlasEntity colLineageEntity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List processInputs = toAtlasObjectIdList(colLineageEntity.getAttribute("inputs")); + List processInputsAsString = new ArrayList<>(); + + for(AtlasObjectId input: processInputs){ + processInputsAsString.add(input.getGuid()); + } + + Collections.sort(processInputsAsString); + Collections.sort(aLineageInputs); + + Assert.assertEquals(processInputsAsString, aLineageInputs); + + List bLineageInputs = Arrays.asList(sourceTableGUID); + String bLineageProcessName = processQName + ":" + "b"; + + LOG.debug("Searching for column lineage process {} ", bLineageProcessName); + + String guid1 = assertEntityIsRegistered(HiveDataTypes.HIVE_COLUMN_LINEAGE.getName(), ATTRIBUTE_QUALIFIED_NAME, bLineageProcessName, null); + + + AtlasEntity colLineageEntity1 = atlasClientV2.getEntityByGuid(guid1).getEntity(); + List bProcessInputs = toAtlasObjectIdList(colLineageEntity1.getAttribute("inputs")); + List bProcessInputsAsString = new ArrayList<>(); + + for(AtlasObjectId input: bProcessInputs){ + bProcessInputsAsString.add(input.getGuid()); + } + + Collections.sort(bProcessInputsAsString); + Collections.sort(bLineageInputs); + + Assert.assertEquals(bProcessInputsAsString, bLineageInputs); + + //Test lineage API response + 
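+ //Verify lineage through both the v2 API (getLineageInfo with INPUT direction) and the v1 input-graph API: the CTAS columns should have the source columns/table as input vertices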
AtlasLineageInfo atlasLineageInfoInput = atlasClientV2.getLineageInfo(dest_a_guid, AtlasLineageInfo.LineageDirection.INPUT,0); + Map entityMap = atlasLineageInfoInput.getGuidEntityMap(); + + ObjectNode response = atlasClient.getInputGraphForEntity(dest_a_guid); + JsonNode vertices = response.get("values").get("vertices"); + JsonNode dest_a_val = vertices.get(dest_a_guid); + JsonNode src_a_val = vertices.get(a_guid); + JsonNode src_b_val = vertices.get(b_guid); + + Assert.assertNotNull(dest_a_val); + Assert.assertNotNull(src_a_val); + Assert.assertNotNull(src_b_val); + + ObjectNode b_response = atlasClient.getInputGraphForEntity(dest_b_guid); + JsonNode b_vertices = b_response.get("values").get("vertices"); + JsonNode b_val = b_vertices.get(dest_b_guid); + JsonNode src_tbl_val = b_vertices.get(sourceTableGUID); + + Assert.assertNotNull(b_val); + Assert.assertNotNull(src_tbl_val); + } + + @Test + public void testIgnoreTruncateTable() throws Exception { + String tableName = createTable(false); + String query = String.format("truncate table %s", tableName); + + runCommand(query); + + Set outputs = getOutputs(tableName, Entity.Type.TABLE); + HiveEventContext event = constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs); + + assertTableIsRegistered(DEFAULT_DB, tableName); + assertProcessIsNotRegistered(event); + } + + @Test + public void testAlterTablePartitionColumnType() throws Exception { + String tableName = createTable(true, true, false); + String newType = "int"; + String query = String.format("ALTER TABLE %s PARTITION COLUMN (dt %s)", tableName, newType); + + runCommand(query); + + String colQualifiedName = HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), "dt"); + String dtColId = assertColumnIsRegistered(colQualifiedName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity column) throws Exception { + Assert.assertEquals(column.getAttribute("type"), newType); + } + }); + + assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity table) throws Exception { + final List partitionKeys = toAtlasObjectIdList(table.getAttribute("partitionKeys")); + Assert.assertEquals(partitionKeys.size(), 1); + Assert.assertEquals(partitionKeys.get(0).getGuid(), dtColId); + + } + }); + } + + @Test + public void testAlterTableWithoutHookConf() throws Exception { + String tableName = tableName(); + String createCommand = "create table " + tableName + " (id int, name string)"; + + driverWithNoHook.run(createCommand); + + assertTableIsNotRegistered(DEFAULT_DB, tableName); + + String command = "alter table " + tableName + " change id id_new string"; + + runCommand(command); + + assertTableIsRegistered(DEFAULT_DB, tableName); + + String tbqn = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName); + + assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(tbqn, "id_new")); + } + + @Test + public void testTraitsPreservedOnColumnRename() throws Exception { + String dbName = createDatabase(); + String tableName = tableName(); + String createQuery = String.format("create table %s.%s (id int, name string)", dbName, tableName); + + runCommand(createQuery); + + String tbqn = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName); + String guid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(tbqn, "id")); + String trait = createTrait(guid); + String oldColName = "id"; + 
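+ //Rename the column: the column entity should keep the same GUID, so the classification added above must still be attached after the rename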
String newColName = "id_new"; + String query = String.format("alter table %s.%s change %s %s string", dbName, tableName, oldColName, newColName); + + runCommand(query); + + String guid2 = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(tbqn, "id_new")); + + assertEquals(guid2, guid); + + assertTrue(atlasClient.getEntity(guid2).getTraitNames().contains(trait)); + } + + @Test + public void testAlterViewRename() throws Exception { + String tableName = createTable(); + String viewName = tableName(); + String newName = tableName(); + String query = "create view " + viewName + " as select * from " + tableName; + + runCommandWithDelay(query, 5000); + + query = "alter view " + viewName + " rename to " + newName; + + runCommandWithDelay(query, 5000); + + assertTableIsNotRegistered(DEFAULT_DB, viewName); + + String viewId = assertTableIsRegistered(DEFAULT_DB, newName); + AtlasEntity viewEntity = atlasClientV2.getEntityByGuid(viewId).getEntity(); + List ddlQueries = (List) viewEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 2); + } + + @Test + public void testAlterTableLocation() throws Exception { + //Its an external table, so the HDFS location should also be registered as an entity + String tableName = createTable(true, true, false); + String testPath = createTestDFSPath("testBaseDir"); + String query = "alter table " + tableName + " set location '" + testPath + "'"; + + runCommandWithDelay(query, 8000); + + String tblId = assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity tableRef) throws Exception { + AtlasObjectId sd = toAtlasObjectId(tableRef.getAttribute(ATTRIBUTE_STORAGEDESC)); + + assertNotNull(sd); + } + }); + + AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity(); + List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + Assert.assertNotNull(ddlQueries); + Assert.assertEquals(ddlQueries.size(), 2); + + String processQualifiedName = getTableProcessQualifiedName(DEFAULT_DB, tableName); + String processId = assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQualifiedName, null); + AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity(); + Assert.assertEquals(numberOfProcessExecutions(processEntity), 2); + //validateProcessExecution(processEntity, event); + validateHDFSPaths(processEntity, INPUTS, testPath); + } + + @Test + public void testAlterTableFileFormat() throws Exception { + String tableName = createTable(); + String testFormat = "orc"; + String query = "alter table " + tableName + " set FILEFORMAT " + testFormat; + + runCommand(query); + + assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity tableRef) throws Exception { + AtlasObjectId sdObjectId = toAtlasObjectId(tableRef.getAttribute(ATTRIBUTE_STORAGEDESC)); + AtlasEntity sdEntity = atlasClientV2.getEntityByGuid(sdObjectId.getGuid()).getEntity(); + + Assert.assertEquals(sdEntity.getAttribute(ATTRIBUTE_INPUT_FORMAT), "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"); + Assert.assertEquals(sdEntity.getAttribute(ATTRIBUTE_OUTPUT_FORMAT), "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"); + Assert.assertNotNull(sdEntity.getAttribute(ATTRIBUTE_SERDE_INFO)); + + AtlasStruct serdeInfo = toAtlasStruct(sdEntity.getAttribute(ATTRIBUTE_SERDE_INFO)); + + 
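+ //The serde info should also switch to the ORC serde, with serialization.format reported as "1" in its parameters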
Assert.assertEquals(serdeInfo.getAttribute(ATTRIBUTE_SERIALIZATION_LIB), "org.apache.hadoop.hive.ql.io.orc.OrcSerde"); + Assert.assertNotNull(serdeInfo.getAttribute(ATTRIBUTE_PARAMETERS)); + Assert.assertEquals(((Map) serdeInfo.getAttribute(ATTRIBUTE_PARAMETERS)).get("serialization.format"), "1"); + } + }); + + + /** + * Hive 'alter table stored as' is not supported - See https://issues.apache.org/jira/browse/HIVE-9576 + * query = "alter table " + tableName + " STORED AS " + testFormat.toUpperCase(); + * runCommand(query); + + * tableRef = atlasClientV1.getEntity(tableId); + * sdRef = (AtlasEntity)tableRef.getAttribute(HiveMetaStoreBridge.STORAGE_DESC); + * Assert.assertEquals(sdRef.getAttribute(HiveMetaStoreBridge.STORAGE_DESC_INPUT_FMT), "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"); + * Assert.assertEquals(sdRef.getAttribute(HiveMetaStoreBridge.STORAGE_DESC_OUTPUT_FMT), "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"); + * Assert.assertEquals(((Map) sdRef.getAttribute(HiveMetaStoreBridge.PARAMETERS)).getAttribute("orc.compress"), "ZLIB"); + */ + } + + @Test + public void testAlterTableBucketingClusterSort() throws Exception { + String tableName = createTable(); + List cols = Collections.singletonList("id"); + + runBucketSortQuery(tableName, 5, cols, cols); + + cols = Arrays.asList("id", NAME); + + runBucketSortQuery(tableName, 2, cols, cols); + } + + private void runBucketSortQuery(String tableName, final int numBuckets, final List bucketCols, final List sortCols) throws Exception { + String fmtQuery = "alter table %s CLUSTERED BY (%s) SORTED BY (%s) INTO %s BUCKETS"; + String query = String.format(fmtQuery, tableName, stripListBrackets(bucketCols.toString()), stripListBrackets(sortCols.toString()), numBuckets); + + runCommand(query); + + assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity entity) throws Exception { + verifyBucketSortingProperties(entity, numBuckets, bucketCols, sortCols); + } + }); + } + + private String stripListBrackets(String listElements) { + return StringUtils.strip(StringUtils.strip(listElements, "["), "]"); + } + + private void verifyBucketSortingProperties(AtlasEntity tableRef, int numBuckets, List bucketColNames, List sortcolNames) throws Exception { + AtlasObjectId sdObjectId = toAtlasObjectId(tableRef.getAttribute(ATTRIBUTE_STORAGEDESC)); + AtlasEntity sdEntity = atlasClientV2.getEntityByGuid(sdObjectId.getGuid()).getEntity(); + + Assert.assertEquals((sdEntity.getAttribute(ATTRIBUTE_NUM_BUCKETS)), numBuckets); + Assert.assertEquals(sdEntity.getAttribute(ATTRIBUTE_BUCKET_COLS), bucketColNames); + + List hiveOrderStructList = toAtlasStructList(sdEntity.getAttribute(ATTRIBUTE_SORT_COLS)); + + Assert.assertNotNull(hiveOrderStructList); + Assert.assertEquals(hiveOrderStructList.size(), sortcolNames.size()); + + for (int i = 0; i < sortcolNames.size(); i++) { + AtlasStruct hiveOrderStruct = hiveOrderStructList.get(i); + + Assert.assertNotNull(hiveOrderStruct); + Assert.assertEquals(hiveOrderStruct.getAttribute("col"), sortcolNames.get(i)); + Assert.assertEquals(hiveOrderStruct.getAttribute("order"), 1); + } + } + + @Test + public void testAlterTableSerde() throws Exception { + //SERDE PROPERTIES + String tableName = createTable(); + Map expectedProps = new HashMap() {{ + put("key1", "value1"); + }}; + + runSerdePropsQuery(tableName, expectedProps); + + expectedProps.put("key2", "value2"); + + //Add another property + runSerdePropsQuery(tableName, expectedProps); + } + + @Test + public 
void testDropTable() throws Exception { + //Test Deletion of tables and its corresponding columns + String tableName = createTable(true, true, false); + + assertTableIsRegistered(DEFAULT_DB, tableName); + assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), "id")); + assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), NAME)); + + String query = String.format("drop table %s ", tableName); + + runCommandWithDelay(query, 3000); + + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), "id")); + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), NAME)); + assertTableIsNotRegistered(DEFAULT_DB, tableName); + } + + private WriteEntity getPartitionOutput() { + TestWriteEntity partEntity = new TestWriteEntity(PART_FILE, Entity.Type.PARTITION); + + return partEntity; + } + + private ReadEntity getPartitionInput() { + ReadEntity partEntity = new TestReadEntity(PART_FILE, Entity.Type.PARTITION); + + return partEntity; + } + + @Test + public void testDropDatabaseWithCascade() throws Exception { + //Test Deletion of database and its corresponding tables + String dbName = "db" + random(); + + runCommand("create database " + dbName + " WITH DBPROPERTIES ('p1'='v1')"); + + int numTables = 10; + String[] tableNames = new String[numTables]; + + for(int i = 0; i < numTables; i++) { + tableNames[i] = createTable(true, true, false); + } + + String query = String.format("drop database %s cascade", dbName); + + runCommand(query); + + //Verify columns are not registered for one of the tables + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableNames[0]), "id")); + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableNames[0]), NAME)); + + for(int i = 0; i < numTables; i++) { + assertTableIsNotRegistered(dbName, tableNames[i]); + } + + assertDatabaseIsNotRegistered(dbName); + } + + @Test + public void testDropDatabaseWithoutCascade() throws Exception { + //Test Deletion of database and its corresponding tables + String dbName = "db" + random(); + + runCommand("create database " + dbName + " WITH DBPROPERTIES ('p1'='v1')"); + + int numTables = 5; + String[] tableNames = new String[numTables]; + + for(int i = 0; i < numTables; i++) { + tableNames[i] = createTable(true, true, false); + + String query = String.format("drop table %s", tableNames[i]); + + runCommand(query); + + assertTableIsNotRegistered(dbName, tableNames[i]); + } + + String query = String.format("drop database %s", dbName); + + runCommand(query); + + String dbQualifiedName = HiveMetaStoreBridge.getDBQualifiedName(CLUSTER_NAME, dbName); + + Thread.sleep(10000); + + try { + atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_DB.getName(), Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, dbQualifiedName)); + } catch (AtlasServiceException e) { + if (e.getStatus() == ClientResponse.Status.NOT_FOUND) { + return; + } + } + + fail(String.format("Entity was not supposed to exist for typeName = %s, attributeName = %s, attributeValue = %s", HiveDataTypes.HIVE_DB.getName(), ATTRIBUTE_QUALIFIED_NAME, 
dbQualifiedName)); + } + + @Test + public void testDropNonExistingDB() throws Exception { + //Test Deletion of a non existing DB + String dbName = "nonexistingdb"; + + assertDatabaseIsNotRegistered(dbName); + + String query = String.format("drop database if exists %s cascade", dbName); + + runCommand(query); + + //Should have no effect + assertDatabaseIsNotRegistered(dbName); + } + + @Test + public void testDropNonExistingTable() throws Exception { + //Test Deletion of a non existing table + String tableName = "nonexistingtable"; + + assertTableIsNotRegistered(DEFAULT_DB, tableName); + + String query = String.format("drop table if exists %s", tableName); + + runCommand(query); + + //Should have no effect + assertTableIsNotRegistered(DEFAULT_DB, tableName); + } + + @Test + public void testDropView() throws Exception { + //Test Deletion of tables and its corresponding columns + String tableName = createTable(true, true, false); + String viewName = tableName(); + String query = "create view " + viewName + " as select * from " + tableName; + + runCommandWithDelay(query, 3000); + + assertTableIsRegistered(DEFAULT_DB, viewName); + assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName), "id")); + assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName), NAME)); + + query = String.format("drop view %s ", viewName); + + runCommandWithDelay(query, 3000); + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName), "id")); + assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName), NAME)); + assertTableIsNotRegistered(DEFAULT_DB, viewName); + } + + private void runSerdePropsQuery(String tableName, Map expectedProps) throws Exception { + String serdeLib = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; + String serializedProps = getSerializedProps(expectedProps); + String query = String.format("alter table %s set SERDE '%s' WITH SERDEPROPERTIES (%s)", tableName, serdeLib, serializedProps); + + runCommand(query); + + verifyTableSdProperties(tableName, serdeLib, expectedProps); + } + + private String getSerializedProps(Map expectedProps) { + StringBuilder sb = new StringBuilder(); + + for(String expectedPropKey : expectedProps.keySet()) { + if(sb.length() > 0) { + sb.append(","); + } + + sb.append("'").append(expectedPropKey).append("'"); + sb.append("="); + sb.append("'").append(expectedProps.get(expectedPropKey)).append("'"); + } + + return sb.toString(); + } + + @Test + public void testAlterDBOwner() throws Exception { + String dbName = createDatabase(); + + assertDatabaseIsRegistered(dbName); + + String owner = "testOwner"; + String fmtQuery = "alter database %s set OWNER %s %s"; + String query = String.format(fmtQuery, dbName, "USER", owner); + + runCommandWithDelay(query, 3000); + + assertDatabaseIsRegistered(dbName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity entity) { + assertEquals(entity.getAttribute(AtlasClient.OWNER), owner); + } + }); + } + + @Test + public void testAlterDBProperties() throws Exception { + String dbName = createDatabase(); + String fmtQuery = "alter database %s %s DBPROPERTIES (%s)"; + + testAlterProperties(Entity.Type.DATABASE, dbName, fmtQuery); + } + + @Test + public void 
testAlterTableProperties() throws Exception { + String tableName = createTable(); + String fmtQuery = "alter table %s %s TBLPROPERTIES (%s)"; + + testAlterProperties(Entity.Type.TABLE, tableName, fmtQuery); + } + + private void testAlterProperties(Entity.Type entityType, String entityName, String fmtQuery) throws Exception { + String SET_OP = "set"; + String UNSET_OP = "unset"; + Map expectedProps = new HashMap() {{ + put("testPropKey1", "testPropValue1"); + put("comment", "test comment"); + }}; + + String query = String.format(fmtQuery, entityName, SET_OP, getSerializedProps(expectedProps)); + + runCommandWithDelay(query, 3000); + + verifyEntityProperties(entityType, entityName, expectedProps, false); + + expectedProps.put("testPropKey2", "testPropValue2"); + //Add another property + + query = String.format(fmtQuery, entityName, SET_OP, getSerializedProps(expectedProps)); + + runCommandWithDelay(query, 3000); + + verifyEntityProperties(entityType, entityName, expectedProps, false); + + if (entityType != Entity.Type.DATABASE) { + //Database unset properties doesnt work - alter database %s unset DBPROPERTIES doesnt work + //Unset all the props + StringBuilder sb = new StringBuilder("'"); + + query = String.format(fmtQuery, entityName, UNSET_OP, Joiner.on("','").skipNulls().appendTo(sb, expectedProps.keySet()).append('\'')); + + runCommandWithDelay(query, 3000); + + verifyEntityProperties(entityType, entityName, expectedProps, true); + } + } + + @Test + public void testAlterViewProperties() throws Exception { + String tableName = createTable(); + String viewName = tableName(); + String query = "create view " + viewName + " as select * from " + tableName; + + runCommand(query); + + String fmtQuery = "alter view %s %s TBLPROPERTIES (%s)"; + + testAlterProperties(Entity.Type.TABLE, viewName, fmtQuery); + } + + private void verifyEntityProperties(Entity.Type type, String entityName, final Map expectedProps, final boolean checkIfNotExists) throws Exception { + switch(type) { + case TABLE: + assertTableIsRegistered(DEFAULT_DB, entityName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity entity) throws Exception { + verifyProperties(entity, expectedProps, checkIfNotExists); + } + }); + break; + case DATABASE: + assertDatabaseIsRegistered(entityName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity entity) throws Exception { + verifyProperties(entity, expectedProps, checkIfNotExists); + } + }); + break; + } + } + + private void verifyTableSdProperties(String tableName, final String serdeLib, final Map expectedProps) throws Exception { + assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() { + @Override + public void assertOnEntity(AtlasEntity tableRef) throws Exception { + AtlasObjectId sdEntity = toAtlasObjectId(tableRef.getAttribute(ATTRIBUTE_STORAGEDESC)); + + assertNotNull(sdEntity); + } + }); + } + + + private void verifyProperties(AtlasStruct referenceable, Map expectedProps, boolean checkIfNotExists) { + Map parameters = (Map) referenceable.getAttribute(ATTRIBUTE_PARAMETERS); + + if (!checkIfNotExists) { + //Check if properties exist + Assert.assertNotNull(parameters); + for (String propKey : expectedProps.keySet()) { + Assert.assertEquals(parameters.get(propKey), expectedProps.get(propKey)); + } + } else { + //Check if properties dont exist + if (expectedProps != null && parameters != null) { + for (String propKey : expectedProps.keySet()) { + Assert.assertFalse(parameters.containsKey(propKey)); + } + } + } + } + + 
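Illustration only (not part of the patch): the testAlterProperties flow above builds its HiveQL from a plain property map via getSerializedProps and then checks the entity's "parameters" attribute. A minimal standalone sketch of that query-building step follows; the class name SerializedPropsDemo and the table name t1 are made up for illustration, and the serialization logic simply mirrors getSerializedProps as shown in this patch.

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class SerializedPropsDemo {
        // Mirrors getSerializedProps(): renders {k1=v1, k2=v2} as 'k1'='v1','k2'='v2'
        private static String serialize(Map<String, String> props) {
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<String, String> e : props.entrySet()) {
                if (sb.length() > 0) {
                    sb.append(",");
                }
                sb.append("'").append(e.getKey()).append("'='").append(e.getValue()).append("'");
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            Map<String, String> props = new LinkedHashMap<>();
            props.put("testPropKey1", "testPropValue1");
            props.put("comment", "test comment");

            // Prints: alter table t1 set TBLPROPERTIES ('testPropKey1'='testPropValue1','comment'='test comment')
            System.out.println(String.format("alter table %s %s TBLPROPERTIES (%s)", "t1", "set", serialize(props)));
        }
    }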
private String sortEventsAndGetProcessQualifiedName(final HiveEventContext event) throws HiveException{ + SortedSet sortedHiveInputs = event.getInputs() == null ? null : new TreeSet(entityComparator); + SortedSet sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet(entityComparator); + + if (event.getInputs() != null) { + sortedHiveInputs.addAll(event.getInputs()); + } + + if (event.getOutputs() != null) { + sortedHiveOutputs.addAll(event.getOutputs()); + } + + return getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs())); + } + + private String assertProcessIsRegistered(final HiveEventContext event) throws Exception { + try { + String processQFName = sortEventsAndGetProcessQualifiedName(event); + + LOG.debug("Searching for process with query {}", processQFName); + + return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQFName, new AssertPredicate() { + @Override + public void assertOnEntity(final AtlasEntity entity) throws Exception { + List recentQueries = (List) entity.getAttribute(ATTRIBUTE_RECENT_QUERIES); + Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr())); + } + }); + } catch (Exception e) { + LOG.error("Exception : ", e); + throw e; + } + } + + private String assertProcessIsRegistered(final HiveEventContext event, final Set inputTbls, final Set outputTbls) throws Exception { + try { + SortedSet sortedHiveInputs = event.getInputs() == null ? null : new TreeSet(entityComparator); + SortedSet sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet(entityComparator); + + if (event.getInputs() != null) { + sortedHiveInputs.addAll(event.getInputs()); + } + + if (event.getOutputs() != null) { + sortedHiveOutputs.addAll(event.getOutputs()); + } + + String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls)); + + LOG.debug("Searching for process with query {}", processQFName); + + return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQFName, new AssertPredicate() { + @Override + public void assertOnEntity(final AtlasEntity entity) throws Exception { + List recentQueries = (List) entity.getAttribute(BaseHiveEvent.ATTRIBUTE_RECENT_QUERIES); + + Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr())); + } + }); + } catch(Exception e) { + LOG.error("Exception : ", e); + throw e; + } + } + + private String assertProcessExecutionIsRegistered(AtlasEntity hiveProcess, final HiveEventContext event) throws Exception { + try { + String guid = ""; + List processExecutions = toAtlasObjectIdList(hiveProcess.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS)); + for (AtlasObjectId processExecution : processExecutions) { + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2. 
+ getEntityByGuid(processExecution.getGuid()); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + if (String.valueOf(entity.getAttribute(ATTRIBUTE_QUERY_TEXT)).equals(event.getQueryStr().toLowerCase().trim())) { + guid = entity.getGuid(); + } + } + + return assertEntityIsRegisteredViaGuid(guid, new AssertPredicate() { + @Override + public void assertOnEntity(final AtlasEntity entity) throws Exception { + String queryText = (String) entity.getAttribute(ATTRIBUTE_QUERY_TEXT); + Assert.assertEquals(queryText, event.getQueryStr().toLowerCase().trim()); + } + }); + } catch(Exception e) { + LOG.error("Exception : ", e); + throw e; + } + } + + + private String getDSTypeName(Entity entity) { + return Entity.Type.TABLE.equals(entity.getType()) ? HiveDataTypes.HIVE_TABLE.name() : HiveMetaStoreBridge.HDFS_PATH; + } + + private SortedMap getSortedProcessDataSets(Set inputTbls) { + SortedMap inputs = new TreeMap<>(entityComparator); + + if (inputTbls != null) { + for (final T tbl : inputTbls) { + AtlasEntity inputTableRef = new AtlasEntity(getDSTypeName(tbl), new HashMap() {{ + put(ATTRIBUTE_QUALIFIED_NAME, tbl.getName()); + }}); + + inputs.put(tbl, inputTableRef); + } + } + return inputs; + } + + private void assertProcessIsNotRegistered(HiveEventContext event) throws Exception { + try { + SortedSet sortedHiveInputs = event.getInputs() == null ? null : new TreeSet(entityComparator); + SortedSet sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet(entityComparator); + + if (event.getInputs() != null) { + sortedHiveInputs.addAll(event.getInputs()); + } + + if (event.getOutputs() != null) { + sortedHiveOutputs.addAll(event.getOutputs()); + } + + String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs())); + + LOG.debug("Searching for process with query {}", processQFName); + + assertEntityIsNotRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQFName); + } catch(Exception e) { + LOG.error("Exception : ", e); + } + } + + private void assertTableIsNotRegistered(String dbName, String tableName, boolean isTemporaryTable) throws Exception { + LOG.debug("Searching for table {}.{}", dbName, tableName); + + String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName, isTemporaryTable); + + assertEntityIsNotRegistered(HiveDataTypes.HIVE_TABLE.getName(), ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName); + } + + private void assertTableIsNotRegistered(String dbName, String tableName) throws Exception { + LOG.debug("Searching for table {}.{}", dbName, tableName); + + String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName, false); + + assertEntityIsNotRegistered(HiveDataTypes.HIVE_TABLE.getName(), ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName); + } + + private String assertTableIsRegistered(String dbName, String tableName, AssertPredicate assertPredicate) throws Exception { + return assertTableIsRegistered(dbName, tableName, assertPredicate, false); + } + + @Test + public void testLineage() throws Exception { + String table1 = createTable(false); + String db2 = createDatabase(); + String table2 = tableName(); + String query = String.format("create table %s.%s as select * from %s", db2, table2, table1); + + runCommand(query); + + String table1Id = assertTableIsRegistered(DEFAULT_DB, table1); + String table2Id = assertTableIsRegistered(db2, 
table2); + AtlasLineageInfo inputLineage = atlasClientV2.getLineageInfo(table2Id, AtlasLineageInfo.LineageDirection.INPUT, 0); + Map entityMap = inputLineage.getGuidEntityMap(); + + assertTrue(entityMap.containsKey(table1Id)); + assertTrue(entityMap.containsKey(table2Id)); + + AtlasLineageInfo inputLineage1 = atlasClientV2.getLineageInfo(table1Id, AtlasLineageInfo.LineageDirection.OUTPUT, 0); + Map entityMap1 = inputLineage1.getGuidEntityMap(); + + assertTrue(entityMap1.containsKey(table1Id)); + assertTrue(entityMap1.containsKey(table2Id)); + } + + //For ATLAS-448 + @Test + public void testNoopOperation() throws Exception { + runCommand("show compactions"); + runCommand("show transactions"); + } + + private String createDatabase() throws Exception { + String dbName = dbName(); + + runCommand("create database " + dbName); + + return dbName; + } + + private String columnName() { + return "col" + random(); + } + + private String createTable() throws Exception { + return createTable(false); + } + + private String createTable(boolean isPartitioned) throws Exception { + String tableName = tableName(); + + runCommand("create table " + tableName + "(id int, name string) comment 'table comment' " + (isPartitioned ? " partitioned by(dt string)" : "")); + + return tableName; + } + + private String createTable(boolean isExternal, boolean isPartitioned, boolean isTemporary) throws Exception { + String tableName = tableName(); + + String location = ""; + if (isExternal) { + location = " location '" + createTestDFSPath("someTestPath") + "'"; + } + + runCommandWithDelay("create " + (isExternal ? " EXTERNAL " : "") + (isTemporary ? "TEMPORARY " : "") + "table " + tableName + "(id int, name string) comment 'table comment' " + (isPartitioned ? " partitioned by(dt string)" : "") + location, 3000); + + return tableName; + } + + // ReadEntity class doesn't offer a constructor that takes (name, type). A hack to get the tests going! + private static class TestReadEntity extends ReadEntity { + private final String name; + private final Entity.Type type; + + public TestReadEntity(String name, Entity.Type type) { + this.name = name; + this.type = type; + } + + @Override + public String getName() { return name; } + + @Override + public Entity.Type getType() { return type; } + } + + // WriteEntity class doesn't offer a constructor that takes (name, type). A hack to get the tests going! + private static class TestWriteEntity extends WriteEntity { + private final String name; + private final Entity.Type type; + + public TestWriteEntity(String name, Entity.Type type) { + this.name = name; + this.type = type; + } + + @Override + public String getName() { return name; } + + @Override + public Entity.Type getType() { return type; } + } + + private int numberOfProcessExecutions(AtlasEntity hiveProcess) { + return toAtlasObjectIdList(hiveProcess.getRelationshipAttribute( + BaseHiveEvent.ATTRIBUTE_PROCESS_EXECUTIONS)).size(); + } +} diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveMetastoreHookIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveMetastoreHookIT.java new file mode 100755 index 0000000000..2bce1b2a0e --- /dev/null +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveMetastoreHookIT.java @@ -0,0 +1,384 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.hive.hook; + +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.hive.HiveITBase; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.annotations.Test; + +import java.util.List; +import java.util.Map; + +import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_DDL_QUERIES; +import static org.apache.atlas.model.instance.AtlasEntity.Status.ACTIVE; +import static org.apache.atlas.model.instance.AtlasEntity.Status.DELETED; +import static org.testng.AssertJUnit.*; + +public class HiveMetastoreHookIT extends HiveITBase { + private static final Logger LOG = LoggerFactory.getLogger(HiveMetastoreHookIT.class); + + @Test (priority = 1) + public void testCreateDatabase() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + AtlasEntity dbEntity = getAtlasEntity(dbId); + + assertEquals(((List) dbEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + } + + @Test (priority = 2) + public void testAlterDatabase() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + + AtlasEntity dbEntity = getAtlasEntity(dbId); + assertNotNull(dbEntity); + + // SET DBPROPERTIES + query = "ALTER DATABASE " + dbName + " SET DBPROPERTIES (\"prop1\"=\"val1\", \"prop2\"=\"val2\")"; + runCommandWithDelay(query); + + dbEntity = getAtlasEntity(dbId); + Map parameters = (Map) dbEntity.getAttribute("parameters"); + + assertEquals(((List) dbEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + assertNotNull(parameters); + assertEquals(2, parameters.size()); + + // SET OWNER to 'hive' + query = "ALTER DATABASE " + dbName + " SET OWNER USER hive"; + runCommandWithDelay(query); + + dbEntity = getAtlasEntity(dbId); + + + assertEquals(((List) dbEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + assertEquals(dbEntity.getAttribute("owner"), "hive"); + assertEquals(dbEntity.getAttribute("ownerType"), "USER"); + + // SET LOCATION + String hdfsPath = "hdfs://localhost:8020/warehouse/tablespace/managed/dwx/new_db.db"; + + query = String.format("ALTER DATABASE %s SET LOCATION \"%s\"", dbName, hdfsPath); + runCommandWithDelay(query); + + dbEntity = getAtlasEntity(dbId); + + assertEquals(((List) dbEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + + String location = (String) dbEntity.getAttribute("location"); + assertEquals(location, hdfsPath); + } + + @Test (priority = 3) + public void testDropDatabase() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = 
assertDatabaseIsRegistered(dbName); + + AtlasEntity dbEntity = getAtlasEntity(dbId); + assertNotNull(dbEntity); + + query = "DROP DATABASE " + dbName; + runCommand(query); + assertDatabaseIsNotRegistered(dbName); + + dbEntity = getAtlasEntity(dbId); + assertEquals(dbEntity.getStatus(), DELETED); + } + + @Test (priority = 4) + public void testDropDatabaseWithTables() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommandWithDelay(query); + String dbId = assertDatabaseIsRegistered(dbName); + assertEquals(getAtlasEntity(dbId).getStatus(), ACTIVE); + + String table1 = tableName(); + runCommandWithDelay("CREATE TABLE " + dbName + "." + table1 + " (name string, age int, dob date)"); + String table1Id = assertTableIsRegistered(dbName, table1); + assertEquals(getAtlasEntity(table1Id).getStatus(), ACTIVE); + + String table2 = tableName(); + runCommandWithDelay("CREATE TABLE " + dbName + "." + table2 + " (name string, age int, dob date)"); + String table2Id = assertTableIsRegistered(dbName, table2); + assertEquals(getAtlasEntity(table2Id).getStatus(), ACTIVE); + + query = "DROP DATABASE " + dbName + " CASCADE"; + runCommandWithDelay(query); + assertDatabaseIsNotRegistered(dbName); + + assertEquals(getAtlasEntity(dbId).getStatus(), DELETED); + assertEquals(getAtlasEntity(table1Id).getStatus(), DELETED); + assertEquals(getAtlasEntity(table2Id).getStatus(), DELETED); + } + + @Test (priority = 5) + public void testCreateTable() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + assertEquals(getAtlasEntity(dbId).getStatus(), ACTIVE); + + String tableName = tableName(); + runCommand("CREATE TABLE " + dbName + "." + tableName + " (name string, age int, dob date)"); + String tblId = assertTableIsRegistered(dbName, tableName); + AtlasEntity tblEntity = getAtlasEntity(tblId); + + assertEquals(((List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + assertEquals(getAtlasEntity(tblId).getStatus(), ACTIVE); + } + + @Test (priority = 6) + public void testCreateView() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + assertEquals(getAtlasEntity(dbId).getStatus(), ACTIVE); + + String tableName = tableName(); + runCommand("CREATE TABLE " + dbName + "." + tableName + " (name string, age int, dob date)"); + String tblId = assertTableIsRegistered(dbName, tableName); + assertEquals(getAtlasEntity(tblId).getStatus(), ACTIVE); + + String viewName = tableName(); + + runCommand("CREATE VIEW " + dbName + "." + viewName + " AS SELECT * FROM " + dbName + "." + tableName); + + tblId = assertTableIsRegistered(dbName, viewName); + AtlasEntity tblEntity = getAtlasEntity(tblId); + + assertEquals(((List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + assertEquals(getAtlasEntity(tblId).getStatus(), ACTIVE); + } + + @Test (priority = 7) + public void testAlterTableProperties() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + assertEquals(getAtlasEntity(dbId).getStatus(), ACTIVE); + assertEquals(((List) getAtlasEntity(dbId).getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + + String tableName = tableName(); + runCommand("CREATE TABLE " + dbName + "." 
+ tableName + " (name string, age int, dob date)"); + String tblId = assertTableIsRegistered(dbName, tableName); + assertEquals(getAtlasEntity(tblId).getStatus(), ACTIVE); + assertEquals(((List) getAtlasEntity(tblId).getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + + // SET TBLPROPERTIES + query = "ALTER TABLE " + dbName + "." + tableName + " SET TBLPROPERTIES (\"prop1\"=\"val1\", \"prop2\"=\"val2\", \"prop3\"=\"val3\")"; + runCommandWithDelay(query); + + query = "ALTER TABLE " + dbName + "." + tableName + " SET TBLPROPERTIES (\"comment\" = \"sample comment\")"; + runCommandWithDelay(query); + + // SET SERDE + query = "ALTER TABLE " + dbName + "." + tableName + " SET SERDE \"org.apache.hadoop.hive.ql.io.orc.OrcSerde\" WITH SERDEPROPERTIES (\"prop1\"=\"val1\", \"prop2\"=\"val2\")"; + runCommandWithDelay(query); + + // SET SERDEPROPERTIES + query = "ALTER TABLE " + dbName + "." + tableName + " SET SERDEPROPERTIES (\"prop1\"=\"val1\", \"prop2\"=\"val2\")"; + runCommandWithDelay(query); + + AtlasEntity tableEntity = getAtlasEntity(tblId); + Map tableParameters = (Map) tableEntity.getAttribute("parameters"); + + assertEquals(tableParameters.get("comment"), "sample comment"); + assertEquals(tableParameters.get("prop1"), "val1"); + assertEquals(tableParameters.get("prop2"), "val2"); + assertEquals(tableParameters.get("prop3"), "val3"); + + AtlasEntity sdEntity = getAtlasEntity((String) ((Map) tableEntity.getAttribute("sd")).get("guid")); + Map serdeInfo = (Map) sdEntity.getAttribute("serdeInfo"); + Map serdeAttrs = (Map) serdeInfo.get("attributes"); + + assertEquals(serdeAttrs.get("serializationLib"), "org.apache.hadoop.hive.ql.io.orc.OrcSerde"); + assertEquals(((Map) serdeAttrs.get("parameters")).get("prop1"), "val1"); + assertEquals(((Map) serdeAttrs.get("parameters")).get("prop2"), "val2"); + assertEquals(((List) tableEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + } + + @Test (priority = 8) + public void testAlterTableRenameTableName() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + assertEquals(getAtlasEntity(dbId).getStatus(), ACTIVE); + + String tableName = tableName(); + runCommand("CREATE TABLE " + dbName + "." + tableName + " (name string, age int, dob date)"); + String tblId = assertTableIsRegistered(dbName, tableName); + assertEquals(getAtlasEntity(tblId).getStatus(), ACTIVE); + + // RENAME TABLE NAME + String newTableName = tableName + "_new"; + query = "ALTER TABLE " + dbName + "." + tableName + " RENAME TO " + dbName + "." 
+ newTableName; + runCommandWithDelay(query); + + AtlasEntityWithExtInfo tableEntityWithExtInfo = getAtlasEntityWithExtInfo(tblId); + AtlasEntity tableEntity = tableEntityWithExtInfo.getEntity(); + + assertEquals(((List) tableEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + + // validate table rename in table entity + assertEquals(newTableName, tableEntity.getAttribute("name")); + assertTrue(((String) tableEntity.getAttribute("qualifiedName")).contains(newTableName)); + + // validate table rename in column and sd entity + for (AtlasEntity referredEntity : tableEntityWithExtInfo.getReferredEntities().values()) { + assertTrue(((String) referredEntity.getAttribute("qualifiedName")).contains(newTableName)); + } + } + + @Test (priority = 9) + public void testAlterTableRenameColumnName() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + assertEquals(getAtlasEntity(dbId).getStatus(), ACTIVE); + + String tableName = tableName(); + runCommand("CREATE TABLE " + dbName + "." + tableName + " (col1 int, col2 int, col3 int)"); + String tblId = assertTableIsRegistered(dbName, tableName); + AtlasEntityWithExtInfo tableEntityWithExtInfo = getAtlasEntityWithExtInfo(tblId); + + assertEquals(tableEntityWithExtInfo.getEntity().getStatus(), ACTIVE); + + String col1Id = getColumnId(tableEntityWithExtInfo, "col1"); + String col2Id = getColumnId(tableEntityWithExtInfo, "col2"); + + // RENAME COLUMN NAME + query = "ALTER TABLE " + dbName + "." + tableName + " CHANGE col1 col11 int"; + runCommandWithDelay(query); + + AtlasEntity col1Entity = getAtlasEntity(col1Id); + assertEquals(col1Entity.getAttribute("name"), "col11"); + assertTrue(((String) col1Entity.getAttribute("qualifiedName")).contains("col11")); + + // CHANGE COLUMN NAME and DATATYPE + query = "ALTER TABLE " + dbName + "." + tableName + " CHANGE col2 col22 string"; + runCommandWithDelay(query); + + AtlasEntity col2Entity = getAtlasEntity(col2Id); + assertEquals(col2Entity.getAttribute("name"), "col22"); + assertEquals(col2Entity.getAttribute("type"), "string"); + assertEquals(((List) getAtlasEntity(tblId).getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 0); + } + + @Test (priority = 10) + public void testDropTable() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + assertEquals(getAtlasEntity(dbId).getStatus(), ACTIVE); + + String tableName = tableName(); + runCommand("CREATE TABLE " + dbName + "." + tableName + " (name string, age int, dob date)"); + String tblId = assertTableIsRegistered(dbName, tableName); + assertEquals(getAtlasEntity(tblId).getStatus(), ACTIVE); + + query = "DROP TABLE " + dbName + "." + tableName; + runCommandWithDelay(query); + + assertEquals(getAtlasEntity(tblId).getStatus(), DELETED); + } + + @Test (priority = 11) + public void testDropView() throws Exception { + String dbName = dbName(); + String query = "CREATE DATABASE " + dbName; + + runCommand(query); + String dbId = assertDatabaseIsRegistered(dbName); + assertEquals(getAtlasEntity(dbId).getStatus(), ACTIVE); + + String tableName = tableName(); + runCommand("CREATE TABLE " + dbName + "." 
+ tableName + " (name string, age int, dob date)"); + String tblId = assertTableIsRegistered(dbName, tableName); + assertEquals(getAtlasEntity(tblId).getStatus(), ACTIVE); + + String viewName = tableName(); + runCommand("CREATE VIEW " + dbName + "." + viewName + " AS SELECT * FROM " + dbName + "." + tableName); + tblId = assertTableIsRegistered(dbName, viewName); + assertEquals(getAtlasEntity(tblId).getStatus(), ACTIVE); + + query = "DROP VIEW " + dbName + "." + viewName; + runCommandWithDelay(query); + + assertEquals(getAtlasEntity(tblId).getStatus(), DELETED); + } + + private String getColumnId(AtlasEntityWithExtInfo entityWithExtInfo, String columnName) { + String ret = null; + + for (AtlasEntity entity : entityWithExtInfo.getReferredEntities().values()) { + + if (entity.getTypeName().equals("hive_column") && entity.getAttribute("name").equals(columnName)) { + ret = entity.getGuid(); + break; + } + } + + return ret; + } + + private AtlasEntity getAtlasEntity(String guid) throws AtlasServiceException { + return atlasClientV2.getEntityByGuid(guid).getEntity(); + } + + private AtlasEntityWithExtInfo getAtlasEntityWithExtInfo(String guid) throws AtlasServiceException { + return atlasClientV2.getEntityByGuid(guid); + } + + protected void runCommand(String cmd) throws Exception { + runCommandWithDelay(driverWithoutContext, cmd, 0); + } + + protected void runCommandWithDelay(String cmd) throws Exception { + int delayTimeInMs = 10000; + runCommandWithDelay(driverWithoutContext, cmd, delayTimeInMs); + } +} \ No newline at end of file diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilterTest.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilterTest.java new file mode 100644 index 0000000000..a20121496e --- /dev/null +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilterTest.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.atlas.hive.hook.utils; + +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.type.AtlasType; +import org.apache.atlas.utils.TestResourceFileUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +public class ActiveEntityFilterTest { + private static String FILE_SUFFIX_ACTUAL_RESULTS = "-v2"; + private static String ADDITIONAL_TYPE_HDFS_PATH = "hdfs_path"; + + @BeforeClass + public void setup() { + ActiveEntityFilter.init(true, Arrays.asList(new String[]{ADDITIONAL_TYPE_HDFS_PATH})); + } + + @Test + public void verifyMessages() throws IOException { + assertAtlasEntitiesWithExtInfoFromFile("hs2-drop-db"); + assertAtlasEntitiesWithExtInfoFromFile("hs2-create-db"); + assertAtlasEntitiesWithExtInfoFromFile("hs2-create-table"); + assertMessageFromFile("hs2-table-rename"); + assertMessageFromFile("hs2-alter-view"); + assertMessageFromFile("hs2-drop-table"); + assertAtlasEntitiesWithExtInfoFromFile("hs2-create-process"); + assertMessageFromFile("hs2-load-inpath"); + } + + private void assertMessageFromFile(String msgFile) throws IOException { + List incoming = loadList(msgFile); + List expected = loadList(msgFile + FILE_SUFFIX_ACTUAL_RESULTS); + int expectedSize = expected.size(); + + List actual = ActiveEntityFilter.apply((List) incoming); + assertEquals(actual.size(), expected.size()); + for (int i = 0; i < expectedSize; i++) { + if (actual.get(i) instanceof HookNotification.EntityCreateRequestV2) { + HookNotification.EntityCreateRequestV2 actualN = (HookNotification.EntityCreateRequestV2) actual.get(i); + HookNotification.EntityCreateRequestV2 expectedN = (HookNotification.EntityCreateRequestV2) expected.get(i); + + assertAtlasEntitiesWithExtInfo(actualN.getEntities(), expectedN.getEntities()); + } + + if (actual.get(i) instanceof HookNotification.EntityUpdateRequestV2) { + HookNotification.EntityUpdateRequestV2 actualN = (HookNotification.EntityUpdateRequestV2) actual.get(i); + HookNotification.EntityUpdateRequestV2 expectedN = (HookNotification.EntityUpdateRequestV2) expected.get(i); + + assertAtlasEntitiesWithExtInfo(actualN.getEntities(), expectedN.getEntities()); + } + + if (actual.get(i) instanceof HookNotification.EntityPartialUpdateRequestV2) { + HookNotification.EntityPartialUpdateRequestV2 actualN = (HookNotification.EntityPartialUpdateRequestV2) actual.get(i); + HookNotification.EntityPartialUpdateRequestV2 expectedN = (HookNotification.EntityPartialUpdateRequestV2) expected.get(i); + + assertAtlasEntitiesWithExtInfo(actualN.getEntity(), expectedN.getEntity()); + } + } + } + + private List loadList(String msgFile) throws IOException { + List list = TestResourceFileUtils.readObjectFromJson("", msgFile, List.class); + List ret = new ArrayList<>(); + + for (int i = 0; i < list.size(); i++) { + HookNotification notification = createNotification((LinkedHashMap) list.get(i)); + if (notification == null) { + continue; + } + + ret.add(notification); + } + + return ret; + } + + private HookNotification 
createNotification(LinkedHashMap linkedHashMap) { + assertTrue(linkedHashMap.containsKey("type")); + + String type = (String) linkedHashMap.get("type"); + switch (type) { + case "ENTITY_CREATE_V2": + return AtlasType.fromLinkedHashMap(linkedHashMap, HookNotification.EntityCreateRequestV2.class); + + case "ENTITY_FULL_UPDATE_V2": + return AtlasType.fromLinkedHashMap(linkedHashMap, HookNotification.EntityUpdateRequestV2.class); + + case "ENTITY_PARTIAL_UPDATE_V2": + return AtlasType.fromLinkedHashMap(linkedHashMap, HookNotification.EntityPartialUpdateRequestV2.class); + + default: + return null; + } + } + + + private void assertAtlasEntitiesWithExtInfo(AtlasEntity.AtlasEntityWithExtInfo actual, AtlasEntity.AtlasEntityWithExtInfo expected) { + String actualJson = AtlasType.toJson(actual); + String expectedJson = AtlasType.toJson(expected); + + LinkedHashMap actualLHM = AtlasType.fromJson(actualJson, LinkedHashMap.class); + LinkedHashMap expectedLHM = AtlasType.fromJson(expectedJson, LinkedHashMap.class); + + AssertLinkedHashMap.assertEquals(actualLHM, expectedLHM); + } + + private void assertAtlasEntitiesWithExtInfoFromFile(String entityFile) throws IOException { + AtlasEntity.AtlasEntitiesWithExtInfo incoming = TestResourceFileUtils.readObjectFromJson("", entityFile, AtlasEntity.AtlasEntitiesWithExtInfo.class); + AtlasEntity.AtlasEntitiesWithExtInfo expected = TestResourceFileUtils.readObjectFromJson("", entityFile + FILE_SUFFIX_ACTUAL_RESULTS, AtlasEntity.AtlasEntitiesWithExtInfo.class); + + HiveDDLEntityFilter hiveLineageEntityFilter = new HiveDDLEntityFilter(null); + AtlasEntity.AtlasEntitiesWithExtInfo actual = hiveLineageEntityFilter.apply(incoming); + assertAtlasEntitiesWithExtInfo(actual, expected); + } + + private void assertAtlasEntitiesWithExtInfo(AtlasEntity.AtlasEntitiesWithExtInfo actual, AtlasEntity.AtlasEntitiesWithExtInfo expected) { + assertNotNull(actual); + assertNotNull(expected); + + assertEquals(actual.getEntities().size(), expected.getEntities().size()); + assertEntity(actual.getEntities(), expected.getEntities()); + + if (expected.getReferredEntities() == null && actual.getReferredEntities() != null) { + fail("expected.getReferredEntities() == null, but actual.getReferredEntities() != null"); + } + + if (expected.getReferredEntities() != null && actual.getReferredEntities() != null) { + assertEntity(actual.getReferredEntities(), expected.getReferredEntities()); + } + } + + private void assertEntity(Map actual, Map expected) { + assertEquals(actual.size(), expected.size()); + } + + private void assertEntity(List actual, List expected) { + AssertLinkedHashMap.assertEquals(actual, expected); + } + + private static class AssertLinkedHashMap { + private static final String MISMATCH_KEY_FORMAT = "Mismatch: Key: %s"; + private static final Set excludeKeys = new HashSet() {{ + add("guid"); + add("owner"); + }}; + + public static void assertEquals(LinkedHashMap actual, LinkedHashMap expected) { + for (String key : expected.keySet()) { + assertTrue(actual.containsKey(key), "Key: " + key + " Not found!"); + + if (excludeKeys.contains(key)) { + continue; + } + + if (actual.get(key) instanceof LinkedHashMap) { + assertEquals((LinkedHashMap) actual.get(key), (LinkedHashMap) expected.get(key)); + continue; + } + + Assert.assertEquals(actual.get(key), expected.get(key), String.format(MISMATCH_KEY_FORMAT, key)); + } + } + + public static void assertEquals(List actual, List expected) { + Assert.assertEquals(actual.size(), expected.size()); + for (int i = 0; i < actual.size(); i++) { + 
AtlasEntity actualEntity = actual.get(i); + AtlasEntity expectedEntity = expected.get(i); + + String actualJson = AtlasType.toJson(actualEntity); + String expectedJson = AtlasType.toJson(expectedEntity); + + Assert.assertEquals(actualJson, expectedJson, "Actual: " + actualJson); + } + } + } +} diff --git a/addons/hive-bridge/src/test/resources/atlas-application.properties b/addons/hive-bridge/src/test/resources/atlas-application.properties new file mode 100644 index 0000000000..5d24a3014c --- /dev/null +++ b/addons/hive-bridge/src/test/resources/atlas-application.properties @@ -0,0 +1,125 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +######### Atlas Server Configs ######### +atlas.rest.address=http://localhost:31000 + +######### Graph Database Configs ######### + + +# Graph database implementation. Value inserted by maven. +atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase + +# Graph Storage +atlas.graph.storage.backend=berkeleyje + +# Entity repository implementation +atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.InMemoryEntityAuditRepository + +# Graph Search Index Backend +atlas.graph.index.search.backend=solr + +#Berkeley storage directory +atlas.graph.storage.directory=${sys:atlas.data}/berkley + +#hbase +#For standalone mode , specify localhost +#for distributed mode, specify zookeeper quorum here + +atlas.graph.storage.hostname=${graph.storage.hostname} +atlas.graph.storage.hbase.regions-per-server=1 +atlas.graph.storage.lock.wait-time=10000 + +#ElasticSearch +atlas.graph.index.search.directory=${sys:atlas.data}/es +atlas.graph.index.search.elasticsearch.client-only=false +atlas.graph.index.search.elasticsearch.local-mode=true +atlas.graph.index.search.elasticsearch.create.sleep=2000 + +# Solr cloud mode properties +atlas.graph.index.search.solr.mode=cloud +atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address} +atlas.graph.index.search.solr.embedded=true +atlas.graph.index.search.max-result-set-size=150 + + +######### Notification Configs ######### +atlas.notification.embedded=true + +atlas.kafka.zookeeper.connect=localhost:19026 +atlas.kafka.bootstrap.servers=localhost:19027 +atlas.kafka.data=${sys:atlas.data}/kafka +atlas.kafka.zookeeper.session.timeout.ms=4000 +atlas.kafka.zookeeper.sync.time.ms=20 +atlas.kafka.consumer.timeout.ms=4000 +atlas.kafka.auto.commit.interval.ms=100 +atlas.kafka.hook.group.id=atlas +atlas.kafka.entities.group.id=atlas_entities +#atlas.kafka.auto.commit.enable=false + +atlas.kafka.enable.auto.commit=false +atlas.kafka.auto.offset.reset=earliest +atlas.kafka.session.timeout.ms=30000 +atlas.kafka.offsets.topic.replication.factor=1 + + + +######### Entity Audit Configs ######### +atlas.audit.hbase.tablename=ATLAS_ENTITY_AUDIT_EVENTS 
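Illustration only (not part of the patch): the atlas-application.properties file being added here is ordinary key/value text, so its settings can be sanity-checked with a short standalone reader like the sketch below. The file path and class name are assumptions for illustration; also note that Atlas itself loads this file through its configuration layer, which interpolates ${sys:...} placeholders, whereas plain java.util.Properties returns them as literals.

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.util.Properties;

    public class AtlasTestConfigCheck {
        public static void main(String[] args) throws IOException {
            Properties props = new Properties();

            // Hypothetical path; adjust to wherever this test resource lives in your checkout.
            try (FileInputStream in = new FileInputStream("addons/hive-bridge/src/test/resources/atlas-application.properties")) {
                props.load(in);
            }

            // A few of the settings the Hive bridge tests rely on.
            System.out.println("atlas.rest.address            = " + props.getProperty("atlas.rest.address"));
            System.out.println("atlas.kafka.bootstrap.servers = " + props.getProperty("atlas.kafka.bootstrap.servers"));
            System.out.println("atlas.notification.embedded   = " + props.getProperty("atlas.notification.embedded"));
        }
    }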
+atlas.audit.zookeeper.session.timeout.ms=1000 +atlas.audit.hbase.zookeeper.quorum=localhost +atlas.audit.hbase.zookeeper.property.clientPort=19026 + +######### Security Properties ######### + +# SSL config +atlas.enableTLS=false +atlas.server.https.port=31443 + +######### Security Properties ######### + +hbase.security.authentication=simple + +atlas.hook.falcon.synchronous=true + +######### JAAS Configuration ######## + +atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule +atlas.jaas.KafkaClient.loginModuleControlFlag = required +atlas.jaas.KafkaClient.option.useKeyTab = true +atlas.jaas.KafkaClient.option.storeKey = true +atlas.jaas.KafkaClient.option.serviceName = kafka +atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab +atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM + +######### High Availability Configuration ######## +atlas.server.ha.enabled=false +#atlas.server.ids=id1 +#atlas.server.address.id1=localhost:21000 + +######### Atlas Authorization ######### +atlas.authorizer.impl=none +# atlas.authorizer.impl=simple +# atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json + +######### Atlas Authentication ######### +atlas.authentication.method.file=true +atlas.authentication.method.ldap.type=none +atlas.authentication.method.kerberos=false +# atlas.authentication.method.file.filename=users-credentials.properties +atlas.hook.hive.hs2.ignore.ddl.operations=false \ No newline at end of file diff --git a/addons/hive-bridge/src/test/resources/atlas-log4j.xml b/addons/hive-bridge/src/test/resources/atlas-log4j.xml new file mode 100755 index 0000000000..262a710f7a --- /dev/null +++ b/addons/hive-bridge/src/test/resources/atlas-log4j.xml @@ -0,0 +1,137 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/hive-bridge/src/test/resources/hive-site.xml b/addons/hive-bridge/src/test/resources/hive-site.xml new file mode 100644 index 0000000000..f9ec5773e8 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/hive-site.xml @@ -0,0 +1,99 @@ + + + + + + + hive.exec.submit.local.task.via.child + false + + + + mapreduce.framework.name + local + + + + fs.default.name + file:/// + + + + hive.exec.post.hooks + org.apache.atlas.hive.hook.HiveHook + + + + hive.metastore.event.listeners + org.apache.atlas.hive.hook.HiveMetastoreHookImpl + + + + hive.support.concurrency + false + + + + hive.metastore.warehouse.dir + ${project.basedir}/target/metastore + + + + javax.jdo.option.ConnectionURL + jdbc:derby:;databaseName=${project.basedir}/target/metastore_db;create=true + + + + atlas.hook.hive.synchronous + true + + + + fs.pfile.impl + org.apache.hadoop.fs.ProxyLocalFileSystem + + + + hive.in.test + true + + + + hive.zookeeper.quorum + localhost:19026 + + + + hive.metastore.schema.verification + false + + + + hive.metastore.disallow.incompatible.col.type.changes + false + + + + datanucleus.schema.autoCreateAll + true + + + + hive.exec.scratchdir + ${project.basedir}/target/scratchdir + + + \ No newline at end of file diff --git a/addons/hive-bridge/src/test/resources/json/hs2-alter-view-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-alter-view-v2.json new file mode 100644 index 0000000000..ebe896ba9a --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-alter-view-v2.json @@ -0,0 +1,35 @@ +[ + { + "type": 
"ENTITY_CREATE_V2", + "user": "hive", + "entities": { + "entities": [ + { + "typeName": "hive_table_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb02.hh6_renamed_view2@cm:1616450675937", + "execTime": 1616450675937, + "queryText": "ALTER VIEW hh6_renamed_view RENAME TO hh6_renamed_view2", + "name": "ALTER VIEW hh6_renamed_view RENAME TO hh6_renamed_view2", + "userName": "hive" + }, + "guid": "-14529329955589487", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view2@cm" + }, + "relationshipType": "hive_table_ddl_queries" + } + }, + "proxy": false + } + ] + } + } +] diff --git a/addons/hive-bridge/src/test/resources/json/hs2-alter-view.json b/addons/hive-bridge/src/test/resources/json/hs2-alter-view.json new file mode 100644 index 0000000000..cfe77847ca --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-alter-view.json @@ -0,0 +1,268 @@ +[ + { + "type": "ENTITY_PARTIAL_UPDATE_V2", + "user": "hive", + "entityId": { + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view.col1@cm" + } + }, + "entity": { + "entity": { + "typeName": "hive_column", + "attributes": { + "qualifiedName": "cadb02.hh6_renamed_view2.col1@cm" + }, + "guid": "-14529329955589486", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + } + }, + { + "type": "ENTITY_PARTIAL_UPDATE_V2", + "user": "hive", + "entityId": { + "typeName": "hive_storagedesc", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view@cm_storage" + } + }, + "entity": { + "entity": { + "typeName": "hive_storagedesc", + "attributes": { + "qualifiedName": "cadb02.hh6_renamed_view2@cm_storage", + "storedAsSubDirectories": false, + "location": null, + "compressed": false, + "inputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "parameters": {}, + "outputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", + "serdeInfo": { + "typeName": "hive_serde", + "attributes": { + "serializationLib": null, + "name": null, + "parameters": {} + } + }, + "numBuckets": -1 + }, + "guid": "-14529329955589484", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + } + }, + { + "type": "ENTITY_PARTIAL_UPDATE_V2", + "user": "hive", + "entityId": { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view@cm" + } + }, + "entity": { + "entity": { + "typeName": "hive_table", + "attributes": { + "owner": "hive", + "temporary": false, + "lastAccessTime": 1616450675000, + "aliases": [ + "hh6_renamed_view" + ], + "qualifiedName": "cadb02.hh6_renamed_view2@cm", + "viewExpandedText": "select `hh6_renamed`.`col1` from `cadb02`.`hh6_renamed`", + "tableType": "VIRTUAL_VIEW", + "createTime": 1616450675000, + "name": "hh6_renamed_view2", + "comment": null, + "parameters": { + "transient_lastDdlTime": "1616450676", + "bucketing_version": "2", + "last_modified_time": "1616450676", + "last_modified_by": "hive" + }, + "retention": 0, + "viewOriginalText": "select * from hh6_renamed" + }, + "guid": "-14529329955589483", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + } + }, + { + "type": "ENTITY_FULL_UPDATE_V2", + "user": "hive", + "entities": { + "referredEntities": { + "-14529329955589485": { + "typeName": "hive_column", + "attributes": { + "owner": "hive", + "qualifiedName": 
"cadb02.hh6_renamed_view2.col1@cm", + "name": "col1", + "comment": null, + "position": 0, + "type": "int" + }, + "guid": "-14529329955589485", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-14529329955589483", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view2@cm" + }, + "relationshipType": "hive_table_columns" + } + }, + "proxy": false + }, + "-14529329955589484": { + "typeName": "hive_storagedesc", + "attributes": { + "qualifiedName": "cadb02.hh6_renamed_view2@cm_storage", + "storedAsSubDirectories": false, + "location": null, + "compressed": false, + "inputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "parameters": {}, + "outputFormat": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", + "serdeInfo": { + "typeName": "hive_serde", + "attributes": { + "serializationLib": null, + "name": null, + "parameters": {} + } + }, + "numBuckets": -1 + }, + "guid": "-14529329955589484", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-14529329955589483", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view2@cm" + }, + "relationshipType": "hive_table_storagedesc" + } + }, + "proxy": false + } + }, + "entities": [ + { + "typeName": "hive_table", + "attributes": { + "owner": "hive", + "temporary": false, + "lastAccessTime": 1616450675000, + "aliases": [ + "hh6_renamed_view" + ], + "qualifiedName": "cadb02.hh6_renamed_view2@cm", + "viewExpandedText": "select `hh6_renamed`.`col1` from `cadb02`.`hh6_renamed`", + "tableType": "VIRTUAL_VIEW", + "createTime": 1616450675000, + "name": "hh6_renamed_view2", + "comment": null, + "parameters": { + "transient_lastDdlTime": "1616450676", + "bucketing_version": "2", + "last_modified_time": "1616450676", + "last_modified_by": "hive" + }, + "retention": 0, + "viewOriginalText": "select * from hh6_renamed" + }, + "guid": "-14529329955589483", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "sd": { + "guid": "-14529329955589484", + "typeName": "hive_storagedesc", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view2@cm_storage" + }, + "relationshipType": "hive_table_storagedesc" + }, + "columns": [ + { + "guid": "-14529329955589485", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view2.col1@cm" + }, + "relationshipType": "hive_table_columns" + } + ], + "partitionKeys": [], + "db": { + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + }, + "relationshipType": "hive_table_db" + } + }, + "proxy": false + } + ] + } + }, + { + "type": "ENTITY_CREATE_V2", + "user": "hive", + "entities": { + "entities": [ + { + "typeName": "hive_table_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb02.hh6_renamed_view2@cm:1616450675937", + "execTime": 1616450675937, + "queryText": "ALTER VIEW hh6_renamed_view RENAME TO hh6_renamed_view2", + "name": "ALTER VIEW hh6_renamed_view RENAME TO hh6_renamed_view2", + "userName": "hive" + }, + "guid": "-14529329955589487", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view2@cm" + }, + "relationshipType": "hive_table_ddl_queries" + } + }, + "proxy": false + } + ] + } + } +] diff --git 
a/addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json new file mode 100644 index 0000000000..28d3b6b24e --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json @@ -0,0 +1,53 @@ +{ + "referredEntities": {}, + "entities": [ + { + "typeName": "hive_db_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb02@cm:1616450673617", + "execTime": 1616450673617, + "queryText": "create database cadb02", + "name": "create database cadb02", + "userName": "hive" + }, + "guid": "-14529329955589449", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "db": { + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + }, + "relationshipType": "hive_db_ddl_queries" + } + }, + "proxy": false + }, + { + "typeName": "hdfs_path", + "attributes": { + "path": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db", + "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db@cm", + "clusterName": "cm", + "name": "/warehouse/tablespace/external/hive/cadb02.db" + }, + "guid": "-14529329955589450", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "hiveDb": { + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + }, + "relationshipType": "hive_db_location" + } + }, + "proxy": false + } + ] +} diff --git a/addons/hive-bridge/src/test/resources/json/hs2-create-db.json b/addons/hive-bridge/src/test/resources/json/hs2-create-db.json new file mode 100644 index 0000000000..a5b810f729 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-create-db.json @@ -0,0 +1,73 @@ +{ + "referredEntities": {}, + "entities": [ + { + "typeName": "hive_db", + "attributes": { + "owner": "hive", + "ownerType": "USER", + "managedLocation": null, + "qualifiedName": "cadb02@cm", + "clusterName": "cm", + "name": "cadb02", + "location": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db", + "parameters": {} + }, + "guid": "-14529329955589448", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + }, + { + "typeName": "hive_db_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb02@cm:1616450673617", + "execTime": 1616450673617, + "queryText": "create database cadb02", + "name": "create database cadb02", + "userName": "hive" + }, + "guid": "-14529329955589449", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "db": { + "guid": "-14529329955589448", + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + }, + "relationshipType": "hive_db_ddl_queries" + } + }, + "proxy": false + }, + { + "typeName": "hdfs_path", + "attributes": { + "path": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db", + "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db@cm", + "clusterName": "cm", + "name": "/warehouse/tablespace/external/hive/cadb02.db" + }, + "guid": "-14529329955589450", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "hiveDb": { + "guid": "-14529329955589448", + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + }, + "relationshipType": "hive_db_location" + } + }, + "proxy": false + } + ] +} diff 
--git a/addons/hive-bridge/src/test/resources/json/hs2-create-process-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-create-process-v2.json new file mode 100644 index 0000000000..9291cde24f --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-create-process-v2.json @@ -0,0 +1,141 @@ +{ + "referredEntities": {}, + "entities": [ + { + "typeName": "hive_table_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb202.vw202@primary:1616604468798", + "execTime": 1616604468798, + "queryText": "create table vw202 as select * from hh202", + "name": "create table vw202 as select * from hh202", + "userName": "hive" + }, + "guid": "-44808597128613", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202@primary" + }, + "relationshipType": "hive_table_ddl_queries" + } + }, + "proxy": false + }, + { + "typeName": "hive_process", + "attributes": { + "recentQueries": [ + "create table vw202 as select * from hh202" + ], + "qualifiedName": "cadb202.vw202@primary:1616604471000", + "clusterName": "primary", + "name": "cadb202.vw202@primary:1616604471000", + "queryText": "", + "operationType": "CREATETABLE_AS_SELECT", + "startTime": 1616604475518, + "queryPlan": "Not Supported", + "endTime": 1616604475518, + "userName": "", + "queryId": "" + }, + "guid": "-44808597128614", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "outputs": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202@primary" + }, + "relationshipType": "process_dataset_outputs" + } + ], + "inputs": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb202.hh202@primary" + }, + "relationshipType": "dataset_process_inputs" + } + ] + }, + "proxy": false + }, + { + "typeName": "hive_process_execution", + "attributes": { + "hostName": "21806.local", + "qualifiedName": "cadb202.vw202@primary:1616604471000:1616604468798:1616604475518", + "name": "cadb202.vw202@primary:1616604471000:1616604468798:1616604475518", + "queryText": "create table vw202 as select * from hh202", + "startTime": 1616604468798, + "queryPlan": "Not Supported", + "endTime": 1616604475518, + "userName": "hive", + "queryId": "hive_20210324094633_2144da6f-70ac-476e-aeec-0e758cdf1fa6" + }, + "guid": "-44808597128615", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "process": { + "guid": "-44808597128614", + "typeName": "hive_process", + "relationshipType": "hive_process_process_executions" + } + }, + "proxy": false + }, + { + "typeName": "hive_column_lineage", + "attributes": { + "expression": null, + "qualifiedName": "cadb202.vw202@primary:1616604471000:col202", + "name": "cadb202.vw202@primary:1616604471000:col202", + "depenendencyType": "SIMPLE" + }, + "guid": "-44808597128616", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "outputs": [ + { + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202.col202@primary" + }, + "relationshipType": "process_dataset_outputs" + } + ], + "inputs": [ + { + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb202.hh202.col202@primary" + }, + "relationshipType": "dataset_process_inputs" + } + ], + "query": { + "guid": "-44808597128614", + "typeName": "hive_process", + "uniqueAttributes": { + "qualifiedName": 
"cadb202.vw202@primary:1616604471000" + }, + "relationshipType": "hive_process_column_lineage" + } + }, + "proxy": false + } + ] +} \ No newline at end of file diff --git a/addons/hive-bridge/src/test/resources/json/hs2-create-process.json b/addons/hive-bridge/src/test/resources/json/hs2-create-process.json new file mode 100644 index 0000000000..1bbd95a995 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-create-process.json @@ -0,0 +1,283 @@ +{ + "referredEntities": { + "-44808597128606": { + "typeName": "hive_db", + "attributes": { + "owner": "hive", + "ownerType": "USER", + "qualifiedName": "cadb202@primary", + "clusterName": "primary", + "name": "cadb202", + "location": "file:/Users/hive/Apache/atlas-wip/addons/hive-bridge/target/metastore/cadb202.db", + "parameters": {} + }, + "guid": "-44808597128606", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + }, + "-44808597128612": { + "typeName": "hive_column", + "attributes": { + "owner": "hive", + "qualifiedName": "cadb202.vw202.col202@primary", + "name": "col202", + "comment": null, + "position": 0, + "type": "string" + }, + "guid": "-44808597128612", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-44808597128610", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202@primary" + }, + "relationshipType": "hive_table_columns" + } + }, + "proxy": false + }, + "-44808597128611": { + "typeName": "hive_storagedesc", + "attributes": { + "qualifiedName": "cadb202.vw202@primary_storage", + "storedAsSubDirectories": false, + "location": "file:/Users/hive/Apache/atlas-wip/addons/hive-bridge/target/metastore/cadb202.db/vw202", + "compressed": false, + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "parameters": {}, + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "typeName": "hive_serde", + "attributes": { + "serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "name": null, + "parameters": { + "serialization.format": "1" + } + } + }, + "numBuckets": -1 + }, + "guid": "-44808597128611", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-44808597128610", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202@primary" + }, + "relationshipType": "hive_table_storagedesc" + } + }, + "proxy": false + }, + "-44808597128610": { + "typeName": "hive_table", + "attributes": { + "owner": "hive", + "tableType": "MANAGED_TABLE", + "temporary": false, + "lastAccessTime": 1616604471000, + "createTime": 1616604471000, + "qualifiedName": "cadb202.vw202@primary", + "name": "vw202", + "comment": null, + "parameters": { + "totalSize": "0", + "numRows": "0", + "rawDataSize": "0", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "numFiles": "0", + "transient_lastDdlTime": "1616604472", + "bucketing_version": "2" + }, + "retention": 0 + }, + "guid": "-44808597128610", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "sd": { + "guid": "-44808597128611", + "typeName": "hive_storagedesc", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202@primary_storage" + }, + "relationshipType": "hive_table_storagedesc" + }, + "columns": [ + { + "guid": "-44808597128612", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202.col202@primary" + }, + 
"relationshipType": "hive_table_columns" + } + ], + "partitionKeys": [], + "db": { + "guid": "-44808597128606", + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb202@primary" + }, + "relationshipType": "hive_table_db" + } + }, + "proxy": false + } + }, + "entities": [ + { + "typeName": "hive_table_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb202.vw202@primary:1616604468798", + "execTime": 1616604468798, + "queryText": "create table vw202 as select * from hh202", + "name": "create table vw202 as select * from hh202", + "userName": "hive" + }, + "guid": "-44808597128613", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-44808597128610", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202@primary" + }, + "relationshipType": "hive_table_ddl_queries" + } + }, + "proxy": false + }, + { + "typeName": "hive_process", + "attributes": { + "recentQueries": [ + "create table vw202 as select * from hh202" + ], + "qualifiedName": "cadb202.vw202@primary:1616604471000", + "clusterName": "primary", + "name": "cadb202.vw202@primary:1616604471000", + "queryText": "", + "operationType": "CREATETABLE_AS_SELECT", + "startTime": 1616604475518, + "queryPlan": "Not Supported", + "endTime": 1616604475518, + "userName": "", + "queryId": "" + }, + "guid": "-44808597128614", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "outputs": [ + { + "guid": "-44808597128610", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202@primary" + }, + "relationshipType": "process_dataset_outputs" + } + ], + "inputs": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb202.hh202@primary" + }, + "relationshipType": "dataset_process_inputs" + } + ] + }, + "proxy": false + }, + { + "typeName": "hive_process_execution", + "attributes": { + "hostName": "21806.local", + "qualifiedName": "cadb202.vw202@primary:1616604471000:1616604468798:1616604475518", + "name": "cadb202.vw202@primary:1616604471000:1616604468798:1616604475518", + "queryText": "create table vw202 as select * from hh202", + "startTime": 1616604468798, + "queryPlan": "Not Supported", + "endTime": 1616604475518, + "userName": "hive", + "queryId": "hive_20210324094633_2144da6f-70ac-476e-aeec-0e758cdf1fa6" + }, + "guid": "-44808597128615", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "process": { + "guid": "-44808597128614", + "typeName": "hive_process", + "relationshipType": "hive_process_process_executions" + } + }, + "proxy": false + }, + { + "typeName": "hive_column_lineage", + "attributes": { + "expression": null, + "qualifiedName": "cadb202.vw202@primary:1616604471000:col202", + "name": "cadb202.vw202@primary:1616604471000:col202", + "depenendencyType": "SIMPLE" + }, + "guid": "-44808597128616", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "outputs": [ + { + "guid": "-44808597128612", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb202.vw202.col202@primary" + }, + "relationshipType": "process_dataset_outputs" + } + ], + "inputs": [ + { + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb202.hh202.col202@primary" + }, + "relationshipType": "dataset_process_inputs" + } + ], + "query": { + "guid": "-44808597128614", + "typeName": "hive_process", + 
"uniqueAttributes": { + "qualifiedName": "cadb202.vw202@primary:1616604471000" + }, + "relationshipType": "hive_process_column_lineage" + } + }, + "proxy": false + } + ] +} \ No newline at end of file diff --git a/addons/hive-bridge/src/test/resources/json/hs2-create-table-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-create-table-v2.json new file mode 100644 index 0000000000..ebf9e51fd4 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-create-table-v2.json @@ -0,0 +1,115 @@ +{ + "referredEntities": { + "-14529329955589455": { + "typeName": "hdfs_path", + "attributes": { + "path": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv", + "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv@cm", + "clusterName": "cm", + "name": "/tmp/external/hh6.csv" + }, + "guid": "-14529329955589455", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + }, + "entities": [ + { + "typeName": "hive_process", + "attributes": { + "recentQueries": [ + "create external table hh6(col1 int) location '/tmp/external/hh6.csv'" + ], + "qualifiedName": "cadb02.hh6@cm:1616450674000", + "clusterName": "cm", + "name": "cadb02.hh6@cm:1616450674000", + "queryText": "", + "operationType": "CREATETABLE", + "startTime": 1616450674217, + "queryPlan": "Not Supported", + "endTime": 1616450674217, + "userName": "", + "queryId": "" + }, + "guid": "-14529329955589456", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "outputs": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm" + }, + "relationshipType": "process_dataset_outputs" + } + ], + "inputs": [ + { + "guid": "-14529329955589455", + "typeName": "hdfs_path", + "uniqueAttributes": { + "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv@cm" + }, + "relationshipType": "dataset_process_inputs" + } + ] + }, + "proxy": false + }, + { + "typeName": "hive_process_execution", + "attributes": { + "hostName": "ve0126.halxg.cloudera.com", + "qualifiedName": "cadb02.hh6@cm:1616450674000:1616450673854:1616450674217", + "name": "cadb02.hh6@cm:1616450674000:1616450673854:1616450674217", + "queryText": "create external table hh6(col1 int) location '/tmp/external/hh6.csv'", + "startTime": 1616450673854, + "queryPlan": "Not Supported", + "endTime": 1616450674217, + "userName": "hive", + "queryId": "hive_20210322150433_52b16d58-6a44-49e2-afe4-4bb1a5bb7484" + }, + "guid": "-14529329955589457", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "process": { + "guid": "-14529329955589456", + "typeName": "hive_process", + "relationshipType": "hive_process_process_executions" + } + }, + "proxy": false + }, + { + "typeName": "hive_table_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb02.hh6@cm:1616450673854", + "execTime": 1616450673854, + "queryText": "create external table hh6(col1 int) location '/tmp/external/hh6.csv'", + "name": "create external table hh6(col1 int) location '/tmp/external/hh6.csv'", + "userName": "hive" + }, + "guid": "-14529329955589458", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm" + }, + "relationshipType": "hive_table_ddl_queries" + } + }, + "proxy": false + } + ] +} \ No newline at end of file diff --git 
a/addons/hive-bridge/src/test/resources/json/hs2-create-table.json b/addons/hive-bridge/src/test/resources/json/hs2-create-table.json new file mode 100644 index 0000000000..4e13ed1526 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-create-table.json @@ -0,0 +1,255 @@ +{ + "referredEntities": { + "-14529329955589453": { + "typeName": "hive_storagedesc", + "attributes": { + "qualifiedName": "cadb02.hh6@cm_storage", + "storedAsSubDirectories": false, + "location": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv", + "compressed": false, + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "parameters": {}, + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "typeName": "hive_serde", + "attributes": { + "serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "name": null, + "parameters": { + "serialization.format": "1" + } + } + }, + "numBuckets": -1 + }, + "guid": "-14529329955589453", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-14529329955589452", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm" + }, + "relationshipType": "hive_table_storagedesc" + } + }, + "proxy": false + }, + "-14529329955589451": { + "typeName": "hive_db", + "attributes": { + "owner": "hive", + "ownerType": "USER", + "managedLocation": null, + "qualifiedName": "cadb02@cm", + "clusterName": "cm", + "name": "cadb02", + "location": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db", + "parameters": {} + }, + "guid": "-14529329955589451", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + }, + "-14529329955589455": { + "typeName": "hdfs_path", + "attributes": { + "path": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv", + "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv@cm", + "clusterName": "cm", + "name": "/tmp/external/hh6.csv" + }, + "guid": "-14529329955589455", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + }, + "-14529329955589454": { + "typeName": "hive_column", + "attributes": { + "owner": "hive", + "qualifiedName": "cadb02.hh6.col1@cm", + "name": "col1", + "comment": null, + "position": 0, + "type": "int" + }, + "guid": "-14529329955589454", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-14529329955589452", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm" + }, + "relationshipType": "hive_table_columns" + } + }, + "proxy": false + } + }, + "entities": [ + { + "typeName": "hive_table", + "attributes": { + "owner": "hive", + "tableType": "EXTERNAL_TABLE", + "temporary": false, + "lastAccessTime": 1616450674000, + "createTime": 1616450674000, + "qualifiedName": "cadb02.hh6@cm", + "name": "hh6", + "comment": null, + "parameters": { + "totalSize": "0", + "EXTERNAL": "TRUE", + "numFiles": "0", + "transient_lastDdlTime": "1616450674", + "bucketing_version": "2", + "numFilesErasureCoded": "0" + }, + "retention": 0 + }, + "guid": "-14529329955589452", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "sd": { + "guid": "-14529329955589453", + "typeName": "hive_storagedesc", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm_storage" + }, + "relationshipType": "hive_table_storagedesc" + }, + "columns": [ + { + 
"guid": "-14529329955589454", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6.col1@cm" + }, + "relationshipType": "hive_table_columns" + } + ], + "partitionKeys": [], + "db": { + "guid": "-14529329955589451", + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + }, + "relationshipType": "hive_table_db" + } + }, + "proxy": false + }, + { + "typeName": "hive_process", + "attributes": { + "recentQueries": [ + "create external table hh6(col1 int) location '/tmp/external/hh6.csv'" + ], + "qualifiedName": "cadb02.hh6@cm:1616450674000", + "clusterName": "cm", + "name": "cadb02.hh6@cm:1616450674000", + "queryText": "", + "operationType": "CREATETABLE", + "startTime": 1616450674217, + "queryPlan": "Not Supported", + "endTime": 1616450674217, + "userName": "", + "queryId": "" + }, + "guid": "-14529329955589456", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "outputs": [ + { + "guid": "-14529329955589452", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm" + }, + "relationshipType": "process_dataset_outputs" + } + ], + "inputs": [ + { + "guid": "-14529329955589455", + "typeName": "hdfs_path", + "uniqueAttributes": { + "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv@cm" + }, + "relationshipType": "dataset_process_inputs" + } + ] + }, + "proxy": false + }, + { + "typeName": "hive_process_execution", + "attributes": { + "hostName": "ve0126.halxg.cloudera.com", + "qualifiedName": "cadb02.hh6@cm:1616450674000:1616450673854:1616450674217", + "name": "cadb02.hh6@cm:1616450674000:1616450673854:1616450674217", + "queryText": "create external table hh6(col1 int) location '/tmp/external/hh6.csv'", + "startTime": 1616450673854, + "queryPlan": "Not Supported", + "endTime": 1616450674217, + "userName": "hive", + "queryId": "hive_20210322150433_52b16d58-6a44-49e2-afe4-4bb1a5bb7484" + }, + "guid": "-14529329955589457", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "process": { + "guid": "-14529329955589456", + "typeName": "hive_process", + "relationshipType": "hive_process_process_executions" + } + }, + "proxy": false + }, + { + "typeName": "hive_table_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb02.hh6@cm:1616450673854", + "execTime": 1616450673854, + "queryText": "create external table hh6(col1 int) location '/tmp/external/hh6.csv'", + "name": "create external table hh6(col1 int) location '/tmp/external/hh6.csv'", + "userName": "hive" + }, + "guid": "-14529329955589458", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-14529329955589452", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm" + }, + "relationshipType": "hive_table_ddl_queries" + } + }, + "proxy": false + } + ] +} \ No newline at end of file diff --git a/addons/hive-bridge/src/test/resources/json/hs2-drop-db-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-drop-db-v2.json new file mode 100644 index 0000000000..76860cea9a --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-drop-db-v2.json @@ -0,0 +1,4 @@ +{ + "entities": [ + ] +} diff --git a/addons/hive-bridge/src/test/resources/json/hs2-drop-db.json b/addons/hive-bridge/src/test/resources/json/hs2-drop-db.json new file mode 100644 index 0000000000..429f95fb38 --- /dev/null +++ 
b/addons/hive-bridge/src/test/resources/json/hs2-drop-db.json @@ -0,0 +1,10 @@ +{ + "entities": [ + { + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + } + } + ] +} diff --git a/addons/hive-bridge/src/test/resources/json/hs2-drop-table-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-drop-table-v2.json new file mode 100644 index 0000000000..0637a088a0 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-drop-table-v2.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/addons/hive-bridge/src/test/resources/json/hs2-drop-table.json b/addons/hive-bridge/src/test/resources/json/hs2-drop-table.json new file mode 100644 index 0000000000..ed62032493 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-drop-table.json @@ -0,0 +1,50 @@ +[ + { + "type": "ENTITY_DELETE_V2", + "user": "hive", + "entities": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed@cm" + } + } + ] + }, + { + "type": "ENTITY_DELETE_V2", + "user": "hive", + "entities": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed_view2@cm" + } + } + ] + }, + { + "type": "ENTITY_DELETE_V2", + "user": "hive", + "entities": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.cc1@cm" + } + } + ] + }, + { + "type": "ENTITY_DELETE_V2", + "user": "hive", + "entities": [ + { + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + } + } + ] + } +] diff --git a/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-v2.json new file mode 100644 index 0000000000..499a9c2e87 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-v2.json @@ -0,0 +1,96 @@ +[ + { + "type": "ENTITY_CREATE_V2", + "user": "hive", + "entities": { + "referredEntities": { + "-98504074851374": { + "typeName": "hdfs_path", + "attributes": { + "path": "file:/Users/hive/Apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj", + "qualifiedName": "file:/users/hive/apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj", + "clusterName": "primary", + "name": "/Users/hive/Apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj" + }, + "guid": "-98504074851374", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + }, + "entities": [ + { + "typeName": "hive_process", + "attributes": { + "recentQueries": [ + "load data local inpath 'file:///users/hive/apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj' into table table_nrx8uoggc0 partition(dt = '2015-01-01')" + ], + "qualifiedName": "LOAD->:default.table_nrx8uoggc0@primary:1622738598000", + "clusterName": "primary", + "name": "LOAD->:default.table_nrx8uoggc0@primary:1622738598000", + "queryText": "", + "operationType": "LOAD", + "startTime": 1622738659471, + "queryPlan": "Not Supported", + "endTime": 1622738659471, + "userName": "", + "queryId": "" + }, + "guid": "-98504074851381", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "outputs": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "default.table_nrx8uoggc0@primary" + }, + "relationshipType": "process_dataset_outputs" + } + ], + "inputs": [ + { + "guid": "-98504074851374", + "typeName": "hdfs_path", + "uniqueAttributes": { + "qualifiedName": 
"file:/users/hive/apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj" + }, + "relationshipType": "dataset_process_inputs" + } + ] + }, + "proxy": false + }, + { + "typeName": "hive_process_execution", + "attributes": { + "hostName": "21806.local", + "qualifiedName": "LOAD->:default.table_nrx8uoggc0@primary:1622738598000:1622738658982:1622738659471", + "name": "LOAD->:default.table_nrx8uoggc0@primary:1622738598000:1622738658982:1622738659471", + "queryText": "load data local inpath 'file:///users/hive/apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj' into table table_nrx8uoggc0 partition(dt = '2015-01-01')", + "startTime": 1622738658982, + "queryPlan": "Not Supported", + "endTime": 1622738659471, + "userName": "hive", + "queryId": "hive_20210603094308_ef789483-7de1-462b-ac74-bb0ebe7aeedf" + }, + "guid": "-98504074851382", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "process": { + "guid": "-98504074851381", + "typeName": "hive_process", + "relationshipType": "hive_process_process_executions" + } + }, + "proxy": false + } + ] + } + } +] \ No newline at end of file diff --git a/addons/hive-bridge/src/test/resources/json/hs2-load-inpath.json b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath.json new file mode 100644 index 0000000000..499a9c2e87 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath.json @@ -0,0 +1,96 @@ +[ + { + "type": "ENTITY_CREATE_V2", + "user": "hive", + "entities": { + "referredEntities": { + "-98504074851374": { + "typeName": "hdfs_path", + "attributes": { + "path": "file:/Users/hive/Apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj", + "qualifiedName": "file:/users/hive/apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj", + "clusterName": "primary", + "name": "/Users/hive/Apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj" + }, + "guid": "-98504074851374", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + }, + "entities": [ + { + "typeName": "hive_process", + "attributes": { + "recentQueries": [ + "load data local inpath 'file:///users/hive/apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj' into table table_nrx8uoggc0 partition(dt = '2015-01-01')" + ], + "qualifiedName": "LOAD->:default.table_nrx8uoggc0@primary:1622738598000", + "clusterName": "primary", + "name": "LOAD->:default.table_nrx8uoggc0@primary:1622738598000", + "queryText": "", + "operationType": "LOAD", + "startTime": 1622738659471, + "queryPlan": "Not Supported", + "endTime": 1622738659471, + "userName": "", + "queryId": "" + }, + "guid": "-98504074851381", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "outputs": [ + { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "default.table_nrx8uoggc0@primary" + }, + "relationshipType": "process_dataset_outputs" + } + ], + "inputs": [ + { + "guid": "-98504074851374", + "typeName": "hdfs_path", + "uniqueAttributes": { + "qualifiedName": "file:/users/hive/apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj" + }, + "relationshipType": "dataset_process_inputs" + } + ] + }, + "proxy": false + }, + { + "typeName": "hive_process_execution", + "attributes": { + "hostName": "21806.local", + "qualifiedName": "LOAD->:default.table_nrx8uoggc0@primary:1622738598000:1622738658982:1622738659471", + "name": "LOAD->:default.table_nrx8uoggc0@primary:1622738598000:1622738658982:1622738659471", + 
"queryText": "load data local inpath 'file:///users/hive/apache/atlas-wip/addons/hive-bridge/target/load-data-thi5jt1lgj' into table table_nrx8uoggc0 partition(dt = '2015-01-01')", + "startTime": 1622738658982, + "queryPlan": "Not Supported", + "endTime": 1622738659471, + "userName": "hive", + "queryId": "hive_20210603094308_ef789483-7de1-462b-ac74-bb0ebe7aeedf" + }, + "guid": "-98504074851382", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "process": { + "guid": "-98504074851381", + "typeName": "hive_process", + "relationshipType": "hive_process_process_executions" + } + }, + "proxy": false + } + ] + } + } +] \ No newline at end of file diff --git a/addons/hive-bridge/src/test/resources/json/hs2-table-rename-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-table-rename-v2.json new file mode 100644 index 0000000000..f133e7ffbf --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-table-rename-v2.json @@ -0,0 +1,35 @@ +[ + { + "type": "ENTITY_CREATE_V2", + "user": "hive", + "entities": { + "entities": [ + { + "typeName": "hive_table_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb02.hh6_renamed@cm:1616450674247", + "execTime": 1616450674247, + "queryText": "ALTER TABLE hh6 RENAME TO hh6_renamed", + "name": "ALTER TABLE hh6 RENAME TO hh6_renamed", + "userName": "hive" + }, + "guid": "-14529329955589467", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed@cm" + }, + "relationshipType": "hive_table_ddl_queries" + } + }, + "proxy": false + } + ] + } + } +] diff --git a/addons/hive-bridge/src/test/resources/json/hs2-table-rename.json b/addons/hive-bridge/src/test/resources/json/hs2-table-rename.json new file mode 100644 index 0000000000..6b8094dc83 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/json/hs2-table-rename.json @@ -0,0 +1,276 @@ +[ + { + "type": "ENTITY_PARTIAL_UPDATE_V2", + "user": "hive", + "entityId": { + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6.col1@cm" + } + }, + "entity": { + "entity": { + "typeName": "hive_column", + "attributes": { + "qualifiedName": "cadb02.hh6_renamed.col1@cm" + }, + "guid": "-14529329955589466", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + } + }, + { + "type": "ENTITY_PARTIAL_UPDATE_V2", + "user": "hive", + "entityId": { + "typeName": "hive_storagedesc", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm_storage" + } + }, + "entity": { + "entity": { + "typeName": "hive_storagedesc", + "attributes": { + "qualifiedName": "cadb02.hh6_renamed@cm_storage", + "storedAsSubDirectories": false, + "location": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv", + "compressed": false, + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "parameters": {}, + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "typeName": "hive_serde", + "attributes": { + "serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "name": null, + "parameters": { + "serialization.format": "1" + } + } + }, + "numBuckets": -1 + }, + "guid": "-14529329955589464", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + } + }, + { + "type": "ENTITY_PARTIAL_UPDATE_V2", + "user": "hive", + "entityId": { + "typeName": "hive_table", + 
"uniqueAttributes": { + "qualifiedName": "cadb02.hh6@cm" + } + }, + "entity": { + "entity": { + "typeName": "hive_table", + "attributes": { + "owner": "hive", + "tableType": "EXTERNAL_TABLE", + "temporary": false, + "lastAccessTime": 1616450674000, + "aliases": [ + "hh6" + ], + "createTime": 1616450674000, + "qualifiedName": "cadb02.hh6_renamed@cm", + "name": "hh6_renamed", + "comment": null, + "parameters": { + "last_modified_time": "1616450674", + "totalSize": "0", + "EXTERNAL": "TRUE", + "numFiles": "0", + "transient_lastDdlTime": "1616450674", + "bucketing_version": "2", + "last_modified_by": "hive", + "numFilesErasureCoded": "0" + }, + "retention": 0 + }, + "guid": "-14529329955589463", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "proxy": false + } + } + }, + { + "type": "ENTITY_FULL_UPDATE_V2", + "user": "hive", + "entities": { + "referredEntities": { + "-14529329955589464": { + "typeName": "hive_storagedesc", + "attributes": { + "qualifiedName": "cadb02.hh6_renamed@cm_storage", + "storedAsSubDirectories": false, + "location": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv", + "compressed": false, + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "parameters": {}, + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "typeName": "hive_serde", + "attributes": { + "serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "name": null, + "parameters": { + "serialization.format": "1" + } + } + }, + "numBuckets": -1 + }, + "guid": "-14529329955589464", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-14529329955589463", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed@cm" + }, + "relationshipType": "hive_table_storagedesc" + } + }, + "proxy": false + }, + "-14529329955589465": { + "typeName": "hive_column", + "attributes": { + "owner": "hive", + "qualifiedName": "cadb02.hh6_renamed.col1@cm", + "name": "col1", + "comment": null, + "position": 0, + "type": "int" + }, + "guid": "-14529329955589465", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "guid": "-14529329955589463", + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed@cm" + }, + "relationshipType": "hive_table_columns" + } + }, + "proxy": false + } + }, + "entities": [ + { + "typeName": "hive_table", + "attributes": { + "owner": "hive", + "tableType": "EXTERNAL_TABLE", + "temporary": false, + "lastAccessTime": 1616450674000, + "aliases": [ + "hh6" + ], + "createTime": 1616450674000, + "qualifiedName": "cadb02.hh6_renamed@cm", + "name": "hh6_renamed", + "comment": null, + "parameters": { + "last_modified_time": "1616450674", + "totalSize": "0", + "EXTERNAL": "TRUE", + "numFiles": "0", + "transient_lastDdlTime": "1616450674", + "bucketing_version": "2", + "last_modified_by": "hive", + "numFilesErasureCoded": "0" + }, + "retention": 0 + }, + "guid": "-14529329955589463", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "sd": { + "guid": "-14529329955589464", + "typeName": "hive_storagedesc", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed@cm_storage" + }, + "relationshipType": "hive_table_storagedesc" + }, + "columns": [ + { + "guid": "-14529329955589465", + "typeName": "hive_column", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed.col1@cm" + 
}, + "relationshipType": "hive_table_columns" + } + ], + "partitionKeys": [], + "db": { + "typeName": "hive_db", + "uniqueAttributes": { + "qualifiedName": "cadb02@cm" + }, + "relationshipType": "hive_table_db" + } + }, + "proxy": false + } + ] + } + }, + { + "type": "ENTITY_CREATE_V2", + "user": "hive", + "entities": { + "entities": [ + { + "typeName": "hive_table_ddl", + "attributes": { + "serviceType": "hive", + "qualifiedName": "cadb02.hh6_renamed@cm:1616450674247", + "execTime": 1616450674247, + "queryText": "ALTER TABLE hh6 RENAME TO hh6_renamed", + "name": "ALTER TABLE hh6 RENAME TO hh6_renamed", + "userName": "hive" + }, + "guid": "-14529329955589467", + "isIncomplete": false, + "provenanceType": 0, + "version": 0, + "relationshipAttributes": { + "table": { + "typeName": "hive_table", + "uniqueAttributes": { + "qualifiedName": "cadb02.hh6_renamed@cm" + }, + "relationshipType": "hive_table_ddl_queries" + } + }, + "proxy": false + } + ] + } + } +] diff --git a/addons/hive-bridge/src/test/resources/users-credentials.properties b/addons/hive-bridge/src/test/resources/users-credentials.properties new file mode 100644 index 0000000000..5046dbaf64 --- /dev/null +++ b/addons/hive-bridge/src/test/resources/users-credentials.properties @@ -0,0 +1,3 @@ +#username=group::sha256-password +admin=ADMIN::a4a88c0872bf652bb9ed803ece5fd6e82354838a9bf59ab4babb1dab322154e1 +rangertagsync=RANGER_TAG_SYNC::0afe7a1968b07d4c3ff4ed8c2d809a32ffea706c66cd795ead9048e81cfaf034 diff --git a/addons/impala-bridge-shim/pom.xml b/addons/impala-bridge-shim/pom.xml new file mode 100644 index 0000000000..079eb85006 --- /dev/null +++ b/addons/impala-bridge-shim/pom.xml @@ -0,0 +1,43 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + impala-bridge-shim + Apache Atlas Impala Bridge Shim Module + Apache Atlas Impala Bridge Shim + jar + + + + org.apache.atlas + atlas-plugin-classloader + + + org.apache.atlas + impala-hook-api + + + \ No newline at end of file diff --git a/addons/impala-bridge-shim/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java b/addons/impala-bridge-shim/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java new file mode 100644 index 0000000000..34e6dcf4ed --- /dev/null +++ b/addons/impala-bridge-shim/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.hook; + +import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader; +import org.apache.impala.hooks.QueryCompleteContext; +import org.apache.impala.hooks.QueryEventHook; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is used to convert lineage records from Impala to lineage notifications and + * send them to Atlas. + */ +public class ImpalaLineageHook implements QueryEventHook { + private static final Logger LOG = LoggerFactory.getLogger(ImpalaLineageHook.class); + + private static final String ATLAS_PLUGIN_TYPE_IMPALA = "impala"; + private static final String ATLAS_IMPALA_LINEAGE_HOOK_IMPL_CLASSNAME = + "org.apache.atlas.impala.hook.ImpalaHook"; + + private AtlasPluginClassLoader atlasPluginClassLoader = null; + private QueryEventHook impalaLineageHookImpl; + + public ImpalaLineageHook() { + } + + /** + * Execute Impala hook + */ + public void onQueryComplete(QueryCompleteContext context) { + LOG.debug("==> ImpalaLineageHook.onQueryComplete()"); + + try { + activatePluginClassLoader(); + impalaLineageHookImpl.onQueryComplete(context); + } catch (Exception ex) { + String errorMessage = String.format("Error in processing impala lineage: {}", context.getLineageGraph()); + LOG.error(errorMessage, ex); + } finally { + deactivatePluginClassLoader(); + } + + LOG.debug("<== ImpalaLineageHook.onQueryComplete()"); + } + + /** + * Initialization of Impala hook + */ + public void onImpalaStartup() { + LOG.debug("==> ImpalaLineageHook.onImpalaStartup()"); + + try { + atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE_IMPALA, this.getClass()); + + @SuppressWarnings("unchecked") + Class cls = (Class) Class + .forName(ATLAS_IMPALA_LINEAGE_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader); + + activatePluginClassLoader(); + + impalaLineageHookImpl = cls.newInstance(); + impalaLineageHookImpl.onImpalaStartup(); + } catch (Exception excp) { + LOG.error("Error instantiating Atlas hook implementation for Impala lineage", excp); + } finally { + deactivatePluginClassLoader(); + } + + LOG.debug("<== ImpalaLineageHook.onImpalaStartup()"); + } + + private void activatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.activate(); + } + } + + private void deactivatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.deactivate(); + } + } +} \ No newline at end of file diff --git a/addons/impala-bridge/pom.xml b/addons/impala-bridge/pom.xml new file mode 100644 index 0000000000..186251abe2 --- /dev/null +++ b/addons/impala-bridge/pom.xml @@ -0,0 +1,549 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + impala-bridge + Apache Atlas Impala Bridge Module + Apache Atlas Impala Bridge + jar + + + + + org.slf4j + slf4j-api + + + + org.slf4j + slf4j-log4j12 + + + + org.apache.atlas + atlas-notification + + + + org.apache.atlas + impala-hook-api + + + + + org.apache.atlas + atlas-client-v2 + ${project.version} + test + + + org.apache.logging.log4j + log4j-core + ${log4j2.version} + test + + + org.apache.logging.log4j + log4j-api + ${log4j2.version} + test + + + + com.sun.jersey + jersey-server 
+ ${jersey.version} + test + + + + org.apache.atlas + hive-bridge + test + + + + org.apache.atlas + atlas-webapp + war + test + + + + org.apache.hadoop + hadoop-client + + + javax.servlet + servlet-api + + + org.eclipse.jetty + * + + + + + + org.apache.hadoop + hadoop-annotations + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.databind.version} + + + + commons-io + commons-io + ${commons-io.version} + + + + commons-cli + commons-cli + ${commons-cli.version} + + + + commons-lang + commons-lang + ${commons-lang.version} + + + + org.testng + testng + + + + org.mockito + mockito-all + + + + org.eclipse.jetty + jetty-server + test + + + + org.apache.atlas + atlas-graphdb-impls + pom + test + + + + org.apache.atlas + atlas-intg + tests + test + + + + org.apache.atlas + atlas-repository + tests + test + + + + org.apache.hive + hive-exec + ${hive.version} + test + + + javax.servlet + * + + + + + + org.apache.hive + hive-jdbc + ${hive.version} + test + + + javax.servlet + * + + + javax.ws.rs + * + + + org.eclipse.jetty + * + + + + + + org.apache.hive + hive-cli + ${hive.version} + test + + + javax.servlet + * + + + org.eclipse.jetty.aggregate + * + + + + + + + + dist + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-hook + package + + copy + + + ${project.build.directory}/dependency/hook/impala/atlas-impala-plugin-impl + false + false + true + + + ${project.groupId} + ${project.artifactId} + ${project.version} + + + ${project.groupId} + atlas-client-common + ${project.version} + + + ${project.groupId} + atlas-client-v1 + ${project.version} + + + ${project.groupId} + atlas-client-v2 + ${project.version} + + + ${project.groupId} + atlas-intg + ${project.version} + + + ${project.groupId} + atlas-notification + ${project.version} + + + ${project.groupId} + atlas-common + ${project.version} + + + org.apache.kafka + kafka_${kafka.scala.binary.version} + ${kafka.version} + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + com.sun.jersey.contribs + jersey-multipart + ${jersey.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.databind.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + + + commons-configuration + commons-configuration + ${commons-conf.version} + + + com.sun.jersey + jersey-json + ${jersey.version} + + + javax.ws.rs + jsr311-api + ${jsr.version} + + + + + + copy-hook-shim + package + + copy + + + ${project.build.directory}/dependency/hook/impala + false + false + true + + + ${project.groupId} + impala-bridge-shim + ${project.version} + + + ${project.groupId} + atlas-plugin-classloader + ${project.version} + + + + + + + + + + + + + + org.eclipse.jetty + jetty-maven-plugin + ${jetty.version} + + ${skipTests} + + + 31000 + 60000 + + ../../webapp/target/atlas-webapp-${project.version}.war + true + ../../webapp/src/main/webapp + + / + ${project.basedir}/../../webapp/src/main/webapp/WEB-INF/web.xml + + true + + true + + atlas.home + ${project.build.directory} + + + atlas.conf + ${project.build.directory}/test-classes + + + atlas.data + ${project.build.directory}/data + + + atlas.log.dir + ${project.build.directory}/logs + + + atlas.log.file + application.log + + + log4j.configuration + file:///${project.build.directory}/test-classes/atlas-log4j.xml + + + atlas.graphdb.backend + ${graphdb.backend.impl} + + + embedded.solr.directory + ${project.build.directory} + + + solr.log.dir + ${project.build.directory}/logs + + + 
org.eclipse.jetty.annotations.maxWait + 5000 + + + atlas-stop + 31001 + ${jetty-maven-plugin.stopWait} + ${debug.jetty.daemon} + ${project.build.testOutputDirectory} + true + jar + + + + org.apache.curator + curator-client + ${curator.version} + + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + + + start-jetty + pre-integration-test + + + stop + deploy-war + + + + stop-jetty + post-integration-test + + stop + + + + + + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.doxia + doxia-module-twiki + ${doxia.version} + + + org.apache.maven.doxia + doxia-core + ${doxia.version} + + + + + + site + + prepare-package + + + + false + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + false + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-resources + validate + + copy-resources + + + ${basedir}/target/models + + + ${basedir}/../models + + 0000-Area0/0010-base_model.json + 1000-Hadoop/** + + + + + + + copy-solr-resources + validate + + copy-resources + + + ${project.build.directory}/solr + + + ${basedir}/../../test-tools/src/main/resources/solr + + + + + + + + + + diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/ImpalaLineageTool.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/ImpalaLineageTool.java new file mode 100644 index 0000000000..6e6d6f1eed --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/ImpalaLineageTool.java @@ -0,0 +1,216 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala; + +import org.apache.atlas.impala.hook.ImpalaLineageHook; + +import java.io.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOCase; +import org.apache.commons.io.comparator.LastModifiedFileComparator; +import org.apache.commons.io.filefilter.PrefixFileFilter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Entry point of actual implementation of Impala lineage tool. It reads the lineage records in + * lineage log. It then calls instance of ImpalaLineageHook to convert lineage records to + * lineage notifications and send them to Atlas. + */ +public class ImpalaLineageTool { + private static final Logger LOG = LoggerFactory.getLogger(ImpalaLineageTool.class); + private static final String WAL_FILE_EXTENSION = ".wal"; + private static final String WAL_FILE_PREFIX = "WAL"; + private String directoryName; + private String prefix; + + public ImpalaLineageTool(String[] args) { + try { + Options options = new Options(); + options.addOption("d", "directory", true, "the lineage files' folder"); + options.addOption("p", "prefix", true, "the prefix of the lineage files"); + + CommandLine cmd = new DefaultParser().parse(options, args); + directoryName = cmd.getOptionValue("d"); + prefix = cmd.getOptionValue("p"); + } catch(ParseException e) { + LOG.warn("Failed to parse command arguments. Error: ", e.getMessage()); + printUsage(); + + throw new RuntimeException(e); + } + } + + public void run() { + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + File[] currentFiles = getCurrentFiles(); + int fileNum = currentFiles.length; + + for(int i = 0; i < fileNum; i++) { + String filename = currentFiles[i].getAbsolutePath(); + String walFilename = directoryName + WAL_FILE_PREFIX + currentFiles[i].getName() + WAL_FILE_EXTENSION; + + LOG.info("Importing: {}", filename); + importHImpalaEntities(impalaLineageHook, filename, walFilename); + + if(i != fileNum - 1) { + deleteLineageAndWal(currentFiles[i], walFilename); + } + } + LOG.info("Impala bridge processing: Done! "); + } + + public static void main(String[] args) { + if (args != null && args.length != 4) { + // The lineage file location and prefix should be input as the parameters + System.out.println("Impala bridge: wrong number of arguments. 
Please try again"); + printUsage(); + return; + } + + ImpalaLineageTool instance = new ImpalaLineageTool(args); + instance.run(); + } + + /** + * Delete the used lineage file and wal file + * @param currentFile The current file + * @param wal The wal file + */ + public static void deleteLineageAndWal(File currentFile, String wal) { + if(currentFile.exists() && currentFile.delete()) { + LOG.info("Lineage file {} is deleted successfully", currentFile.getPath()); + } else { + LOG.info("Failed to delete the lineage file {}", currentFile.getPath()); + } + + File file = new File(wal); + + if(file.exists() && file.delete()) { + LOG.info("Wal file {} deleted successfully", wal); + } else { + LOG.info("Failed to delete the wal file {}", wal); + } + } + + private static void printUsage() { + System.out.println(); + System.out.println(); + System.out.println("Usage: import-impala.sh [-d ] [-p ]" ); + System.out.println(" Imports specified lineage files by given directory and file prefix."); + System.out.println(); + } + + /** + * This function figures out the right lineage file path+name to process sorted by the last + * time they are modified. (old -> new) + * @return get the lineage files from given directory with given prefix. + */ + public File[] getCurrentFiles() { + try { + LOG.info("Scanning: " + directoryName); + File folder = new File(directoryName); + File[] listOfFiles = folder.listFiles((FileFilter) new PrefixFileFilter(prefix, IOCase.SENSITIVE)); + + if ((listOfFiles == null) || (listOfFiles.length == 0)) { + LOG.info("Found no lineage files."); + return new File[0]; + } + + if(listOfFiles.length > 1) { + Arrays.sort(listOfFiles, LastModifiedFileComparator.LASTMODIFIED_COMPARATOR); + } + + LOG.info("Found {} lineage files" + listOfFiles.length); + return listOfFiles; + } catch(Exception e) { + LOG.error("Import lineage file failed.", e); + } + return new File[0]; + } + + private boolean processImpalaLineageHook(ImpalaLineageHook impalaLineageHook, List lineageList) { + boolean allSucceed = true; + + // returns true if successfully sent to Atlas + for (String lineageRecord : lineageList) { + try { + impalaLineageHook.process(lineageRecord); + } catch (Exception ex) { + String errorMessage = String.format("Exception at query {} \n", lineageRecord); + LOG.error(errorMessage, ex); + + allSucceed = false; + } + } + + return allSucceed; + } + + /** + * Create a list of lineage queries based on the lineage file and the wal file + * @param name + * @param walfile + * @return + */ + public void importHImpalaEntities(ImpalaLineageHook impalaLineageHook, String name, String walfile) { + List lineageList = new ArrayList<>(); + + try { + File lineageFile = new File(name); //use current file length to minus the offset + File walFile = new File(walfile); + // if the wal file does not exist, create one with 0 byte read, else, read the number + if(!walFile.exists()) { + BufferedWriter writer = new BufferedWriter(new FileWriter(walfile)); + writer.write("0, " + name); + writer.close(); + } + + LOG.debug("Reading: " + name); + String lineageRecord = FileUtils.readFileToString(lineageFile, "UTF-8"); + + lineageList.add(lineageRecord); + + // call instance of ImpalaLineageHook to process the list of Impala lineage record + if(processImpalaLineageHook(impalaLineageHook, lineageList)) { + // write how many bytes the current file is to the wal file + FileWriter newWalFile = new FileWriter(walfile, true); + BufferedWriter newWalFileBuf = new BufferedWriter(newWalFile); + newWalFileBuf.newLine(); + 
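// Note: the WAL entry appended on the next line has the form "<bytes in lineage file>,<lineage file path>",
// while the initial entry written when the WAL file is first created above is "0, <lineage file path>".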
newWalFileBuf.write(String.valueOf(lineageFile.length()) + "," + name); + + newWalFileBuf.close(); + newWalFile.close(); + } else { + LOG.error("Error sending some of impala lineage records to ImpalaHook"); + } + } catch (Exception e) { + LOG.error("Error in processing lineage records. Exception: " + e.getMessage()); + } + } + +} \ No newline at end of file diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/AtlasImpalaHookContext.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/AtlasImpalaHookContext.java new file mode 100644 index 0000000000..51b2f832e7 --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/AtlasImpalaHookContext.java @@ -0,0 +1,213 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.hook; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import org.apache.atlas.impala.model.ImpalaOperationType; +import org.apache.atlas.impala.model.ImpalaQuery; +import org.apache.atlas.impala.model.LineageVertex; +import org.apache.atlas.impala.model.LineageVertexMetadata; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.commons.lang.StringUtils; + + +/** + * Contain the info related to an linear record from Impala + */ +public class AtlasImpalaHookContext { + public static final char QNAME_SEP_METADATA_NAMESPACE = '@'; + public static final char QNAME_SEP_ENTITY_NAME = '.'; + public static final char QNAME_SEP_PROCESS = ':'; + + private final ImpalaLineageHook hook; + private final ImpalaOperationType impalaOperation; + private final ImpalaQuery lineageQuery; + private final Map qNameEntityMap = new HashMap<>(); + + public AtlasImpalaHookContext(ImpalaLineageHook hook, ImpalaOperationType operationType, + ImpalaQuery lineageQuery) throws Exception { + this.hook = hook; + this.impalaOperation = operationType; + this.lineageQuery = lineageQuery; + + } + + public ImpalaQuery getLineageQuery() { + return lineageQuery; + } + public String getQueryStr() { return lineageQuery.getQueryText(); } + + public ImpalaOperationType getImpalaOperationType() { + return impalaOperation; + } + + public void putEntity(String qualifiedName, AtlasEntity entity) { + qNameEntityMap.put(qualifiedName, entity); + } + + public AtlasEntity getEntity(String qualifiedName) { + return qNameEntityMap.get(qualifiedName); + } + + public Collection getEntities() { return qNameEntityMap.values(); } + + public String getMetadataNamespace() { + return hook.getMetadataNamespace(); + } + + public String getHostName() { + return hook.getHostName(); + } + + public boolean isConvertHdfsPathToLowerCase() { + return hook.isConvertHdfsPathToLowerCase(); + } + + public String getQualifiedNameForDb(String dbName) { + return (dbName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + getMetadataNamespace(); + } + + public String getQualifiedNameForTable(String fullTableName) throws IllegalArgumentException { + if (fullTableName == null) { + throw new IllegalArgumentException("fullTableName is null"); + } + + int sepPos = fullTableName.lastIndexOf(QNAME_SEP_ENTITY_NAME); + + if (!isSeparatorIndexValid(sepPos)) { + throw new IllegalArgumentException(fullTableName + " does not contain database name"); + } + + return getQualifiedNameForTable(fullTableName.substring(0, sepPos), fullTableName.substring(sepPos+1)); + } + + public String getQualifiedNameForTable(String dbName, String tableName) { + return (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + getMetadataNamespace(); + } + + public String getQualifiedNameForColumn(LineageVertex vertex) { + // get database name and table name + LineageVertexMetadata metadata = vertex.getMetadata(); + + if (metadata == null) { + return getQualifiedNameForColumn(vertex.getVertexId()); + } + + String fullTableName = metadata.getTableName(); + + if (StringUtils.isEmpty(fullTableName)) { + throw new IllegalArgumentException("fullTableName in column 
metadata is null"); + } + + int sepPos = fullTableName.lastIndexOf(QNAME_SEP_ENTITY_NAME); + + if (!isSeparatorIndexValid(sepPos)) { + throw new IllegalArgumentException(fullTableName + "in column metadata does not contain database name"); + } + + // get pure column name + String columnName = vertex.getVertexId(); + if (StringUtils.isEmpty(columnName)) { + throw new IllegalArgumentException("column name in vertexId is null"); + } + + int sepPosLast = columnName.lastIndexOf(QNAME_SEP_ENTITY_NAME); + if (isSeparatorIndexValid(sepPosLast)) { + columnName = columnName.substring(sepPosLast+1); + } + + return getQualifiedNameForColumn( + fullTableName.substring(0, sepPos), + fullTableName.substring(sepPos+1), + columnName); + } + + public String getQualifiedNameForColumn(String fullColumnName) throws IllegalArgumentException { + if (fullColumnName == null) { + throw new IllegalArgumentException("fullColumnName is null"); + } + + int sepPosFirst = fullColumnName.indexOf(QNAME_SEP_ENTITY_NAME); + int sepPosLast = fullColumnName.lastIndexOf(QNAME_SEP_ENTITY_NAME); + + if (!isSeparatorIndexValid(sepPosFirst) || !isSeparatorIndexValid(sepPosLast) || + sepPosFirst == sepPosLast) { + throw new IllegalArgumentException( + String.format("fullColumnName {} does not contain database name or table name", + fullColumnName)); + } + + return getQualifiedNameForColumn( + fullColumnName.substring(0, sepPosFirst), + fullColumnName.substring(sepPosFirst+1, sepPosLast), + fullColumnName.substring(sepPosLast+1)); + } + + public String getColumnNameOnly(String fullColumnName) throws IllegalArgumentException { + if (fullColumnName == null) { + throw new IllegalArgumentException("fullColumnName is null"); + } + + int sepPosLast = fullColumnName.lastIndexOf(QNAME_SEP_ENTITY_NAME); + + if (!isSeparatorIndexValid(sepPosLast)) { + return fullColumnName; + } + + return fullColumnName.substring(sepPosLast+1); + } + + public String getQualifiedNameForColumn(String dbName, String tableName, String columnName) { + return + (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_ENTITY_NAME + + columnName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + getMetadataNamespace(); + } + + public String getUserName() { return lineageQuery.getUser(); } + + public String getDatabaseNameFromTable(String fullTableName) { + int sepPos = fullTableName.lastIndexOf(QNAME_SEP_ENTITY_NAME); + if (isSeparatorIndexValid(sepPos)) { + return fullTableName.substring(0, sepPos); + } + + return null; + } + + public String getTableNameFromColumn(String columnName) { + int sepPos = columnName.lastIndexOf(QNAME_SEP_ENTITY_NAME); + if (!isSeparatorIndexValid(sepPos)) { + return null; + } + + String tableName = columnName.substring(0, sepPos); + if (!ImpalaIdentifierParser.isTableNameValid(tableName)) { + return null; + } + + return tableName; + } + + public boolean isSeparatorIndexValid(int index) { + return index > 0; + } + +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaHook.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaHook.java new file mode 100644 index 0000000000..fc047629a6 --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaHook.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.impala.hook;
+
+import org.apache.impala.hooks.QueryCompleteContext;
+import org.apache.impala.hooks.QueryEventHook;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ImpalaHook implements QueryEventHook {
+    private static final Logger LOG = LoggerFactory.getLogger(ImpalaHook.class);
+
+    private ImpalaLineageHook lineageHook;
+
+    /**
+     * Execute Impala hook
+     */
+    public void onQueryComplete(QueryCompleteContext context) {
+        try {
+            lineageHook.process(context.getLineageGraph());
+        } catch (Exception ex) {
+            // String.format expects %s, not the SLF4J-style {} placeholder
+            String errorMessage = String.format("Error in processing impala lineage: %s", context.getLineageGraph());
+            LOG.error(errorMessage, ex);
+        }
+    }
+
+    /**
+     * Initialization of Impala hook
+     */
+    public void onImpalaStartup() {
+        lineageHook = new ImpalaLineageHook();
+    }
+}
diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaIdentifierParser.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaIdentifierParser.java
new file mode 100644
index 0000000000..33e44f729e
--- /dev/null
+++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaIdentifierParser.java
@@ -0,0 +1,389 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.impala.hook;
+
+import java.util.Arrays;
+import java.util.HashSet;
+
+import java.util.Set;
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * Check if a string is a valid Impala table identifier.
+ * It could be <dbName>.<tableName> or <tableName>
+ */
+public class ImpalaIdentifierParser {
+    // http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/impala_identifiers.html
+    // https://github.com/apache/impala/blob/64e6719870db5602a6fa85014bc6c264080b9414/tests/common/patterns.py
+    // VALID_IMPALA_IDENTIFIER_REGEX = re.compile(r'^[a-zA-Z][a-zA-Z0-9_]{,127}$')
+    // add "." to allow <dbName>.<tableName>
+    public static final String VALID_IMPALA_IDENTIFIER_REGEX = "^[a-zA-Z][a-zA-Z0-9_.]{0,127}$";
+
+    public static boolean isTableNameValid(String inTableName) {
+        if (StringUtils.isEmpty(inTableName)) {
+            return false;
+        }
+
+        if (!inTableName.matches(VALID_IMPALA_IDENTIFIER_REGEX)) {
+            return false;
+        }
+
+        // split on a literal dot; a bare "." is a regex that matches every character
+        String[] tokens = inTableName.split("\\.");
+        if (tokens.length > 2) {
+            // valid value should be <dbName>.<tableName> or <tableName>
+            return false;
+        }
+
+        for (String token : tokens) {
+            if (isReserved(token)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    // The following is extracted from Impala code.
+    // Mainly from https://github.com/apache/impala/blob/master/fe/src/main/jflex/sql-scanner.flex
+    // Map from keyword string to token id.
+    // We use a linked hash map because the insertion order is important.
+    // for example, we want "and" to come after "&&" to make sure error reporting
+    // uses "and" as a display name and not "&&".
+    // Please keep the puts sorted alphabetically by keyword (where the order
+    // does not affect the desired error reporting)
+    static HashSet<String> keywordMap;
+    // map from token id to token description
+    static HashSet<String> tokenIdMap;
+    // Reserved words are words that cannot be used as identifiers. It is a superset of
+    // keywords.
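// --- Editorial illustration (not part of the original patch) ---
// Given the validation above and the reserved-word handling initialized below, the intended
// behaviour of isTableNameValid() is roughly (names are made up for the example):
//   isTableNameValid("db1.fact_orders") -> true   ("dbName.tableName")
//   isTableNameValid("fact_orders")     -> true   (bare table name)
//   isTableNameValid("select")          -> false  (Impala keyword, hence reserved)
//   isTableNameValid("cardinality")     -> false  (SQL:2016 reserved word)
//   isTableNameValid("year")            -> true   (whitelisted back out in init())
//   isTableNameValid("a.b.c")           -> false  (more than dbName.tableName)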
+ static Set reservedWords; + + + public static void init() { + // initilize keywords + keywordMap = new HashSet<>(); + keywordMap.add("&&"); + keywordMap.add("add"); + keywordMap.add("aggregate"); + keywordMap.add("all"); + keywordMap.add("alter"); + keywordMap.add("analytic"); + keywordMap.add("and"); + keywordMap.add("anti"); + keywordMap.add("api_version"); + keywordMap.add("array"); + keywordMap.add("as"); + keywordMap.add("asc"); + keywordMap.add("authorization"); + keywordMap.add("avro"); + keywordMap.add("between"); + keywordMap.add("bigint"); + keywordMap.add("binary"); + keywordMap.add("block_size"); + keywordMap.add("boolean"); + keywordMap.add("by"); + keywordMap.add("cached"); + keywordMap.add("case"); + keywordMap.add("cascade"); + keywordMap.add("cast"); + keywordMap.add("change"); + keywordMap.add("char"); + keywordMap.add("class"); + keywordMap.add("close_fn"); + keywordMap.add("column"); + keywordMap.add("columns"); + keywordMap.add("comment"); + keywordMap.add("compression"); + keywordMap.add("compute"); + keywordMap.add("copy"); + keywordMap.add("create"); + keywordMap.add("cross"); + keywordMap.add("current"); + keywordMap.add("data"); + keywordMap.add("database"); + keywordMap.add("databases"); + keywordMap.add("date"); + keywordMap.add("datetime"); + keywordMap.add("decimal"); + //keywordMap.add("default"); "default" can be database or table name + keywordMap.add("delete"); + keywordMap.add("delimited"); + keywordMap.add("desc"); + keywordMap.add("describe"); + keywordMap.add("distinct"); + keywordMap.add("div"); + keywordMap.add("double"); + keywordMap.add("drop"); + keywordMap.add("else"); + keywordMap.add("encoding"); + keywordMap.add("end"); + keywordMap.add("escaped"); + keywordMap.add("exists"); + keywordMap.add("explain"); + keywordMap.add("extended"); + keywordMap.add("external"); + keywordMap.add("false"); + keywordMap.add("fields"); + keywordMap.add("fileformat"); + keywordMap.add("files"); + keywordMap.add("finalize_fn"); + keywordMap.add("first"); + keywordMap.add("float"); + keywordMap.add("following"); + keywordMap.add("for"); + keywordMap.add("format"); + keywordMap.add("formatted"); + keywordMap.add("from"); + keywordMap.add("full"); + keywordMap.add("function"); + keywordMap.add("functions"); + keywordMap.add("grant"); + keywordMap.add("group"); + keywordMap.add("hash"); + keywordMap.add("having"); + keywordMap.add("if"); + keywordMap.add("ilike"); + keywordMap.add("ignore"); + keywordMap.add("in"); + keywordMap.add("incremental"); + keywordMap.add("init_fn"); + keywordMap.add("inner"); + keywordMap.add("inpath"); + keywordMap.add("insert"); + keywordMap.add("int"); + keywordMap.add("integer"); + keywordMap.add("intermediate"); + keywordMap.add("interval"); + keywordMap.add("into"); + keywordMap.add("invalidate"); + keywordMap.add("iregexp"); + keywordMap.add("is"); + keywordMap.add("join"); + keywordMap.add("kudu"); + keywordMap.add("last"); + keywordMap.add("left"); + keywordMap.add("like"); + keywordMap.add("limit"); + keywordMap.add("lines"); + keywordMap.add("load"); + keywordMap.add("location"); + keywordMap.add("map"); + keywordMap.add("merge_fn"); + keywordMap.add("metadata"); + keywordMap.add("not"); + keywordMap.add("null"); + keywordMap.add("nulls"); + keywordMap.add("offset"); + keywordMap.add("on"); + keywordMap.add("||"); + keywordMap.add("or"); + keywordMap.add("orc"); + keywordMap.add("order"); + keywordMap.add("outer"); + keywordMap.add("over"); + keywordMap.add("overwrite"); + keywordMap.add("parquet"); + 
keywordMap.add("parquetfile"); + keywordMap.add("partition"); + keywordMap.add("partitioned"); + keywordMap.add("partitions"); + keywordMap.add("preceding"); + keywordMap.add("prepare_fn"); + keywordMap.add("primary"); + keywordMap.add("produced"); + keywordMap.add("purge"); + keywordMap.add("range"); + keywordMap.add("rcfile"); + keywordMap.add("real"); + keywordMap.add("recover"); + keywordMap.add("refresh"); + keywordMap.add("regexp"); + keywordMap.add("rename"); + keywordMap.add("repeatable"); + keywordMap.add("replace"); + keywordMap.add("replication"); + keywordMap.add("restrict"); + keywordMap.add("returns"); + keywordMap.add("revoke"); + keywordMap.add("right"); + keywordMap.add("rlike"); + keywordMap.add("role"); + keywordMap.add("roles"); + keywordMap.add("row"); + keywordMap.add("rows"); + keywordMap.add("schema"); + keywordMap.add("schemas"); + keywordMap.add("select"); + keywordMap.add("semi"); + keywordMap.add("sequencefile"); + keywordMap.add("serdeproperties"); + keywordMap.add("serialize_fn"); + keywordMap.add("set"); + keywordMap.add("show"); + keywordMap.add("smallint"); + keywordMap.add("sort"); + keywordMap.add("stats"); + keywordMap.add("stored"); + keywordMap.add("straight_join"); + keywordMap.add("string"); + keywordMap.add("struct"); + keywordMap.add("symbol"); + keywordMap.add("table"); + keywordMap.add("tables"); + keywordMap.add("tablesample"); + keywordMap.add("tblproperties"); + keywordMap.add("terminated"); + keywordMap.add("textfile"); + keywordMap.add("then"); + keywordMap.add("timestamp"); + keywordMap.add("tinyint"); + keywordMap.add("to"); + keywordMap.add("true"); + keywordMap.add("truncate"); + keywordMap.add("unbounded"); + keywordMap.add("uncached"); + keywordMap.add("union"); + keywordMap.add("unknown"); + keywordMap.add("update"); + keywordMap.add("update_fn"); + keywordMap.add("upsert"); + keywordMap.add("use"); + keywordMap.add("using"); + keywordMap.add("values"); + keywordMap.add("varchar"); + keywordMap.add("view"); + keywordMap.add("when"); + keywordMap.add("where"); + keywordMap.add("with"); + + // Initilize tokenIdMap for error reporting + tokenIdMap = new HashSet<>(keywordMap); + + // add non-keyword tokens. Please keep this in the same order as they are used in this + // file. + tokenIdMap.add("EOF"); + tokenIdMap.add("..."); + tokenIdMap.add(":"); + tokenIdMap.add(";"); + tokenIdMap.add("COMMA"); + tokenIdMap.add("."); + tokenIdMap.add("*"); + tokenIdMap.add("("); + tokenIdMap.add(")"); + tokenIdMap.add("["); + tokenIdMap.add("]"); + tokenIdMap.add("/"); + tokenIdMap.add("%"); + tokenIdMap.add("+"); + tokenIdMap.add("-"); + tokenIdMap.add("&"); + tokenIdMap.add("|"); + tokenIdMap.add("^"); + tokenIdMap.add("~"); + tokenIdMap.add("="); + tokenIdMap.add("!"); + tokenIdMap.add("<"); + tokenIdMap.add(">"); + tokenIdMap.add("UNMATCHED STRING LITERAL"); + tokenIdMap.add("!="); + tokenIdMap.add("INTEGER LITERAL"); + tokenIdMap.add("NUMERIC OVERFLOW"); + tokenIdMap.add("DECIMAL LITERAL"); + tokenIdMap.add("EMPTY IDENTIFIER"); + tokenIdMap.add("IDENTIFIER"); + tokenIdMap.add("STRING LITERAL"); + tokenIdMap.add("COMMENTED_PLAN_HINT_START"); + tokenIdMap.add("COMMENTED_PLAN_HINT_END"); + tokenIdMap.add("Unexpected character"); + + + // For impala 3.0, reserved words = keywords + sql16ReservedWords - builtinFunctions + // - whitelist + // unused reserved words = reserved words - keywords. These words are reserved for + // forward compatibility purposes. 
+ reservedWords = new HashSet<>(keywordMap); + // Add SQL:2016 reserved words + reservedWords.addAll(Arrays.asList(new String[] { + "abs", "acos", "allocate", "any", "are", "array_agg", "array_max_cardinality", + "asensitive", "asin", "asymmetric", "at", "atan", "atomic", "avg", "begin", + "begin_frame", "begin_partition", "blob", "both", "call", "called", "cardinality", + "cascaded", "ceil", "ceiling", "char_length", "character", "character_length", + "check", "classifier", "clob", "close", "coalesce", "collate", "collect", + "commit", "condition", "connect", "constraint", "contains", "convert", "copy", + "corr", "corresponding", "cos", "cosh", "count", "covar_pop", "covar_samp", + "cube", "cume_dist", "current_catalog", "current_date", + "current_default_transform_group", "current_path", "current_path", "current_role", + "current_role", "current_row", "current_schema", "current_time", + "current_timestamp", "current_transform_group_for_type", "current_user", "cursor", + "cycle", "day", "deallocate", "dec", "decfloat", "declare", "define", + "dense_rank", "deref", "deterministic", "disconnect", "dynamic", "each", + "element", "empty", "end-exec", "end_frame", "end_partition", "equals", "escape", + "every", "except", "exec", "execute", "exp", "extract", "fetch", "filter", + "first_value", "floor", "foreign", "frame_row", "free", "fusion", "get", "global", + "grouping", "groups", "hold", "hour", "identity", "indicator", "initial", "inout", + "insensitive", "integer", "intersect", "intersection", "json_array", + "json_arrayagg", "json_exists", "json_object", "json_objectagg", "json_query", + "json_table", "json_table_primitive", "json_value", "lag", "language", "large", + "last_value", "lateral", "lead", "leading", "like_regex", "listagg", "ln", + "local", "localtime", "localtimestamp", "log", "log10 ", "lower", "match", + "match_number", "match_recognize", "matches", "max", "member", "merge", "method", + "min", "minute", "mod", "modifies", "module", "month", "multiset", "national", + "natural", "nchar", "nclob", "new", "no", "none", "normalize", "nth_value", + "ntile", "nullif", "numeric", "occurrences_regex", "octet_length", "of", "old", + "omit", "one", "only", "open", "out", "overlaps", "overlay", "parameter", + "pattern", "per", "percent", "percent_rank", "percentile_cont", "percentile_disc", + "period", "portion", "position", "position_regex", "power", "precedes", + "precision", "prepare", "procedure", "ptf", "rank", "reads", "real", "recursive", + "ref", "references", "referencing", "regr_avgx", "regr_avgy", "regr_count", + "regr_intercept", "regr_r2", "regr_slope", "regr_sxx", "regr_sxy", "regr_syy", + "release", "result", "return", "rollback", "rollup", "row_number", "running", + "savepoint", "scope", "scroll", "search", "second", "seek", "sensitive", + "session_user", "similar", "sin", "sinh", "skip", "some", "specific", + "specifictype", "sql", "sqlexception", "sqlstate", "sqlwarning", "sqrt", "start", + "static", "stddev_pop", "stddev_samp", "submultiset", "subset", "substring", + "substring_regex", "succeeds", "sum", "symmetric", "system", "system_time", + "system_user", "tan", "tanh", "time", "timezone_hour", "timezone_minute", + "trailing", "translate", "translate_regex", "translation", "treat", "trigger", + "trim", "trim_array", "uescape", "unique", "unknown", "unnest", "update ", + "upper", "user", "value", "value_of", "var_pop", "var_samp", "varbinary", + "varying", "versioning", "whenever", "width_bucket", "window", "within", + "without", "year"})); + // TODO: Remove 
impala builtin function names. Need to find content of + // BuiltinsDb.getInstance().getAllFunctions() + //reservedWords.removeAll(BuiltinsDb.getInstance().getAllFunctions().keySet()); + + // Remove whitelist words. These words might be heavily used in production, and + // impala is unlikely to implement SQL features around these words in the near future. + reservedWords.removeAll(Arrays.asList(new String[] { + // time units + "year", "month", "day", "hour", "minute", "second", + "begin", "call", "check", "classifier", "close", "identity", "language", + "localtime", "member", "module", "new", "nullif", "old", "open", "parameter", + "period", "result", "return", "sql", "start", "system", "time", "user", "value" + })); + } + + static { + init(); + } + + static boolean isReserved(String token) { + return token != null && reservedWords.contains(token.toLowerCase()); + } +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java new file mode 100644 index 0000000000..907f24478c --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaLineageHook.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.hook; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import com.google.common.collect.Sets; +import java.io.IOException; +import org.apache.atlas.hook.AtlasHook; +import org.apache.atlas.impala.hook.events.BaseImpalaEvent; +import org.apache.atlas.impala.hook.events.CreateImpalaProcess; +import org.apache.atlas.impala.model.ImpalaOperationType; +import org.apache.atlas.impala.model.ImpalaQuery; +import org.apache.atlas.type.AtlasType; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import javax.security.auth.Subject; +import javax.security.auth.kerberos.KerberosPrincipal; +import java.util.HashSet; + +import static org.apache.atlas.repository.Constants.IMPALA_SOURCE; + +public class ImpalaLineageHook extends AtlasHook { + private static final Logger LOG = LoggerFactory.getLogger(ImpalaLineageHook.class); + public static final String ATLAS_ENDPOINT = "atlas.rest.address"; + public static final String REALM_SEPARATOR = "@"; + public static final String CONF_PREFIX = "atlas.hook.impala."; + public static final String CONF_REALM_NAME = "atlas.realm.name"; + public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase"; + public static final String DEFAULT_HOST_NAME = "localhost"; + + private static final String realm; + private static final boolean convertHdfsPathToLowerCase; + private static String hostName; + + static { + realm = atlasProperties.getString(CONF_REALM_NAME, DEFAULT_CLUSTER_NAME); // what should default be ?? + convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false); + + try { + hostName = InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException e) { + LOG.warn("No hostname found. 
Setting the hostname to default value {}", DEFAULT_HOST_NAME, e); + hostName = DEFAULT_HOST_NAME; + } + } + + public ImpalaLineageHook() { + + } + + public String getMessageSource() { + return IMPALA_SOURCE; + } + + public void process(String impalaQueryString) throws Exception { + if (StringUtils.isEmpty(impalaQueryString)) { + LOG.warn("==> ImpalaLineageHook.process skips because the impalaQueryString is empty <=="); + return; + } + + ImpalaQuery lineageQuery = AtlasType.fromJson(impalaQueryString, ImpalaQuery.class); + process(lineageQuery); + } + + public void process(ImpalaQuery lineageQuery) throws Exception { + if (lineageQuery == null) { + LOG.warn("==> ImpalaLineageHook.process skips because the query object is null <=="); + return; + } + + if (StringUtils.isEmpty(lineageQuery.getQueryText())) { + LOG.warn("==> ImpalaLineageHook.process skips because the query text is empty <=="); + return; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("==> ImpalaLineageHook.process({})", lineageQuery.getQueryText()); + } + + try { + ImpalaOperationType operationType = ImpalaOperationParser.getImpalaOperationType(lineageQuery.getQueryText()); + AtlasImpalaHookContext context = + new AtlasImpalaHookContext(this, operationType, lineageQuery); + BaseImpalaEvent event = null; + + switch (operationType) { + case CREATEVIEW: + case CREATETABLE_AS_SELECT: + case ALTERVIEW_AS: + case QUERY: + event = new CreateImpalaProcess(context); + break; + default: + if (LOG.isDebugEnabled()) { + LOG.debug("HiveHook.run({}): operation ignored", lineageQuery.getQueryText()); + } + break; + } + + if (event != null) { + LOG.debug("Processing event: " + lineageQuery.getQueryText()); + + final UserGroupInformation ugi = getUgiFromUserName(lineageQuery.getUser()); + + super.notifyEntities(event.getNotificationMessages(), ugi); + } + } catch (Throwable t) { + + LOG.error("ImpalaLineageHook.process(): failed to process query {}", + AtlasType.toJson(lineageQuery), t); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== ImpalaLineageHook.process({})", lineageQuery.getQueryText()); + } + } + + public String getHostName() { + return hostName; + } + + private UserGroupInformation getUgiFromUserName(String userName) throws IOException { + String userPrincipal = userName.contains(REALM_SEPARATOR)? userName : userName + "@" + getRealm(); + Subject userSubject = new Subject(false, Sets.newHashSet( + new KerberosPrincipal(userPrincipal)), new HashSet(),new HashSet()); + return UserGroupInformation.getUGIFromSubject(userSubject); + } + + public String getRealm() { + return realm; + } + + public boolean isConvertHdfsPathToLowerCase() { + return convertHdfsPathToLowerCase; + } +} \ No newline at end of file diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java new file mode 100644 index 0000000000..98f3eed1b7 --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.hook; + +import org.apache.atlas.impala.model.ImpalaOperationType; +import org.apache.commons.lang.StringUtils; +import java.util.regex.Pattern; + +/** + * Parse an Impala query text and output the impala operation type + */ +public class ImpalaOperationParser { + + private static final Pattern COMMENT_PATTERN = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL); + + private static final Pattern CREATE_VIEW_PATTERN = + Pattern.compile("^[ ]*\\bcreate\\b.*\\bview\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); + + private static final Pattern CREATE_TABLE_AS_SELECT_PATTERN = + Pattern.compile("^[ ]*\\bcreate\\b.*\\btable\\b.*\\bas\\b.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); + + private static final Pattern ALTER_VIEW_AS_SELECT_PATTERN = + Pattern.compile("^[ ]*\\balter\\b.*\\bview\\b.*\\bas.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); + + private static final Pattern INSERT_SELECT_FROM_PATTERN = + Pattern.compile("^[ ]*\\binsert\\b.*\\b(into|overwrite)\\b.*\\bselect\\b.*\\bfrom\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); + + public ImpalaOperationParser() { + } + + public static ImpalaOperationType getImpalaOperationType(String queryText) { + // Impala does no generate lineage record for command "LOAD DATA IN PATH" + String queryTextWithNoComments = COMMENT_PATTERN.matcher(queryText).replaceAll(""); + if (doesMatch(queryTextWithNoComments, CREATE_VIEW_PATTERN)) { + return ImpalaOperationType.CREATEVIEW; + } else if (doesMatch(queryTextWithNoComments, CREATE_TABLE_AS_SELECT_PATTERN)) { + return ImpalaOperationType.CREATETABLE_AS_SELECT; + } else if (doesMatch(queryTextWithNoComments, ALTER_VIEW_AS_SELECT_PATTERN)) { + return ImpalaOperationType.ALTERVIEW_AS; + } else if (doesMatch(queryTextWithNoComments, INSERT_SELECT_FROM_PATTERN)) { + return ImpalaOperationType.QUERY; + } + + return ImpalaOperationType.UNKNOWN; + } + + public static ImpalaOperationType getImpalaOperationSubType(ImpalaOperationType operationType, String queryText) { + if (operationType == ImpalaOperationType.QUERY) { + if (StringUtils.containsIgnoreCase(queryText, "insert into")) { + return ImpalaOperationType.INSERT; + } else if (StringUtils.containsIgnoreCase(queryText, "insert overwrite")) { + return ImpalaOperationType.INSERT_OVERWRITE; + } + } + + return ImpalaOperationType.UNKNOWN; + } + + private static boolean doesMatch(final String queryText, final Pattern pattern) { + return pattern.matcher(queryText).matches(); + } + +} \ No newline at end of file diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/BaseImpalaEvent.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/BaseImpalaEvent.java new file mode 100644 index 0000000000..32efb8321c --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/BaseImpalaEvent.java @@ -0,0 +1,665 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.hook.events; + +import static org.apache.atlas.impala.hook.AtlasImpalaHookContext.QNAME_SEP_PROCESS; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.atlas.impala.hook.AtlasImpalaHookContext; +import org.apache.atlas.impala.hook.ImpalaOperationParser; +import org.apache.atlas.impala.model.ImpalaDataType; +import org.apache.atlas.impala.model.ImpalaNode; +import org.apache.atlas.impala.model.ImpalaOperationType; +import org.apache.atlas.impala.model.ImpalaVertexType; +import org.apache.atlas.impala.model.LineageVertex; +import org.apache.atlas.impala.model.LineageVertexMetadata; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.type.AtlasTypeUtil; + +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The base class for generating notification event to Atlas server + * Most code is copied from BaseHiveEvent to avoid depending on org.apache.atlas.hive.hook + */ +public abstract class BaseImpalaEvent { + private static final Logger LOG = LoggerFactory.getLogger(BaseImpalaEvent.class); + + // Impala should re-use the same entity type as hive. 
So Hive and Impala can operate on same + // database or table + public static final String HIVE_TYPE_DB = "hive_db"; + public static final String HIVE_TYPE_TABLE = "hive_table"; + public static final String HIVE_TYPE_COLUMN = "hive_column"; + + public static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName"; + public static final String ATTRIBUTE_NAME = "name"; + public static final String ATTRIBUTE_OWNER = "owner"; + public static final String ATTRIBUTE_CLUSTER_NAME = "clusterName"; + public static final String ATTRIBUTE_CREATE_TIME = "createTime"; + public static final String ATTRIBUTE_LAST_ACCESS_TIME = "lastAccessTime"; + public static final String ATTRIBUTE_DB = "db"; + public static final String ATTRIBUTE_COLUMNS = "columns"; + public static final String ATTRIBUTE_TABLE = "table"; + public static final String ATTRIBUTE_INPUTS = "inputs"; + public static final String ATTRIBUTE_OUTPUTS = "outputs"; + public static final String ATTRIBUTE_OPERATION_TYPE = "operationType"; + public static final String ATTRIBUTE_START_TIME = "startTime"; + public static final String ATTRIBUTE_USER_NAME = "userName"; + public static final String ATTRIBUTE_QUERY_TEXT = "queryText"; + public static final String ATTRIBUTE_PROCESS = "process"; + public static final String ATTRIBUTE_PROCESS_EXECUTIONS = "processExecutions"; + public static final String ATTRIBUTE_QUERY_ID = "queryId"; + public static final String ATTRIBUTE_QUERY_PLAN = "queryPlan"; + public static final String ATTRIBUTE_END_TIME = "endTime"; + public static final String ATTRIBUTE_RECENT_QUERIES = "recentQueries"; + public static final String ATTRIBUTE_QUERY = "query"; + public static final String ATTRIBUTE_DEPENDENCY_TYPE = "dependencyType"; + public static final String ATTRIBUTE_HOSTNAME = "hostName"; + public static final String EMPTY_ATTRIBUTE_VALUE = ""; + public static final String ATTRIBUTE_EXEC_TIME = "execTime"; + public static final String ATTRIBUTE_DDL_QUERIES = "ddlQueries"; + public static final String ATTRIBUTE_SERVICE_TYPE = "serviceType"; + public static final long MILLIS_CONVERT_FACTOR = 1000; + + protected final AtlasImpalaHookContext context; + protected final Map vertexNameMap; + protected final Map verticesMap; + + public BaseImpalaEvent(AtlasImpalaHookContext context) { + + this.context = context; + vertexNameMap = new HashMap<>(); + verticesMap = new HashMap<>(); + } + + public AtlasImpalaHookContext getContext() { + return context; + } + + public abstract List getNotificationMessages() throws Exception; + + public String getUserName() { return context.getUserName(); } + + public String getTableNameFromVertex(LineageVertex vertex) { + if (vertex.getVertexType() == ImpalaVertexType.COLUMN) { + LineageVertexMetadata metadata = vertex.getMetadata(); + + if (metadata != null) { + return metadata.getTableName(); + } + } + + return getTableNameFromColumn(vertex.getVertexId()); + } + + public String getTableNameFromColumn(String columnName) { + return context.getTableNameFromColumn(columnName); + } + + public String getQualifiedName(ImpalaNode node) throws IllegalArgumentException { + + return getQualifiedName(node.getOwnVertex()); + } + + public String getQualifiedName(LineageVertex node) throws IllegalArgumentException { + if (node == null) { + throw new IllegalArgumentException("node is null"); + } + + ImpalaVertexType nodeType = node.getVertexType(); + + if (nodeType == null) { + if (node.getVertexId() != null) { + LOG.warn("null qualified name for type: null and name: {}", node.getVertexId()); + } + return null; + } + + 
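// Editorial note (illustration, not part of the original patch): with an assumed metadata
// namespace of "cm", the context methods used in the switch below produce qualified names of
// the form:
//   DATABASE vertex "db1"         -> "db1@cm"
//   TABLE    vertex "db1.tbl1"    -> "db1.tbl1@cm"
//   COLUMN   vertex "db1.tbl1.c1" -> "db1.tbl1.c1@cm"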
if (node.getVertexId() == null) { + LOG.warn("null qualified name for type: {} and name: null", nodeType); + return null; + } + + switch (nodeType) { + case DATABASE: + return context.getQualifiedNameForDb(node.getVertexId()); + + case TABLE: + return context.getQualifiedNameForTable(node.getVertexId()); + + case COLUMN: + return context.getQualifiedNameForColumn(node); + + default: + LOG.warn("null qualified name for type: {} and name: {}", nodeType, node.getVertexId()); + return null; + } + } + + static final class AtlasEntityComparator implements Comparator { + @Override + public int compare(AtlasEntity entity1, AtlasEntity entity2) { + String name1 = (String)entity1.getAttribute(ATTRIBUTE_QUALIFIED_NAME); + String name2 = (String)entity2.getAttribute(ATTRIBUTE_QUALIFIED_NAME); + + if (name1 == null) { + return -1; + } + + if (name2 == null) { + return 1; + } + + return name1.toLowerCase().compareTo(name2.toLowerCase()); + } + } + + static final Comparator entityComparator = new AtlasEntityComparator(); + + protected String getQualifiedName(List inputs, List outputs) throws Exception { + ImpalaOperationType operation = context.getImpalaOperationType(); + + if (operation == ImpalaOperationType.CREATEVIEW || + operation == ImpalaOperationType.CREATETABLE_AS_SELECT || + operation == ImpalaOperationType.ALTERVIEW_AS) { + List sortedEntities = new ArrayList<>(outputs); + + Collections.sort(sortedEntities, entityComparator); + + for (AtlasEntity entity : sortedEntities) { + if (entity.getTypeName().equalsIgnoreCase(HIVE_TYPE_TABLE)) { + Long createTime = (Long)entity.getAttribute(ATTRIBUTE_CREATE_TIME); + + return (String)entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + QNAME_SEP_PROCESS + createTime; + } + } + } + + if (operation != ImpalaOperationType.QUERY) { + String errorMessage = String.format("Expect operation to be QUERY, but get unexpected operation type {}", operation.name()); + LOG.error(errorMessage); + throw new IllegalArgumentException(errorMessage); + } + + // construct qualified name for QUERY + String qualifiedName = null; + String operationName = operation.toString(); + + if (operationName != null) { + StringBuilder sb = new StringBuilder(operationName); + + addToProcessQualifiedName(sb, inputs, false); + sb.append("->"); + addToProcessQualifiedName(sb, outputs, true); + + qualifiedName = sb.toString(); + } + + + return qualifiedName; + } + + protected void addToProcessQualifiedName(StringBuilder processQualifiedName, Collection entities, boolean isOutput) { + if (entities == null) { + return; + } + + ImpalaOperationType operation = context.getImpalaOperationType(); + String queryText = context.getQueryStr(); + List sortedEntities = new ArrayList<>(entities); + + Collections.sort(sortedEntities, entityComparator); + + Set dataSetsProcessed = new HashSet<>(); + + for (AtlasEntity entity : sortedEntities) { + String qualifiedName = null; + long createTime = 0; + + qualifiedName = (String)entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME); + + if (entity.getTypeName().equalsIgnoreCase(HIVE_TYPE_TABLE)) { + Long createTimeObj = (Long)entity.getAttribute(ATTRIBUTE_CREATE_TIME); + if (createTimeObj != null) { + createTime = createTimeObj; + } + } + + if (qualifiedName == null || !dataSetsProcessed.add(qualifiedName)) { + continue; + } + + if (isOutput) { + boolean addWriteType = false; + ImpalaOperationType subType = ImpalaOperationParser.getImpalaOperationSubType(operation, queryText); + + switch (subType) { + // Impala does not generate lineage for UPDATE and DELETE + case INSERT: + 
case INSERT_OVERWRITE: + addWriteType = true; + break; + } + + if (addWriteType) { + processQualifiedName.append(QNAME_SEP_PROCESS).append(subType.name()); + } + } + + processQualifiedName.append(QNAME_SEP_PROCESS).append(qualifiedName.toLowerCase().replaceAll("/", "")); + + if (createTime != 0) { + processQualifiedName.append(QNAME_SEP_PROCESS).append(createTime); + } + } + } + + protected AtlasEntity getInputOutputEntity(ImpalaNode node, AtlasEntityExtInfo entityExtInfo) throws Exception { + AtlasEntity ret = null; + + switch(node.getNodeType()) { + case TABLE: + case PARTITION: + case DFS_DIR: { + ret = toAtlasEntity(node, entityExtInfo); + } + break; + } + + return ret; + } + + protected AtlasEntity toAtlasEntity(ImpalaNode node, AtlasEntityExtInfo entityExtInfo) throws Exception { + AtlasEntity ret = null; + + switch (node.getNodeType()) { + case DATABASE: + ret = toDbEntity(node); + break; + + case TABLE: + case PARTITION: + ret = toTableEntity(node, entityExtInfo); + break; + + default: + break; + } + + return ret; + } + + protected AtlasEntity toDbEntity(ImpalaNode db) throws Exception { + return toDbEntity(db.getNodeName()); + } + + protected AtlasEntity toDbEntity(String dbName) throws Exception { + String dbQualifiedName = context.getQualifiedNameForDb(dbName); + AtlasEntity ret = context.getEntity(dbQualifiedName); + + if (ret == null) { + ret = new AtlasEntity(HIVE_TYPE_DB); + + // Impala hook should not send metadata entities. set 'guid' to null - which will: + // - result in this entity to be not included in 'referredEntities' + // - cause Atlas server to resolve the entity by its qualifiedName + ret.setGuid(null); + + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, dbQualifiedName); + ret.setAttribute(ATTRIBUTE_NAME, dbName.toLowerCase()); + ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, context.getMetadataNamespace()); + + context.putEntity(dbQualifiedName, ret); + } + + return ret; + } + + protected AtlasEntityWithExtInfo toTableEntity(ImpalaNode table) throws Exception { + AtlasEntityWithExtInfo ret = new AtlasEntityWithExtInfo(); + + AtlasEntity entity = toTableEntity(table, ret); + + if (entity != null) { + ret.setEntity(entity); + } else { + ret = null; + } + + return ret; + } + + protected AtlasEntity toTableEntity(ImpalaNode table, AtlasEntitiesWithExtInfo entities) throws Exception { + AtlasEntity ret = toTableEntity(table, (AtlasEntityExtInfo) entities); + + if (ret != null) { + entities.addEntity(ret); + } + + return ret; + } + + protected AtlasEntity toTableEntity(ImpalaNode table, AtlasEntityExtInfo entityExtInfo) throws Exception { + if ((table == null) || (table.getNodeName() == null)) { + throw new IllegalArgumentException("table is null or its name is null"); + } + + String dbName = context.getDatabaseNameFromTable(table.getNodeName()); + if (dbName == null) { + throw new IllegalArgumentException(String.format("db name is null for table: {}", table.getNodeName())); + } + + AtlasEntity dbEntity = toDbEntity(dbName); + + if (entityExtInfo != null) { + if (dbEntity != null) { + entityExtInfo.addReferredEntity(dbEntity); + } + } + + AtlasEntity ret = toTableEntity(getObjectId(dbEntity), table, entityExtInfo); + + return ret; + } + + protected AtlasEntity toTableEntity(AtlasObjectId dbId, ImpalaNode table, AtlasEntityExtInfo entityExtInfo) throws Exception { + String tblQualifiedName = getQualifiedName(table); + AtlasEntity ret = context.getEntity(tblQualifiedName); + + if (ret != null) { + return ret; + } + + // a table created in Impala still uses HIVE_TYPE_TABLE to 
allow both Impala and Hive operate + // on the same table + ret = new AtlasEntity(HIVE_TYPE_TABLE); + + // Impala hook should not send meta data entity to Atlas. set 'guid' to null - which will: + // - result in this entity to be not included in 'referredEntities' + // - cause Atlas server to resolve the entity by its qualifiedName + // TODO: enable this once HMS hook is in. Disable this before that. + ret.setGuid(null); + + long createTime = getTableCreateTime(table); + long lastAccessTime = createTime; + + ret.setAttribute(ATTRIBUTE_DB, dbId); + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tblQualifiedName); + ret.setAttribute(ATTRIBUTE_NAME, table.getNodeName().toLowerCase()); + + // just fake it. It should not be sent to Atlas once HMS hook is in + ret.setAttribute(ATTRIBUTE_OWNER, getUserName()); + + ret.setAttribute(ATTRIBUTE_CREATE_TIME, createTime); + ret.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime); + + AtlasObjectId tableId = getObjectId(ret); + List columns = getColumnEntities(tableId, table); + + if (entityExtInfo != null) { + if (columns != null) { + for (AtlasEntity column : columns) { + entityExtInfo.addReferredEntity(column); + } + } + } + + ret.setAttribute(ATTRIBUTE_COLUMNS, getObjectIds(columns)); + + + context.putEntity(tblQualifiedName, ret); + + return ret; + } + + public static AtlasObjectId getObjectId(AtlasEntity entity) { + String qualifiedName = (String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME); + AtlasObjectId ret = new AtlasObjectId(entity.getGuid(), entity.getTypeName(), Collections + .singletonMap(ATTRIBUTE_QUALIFIED_NAME, qualifiedName)); + + return ret; + } + + public static List getObjectIds(List entities) { + final List ret; + + if (CollectionUtils.isNotEmpty(entities)) { + ret = new ArrayList<>(entities.size()); + + for (AtlasEntity entity : entities) { + ret.add(getObjectId(entity)); + } + } else { + ret = Collections.emptyList(); + } + + return ret; + } + + /** + * return the createTime of the table. + * @param table + * @return the createTime of the table. Its unit is in milliseconds. + */ + public static long getTableCreateTime(ImpalaNode table) { + return getTableCreateTime(table.getOwnVertex()); + } + + public static long getTableCreateTime(LineageVertex tableVertex) { + Long createTime = tableVertex.getCreateTime(); + if (createTime != null) { + // the time unit of vertex is in seconds. Convert to milliseconds before sending to Atlas. + return createTime.longValue() * MILLIS_CONVERT_FACTOR; + } else { + return System.currentTimeMillis(); + } + } + + protected List getColumnEntities(AtlasObjectId tableId, ImpalaNode table) { + List ret = new ArrayList<>(); + + for (ImpalaNode childNode : table.getChildren().values()) { + String colQualifiedName = getQualifiedName(childNode); + AtlasEntity column = context.getEntity(colQualifiedName); + + if (column == null) { + column = new AtlasEntity(HIVE_TYPE_COLUMN); + + // if column's table was sent in an earlier notification, set 'guid' to null - which will: + // - result in this entity to be not included in 'referredEntities' + // - cause Atlas server to resolve the entity by its qualifiedName + // TODO: enable this once HMS hook is in. Disable this before that. + column.setGuid(null); + + column.setAttribute(ATTRIBUTE_TABLE, tableId); + column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, colQualifiedName); + column.setAttribute(ATTRIBUTE_NAME, context.getColumnNameOnly(childNode.getNodeName())); + + // just fake it. 
It should not be sent to Atlas once HMS hook is in + column.setAttribute(ATTRIBUTE_OWNER, getUserName()); + + context.putEntity(colQualifiedName, column); + } + + ret.add(column); + } + + return ret; + } + + protected AtlasEntity getImpalaProcessEntity(List inputs, List outputs) throws Exception { + AtlasEntity ret = new AtlasEntity(ImpalaDataType.IMPALA_PROCESS.getName()); + String queryStr = context.getQueryStr(); + + if (queryStr != null) { + queryStr = queryStr.toLowerCase().trim(); + } + + Long startTime = getQueryStartTime(); + Long endTime = getQueryEndTime(); + + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(inputs, outputs)); + ret.setAttribute(ATTRIBUTE_INPUTS, getObjectIds(inputs)); + ret.setAttribute(ATTRIBUTE_OUTPUTS, getObjectIds(outputs)); + ret.setAttribute(ATTRIBUTE_NAME, queryStr); + ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, context.getImpalaOperationType()); + + // We are setting an empty value to these attributes, since now we have a new entity type called impala process + // execution which captures these values. We have to set empty values here because these attributes are + // mandatory attributes for impala process entity type. + ret.setAttribute(ATTRIBUTE_START_TIME, startTime); + ret.setAttribute(ATTRIBUTE_END_TIME, endTime); + ret.setAttribute(ATTRIBUTE_USER_NAME, EMPTY_ATTRIBUTE_VALUE); + ret.setAttribute(ATTRIBUTE_QUERY_TEXT, EMPTY_ATTRIBUTE_VALUE); + ret.setAttribute(ATTRIBUTE_QUERY_ID, EMPTY_ATTRIBUTE_VALUE); + ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported"); + ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr)); + + return ret; + } + + protected AtlasEntity getImpalaProcessExecutionEntity(AtlasEntity impalaProcess) throws Exception { + AtlasEntity ret = new AtlasEntity(ImpalaDataType.IMPALA_PROCESS_EXECUTION.getName()); + String queryStr = context.getQueryStr(); + + if (queryStr != null) { + queryStr = queryStr.toLowerCase().trim(); + } + + Long startTime = getQueryStartTime(); + Long endTime = getQueryEndTime(); + + ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, impalaProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString() + + QNAME_SEP_PROCESS + startTime.toString() + + QNAME_SEP_PROCESS + endTime.toString()); + ret.setAttribute(ATTRIBUTE_NAME, queryStr + QNAME_SEP_PROCESS + startTime); + ret.setAttribute(ATTRIBUTE_START_TIME, startTime); + ret.setAttribute(ATTRIBUTE_END_TIME, endTime); + ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName()); + ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr); + ret.setAttribute(ATTRIBUTE_QUERY_ID, context.getLineageQuery().getQueryId()); + ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported"); + ret.setAttribute(ATTRIBUTE_HOSTNAME, context.getHostName()); + ret.setRelationshipAttribute(ATTRIBUTE_PROCESS, AtlasTypeUtil.toAtlasRelatedObjectId(impalaProcess)); + + return ret; + } + + protected Long getQueryStartTime() { + return context.getLineageQuery().getTimestamp() * BaseImpalaEvent.MILLIS_CONVERT_FACTOR; + } + + protected Long getQueryEndTime() { + return context.getLineageQuery().getEndTime() * BaseImpalaEvent.MILLIS_CONVERT_FACTOR; + } + + protected void addProcessedEntities(AtlasEntitiesWithExtInfo entitiesWithExtInfo) { + for (AtlasEntity entity : context.getEntities()) { + entitiesWithExtInfo.addReferredEntity(entity); + } + + entitiesWithExtInfo.compact(); + } + + // The unit of createTime in vertex is in seconds. So the returned value is + // time in seconds. 
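// Editorial note (illustration, not part of the original patch): lineage vertices carry epoch
// times in seconds, while Atlas attributes such as createTime/startTime/endTime are set in
// milliseconds; hence the MILLIS_CONVERT_FACTOR multiplications above, e.g. a vertex createTime
// of 1554750072 (seconds) is stored as 1554750072000 (milliseconds) on the hive_table entity.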
+ protected Long getCreateTimeInVertex(LineageVertex vertex) { + if (vertex == null) { + return System.currentTimeMillis() / MILLIS_CONVERT_FACTOR; + } + + Long createTime = vertex.getCreateTime(); + + if (createTime != null) { + return createTime; + } + + if (vertex.getVertexType() == ImpalaVertexType.COLUMN) { + LineageVertexMetadata metadata = vertex.getMetadata(); + + if (metadata != null) { + return metadata.getTableCreateTime(); + } + } + + return System.currentTimeMillis() / MILLIS_CONVERT_FACTOR; + } + + protected ImpalaNode createTableNode(String tableName, Long createTime) { + // the created table vertex does not have its Id set as it is not referred in edge + LineageVertex tableVertex = new LineageVertex(); + tableVertex.setVertexType(ImpalaVertexType.TABLE); + tableVertex.setVertexId(tableName); + tableVertex.setCreateTime(createTime); + return new ImpalaNode(tableVertex); + } + + protected AtlasEntity createHiveDDLEntity(AtlasEntity dbOrTable) { + return createHiveDDLEntity(dbOrTable, true); + } + + protected AtlasEntity createHiveDDLEntity(AtlasEntity dbOrTable, boolean excludeEntityGuid) { + AtlasObjectId objId = BaseImpalaEvent.getObjectId(dbOrTable); + AtlasEntity hiveDDL = null; + + if (excludeEntityGuid) { + objId.setGuid(null); + } + + if (StringUtils.equals(objId.getTypeName(), HIVE_TYPE_DB)) { + hiveDDL = new AtlasEntity(ImpalaDataType.HIVE_DB_DDL.getName(), ATTRIBUTE_DB, objId); + } else if (StringUtils.equals(objId.getTypeName(), HIVE_TYPE_TABLE)) { + hiveDDL = new AtlasEntity(ImpalaDataType.HIVE_TABLE_DDL.getName(), ATTRIBUTE_TABLE, objId); + } + + if (hiveDDL != null) { + hiveDDL.setAttribute(ATTRIBUTE_SERVICE_TYPE, "impala"); + hiveDDL.setAttribute(ATTRIBUTE_EXEC_TIME, getQueryStartTime()); + hiveDDL.setAttribute(ATTRIBUTE_QUERY_TEXT, context.getQueryStr()); + hiveDDL.setAttribute(ATTRIBUTE_USER_NAME, getUserName()); + hiveDDL.setAttribute(ATTRIBUTE_NAME, context.getQueryStr() + QNAME_SEP_PROCESS + getQueryStartTime().toString()); + hiveDDL.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveDDL.getAttribute(ATTRIBUTE_NAME)); + } + + return hiveDDL; + } + + protected boolean isDdlOperation() { + return (context.getImpalaOperationType().equals(ImpalaOperationType.CREATEVIEW) + || context.getImpalaOperationType().equals(ImpalaOperationType.ALTERVIEW_AS) + || context.getImpalaOperationType().equals(ImpalaOperationType.CREATETABLE_AS_SELECT)); + } +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/CreateImpalaProcess.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/CreateImpalaProcess.java new file mode 100644 index 0000000000..5e6ea5a55f --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/events/CreateImpalaProcess.java @@ -0,0 +1,361 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.hook.events; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.atlas.impala.hook.AtlasImpalaHookContext; +import org.apache.atlas.impala.model.ImpalaDataType; +import org.apache.atlas.impala.model.ImpalaDependencyType; +import org.apache.atlas.impala.model.ImpalaNode; +import org.apache.atlas.impala.model.ImpalaVertexType; +import org.apache.atlas.impala.model.LineageEdge; +import org.apache.atlas.impala.model.ImpalaQuery; +import org.apache.atlas.impala.model.LineageVertex; +import org.apache.atlas.impala.model.LineageVertexMetadata; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2; +import org.apache.commons.collections.CollectionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CreateImpalaProcess extends BaseImpalaEvent { + private static final Logger LOG = LoggerFactory.getLogger(CreateImpalaProcess.class); + + public CreateImpalaProcess(AtlasImpalaHookContext context) { + super(context); + } + + public List getNotificationMessages() throws Exception { + List ret = null; + AtlasEntitiesWithExtInfo entities = getEntities(); + + if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) { + ret = Collections.singletonList(new EntityCreateRequestV2(getUserName(), entities)); + } + + return ret; + } + + public AtlasEntitiesWithExtInfo getEntities() throws Exception { + AtlasEntitiesWithExtInfo ret = null; + List inputNodes = new ArrayList<>(); + List outputNodes = new ArrayList<>(); + List inputs = new ArrayList<>(); + List outputs = new ArrayList<>(); + Set processedNames = new HashSet<>(); + + getInputOutList(context.getLineageQuery(), inputNodes, outputNodes); + + if (skipProcess(inputNodes, outputNodes)) { + return ret; + } + + ret = new AtlasEntitiesWithExtInfo(); + + if (!inputNodes.isEmpty()) { + for (ImpalaNode input : inputNodes) { + String qualifiedName = getQualifiedName(input); + + if (qualifiedName == null || !processedNames.add(qualifiedName)) { + continue; + } + + AtlasEntity entity = getInputOutputEntity(input, ret); + + if (entity != null) { + inputs.add(entity); + } + } + } + + if (outputNodes != null) { + for (ImpalaNode output : outputNodes) { + String qualifiedName = getQualifiedName(output); + + if (qualifiedName == null || !processedNames.add(qualifiedName)) { + continue; + } + + AtlasEntity entity = getInputOutputEntity(output, ret); + + if (entity != null) { + outputs.add(entity); + + if (isDdlOperation()) { + AtlasEntity ddlEntity = createHiveDDLEntity(entity); + if (ddlEntity != null) { + ret.addEntity(ddlEntity); + } + } + } + } + } + + if (!inputs.isEmpty() || !outputs.isEmpty()) { + AtlasEntity process = getImpalaProcessEntity(inputs, outputs); + if (process!= null) { + if (LOG.isDebugEnabled()) { + LOG.debug("get process entity with qualifiedName: {}", + 
process.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + } + + ret.addEntity(process); + + AtlasEntity processExecution = getImpalaProcessExecutionEntity(process); + if (processExecution != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("get process executition entity with qualifiedName: {}", + processExecution.getAttribute(ATTRIBUTE_QUALIFIED_NAME)); + } + + ret.addEntity(processExecution); + } + + processColumnLineage(process, ret); + + addProcessedEntities(ret); + } + } else { + ret = null; + } + + + return ret; + } + + private void processColumnLineage(AtlasEntity impalaProcess, AtlasEntitiesWithExtInfo entities) { + List edges = context.getLineageQuery().getEdges(); + + if (CollectionUtils.isEmpty(edges)) { + return; + } + + final List columnLineages = new ArrayList<>(); + final Set processedOutputCols = new HashSet<>(); + + for (LineageEdge edge : edges) { + + if (!edge.getEdgeType().equals(ImpalaDependencyType.PROJECTION)) { + // Impala dependency type can only be predicate or projection. + // Impala predicate dependency: This is a dependency between a set of target + // columns (or exprs) and a set of source columns (base table columns). It + // indicates that the source columns restrict the values of their targets (e.g. + // by participating in WHERE clause predicates). It should not be part of lineage + continue; + } + + List outputColumns = new ArrayList<>(); + for (Long targetId : edge.getTargets()) { + LineageVertex columnVertex = verticesMap.get(targetId); + String outputColName = getQualifiedName(columnVertex); + AtlasEntity outputColumn = context.getEntity(outputColName); + + if (LOG.isDebugEnabled()) { + LOG.debug("processColumnLineage(): target id = {}, target column name = {}", + targetId, outputColName); + } + + if (outputColumn == null) { + LOG.warn("column-lineage: non-existing output-column {}", outputColName); + continue; + } + + if (processedOutputCols.contains(outputColName)) { + LOG.warn("column-lineage: duplicate for output-column {}", outputColName); + continue; + } else { + processedOutputCols.add(outputColName); + } + + outputColumns.add(outputColumn); + } + + List inputColumns = new ArrayList<>(); + + for (Long sourceId : edge.getSources()) { + LineageVertex columnVertex = verticesMap.get(sourceId); + String inputColName = getQualifiedName(columnVertex); + AtlasEntity inputColumn = context.getEntity(inputColName); + + if (inputColumn == null) { + LOG.warn("column-lineage: non-existing input-column {} with id ={}", inputColName, sourceId); + continue; + } + + inputColumns.add(inputColumn); + } + + if (inputColumns.isEmpty()) { + continue; + } + + AtlasEntity columnLineageProcess = new AtlasEntity(ImpalaDataType.IMPALA_COLUMN_LINEAGE.getName()); + + String columnQualifiedName = (String)impalaProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + + AtlasImpalaHookContext.QNAME_SEP_PROCESS + outputColumns.get(0).getAttribute(ATTRIBUTE_NAME); + columnLineageProcess.setAttribute(ATTRIBUTE_NAME, columnQualifiedName); + columnLineageProcess.setAttribute(ATTRIBUTE_QUALIFIED_NAME, columnQualifiedName); + columnLineageProcess.setAttribute(ATTRIBUTE_INPUTS, getObjectIds(inputColumns)); + columnLineageProcess.setAttribute(ATTRIBUTE_OUTPUTS, getObjectIds(outputColumns)); + columnLineageProcess.setAttribute(ATTRIBUTE_QUERY, getObjectId(impalaProcess)); + + // based on https://github.com/apache/impala/blob/master/fe/src/main/java/org/apache/impala/analysis/ColumnLineageGraph.java#L267 + // There are two types of dependencies that are represented as edges in the column + // lineage 
graph: + // a) Projection dependency: This is a dependency between a set of source + // columns (base table columns) and a single target (result expr or table column). + // This dependency indicates that values of the target depend on the values of the source + // columns. + // b) Predicate dependency: This is a dependency between a set of target + // columns (or exprs) and a set of source columns (base table columns). It indicates that + // the source columns restrict the values of their targets (e.g. by participating in + // WHERE clause predicates). + columnLineageProcess.setAttribute(ATTRIBUTE_DEPENDENCY_TYPE, ImpalaDependencyType.PROJECTION.getName()); + + columnLineages.add(columnLineageProcess); + } + + for (AtlasEntity columnLineage : columnLineages) { + String columnQualifiedName = (String)columnLineage.getAttribute(ATTRIBUTE_QUALIFIED_NAME); + if (LOG.isDebugEnabled()) { + LOG.debug("get column lineage entity with qualifiedName: {}", columnQualifiedName); + } + + entities.addEntity(columnLineage); + } + } + + // Process the impala query, classify the vertices as input or output based on LineageEdge + // Then organize the vertices into hierarchical structure: put all column vertices of a table + // as children of a ImpalaNode representing that table. + private void getInputOutList(ImpalaQuery lineageQuery, List inputNodes, + List outputNodes) { + // get vertex map with key being its id and + // ImpalaNode map with its own vertex's vertexId as its key + for (LineageVertex vertex : lineageQuery.getVertices()) { + updateVertexMap(vertex); + } + + // get set of source ID and set of target Id + Set sourceIds = new HashSet<>(); + Set targetIds = new HashSet<>(); + for (LineageEdge edge : lineageQuery.getEdges()) { + if (ImpalaDependencyType.PROJECTION.equals(edge.getEdgeType())) { + sourceIds.addAll(edge.getSources()); + targetIds.addAll(edge.getTargets()); + } + } + + Map inputMap = buildInputOutputList(sourceIds, verticesMap, vertexNameMap); + Map outputMap = buildInputOutputList(targetIds, verticesMap, vertexNameMap); + + inputNodes.addAll(inputMap.values()); + outputNodes.addAll(outputMap.values()); + } + + // Update internal maps using this vertex. + private void updateVertexMap(LineageVertex vertex) { + verticesMap.put(vertex.getId(), vertex); + vertexNameMap.put(vertex.getVertexId(), new ImpalaNode(vertex)); + + if (vertex.getVertexType() == ImpalaVertexType.COLUMN) { + LineageVertexMetadata metadata = vertex.getMetadata(); + + if (metadata == null) { + return; + } + + // if the vertex is column and contains metadata, create a vertex for its table + String tableName = metadata.getTableName(); + ImpalaNode tableNode = vertexNameMap.get(tableName); + + if (tableNode == null) { + tableNode = createTableNode(tableName, metadata.getTableCreateTime()); + vertexNameMap.put(tableName, tableNode); + } + } + } + + /** + * From the list of Ids and Id to Vertices map, generate the Table name to ImpalaNode map. + * @param idSet the list of Ids. They are from lineage edges + * @param vertexMap the Id to Vertex map + * @param vertexNameMap the vertexId to ImpalaNode map. 
+ * @return the table name to ImpalaNode map, whose table node contains its columns + */ + private Map buildInputOutputList(Set idSet, Map vertexMap, + Map vertexNameMap) { + Map returnTableMap = new HashMap<>(); + + for (Long id : idSet) { + LineageVertex vertex = vertexMap.get(id); + if (vertex == null) { + LOG.warn("cannot find vertex with id: {}", id); + continue; + } + + if (ImpalaVertexType.COLUMN.equals(vertex.getVertexType())) { + // add column to its table node + String tableName = getTableNameFromVertex(vertex); + if (tableName == null) { + LOG.warn("cannot find tableName for vertex with id: {}, column name : {}", + id, vertex.getVertexId() == null? "null" : vertex.getVertexId()); + + continue; + } + + ImpalaNode tableNode = returnTableMap.get(tableName); + + if (tableNode == null) { + tableNode = vertexNameMap.get(tableName); + + if (tableNode == null) { + LOG.warn("cannot find table node for vertex with id: {}, column name : {}", + id, vertex.getVertexId()); + + tableNode = createTableNode(tableName, getCreateTimeInVertex(null)); + vertexNameMap.put(tableName, tableNode); + } + + returnTableMap.put(tableName, tableNode); + } + + tableNode.addChild(vertex); + } + } + + return returnTableMap; + } + + private boolean skipProcess(List inputNodes, List ouputNodes) { + if (inputNodes.isEmpty() || ouputNodes.isEmpty()) { + return true; + } + + return false; + } +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaDataType.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaDataType.java new file mode 100644 index 0000000000..4e0d478706 --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaDataType.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.impala.model; + +/** + * Data types used for Impala bridge + */ +public enum ImpalaDataType { + + IMPALA_PROCESS, + IMPALA_PROCESS_EXECUTION, + IMPALA_COLUMN_LINEAGE, + HIVE_DB_DDL, + HIVE_TABLE_DDL; + + public String getName() { + return name().toLowerCase(); + } +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaDependencyType.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaDependencyType.java new file mode 100644 index 0000000000..892ee9b2fb --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaDependencyType.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
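Editor's note (illustrative, not part of the patch): the processColumnLineage() logic above only turns PROJECTION edges into column-lineage entities; PREDICATE edges are deliberately skipped because they merely restrict values. The sketch below isolates that edge-filtering and id-resolution step using the model classes this patch introduces. The qualifiedName function is a hypothetical stand-in for the AtlasImpalaHookContext lookups used above (getQualifiedName/getEntity), and the List element types elided in this diff rendering (Long ids, LineageEdge/LineageVertex elements) are assumed.

```java
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

import org.apache.atlas.impala.model.ImpalaDependencyType;
import org.apache.atlas.impala.model.LineageEdge;
import org.apache.atlas.impala.model.LineageVertex;

public class ProjectionEdgeSketch {
    /**
     * For every PROJECTION edge, resolve the numeric vertex ids to column names and
     * record which input columns feed each output column. PREDICATE edges are skipped,
     * mirroring processColumnLineage() above.
     */
    public static Map<String, Set<String>> columnLevelLineage(List<LineageEdge> edges,
                                                              Map<Long, LineageVertex> verticesMap,
                                                              Function<LineageVertex, String> qualifiedName) {
        Map<String, Set<String>> outputToInputs = new LinkedHashMap<>();

        for (LineageEdge edge : edges) {
            if (!ImpalaDependencyType.PROJECTION.equals(edge.getEdgeType())) {
                continue; // predicate edges only restrict values; they do not contribute lineage
            }

            Set<String> inputs = new LinkedHashSet<>();
            for (Long sourceId : edge.getSources()) {
                LineageVertex source = verticesMap.get(sourceId);
                if (source != null) {
                    inputs.add(qualifiedName.apply(source));
                }
            }

            for (Long targetId : edge.getTargets()) {
                LineageVertex target = verticesMap.get(targetId);
                if (target != null) {
                    outputToInputs.computeIfAbsent(qualifiedName.apply(target), k -> new LinkedHashSet<>())
                                  .addAll(inputs);
                }
            }
        }

        return outputToInputs;
    }
}
```

In the actual hook each output/input pairing additionally becomes an impala_column_lineage entity whose qualifiedName is the process qualifiedName plus the output column name, as the code above shows.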
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.model; + +public enum ImpalaDependencyType { + PROJECTION("PROJECTION"), + PREDICATE("PREDICATE"); + + private final String name; + + ImpalaDependencyType(String name) { + this.name = name; + } + + public String getName() { + return name.toUpperCase(); + } +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaNode.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaNode.java new file mode 100644 index 0000000000..a3ddf53729 --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaNode.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
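A quick illustration (not part of the patch): ImpalaDataType.getName() lower-cases the constant, so these values line up with the Atlas type names the integration tests later query ("impala_process", "impala_column_lineage", ...), while ImpalaDependencyType.getName() keeps the upper-case form that appears in the lineage JSON.

```java
import org.apache.atlas.impala.model.ImpalaDataType;
import org.apache.atlas.impala.model.ImpalaDependencyType;

public class TypeNameSketch {
    public static void main(String[] args) {
        System.out.println(ImpalaDataType.IMPALA_PROCESS.getName());        // impala_process
        System.out.println(ImpalaDataType.IMPALA_COLUMN_LINEAGE.getName()); // impala_column_lineage
        System.out.println(ImpalaDependencyType.PROJECTION.getName());      // PROJECTION
    }
}
```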
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.model; + +import java.util.HashMap; +import java.util.Map; + +/** + * Contain vertex info of this node and its children. It is used only internally + */ +public class ImpalaNode { + LineageVertex ownVertex; + Map children; + + public ImpalaNode(LineageVertex ownVertex) { + this.ownVertex = ownVertex; + children = new HashMap<>(); + } + + public String getNodeName() { return ownVertex.getVertexId(); } + public ImpalaVertexType getNodeType() { return ownVertex.getVertexType(); } + public LineageVertex getOwnVertex() { return ownVertex; } + public Map getChildren() { return children; } + + /** + * Add child to this node + * @param child + * @return the node corresponding to the input child vertex + */ + public ImpalaNode addChild(LineageVertex child) { + ImpalaNode exitingChild = children.get(child.getId()); + if (exitingChild != null) { + return exitingChild; + } + + ImpalaNode newChild = new ImpalaNode(child); + return children.put(child.getId(), newChild); + } +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaOperationType.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaOperationType.java new file mode 100644 index 0000000000..a893b8845c --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaOperationType.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
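For context (illustrative sketch, not part of the patch): updateVertexMap() and buildInputOutputList() above group every column vertex under an ImpalaNode that represents its table. The snippet below shows the shape of that hierarchy using the LineageVertex bean defined later in this patch; it only approximates what the createTableNode() helper (referenced above but not shown in this hunk) presumably does. Note that ImpalaNode.addChild() returns Map.put()'s previous mapping, which is null for a newly added column, so callers cannot rely on its return value to obtain the new child.

```java
import org.apache.atlas.impala.model.ImpalaNode;
import org.apache.atlas.impala.model.ImpalaVertexType;
import org.apache.atlas.impala.model.LineageVertex;

public class ImpalaNodeSketch {
    /** Builds a table node and hangs its column vertices off it, keyed by vertex id. */
    public static ImpalaNode tableWithColumns(String tableName, Long createTimeSeconds,
                                              LineageVertex... columnVertices) {
        LineageVertex tableVertex = new LineageVertex();
        tableVertex.setVertexType(ImpalaVertexType.TABLE);
        tableVertex.setVertexId(tableName);            // e.g. "db_1.table_1"
        tableVertex.setCreateTime(createTimeSeconds);  // lineage log reports seconds, not millis

        ImpalaNode tableNode = new ImpalaNode(tableVertex);
        for (LineageVertex column : columnVertices) {
            tableNode.addChild(column);                // children map is keyed by column.getId()
        }
        return tableNode;
    }
}
```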
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.impala.model; + +public enum ImpalaOperationType{ + // main operation type + CREATEVIEW ("CREATEVIEW"), + CREATETABLE_AS_SELECT ("CREATETABLE_AS_SELECT"), + ALTERVIEW_AS ("ALTERVIEW_AS"), + QUERY ("QUERY"), + + // sub operation type, which is associated with output + INSERT ("INSERT"), + INSERT_OVERWRITE ("INSERT_OVERWRITE"), + + // default type + UNKNOWN ("UNKNOWN"); + + private final String name; + + ImpalaOperationType(String s) { + name = s; + } + + public boolean equalsName(String otherName) { + return name.equals(otherName); + } + + public String toString() { + return this.name; + } +} \ No newline at end of file diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaQuery.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaQuery.java new file mode 100644 index 0000000000..27bdc72e9d --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaQuery.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.model; + +import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.NONE; +import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.PUBLIC_ONLY; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.List; + +/** + * Represent an Impala lineage record in lineage log. + */ +@JsonAutoDetect(getterVisibility=PUBLIC_ONLY, setterVisibility=PUBLIC_ONLY, fieldVisibility=NONE) +@JsonSerialize(include=JsonSerialize.Inclusion.NON_NULL) +@JsonIgnoreProperties(ignoreUnknown=true) +public class ImpalaQuery { + private String queryText; + private String queryId; + private String hash; + private String user; + + // the time stamp is in seconds. It is Unix epoch, which is the number of seconds that have + // elapsed since January 1, 1970 (midnight UTC/GMT), not counting leap seconds + private Long timestamp; + private Long endTime; + private List edges; + private List vertices; + + public List getEdges() { + return edges; + } + + public List getVertices() { + return vertices; + } + + public Long getEndTime() { + return endTime; + } + + public String getHash() { + return hash; + } + + public String getQueryId() { + return queryId; + } + + public String getQueryText() { + return queryText; + } + + public Long getTimestamp() { + return timestamp; + } + + public String getUser() { + return user; + } + + public void setEdges(List edges) { + this.edges = edges; + } + + public void setEndTime(Long endTime) { + this.endTime = endTime; + } + + public void setHash(String hash) { + this.hash = hash; + } + + public void setQueryId(String queryId) { + this.queryId = queryId; + } + + public void setQueryText(String queryText) { + this.queryText = queryText; + } + + public void setTimestamp(Long timestamp) { this.timestamp = timestamp; } + + public void setUser(String user) { + this.user = user; + } + + public void setVertices(List vertices) { + this.vertices = vertices; + } + +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaVertexType.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaVertexType.java new file mode 100644 index 0000000000..8ec3f857ad --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/ImpalaVertexType.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
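Worked example of the unit conversion (illustrative, not part of the patch): as the comment on the timestamp field says, Impala reports time in seconds since the Unix epoch, whereas Atlas timestamps, and the createTime suffix the tests in this patch append to process qualified names, are in milliseconds, hence the multiplication by 1000 seen in the integration tests.

```java
public class TimestampUnits {
    public static void main(String[] args) {
        long lineageTimestampSecs = 1554750072L;                  // seconds, as in the lineage record
        long atlasTimestampMillis = lineageTimestampSecs * 1000L; // 1554750072000, the value the tests assert on
        System.out.println(atlasTimestampMillis);
    }
}
```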
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.model; + +public enum ImpalaVertexType { + DFS_DIR("DFS_DIR"), + PARTITION("PARTITION"), + COLUMN("COLUMN"), + TABLE("TABLE"), + DATABASE("DATABASE"); + + private final String name; + + ImpalaVertexType(String name) { + this.name = name; + } + + public String getName() { + return name.toUpperCase(); + } +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageEdge.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageEdge.java new file mode 100644 index 0000000000..251507e9e1 --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageEdge.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.model; + +import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.NONE; +import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.PUBLIC_ONLY; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.List; + +/** + * This represents an edge in Impala's lineage record that connects two entities + */ +@JsonAutoDetect(getterVisibility=PUBLIC_ONLY, setterVisibility=PUBLIC_ONLY, fieldVisibility=NONE) +@JsonSerialize(include=JsonSerialize.Inclusion.NON_NULL) +@JsonIgnoreProperties(ignoreUnknown=true) +public class LineageEdge { + private List sources; + private List targets; + private ImpalaDependencyType edgeType; + + public List getSources() { + return sources; + } + + public List getTargets() { + return targets; + } + + public ImpalaDependencyType getEdgeType() { + return edgeType; + } + + public void setSources(List sources) { + this.sources = sources; + } + + public void setTargets(List targets) { + this.targets = targets; + } + + public void setEdgeType(ImpalaDependencyType edgeType) { + this.edgeType = edgeType; + } +} diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertex.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertex.java new file mode 100644 index 0000000000..0a664fc8b6 --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertex.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.model; + +import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.NONE; +import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.PUBLIC_ONLY; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + +/** + * This represents an entity in Impala's lineage record. + */ +@JsonAutoDetect(getterVisibility=PUBLIC_ONLY, setterVisibility=PUBLIC_ONLY, fieldVisibility=NONE) +@JsonSerialize(include=JsonSerialize.Inclusion.NON_NULL) +@JsonIgnoreProperties(ignoreUnknown=true) +public class LineageVertex { + // id is used to reference this entity. It is used in LineageEdge to specify source and target + // https://github.com/apache/impala/blob/master/be/src/util/lineage-util.h#L40 + // Impala id is int64. Therefore, define this field as Long + private Long id; + + // specify the type of the entity, it could be "TABLE", "COLUMN" etc. + private ImpalaVertexType vertexType; + + // specify the name of the entity + private String vertexId; + + // It is optional, and could be null. It is only set if the entity is a column, and this field contains metadata of its table. + private LineageVertexMetadata metadata; + + // It is optional. Its unit in seconds. + private Long createTime; + + public Long getId() { return id; } + + public ImpalaVertexType getVertexType() { + return vertexType; + } + + public String getVertexId() { + return vertexId; + } + + public LineageVertexMetadata getMetadata() { + return metadata; + } + + public Long getCreateTime() { return createTime; } + + public void setId(Long id) { + this.id = id; + } + + public void setVertexType(ImpalaVertexType vertexType) { + this.vertexType = vertexType; + } + + public void setVertexId(String vertexId) { + this.vertexId = vertexId; + } + + public void setMetadata(LineageVertexMetadata metadata) { this.metadata = metadata; } + + public void setCreateTime(Long createTime) { this.createTime = createTime; } +} \ No newline at end of file diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertexMetadata.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertexMetadata.java new file mode 100644 index 0000000000..2b3226c21d --- /dev/null +++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/model/LineageVertexMetadata.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
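Because ImpalaQuery, LineageEdge and LineageVertex above are plain Jackson beans, a line of the Impala lineage log can be parsed directly with an ObjectMapper. The sketch below is illustrative only: the record is shaped after these bean fields (including the metadata block defined just below), not copied from an actual Impala log, and the generic element types stripped by this diff rendering (List<LineageEdge>, List<LineageVertex>, List<Long>) are assumed.

```java
import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.atlas.impala.model.ImpalaQuery;
import org.apache.atlas.impala.model.LineageEdge;
import org.apache.atlas.impala.model.LineageVertex;

public class LineageRecordParseSketch {
    public static void main(String[] args) throws Exception {
        String json =
            "{"
            + "\"queryText\":\"create view db_1.view_1 as select count, id from db_1.table_1\","
            + "\"queryId\":\"1a2b3c:4d5e6f\","                 // illustrative value
            + "\"hash\":\"64ff0425ccdfaada\","                  // illustrative value
            + "\"user\":\"impala\","
            + "\"timestamp\":1554750072,"                       // seconds since the Unix epoch
            + "\"edges\":[{\"sources\":[2,3],\"targets\":[0,1],\"edgeType\":\"PROJECTION\"}],"
            + "\"vertices\":["
            +   "{\"id\":0,\"vertexType\":\"COLUMN\",\"vertexId\":\"db_1.view_1.count\"},"
            +   "{\"id\":1,\"vertexType\":\"COLUMN\",\"vertexId\":\"db_1.view_1.id\"},"
            +   "{\"id\":2,\"vertexType\":\"COLUMN\",\"vertexId\":\"db_1.table_1.count\","
            +    "\"metadata\":{\"tableName\":\"db_1.table_1\",\"tableCreateTime\":1554750070}},"
            +   "{\"id\":3,\"vertexType\":\"COLUMN\",\"vertexId\":\"db_1.table_1.id\"}"
            + "]}";

        ImpalaQuery query = new ObjectMapper().readValue(json, ImpalaQuery.class);

        for (LineageEdge edge : query.getEdges()) {
            System.out.println(edge.getEdgeType() + ": " + edge.getSources() + " -> " + edge.getTargets());
        }
        for (LineageVertex vertex : query.getVertices()) {
            System.out.println(vertex.getId() + " = " + vertex.getVertexId());
        }
    }
}
```

The enum-valued fields bind by constant name (PROJECTION, COLUMN), and unknown or absent properties are tolerated thanks to the @JsonIgnoreProperties(ignoreUnknown=true) annotation on the beans.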
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.impala.model; + +import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.NONE; +import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.PUBLIC_ONLY; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + +/** + * This represents optional metadata in Impala's lineage vertex entity. + */ +@JsonAutoDetect(getterVisibility=PUBLIC_ONLY, setterVisibility=PUBLIC_ONLY, fieldVisibility=NONE) +@JsonSerialize(include=JsonSerialize.Inclusion.NON_NULL) +@JsonIgnoreProperties(ignoreUnknown=true) +public class LineageVertexMetadata { + // specify the name of the table + private String tableName; + + // the create time of the table. Its unit is in seconds. + private Long tableCreateTime; + + public String getTableName() { return tableName; } + + public Long getTableCreateTime() { return tableCreateTime; } + + public void setTableName(String tableName) { this.tableName = tableName; } + + public void setTableCreateTime(Long createTime) { this.tableCreateTime = createTime; } +} diff --git a/addons/impala-bridge/src/main/resources/atlas-log4j.xml b/addons/impala-bridge/src/main/resources/atlas-log4j.xml new file mode 100644 index 0000000000..97317a8754 --- /dev/null +++ b/addons/impala-bridge/src/main/resources/atlas-log4j.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/addons/impala-bridge/src/main/resources/import-impala.sh b/addons/impala-bridge/src/main/resources/import-impala.sh new file mode 100644 index 0000000000..b440f2d056 --- /dev/null +++ b/addons/impala-bridge/src/main/resources/import-impala.sh @@ -0,0 +1,114 @@ +#!/bin/bash +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. +# +# resolve links - $0 may be a softlink +PRG="${0}" + +[[ `uname -s` == *"CYGWIN"* ]] && CYGWIN=true + +while [ -h "${PRG}" ]; do + ls=`ls -ld "${PRG}"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "${PRG}"`/"$link" + fi +done + +BASEDIR=`dirname ${PRG}` + +if test -z "${JAVA_HOME}" +then + JAVA_BIN=`which java` + JAR_BIN=`which jar` +else + JAVA_BIN="${JAVA_HOME}/bin/java" + JAR_BIN="${JAVA_HOME}/bin/jar" +fi +export JAVA_BIN + +if [ ! -e "${JAVA_BIN}" ] || [ ! -e "${JAR_BIN}" ]; then + echo "$JAVA_BIN and/or $JAR_BIN not found on the system. Please make sure java and jar commands are available." 
+ exit 1 +fi + +# Construct ATLAS_CONF where atlas-properties reside +# assume the hive-server2 is installed and contains Atlas configuration +# Otherwise, need to setup Atlas required properties and libraries before running this tool +if [ ! -z "$HIVE_CONF_DIR" ]; then + HIVE_CONF=$HIVE_CONF_DIR +elif [ ! -z "$HIVE_HOME" ]; then + HIVE_CONF="$HIVE_HOME/conf" +elif [ -e /etc/hive/conf ]; then + HIVE_CONF="/etc/hive/conf" +else + echo "Could not find a valid HIVE configuration for ATLAS" + exit 1 +fi +if [ -z "$ATLAS_CONF" ]; then + export ATLAS_CONF=$HIVE_CONF +fi + +# log dir for applications +ATLAS_LOG_DIR="/var/log/atlas" +ATLAS_LOG_FILE="impala-bridge.log" +LOG_CONFIG="${BASEDIR}/atlas-log4j.xml" + +# Construct Atlas classpath. +DIR=$PWD +PARENT="$(dirname "$DIR")" +GRANDPARENT="$(dirname "$PARENT")" +LIB_PATH="$GRANDPARENT/server/webapp/atlas/WEB-INF/lib" +echo "$LIB_PATH" +# Construct Atlas classpath. +for i in "$LIB_PATH/"*.jar; do + ATLASCPPATH="${ATLASCPPATH}:$i" +done + +for i in "${BASEDIR}/"*.jar; do + ATLASCPPATH="${ATLASCPPATH}:$i" +done + +if [ -z "${ATLAS_CONF_DIR}" ] && [ -e /etc/atlas/conf ];then + ATLAS_CONF_DIR=/etc/atlas/conf +fi +ATLASCPPATH=${ATLASCPPATH}:${ATLAS_CONF_DIR} + +echo "Logging: ${ATLAS_LOG_DIR}/${ATLAS_LOG_FILE}" +echo "Log config: ${LOG_CONFIG}" + +TIME=`date %Y%m%d%H%M%s` +CP="${ATLASCPPATH}:${ATLAS_CONF}" + +# If running in cygwin, convert pathnames and classpath to Windows format. +if [ "${CYGWIN}" == "true" ] +then + ATLAS_LOG_DIR=`cygpath -w ${ATLAS_LOG_DIR}` + ATLAS_LOG_FILE=`cygpath -w ${ATLAS_LOG_FILE}` + CP=`cygpath -w -p ${CP}` +fi + +JAVA_PROPERTIES="$ATLAS_OPTS -Datlas.log.dir=$ATLAS_LOG_DIR -Datlas.log.file=$ATLAS_LOG_FILE -Dlog4j.configuration=file://$LOG_CONFIG" + +IMPORT_ARGS=$@ +JVM_ARGS= + +JAVA_PROPERTIES="${JAVA_PROPERTIES} ${JVM_ARGS}" +"${JAVA_BIN}" ${JAVA_PROPERTIES} -cp "${CP}" org.apache.atlas.impala.ImpalaLineageTool $IMPORT_ARGS + +RETVAL=$? +[ $RETVAL -eq 0 ] && echo Done! +[ $RETVAL -ne 0 ] && echo Failed! +exit $RETVAL \ No newline at end of file diff --git a/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageITBase.java b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageITBase.java new file mode 100644 index 0000000000..ef23a26d10 --- /dev/null +++ b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageITBase.java @@ -0,0 +1,495 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.impala; + +import static org.apache.atlas.impala.hook.events.BaseImpalaEvent.ATTRIBUTE_QUALIFIED_NAME; +import static org.apache.atlas.impala.hook.events.BaseImpalaEvent.ATTRIBUTE_QUERY_TEXT; +import static org.apache.atlas.impala.hook.events.BaseImpalaEvent.ATTRIBUTE_RECENT_QUERIES; +import static org.apache.atlas.impala.hook.events.BaseImpalaEvent.HIVE_TYPE_DB; +import static org.apache.atlas.impala.hook.events.BaseImpalaEvent.HIVE_TYPE_TABLE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.fail; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClientV2; +import org.apache.atlas.impala.hook.AtlasImpalaHookContext; +import org.apache.atlas.impala.hook.ImpalaLineageHook; +import org.apache.atlas.impala.hook.events.BaseImpalaEvent; +import org.apache.atlas.impala.model.ImpalaDataType; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.atlas.utils.ParamChecker; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.testng.annotations.BeforeClass; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; + +public class ImpalaLineageITBase { + private static final Logger LOG = LoggerFactory.getLogger(ImpalaLineageITBase.class); + + public static final String DEFAULT_DB = "default"; + public static final String SEP = ":".intern(); + public static final String IO_SEP = "->".intern(); + protected static final String DGI_URL = "http://localhost:21000/"; + protected static final String CLUSTER_NAME = "primary"; + protected static final String PART_FILE = "2015-01-01"; + protected static final String INPUTS = "inputs"; + protected static final String OUTPUTS = "outputs"; + protected static AtlasClientV2 atlasClientV2; + + private static final String REFERENCEABLE_ATTRIBUTE_NAME = "qualifiedName"; + private static final String ATTR_NAME = "name"; + + // to push entity creation/update to HMS, so HMS hook can push the metadata notification + // to Atlas, then the lineage notification from this tool can be created at Atlas + protected static Driver driverWithoutContext; + protected static SessionState ss; + protected static HiveConf conf; + + + @BeforeClass + public void setUp() throws Exception { + //Set-up hive session + conf = new HiveConf(); + conf.setClassLoader(Thread.currentThread().getContextClassLoader()); + HiveConf conf = new HiveConf(); + SessionState ss = new SessionState(conf); + ss = SessionState.start(ss); + SessionState.setCurrentSessionState(ss); + driverWithoutContext = new Driver(conf); + + Configuration configuration = ApplicationProperties.get(); + + String[] atlasEndPoint = configuration.getStringArray(ImpalaLineageHook.ATLAS_ENDPOINT); + if (atlasEndPoint == null || atlasEndPoint.length == 0) { + atlasEndPoint = new String[]{DGI_URL}; + } + + if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) { + atlasClientV2 = new 
AtlasClientV2(atlasEndPoint, new String[]{"admin", "admin"}); + } else { + atlasClientV2 = new AtlasClientV2(atlasEndPoint); + } + + } + + // return guid of the entity + protected String assertEntityIsRegistered(final String typeName, final String property, final String value, + final AssertPredicate assertPredicate) throws Exception { + waitFor(100000, new Predicate() { + @Override + public void evaluate() throws Exception { + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections + .singletonMap(property,value)); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + assertNotNull(entity); + if (assertPredicate != null) { + assertPredicate.assertOnEntity(entity); + } + } + }); + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(property,value)); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + return (String) entity.getGuid(); + } + + protected String assertEntityIsRegistered(final String typeName, List processQFNames, + final AssertPredicates assertPredicates) throws Exception { + List> attributesList = new ArrayList<>(); + + for (String processName : processQFNames) { + attributesList.add(Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, processName)); + } + + return waitForWithReturn(80000, new PredicateWithReturn() { + @Override + public String evaluate() throws Exception { + AtlasEntity.AtlasEntitiesWithExtInfo atlasEntitiesWithExtInfo = atlasClientV2.getEntitiesByAttribute(typeName, attributesList); + List entities = atlasEntitiesWithExtInfo.getEntities(); + assertNotNull(entities); + if (assertPredicates != null) { + return assertPredicates.assertOnEntities(entities); + } + + return null; + } + }); + } + + protected String assertEntityIsRegisteredViaGuid(String guid, + final AssertPredicate assertPredicate) throws Exception { + waitFor(80000, new Predicate() { + @Override + public void evaluate() throws Exception { + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + assertNotNull(entity); + if (assertPredicate != null) { + assertPredicate.assertOnEntity(entity); + } + + } + }); + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2.getEntityByGuid(guid); + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + return (String) entity.getGuid(); + } + + + protected String assertProcessIsRegistered(List processQFNames, String queryString) throws Exception { + try { + Thread.sleep(5000); + + LOG.debug("Searching for process with query {}", queryString); + + return assertEntityIsRegistered(ImpalaDataType.IMPALA_PROCESS.getName(), processQFNames, new AssertPredicates() { + @Override + public String assertOnEntities(final List entities) throws Exception { + for (AtlasEntity entity : entities) { + List recentQueries = (List) entity + .getAttribute(ATTRIBUTE_RECENT_QUERIES); + + if (queryString.equalsIgnoreCase(recentQueries.get(0))) + return entity.getGuid(); + + } + + throw new IllegalStateException("Not found entity with matching query"); + } + }); + } catch(Exception e) { + LOG.error("Exception : ", e); + throw e; + } + } + + protected String assertProcessIsRegistered(String processQFName, String queryString) throws Exception { + try { + Thread.sleep(5000); + + LOG.debug("Searching for process with qualified name {} and query {}", processQFName, queryString); + + return 
assertEntityIsRegistered(ImpalaDataType.IMPALA_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQFName, new AssertPredicate() { + @Override + public void assertOnEntity(final AtlasEntity entity) throws Exception { + List recentQueries = (List) entity.getAttribute(ATTRIBUTE_RECENT_QUERIES); + + Assert.assertEquals(recentQueries.get(0), lower(queryString)); + } + }); + } catch(Exception e) { + LOG.error("Exception : ", e); + throw e; + } + } + + private String assertProcessExecutionIsRegistered(AtlasEntity impalaProcess, final String queryString) throws Exception { + try { + Thread.sleep(5000); + + String guid = ""; + List processExecutions = toAtlasObjectIdList(impalaProcess.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS_EXECUTIONS)); + for (AtlasObjectId processExecution : processExecutions) { + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2. + getEntityByGuid(processExecution.getGuid()); + + AtlasEntity entity = atlasEntityWithExtInfo.getEntity(); + if (String.valueOf(entity.getAttribute(ATTRIBUTE_QUERY_TEXT)).equals(queryString.toLowerCase().trim())) { + guid = entity.getGuid(); + break; + } + } + + return assertEntityIsRegisteredViaGuid(guid, new AssertPredicate() { + @Override + public void assertOnEntity(final AtlasEntity entity) throws Exception { + String queryText = (String) entity.getAttribute(ATTRIBUTE_QUERY_TEXT); + Assert.assertEquals(queryText, queryString.toLowerCase().trim()); + } + }); + } catch(Exception e) { + LOG.error("Exception : ", e); + throw e; + } + } + + protected AtlasObjectId toAtlasObjectId(Object obj) { + final AtlasObjectId ret; + + if (obj instanceof AtlasObjectId) { + ret = (AtlasObjectId) obj; + } else if (obj instanceof Map) { + ret = new AtlasObjectId((Map) obj); + } else if (obj != null) { + ret = new AtlasObjectId(obj.toString()); // guid + } else { + ret = null; + } + + return ret; + } + + protected List toAtlasObjectIdList(Object obj) { + final List ret; + + if (obj instanceof Collection) { + Collection coll = (Collection) obj; + + ret = new ArrayList<>(coll.size()); + + for (Object item : coll) { + AtlasObjectId objId = toAtlasObjectId(item); + + if (objId != null) { + ret.add(objId); + } + } + } else { + AtlasObjectId objId = toAtlasObjectId(obj); + + if (objId != null) { + ret = new ArrayList<>(1); + + ret.add(objId); + } else { + ret = null; + } + } + + return ret; + } + + + protected String assertDatabaseIsRegistered(String dbName) throws Exception { + return assertDatabaseIsRegistered(dbName, null); + } + + protected String assertDatabaseIsRegistered(String dbName, AssertPredicate assertPredicate) throws Exception { + LOG.debug("Searching for database: {}", dbName); + + String dbQualifiedName = dbName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME; + + dbQualifiedName = dbQualifiedName.toLowerCase(); + + return assertEntityIsRegistered(HIVE_TYPE_DB, REFERENCEABLE_ATTRIBUTE_NAME, dbQualifiedName, assertPredicate); + } + + protected String assertTableIsRegistered(String dbName, String tableName) throws Exception { + return assertTableIsRegistered(dbName, tableName, null, false); + } + + protected String assertTableIsRegistered(String fullTableName) throws Exception { + return assertTableIsRegistered(fullTableName, null, false); + } + + protected String assertTableIsRegistered(String dbName, String tableName, AssertPredicate assertPredicate, boolean isTemporary) throws Exception { + LOG.debug("Searching for table {}.{}", dbName, tableName); + + String fullTableName = dbName + 
AtlasImpalaHookContext.QNAME_SEP_ENTITY_NAME + tableName; + + return assertTableIsRegistered(fullTableName, assertPredicate, isTemporary); + } + + protected String assertTableIsRegistered(String fullTableName, AssertPredicate assertPredicate, boolean isTemporary) throws Exception { + LOG.debug("Searching for table {}", fullTableName); + + String tableQualifiedName = (fullTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + + CLUSTER_NAME; + + return assertEntityIsRegistered(HIVE_TYPE_TABLE, REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName, + assertPredicate); + } + + protected String createDatabase() throws Exception { + String dbName = dbName(); + + return createDatabase(dbName); + } + + protected String createDatabase(String dbName) throws Exception { + runCommandWithDelay("CREATE DATABASE IF NOT EXISTS " + dbName, 3000); + + return dbName; + } + + protected String createTable(String dbName, String columnsString) throws Exception { + return createTable(dbName, columnsString, false); + } + + protected String createTable(String dbName, String columnsString, boolean isPartitioned) throws Exception { + String tableName = tableName(); + return createTable(dbName, tableName, columnsString, isPartitioned); + } + + protected String createTable(String dbName, String tableName, String columnsString, boolean isPartitioned) throws Exception { + runCommandWithDelay("CREATE TABLE IF NOT EXISTS " + dbName + "." + tableName + " " + columnsString + " comment 'table comment' " + (isPartitioned ? " partitioned by(dt string)" : ""), 3000); + + return dbName + "." + tableName; + } + + protected AtlasEntity validateProcess(String processQFName, String queryString) throws Exception { + String processId = assertProcessIsRegistered(processQFName, queryString); + AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity(); + + return processEntity; + } + + protected AtlasEntity validateProcess(List processQFNames, String queryString) throws Exception { + String processId = assertProcessIsRegistered(processQFNames, queryString); + AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity(); + + return processEntity; + } + + protected AtlasEntity validateProcessExecution(AtlasEntity impalaProcess, String queryString) throws Exception { + String processExecutionId = assertProcessExecutionIsRegistered(impalaProcess, queryString); + AtlasEntity processExecutionEntity = atlasClientV2.getEntityByGuid(processExecutionId).getEntity(); + return processExecutionEntity; + } + + protected int numberOfProcessExecutions(AtlasEntity impalaProcess) { + return toAtlasObjectIdList(impalaProcess.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS_EXECUTIONS)).size(); + } + + public interface AssertPredicate { + void assertOnEntity(AtlasEntity entity) throws Exception; + } + + public interface AssertPredicates { + String assertOnEntities(List entities) throws Exception; + } + + public interface PredicateWithReturn { + /** + * Perform a predicate evaluation. + * + * @return the boolean result of the evaluation. + * @throws Exception thrown if the predicate evaluation could not evaluate. + */ + String evaluate() throws Exception; + } + + public interface Predicate { + /** + * Perform a predicate evaluation. + * + * @return the boolean result of the evaluation. + * @throws Exception thrown if the predicate evaluation could not evaluate. 
+ */ + void evaluate() throws Exception; + } + + /** + * Wait for a condition, expressed via a {@link Predicate} to become true. + * + * @param timeout maximum time in milliseconds to wait for the predicate to become true. + * @param predicate predicate waiting on. + */ + protected void waitFor(int timeout, Predicate predicate) throws Exception { + ParamChecker.notNull(predicate, "predicate"); + long mustEnd = System.currentTimeMillis() + timeout; + + while (true) { + try { + predicate.evaluate(); + return; + } catch(Error | Exception e) { + if (System.currentTimeMillis() >= mustEnd) { + fail("Assertions failed. Failing after waiting for timeout " + timeout + " msecs", e); + } + LOG.debug("Waiting up to {} msec as assertion failed", mustEnd - System.currentTimeMillis(), e); + Thread.sleep(5000); + } + } + } + + /** + * Wait for a condition, expressed via a {@link Predicate} to become true. + * + * @param timeout maximum time in milliseconds to wait for the predicate to become true. + * @param predicate predicate waiting on. + */ + protected String waitForWithReturn(int timeout, PredicateWithReturn predicate) throws Exception { + ParamChecker.notNull(predicate, "predicate"); + long mustEnd = System.currentTimeMillis() + timeout; + + while (true) { + try { + return predicate.evaluate(); + } catch(Error | Exception e) { + if (System.currentTimeMillis() >= mustEnd) { + fail("Assertions failed. Failing after waiting for timeout " + timeout + " msecs", e); + } + LOG.debug("Waiting up to {} msec as assertion failed", mustEnd - System.currentTimeMillis(), e); + Thread.sleep(5000); + } + } + } + + public static String lower(String str) { + if (StringUtils.isEmpty(str)) { + return null; + } + return str.toLowerCase().trim(); + } + + protected void runCommand(String cmd) throws Exception { + runCommandWithDelay(cmd, 0); + } + + protected void runCommandWithDelay(String cmd, int sleepMs) throws Exception { + runCommandWithDelay(driverWithoutContext, cmd, sleepMs); + } + + protected void runCommandWithDelay(Driver driver, String cmd, int sleepMs) throws Exception { + LOG.debug("Running command '{}'", cmd); + CommandProcessorResponse response = driver.run(cmd); + assertEquals(response.getResponseCode(), 0); + if (sleepMs != 0) { + Thread.sleep(sleepMs); + } + } + + protected String random() { + return RandomStringUtils.randomAlphanumeric(10); + } + + protected String tableName() { + return "table_" + random(); + } + protected String dbName() {return "db_" + random();} +} diff --git a/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java new file mode 100644 index 0000000000..53e9b1224a --- /dev/null +++ b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java @@ -0,0 +1,655 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
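The assertion helpers above rebuild the same qualified names the hook is expected to register, and the tests that follow compare against them. As a worked example (a sketch only: AtlasImpalaHookContext is not part of this hunk, so the separator values below are assumptions based on the usual Atlas conventions of '.' between database and table, '@' before the metadata namespace and ':' before the process suffix), the "create view" test below would expect a process qualifiedName of db_1.view_1@primary:1554750072000.

```java
public class ProcessQualifiedNameSketch {
    // Assumed stand-ins for AtlasImpalaHookContext's separator constants (not authoritative).
    private static final char QNAME_SEP_ENTITY_NAME        = '.';
    private static final char QNAME_SEP_METADATA_NAMESPACE = '@';
    private static final char QNAME_SEP_PROCESS            = ':';

    public static void main(String[] args) {
        String dbName         = "db_1";
        String tableName      = "view_1";
        String clusterName    = "primary";         // CLUSTER_NAME in the tests
        long   createTimeSecs = 1554750072L;        // table createTime from the lineage record
        long   createTimeMs   = createTimeSecs * 1000L;

        String processQualifiedName = (dbName + QNAME_SEP_ENTITY_NAME + tableName
                + QNAME_SEP_METADATA_NAMESPACE + clusterName
                + QNAME_SEP_PROCESS + createTimeMs).toLowerCase();

        // Prints "db_1.view_1@primary:1554750072000" under the assumed separators.
        System.out.println(processQualifiedName);
    }
}
```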
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.impala; + +import static org.apache.atlas.impala.hook.events.BaseImpalaEvent.ATTRIBUTE_QUERY_TEXT; + +import java.util.ArrayList; +import java.util.List; +import org.apache.atlas.impala.hook.AtlasImpalaHookContext; +import org.apache.atlas.impala.hook.ImpalaLineageHook; +import org.apache.atlas.impala.hook.events.BaseImpalaEvent; +import org.apache.atlas.impala.model.ImpalaQuery; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.testng.Assert; +import org.testng.annotations.Test; + +import static org.apache.atlas.impala.hook.events.BaseImpalaEvent.ATTRIBUTE_DDL_QUERIES; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +public class ImpalaLineageToolIT extends ImpalaLineageITBase { + public static final long TABLE_CREATE_TIME_SOURCE = 1554750070; + public static final long TABLE_CREATE_TIME = 1554750072; + private static String dir = System.getProperty("user.dir") + "/src/test/resources/"; + + /** + * This tests + * 1) ImpalaLineageTool can parse one lineage file that contains "create view" command lineage + * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testCreateViewFromFile() { + // this file contains a single lineage record for "create view". 
+ // It has table vertex with createTime + String IMPALA = dir + "impalaCreateView.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + List lineageList = new ArrayList<>(); + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + try { + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_1"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "view_1"; + createTable(dbName, targetTableName,"(count int, id string)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_3 + String createTime = new Long((long)(1554750072)*1000).toString(); + String processQFName = + "db_1.view_1" + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime; + + processQFName = processQFName.toLowerCase(); + + String queryString = "create view db_1.view_1 as select count, id from db_1.table_1"; + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } catch (Exception e) { + System.out.print("Appending file error"); + } + } + + /** + * This tests is for create view query with extra comment and spaces added in between: + * 1) ImpalaLineageTool can parse one lineage file that contains " create view" command lineage + * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testCreateViewWithCommentSpacesFromFile() { + // this file contains a single lineage record for "create view". 
+ // It has table vertex with createTime + String IMPALA = dir + "impalaCreateViewWithCommentSpaces.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + List lineageList = new ArrayList<>(); + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + try { + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_8"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "view_1"; + createTable(dbName, targetTableName,"(count int, id string)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_3 + String createTime = new Long((long)(1554750072)*1000).toString(); + String processQFName = + "db_8.view_1" + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime; + + processQFName = processQFName.toLowerCase(); + + String queryString = " create /* comment1 */ view db_8.view_1 as select /* comment2 */ count, id from db_8.table_1"; + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } catch (Exception e) { + System.out.print("Appending file error"); + } + } + + /** + * This tests + * 1) ImpalaLineageTool can parse one lineage file that contains "create view" command lineage, + * but there is no table vertex with createTime. + * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testCreateViewNoCreateTimeFromFile() { + // this file contains a single lineage record for "create view". + // there is no table vertex with createTime, which is lineage record generated by Impala + // originally. The table create time is hard-coded before Impala fixes this issue. 
+ String IMPALA = dir + "impalaCreateViewNoCreateTime.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + List lineageList = new ArrayList<>(); + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + try { + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_2"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "view_1"; + createTable(dbName, targetTableName,"(count int, id string)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + Long beforeCreateTime = System.currentTimeMillis() / BaseImpalaEvent.MILLIS_CONVERT_FACTOR; + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + Long afterCreateTime = System.currentTimeMillis() / BaseImpalaEvent.MILLIS_CONVERT_FACTOR; + + String processQFNameWithoutTime = + dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS; + processQFNameWithoutTime = processQFNameWithoutTime.toLowerCase(); + + List processQFNames = new ArrayList<>(); + String createTime = new Long(beforeCreateTime.longValue()*1000).toString(); + processQFNames.add(processQFNameWithoutTime + createTime); + + if (beforeCreateTime != afterCreateTime) { + createTime = new Long(afterCreateTime.longValue() * 1000).toString(); + processQFNames.add(processQFNameWithoutTime + createTime); + } + + // verify the process is saved in Atlas. the value is from info in IMPALA_4. + // There is no createTime in lineage record, so we don't know the process qualified name + // And can only verify the process is created for the given query. + String queryString = "create view " + dbName + "." + targetTableName + " as select count, id from " + dbName + "." + sourceTableName; + AtlasEntity processEntity1 = validateProcess(processQFNames, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } catch (Exception e) { + System.out.print("Appending file error"); + } + } + + /** + * This tests + * 1) ImpalaLineageTool can parse one lineage file that contains "create table as select" command lineage, + * there is table vertex with createTime. 
+ * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testCreateTableAsSelectFromFile() throws Exception { + String IMPALA = dir + "impalaCreateTableAsSelect.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_3"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "table_2"; + createTable(dbName, targetTableName,"(count int, id string)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_4. + String createTime = new Long(TABLE_CREATE_TIME*1000).toString(); + String processQFName = + dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime; + + processQFName = processQFName.toLowerCase(); + + String queryString = "create table " + dbName + "." + targetTableName + " as select count, id from " + dbName + "." + sourceTableName; + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } + + /** + * This tests is based on extra comment and spaces adding to create table as select query + * 1) ImpalaLineageTool can parse one lineage file that contains "create table as select" command lineage, + * there is table vertex with createTime. 
+ * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testCreateTableAsSelectWithCommentSpacesFromFile() throws Exception { + String IMPALA = dir + "impalaCreateTableAsSelectWithCommentSpaces.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_9"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "table_2"; + createTable(dbName, targetTableName,"(count int, id string)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_4. + String createTime = new Long(TABLE_CREATE_TIME*1000).toString(); + String processQFName = + dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime; + + processQFName = processQFName.toLowerCase(); + + String queryString = "create /* Test */ table " + dbName + "." + + targetTableName + " as /* Test */ select count, id from " + dbName + "." + sourceTableName; + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } + + /** + * This tests + * 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage, + * there is table vertex with createTime. 
+ * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testAlterViewAsSelectFromFile() throws Exception { + String IMPALA = dir + "impalaAlterViewAsSelect.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_4"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "view_1"; + createTable(dbName, targetTableName,"(count int, id string)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_4. + String createTime = new Long(TABLE_CREATE_TIME*1000).toString(); + String processQFName = + dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime; + + processQFName = processQFName.toLowerCase(); + + String queryString = "alter view " + dbName + "." + targetTableName + " as select count, id from " + dbName + "." + sourceTableName; + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } + + /** + * This tests is for extra comment and spaces present in alter view as select query + * 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage, + * there is table vertex with createTime. 
+ * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testAlterViewAsSelectWithCommentSpacesFromFile() throws Exception { + String IMPALA = dir + "impalaAlterViewAsSelectWithCommentSpaces.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_10"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "view_1"; + createTable(dbName, targetTableName,"(count int, id string)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_4. + String createTime = new Long(TABLE_CREATE_TIME*1000).toString(); + String processQFName = + dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime; + + processQFName = processQFName.toLowerCase(); + + String queryString = "alter /* comment1 */ view " + dbName + "." + targetTableName + + " as select /* comment1 */ count, id from " + dbName + "." + sourceTableName; + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } + + /** + * This tests + * 1) ImpalaLineageTool can parse one lineage file that contains "insert into" command lineage, + * there is table vertex with createTime. 
+ * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testInsertIntoAsSelectFromFile() throws Exception { + String IMPALA = dir + "impalaInsertIntoAsSelect.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_5"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "table_2"; + createTable(dbName, targetTableName,"(count int, id string, int_col int)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_4. + String createTime1 = new Long(TABLE_CREATE_TIME_SOURCE*1000).toString(); + String createTime2 = new Long(TABLE_CREATE_TIME*1000).toString(); + String sourceQFName = dbName + "." + sourceTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime1; + String targetQFName = dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime2; + String processQFName = "QUERY:" + sourceQFName.toLowerCase() + "->:INSERT:" + targetQFName.toLowerCase(); + + String queryString = "insert into table " + dbName + "." + targetTableName + " (count, id) select count, id from " + dbName + "." + sourceTableName; + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 0); + } + + /** + * This tests + * 1) ImpalaLineageTool can parse one lineage file that contains multiple "insert into" command lineages, + * there is table vertex with createTime. 
+ * 2) Lineage is sent to Atlas + * 3) Atlas can get these lineages from Atlas + */ + @Test + public void testMultipleInsertIntoAsSelectFromFile() throws Exception { + String IMPALA = dir + "impalaMultipleInsertIntoAsSelect1.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "db_6"; + createDatabase(dbName); + + String sourceTableName = "table_1"; + createTable(dbName, sourceTableName,"(id string, count int)", false); + + String targetTableName = "table_2"; + createTable(dbName, targetTableName,"(count int, id string, int_col int)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // re-run the same lineage record, should have the same process entity and another process execution entity + Thread.sleep(5000); + IMPALA = dir + "impalaMultipleInsertIntoAsSelect2.json"; + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + Thread.sleep(5000); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_4. + String createTime1 = new Long(TABLE_CREATE_TIME_SOURCE*1000).toString(); + String createTime2 = new Long(TABLE_CREATE_TIME*1000).toString(); + String sourceQFName = dbName + "." + sourceTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime1; + String targetQFName = dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime2; + String processQFName = "QUERY:" + sourceQFName.toLowerCase() + "->:INSERT:" + targetQFName.toLowerCase(); + + String queryString = "insert into table " + dbName + "." + targetTableName + " (count, id) select count, id from " + dbName + "." + sourceTableName; + queryString = queryString.toLowerCase().trim(); + String queryString2 = queryString; + + Thread.sleep(5000); + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + + List processExecutions = toAtlasObjectIdList(processEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS_EXECUTIONS)); + Assert.assertEquals(processExecutions.size(), 2); + for (AtlasObjectId processExecutionId : processExecutions) { + AtlasEntity.AtlasEntityWithExtInfo atlasEntityWithExtInfo = atlasClientV2. 
+ getEntityByGuid(processExecutionId.getGuid()); + + AtlasEntity processExecutionEntity = atlasEntityWithExtInfo.getEntity(); + String entityQueryText = String.valueOf(processExecutionEntity.getAttribute(ATTRIBUTE_QUERY_TEXT)).toLowerCase().trim(); + if (!(queryString.equalsIgnoreCase(entityQueryText) || queryString2.equalsIgnoreCase(entityQueryText))) { + String errorMessage = String.format("process query text '%s' does not match expected value of '%s' or '%s'", entityQueryText, queryString, queryString2); + Assert.assertTrue(false, errorMessage); + } + } + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 0); + } + + /** + * This tests + * 1) ImpalaLineageTool can parse one lineage file that contains "create table as select" command lineage, + * there is table vertex with createTime. The target vertex's vertexId does not contain db name and table name + * 2) Lineage is sent to Atlas + * 3) Atlas can get this lineage from Atlas + */ + @Test + public void testCreateTableAsSelectVertexIdNoTableNameFromFile() throws Exception { + String IMPALA = dir + "impalaCreateTableAsSelectVertexIdNoTableName.json"; + String IMPALA_WAL = dir + "WALimpala.wal"; + + ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook(); + + // create database and tables to simulate Impala behavior that Impala updates metadata + // to HMS and HMSHook sends the metadata to Atlas, which has to happen before + // Atlas can handle lineage notification + String dbName = "sales_db"; + createDatabase(dbName); + + String sourceTableName = "sales_asia"; + createTable(dbName, sourceTableName,"(id string, name string)", false); + + String targetTableName = "sales_china"; + createTable(dbName, targetTableName,"(id string, name string)", false); + + // process lineage record, and send corresponding notification to Atlas + String[] args = new String[]{"-d", "./", "-p", "impala"}; + ImpalaLineageTool toolInstance = new ImpalaLineageTool(args); + toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL); + + // verify the process is saved in Atlas + // the value is from info in IMPALA_4. + String createTime = new Long((long)1560885039*1000).toString(); + String processQFName = + dbName + "." 
+ targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime; + + processQFName = processQFName.toLowerCase(); + + String queryString = "create table " + targetTableName + " as select * from " + sourceTableName; + AtlasEntity processEntity1 = validateProcess(processQFName, queryString); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + String guid = assertTableIsRegistered(dbName, targetTableName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/java/org/apache/atlas/impala/hook/ImpalaLineageHookIT.java b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/hook/ImpalaLineageHookIT.java new file mode 100644 index 0000000000..56d74fee3d --- /dev/null +++ b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/hook/ImpalaLineageHookIT.java @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.impala.hook; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.atlas.impala.ImpalaLineageITBase; +import org.apache.atlas.impala.hook.events.BaseImpalaEvent; +import org.apache.atlas.impala.model.ImpalaDependencyType; +import org.apache.atlas.impala.model.ImpalaVertexType; +import org.apache.atlas.impala.model.LineageEdge; +import org.apache.atlas.impala.model.ImpalaQuery; +import org.apache.atlas.impala.model.LineageVertex; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.AfterClass; +import org.testng.annotations.Test; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.atlas.impala.hook.events.BaseImpalaEvent.ATTRIBUTE_DDL_QUERIES; +import static org.testng.Assert.assertFalse; + +public class ImpalaLineageHookIT extends ImpalaLineageITBase { + private static final Logger LOG = LoggerFactory.getLogger(ImpalaLineageHookIT.class); + private static ImpalaLineageHook impalaHook; + + @BeforeClass + public void setUp() throws Exception { + super.setUp(); + impalaHook = new ImpalaLineageHook(); + } + + @AfterClass + public void testClean() { + impalaHook = null; + } + + @Test + public void testCreateView() throws Exception { + // first trigger HMS hook to create related entities + String dbName = createDatabase(); + assertDatabaseIsRegistered(dbName); + + String tableName = createTable(dbName, "(id string, count int)"); + String viewName = createTable(dbName, "(count int, id string)"); + + // then process lineage record to push lineage to Atlas + ImpalaQuery queryObj = new ImpalaQuery(); + List edges = new ArrayList<>(); + List vertices = new ArrayList<>(); + + queryObj.setQueryText("create view " + viewName + " as select count, id from " + tableName); + queryObj.setQueryId("3a441d0c130962f8:7f634aec00000000"); + queryObj.setHash("64ff0425ccdfaada53e3f2fd76f566f7"); + queryObj.setUser("admin"); + queryObj.setTimestamp((long)1554750072); + queryObj.setEndTime((long)1554750554); + + LineageEdge edge1 = new LineageEdge(); + edge1.setSources( Arrays.asList((long)1)); + edge1.setTargets( Arrays.asList((long)0)); + edge1.setEdgeType(ImpalaDependencyType.PROJECTION); + edges.add(edge1); + + LineageEdge edge2 = new LineageEdge(); + edge2.setSources( Arrays.asList((long)3)); + edge2.setTargets( Arrays.asList((long)2)); + edge2.setEdgeType(ImpalaDependencyType.PROJECTION); + edges.add(edge2); + + queryObj.setEdges(edges); + + LineageVertex vertex1 = new LineageVertex(); + vertex1.setId((long)0); + vertex1.setVertexType(ImpalaVertexType.COLUMN); + vertex1.setVertexId(viewName + ".count"); + vertices.add(vertex1); + + LineageVertex vertex2 = new LineageVertex(); + vertex2.setId((long)1); + vertex2.setVertexType(ImpalaVertexType.COLUMN); + vertex2.setVertexId(tableName + ".count"); + vertices.add(vertex2); + + LineageVertex vertex3 = new LineageVertex(); + vertex3.setId((long)2); + vertex3.setVertexType(ImpalaVertexType.COLUMN); + vertex3.setVertexId(viewName + ".id"); + vertices.add(vertex3); + + LineageVertex vertex4 = new LineageVertex(); + vertex4.setId((long)3); + vertex4.setVertexType(ImpalaVertexType.COLUMN); + vertex4.setVertexId(tableName + ".id"); + vertices.add(vertex4); + + LineageVertex vertex5 = new LineageVertex(); + 
vertex5.setId((long)4); + vertex5.setVertexType(ImpalaVertexType.TABLE); + vertex5.setVertexId(viewName); + vertex5.setCreateTime(System.currentTimeMillis() / 1000); + vertices.add(vertex5); + + LineageVertex vertex6 = new LineageVertex(); + vertex6.setId((long)5); + vertex6.setVertexType(ImpalaVertexType.TABLE); + vertex6.setVertexId(tableName); + vertex6.setCreateTime(System.currentTimeMillis() / 1000); + vertices.add(vertex6); + + queryObj.setVertices(vertices); + + try { + impalaHook.process(queryObj); + String createTime = new Long(BaseImpalaEvent.getTableCreateTime(vertex5)).toString(); + String processQFName = + vertex5.getVertexId() + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE + + CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime; + + processQFName = processQFName.toLowerCase(); + + // check process and process execution entities + AtlasEntity processEntity1 = validateProcess(processQFName, queryObj.getQueryText()); + AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryObj.getQueryText()); + AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute( + BaseImpalaEvent.ATTRIBUTE_PROCESS)); + Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); + Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); + + // check DDL entity + String viewId = assertTableIsRegistered(viewName); + AtlasEntity entity = atlasClientV2.getEntityByGuid(viewId).getEntity(); + List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES); + + assertNotNull(ddlQueries); + assertEquals(ddlQueries.size(), 1); + } catch (Exception ex) { + LOG.error("process create_view failed: ", ex); + assertFalse(true); + } + } +} diff --git a/addons/impala-bridge/src/test/resources/atlas-application.properties b/addons/impala-bridge/src/test/resources/atlas-application.properties new file mode 100644 index 0000000000..898b69c999 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/atlas-application.properties @@ -0,0 +1,124 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +######### Atlas Server Configs ######### +atlas.rest.address=http://localhost:31000 + +######### Graph Database Configs ######### + + +# Graph database implementation. Value inserted by maven. 
+atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase + +# Graph Storage +atlas.graph.storage.backend=berkeleyje + +# Entity repository implementation +atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.InMemoryEntityAuditRepository + +# Graph Search Index Backend +atlas.graph.index.search.backend=solr + +#Berkeley storage directory +atlas.graph.storage.directory=${sys:atlas.data}/berkley + +#hbase +#For standalone mode , specify localhost +#for distributed mode, specify zookeeper quorum here + +atlas.graph.storage.hostname=${graph.storage.hostname} +atlas.graph.storage.hbase.regions-per-server=1 +atlas.graph.storage.lock.wait-time=10000 + +#ElasticSearch +atlas.graph.index.search.directory=${sys:atlas.data}/es +atlas.graph.index.search.elasticsearch.client-only=false +atlas.graph.index.search.elasticsearch.local-mode=true +atlas.graph.index.search.elasticsearch.create.sleep=2000 + +# Solr cloud mode properties +atlas.graph.index.search.solr.mode=cloud +atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address} +atlas.graph.index.search.solr.embedded=true +atlas.graph.index.search.max-result-set-size=150 + + +######### Notification Configs ######### +atlas.notification.embedded=true + +atlas.kafka.zookeeper.connect=localhost:19026 +atlas.kafka.bootstrap.servers=localhost:19027 +atlas.kafka.data=${sys:atlas.data}/kafka +atlas.kafka.zookeeper.session.timeout.ms=4000 +atlas.kafka.zookeeper.sync.time.ms=20 +atlas.kafka.consumer.timeout.ms=4000 +atlas.kafka.auto.commit.interval.ms=100 +atlas.kafka.hook.group.id=atlas +atlas.kafka.entities.group.id=atlas_entities +#atlas.kafka.auto.commit.enable=false + +atlas.kafka.enable.auto.commit=false +atlas.kafka.auto.offset.reset=earliest +atlas.kafka.session.timeout.ms=30000 +atlas.kafka.offsets.topic.replication.factor=1 + + + +######### Entity Audit Configs ######### +atlas.audit.hbase.tablename=ATLAS_ENTITY_AUDIT_EVENTS +atlas.audit.zookeeper.session.timeout.ms=1000 +atlas.audit.hbase.zookeeper.quorum=localhost +atlas.audit.hbase.zookeeper.property.clientPort=19026 + +######### Security Properties ######### + +# SSL config +atlas.enableTLS=false +atlas.server.https.port=31443 + +######### Security Properties ######### + +hbase.security.authentication=simple + +atlas.hook.falcon.synchronous=true + +######### JAAS Configuration ######## + +atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule +atlas.jaas.KafkaClient.loginModuleControlFlag = required +atlas.jaas.KafkaClient.option.useKeyTab = true +atlas.jaas.KafkaClient.option.storeKey = true +atlas.jaas.KafkaClient.option.serviceName = kafka +atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab +atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM + +######### High Availability Configuration ######## +atlas.server.ha.enabled=false +#atlas.server.ids=id1 +#atlas.server.address.id1=localhost:21000 + +######### Atlas Authorization ######### +atlas.authorizer.impl=none +# atlas.authorizer.impl=simple +# atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json + +######### Atlas Authentication ######### +atlas.authentication.method.file=true +atlas.authentication.method.ldap.type=none +atlas.authentication.method.kerberos=false +# atlas.authentication.method.file.filename=users-credentials.properties diff --git a/addons/impala-bridge/src/test/resources/atlas-log4j.xml b/addons/impala-bridge/src/test/resources/atlas-log4j.xml new file mode 100644 index 
0000000000..c661d36f81 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/atlas-log4j.xml @@ -0,0 +1,130 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/impala-bridge/src/test/resources/hive-site.xml b/addons/impala-bridge/src/test/resources/hive-site.xml new file mode 100644 index 0000000000..edd0c54313 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/hive-site.xml @@ -0,0 +1,94 @@ + + + + + + + hive.exec.submit.local.task.via.child + false + + + + mapreduce.framework.name + local + + + + fs.default.name + file:/// + + + + hive.metastore.event.listeners + org.apache.atlas.hive.hook.HiveMetastoreHookImpl + + + + hive.support.concurrency + false + + + + hive.metastore.warehouse.dir + ${project.basedir}/target/metastore + + + + javax.jdo.option.ConnectionURL + jdbc:derby:;databaseName=${project.basedir}/target/metastore_db;create=true + + + + atlas.hook.hive.synchronous + true + + + + fs.pfile.impl + org.apache.hadoop.fs.ProxyLocalFileSystem + + + + hive.in.test + true + + + + hive.zookeeper.quorum + localhost:19026 + + + + hive.metastore.schema.verification + false + + + + hive.metastore.disallow.incompatible.col.type.changes + false + + + + datanucleus.schema.autoCreateAll + true + + + + hive.exec.scratchdir + ${project.basedir}/target/scratchdir + + + \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelect.json b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelect.json new file mode 100644 index 0000000000..aca2661e81 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelect.json @@ -0,0 +1,66 @@ +{ + "queryText":"alter view db_4.view_1 as select count, id from db_4.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + "endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_4.view_1.count", + "metadata": { + "tableName": "db_4.view_1", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_4.table_1.count", + "metadata": { + "tableName": "db_4.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_4.view_1.id", + "metadata": { + "tableName": "db_4.view_1", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_4.table_1.id", + "metadata": { + "tableName": "db_4.table_1", + "tableCreateTime": 1554750070 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json new file mode 100644 index 0000000000..322abb5ea6 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json @@ -0,0 +1,66 @@ +{ + "queryText":"alter /* comment1 */ view db_10.view_1 as select /* comment1 */ count, id from db_10.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + 
"endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_10.view_1.count", + "metadata": { + "tableName": "db_10.view_1", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_10.table_1.count", + "metadata": { + "tableName": "db_10.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_10.view_1.id", + "metadata": { + "tableName": "db_10.view_1", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_10.table_1.id", + "metadata": { + "tableName": "db_10.table_1", + "tableCreateTime": 1554750070 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelect.json b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelect.json new file mode 100644 index 0000000000..7bf361c767 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelect.json @@ -0,0 +1,66 @@ +{ + "queryText":"create table db_3.table_2 as select count, id from db_3.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + "endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_3.table_2.count", + "metadata": { + "tableName": "db_3.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_3.table_1.count", + "metadata": { + "tableName": "db_3.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_3.table_2.id", + "metadata": { + "tableName": "db_3.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_3.table_1.id", + "metadata": { + "tableName": "db_3.table_1", + "tableCreateTime": 1554750070 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectVertexIdNoTableName.json b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectVertexIdNoTableName.json new file mode 100644 index 0000000000..0fadcc8933 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectVertexIdNoTableName.json @@ -0,0 +1,66 @@ +{ + "queryText":"create table sales_china as select * from sales_asia", + "queryId":"2940d0b242de53ea:e82ba8d300000000", + "hash":"a705a9ec851a5440afca0dfb8df86cd5", + "user":"root", + "timestamp":1560885032, + "endTime":1560885040, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"id", + "metadata":{ + "tableName":"sales_db.sales_china", + "tableCreateTime":1560885039 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"sales_db.sales_asia.id", + "metadata":{ + "tableName":"sales_db.sales_asia", + "tableCreateTime":1560884919 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"name", + "metadata":{ + 
"tableName":"sales_db.sales_china", + "tableCreateTime":1560885039 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"sales_db.sales_asia.name", + "metadata":{ + "tableName":"sales_db.sales_asia", + "tableCreateTime":1560884919 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json new file mode 100644 index 0000000000..f588190b09 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json @@ -0,0 +1,66 @@ +{ + "queryText":"create /* Test */ table db_9.table_2 as /* Test */ select count, id from db_9.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + "endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_9.table_2.count", + "metadata": { + "tableName": "db_9.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_9.table_1.count", + "metadata": { + "tableName": "db_9.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_9.table_2.id", + "metadata": { + "tableName": "db_9.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_9.table_1.id", + "metadata": { + "tableName": "db_9.table_1", + "tableCreateTime": 1554750070 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaCreateView.json b/addons/impala-bridge/src/test/resources/impalaCreateView.json new file mode 100644 index 0000000000..bf55d9f725 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaCreateView.json @@ -0,0 +1,66 @@ +{ + "queryText":"create view db_1.view_1 as select count, id from db_1.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + "endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_1.view_1.count", + "metadata": { + "tableName": "db_1.view_1", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_1.table_1.count", + "metadata": { + "tableName": "db_1.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_1.view_1.id", + "metadata": { + "tableName": "db_1.view_1", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_1.table_1.id", + "metadata": { + "tableName": "db_1.table_1", + "tableCreateTime": 1554750070 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaCreateViewNoCreateTime.json b/addons/impala-bridge/src/test/resources/impalaCreateViewNoCreateTime.json new file mode 100644 index 0000000000..b825a386c9 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaCreateViewNoCreateTime.json @@ -0,0 +1,50 @@ +{ + "queryText":"create view db_2.view_1 as 
select count, id from db_2.table_1", + "queryId":"5a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + "endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_2.view_1.count" + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_2.table_1.count" + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_2.view_1.id" + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_2.table_1.id" + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json new file mode 100644 index 0000000000..e49b6b7c40 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json @@ -0,0 +1,66 @@ +{ + "queryText":" create /* comment1 */ view db_8.view_1 as select /* comment2 */ count, id from db_8.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + "endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_8.view_1.count", + "metadata": { + "tableName": "db_8.view_1", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_8.table_1.count", + "metadata": { + "tableName": "db_8.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_8.view_1.id", + "metadata": { + "tableName": "db_8.view_1", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_8.table_1.id", + "metadata": { + "tableName": "db_8.table_1", + "tableCreateTime": 1554750070 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaInsertIntoAsSelect.json b/addons/impala-bridge/src/test/resources/impalaInsertIntoAsSelect.json new file mode 100644 index 0000000000..deb14669c9 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaInsertIntoAsSelect.json @@ -0,0 +1,83 @@ +{ + "queryText":"insert into table db_5.table_2 (count, id) select count, id from db_5.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + "endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + ], + "targets":[ + 4 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_5.table_2.count", + "metadata": { + "tableName": "db_5.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_5.table_1.count", + "metadata": { + "tableName": "db_5.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_5.table_2.id", + "metadata": { + "tableName": "db_5.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + 
"vertexType":"COLUMN", + "vertexId":"db_5.table_1.id", + "metadata": { + "tableName": "db_5.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":4, + "vertexType":"COLUMN", + "vertexId":"db_5.table_2.int_col", + "metadata": { + "tableName": "db_5.table_2", + "tableCreateTime": 1554750072 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/impalaMultipleInsertIntoAsSelect1.json b/addons/impala-bridge/src/test/resources/impalaMultipleInsertIntoAsSelect1.json new file mode 100644 index 0000000000..4e2783783f --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaMultipleInsertIntoAsSelect1.json @@ -0,0 +1,83 @@ +{ + "queryText":"insert into table db_6.table_2 (count, id) select count, id from db_6.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750072, + "endTime":1554750554, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + ], + "targets":[ + 4 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_6.table_2.count", + "metadata": { + "tableName": "db_6.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_6.table_1.count", + "metadata": { + "tableName": "db_6.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_6.table_2.id", + "metadata": { + "tableName": "db_6.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_6.table_1.id", + "metadata": { + "tableName": "db_6.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":4, + "vertexType":"COLUMN", + "vertexId":"db_6.table_2.int_col", + "metadata": { + "tableName": "db_6.table_2", + "tableCreateTime": 1554750072 + } + } + ] +} diff --git a/addons/impala-bridge/src/test/resources/impalaMultipleInsertIntoAsSelect2.json b/addons/impala-bridge/src/test/resources/impalaMultipleInsertIntoAsSelect2.json new file mode 100644 index 0000000000..ece6535d99 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/impalaMultipleInsertIntoAsSelect2.json @@ -0,0 +1,83 @@ +{ + "queryText":"insert into table db_6.table_2 (count, id) select count, id from db_6.table_1", + "queryId":"3a441d0c130962f8:7f634aec00000000", + "hash":"64ff0425ccdfaada53e3f2fd76f566f7", + "user":"admin", + "timestamp":1554750082, + "endTime":1554750584, + "edges":[ + { + "sources":[ + 1 + ], + "targets":[ + 0 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + 3 + ], + "targets":[ + 2 + ], + "edgeType":"PROJECTION" + }, + { + "sources":[ + ], + "targets":[ + 4 + ], + "edgeType":"PROJECTION" + } + ], + "vertices":[ + { + "id":0, + "vertexType":"COLUMN", + "vertexId":"db_6.table_2.count", + "metadata": { + "tableName": "db_6.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":1, + "vertexType":"COLUMN", + "vertexId":"db_6.table_1.count", + "metadata": { + "tableName": "db_6.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":2, + "vertexType":"COLUMN", + "vertexId":"db_6.table_2.id", + "metadata": { + "tableName": "db_6.table_2", + "tableCreateTime": 1554750072 + } + }, + { + "id":3, + "vertexType":"COLUMN", + "vertexId":"db_6.table_1.id", + "metadata": { + "tableName": "db_6.table_1", + "tableCreateTime": 1554750070 + } + }, + { + "id":4, + 
"vertexType":"COLUMN", + "vertexId":"db_6.table_2.int_col", + "metadata": { + "tableName": "db_6.table_2", + "tableCreateTime": 1554750072 + } + } + ] +} \ No newline at end of file diff --git a/addons/impala-bridge/src/test/resources/users-credentials.properties b/addons/impala-bridge/src/test/resources/users-credentials.properties new file mode 100644 index 0000000000..5046dbaf64 --- /dev/null +++ b/addons/impala-bridge/src/test/resources/users-credentials.properties @@ -0,0 +1,3 @@ +#username=group::sha256-password +admin=ADMIN::a4a88c0872bf652bb9ed803ece5fd6e82354838a9bf59ab4babb1dab322154e1 +rangertagsync=RANGER_TAG_SYNC::0afe7a1968b07d4c3ff4ed8c2d809a32ffea706c66cd795ead9048e81cfaf034 diff --git a/addons/impala-hook-api/pom.xml b/addons/impala-hook-api/pom.xml new file mode 100644 index 0000000000..fd970675a5 --- /dev/null +++ b/addons/impala-hook-api/pom.xml @@ -0,0 +1,33 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + impala-hook-api + Apache Atlas Impala Hook API Module + Apache Atlas Impala Hook API + jar + + \ No newline at end of file diff --git a/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryCompleteContext.java b/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryCompleteContext.java new file mode 100644 index 0000000000..dc8e31716b --- /dev/null +++ b/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryCompleteContext.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.impala.hooks; + +import java.util.Objects; + +/** + * {@link QueryCompleteContext} encapsulates immutable information sent from the + * BE to a post-query hook. + */ +public class QueryCompleteContext { + private final String lineageGraph_; + + public QueryCompleteContext(String lineageGraph) { + lineageGraph_ = Objects.requireNonNull(lineageGraph); + } + + /** + * Returns the lineage graph sent from the backend during + * {@link QueryEventHook#onQueryComplete(QueryCompleteContext)}. This graph + * object will generally contain more information than it did when it was + * first constructed in the frontend, because the backend will have filled + * in additional information. + *
+ * The returned object is a JSON representation of the lineage graph object + * for the query. The details of the JSON translation are not provided here + * as this is meant to be a temporary feature, and the String format will + * be changed to something more strongly-typed in the future. + *
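+ * For a concrete example of this JSON shape (queryText, queryId, hash, user,
+ * timestamp, endTime, edges, vertices), see the lineage files added under
+ * addons/impala-bridge/src/test/resources in this change, e.g.
+ * impalaCreateTableAsSelect.json.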
+ * + * @return lineage graph from the query that executed + */ + public String getLineageGraph() { return lineageGraph_; } + + @Override + public String toString() { + return "QueryCompleteContext{" + + "lineageGraph='" + lineageGraph_ + '\'' + + '}'; + } +} diff --git a/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryEventHook.java b/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryEventHook.java new file mode 100644 index 0000000000..cd4d2ec080 --- /dev/null +++ b/addons/impala-hook-api/src/main/java/org/apache/impala/hooks/QueryEventHook.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.impala.hooks; + +/** + * {@link QueryEventHook} is the interface for implementations that + * can hook into supported events in Impala query execution. + */ +public interface QueryEventHook { + /** + * Hook method invoked when the Impala daemon starts up. + *
+ * This method will block completion of daemon startup, so you should + * execute any long-running actions asynchronously. + *
+ * Error-Handling
+ * Any {@link Exception} thrown from this method will effectively fail + * Impala startup with an error. Implementations should handle all + * exceptions as gracefully as they can, even if the end result is to + * throw them. + *
+ */ + void onImpalaStartup(); + + /** + * Hook method invoked asynchronously when a (qualifying) Impala query + * has executed, but before it has returned. + *
+ * This method will not block the invoking or subsequent queries, + * but may block future hook invocations if it runs for too long. + *
+ * Error-Handling
+ * Any {@link Throwable} thrown from this method will only be caught + * and logged and will not affect the result of any query. Hook implementations + * should make a best-effort to handle their own exceptions. + *
+ * Important:
+ * This hook is actually invoked when the query is unregistered, + * which may happen a long time after the query has executed. + * e.g. the following sequence is possible: + *
+ *  1. User executes query from Hue.
+ *  2. User goes home for weekend, leaving Hue tab open in browser
+ *  3. If we're lucky, the session timeout expires after some amount of idle time.
+ *  4. The query gets unregistered, lineage record gets logged
+ *
+ * Service Guarantees
+ * + * Impala makes the following guarantees about how this method is executed + * with respect to other implementations that may be registered: + *
+ * Hooks are executed asynchronously
+ * + * All hook execution happens asynchronously of the query that triggered + * them. Hooks may still be executing after the query response has returned + * to the caller. Additionally, hooks may execute concurrently if the + * hook executor thread size is configured appropriately. + *
+ * Hook Invocation is in Configuration Order
+ * + * The submission of the hook execution tasks occurs in the order + * that the hooks were defined in configuration. This generally means that + * hooks will start executing in order, but there are no guarantees + * about finishing order. + *
+ * For example, if configured with {@code query_event_hook_classes=hook1,hook2,hook3}, + * then hook1 will start before hook2, and hook2 will start before hook3. + * If you need to guarantee that hook1 completes before hook2 starts, then + * you should specify {@code query_event_hook_nthreads=1} for serial hook + * execution. + *
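+ * As an illustration (a deployment assumption, not stated by this interface): an
+ * Atlas installation would typically register the hook shipped with this change,
+ * e.g. {@code query_event_hook_classes=org.apache.atlas.impala.hook.ImpalaLineageHook},
+ * assuming that class is available on the Impala daemon's classpath.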
+ * Hook Execution Blocks
+ * + * A hook will block the thread it executes on until it completes. If a hook hangs, + * then the thread also hangs. Impala (currently) will not check for hanging hooks to + * take any action. This means that if you have {@code query_event_hook_nthreads} + * less than the number of hooks, then 1 hook may effectively block others from + * executing. + *
+ * Hook Exceptions are non-fatal
+ * + * Any exception thrown from this hook method will be logged and ignored. Therefore, + * an exception in 1 hook will not affect another hook (when no shared resources are + * involved). + *
+ * Hook Execution may end abruptly at Impala shutdown
+ * + * If a hook is still executing when Impala is shutdown, there are no guarantees + * that it will complete execution before being killed. + * + * + * @param context object containing the post execution context + * of the query + */ + void onQueryComplete(QueryCompleteContext context); +} diff --git a/addons/kafka-bridge/pom.xml b/addons/kafka-bridge/pom.xml index a33e11bb55..30fb53d27c 100644 --- a/addons/kafka-bridge/pom.xml +++ b/addons/kafka-bridge/pom.xml @@ -55,6 +55,37 @@ test + + org.apache.hadoop + hadoop-hdfs + test + + + javax.servlet + servlet-api + + + + + + org.apache.hadoop + hadoop-annotations + test + + + + org.apache.hadoop + hadoop-minicluster + ${hadoop.version} + test + + + javax.servlet + servlet-api + + + + org.testng testng @@ -100,6 +131,12 @@ + + org.apache.hadoop + hadoop-auth + ${hadoop.version} + compile + diff --git a/addons/sqoop-bridge-shim/pom.xml b/addons/sqoop-bridge-shim/pom.xml new file mode 100755 index 0000000000..533a1f1a8c --- /dev/null +++ b/addons/sqoop-bridge-shim/pom.xml @@ -0,0 +1,47 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + sqoop-bridge-shim + Apache Atlas Sqoop Bridge Shim Module + Apache Atlas Sqoop Bridge Shim + jar + + + + + org.apache.atlas + atlas-plugin-classloader + + + + org.apache.sqoop + sqoop + ${sqoop.version} + compile + + + diff --git a/addons/sqoop-bridge-shim/src/main/java/org/apache/atlas/sqoop/hook/SqoopHook.java b/addons/sqoop-bridge-shim/src/main/java/org/apache/atlas/sqoop/hook/SqoopHook.java new file mode 100644 index 0000000000..08b858728e --- /dev/null +++ b/addons/sqoop-bridge-shim/src/main/java/org/apache/atlas/sqoop/hook/SqoopHook.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.sqoop.hook; + + +import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader; +import org.apache.sqoop.SqoopJobDataPublisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Sqoop hook used for atlas entity registration. 
+ */ +public class SqoopHook extends SqoopJobDataPublisher { + private static final Logger LOG = LoggerFactory.getLogger(SqoopHook.class); + + private static final String ATLAS_PLUGIN_TYPE = "sqoop"; + private static final String ATLAS_SQOOP_HOOK_IMPL_CLASSNAME = "org.apache.atlas.sqoop.hook.SqoopHook"; + + private AtlasPluginClassLoader atlasPluginClassLoader = null; + private SqoopJobDataPublisher sqoopHookImpl = null; + + public SqoopHook() { + this.initialize(); + } + + @Override + public void publish(SqoopJobDataPublisher.Data data) throws Exception { + if (LOG.isDebugEnabled()) { + LOG.debug("==> SqoopHook.run({})", data); + } + + try { + activatePluginClassLoader(); + sqoopHookImpl.publish(data); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== SqoopHook.run({})", data); + } + } + + private void initialize() { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HiveHook.initialize()"); + } + + try { + atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass()); + + @SuppressWarnings("unchecked") + Class cls = (Class) Class + .forName(ATLAS_SQOOP_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader); + + activatePluginClassLoader(); + + sqoopHookImpl = cls.newInstance(); + } catch (Exception excp) { + LOG.error("Error instantiating Atlas hook implementation", excp); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HiveHook.initialize()"); + } + } + + private void activatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.activate(); + } + } + + private void deactivatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.deactivate(); + } + } +} diff --git a/addons/sqoop-bridge/pom.xml b/addons/sqoop-bridge/pom.xml new file mode 100644 index 0000000000..021e93f56d --- /dev/null +++ b/addons/sqoop-bridge/pom.xml @@ -0,0 +1,472 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + sqoop-bridge + Apache Atlas Sqoop Bridge Module + Apache Atlas Sqoop Bridge + jar + + + + + org.slf4j + slf4j-api + + + + org.slf4j + slf4j-log4j12 + + + + org.apache.hive + hive-metastore + ${hive.version} + provided + + + org.mortbay.jetty + * + + + com.github.stephenc.findbugs + findbugs-annotations + + + + + + + org.apache.hive + hive-exec + ${hive.version} + provided + + + + org.apache.hive + hive-cli + ${hive.version} + test + + + javax.servlet + * + + + javax.ws.rs + * + + + org.eclipse.jetty.aggregate + * + + + javax.servlet + servlet-api + + + + + + + org.apache.sqoop + sqoop + ${sqoop.version} + compile + + + + org.apache.atlas + atlas-client-v1 + + + + org.apache.atlas + atlas-notification + + + + org.apache.atlas + hive-bridge + + + + org.apache.hive + hive-common + ${hive.version} + + + + + org.apache.atlas + atlas-webapp + war + test + + + + org.apache.hadoop + hadoop-client + + + javax.servlet + servlet-api + + + + + + org.apache.hadoop + hadoop-annotations + + + + org.testng + testng + + + + org.eclipse.jetty + jetty-server + test + + + + org.apache.atlas + atlas-graphdb-impls + pom + test + + + + + + dist + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-hook + package + + copy + + + ${project.build.directory}/dependency/hook/sqoop/atlas-sqoop-plugin-impl + false + false + true + + + ${project.groupId} + ${project.artifactId} + ${project.version} + + + ${project.groupId} + ${project.artifactId} + ${project.version} + + + ${project.groupId} + 
hive-bridge + ${project.version} + + + ${project.groupId} + atlas-client-common + ${project.version} + + + ${project.groupId} + atlas-client-v1 + ${project.version} + + + ${project.groupId} + atlas-client-v2 + ${project.version} + + + ${project.groupId} + atlas-intg + ${project.version} + + + ${project.groupId} + atlas-notification + ${project.version} + + + ${project.groupId} + atlas-common + ${project.version} + + + org.apache.kafka + kafka_${kafka.scala.binary.version} + ${kafka.version} + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + com.sun.jersey + jersey-json + ${jersey.version} + + + javax.ws.rs + jsr311-api + ${jsr.version} + + + + + + copy-hook-shim + package + + copy + + + ${project.build.directory}/dependency/hook/sqoop + false + false + true + + + ${project.groupId} + sqoop-bridge-shim + ${project.version} + + + ${project.groupId} + atlas-plugin-classloader + ${project.version} + + + + + + + + + + + + + + org.eclipse.jetty + jetty-maven-plugin + ${jetty.version} + + ${skipTests} + + + 31000 + 60000 + + ../../webapp/target/atlas-webapp-${project.version}.war + true + + / + ${project.basedir}/../../webapp/src/test/webapp/WEB-INF/web.xml + + true + + true + + atlas.home + ${project.build.directory} + + + atlas.conf + ${project.build.directory}/test-classes + + + atlas.data + ${project.build.directory}/data + + + atlas.log.dir + ${project.build.directory}/logs + + + atlas.log.file + application.log + + + log4j.configuration + file:///${project.build.directory}/test-classes/atlas-log4j.xml + + + atlas.graphdb.backend + ${graphdb.backend.impl} + + + embedded.solr.directory + ${project.build.directory} + + + solr.log.dir + ${project.build.directory}/logs + + + org.eclipse.jetty.annotations.maxWait + 5000 + + + atlas-stop + 31001 + ${jetty-maven-plugin.stopWait} + jar + + + + org.apache.curator + curator-client + ${curator.version} + + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + + + start-jetty + pre-integration-test + + + stop + deploy-war + + + true + + + + stop-jetty + post-integration-test + + stop + + + + + + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.doxia + doxia-module-twiki + ${doxia.version} + + + org.apache.maven.doxia + doxia-core + ${doxia.version} + + + + + + site + + prepare-package + + + + false + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + false + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-resources + validate + + copy-resources + + + ${basedir}/target/models + + + ${basedir}/../models + + 0000-Area0/0010-base_model.json + 1000-Hadoop/** + + + + + + + copy-solr-resources + validate + + copy-resources + + + ${project.build.directory}/solr + + + ${basedir}/../../test-tools/src/main/resources/solr + + + + + + + + + + diff --git a/addons/sqoop-bridge/src/main/java/org/apache/atlas/sqoop/hook/SqoopHook.java b/addons/sqoop-bridge/src/main/java/org/apache/atlas/sqoop/hook/SqoopHook.java new file mode 100644 index 0000000000..0a8cb96675 --- /dev/null +++ b/addons/sqoop-bridge/src/main/java/org/apache/atlas/sqoop/hook/SqoopHook.java @@ -0,0 +1,258 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.sqoop.hook; + + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.AtlasConstants; +import org.apache.atlas.hive.bridge.HiveMetaStoreBridge; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.hook.AtlasHook; +import org.apache.atlas.hook.AtlasHookException; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.sqoop.model.SqoopDataTypes; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.atlas.utils.AtlasConfigurationUtil; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang3.StringUtils; +import org.apache.sqoop.SqoopJobDataPublisher; +import org.apache.sqoop.util.ImportException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.Map; +import java.util.HashMap; +import java.util.Properties; +import java.util.List; +import java.util.Date; + +import static org.apache.atlas.repository.Constants.SQOOP_SOURCE; + +/** + * AtlasHook sends lineage information to the AtlasSever. + */ +public class SqoopHook extends SqoopJobDataPublisher { + private static final Logger LOG = LoggerFactory.getLogger(SqoopHook.class); + + public static final String CLUSTER_NAME_KEY = "atlas.cluster.name"; + public static final String ATLAS_METADATA_NAMESPACE = "atlas.metadata.namespace"; + public static final String DEFAULT_CLUSTER_NAME = "primary"; + + public static final String USER = "userName"; + public static final String DB_STORE_TYPE = "dbStoreType"; + public static final String DB_STORE_USAGE = "storeUse"; + public static final String SOURCE = "source"; + public static final String DESCRIPTION = "description"; + public static final String STORE_URI = "storeUri"; + public static final String OPERATION = "operation"; + public static final String START_TIME = "startTime"; + public static final String END_TIME = "endTime"; + public static final String CMD_LINE_OPTS = "commandlineOpts"; + public static final String INPUTS = "inputs"; + public static final String OUTPUTS = "outputs"; + public static final String ATTRIBUTE_DB = "db"; + + public static final String RELATIONSHIP_HIVE_TABLE_DB = "hive_table_db"; + public static final String RELATIONSHIP_DATASET_PROCESS_INPUTS = "dataset_process_inputs"; + public static final String RELATIONSHIP_PROCESS_DATASET_OUTPUTS = "process_dataset_outputs"; + + private static final AtlasHookImpl atlasHook; + + static { + org.apache.hadoop.conf.Configuration.addDefaultResource("sqoop-site.xml"); + + atlasHook = new AtlasHookImpl(); + } + + @Override + public void publish(SqoopJobDataPublisher.Data data) throws AtlasHookException { + try { + Configuration atlasProperties = ApplicationProperties.get(); + String metadataNamespace = + AtlasConfigurationUtil.getRecentString(atlasProperties, ATLAS_METADATA_NAMESPACE, 
getClusterName(atlasProperties)); + + AtlasEntity entDbStore = toSqoopDBStoreEntity(data); + AtlasEntity entHiveDb = toHiveDatabaseEntity(metadataNamespace, data.getHiveDB()); + AtlasEntity entHiveTable = data.getHiveTable() != null ? toHiveTableEntity(entHiveDb, data.getHiveTable()) : null; + AtlasEntity entProcess = toSqoopProcessEntity(entDbStore, entHiveDb, entHiveTable, data, metadataNamespace); + + + AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo(entProcess); + + entities.addReferredEntity(entDbStore); + entities.addReferredEntity(entHiveDb); + if (entHiveTable != null) { + entities.addReferredEntity(entHiveTable); + } + + HookNotification message = new EntityCreateRequestV2(AtlasHook.getUser(), entities); + + atlasHook.sendNotification(message); + } catch(Exception e) { + LOG.error("SqoopHook.publish() failed", e); + + throw new AtlasHookException("SqoopHook.publish() failed.", e); + } + } + + private String getClusterName(Configuration config) { + return config.getString(CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME); + } + + private AtlasEntity toHiveDatabaseEntity(String metadataNamespace, String dbName) { + AtlasEntity entHiveDb = new AtlasEntity(HiveDataTypes.HIVE_DB.getName()); + String qualifiedName = HiveMetaStoreBridge.getDBQualifiedName(metadataNamespace, dbName); + + entHiveDb.setAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, metadataNamespace); + entHiveDb.setAttribute(AtlasClient.NAME, dbName); + entHiveDb.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, qualifiedName); + + return entHiveDb; + } + + private AtlasEntity toHiveTableEntity(AtlasEntity entHiveDb, String tableName) { + AtlasEntity entHiveTable = new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName()); + String qualifiedName = HiveMetaStoreBridge.getTableQualifiedName((String)entHiveDb.getAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE), (String)entHiveDb.getAttribute(AtlasClient.NAME), tableName); + + entHiveTable.setAttribute(AtlasClient.NAME, tableName.toLowerCase()); + entHiveTable.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, qualifiedName); + entHiveTable.setRelationshipAttribute(ATTRIBUTE_DB, AtlasTypeUtil.getAtlasRelatedObjectId(entHiveDb, RELATIONSHIP_HIVE_TABLE_DB)); + + return entHiveTable; + } + + private AtlasEntity toSqoopDBStoreEntity(SqoopJobDataPublisher.Data data) throws ImportException { + String table = data.getStoreTable(); + String query = data.getStoreQuery(); + + if (StringUtils.isBlank(table) && StringUtils.isBlank(query)) { + throw new ImportException("Both table and query cannot be empty for DBStoreInstance"); + } + + String usage = table != null ? "TABLE" : "QUERY"; + String source = table != null ? 
table : query; + String name = getSqoopDBStoreName(data); + + AtlasEntity entDbStore = new AtlasEntity(SqoopDataTypes.SQOOP_DBDATASTORE.getName()); + + entDbStore.setAttribute(AtlasClient.NAME, name); + entDbStore.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, name); + entDbStore.setAttribute(SqoopHook.DB_STORE_TYPE, data.getStoreType()); + entDbStore.setAttribute(SqoopHook.DB_STORE_USAGE, usage); + entDbStore.setAttribute(SqoopHook.STORE_URI, data.getUrl()); + entDbStore.setAttribute(SqoopHook.SOURCE, source); + entDbStore.setAttribute(SqoopHook.DESCRIPTION, ""); + entDbStore.setAttribute(AtlasClient.OWNER, data.getUser()); + + return entDbStore; + } + + private AtlasEntity toSqoopProcessEntity(AtlasEntity entDbStore, AtlasEntity entHiveDb, AtlasEntity entHiveTable, + SqoopJobDataPublisher.Data data, String metadataNamespace) { + AtlasEntity entProcess = new AtlasEntity(SqoopDataTypes.SQOOP_PROCESS.getName()); + String sqoopProcessName = getSqoopProcessName(data, metadataNamespace); + Map sqoopOptionsMap = new HashMap<>(); + Properties options = data.getOptions(); + + for (Object k : options.keySet()) { + sqoopOptionsMap.put((String)k, (String) options.get(k)); + } + + entProcess.setAttribute(AtlasClient.NAME, sqoopProcessName); + entProcess.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, sqoopProcessName); + entProcess.setAttribute(SqoopHook.OPERATION, data.getOperation()); + + List sqoopObjects = Collections.singletonList(AtlasTypeUtil.getAtlasObjectId(entDbStore)); + List hiveObjects = Collections.singletonList(AtlasTypeUtil.getAtlasObjectId(entHiveTable != null ? entHiveTable : entHiveDb)); + + if (isImportOperation(data)) { + entProcess.setRelationshipAttribute(SqoopHook.INPUTS, AtlasTypeUtil.getAtlasRelatedObjectIdList(sqoopObjects, RELATIONSHIP_DATASET_PROCESS_INPUTS)); + entProcess.setRelationshipAttribute(SqoopHook.OUTPUTS, AtlasTypeUtil.getAtlasRelatedObjectIdList(hiveObjects, RELATIONSHIP_PROCESS_DATASET_OUTPUTS)); + } else { + entProcess.setRelationshipAttribute(SqoopHook.INPUTS, AtlasTypeUtil.getAtlasRelatedObjectIdList(hiveObjects, RELATIONSHIP_DATASET_PROCESS_INPUTS)); + entProcess.setRelationshipAttribute(SqoopHook.OUTPUTS, AtlasTypeUtil.getAtlasRelatedObjectIdList(sqoopObjects, RELATIONSHIP_PROCESS_DATASET_OUTPUTS)); + } + + entProcess.setAttribute(SqoopHook.USER, data.getUser()); + entProcess.setAttribute(SqoopHook.START_TIME, new Date(data.getStartTime())); + entProcess.setAttribute(SqoopHook.END_TIME, new Date(data.getEndTime())); + entProcess.setAttribute(SqoopHook.CMD_LINE_OPTS, sqoopOptionsMap); + + return entProcess; + } + + private boolean isImportOperation(SqoopJobDataPublisher.Data data) { + return data.getOperation().toLowerCase().equals("import"); + } + + static String getSqoopProcessName(Data data, String metadataNamespace) { + StringBuilder name = new StringBuilder(String.format("sqoop %s --connect %s", data.getOperation(), data.getUrl())); + + if (StringUtils.isNotEmpty(data.getHiveTable())) { + name.append(" --table ").append(data.getStoreTable()); + } else { + name.append(" --database ").append(data.getHiveDB()); + } + + if (StringUtils.isNotEmpty(data.getStoreQuery())) { + name.append(" --query ").append(data.getStoreQuery()); + } + + if (data.getHiveTable() != null) { + name.append(String.format(" --hive-%s --hive-database %s --hive-table %s --hive-cluster %s", data.getOperation(), data.getHiveDB().toLowerCase(), data.getHiveTable().toLowerCase(), metadataNamespace)); + } else { + name.append(String.format("--hive-%s --hive-database %s 
--hive-cluster %s", data.getOperation(), data.getHiveDB(), metadataNamespace)); + } + + return name.toString(); + } + + static String getSqoopDBStoreName(SqoopJobDataPublisher.Data data) { + StringBuilder name = new StringBuilder(String.format("%s --url %s", data.getStoreType(), data.getUrl())); + + if (StringUtils.isNotEmpty(data.getHiveTable())) { + name.append(" --table ").append(data.getStoreTable()); + } else { + name.append(" --database ").append(data.getHiveDB()); + } + + if (StringUtils.isNotEmpty(data.getStoreQuery())) { + name.append(" --query ").append(data.getStoreQuery()); + } + + return name.toString(); + } + + private static class AtlasHookImpl extends AtlasHook { + + public String getMessageSource() { + return SQOOP_SOURCE; + } + + public void sendNotification(HookNotification notification) { + super.notifyEntities(Collections.singletonList(notification), null); + } + } +} diff --git a/addons/sqoop-bridge/src/main/java/org/apache/atlas/sqoop/model/SqoopDataTypes.java b/addons/sqoop-bridge/src/main/java/org/apache/atlas/sqoop/model/SqoopDataTypes.java new file mode 100644 index 0000000000..e71220ab90 --- /dev/null +++ b/addons/sqoop-bridge/src/main/java/org/apache/atlas/sqoop/model/SqoopDataTypes.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.sqoop.model; + +/** + * Hive Data Types for model and bridge. + */ +public enum SqoopDataTypes { + + // Classes + SQOOP_DBDATASTORE, + SQOOP_PROCESS, + ; + + public String getName() { + return name().toLowerCase(); + } +} diff --git a/addons/sqoop-bridge/src/test/java/org/apache/atlas/sqoop/hook/SqoopHookIT.java b/addons/sqoop-bridge/src/test/java/org/apache/atlas/sqoop/hook/SqoopHookIT.java new file mode 100644 index 0000000000..71a8779dba --- /dev/null +++ b/addons/sqoop-bridge/src/test/java/org/apache/atlas/sqoop/hook/SqoopHookIT.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.sqoop.hook; + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.hive.bridge.HiveMetaStoreBridge; +import org.apache.atlas.hive.model.HiveDataTypes; +import org.apache.atlas.sqoop.model.SqoopDataTypes; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.atlas.utils.ParamChecker; +import org.apache.atlas.v1.model.instance.Referenceable; +import org.apache.commons.configuration.Configuration; +import org.apache.sqoop.SqoopJobDataPublisher; +import org.slf4j.Logger; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.Properties; + +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.fail; + +public class SqoopHookIT { + public static final Logger LOG = org.slf4j.LoggerFactory.getLogger(SqoopHookIT.class); + private static final String CLUSTER_NAME = "primary"; + public static final String DEFAULT_DB = "default"; + private static final int MAX_WAIT_TIME = 2000; + private AtlasClient atlasClient; + + @BeforeClass + public void setUp() throws Exception { + //Set-up sqoop session + Configuration configuration = ApplicationProperties.get(); + if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) { + atlasClient = new AtlasClient(configuration.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT), new String[]{"admin", "admin"}); + } else { + atlasClient = new AtlasClient(configuration.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT)); + } + } + + @Test + public void testSqoopImport() throws Exception { + SqoopJobDataPublisher.Data d = new SqoopJobDataPublisher.Data("import", "jdbc:mysql:///localhost/db", + "mysqluser", "mysql", "myTable", null, "default", "hiveTable", new Properties(), + System.currentTimeMillis() - 100, System.currentTimeMillis()); + SqoopHook hook = new SqoopHook(); + hook.publish(d); + Thread.sleep(1000); + String storeName = SqoopHook.getSqoopDBStoreName(d); + assertDBStoreIsRegistered(storeName); + String name = SqoopHook.getSqoopProcessName(d, CLUSTER_NAME); + assertSqoopProcessIsRegistered(name); + assertHiveTableIsRegistered(DEFAULT_DB, "hiveTable"); + } + + @Test + public void testSqoopExport() throws Exception { + SqoopJobDataPublisher.Data d = new SqoopJobDataPublisher.Data("export", "jdbc:mysql:///localhost/db", + "mysqluser", "mysql", "myTable", null, "default", "hiveTable", new Properties(), + System.currentTimeMillis() - 100, System.currentTimeMillis()); + SqoopHook hook = new SqoopHook(); + hook.publish(d); + Thread.sleep(1000); + String storeName = SqoopHook.getSqoopDBStoreName(d); + assertDBStoreIsRegistered(storeName); + String name = SqoopHook.getSqoopProcessName(d, CLUSTER_NAME); + assertSqoopProcessIsRegistered(name); + assertHiveTableIsRegistered(DEFAULT_DB, "hiveTable"); + } + + private String assertDBStoreIsRegistered(String storeName) throws Exception { + LOG.debug("Searching for db store {}", storeName); + return assertEntityIsRegistered(SqoopDataTypes.SQOOP_DBDATASTORE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, storeName, null); + } + + private String assertHiveTableIsRegistered(String dbName, String tableName) throws Exception { + LOG.debug("Searching for table {}.{}", dbName, tableName); + return assertEntityIsRegistered(HiveDataTypes.HIVE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName), null); + } + + private String assertSqoopProcessIsRegistered(String processName) 
throws Exception { + LOG.debug("Searching for sqoop process {}", processName); + return assertEntityIsRegistered(SqoopDataTypes.SQOOP_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processName, null); + } + + protected String assertEntityIsRegistered(final String typeName, final String property, final String value, + final AssertPredicate assertPredicate) throws Exception { + waitFor(80000, new Predicate() { + @Override + public void evaluate() throws Exception { + Referenceable entity = atlasClient.getEntity(typeName, property, value); + assertNotNull(entity); + if (assertPredicate != null) { + assertPredicate.assertOnEntity(entity); + } + } + }); + Referenceable entity = atlasClient.getEntity(typeName, property, value); + return entity.getId()._getId(); + } + + public interface Predicate { + void evaluate() throws Exception; + } + + public interface AssertPredicate { + void assertOnEntity(Referenceable entity) throws Exception; + } + + protected void waitFor(int timeout, Predicate predicate) throws Exception { + ParamChecker.notNull(predicate, "predicate"); + long mustEnd = System.currentTimeMillis() + timeout; + + while (true) { + try { + predicate.evaluate(); + return; + } catch(Error | Exception e) { + if (System.currentTimeMillis() >= mustEnd) { + fail("Assertions failed. Failing after waiting for timeout " + timeout + " msecs", e); + } + LOG.debug("Waiting up to {} msec as assertion failed", mustEnd - System.currentTimeMillis(), e); + Thread.sleep(5000); + } + } + } +} diff --git a/addons/sqoop-bridge/src/test/resources/atlas-application.properties b/addons/sqoop-bridge/src/test/resources/atlas-application.properties new file mode 100644 index 0000000000..898b69c999 --- /dev/null +++ b/addons/sqoop-bridge/src/test/resources/atlas-application.properties @@ -0,0 +1,124 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +######### Atlas Server Configs ######### +atlas.rest.address=http://localhost:31000 + +######### Graph Database Configs ######### + + +# Graph database implementation. Value inserted by maven. 
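+# Note (test-only summary of the settings below; nothing new is configured here): the
+# sqoop-bridge ITs run fully embedded - JanusGraph over BerkeleyJE storage, embedded Solr
+# for indexing and embedded Kafka for notifications.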
+atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase + +# Graph Storage +atlas.graph.storage.backend=berkeleyje + +# Entity repository implementation +atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.InMemoryEntityAuditRepository + +# Graph Search Index Backend +atlas.graph.index.search.backend=solr + +#Berkeley storage directory +atlas.graph.storage.directory=${sys:atlas.data}/berkley + +#hbase +#For standalone mode , specify localhost +#for distributed mode, specify zookeeper quorum here + +atlas.graph.storage.hostname=${graph.storage.hostname} +atlas.graph.storage.hbase.regions-per-server=1 +atlas.graph.storage.lock.wait-time=10000 + +#ElasticSearch +atlas.graph.index.search.directory=${sys:atlas.data}/es +atlas.graph.index.search.elasticsearch.client-only=false +atlas.graph.index.search.elasticsearch.local-mode=true +atlas.graph.index.search.elasticsearch.create.sleep=2000 + +# Solr cloud mode properties +atlas.graph.index.search.solr.mode=cloud +atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address} +atlas.graph.index.search.solr.embedded=true +atlas.graph.index.search.max-result-set-size=150 + + +######### Notification Configs ######### +atlas.notification.embedded=true + +atlas.kafka.zookeeper.connect=localhost:19026 +atlas.kafka.bootstrap.servers=localhost:19027 +atlas.kafka.data=${sys:atlas.data}/kafka +atlas.kafka.zookeeper.session.timeout.ms=4000 +atlas.kafka.zookeeper.sync.time.ms=20 +atlas.kafka.consumer.timeout.ms=4000 +atlas.kafka.auto.commit.interval.ms=100 +atlas.kafka.hook.group.id=atlas +atlas.kafka.entities.group.id=atlas_entities +#atlas.kafka.auto.commit.enable=false + +atlas.kafka.enable.auto.commit=false +atlas.kafka.auto.offset.reset=earliest +atlas.kafka.session.timeout.ms=30000 +atlas.kafka.offsets.topic.replication.factor=1 + + + +######### Entity Audit Configs ######### +atlas.audit.hbase.tablename=ATLAS_ENTITY_AUDIT_EVENTS +atlas.audit.zookeeper.session.timeout.ms=1000 +atlas.audit.hbase.zookeeper.quorum=localhost +atlas.audit.hbase.zookeeper.property.clientPort=19026 + +######### Security Properties ######### + +# SSL config +atlas.enableTLS=false +atlas.server.https.port=31443 + +######### Security Properties ######### + +hbase.security.authentication=simple + +atlas.hook.falcon.synchronous=true + +######### JAAS Configuration ######## + +atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule +atlas.jaas.KafkaClient.loginModuleControlFlag = required +atlas.jaas.KafkaClient.option.useKeyTab = true +atlas.jaas.KafkaClient.option.storeKey = true +atlas.jaas.KafkaClient.option.serviceName = kafka +atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab +atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM + +######### High Availability Configuration ######## +atlas.server.ha.enabled=false +#atlas.server.ids=id1 +#atlas.server.address.id1=localhost:21000 + +######### Atlas Authorization ######### +atlas.authorizer.impl=none +# atlas.authorizer.impl=simple +# atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json + +######### Atlas Authentication ######### +atlas.authentication.method.file=true +atlas.authentication.method.ldap.type=none +atlas.authentication.method.kerberos=false +# atlas.authentication.method.file.filename=users-credentials.properties diff --git a/addons/sqoop-bridge/src/test/resources/atlas-log4j.xml b/addons/sqoop-bridge/src/test/resources/atlas-log4j.xml new file mode 100755 index 
0000000000..262a710f7a --- /dev/null +++ b/addons/sqoop-bridge/src/test/resources/atlas-log4j.xml @@ -0,0 +1,137 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/sqoop-bridge/src/test/resources/hive-site.xml b/addons/sqoop-bridge/src/test/resources/hive-site.xml new file mode 100644 index 0000000000..f058c2edc2 --- /dev/null +++ b/addons/sqoop-bridge/src/test/resources/hive-site.xml @@ -0,0 +1,48 @@ + + + + + + + hive.exec.post.hooks + org.apache.atlas.hive.hook.HiveHook + + + + hive.support.concurrency + false + + + + hive.metastore.warehouse.dir + ${user.dir}/target/metastore + + + + javax.jdo.option.ConnectionURL + jdbc:derby:${user.dir}/target/metastore_db;create=true + + + + atlas.hook.hive.synchronous + true + + + + fs.pfile.impl + org.apache.hadoop.fs.ProxyLocalFileSystem + + \ No newline at end of file diff --git a/addons/sqoop-bridge/src/test/resources/sqoop-site.xml b/addons/sqoop-bridge/src/test/resources/sqoop-site.xml new file mode 100644 index 0000000000..a63e7e4e8d --- /dev/null +++ b/addons/sqoop-bridge/src/test/resources/sqoop-site.xml @@ -0,0 +1,190 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + atlas.rest.address + http://localhost:21000/ + + + + sqoop.job.data.publish.class + org.apache.atlas.sqoop.hook.SqoopHook + + + + atlas.cluster.name + primary + + diff --git a/addons/sqoop-bridge/src/test/resources/users-credentials.properties b/addons/sqoop-bridge/src/test/resources/users-credentials.properties new file mode 100644 index 0000000000..5046dbaf64 --- /dev/null +++ b/addons/sqoop-bridge/src/test/resources/users-credentials.properties @@ -0,0 +1,3 @@ +#username=group::sha256-password +admin=ADMIN::a4a88c0872bf652bb9ed803ece5fd6e82354838a9bf59ab4babb1dab322154e1 +rangertagsync=RANGER_TAG_SYNC::0afe7a1968b07d4c3ff4ed8c2d809a32ffea706c66cd795ead9048e81cfaf034 diff --git a/addons/storm-bridge-shim/pom.xml b/addons/storm-bridge-shim/pom.xml new file mode 100755 index 0000000000..f4e75927f0 --- /dev/null +++ b/addons/storm-bridge-shim/pom.xml @@ -0,0 +1,88 @@ + + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + storm-bridge-shim + Apache Atlas Storm Bridge Shim Module + Apache Atlas Storm Bridge Shim + jar + + + + + org.apache.atlas + atlas-plugin-classloader + + + + + org.apache.storm + storm-core + ${storm.version} + jar + + + org.slf4j + slf4j-log4j12 + + + javax.servlet + servlet-api + + + io.dropwizard.metrics + metrics-core + + + io.dropwizard.metrics + metrics-graphite + + + io.dropwizard.metrics + metrics-ganglia + + + + + + + io.dropwizard.metrics + metrics-core + ${dropwizard-metrics} + + + io.dropwizard.metrics + metrics-graphite + ${dropwizard-metrics} + + + io.dropwizard.metrics + metrics-ganglia + ${dropwizard-metrics} + + + + diff --git a/addons/storm-bridge-shim/src/main/java/org/apache/atlas/storm/hook/StormAtlasHook.java b/addons/storm-bridge-shim/src/main/java/org/apache/atlas/storm/hook/StormAtlasHook.java new file mode 100644 index 0000000000..0ce7633aa2 --- /dev/null +++ b/addons/storm-bridge-shim/src/main/java/org/apache/atlas/storm/hook/StormAtlasHook.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.storm.hook; + + +import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader; +import org.apache.storm.ISubmitterHook; +import org.apache.storm.generated.StormTopology; +import org.apache.storm.generated.TopologyInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +/** + * Storm hook used for atlas entity registration. + */ +public class StormAtlasHook implements ISubmitterHook { + private static final Logger LOG = LoggerFactory.getLogger(StormAtlasHook.class); + + + private static final String ATLAS_PLUGIN_TYPE = "storm"; + private static final String ATLAS_STORM_HOOK_IMPL_CLASSNAME = "org.apache.atlas.storm.hook.StormAtlasHook"; + + private AtlasPluginClassLoader atlasPluginClassLoader = null; + private ISubmitterHook stormHook = null; + + + public StormAtlasHook() { + this.initialize(); + } + + @Override + public void notify(TopologyInfo topologyInfo, Map stormConf, StormTopology stormTopology) + throws IllegalAccessException { + if (LOG.isDebugEnabled()) { + LOG.debug("==> StormAtlasHook.notify({}, {}, {})", topologyInfo, stormConf, stormTopology); + } + + try { + activatePluginClassLoader(); + stormHook.notify(topologyInfo, stormConf, stormTopology); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== StormAtlasHook.notify({}, {}, {})", topologyInfo, stormConf, stormTopology); + } + } + + private void initialize() { + if (LOG.isDebugEnabled()) { + LOG.debug("==> StormAtlasHook.initialize()"); + } + + try { + atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass()); + + @SuppressWarnings("unchecked") + Class cls = (Class) Class + .forName(ATLAS_STORM_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader); + + activatePluginClassLoader(); + + stormHook = cls.newInstance(); + } catch (Exception excp) { + LOG.error("Error instantiating Atlas hook implementation", excp); + } finally { + deactivatePluginClassLoader(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== StormAtlasHook.initialize()"); + } + } + + private void activatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.activate(); + } + } + + private void deactivatePluginClassLoader() { + if (atlasPluginClassLoader != null) { + atlasPluginClassLoader.deactivate(); + } + } +} diff --git a/addons/storm-bridge/pom.xml b/addons/storm-bridge/pom.xml new file mode 100644 index 0000000000..77dce71536 --- /dev/null +++ b/addons/storm-bridge/pom.xml @@ -0,0 +1,591 @@ + + + + 4.0.0 + + apache-atlas + org.apache.atlas + 3.0.0-SNAPSHOT + ../../ + + storm-bridge + Apache Atlas Storm Bridge Module + Apache Atlas Storm Bridge + jar + + + + + org.apache.atlas + atlas-client-v1 + + + + org.apache.atlas + atlas-client-v2 + + + + org.apache.atlas + atlas-notification + + + + org.apache.atlas + hive-bridge + + + + org.apache.hive + hive-exec + ${hive.version} + + + 
javax.servlet + * + + + provided + + + + org.apache.hbase + hbase-common + ${hbase.version} + + + javax.servlet + servlet-api + + + org.eclipse.jetty + * + + + + + + org.mockito + mockito-all + + + + + + org.apache.storm + storm-core + ${storm.version} + + + org.slf4j + log4j-over-slf4j + + + javax.servlet + servlet-api + + + io.dropwizard.metrics + metrics-core + + + io.dropwizard.metrics + metrics-graphite + + + io.dropwizard.metrics + metrics-ganglia + + + + + + + io.dropwizard.metrics + metrics-core + ${dropwizard-metrics} + + + io.dropwizard.metrics + metrics-graphite + ${dropwizard-metrics} + + + io.dropwizard.metrics + metrics-ganglia + ${dropwizard-metrics} + + + + + org.testng + testng + + + + org.eclipse.jetty + jetty-server + test + + + + org.eclipse.jetty + jetty-util + ${jetty.version} + test + + + + commons-collections + commons-collections + + + + + + org.apache.atlas + atlas-graphdb-impls + pom + test + + + + org.apache.atlas + atlas-webapp + war + test + + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + test + + + + + + dist + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-hook + package + + copy + + + ${project.build.directory}/dependency/hook/storm/atlas-storm-plugin-impl + false + false + true + + + ${project.groupId} + ${project.artifactId} + ${project.version} + + + ${project.groupId} + atlas-client-common + ${project.version} + + + ${project.groupId} + atlas-client-v1 + ${project.version} + + + ${project.groupId} + atlas-client-v2 + ${project.version} + + + ${project.groupId} + hive-bridge + ${project.version} + + + ${project.groupId} + atlas-intg + ${project.version} + + + ${project.groupId} + atlas-notification + ${project.version} + + + ${project.groupId} + atlas-common + ${project.version} + + + org.apache.kafka + kafka_${kafka.scala.binary.version} + ${kafka.version} + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + aopalliance + aopalliance + ${aopalliance.version} + + + commons-configuration + commons-configuration + ${commons-conf.version} + + + org.apache.commons + commons-configuration2 + ${commons-conf2.version} + + + commons-logging + commons-logging + ${commons-logging.version} + + + commons-collections + commons-collections + ${commons-collections.version} + + + javax.inject + javax.inject + ${javax-inject.version} + + + org.codehaus.jettison + jettison + ${jettison.version} + + + org.codehaus.jettison + jettison + ${jettison.version} + + + com.thoughtworks.paranamer + paranamer + ${paranamer.version} + + + org.apache.hive + hive-exec + ${hive.version} + + + org.apache.hbase + hbase-common + ${hbase.version} + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + org.apache.hadoop + hadoop-auth + ${hadoop.version} + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.databind.version} + + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + + + org.codehaus.woodstox + stax2-api + ${codehaus.woodstox.stax2-api.version} + + + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.hdfs-client.version} + + + + ch.qos.reload4j + reload4j + ${reload4j.version} + + + com.sun.jersey + jersey-json + ${jersey.version} + + + javax.ws.rs + jsr311-api + ${jsr.version} + + + com.fasterxml.woodstox + woodstox-core + ${woodstox-core.version} + + + + + + copy-hook-shim + package + + copy + + + ${project.build.directory}/dependency/hook/storm + false + false + true + + + ${project.groupId} + 
storm-bridge-shim + ${project.version} + + + ${project.groupId} + atlas-plugin-classloader + ${project.version} + + + + + + + + + + + + + + + org.eclipse.jetty + jetty-maven-plugin + ${jetty.version} + + ${skipTests} + + + 31000 + 60000 + + ../../webapp/target/atlas-webapp-${project.version}.war + true + ../../webapp/src/main/webapp + + / + ${project.basedir}/../../webapp/src/main/webapp/WEB-INF/web.xml + + true + + true + + atlas.home + ${project.build.directory} + + + atlas.conf + ${project.build.directory}/test-classes + + + atlas.data + ${project.build.directory}/data + + + atlas.log.dir + ${project.build.directory}/logs + + + atlas.log.file + application.log + + + log4j.configuration + file:///${project.build.directory}/test-classes/atlas-log4j.xml + + + atlas.graphdb.backend + ${graphdb.backend.impl} + + + embedded.solr.directory + ${project.build.directory} + + + solr.log.dir + ${project.build.directory}/logs + + + org.eclipse.jetty.annotations.maxWait + 5000 + + + atlas-stop + 31001 + ${jetty-maven-plugin.stopWait} + jar + + + + org.apache.curator + curator-client + ${curator.version} + + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + + + start-jetty + pre-integration-test + + + stop + deploy-war + + + true + + + + stop-jetty + post-integration-test + + stop + + + + + + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.doxia + doxia-module-twiki + ${doxia.version} + + + org.apache.maven.doxia + doxia-core + ${doxia.version} + + + + + + site + + prepare-package + + + + false + false + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2.1 + false + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + copy-resources + validate + + copy-resources + + + ${basedir}/target/models + + + ${basedir}/../models + + 0000-Area0/0010-base_model.json + 1000-Hadoop/** + + + + + + + copy-solr-resources + validate + + copy-resources + + + ${project.build.directory}/solr + + + ${basedir}/../../test-tools/src/main/resources/solr + + + + + + + + + + diff --git a/addons/storm-bridge/src/main/java/org/apache/atlas/storm/hook/StormAtlasHook.java b/addons/storm-bridge/src/main/java/org/apache/atlas/storm/hook/StormAtlasHook.java new file mode 100644 index 0000000000..69d58d5743 --- /dev/null +++ b/addons/storm-bridge/src/main/java/org/apache/atlas/storm/hook/StormAtlasHook.java @@ -0,0 +1,416 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.storm.hook; + +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo; +import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo; +import org.apache.atlas.model.notification.HookNotification; +import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.atlas.utils.HdfsNameServiceResolver; +import org.apache.commons.collections.CollectionUtils; +import org.apache.storm.ISubmitterHook; +import org.apache.storm.generated.Bolt; +import org.apache.storm.generated.SpoutSpec; +import org.apache.storm.generated.StormTopology; +import org.apache.storm.generated.TopologyInfo; +import org.apache.storm.utils.Utils; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.AtlasConstants; +import org.apache.atlas.hive.bridge.HiveMetaStoreBridge; +import org.apache.atlas.hook.AtlasHook; +import org.apache.atlas.storm.model.StormDataTypes; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.slf4j.Logger; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Date; + +import static org.apache.atlas.repository.Constants.STORM_SOURCE; + +/** + * StormAtlasHook sends storm topology metadata information to Atlas + * via a Kafka Broker for durability. + *
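+ * <p>
+ * Enabling the hook happens on the Storm side (hedged note, not part of this change): the class
+ * is listed under Storm's topology-submission notifier setting in storm.yaml, e.g.
+ * storm.topology.submission.notifier.plugin.class: "org.apache.atlas.storm.hook.StormAtlasHook",
+ * which is how Storm discovers ISubmitterHook implementations.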

+ * This is based on the assumption that the same topology name is used + * for the various lifecycle stages. + */ +public class StormAtlasHook extends AtlasHook implements ISubmitterHook { + public static final Logger LOG = org.slf4j.LoggerFactory.getLogger(StormAtlasHook.class); + + public static final String ANONYMOUS_OWNER = "anonymous"; // if Storm topology does not contain the owner instance; possible if Storm is running in unsecure mode. + public static final String HBASE_NAMESPACE_DEFAULT = "default"; + public static final String ATTRIBUTE_DB = "db"; + + public static final String RELATIONSHIP_STORM_TOPOLOGY_NODES = "storm_topology_nodes"; + public static final String RELATIONSHIP_DATASET_PROCESS_INPUTS = "dataset_process_inputs"; + public static final String RELATIONSHIP_PROCESS_DATASET_OUTPUTS = "process_dataset_outputs"; + public static final String HBASE_TABLE_QUALIFIED_NAME_FORMAT = "%s:%s@%s"; + + /** + * This is the client-side hook that storm fires when a topology is added. + * + * @param topologyInfo topology info + * @param stormConf configuration + * @param stormTopology a storm topology + */ + @Override + public void notify(TopologyInfo topologyInfo, Map stormConf, StormTopology stormTopology) { + LOG.info("Collecting metadata for a new storm topology: {}", topologyInfo.get_name()); + + try { + String user = getUser(topologyInfo.get_owner(), null); + AtlasEntity topology = createTopologyInstance(topologyInfo, stormConf); + AtlasEntitiesWithExtInfo entity = new AtlasEntitiesWithExtInfo(topology); + + addTopologyDataSets(stormTopology, topologyInfo.get_owner(), stormConf, topology, entity); + + // create the graph for the topology + List graphNodes = createTopologyGraph(stormTopology, stormTopology.get_spouts(), stormTopology.get_bolts()); + + if (CollectionUtils.isNotEmpty(graphNodes)) { + // add the connection from topology to the graph + topology.setRelationshipAttribute("nodes", AtlasTypeUtil.getAtlasRelatedObjectIds(graphNodes, RELATIONSHIP_STORM_TOPOLOGY_NODES)); + + for (AtlasEntity graphNode : graphNodes) { + entity.addReferredEntity(graphNode); + } + } + + List hookNotifications = Collections.singletonList(new EntityCreateRequestV2(user, entity)); + + notifyEntities(hookNotifications, null); + } catch (Exception e) { + throw new RuntimeException("Atlas hook is unable to process the topology.", e); + } + } + + private AtlasEntity createTopologyInstance(TopologyInfo topologyInfo, Map stormConf) { + AtlasEntity topology = new AtlasEntity(StormDataTypes.STORM_TOPOLOGY.getName()); + String owner = topologyInfo.get_owner(); + + if (StringUtils.isEmpty(owner)) { + owner = ANONYMOUS_OWNER; + } + + topology.setAttribute("id", topologyInfo.get_id()); + topology.setAttribute(AtlasClient.NAME, topologyInfo.get_name()); + topology.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, topologyInfo.get_name()); + topology.setAttribute(AtlasClient.OWNER, owner); + topology.setAttribute("startTime", new Date(System.currentTimeMillis())); + topology.setAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, getMetadataNamespace()); + + return topology; + } + + private void addTopologyDataSets(StormTopology stormTopology, String topologyOwner, Map stormConf, AtlasEntity topology, AtlasEntityExtInfo entityExtInfo) { + // add each spout as an input data set + addTopologyInputs(stormTopology.get_spouts(), stormConf, topologyOwner, topology, entityExtInfo); + + // add the appropriate bolts as output data sets + addTopologyOutputs(stormTopology, topologyOwner, stormConf, topology, 
entityExtInfo); + } + + private void addTopologyInputs(Map spouts, Map stormConf, String topologyOwner, AtlasEntity topology, AtlasEntityExtInfo entityExtInfo) { + List inputs = new ArrayList<>(); + + for (Map.Entry entry : spouts.entrySet()) { + Serializable instance = Utils.javaDeserialize(entry.getValue().get_spout_object().get_serialized_java(), Serializable.class); + String dsType = instance.getClass().getSimpleName(); + AtlasEntity dsEntity = addDataSet(dsType, topologyOwner, instance, stormConf, entityExtInfo); + + if (dsEntity != null) { + inputs.add(dsEntity); + } + } + + topology.setRelationshipAttribute("inputs", AtlasTypeUtil.getAtlasRelatedObjectIds(inputs, RELATIONSHIP_DATASET_PROCESS_INPUTS)); + } + + private void addTopologyOutputs(StormTopology stormTopology, String topologyOwner, Map stormConf, AtlasEntity topology, AtlasEntityExtInfo entityExtInfo) { + List outputs = new ArrayList<>(); + Map bolts = stormTopology.get_bolts(); + Set boltNames = StormTopologyUtil.getTerminalUserBoltNames(stormTopology); + + for (String boltName : boltNames) { + Serializable instance = Utils.javaDeserialize(bolts.get(boltName).get_bolt_object().get_serialized_java(), Serializable.class); + String dsType = instance.getClass().getSimpleName(); + AtlasEntity dsEntity = addDataSet(dsType, topologyOwner, instance, stormConf, entityExtInfo); + + if (dsEntity != null) { + outputs.add(dsEntity); + } + } + + topology.setRelationshipAttribute("outputs", AtlasTypeUtil.getAtlasRelatedObjectIds(outputs, RELATIONSHIP_PROCESS_DATASET_OUTPUTS)); + } + + private AtlasEntity addDataSet(String dataSetType, String topologyOwner, Serializable instance, Map stormConf, AtlasEntityExtInfo entityExtInfo) { + Map config = StormTopologyUtil.getFieldValues(instance, true, null); + AtlasEntity ret = null; + String metadataNamespace = getMetadataNamespace(); + + // todo: need to redo this with a config driven approach + switch (dataSetType) { + case "KafkaSpout": { + String topicName = config.get("KafkaSpout.kafkaSpoutConfig.translator.topic"); + String uri = config.get("KafkaSpout.kafkaSpoutConfig.kafkaProps.bootstrap.servers"); + + if (StringUtils.isEmpty(topicName)) { + topicName = config.get("KafkaSpout._spoutConfig.topic"); + } + + if (StringUtils.isEmpty(uri)) { + uri = config.get("KafkaSpout._spoutConfig.hosts.brokerZkStr"); + } + + if (StringUtils.isEmpty(topologyOwner)) { + topologyOwner = ANONYMOUS_OWNER; + } + + if (topicName == null) { + LOG.error("Kafka topic name not found"); + } else { + ret = new AtlasEntity(StormDataTypes.KAFKA_TOPIC.getName()); + + ret.setAttribute("topic", topicName); + ret.setAttribute("uri", uri); + ret.setAttribute(AtlasClient.OWNER, topologyOwner); + ret.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getKafkaTopicQualifiedName(metadataNamespace, topicName)); + ret.setAttribute(AtlasClient.NAME, topicName); + } + } + break; + + case "HBaseBolt": { + final String hbaseTableName = config.get("HBaseBolt.tableName"); + String uri = config.get("hbase.rootdir"); + + if (StringUtils.isEmpty(uri)) { + uri = hbaseTableName; + } + + metadataNamespace = extractComponentMetadataNamespace(HBaseConfiguration.create(), stormConf); + + if (hbaseTableName == null) { + LOG.error("HBase table name not found"); + } else { + ret = new AtlasEntity(StormDataTypes.HBASE_TABLE.getName()); + + ret.setAttribute("uri", hbaseTableName); + ret.setAttribute(AtlasClient.NAME, uri); + ret.setAttribute(AtlasClient.OWNER, stormConf.get("storm.kerberos.principal")); + //TODO - Hbase Namespace is hardcoded to 
'default'. need to check how to get this or is it already part of tableName + ret.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getHbaseTableQualifiedName(metadataNamespace, HBASE_NAMESPACE_DEFAULT, hbaseTableName)); + } + } + break; + + case "HdfsBolt": { + final String hdfsUri = config.get("HdfsBolt.rotationActions") == null ? config.get("HdfsBolt.fileNameFormat.path") : config.get("HdfsBolt.rotationActions"); + final String hdfsPathStr = config.get("HdfsBolt.fsUrl") + hdfsUri; + final Path hdfsPath = new Path(hdfsPathStr); + final String nameServiceID = HdfsNameServiceResolver.getNameServiceIDForPath(hdfsPathStr); + + ret = new AtlasEntity(HiveMetaStoreBridge.HDFS_PATH); + + ret.setAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, metadataNamespace); + ret.setAttribute(AtlasClient.OWNER, stormConf.get("hdfs.kerberos.principal")); + ret.setAttribute(AtlasClient.NAME, Path.getPathWithoutSchemeAndAuthority(hdfsPath).toString().toLowerCase()); + + if (StringUtils.isNotEmpty(nameServiceID)) { + String updatedPath = HdfsNameServiceResolver.getPathWithNameServiceID(hdfsPathStr); + + ret.setAttribute("path", updatedPath); + ret.setAttribute("nameServiceId", nameServiceID); + ret.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getHdfsPathQualifiedName(metadataNamespace, updatedPath)); + } else { + ret.setAttribute("path", hdfsPathStr); + ret.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getHdfsPathQualifiedName(metadataNamespace, hdfsPathStr)); + } + } + break; + + case "HiveBolt": { + metadataNamespace = extractComponentMetadataNamespace(new HiveConf(), stormConf); + + final String dbName = config.get("HiveBolt.options.databaseName"); + final String tblName = config.get("HiveBolt.options.tableName"); + + if (dbName == null || tblName ==null) { + LOG.error("Hive database or table name not found"); + } else { + AtlasEntity dbEntity = new AtlasEntity("hive_db"); + + dbEntity.setAttribute(AtlasClient.NAME, dbName); + dbEntity.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getDBQualifiedName(metadataNamespace, dbName)); + dbEntity.setAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, metadataNamespace); + + entityExtInfo.addReferredEntity(dbEntity); + + // todo: verify if hive table has everything needed to retrieve existing table + ret = new AtlasEntity("hive_table"); + + ret.setAttribute(AtlasClient.NAME, tblName); + ret.setRelationshipAttribute(ATTRIBUTE_DB, AtlasTypeUtil.getAtlasRelatedObjectId(dbEntity, "hive_table_db")); + ret.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, HiveMetaStoreBridge.getTableQualifiedName(metadataNamespace, dbName, tblName)); + } + } + break; + + default: + // custom node - create a base dataset class with name attribute + //TODO - What should we do for custom data sets. Not sure what name we can set here? 
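+ // As written, unrecognized spout/bolt types yield no data set: the null returned below is
+ // skipped by addTopologyInputs()/addTopologyOutputs(), so such components appear only as
+ // topology graph nodes, not as lineage inputs/outputs.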
+ return null; + } + + if (ret != null) { + entityExtInfo.addReferredEntity(ret); + } + + return ret; + } + + private List createTopologyGraph(StormTopology stormTopology, Map spouts, Map bolts) { + // Add graph of nodes in the topology + Map nodeEntities = new HashMap<>(); + + addSpouts(spouts, nodeEntities); + addBolts(bolts, nodeEntities); + + addGraphConnections(stormTopology, nodeEntities); + + return new ArrayList<>(nodeEntities.values()); + } + + private void addSpouts(Map spouts, Map nodeEntities) { + for (Map.Entry entry : spouts.entrySet()) { + String spoutName = entry.getKey(); + AtlasEntity spout = createSpoutInstance(spoutName, entry.getValue()); + + nodeEntities.put(spoutName, spout); + } + } + + private void addBolts(Map bolts, Map nodeEntities) { + for (Map.Entry entry : bolts.entrySet()) { + String boltName = entry.getKey(); + AtlasEntity boltInstance = createBoltInstance(boltName, entry.getValue()); + + nodeEntities.put(boltName, boltInstance); + } + } + + private AtlasEntity createSpoutInstance(String spoutName, SpoutSpec stormSpout) { + AtlasEntity spout = new AtlasEntity(StormDataTypes.STORM_SPOUT.getName()); + Serializable instance = Utils.javaDeserialize(stormSpout.get_spout_object().get_serialized_java(), Serializable.class); + Map flatConfigMap = StormTopologyUtil.getFieldValues(instance, true, null); + + spout.setAttribute(AtlasClient.NAME, spoutName); + spout.setAttribute("driverClass", instance.getClass().getName()); + spout.setAttribute("conf", flatConfigMap); + + return spout; + } + + private AtlasEntity createBoltInstance(String boltName, Bolt stormBolt) { + AtlasEntity bolt = new AtlasEntity(StormDataTypes.STORM_BOLT.getName()); + Serializable instance = Utils.javaDeserialize(stormBolt.get_bolt_object().get_serialized_java(), Serializable.class); + Map flatConfigMap = StormTopologyUtil.getFieldValues(instance, true, null); + + bolt.setAttribute(AtlasClient.NAME, boltName); + bolt.setAttribute("driverClass", instance.getClass().getName()); + bolt.setAttribute("conf", flatConfigMap); + + return bolt; + } + + private void addGraphConnections(StormTopology stormTopology, Map nodeEntities) { + // adds connections between spouts and bolts + Map> adjacencyMap = StormTopologyUtil.getAdjacencyMap(stormTopology, true); + + for (Map.Entry> entry : adjacencyMap.entrySet()) { + String nodeName = entry.getKey(); + Set adjacencyList = adjacencyMap.get(nodeName); + + if (CollectionUtils.isEmpty(adjacencyList)) { + continue; + } + + // add outgoing links + AtlasEntity node = nodeEntities.get(nodeName); + List outputs = new ArrayList<>(adjacencyList.size()); + + outputs.addAll(adjacencyList); + node.setAttribute("outputs", outputs); + + // add incoming links + for (String adjacentNodeName : adjacencyList) { + AtlasEntity adjacentNode = nodeEntities.get(adjacentNodeName); + @SuppressWarnings("unchecked") + List inputs = (List) adjacentNode.getAttribute("inputs"); + + if (inputs == null) { + inputs = new ArrayList<>(); + } + + inputs.add(nodeName); + adjacentNode.setAttribute("inputs", inputs); + } + } + } + + public static String getKafkaTopicQualifiedName(String metadataNamespace, String topicName) { + return String.format("%s@%s", topicName.toLowerCase(), metadataNamespace); + } + + public static String getHbaseTableQualifiedName(String metadataNamespace, String nameSpace, String tableName) { + return String.format(HBASE_TABLE_QUALIFIED_NAME_FORMAT, nameSpace.toLowerCase(), tableName.toLowerCase(), metadataNamespace); + } + + public static String 
getHdfsPathQualifiedName(String metadataNamespace, String hdfsPath) { + return String.format("%s@%s", hdfsPath.toLowerCase(), metadataNamespace); + } + + private String extractComponentMetadataNamespace(Configuration configuration, Map stormConf) { + String clusterName = configuration.get(CLUSTER_NAME_KEY, null); + + if (clusterName == null) { + clusterName = getMetadataNamespace(); + } + + return clusterName; + } + + @Override + public String getMessageSource() { + return STORM_SOURCE; + } +} \ No newline at end of file diff --git a/addons/storm-bridge/src/main/java/org/apache/atlas/storm/hook/StormTopologyUtil.java b/addons/storm-bridge/src/main/java/org/apache/atlas/storm/hook/StormTopologyUtil.java new file mode 100644 index 0000000000..b903dbc694 --- /dev/null +++ b/addons/storm-bridge/src/main/java/org/apache/atlas/storm/hook/StormTopologyUtil.java @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.storm.hook; + +import org.apache.commons.lang.StringUtils; +import org.apache.storm.generated.Bolt; +import org.apache.storm.generated.GlobalStreamId; +import org.apache.storm.generated.Grouping; +import org.apache.storm.generated.StormTopology; +import com.google.common.base.Joiner; +import org.slf4j.Logger; + +import java.lang.reflect.Field; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * A storm topology utility class. + */ +public final class StormTopologyUtil { + public static final Logger LOG = org.slf4j.LoggerFactory.getLogger(StormTopologyUtil.class); + + private StormTopologyUtil() { + } + + public static Set getTerminalUserBoltNames(StormTopology topology) { + Set terminalBolts = new HashSet<>(); + Set inputs = new HashSet<>(); + for (Map.Entry entry : topology.get_bolts().entrySet()) { + String name = entry.getKey(); + Set inputsForBolt = entry.getValue().get_common().get_inputs().keySet(); + if (!isSystemComponent(name)) { + for (GlobalStreamId streamId : inputsForBolt) { + inputs.add(streamId.get_componentId()); + } + } + } + + for (String boltName : topology.get_bolts().keySet()) { + if (!isSystemComponent(boltName) && !inputs.contains(boltName)) { + terminalBolts.add(boltName); + } + } + + return terminalBolts; + } + + public static boolean isSystemComponent(String componentName) { + return componentName.startsWith("__"); + } + + public static Map> getAdjacencyMap(StormTopology topology, + boolean removeSystemComponent) { + Map> adjacencyMap = new HashMap<>(); + + for (Map.Entry entry : topology.get_bolts().entrySet()) { + String boltName = entry.getKey(); + Map inputs = entry.getValue().get_common().get_inputs(); + for (Map.Entry input : inputs.entrySet()) { + String inputComponentId = input.getKey().get_componentId(); + Set components = adjacencyMap.containsKey(inputComponentId) + ? adjacencyMap.get(inputComponentId) : new HashSet(); + components.add(boltName); + components = removeSystemComponent ? 
removeSystemComponents(components) + : components; + if (!removeSystemComponent || !isSystemComponent(inputComponentId)) { + adjacencyMap.put(inputComponentId, components); + } + } + } + + return adjacencyMap; + } + + public static Set removeSystemComponents(Set components) { + Set userComponents = new HashSet<>(); + for (String component : components) { + if (!isSystemComponent(component)) + userComponents.add(component); + } + + return userComponents; + } + + private static final Set WRAPPER_TYPES = new HashSet() {{ + add(Boolean.class); + add(Character.class); + add(Byte.class); + add(Short.class); + add(Integer.class); + add(Long.class); + add(Float.class); + add(Double.class); + add(Void.class); + add(String.class); + }}; + + public static boolean isWrapperType(Class clazz) { + return WRAPPER_TYPES.contains(clazz); + } + + public static boolean isCollectionType(Class clazz) { + return Collection.class.isAssignableFrom(clazz); + } + + public static boolean isMapType(Class clazz) { + return Map.class.isAssignableFrom(clazz); + } + + public static Map getFieldValues(Object instance, + boolean prependClassName, + Set objectsToSkip) { + if (objectsToSkip == null) { + objectsToSkip = new HashSet<>(); + } + + Map output = new HashMap<>(); + + try { + if (objectsToSkip.add(instance)) { + Class clazz = instance.getClass(); + for (Class c = clazz; c != null; c = c.getSuperclass()) { + Field[] fields = c.getDeclaredFields(); + for (Field field : fields) { + if (java.lang.reflect.Modifier.isStatic(field.getModifiers())) { + continue; + } + + String key; + if (prependClassName) { + key = String.format("%s.%s", clazz.getSimpleName(), field.getName()); + } else { + key = field.getName(); + } + + boolean accessible = field.isAccessible(); + if (!accessible) { + field.setAccessible(true); + } + Object fieldVal = field.get(instance); + if (fieldVal == null) { + continue; + } else if (fieldVal.getClass().isPrimitive() || + isWrapperType(fieldVal.getClass())) { + if (toString(fieldVal, false).isEmpty()) continue; + output.put(key, toString(fieldVal, false)); + } else if (isMapType(fieldVal.getClass())) { + //TODO: check if it makes more sense to just stick to json + // like structure instead of a flatten output. + Map map = (Map) fieldVal; + for (Object entry : map.entrySet()) { + Object mapKey = ((Map.Entry) entry).getKey(); + Object mapVal = ((Map.Entry) entry).getValue(); + + String keyStr = getString(mapKey, false, objectsToSkip); + String valStr = getString(mapVal, false, objectsToSkip); + if (StringUtils.isNotEmpty(valStr)) { + output.put(String.format("%s.%s", key, keyStr), valStr); + } + } + } else if (isCollectionType(fieldVal.getClass())) { + //TODO check if it makes more sense to just stick to + // json like structure instead of a flatten output. 
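+                        // Collection values are flattened into a single comma-separated string under the field's key;
+                        // any other non-primitive value falls through to a recursive getFieldValues() call whose entries
+                        // are re-emitted with a "<key>.<nestedKey>" prefix.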
+ Collection collection = (Collection) fieldVal; + if (collection.size() == 0) continue; + String outStr = ""; + for (Object o : collection) { + outStr += getString(o, false, objectsToSkip) + ","; + } + if (outStr.length() > 0) { + outStr = outStr.substring(0, outStr.length() - 1); + } + output.put(key, String.format("%s", outStr)); + } else { + Map nestedFieldValues = getFieldValues(fieldVal, false, objectsToSkip); + for (Map.Entry entry : nestedFieldValues.entrySet()) { + output.put(String.format("%s.%s", key, entry.getKey()), entry.getValue()); + } + } + if (!accessible) { + field.setAccessible(false); + } + } + } + } + } + catch (Exception e){ + LOG.warn("Exception while constructing topology", e); + } + return output; + } + + private static String getString(Object instance, + boolean wrapWithQuote, + Set objectsToSkip) { + if (instance == null) { + return null; + } else if (instance.getClass().isPrimitive() || isWrapperType(instance.getClass())) { + return toString(instance, wrapWithQuote); + } else { + return getString(getFieldValues(instance, false, objectsToSkip), wrapWithQuote); + } + } + + private static String getString(Map flattenFields, boolean wrapWithQuote) { + String outStr = ""; + if (flattenFields != null && !flattenFields.isEmpty()) { + if (wrapWithQuote) { + outStr += "\"" + Joiner.on(",").join(flattenFields.entrySet()) + "\","; + } else { + outStr += Joiner.on(",").join(flattenFields.entrySet()) + ","; + } + } + if (outStr.length() > 0) { + outStr = outStr.substring(0, outStr.length() - 1); + } + return outStr; + } + + private static String toString(Object instance, boolean wrapWithQuote) { + if (instance instanceof String) + if (wrapWithQuote) + return "\"" + instance + "\""; + else + return instance.toString(); + else + return instance.toString(); + } +} diff --git a/addons/storm-bridge/src/main/java/org/apache/atlas/storm/model/StormDataTypes.java b/addons/storm-bridge/src/main/java/org/apache/atlas/storm/model/StormDataTypes.java new file mode 100644 index 0000000000..7eb1e3cb87 --- /dev/null +++ b/addons/storm-bridge/src/main/java/org/apache/atlas/storm/model/StormDataTypes.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.storm.model; + + +/** + * Storm Data Types for model and hook. 
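+ * Each constant's Atlas type name is derived by getName(), which lower-cases the enum name
+ * (for example, STORM_TOPOLOGY becomes "storm_topology").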
+ */ +public enum StormDataTypes { + + // Topology Classes + STORM_TOPOLOGY, // represents the topology containing the DAG + + STORM_NODE, // base abstraction for producer and processor + STORM_SPOUT, // data producer node having only outputs + STORM_BOLT, // data processing node having both inputs and outputs + + // Data Sets + KAFKA_TOPIC, // kafka data set + JMS_TOPIC, // jms data set + HBASE_TABLE, // hbase table data set + ; + + public String getName() { + return name().toLowerCase(); + } +} diff --git a/addons/storm-bridge/src/test/java/org/apache/atlas/storm/hook/StormAtlasHookIT.java b/addons/storm-bridge/src/test/java/org/apache/atlas/storm/hook/StormAtlasHookIT.java new file mode 100644 index 0000000000..e11e1b8b0a --- /dev/null +++ b/addons/storm-bridge/src/test/java/org/apache/atlas/storm/hook/StormAtlasHookIT.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.storm.hook; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.hive.bridge.HiveMetaStoreBridge; +import org.apache.atlas.v1.model.instance.Referenceable; +import org.apache.atlas.storm.model.StormDataTypes; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.commons.configuration.Configuration; +import org.apache.storm.ILocalCluster; +import org.apache.storm.generated.StormTopology; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +@Test +public class StormAtlasHookIT { + + public static final Logger LOG = LoggerFactory.getLogger(StormAtlasHookIT.class); + + private static final String ATLAS_URL = "http://localhost:21000/"; + private static final String TOPOLOGY_NAME = "word-count"; + + private ILocalCluster stormCluster; + private AtlasClient atlasClient; + + @BeforeClass + public void setUp() throws Exception { + // start a local storm cluster + stormCluster = StormTestUtil.createLocalStormCluster(); + LOG.info("Created a storm local cluster"); + + Configuration configuration = ApplicationProperties.get(); + if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) { + atlasClient = new AtlasClient(configuration.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT), new String[]{"admin", "admin"}); + } else { + atlasClient = new AtlasClient(configuration.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT)); + } + } + + + @AfterClass + public void tearDown() throws Exception { + LOG.info("Shutting down storm local cluster"); + stormCluster.shutdown(); + + atlasClient = null; + } + + 
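+    // Submits the sample word-count topology to the local cluster, then runs a DSL search on the
+    // storm_topology type to verify that the hook registered the topology entity in Atlas.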
@Test + public void testAddEntities() throws Exception { + StormTopology stormTopology = StormTestUtil.createTestTopology(); + StormTestUtil.submitTopology(stormCluster, TOPOLOGY_NAME, stormTopology); + LOG.info("Submitted topology {}", TOPOLOGY_NAME); + + // todo: test if topology metadata is registered in atlas + String guid = getTopologyGUID(); + Assert.assertNotNull(guid); + LOG.info("GUID is {}", guid); + + Referenceable topologyReferenceable = atlasClient.getEntity(guid); + Assert.assertNotNull(topologyReferenceable); + } + + private String getTopologyGUID() throws Exception { + LOG.debug("Searching for topology {}", TOPOLOGY_NAME); + String query = String.format("from %s where name = \"%s\"", + StormDataTypes.STORM_TOPOLOGY.getName(), TOPOLOGY_NAME); + + JsonNode results = atlasClient.search(query, 10, 0); + JsonNode row = results.get(0); + + return row.has("$id$") ? row.get("$id$").get("id").asText() : null; + } +} diff --git a/addons/storm-bridge/src/test/java/org/apache/atlas/storm/hook/StormTestUtil.java b/addons/storm-bridge/src/test/java/org/apache/atlas/storm/hook/StormTestUtil.java new file mode 100644 index 0000000000..d869f18cd7 --- /dev/null +++ b/addons/storm-bridge/src/test/java/org/apache/atlas/storm/hook/StormTestUtil.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.storm.hook; + +import org.apache.storm.Config; +import org.apache.storm.ILocalCluster; +import org.apache.storm.Testing; +import org.apache.storm.generated.StormTopology; +import org.apache.storm.testing.TestGlobalCount; +import org.apache.storm.testing.TestWordCounter; +import org.apache.storm.testing.TestWordSpout; +import org.apache.storm.topology.TopologyBuilder; +import org.apache.storm.utils.Utils; + +import java.util.HashMap; + +/** + * An until to create a test topology. 
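+ * The generated topology wires a TestWordSpout ("words") into a TestWordCounter ("count") bolt
+ * and a TestGlobalCount ("globalCount") bolt; submitTopology() registers StormAtlasHook as the
+ * topology submission notifier before submitting to the local cluster.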
+ */ +final class StormTestUtil { + + private StormTestUtil() { + } + + public static ILocalCluster createLocalStormCluster() { + // start a local storm cluster + HashMap localClusterConf = new HashMap<>(); + localClusterConf.put("nimbus-daemon", true); + return Testing.getLocalCluster(localClusterConf); + } + + public static StormTopology createTestTopology() { + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("words", new TestWordSpout(), 10); + builder.setBolt("count", new TestWordCounter(), 3).shuffleGrouping("words"); + builder.setBolt("globalCount", new TestGlobalCount(), 2).shuffleGrouping("count"); + + return builder.createTopology(); + } + + public static Config submitTopology(ILocalCluster stormCluster, String topologyName, + StormTopology stormTopology) throws Exception { + Config stormConf = new Config(); + stormConf.putAll(Utils.readDefaultConfig()); + stormConf.put("storm.cluster.mode", "local"); + stormConf.setDebug(true); + stormConf.setMaxTaskParallelism(3); + stormConf.put(Config.STORM_TOPOLOGY_SUBMISSION_NOTIFIER_PLUGIN, + org.apache.atlas.storm.hook.StormAtlasHook.class.getName()); + + stormCluster.submitTopology(topologyName, stormConf, stormTopology); + + Thread.sleep(10000); + return stormConf; + } +} diff --git a/addons/storm-bridge/src/test/resources/atlas-application.properties b/addons/storm-bridge/src/test/resources/atlas-application.properties new file mode 100644 index 0000000000..b822578947 --- /dev/null +++ b/addons/storm-bridge/src/test/resources/atlas-application.properties @@ -0,0 +1,126 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +######### Atlas Server Configs ######### +atlas.rest.address=http://localhost:31000 + + + +######### Graph Database Configs ######### + + +# Graph database implementation. Value inserted by maven. 
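+# For these tests the JanusGraph graph database is used with BerkeleyJE storage and an
+# embedded Solr index backend, as configured by the properties below.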
+atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase + +# Graph Storage +atlas.graph.storage.backend=berkeleyje + +# Entity repository implementation +atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.InMemoryEntityAuditRepository + +# Graph Search Index Backend +atlas.graph.index.search.backend=solr + +#Berkeley storage directory +atlas.graph.storage.directory=${sys:atlas.data}/berkley + +#hbase +#For standalone mode , specify localhost +#for distributed mode, specify zookeeper quorum here + +atlas.graph.storage.hostname=${graph.storage.hostname} +atlas.graph.storage.hbase.regions-per-server=1 +atlas.graph.storage.lock.wait-time=10000 + +#ElasticSearch +atlas.graph.index.search.directory=${sys:atlas.data}/es +atlas.graph.index.search.elasticsearch.client-only=false +atlas.graph.index.search.elasticsearch.local-mode=true +atlas.graph.index.search.elasticsearch.create.sleep=2000 + +# Solr cloud mode properties +atlas.graph.index.search.solr.mode=cloud +atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address} +atlas.graph.index.search.solr.embedded=true +atlas.graph.index.search.max-result-set-size=150 + + +######### Notification Configs ######### +atlas.notification.embedded=true + +atlas.kafka.zookeeper.connect=localhost:19026 +atlas.kafka.bootstrap.servers=localhost:19027 +atlas.kafka.data=${sys:atlas.data}/kafka +atlas.kafka.zookeeper.session.timeout.ms=4000 +atlas.kafka.zookeeper.sync.time.ms=20 +atlas.kafka.consumer.timeout.ms=4000 +atlas.kafka.auto.commit.interval.ms=100 +atlas.kafka.hook.group.id=atlas +atlas.kafka.entities.group.id=atlas_entities +#atlas.kafka.auto.commit.enable=false + +atlas.kafka.enable.auto.commit=false +atlas.kafka.auto.offset.reset=earliest +atlas.kafka.session.timeout.ms=30000 +atlas.kafka.offsets.topic.replication.factor=1 + + + +######### Entity Audit Configs ######### +atlas.audit.hbase.tablename=ATLAS_ENTITY_AUDIT_EVENTS +atlas.audit.zookeeper.session.timeout.ms=1000 +atlas.audit.hbase.zookeeper.quorum=localhost +atlas.audit.hbase.zookeeper.property.clientPort=19026 + +######### Security Properties ######### + +# SSL config +atlas.enableTLS=false +atlas.server.https.port=31443 + +######### Security Properties ######### + +hbase.security.authentication=simple + +atlas.hook.falcon.synchronous=true + +######### JAAS Configuration ######## + +atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule +atlas.jaas.KafkaClient.loginModuleControlFlag = required +atlas.jaas.KafkaClient.option.useKeyTab = true +atlas.jaas.KafkaClient.option.storeKey = true +atlas.jaas.KafkaClient.option.serviceName = kafka +atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab +atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM + +######### High Availability Configuration ######## +atlas.server.ha.enabled=false +#atlas.server.ids=id1 +#atlas.server.address.id1=localhost:21000 + +######### Atlas Authorization ######### +atlas.authorizer.impl=none +# atlas.authorizer.impl=simple +# atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json + +######### Atlas Authentication ######### +atlas.authentication.method.file=true +atlas.authentication.method.ldap.type=none +atlas.authentication.method.kerberos=false +# atlas.authentication.method.file.filename=users-credentials.properties diff --git a/addons/storm-bridge/src/test/resources/atlas-log4j.xml b/addons/storm-bridge/src/test/resources/atlas-log4j.xml new file mode 100755 index 
0000000000..262a710f7a --- /dev/null +++ b/addons/storm-bridge/src/test/resources/atlas-log4j.xml @@ -0,0 +1,137 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/addons/storm-bridge/src/test/resources/users-credentials.properties b/addons/storm-bridge/src/test/resources/users-credentials.properties new file mode 100644 index 0000000000..5046dbaf64 --- /dev/null +++ b/addons/storm-bridge/src/test/resources/users-credentials.properties @@ -0,0 +1,3 @@ +#username=group::sha256-password +admin=ADMIN::a4a88c0872bf652bb9ed803ece5fd6e82354838a9bf59ab4babb1dab322154e1 +rangertagsync=RANGER_TAG_SYNC::0afe7a1968b07d4c3ff4ed8c2d809a32ffea706c66cd795ead9048e81cfaf034 diff --git a/atlas-hub/env_change.sh b/atlas-hub/env_change.sh new file mode 100644 index 0000000000..66e0718bbf --- /dev/null +++ b/atlas-hub/env_change.sh @@ -0,0 +1,24 @@ +#! /bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +sed -i -e "s~RANGER_SERVICE_URL~${RANGER_SERVICE_URL}~g" /opt/ranger-atlas-plugin/install.properties +sed -i -e "s~ATLAS_REPOSITORY_NAME~${ATLAS_REPOSITORY_NAME}~g" /opt/ranger-atlas-plugin/install.properties +bash /opt/ranger-atlas-plugin/enable-atlas-plugin.sh +sleep 10 \ No newline at end of file diff --git a/atlas-hub/pre-conf/ranger/disable-atlas-plugin.sh b/atlas-hub/pre-conf/ranger/disable-atlas-plugin.sh new file mode 100755 index 0000000000..a15728f695 --- /dev/null +++ b/atlas-hub/pre-conf/ranger/disable-atlas-plugin.sh @@ -0,0 +1,795 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
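+# getInstallProperty <name> looks the named property up first in the component-specific
+# ${COMPONENT_NAME}-install.properties and then in the common install.properties, echoing the
+# first non-empty value found.
+# Illustrative usage (as done later in this script): REPO_NAME=$(getInstallProperty 'REPOSITORY_NAME')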
+ +function getInstallProperty() { + local propertyName=$1 + local propertyValue="" + + for file in "${COMPONENT_INSTALL_ARGS}" "${INSTALL_ARGS}" + do + if [ -f "${file}" ] + then + propertyValue=`grep "^${propertyName}[ \t]*=" ${file} | awk -F= '{ sub("^[ \t]*", "", $2); sub("[ \t]*$", "", $2); print $2 }'` + if [ "${propertyValue}" != "" ] + then + break + fi + fi + done + + echo ${propertyValue} +} + +# +# Base env variable for Ranger related files/directories +# +PROJ_NAME=ranger + +# +# The script should be run by "root" user +# + +if [ ! -w /etc/passwd ] +then + echo "ERROR: $0 script should be run as root." + exit 1 +fi + +#Check for JAVA_HOME +if [ "${JAVA_HOME}" == "" ] +then + echo "ERROR: JAVA_HOME environment property not defined, aborting installation." + exit 1 +fi + +# +# Identify the component, action from the script file +# + +basedir=`dirname $0` +if [ "${basedir}" = "." ] +then + basedir=`pwd` +elif [ "${basedir}" = ".." ] +then + basedir=`(cd .. ;pwd)` +fi + +# +# As this script is common to all component, find the component name based on the script-name +# + +COMPONENT_NAME=`basename $0 | cut -d. -f1 | sed -e 's:^disable-::' | sed -e 's:^enable-::'` + +echo "${COMPONENT_NAME}" | grep 'plugin' > /dev/null 2>&1 + +if [ $? -ne 0 ] +then + echo "$0 : is not applicable for component [${COMPONENT_NAME}]. It is applicable only for ranger plugin component; Exiting ..." + exit 0 +fi + +HCOMPONENT_NAME=`echo ${COMPONENT_NAME} | sed -e 's:-plugin::'` + +CFG_OWNER_INF="${HCOMPONENT_NAME}:${HCOMPONENT_NAME}" + +if [ "${HCOMPONENT_NAME}" = "hdfs" ] +then + HCOMPONENT_NAME="hadoop" +fi + +# +# Based on script name, identify if the action is enabled or disabled +# + +basename $0 | cut -d. -f1 | grep '^enable-' > /dev/null 2>&1 + +if [ $? -eq 0 ] +then + action=enable +else + action=disable +fi + + +# +# environment variables for enable|disable scripts +# + +PROJ_INSTALL_DIR=`(cd ${basedir} ; pwd)` +SET_ENV_SCRIPT_NAME=set-${COMPONENT_NAME}-env.sh +SET_ENV_SCRIPT_TEMPLATE=${PROJ_INSTALL_DIR}/install/conf.templates/enable/${SET_ENV_SCRIPT_NAME} +DEFAULT_XML_CONFIG=${PROJ_INSTALL_DIR}/install/conf.templates/default/configuration.xml +PROJ_LIB_DIR=${PROJ_INSTALL_DIR}/lib +PROJ_INSTALL_LIB_DIR="${PROJ_INSTALL_DIR}/install/lib" +INSTALL_ARGS="${PROJ_INSTALL_DIR}/install.properties" +COMPONENT_INSTALL_ARGS="${PROJ_INSTALL_DIR}/${COMPONENT_NAME}-install.properties" +JAVA=$JAVA_HOME/bin/java + +PLUGIN_DEPENDENT_LIB_DIR=lib/"${PROJ_NAME}-${COMPONENT_NAME}-impl" +PROJ_LIB_PLUGIN_DIR=${PROJ_INSTALL_DIR}/${PLUGIN_DEPENDENT_LIB_DIR} + +HCOMPONENT_INSTALL_DIR_NAME=$(getInstallProperty 'COMPONENT_INSTALL_DIR_NAME') + + +CUSTOM_USER=$(getInstallProperty 'CUSTOM_USER') +CUSTOM_USER=${CUSTOM_USER// } + +CUSTOM_GROUP=$(getInstallProperty 'CUSTOM_GROUP') +CUSTOM_GROUP=${CUSTOM_GROUP// } + +CUSTOM_GROUP_STATUS=${CUSTOM_GROUP}; +CUSTOM_USER_STATUS=${CUSTOM_USER}; +egrep "^$CUSTOM_GROUP" /etc/group >& /dev/null +if [ $? -ne 0 ] +then + CUSTOM_GROUP_STATUS="" +fi +id -u ${CUSTOM_USER} > /dev/null 2>&1 +if [ $? -ne 0 ] +then + CUSTOM_USER_STATUS="" +fi + +if [ ! -z "${CUSTOM_USER_STATUS}" ] && [ ! -z "${CUSTOM_GROUP_STATUS}" ] +then + echo "Custom user and group is available, using custom user and group." + CFG_OWNER_INF="${CUSTOM_USER}:${CUSTOM_GROUP}" +elif [ ! -z "${CUSTOM_USER_STATUS}" ] && [ -z "${CUSTOM_GROUP_STATUS}" ] +then + echo "Custom user is available, using custom user and default group." + CFG_OWNER_INF="${CUSTOM_USER}:${HCOMPONENT_NAME}" +elif [ -z "${CUSTOM_USER_STATUS}" ] && [ ! 
-z "${CUSTOM_GROUP_STATUS}" ] +then + echo "Custom group is available, using default user and custom group." + CFG_OWNER_INF="${HCOMPONENT_NAME}:${CUSTOM_GROUP}" +else + echo "Custom user and group are not available, using default user and group." + CFG_OWNER_INF="${HCOMPONENT_NAME}:${HCOMPONENT_NAME}" +fi + +if [ "${HCOMPONENT_INSTALL_DIR_NAME}" = "" ] +then + if [ "${HCOMPONENT_NAME}" = "knox" ]; + then + HCOMPONENT_INSTALL_DIR_NAME=$(getInstallProperty 'KNOX_HOME') + fi + if [ "${HCOMPONENT_INSTALL_DIR_NAME}" = "" ] + then + HCOMPONENT_INSTALL_DIR_NAME=${HCOMPONENT_NAME} + fi +fi + +firstletter=${HCOMPONENT_INSTALL_DIR_NAME:0:1} +if [ "$firstletter" = "/" ]; then + hdir=${HCOMPONENT_INSTALL_DIR_NAME} +else + hdir=${PROJ_INSTALL_DIR}/../${HCOMPONENT_INSTALL_DIR_NAME} +fi + +# +# TEST - START +# +if [ ! -d ${hdir} ] +then + mkdir -p ${hdir} +fi +# +# TEST - END +# +HCOMPONENT_INSTALL_DIR=`(cd ${hdir} ; pwd)` +HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/lib +if [ "${HCOMPONENT_NAME}" = "knox" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/ext +elif [ "${HCOMPONENT_NAME}" = "solr" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/solr-webapp/webapp/WEB-INF/lib +elif [ "${HCOMPONENT_NAME}" = "kafka" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/libs +elif [ "${HCOMPONENT_NAME}" = "storm" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/extlib-daemon +elif [ "${HCOMPONENT_NAME}" = "atlas" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/libext +elif [ "${HCOMPONENT_NAME}" = "hadoop" ] || + [ "${HCOMPONENT_NAME}" = "yarn" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/share/hadoop/hdfs/lib +elif [ "${HCOMPONENT_NAME}" = "sqoop" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/server/lib +elif [ "${HCOMPONENT_NAME}" = "kylin" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/tomcat/webapps/kylin/WEB-INF/lib +elif [ "${HCOMPONENT_NAME}" = "elasticsearch" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/plugins +elif [ "${HCOMPONENT_NAME}" = "presto" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/plugin/ranger + if [ ! -d "${HCOMPONENT_LIB_DIR}" ]; then + echo "INFO: Creating ${HCOMPONENT_LIB_DIR}" + mkdir -p ${HCOMPONENT_LIB_DIR} + fi +fi + +HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/conf +if [ "${HCOMPONENT_NAME}" = "solr" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/resources + if [ ! -d $HCOMPONENT_CONF_DIR ]; then + install_owner=`ls -ld | cut -f 3 -d " "` + echo "INFO: Creating $HCOMPONENT_CONF_DIR" + mkdir -p $HCOMPONENT_CONF_DIR + echo "INFO: Changing ownership of $HCOMPONENT_CONF_DIR to $install_owner" + chown $install_owner:$install_owner $HCOMPONENT_CONF_DIR + fi +elif [ "${HCOMPONENT_NAME}" = "kafka" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/config +elif [ "${HCOMPONENT_NAME}" = "hadoop" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/etc/hadoop +elif [ "${HCOMPONENT_NAME}" = "yarn" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/etc/hadoop +elif [ "${HCOMPONENT_NAME}" = "sqoop" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/conf +elif [ "${HCOMPONENT_NAME}" = "elasticsearch" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/config/ranger-elasticsearch-plugin + if [ ! 
-d $HCOMPONENT_CONF_DIR ]; then + echo "INFO: Creating $HCOMPONENT_CONF_DIR" + mkdir -p $HCOMPONENT_CONF_DIR + echo "INFO: Changing ownership of $HCOMPONENT_CONF_DIR to $CFG_OWNER_INF" + chown $CFG_OWNER_INF $HCOMPONENT_CONF_DIR + fi +elif [ "${HCOMPONENT_NAME}" = "presto" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/etc +fi + +HCOMPONENT_ARCHIVE_CONF_DIR=${HCOMPONENT_CONF_DIR}/.archive +SET_ENV_SCRIPT=${HCOMPONENT_CONF_DIR}/${SET_ENV_SCRIPT_NAME} + + +if [ ! -d "${HCOMPONENT_INSTALL_DIR}" ] +then + echo "ERROR: Unable to find the install directory of component [${HCOMPONENT_NAME}]; dir [${HCOMPONENT_INSTALL_DIR}] not found." + echo "Exiting installation." + exit 1 +fi + +if [ ! -d "${HCOMPONENT_CONF_DIR}" ] +then + echo "ERROR: Unable to find the conf directory of component [${HCOMPONENT_NAME}]; dir [${HCOMPONENT_CONF_DIR}] not found." + echo "Exiting installation." + exit 1 +fi + +if [ ! -d "${HCOMPONENT_LIB_DIR}" ] +then + mkdir -p "${HCOMPONENT_LIB_DIR}" + if [ ! -d "${HCOMPONENT_LIB_DIR}" ] + then + echo "ERROR: Unable to find the lib directory of component [${HCOMPONENT_NAME}]; dir [${HCOMPONENT_LIB_DIR}] not found." + echo "Exiting installation." + exit 1 + fi +fi + +ambari_hive_install="N" +if [ "${HCOMPONENT_NAME}" = "hive" ] +then + HCOMPONENT_CONF_SERVER_DIR="${HCOMPONENT_CONF_DIR}"/conf.server + if [ -d "${HCOMPONENT_CONF_SERVER_DIR}" ] + then + ambari_hive_install="Y" + fi +fi + +# +# Common functions used by all enable/disable scripts +# + +log() { + echo "+ `date` : $*" +} + + +create_jceks() { + + alias=$1 + pass=$2 + jceksFile=$3 + + if [ -f "${jceksFile}" ] + then + jcebdir=`dirname ${jceksFile}` + jcebname=`basename ${jceksFile}` + archive_jce=${jcebdir}/.${jcebname}.`date '+%Y%m%d%H%M%S'` + log "Saving current JCE file: ${jceksFile} to ${archive_jce} ..." + cp ${jceksFile} ${archive_jce} + fi + + tempFile=/tmp/jce.$$.out + + $JAVA_HOME/bin/java -cp ":${PROJ_INSTALL_LIB_DIR}/*:" org.apache.ranger.credentialapi.buildks create "${alias}" -value "${pass}" -provider "jceks://file${jceksFile}" > ${tempFile} 2>&1 + + if [ $? -ne 0 ] + then + echo "Unable to store password in non-plain text format. Error: [`cat ${tempFile}`]" + echo "Exiting plugin installation" + rm -f ${tempFile} + exit 0 + fi + + rm -f ${tempFile} +} + +log "${HCOMPONENT_NAME}: lib folder=$HCOMPONENT_LIB_DIR conf folder=$HCOMPONENT_CONF_DIR" + +# +# If there is a set-ranger-${COMPONENT}-env.sh, install it +# +dt=`date '+%Y%m%d-%H%M%S'` + +if [ -f "${SET_ENV_SCRIPT_TEMPLATE}" ] +then + # + # If the setenv script already exists, move it to the archive folder + # + if [ -f "${SET_ENV_SCRIPT}" ] + then + if [ ! -d "${HCOMPONENT_ARCHIVE_CONF_DIR}" ] + then + mkdir -p ${HCOMPONENT_ARCHIVE_CONF_DIR} + fi + log "Saving current ${SET_ENV_SCRIPT_NAME} to ${HCOMPONENT_ARCHIVE_CONF_DIR} ..." + mv ${SET_ENV_SCRIPT} ${HCOMPONENT_ARCHIVE_CONF_DIR}/${SET_ENV_SCRIPT_NAME}.${dt} + fi + + if [ "${action}" = "enable" ] + then + + cp ${SET_ENV_SCRIPT_TEMPLATE} ${SET_ENV_SCRIPT} + + DEST_SCRIPT_FILE=${HCOMPONENT_INSTALL_DIR}/libexec/${HCOMPONENT_NAME}-config.sh + + DEST_SCRIPT_ARCHIVE_FILE=${HCOMPONENT_INSTALL_DIR}/libexec/.${HCOMPONENT_NAME}-config.sh.${dt} + + if [ -f "${DEST_SCRIPT_FILE}" ] + then + + log "Saving current ${DEST_SCRIPT_FILE} to ${DEST_SCRIPT_ARCHIVE_FILE} ..." + + cp ${DEST_SCRIPT_FILE} ${DEST_SCRIPT_ARCHIVE_FILE} + + grep 'xasecure-.*-env.sh' ${DEST_SCRIPT_FILE} > /dev/null 2>&1 + if [ $? 
-eq 0 ] + then + ts=`date '+%Y%m%d%H%M%S'` + grep -v 'xasecure-.*-env.sh' ${DEST_SCRIPT_FILE} > ${DEST_SCRIPT_FILE}.${ts} + if [ $? -eq 0 ] + then + log "Removing old reference to xasecure setenv source ..." + cat ${DEST_SCRIPT_FILE}.${ts} > ${DEST_SCRIPT_FILE} + rm -f ${DEST_SCRIPT_FILE}.${ts} + fi + fi + + grep "[ \t]*.[ \t]*${SET_ENV_SCRIPT}" ${DEST_SCRIPT_FILE} > /dev/null + if [ $? -ne 0 ] + then + log "Appending sourcing script, ${SET_ENV_SCRIPT_NAME} in the file: ${DEST_SCRIPT_FILE} " + cat >> ${DEST_SCRIPT_FILE} <\n`date`\n" > ${HCOMPONENT_CONF_DIR}/ranger-security.xml + chown ${CFG_OWNER_INF} ${HCOMPONENT_CONF_DIR}/ranger-security.xml + chmod a+r ${HCOMPONENT_CONF_DIR}/ranger-security.xml + for cf in ${PROJ_INSTALL_DIR}/install/conf.templates/${action}/*.xml + do + cfb=`basename ${cf}` + if [ -f "${HCOMPONENT_CONF_DIR}/${cfb}" ] + then + log "Saving ${HCOMPONENT_CONF_DIR}/${cfb} to ${HCOMPONENT_CONF_DIR}/.${cfb}.${dt} ..." + cp ${HCOMPONENT_CONF_DIR}/${cfb} ${HCOMPONENT_CONF_DIR}/.${cfb}.${dt} + fi + cp ${cf} ${HCOMPONENT_CONF_DIR}/ + chown ${CFG_OWNER_INF} ${HCOMPONENT_CONF_DIR}/${cfb} + chmod a+r ${HCOMPONENT_CONF_DIR}/${cfb} + done + else + if [ -f ${HCOMPONENT_CONF_DIR}/ranger-security.xml ] + then + mv ${HCOMPONENT_CONF_DIR}/ranger-security.xml ${HCOMPONENT_CONF_DIR}/.ranger-security.xml.`date '+%Y%m%d%H%M%S'` + fi + fi + + # + # Ensure that POLICY_CACHE_FILE_PATH is accessible + # + REPO_NAME=$(getInstallProperty 'REPOSITORY_NAME') + export POLICY_CACHE_FILE_PATH=/etc/${PROJ_NAME}/${REPO_NAME}/policycache + export CREDENTIAL_PROVIDER_FILE=/etc/${PROJ_NAME}/${REPO_NAME}/cred.jceks + if [ ! -d ${POLICY_CACHE_FILE_PATH} ] + then + mkdir -p ${POLICY_CACHE_FILE_PATH} + fi + chmod a+rx /etc/${PROJ_NAME} + chmod a+rx /etc/${PROJ_NAME}/${REPO_NAME} + chmod a+rx ${POLICY_CACHE_FILE_PATH} + chown -R ${CFG_OWNER_INF} /etc/${PROJ_NAME}/${REPO_NAME} + + for f in ${PROJ_INSTALL_DIR}/install/conf.templates/${action}/*.cfg + do + if [ -f "${f}" ] + then + fn=`basename $f` + orgfn=`echo $fn | sed -e 's:-changes.cfg:.xml:'` + fullpathorgfn="${HCOMPONENT_CONF_DIR}/${orgfn}" + if [ ! -f ${fullpathorgfn} ] + then + if [ -f ${DEFAULT_XML_CONFIG} ] + then + log "Creating default file from [${DEFAULT_XML_CONFIG}] for [${fullpathorgfn}] .." + cp ${DEFAULT_XML_CONFIG} ${fullpathorgfn} + chown ${CFG_OWNER_INF} ${fullpathorgfn} + chmod a+r ${fullpathorgfn} + else + echo "ERROR: Unable to find ${fullpathorgfn}" + exit 1 + fi + fi + archivefn="${HCOMPONENT_CONF_DIR}/.${orgfn}.${dt}" + newfn="${HCOMPONENT_CONF_DIR}/.${orgfn}-new.${dt}" + log "Saving current config file: ${fullpathorgfn} to ${archivefn} ..." + cp ${fullpathorgfn} ${archivefn} + if [ $? -eq 0 ] + then + ${JAVA} -cp "${INSTALL_CP}" org.apache.ranger.utils.install.XmlConfigChanger -i ${archivefn} -o ${newfn} -c ${f} -p ${INSTALL_ARGS} + if [ $? -eq 0 ] + then + diff -w ${newfn} ${fullpathorgfn} > /dev/null 2>&1 + if [ $? -ne 0 ] + then + cat ${newfn} > ${fullpathorgfn} + fi + + # For Ambari install copy the .xml to conf.server also + if [ "${ambari_hive_install}" = "Y" ] + then + fullpathorgHS2fn="${HCOMPONENT_CONF_SERVER_DIR}/${orgfn}" + archiveHS2fn="${HCOMPONENT_CONF_SERVER_DIR}/.${orgfn}.${dt}" + newHS2fn="${HCOMPONENT_CONF_SERVER_DIR}/.${orgfn}-new.${dt}" + log "Saving current conf.server file: ${fullpathorgHS2fn} to ${archiveHS2fn} ..." 
+ if [ -f ${fullpathorgHS2fn} ] + then + cp ${fullpathorgHS2fn} ${archiveHS2fn} + fi + cp ${fullpathorgfn} ${HCOMPONENT_CONF_SERVER_DIR}/${orgfn} + chown ${CFG_OWNER_INF} ${HCOMPONENT_CONF_SERVER_DIR}/${orgfn} + fi + + else + echo "ERROR: Unable to make changes to config. file: ${fullpathorgfn}" + echo "exiting ...." + exit 1 + fi + else + echo "ERROR: Unable to save config. file: ${fullpathorgfn} to ${archivefn}" + echo "exiting ...." + exit 1 + fi + fi + done + if [ "${HCOMPONENT_NAME}" = "hbase" ] || [ "${HCOMPONENT_NAME}" = "storm" ]; + then + chmod 644 ${HCOMPONENT_CONF_DIR}/* + fi +fi + +# +# Create library link +# +if [ "${action}" = "enable" ] +then + dt=`date '+%Y%m%d%H%M%S'` + for f in ${PROJ_LIB_DIR}/* + do + if [ -f "${f}" ] || [ -d "${f}" ] + then + bn=`basename $f` + if [ -f ${HCOMPONENT_LIB_DIR}/${bn} ] || [ -d ${HCOMPONENT_LIB_DIR}/${bn} ] + then + log "Saving lib file: ${HCOMPONENT_LIB_DIR}/${bn} to ${HCOMPONENT_LIB_DIR}/.${bn}.${dt} ..." + mv ${HCOMPONENT_LIB_DIR}/${bn} ${HCOMPONENT_LIB_DIR}/.${bn}.${dt} + fi + if [ ! -f ${HCOMPONENT_LIB_DIR}/${bn} ] && [ ! -d ${HCOMPONENT_LIB_DIR}/${bn} ] + then + ln -s ${f} ${HCOMPONENT_LIB_DIR}/${bn} + fi + fi + done + + # + # Encrypt the password and keep it secure in Credential Provider API + # + CredFile=${CREDENTIAL_PROVIDER_FILE} + if ! [ `echo ${CredFile} | grep '^/.*'` ] + then + echo "ERROR:Please enter the Credential File Store with proper file path" + exit 1 + fi + + pardir=`dirname ${CredFile}` + + if [ ! -d "${pardir}" ] + then + mkdir -p "${pardir}" + if [ $? -ne 0 ] + then + echo "ERROR: Unable to create credential store file path" + exit 1 + fi + chmod a+rx "${pardir}" + fi + + # + # Generate Credential Provider file and Credential for SSL KEYSTORE AND TRUSTSTORE + # + sslkeystoreAlias="sslKeyStore" + sslkeystoreCred=$(getInstallProperty 'SSL_KEYSTORE_PASSWORD') + create_jceks "${sslkeystoreAlias}" "${sslkeystoreCred}" "${CredFile}" + ssltruststoreAlias="sslTrustStore" + ssltruststoreCred=$(getInstallProperty 'SSL_TRUSTSTORE_PASSWORD') + create_jceks "${ssltruststoreAlias}" "${ssltruststoreCred}" "${CredFile}" + chown ${CFG_OWNER_INF} ${CredFile} + # + # To allow all users in the server (where Hive CLI and HBase CLI is used), + # user needs to have read access for the credential file. + # + chmod a+r ${CredFile} +fi + +# +# Knox specific configuration +# +# + +if [ "${HCOMPONENT_NAME}" = "knox" ] +then + if [ "${action}" = "enable" ] + then + authFrom="AclsAuthz" + authTo="XASecurePDPKnox" + else + authTo="AclsAuthz" + authFrom="XASecurePDPKnox" + fi + + dt=`date '+%Y%m%d%H%M%S'` + for fn in `ls ${HCOMPONENT_CONF_DIR}/topologies/*.xml 2> /dev/null` + do + if [ -f "${fn}" ] + then + dn=`dirname ${fn}` + bn=`basename ${fn}` + bf=${dn}/.${bn}.${dt} + echo "backup of ${fn} to ${bf} ..." + cp ${fn} ${bf} + echo "Updating topology file: [${fn}] ... " + cat ${fn} | sed -e "s-${authFrom}-${authTo}-" > ${fn}.${dt}.new + if [ $? 
-eq 0 ] + then + cat ${fn}.${dt}.new > ${fn} + rm ${fn}.${dt}.new + fi + fi + done +fi + +if [ "${HCOMPONENT_NAME}" = "storm" ] +then + CFG_FILE=${HCOMPONENT_CONF_DIR}/storm.yaml + ARCHIVE_FILE=${HCOMPONENT_CONF_DIR}/.storm.yaml.`date '+%Y%m%d%H%M%S'` + + if [ -f "${CFG_FILE}" ] + then + cp ${CFG_FILE} ${ARCHIVE_FILE} + + if [ "${action}" = "enable" ] + then + awk -F: 'BEGIN { + configured = 0 ; + } + { + if ($1 == "nimbus.authorizer") { + if ($2 ~ /^[ \t]*"org.apache.ranger.authorization.storm.authorizer.RangerStormAuthorizer"[ \t]*$/) { + configured = 1 ; + printf("%s\n",$0) ; + } + else { + printf("#%s\n",$0); + printf("nimbus.authorizer: \"org.apache.ranger.authorization.storm.authorizer.RangerStormAuthorizer\"\n") ; + configured = 1 ; + } + } + else { + printf("%s\n",$0) ; + } + } + END { + if (configured == 0) { + printf("nimbus.authorizer: \"org.apache.ranger.authorization.storm.authorizer.RangerStormAuthorizer\"\n") ; + } + }' ${CFG_FILE} > ${CFG_FILE}.new && cat ${CFG_FILE}.new > ${CFG_FILE} && rm -f ${CFG_FILE}.new + + else + awk -F: 'BEGIN { + configured = 0 ; + } + { + if ($1 == "nimbus.authorizer") { + if ($2 ~ /^[ \t]*"org.apache.storm.security.auth.authorizer.SimpleACLAuthorizer"[ \t]*$/) { + configured = 1 ; + printf("%s\n",$0) ; + } + else { + printf("#%s\n",$0); + printf("nimbus.authorizer: \"org.apache.storm.security.auth.authorizer.SimpleACLAuthorizer\"\n") ; + configured = 1 ; + } + } + else { + printf("%s\n",$0) ; + } + } + END { + if (configured == 0) { + printf("nimbus.authorizer: \"org.apache.storm.security.auth.authorizer.SimpleACLAuthorizer\"\n") ; + } + }' ${CFG_FILE} > ${CFG_FILE}.new && cat ${CFG_FILE}.new > ${CFG_FILE} && rm -f ${CFG_FILE}.new + fi + fi +fi + +#Check Properties whether in File, return code 1 if not exist +#$1 -> propertyName; $2 -> fileName +checkPropertyInFile(){ + validate=$(sed '/^\#/d' $2 | grep "^$1" | tail -n 1 | cut -d "=" -f1-) # for validation + if test -z "$validate" ; then return 1; fi +} + +#Add Properties to File +#$1 -> propertyName; $2 -> newPropertyValue; $3 -> fileName +addPropertyToFile(){ + echo "$1=$2">>$3 + validate=$(sed '/^\#/d' $3 | grep "^$1" | tail -n 1 | cut -d "=" -f2-) # for validation + if test -z "$validate" ; then log "[E] Failed to add properties '$1' to $3 file!"; exit 1; fi + echo "Property $1 added successfully with : '$2'" +} + +#Update Properties to File +#$1 -> propertyName; $2 -> newPropertyValue; $3 -> fileName +updatePropertyToFile(){ + sed -i 's@^'$1'=[^ ]*$@'$1'='$2'@g' $3 + validate=$(sed '/^\#/d' $3 | grep "^$1" | tail -n 1 | cut -d "=" -f2-) # for validation + if test -z "$validate" ; then log "[E] '$1' not found in $3 file while Updating....!!"; exit 1; fi + echo "Property $1 updated successfully with : '$2'" +} + +#Add or Update Properties to File +#$1 -> propertyName; $2 -> newPropertyValue; $3 -> fileName +addOrUpdatePropertyToFile(){ + checkPropertyInFile $1 $3 + if [ $? -eq 1 ] + then + addPropertyToFile $1 $2 $3 + else + updatePropertyToFile $1 $2 $3 + fi +} + +if [ "${HCOMPONENT_NAME}" = "sqoop" ] +then + if [ "${action}" = "enable" ] + then + authName="org.apache.ranger.authorization.sqoop.authorizer.RangerSqoopAuthorizer" + else + authName="" + fi + + dt=`date '+%Y%m%d%H%M%S'` + fn=`ls ${HCOMPONENT_CONF_DIR}/sqoop.properties 2> /dev/null` + if [ -f "${fn}" ] + then + dn=`dirname ${fn}` + bn=`basename ${fn}` + bf=${dn}/.${bn}.${dt} + echo "backup of ${fn} to ${bf} ..." + cp ${fn} ${bf} + echo "Add or Update properties file: [${fn}] ... 
" + addOrUpdatePropertyToFile org.apache.sqoop.security.authorization.validator $authName ${fn} + fi +fi + +if [ "${HCOMPONENT_NAME}" = "kylin" ] +then + if [ "${action}" = "enable" ] + then + authName="org.apache.ranger.authorization.kylin.authorizer.RangerKylinAuthorizer" + else + authName="" + fi + + dt=`date '+%Y%m%d%H%M%S'` + fn=`ls ${HCOMPONENT_CONF_DIR}/kylin.properties 2> /dev/null` + if [ -f "${fn}" ] + then + dn=`dirname ${fn}` + bn=`basename ${fn}` + bf=${dn}/.${bn}.${dt} + echo "backup of ${fn} to ${bf} ..." + cp ${fn} ${bf} + echo "Add or Update properties file: [${fn}] ... " + addOrUpdatePropertyToFile kylin.server.external-acl-provider $authName ${fn} + fi +fi + +if [ "${HCOMPONENT_NAME}" = "presto" ] +then + if [ "${action}" = "enable" ] + then + controlName="ranger" + else + controlName="" + fi + dt=`date '+%Y%m%d%H%M%S'` + fn=`ls ${HCOMPONENT_CONF_DIR}/access-control.properties 2> /dev/null` + if [ -f "${fn}" ] + then + dn=`dirname ${fn}` + bn=`basename ${fn}` + bf=${dn}/.${bn}.${dt} + echo "backup of ${fn} to ${bf} ..." + cp ${fn} ${bf} + else + fn=${HCOMPONENT_CONF_DIR}/access-control.properties + fi + echo "Add or Update properties file: [${fn}] ... " + addOrUpdatePropertyToFile access-control.name $controlName ${fn} + echo "Linking config files" + cd ${HCOMPONENT_LIB_DIR}/ranger-presto-plugin-impl/ + ln -sf ${HCOMPONENT_CONF_DIR} conf +fi + + +# +# Set notice to restart the ${HCOMPONENT_NAME} +# + +echo "Ranger Plugin for ${HCOMPONENT_NAME} has been ${action}d. Please restart ${HCOMPONENT_NAME} to ensure that changes are effective." + +exit 0 diff --git a/atlas-hub/pre-conf/ranger/enable-atlas-plugin.sh b/atlas-hub/pre-conf/ranger/enable-atlas-plugin.sh new file mode 100755 index 0000000000..10c7fcfb0a --- /dev/null +++ b/atlas-hub/pre-conf/ranger/enable-atlas-plugin.sh @@ -0,0 +1,798 @@ +#!/bin/bash +JAVA_HOME='/usr/lib/jvm/java-1.8.0-openjdk-amd64' + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +function getInstallProperty() { + local propertyName=$1 + local propertyValue="" + + for file in "${COMPONENT_INSTALL_ARGS}" "${INSTALL_ARGS}" + do + if [ -f "${file}" ] + then + propertyValue=`grep "^${propertyName}[ \t]*=" ${file} | awk -F= '{ sub("^[ \t]*", "", $2); sub("[ \t]*$", "", $2); print $2 }'` + if [ "${propertyValue}" != "" ] + then + break + fi + fi + done + + echo ${propertyValue} +} + +# +# Base env variable for Ranger related files/directories +# +PROJ_NAME=ranger + +# +# The script should be run by "root" user +# + +if [ ! -w /etc/passwd ] +then + echo "ERROR: $0 script should be run as root." + exit 1 +fi + +#Check for JAVA_HOME +if [ "${JAVA_HOME}" == "" ] +then + echo "ERROR: JAVA_HOME environment property not defined, aborting installation." 
+ exit 1 +fi + +# +# Identify the component, action from the script file +# + +basedir=`dirname $0` +if [ "${basedir}" = "." ] +then + basedir=`pwd` +elif [ "${basedir}" = ".." ] +then + basedir=`(cd .. ;pwd)` +fi + +# +# As this script is common to all component, find the component name based on the script-name +# + +COMPONENT_NAME=`basename $0 | cut -d. -f1 | sed -e 's:^disable-::' | sed -e 's:^enable-::'` + +echo "${COMPONENT_NAME}" | grep 'plugin' > /dev/null 2>&1 + +if [ $? -ne 0 ] +then + echo "$0 : is not applicable for component [${COMPONENT_NAME}]. It is applicable only for ranger plugin component; Exiting ..." + exit 0 +fi + +HCOMPONENT_NAME=`echo ${COMPONENT_NAME} | sed -e 's:-plugin::'` + +CFG_OWNER_INF="${HCOMPONENT_NAME}:${HCOMPONENT_NAME}" + +if [ "${HCOMPONENT_NAME}" = "hdfs" ] +then + HCOMPONENT_NAME="hadoop" +fi + +# +# Based on script name, identify if the action is enabled or disabled +# + +basename $0 | cut -d. -f1 | grep '^enable-' > /dev/null 2>&1 + +if [ $? -eq 0 ] +then + action=enable +else + action=disable +fi + + +# +# environment variables for enable|disable scripts +# + +PROJ_INSTALL_DIR=`(cd ${basedir} ; pwd)` +SET_ENV_SCRIPT_NAME=set-${COMPONENT_NAME}-env.sh +SET_ENV_SCRIPT_TEMPLATE=${PROJ_INSTALL_DIR}/install/conf.templates/enable/${SET_ENV_SCRIPT_NAME} +DEFAULT_XML_CONFIG=${PROJ_INSTALL_DIR}/install/conf.templates/default/configuration.xml +PROJ_LIB_DIR=${PROJ_INSTALL_DIR}/lib +PROJ_INSTALL_LIB_DIR="${PROJ_INSTALL_DIR}/install/lib" +INSTALL_ARGS="${PROJ_INSTALL_DIR}/install.properties" +COMPONENT_INSTALL_ARGS="${PROJ_INSTALL_DIR}/${COMPONENT_NAME}-install.properties" +JAVA=$JAVA_HOME/bin/java + +PLUGIN_DEPENDENT_LIB_DIR=lib/"${PROJ_NAME}-${COMPONENT_NAME}-impl" +PROJ_LIB_PLUGIN_DIR=${PROJ_INSTALL_DIR}/${PLUGIN_DEPENDENT_LIB_DIR} + +HCOMPONENT_INSTALL_DIR_NAME=$(getInstallProperty 'COMPONENT_INSTALL_DIR_NAME') + + +CUSTOM_USER=$(getInstallProperty 'CUSTOM_USER') +CUSTOM_USER=${CUSTOM_USER// } + +CUSTOM_GROUP=$(getInstallProperty 'CUSTOM_GROUP') +CUSTOM_GROUP=${CUSTOM_GROUP// } + +CUSTOM_GROUP_STATUS=${CUSTOM_GROUP}; +CUSTOM_USER_STATUS=${CUSTOM_USER}; +egrep "^$CUSTOM_GROUP" /etc/group >& /dev/null +if [ $? -ne 0 ] +then + CUSTOM_GROUP_STATUS="" +fi +id -u ${CUSTOM_USER} > /dev/null 2>&1 +if [ $? -ne 0 ] +then + CUSTOM_USER_STATUS="" +fi + +if [ ! -z "${CUSTOM_USER_STATUS}" ] && [ ! -z "${CUSTOM_GROUP_STATUS}" ] +then + echo "Custom user and group is available, using custom user and group." + CFG_OWNER_INF="${CUSTOM_USER}:${CUSTOM_GROUP}" +elif [ ! -z "${CUSTOM_USER_STATUS}" ] && [ -z "${CUSTOM_GROUP_STATUS}" ] +then + echo "Custom user is available, using custom user and default group." + CFG_OWNER_INF="${CUSTOM_USER}:${HCOMPONENT_NAME}" +elif [ -z "${CUSTOM_USER_STATUS}" ] && [ ! -z "${CUSTOM_GROUP_STATUS}" ] +then + echo "Custom group is available, using default user and custom group." + CFG_OWNER_INF="${HCOMPONENT_NAME}:${CUSTOM_GROUP}" +else + echo "Custom user and group are not available, using default user and group." 
+ CFG_OWNER_INF="${HCOMPONENT_NAME}:${HCOMPONENT_NAME}" +fi + +if [ "${HCOMPONENT_INSTALL_DIR_NAME}" = "" ] +then + if [ "${HCOMPONENT_NAME}" = "knox" ]; + then + HCOMPONENT_INSTALL_DIR_NAME=$(getInstallProperty 'KNOX_HOME') + fi + if [ "${HCOMPONENT_INSTALL_DIR_NAME}" = "" ] + then + HCOMPONENT_INSTALL_DIR_NAME=${HCOMPONENT_NAME} + fi +fi + +firstletter=${HCOMPONENT_INSTALL_DIR_NAME:0:1} +if [ "$firstletter" = "/" ]; then + hdir=${HCOMPONENT_INSTALL_DIR_NAME} +else + hdir=${PROJ_INSTALL_DIR}/../${HCOMPONENT_INSTALL_DIR_NAME} +fi + +# +# TEST - START +# +if [ ! -d ${hdir} ] +then + mkdir -p ${hdir} +fi +# +# TEST - END +# +HCOMPONENT_INSTALL_DIR=`(cd ${hdir} ; pwd)` +HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/lib +if [ "${HCOMPONENT_NAME}" = "knox" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/ext +elif [ "${HCOMPONENT_NAME}" = "solr" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/solr-webapp/webapp/WEB-INF/lib +elif [ "${HCOMPONENT_NAME}" = "kafka" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/libs +elif [ "${HCOMPONENT_NAME}" = "storm" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/extlib-daemon +elif [ "${HCOMPONENT_NAME}" = "atlas" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/libext +elif [ "${HCOMPONENT_NAME}" = "hadoop" ] || + [ "${HCOMPONENT_NAME}" = "yarn" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/share/hadoop/hdfs/lib +elif [ "${HCOMPONENT_NAME}" = "sqoop" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/server/lib +elif [ "${HCOMPONENT_NAME}" = "kylin" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/tomcat/webapps/kylin/WEB-INF/lib +elif [ "${HCOMPONENT_NAME}" = "elasticsearch" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/plugins +elif [ "${HCOMPONENT_NAME}" = "presto" ]; then + HCOMPONENT_LIB_DIR=${HCOMPONENT_INSTALL_DIR}/plugin/ranger + if [ ! -d "${HCOMPONENT_LIB_DIR}" ]; then + echo "INFO: Creating ${HCOMPONENT_LIB_DIR}" + mkdir -p ${HCOMPONENT_LIB_DIR} + fi +fi + +HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/conf +if [ "${HCOMPONENT_NAME}" = "solr" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/resources + if [ ! -d $HCOMPONENT_CONF_DIR ]; then + install_owner=`ls -ld | cut -f 3 -d " "` + echo "INFO: Creating $HCOMPONENT_CONF_DIR" + mkdir -p $HCOMPONENT_CONF_DIR + echo "INFO: Changing ownership of $HCOMPONENT_CONF_DIR to $install_owner" + chown $install_owner:$install_owner $HCOMPONENT_CONF_DIR + fi +elif [ "${HCOMPONENT_NAME}" = "kafka" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/config +elif [ "${HCOMPONENT_NAME}" = "hadoop" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/etc/hadoop +elif [ "${HCOMPONENT_NAME}" = "yarn" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/etc/hadoop +elif [ "${HCOMPONENT_NAME}" = "sqoop" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/conf +elif [ "${HCOMPONENT_NAME}" = "elasticsearch" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/config/ranger-elasticsearch-plugin + if [ ! -d $HCOMPONENT_CONF_DIR ]; then + echo "INFO: Creating $HCOMPONENT_CONF_DIR" + mkdir -p $HCOMPONENT_CONF_DIR + echo "INFO: Changing ownership of $HCOMPONENT_CONF_DIR to $CFG_OWNER_INF" + chown $CFG_OWNER_INF $HCOMPONENT_CONF_DIR + fi +elif [ "${HCOMPONENT_NAME}" = "presto" ]; then + HCOMPONENT_CONF_DIR=${HCOMPONENT_INSTALL_DIR}/etc +fi + +HCOMPONENT_ARCHIVE_CONF_DIR=${HCOMPONENT_CONF_DIR}/.archive +SET_ENV_SCRIPT=${HCOMPONENT_CONF_DIR}/${SET_ENV_SCRIPT_NAME} + + +if [ ! 
-d "${HCOMPONENT_INSTALL_DIR}" ] +then + echo "ERROR: Unable to find the install directory of component [${HCOMPONENT_NAME}]; dir [${HCOMPONENT_INSTALL_DIR}] not found." + echo "Exiting installation." + exit 1 +fi + +if [ ! -d "${HCOMPONENT_CONF_DIR}" ] +then + echo "ERROR: Unable to find the conf directory of component [${HCOMPONENT_NAME}]; dir [${HCOMPONENT_CONF_DIR}] not found." + echo "Exiting installation." + exit 1 +fi + +if [ ! -d "${HCOMPONENT_LIB_DIR}" ] +then + mkdir -p "${HCOMPONENT_LIB_DIR}" + if [ ! -d "${HCOMPONENT_LIB_DIR}" ] + then + echo "ERROR: Unable to find the lib directory of component [${HCOMPONENT_NAME}]; dir [${HCOMPONENT_LIB_DIR}] not found." + echo "Exiting installation." + exit 1 + fi +fi + +ambari_hive_install="N" +if [ "${HCOMPONENT_NAME}" = "hive" ] +then + HCOMPONENT_CONF_SERVER_DIR="${HCOMPONENT_CONF_DIR}"/conf.server + if [ -d "${HCOMPONENT_CONF_SERVER_DIR}" ] + then + ambari_hive_install="Y" + fi +fi + +# +# Common functions used by all enable/disable scripts +# + +log() { + echo "+ `date` : $*" +} + + +create_jceks() { + + alias=$1 + pass=$2 + jceksFile=$3 + + if [ -f "${jceksFile}" ] + then + jcebdir=`dirname ${jceksFile}` + jcebname=`basename ${jceksFile}` + archive_jce=${jcebdir}/.${jcebname}.`date '+%Y%m%d%H%M%S'` + log "Saving current JCE file: ${jceksFile} to ${archive_jce} ..." + cp ${jceksFile} ${archive_jce} + fi + + tempFile=/tmp/jce.$$.out + + $JAVA_HOME/bin/java -cp ":${PROJ_INSTALL_LIB_DIR}/*:" org.apache.ranger.credentialapi.buildks create "${alias}" -value "${pass}" -provider "jceks://file${jceksFile}" > ${tempFile} 2>&1 + + if [ $? -ne 0 ] + then + echo "Unable to store password in non-plain text format. Error: [`cat ${tempFile}`]" + echo "Exiting plugin installation" + rm -f ${tempFile} + exit 0 + fi + + rm -f ${tempFile} +} + +log "${HCOMPONENT_NAME}: lib folder=$HCOMPONENT_LIB_DIR conf folder=$HCOMPONENT_CONF_DIR" + +# +# If there is a set-ranger-${COMPONENT}-env.sh, install it +# +dt=`date '+%Y%m%d-%H%M%S'` + +if [ -f "${SET_ENV_SCRIPT_TEMPLATE}" ] +then + # + # If the setenv script already exists, move it to the archive folder + # + if [ -f "${SET_ENV_SCRIPT}" ] + then + if [ ! -d "${HCOMPONENT_ARCHIVE_CONF_DIR}" ] + then + mkdir -p ${HCOMPONENT_ARCHIVE_CONF_DIR} + fi + log "Saving current ${SET_ENV_SCRIPT_NAME} to ${HCOMPONENT_ARCHIVE_CONF_DIR} ..." + mv ${SET_ENV_SCRIPT} ${HCOMPONENT_ARCHIVE_CONF_DIR}/${SET_ENV_SCRIPT_NAME}.${dt} + fi + + if [ "${action}" = "enable" ] + then + + cp ${SET_ENV_SCRIPT_TEMPLATE} ${SET_ENV_SCRIPT} + + DEST_SCRIPT_FILE=${HCOMPONENT_INSTALL_DIR}/libexec/${HCOMPONENT_NAME}-config.sh + + DEST_SCRIPT_ARCHIVE_FILE=${HCOMPONENT_INSTALL_DIR}/libexec/.${HCOMPONENT_NAME}-config.sh.${dt} + + if [ -f "${DEST_SCRIPT_FILE}" ] + then + + log "Saving current ${DEST_SCRIPT_FILE} to ${DEST_SCRIPT_ARCHIVE_FILE} ..." + + cp ${DEST_SCRIPT_FILE} ${DEST_SCRIPT_ARCHIVE_FILE} + + grep 'xasecure-.*-env.sh' ${DEST_SCRIPT_FILE} > /dev/null 2>&1 + if [ $? -eq 0 ] + then + ts=`date '+%Y%m%d%H%M%S'` + grep -v 'xasecure-.*-env.sh' ${DEST_SCRIPT_FILE} > ${DEST_SCRIPT_FILE}.${ts} + if [ $? -eq 0 ] + then + log "Removing old reference to xasecure setenv source ..." + cat ${DEST_SCRIPT_FILE}.${ts} > ${DEST_SCRIPT_FILE} + rm -f ${DEST_SCRIPT_FILE}.${ts} + fi + fi + + grep "[ \t]*.[ \t]*${SET_ENV_SCRIPT}" ${DEST_SCRIPT_FILE} > /dev/null + if [ $? 
-ne 0 ] + then + log "Appending sourcing script, ${SET_ENV_SCRIPT_NAME} in the file: ${DEST_SCRIPT_FILE} " + cat >> ${DEST_SCRIPT_FILE} <\n`date`\n" > ${HCOMPONENT_CONF_DIR}/ranger-security.xml + chown ${CFG_OWNER_INF} ${HCOMPONENT_CONF_DIR}/ranger-security.xml + chmod a+r ${HCOMPONENT_CONF_DIR}/ranger-security.xml + for cf in ${PROJ_INSTALL_DIR}/install/conf.templates/${action}/*.xml + do + cfb=`basename ${cf}` + if [ -f "${HCOMPONENT_CONF_DIR}/${cfb}" ] + then + log "Saving ${HCOMPONENT_CONF_DIR}/${cfb} to ${HCOMPONENT_CONF_DIR}/.${cfb}.${dt} ..." + cp ${HCOMPONENT_CONF_DIR}/${cfb} ${HCOMPONENT_CONF_DIR}/.${cfb}.${dt} + fi + cp ${cf} ${HCOMPONENT_CONF_DIR}/ + chown ${CFG_OWNER_INF} ${HCOMPONENT_CONF_DIR}/${cfb} + chmod a+r ${HCOMPONENT_CONF_DIR}/${cfb} + done + else + if [ -f ${HCOMPONENT_CONF_DIR}/ranger-security.xml ] + then + mv ${HCOMPONENT_CONF_DIR}/ranger-security.xml ${HCOMPONENT_CONF_DIR}/.ranger-security.xml.`date '+%Y%m%d%H%M%S'` + fi + fi + + # + # Ensure that POLICY_CACHE_FILE_PATH is accessible + # + REPO_NAME=$(getInstallProperty 'REPOSITORY_NAME') + export POLICY_CACHE_FILE_PATH=/etc/${PROJ_NAME}/${REPO_NAME}/policycache + export CREDENTIAL_PROVIDER_FILE=/etc/${PROJ_NAME}/${REPO_NAME}/cred.jceks + if [ ! -d ${POLICY_CACHE_FILE_PATH} ] + then + mkdir -p ${POLICY_CACHE_FILE_PATH} + fi + chmod a+rx /etc/${PROJ_NAME} + chmod a+rx /etc/${PROJ_NAME}/${REPO_NAME} + chmod a+rx ${POLICY_CACHE_FILE_PATH} + chown -R ${CFG_OWNER_INF} /etc/${PROJ_NAME}/${REPO_NAME} + + for f in ${PROJ_INSTALL_DIR}/install/conf.templates/${action}/*.cfg + do + if [ -f "${f}" ] + then + fn=`basename $f` + orgfn=`echo $fn | sed -e 's:-changes.cfg:.xml:'` + fullpathorgfn="${HCOMPONENT_CONF_DIR}/${orgfn}" + if [ ! -f ${fullpathorgfn} ] + then + if [ -f ${DEFAULT_XML_CONFIG} ] + then + log "Creating default file from [${DEFAULT_XML_CONFIG}] for [${fullpathorgfn}] .." + cp ${DEFAULT_XML_CONFIG} ${fullpathorgfn} + chown ${CFG_OWNER_INF} ${fullpathorgfn} + chmod a+r ${fullpathorgfn} + else + echo "ERROR: Unable to find ${fullpathorgfn}" + exit 1 + fi + fi + archivefn="${HCOMPONENT_CONF_DIR}/.${orgfn}.${dt}" + newfn="${HCOMPONENT_CONF_DIR}/.${orgfn}-new.${dt}" + log "Saving current config file: ${fullpathorgfn} to ${archivefn} ..." + cp ${fullpathorgfn} ${archivefn} + if [ $? -eq 0 ] + then + ${JAVA} -cp "${INSTALL_CP}" org.apache.ranger.utils.install.XmlConfigChanger -i ${archivefn} -o ${newfn} -c ${f} -p ${INSTALL_ARGS} + if [ $? -eq 0 ] + then + diff -w ${newfn} ${fullpathorgfn} > /dev/null 2>&1 + if [ $? -ne 0 ] + then + cat ${newfn} > ${fullpathorgfn} + fi + + # For Ambari install copy the .xml to conf.server also + if [ "${ambari_hive_install}" = "Y" ] + then + fullpathorgHS2fn="${HCOMPONENT_CONF_SERVER_DIR}/${orgfn}" + archiveHS2fn="${HCOMPONENT_CONF_SERVER_DIR}/.${orgfn}.${dt}" + newHS2fn="${HCOMPONENT_CONF_SERVER_DIR}/.${orgfn}-new.${dt}" + log "Saving current conf.server file: ${fullpathorgHS2fn} to ${archiveHS2fn} ..." + if [ -f ${fullpathorgHS2fn} ] + then + cp ${fullpathorgHS2fn} ${archiveHS2fn} + fi + cp ${fullpathorgfn} ${HCOMPONENT_CONF_SERVER_DIR}/${orgfn} + chown ${CFG_OWNER_INF} ${HCOMPONENT_CONF_SERVER_DIR}/${orgfn} + fi + + else + echo "ERROR: Unable to make changes to config. file: ${fullpathorgfn}" + echo "exiting ...." + exit 1 + fi + else + echo "ERROR: Unable to save config. file: ${fullpathorgfn} to ${archivefn}" + echo "exiting ...." 
+ exit 1 + fi + fi + done + if [ "${HCOMPONENT_NAME}" = "hbase" ] || [ "${HCOMPONENT_NAME}" = "storm" ]; + then + chmod 644 ${HCOMPONENT_CONF_DIR}/* + fi +fi + +# +# Create library link +# +if [ "${action}" = "enable" ] +then + dt=`date '+%Y%m%d%H%M%S'` + for f in ${PROJ_LIB_DIR}/* + do + if [ -f "${f}" ] || [ -d "${f}" ] + then + bn=`basename $f` + if [ -f ${HCOMPONENT_LIB_DIR}/${bn} ] || [ -d ${HCOMPONENT_LIB_DIR}/${bn} ] + then + log "Saving lib file: ${HCOMPONENT_LIB_DIR}/${bn} to ${HCOMPONENT_LIB_DIR}/.${bn}.${dt} ..." + mv ${HCOMPONENT_LIB_DIR}/${bn} ${HCOMPONENT_LIB_DIR}/.${bn}.${dt} + fi + if [ ! -f ${HCOMPONENT_LIB_DIR}/${bn} ] && [ ! -d ${HCOMPONENT_LIB_DIR}/${bn} ] + then + ln -s ${f} ${HCOMPONENT_LIB_DIR}/${bn} + fi + fi + done + + # + # Encrypt the password and keep it secure in Credential Provider API + # + CredFile=${CREDENTIAL_PROVIDER_FILE} + if ! [ `echo ${CredFile} | grep '^/.*'` ] + then + echo "ERROR:Please enter the Credential File Store with proper file path" + exit 1 + fi + + pardir=`dirname ${CredFile}` + + if [ ! -d "${pardir}" ] + then + mkdir -p "${pardir}" + if [ $? -ne 0 ] + then + echo "ERROR: Unable to create credential store file path" + exit 1 + fi + chmod a+rx "${pardir}" + fi + + # + # Generate Credential Provider file and Credential for SSL KEYSTORE AND TRUSTSTORE + # + sslkeystoreAlias="sslKeyStore" + sslkeystoreCred=$(getInstallProperty 'SSL_KEYSTORE_PASSWORD') + create_jceks "${sslkeystoreAlias}" "${sslkeystoreCred}" "${CredFile}" + ssltruststoreAlias="sslTrustStore" + ssltruststoreCred=$(getInstallProperty 'SSL_TRUSTSTORE_PASSWORD') + create_jceks "${ssltruststoreAlias}" "${ssltruststoreCred}" "${CredFile}" + chown ${CFG_OWNER_INF} ${CredFile} + # + # To allow all users in the server (where Hive CLI and HBase CLI is used), + # user needs to have read access for the credential file. + # + chmod a+r ${CredFile} +fi + +# +# Knox specific configuration +# +# + +if [ "${HCOMPONENT_NAME}" = "knox" ] +then + if [ "${action}" = "enable" ] + then + authFrom="AclsAuthz" + authTo="XASecurePDPKnox" + else + authTo="AclsAuthz" + authFrom="XASecurePDPKnox" + fi + + dt=`date '+%Y%m%d%H%M%S'` + for fn in `ls ${HCOMPONENT_CONF_DIR}/topologies/*.xml 2> /dev/null` + do + if [ -f "${fn}" ] + then + dn=`dirname ${fn}` + bn=`basename ${fn}` + bf=${dn}/.${bn}.${dt} + echo "backup of ${fn} to ${bf} ..." + cp ${fn} ${bf} + echo "Updating topology file: [${fn}] ... " + cat ${fn} | sed -e "s-${authFrom}-${authTo}-" > ${fn}.${dt}.new + if [ $? 
-eq 0 ] + then + cat ${fn}.${dt}.new > ${fn} + rm ${fn}.${dt}.new + fi + fi + done +fi + +if [ "${HCOMPONENT_NAME}" = "storm" ] +then + CFG_FILE=${HCOMPONENT_CONF_DIR}/storm.yaml + ARCHIVE_FILE=${HCOMPONENT_CONF_DIR}/.storm.yaml.`date '+%Y%m%d%H%M%S'` + + if [ -f "${CFG_FILE}" ] + then + cp ${CFG_FILE} ${ARCHIVE_FILE} + + if [ "${action}" = "enable" ] + then + awk -F: 'BEGIN { + configured = 0 ; + } + { + if ($1 == "nimbus.authorizer") { + if ($2 ~ /^[ \t]*"org.apache.ranger.authorization.storm.authorizer.RangerStormAuthorizer"[ \t]*$/) { + configured = 1 ; + printf("%s\n",$0) ; + } + else { + printf("#%s\n",$0); + printf("nimbus.authorizer: \"org.apache.ranger.authorization.storm.authorizer.RangerStormAuthorizer\"\n") ; + configured = 1 ; + } + } + else { + printf("%s\n",$0) ; + } + } + END { + if (configured == 0) { + printf("nimbus.authorizer: \"org.apache.ranger.authorization.storm.authorizer.RangerStormAuthorizer\"\n") ; + } + }' ${CFG_FILE} > ${CFG_FILE}.new && cat ${CFG_FILE}.new > ${CFG_FILE} && rm -f ${CFG_FILE}.new + + else + awk -F: 'BEGIN { + configured = 0 ; + } + { + if ($1 == "nimbus.authorizer") { + if ($2 ~ /^[ \t]*"org.apache.storm.security.auth.authorizer.SimpleACLAuthorizer"[ \t]*$/) { + configured = 1 ; + printf("%s\n",$0) ; + } + else { + printf("#%s\n",$0); + printf("nimbus.authorizer: \"org.apache.storm.security.auth.authorizer.SimpleACLAuthorizer\"\n") ; + configured = 1 ; + } + } + else { + printf("%s\n",$0) ; + } + } + END { + if (configured == 0) { + printf("nimbus.authorizer: \"org.apache.storm.security.auth.authorizer.SimpleACLAuthorizer\"\n") ; + } + }' ${CFG_FILE} > ${CFG_FILE}.new && cat ${CFG_FILE}.new > ${CFG_FILE} && rm -f ${CFG_FILE}.new + fi + fi +fi + +#Check Properties whether in File, return code 1 if not exist +#$1 -> propertyName; $2 -> fileName +checkPropertyInFile(){ + validate=$(sed '/^\#/d' $2 | grep "^$1" | tail -n 1 | cut -d "=" -f1-) # for validation + if test -z "$validate" ; then return 1; fi +} + +#Add Properties to File +#$1 -> propertyName; $2 -> newPropertyValue; $3 -> fileName +addPropertyToFile(){ + echo "$1=$2">>$3 + validate=$(sed '/^\#/d' $3 | grep "^$1" | tail -n 1 | cut -d "=" -f2-) # for validation + if test -z "$validate" ; then log "[E] Failed to add properties '$1' to $3 file!"; exit 1; fi + echo "Property $1 added successfully with : '$2'" +} + +#Update Properties to File +#$1 -> propertyName; $2 -> newPropertyValue; $3 -> fileName +updatePropertyToFile(){ + sed 's@^'$1'=[^ ]*$@'$1'='$2'@g' $3 > /tmp/temporaryfile + cp /tmp/temporaryfile $3 + rm /tmp/temporaryfile + validate=$(sed '/^\#/d' $3 | grep "^$1" | tail -n 1 | cut -d "=" -f2-) # for validation + if test -z "$validate" ; then log "[E] '$1' not found in $3 file while Updating....!!"; exit 1; fi + echo "Property $1 updated successfully with : '$2'" +} + +#Add or Update Properties to File +#$1 -> propertyName; $2 -> newPropertyValue; $3 -> fileName +addOrUpdatePropertyToFile(){ + checkPropertyInFile $1 $3 + if [ $? -eq 1 ] + then + addPropertyToFile $1 $2 $3 + else + updatePropertyToFile $1 $2 $3 + fi +} + +if [ "${HCOMPONENT_NAME}" = "sqoop" ] +then + if [ "${action}" = "enable" ] + then + authName="org.apache.ranger.authorization.sqoop.authorizer.RangerSqoopAuthorizer" + else + authName="" + fi + + dt=`date '+%Y%m%d%H%M%S'` + fn=`ls ${HCOMPONENT_CONF_DIR}/sqoop.properties 2> /dev/null` + if [ -f "${fn}" ] + then + dn=`dirname ${fn}` + bn=`basename ${fn}` + bf=${dn}/.${bn}.${dt} + echo "backup of ${fn} to ${bf} ..." 
+ cp ${fn} ${bf} + echo "Add or Update properties file: [${fn}] ... " + addOrUpdatePropertyToFile org.apache.sqoop.security.authorization.validator $authName ${fn} + fi +fi + +if [ "${HCOMPONENT_NAME}" = "kylin" ] +then + if [ "${action}" = "enable" ] + then + authName="org.apache.ranger.authorization.kylin.authorizer.RangerKylinAuthorizer" + else + authName="" + fi + + dt=`date '+%Y%m%d%H%M%S'` + fn=`ls ${HCOMPONENT_CONF_DIR}/kylin.properties 2> /dev/null` + if [ -f "${fn}" ] + then + dn=`dirname ${fn}` + bn=`basename ${fn}` + bf=${dn}/.${bn}.${dt} + echo "backup of ${fn} to ${bf} ..." + cp ${fn} ${bf} + echo "Add or Update properties file: [${fn}] ... " + addOrUpdatePropertyToFile kylin.server.external-acl-provider $authName ${fn} + fi +fi + +if [ "${HCOMPONENT_NAME}" = "presto" ] +then + if [ "${action}" = "enable" ] + then + controlName="ranger" + else + controlName="" + fi + dt=`date '+%Y%m%d%H%M%S'` + fn=`ls ${HCOMPONENT_CONF_DIR}/access-control.properties 2> /dev/null` + if [ -f "${fn}" ] + then + dn=`dirname ${fn}` + bn=`basename ${fn}` + bf=${dn}/.${bn}.${dt} + echo "backup of ${fn} to ${bf} ..." + cp ${fn} ${bf} + else + fn=${HCOMPONENT_CONF_DIR}/access-control.properties + fi + echo "Add or Update properties file: [${fn}] ... " + addOrUpdatePropertyToFile access-control.name $controlName ${fn} + echo "Linking config files" + cd ${HCOMPONENT_LIB_DIR}/ranger-presto-plugin-impl/ + ln -sf ${HCOMPONENT_CONF_DIR} conf +fi + + +# +# Set notice to restart the ${HCOMPONENT_NAME} +# + +echo "Ranger Plugin for ${HCOMPONENT_NAME} has been ${action}d. Please restart ${HCOMPONENT_NAME} to ensure that changes are effective." + +exit 0 diff --git a/atlas-hub/pre-conf/ranger/install.properties b/atlas-hub/pre-conf/ranger/install.properties new file mode 100755 index 0000000000..8cf19fc3b8 --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install.properties @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
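The install.properties template that follows ships with placeholder tokens (RANGER_SERVICE_URL, ATLAS_REPOSITORY_NAME, ATLAS_INSTALL_DIR) in place of real values. A minimal sketch of resolving those tokens before the plugin enable script is run, assuming sed is available; the admin URL, repository name and install path below are hypothetical examples, not values taken from this change:

    # Hypothetical values -- substitute the Ranger admin URL, repository name
    # and Atlas install path used in your deployment.
    sed -i \
        -e 's~RANGER_SERVICE_URL~http://ranger-admin.example.com:6080~g' \
        -e 's~ATLAS_REPOSITORY_NAME~atlasdev~g' \
        -e 's~ATLAS_INSTALL_DIR~/opt/apache-atlas~g' \
        install.properties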
+ +# +# Location of Policy Manager URL +# +# Example: +# POLICY_MGR_URL=http://policymanager.xasecure.net:6080 +# +POLICY_MGR_URL=RANGER_SERVICE_URL + +# +# This is the repository name created within policy manager +# +# Example: +# REPOSITORY_NAME=atlasdev +# +REPOSITORY_NAME=ATLAS_REPOSITORY_NAME + +# +# Atlas installation directory +# +COMPONENT_INSTALL_DIR_NAME=ATLAS_INSTALL_DIR + +# AUDIT configuration with V3 properties + +#Should audit be summarized at source +XAAUDIT.SUMMARY.ENABLE=true + +# Enable audit logs to Solr +#Example +#XAAUDIT.SOLR.ENABLE=true +#XAAUDIT.SOLR.URL=http://localhost:6083/solr/ranger_audits +#XAAUDIT.SOLR.ZOOKEEPER= +#XAAUDIT.SOLR.FILE_SPOOL_DIR=/var/log/atlas/audit/solr/spool + +XAAUDIT.SOLR.ENABLE=false +XAAUDIT.SOLR.URL=NONE +XAAUDIT.SOLR.USER=NONE +XAAUDIT.SOLR.PASSWORD=NONE +XAAUDIT.SOLR.ZOOKEEPER=NONE + +# Enable audit logs to HDFS +#Example +#XAAUDIT.HDFS.ENABLE=true +#XAAUDIT.HDFS.HDFS_DIR=hdfs://node-1.example.com:8020/ranger/audit +# If using Azure Blob Storage +#XAAUDIT.HDFS.HDFS_DIR=wasb[s]://@.blob.core.windows.net/ +#XAAUDIT.HDFS.HDFS_DIR=wasb://ranger_audit_container@my-azure-account.blob.core.windows.net/ranger/audit +#XAAUDIT.HDFS.FILE_SPOOL_DIR=/var/log/atlas/audit/hdfs/spool + +XAAUDIT.HDFS.ENABLE=false +XAAUDIT.HDFS.HDFS_DIR=hdfs://__REPLACE__NAME_NODE_HOST:8020/ranger/audit + +# The following additional properties are needed when auditing to Azure Blob Storage via HDFS +# Get these values from your /etc/hadoop/conf/core-site.xml +#XAAUDIT.HDFS.HDFS_DIR=wasb[s]://@.blob.core.windows.net/ +XAAUDIT.HDFS.AZURE_ACCOUNTNAME=__REPLACE_AZURE_ACCOUNT_NAME +XAAUDIT.HDFS.AZURE_ACCOUNTKEY=__REPLACE_AZURE_ACCOUNT_KEY +XAAUDIT.HDFS.AZURE_SHELL_KEY_PROVIDER=__REPLACE_AZURE_SHELL_KEY_PROVIDER +XAAUDIT.HDFS.AZURE_ACCOUNTKEY_PROVIDER=__REPLACE_AZURE_ACCOUNT_KEY_PROVIDER + +# End of V3 properties + + +# +# SSL Client Certificate Information +# +# Example: +# SSL_KEYSTORE_FILE_PATH=/etc/atlas/conf/ranger-plugin-keystore.jks +# SSL_KEYSTORE_PASSWORD=none +# SSL_TRUSTSTORE_FILE_PATH=/etc/atlas/conf/ranger-plugin-truststore.jks +# SSL_TRUSTSTORE_PASSWORD=none +# +# You do not need to use SSL between the agent and the security admin tool; you can leave these sample values as they are.
+# +SSL_KEYSTORE_FILE_PATH=/etc/atlas/conf/ranger-plugin-keystore.jks +SSL_KEYSTORE_PASSWORD=myKeyFilePassword +SSL_TRUSTSTORE_FILE_PATH=/etc/atlas/conf/ranger-plugin-truststore.jks +SSL_TRUSTSTORE_PASSWORD=changeit + +# +# Custom component user +# CUSTOM_COMPONENT_USER= +# keep blank if component user is default +CUSTOM_USER=atlas + + +# +# Custom component group +# CUSTOM_COMPONENT_GROUP= +# keep blank if component group is default +CUSTOM_GROUP=hadoop + diff --git a/atlas-hub/pre-conf/ranger/install/conf.templates/default/configuration.xml b/atlas-hub/pre-conf/ranger/install/conf.templates/default/configuration.xml new file mode 100755 index 0000000000..bdb0125891 --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install/conf.templates/default/configuration.xml @@ -0,0 +1,20 @@ + + + + + diff --git a/atlas-hub/pre-conf/atlas-auth/atlas-atlas-audit.xml b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/atlas-atlas-audit.xml similarity index 100% rename from atlas-hub/pre-conf/atlas-auth/atlas-atlas-audit.xml rename to atlas-hub/pre-conf/ranger/install/conf.templates/enable/atlas-atlas-audit.xml diff --git a/atlas-hub/pre-conf/atlas-auth/atlas-atlas-security-changes.cfg b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/atlas-atlas-security-changes.cfg similarity index 100% rename from atlas-hub/pre-conf/atlas-auth/atlas-atlas-security-changes.cfg rename to atlas-hub/pre-conf/ranger/install/conf.templates/enable/atlas-atlas-security-changes.cfg diff --git a/atlas-hub/pre-conf/atlas-auth/atlas-atlas-security.xml b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/atlas-atlas-security.xml similarity index 100% rename from atlas-hub/pre-conf/atlas-auth/atlas-atlas-security.xml rename to atlas-hub/pre-conf/ranger/install/conf.templates/enable/atlas-atlas-security.xml diff --git a/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-audit-changes.cfg b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-audit-changes.cfg new file mode 100755 index 0000000000..07fc382798 --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-audit-changes.cfg @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
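The *-changes.cfg templates that follow are consumed by the enable script earlier in this change: for each file the script derives the target XML name by stripping the -changes.cfg suffix, then runs org.apache.ranger.utils.install.XmlConfigChanger to merge every "name value mod create-if-not-exists" line into that XML, with the %TOKEN% placeholders expected to be resolved from install.properties. A small shell sketch of the name mapping the script performs:

    # Mirrors the enable script: <name>-changes.cfg drives edits to <name>.xml
    f=ranger-atlas-audit-changes.cfg
    orgfn=$(echo "$f" | sed -e 's:-changes.cfg:.xml:')
    echo "$orgfn"    # prints ranger-atlas-audit.xml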
+ + +#solr configuration +xasecure.audit.destination.solr %XAAUDIT.SOLR.ENABLE% mod create-if-not-exists +xasecure.audit.destination.solr.urls %XAAUDIT.SOLR.URL% mod create-if-not-exists +xasecure.audit.destination.solr.user %XAAUDIT.SOLR.USER% mod create-if-not-exists +xasecure.audit.destination.solr.password %XAAUDIT.SOLR.PASSWORD% mod create-if-not-exists +xasecure.audit.destination.solr.zookeepers %XAAUDIT.SOLR.ZOOKEEPER% mod create-if-not-exists + + +#hdfs configuration +xasecure.audit.destination.hdfs %XAAUDIT.HDFS.ENABLE% mod create-if-not-exists +xasecure.audit.destination.hdfs.dir %XAAUDIT.HDFS.HDFS_DIR% mod create-if-not-exists + + + diff --git a/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-audit.xml b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-audit.xml new file mode 100755 index 0000000000..ca1dd338f2 --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-audit.xml @@ -0,0 +1,139 @@ + + + + + + xasecure.audit.is.enabled + true + + + + + + xasecure.audit.destination.solr + false + + + + xasecure.audit.destination.solr.urls + NONE + + + + xasecure.audit.destination.solr.zookeepers + + + + + xasecure.audit.destination.solr.collection + NONE + + + + + + xasecure.audit.destination.hdfs + false + + + + xasecure.audit.destination.hdfs.dir + hdfs://__REPLACE__NAME_NODE_HOST:8020/ranger/audit + + + + xasecure.audit.destination.hdfs.subdir + %app-type%/%time:yyyyMMdd% + + + + xasecure.audit.destination.hdfs.filename.format + %app-type%_ranger_audit_%hostname%.log + + + + xasecure.audit.destination.hdfs.file.rollover.sec + 86400 + + + + + + + xasecure.audit.destination.log4j + true + + + + xasecure.audit.destination.log4j.logger + AUDIT + + + + + xasecure.audit.destination.elasticsearch + true + + + xasecure.audit.elasticsearch.is.enabled + true/ + + + xasecure.audit.destination.elasticsearch.urls + logging-master.logging.svc.cluster.local + + + xasecure.audit.destination.elasticsearch.index + ranger-audit + + diff --git a/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-security-changes.cfg b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-security-changes.cfg new file mode 100755 index 0000000000..08790b9852 --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-security-changes.cfg @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Change the original policy parameter to work with policy manager based. 
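In the audit change file above, the Solr and HDFS destinations are driven by the XAAUDIT.SOLR.* and XAAUDIT.HDFS.* tokens, which install.properties sets to false/NONE by default, while the ranger-atlas-audit.xml template appears to leave the log4j and elasticsearch destinations enabled. A quick way to confirm which destinations end up switched on after the plugin is enabled; the conf path is an assumption, use the component's conf directory reported in the install log:

    grep -A 1 'xasecure.audit.destination' /opt/apache-atlas/conf/ranger-atlas-audit.xml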
+# +# +ranger.plugin.atlas.service.name %REPOSITORY_NAME% mod create-if-not-exists + +ranger.plugin.atlas.policy.source.impl org.apache.ranger.admin.client.RangerAdminRESTClient mod create-if-not-exists + +ranger.plugin.atlas.policy.rest.url %POLICY_MGR_URL% mod create-if-not-exists +ranger.plugin.atlas.policy.rest.ssl.config.file /etc/atlas/conf/ranger-policymgr-ssl.xml mod create-if-not-exists +ranger.plugin.atlas.policy.pollIntervalMs 30000 mod create-if-not-exists +ranger.plugin.atlas.policy.cache.dir %POLICY_CACHE_FILE_PATH% mod create-if-not-exists +ranger.plugin.atlas.policy.rest.client.connection.timeoutMs 120000 mod create-if-not-exists +ranger.plugin.atlas.policy.rest.client.read.timeoutMs 30000 mod create-if-not-exists + diff --git a/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-security.xml b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-security.xml new file mode 100755 index 0000000000..933c8e78df --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-atlas-security.xml @@ -0,0 +1,83 @@ + + + + + + ranger.plugin.atlas.service.name + atlasdev + + Name of the Ranger service containing policies for this YARN instance + + + + + ranger.plugin.atlas.policy.source.impl + org.apache.ranger.admin.client.RangerAdminRESTClient + + Class to retrieve policies from the source + + + + + ranger.plugin.atlas.policy.rest.url + http://policymanagerhost:port + + URL to Ranger Admin + + + + + ranger.plugin.atlas.policy.rest.ssl.config.file + /etc/atlas/conf/ranger-policymgr-ssl.xml + + Path to the file containing SSL details to contact Ranger Admin + + + + + ranger.plugin.atlas.policy.pollIntervalMs + 30000 + + How often to poll for changes in policies? + + + + + ranger.plugin.atlas.policy.cache.dir + /etc/ranger/atlasdev/policycache + + Directory where Ranger policies are cached after successful retrieval from the source + + + + + ranger.plugin.atlas.policy.rest.client.connection.timeoutMs + 120000 + + RangerRestClient Connection Timeout in Milli Seconds + + + + + ranger.plugin.atlas.policy.rest.client.read.timeoutMs + 30000 + + RangerRestClient read Timeout in Milli Seconds + + + diff --git a/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-policymgr-ssl-changes.cfg b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-policymgr-ssl-changes.cfg new file mode 100755 index 0000000000..47126492f2 --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-policymgr-ssl-changes.cfg @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
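The SSL change template that follows points the keystore and truststore passwords at a JCEKS credential store (jceks://file%CREDENTIAL_PROVIDER_FILE%) instead of plain-text properties; the enable script above populates that store through create_jceks under the sslKeyStore and sslTrustStore aliases. A minimal sketch of checking the store afterwards, assuming Hadoop's credential CLI is on the PATH and using a hypothetical store path:

    # Path is hypothetical; the enable script builds it as
    # /etc/<PROJ_NAME>/<REPOSITORY_NAME>/cred.jceks
    hadoop credential list -provider jceks://file/etc/ranger/atlasdev/cred.jceks
    # Expect the sslKeyStore and sslTrustStore aliases created by create_jceks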
+# +# SSL Params +# +xasecure.policymgr.clientssl.keystore %SSL_KEYSTORE_FILE_PATH% mod create-if-not-exists +xasecure.policymgr.clientssl.keystore.credential.file jceks://file%CREDENTIAL_PROVIDER_FILE% mod create-if-not-exists +xasecure.policymgr.clientssl.truststore %SSL_TRUSTSTORE_FILE_PATH% mod create-if-not-exists +xasecure.policymgr.clientssl.truststore.credential.file jceks://file%CREDENTIAL_PROVIDER_FILE% mod create-if-not-exists \ No newline at end of file diff --git a/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-policymgr-ssl.xml b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-policymgr-ssl.xml new file mode 100755 index 0000000000..3baf7725cf --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-policymgr-ssl.xml @@ -0,0 +1,49 @@ + + + + + + + xasecure.policymgr.clientssl.keystore + hadoopdev-clientcert.jks + + Java Keystore files + + + + xasecure.policymgr.clientssl.truststore + cacerts-xasecure.jks + + java truststore file + + + + xasecure.policymgr.clientssl.keystore.credential.file + jceks://file/tmp/keystore-hadoopdev-ssl.jceks + + java keystore credential file + + + + xasecure.policymgr.clientssl.truststore.credential.file + jceks://file/tmp/truststore-hadoopdev-ssl.jceks + + java truststore credential file + + + \ No newline at end of file diff --git a/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-security.xml b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-security.xml new file mode 100644 index 0000000000..1a7cb6fe0e --- /dev/null +++ b/atlas-hub/pre-conf/ranger/install/conf.templates/enable/ranger-security.xml @@ -0,0 +1,17 @@ + +\nThu Apr 30 22:10:46 UTC 2020\n \ No newline at end of file diff --git a/atlas-hub/pre-conf/ranger/install/lib/commons-cli-1.2.jar b/atlas-hub/pre-conf/ranger/install/lib/commons-cli-1.2.jar new file mode 100644 index 0000000000..ce4b9fffe4 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/commons-cli-1.2.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/commons-collections-3.2.2.jar b/atlas-hub/pre-conf/ranger/install/lib/commons-collections-3.2.2.jar new file mode 100644 index 0000000000..fa5df82a63 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/commons-collections-3.2.2.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/commons-configuration2-2.1.1.jar b/atlas-hub/pre-conf/ranger/install/lib/commons-configuration2-2.1.1.jar new file mode 100644 index 0000000000..666baa09d2 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/commons-configuration2-2.1.1.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/commons-io-2.5.jar b/atlas-hub/pre-conf/ranger/install/lib/commons-io-2.5.jar new file mode 100644 index 0000000000..107b061f5f Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/commons-io-2.5.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/commons-lang-2.6.jar b/atlas-hub/pre-conf/ranger/install/lib/commons-lang-2.6.jar new file mode 100644 index 0000000000..98467d3a65 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/commons-lang-2.6.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/commons-logging-1.2.jar b/atlas-hub/pre-conf/ranger/install/lib/commons-logging-1.2.jar new file mode 100644 index 0000000000..93a3b9f6db Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/commons-logging-1.2.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/credentialbuilder-2.0.0.jar 
b/atlas-hub/pre-conf/ranger/install/lib/credentialbuilder-2.0.0.jar new file mode 100644 index 0000000000..b082d9bcea Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/credentialbuilder-2.0.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/guava-25.1-jre.jar b/atlas-hub/pre-conf/ranger/install/lib/guava-25.1-jre.jar new file mode 100644 index 0000000000..babc175535 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/guava-25.1-jre.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/hadoop-auth-3.1.1.jar b/atlas-hub/pre-conf/ranger/install/lib/hadoop-auth-3.1.1.jar new file mode 100644 index 0000000000..0d2a9dcd3f Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/hadoop-auth-3.1.1.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/hadoop-common-3.1.1.jar b/atlas-hub/pre-conf/ranger/install/lib/hadoop-common-3.1.1.jar new file mode 100644 index 0000000000..0bbed5ac7e Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/hadoop-common-3.1.1.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/htrace-core4-4.1.0-incubating.jar b/atlas-hub/pre-conf/ranger/install/lib/htrace-core4-4.1.0-incubating.jar new file mode 100644 index 0000000000..12349a2066 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/htrace-core4-4.1.0-incubating.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/ranger-plugins-cred-2.0.0.jar b/atlas-hub/pre-conf/ranger/install/lib/ranger-plugins-cred-2.0.0.jar new file mode 100644 index 0000000000..01653b1fc5 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/ranger-plugins-cred-2.0.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/ranger-plugins-installer-2.0.0.jar b/atlas-hub/pre-conf/ranger/install/lib/ranger-plugins-installer-2.0.0.jar new file mode 100644 index 0000000000..ab774b9329 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/ranger-plugins-installer-2.0.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/slf4j-api-1.7.30.jar b/atlas-hub/pre-conf/ranger/install/lib/slf4j-api-1.7.30.jar new file mode 100644 index 0000000000..29ac26fb8c Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/slf4j-api-1.7.30.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/stax2-api-3.1.4.jar b/atlas-hub/pre-conf/ranger/install/lib/stax2-api-3.1.4.jar new file mode 100644 index 0000000000..dded036928 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/stax2-api-3.1.4.jar differ diff --git a/atlas-hub/pre-conf/ranger/install/lib/woodstox-core-5.0.3.jar b/atlas-hub/pre-conf/ranger/install/lib/woodstox-core-5.0.3.jar new file mode 100644 index 0000000000..1c268641c8 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/install/lib/woodstox-core-5.0.3.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/eclipselink-2.5.2.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/eclipselink-2.5.2.jar new file mode 100644 index 0000000000..325e2a3f90 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/eclipselink-2.5.2.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/gethostname4j-0.0.2.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/gethostname4j-0.0.2.jar new file mode 100644 index 0000000000..507d8d7b4c Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/gethostname4j-0.0.2.jar differ diff --git 
a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/guava-25.1-jre.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/guava-25.1-jre.jar new file mode 100644 index 0000000000..babc175535 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/guava-25.1-jre.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jackson-jaxrs-1.9.13.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jackson-jaxrs-1.9.13.jar new file mode 100644 index 0000000000..b85f45cf40 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jackson-jaxrs-1.9.13.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/javax.persistence-2.1.0.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/javax.persistence-2.1.0.jar new file mode 100644 index 0000000000..e48d2e9618 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/javax.persistence-2.1.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jersey-bundle-1.19.3.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jersey-bundle-1.19.3.jar new file mode 100644 index 0000000000..bd786a7821 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jersey-bundle-1.19.3.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jna-5.2.0.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jna-5.2.0.jar new file mode 100644 index 0000000000..0b5fabdd8b Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jna-5.2.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jna-platform-5.2.0.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jna-platform-5.2.0.jar new file mode 100644 index 0000000000..4722681821 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/jna-platform-5.2.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-atlas-plugin-2.2.0.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-atlas-plugin-2.2.0.jar new file mode 100644 index 0000000000..178b6191eb Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-atlas-plugin-2.2.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-audit-2.2.0.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-audit-2.2.0.jar new file mode 100644 index 0000000000..e108d19e1d Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-audit-2.2.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-common-2.2.0.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-common-2.2.0.jar new file mode 100644 index 0000000000..cc23b8a0b2 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-common-2.2.0.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-cred-2.2.0-SNAPSHOT.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-cred-2.2.0-SNAPSHOT.jar new file mode 100644 index 0000000000..d270f48d96 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/ranger-plugins-cred-2.2.0-SNAPSHOT.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/solr-solrj-7.7.1.jar 
b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/solr-solrj-7.7.1.jar new file mode 100644 index 0000000000..0cd9fd57ba Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-impl/solr-solrj-7.7.1.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-shim-3.0.0-SNAPSHOT.jar b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-shim-3.0.0-SNAPSHOT.jar new file mode 100644 index 0000000000..89893ad6ce Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-atlas-plugin-shim-3.0.0-SNAPSHOT.jar differ diff --git a/atlas-hub/pre-conf/ranger/lib/ranger-plugin-classloader-3.0.0-SNAPSHOT.jar b/atlas-hub/pre-conf/ranger/lib/ranger-plugin-classloader-3.0.0-SNAPSHOT.jar new file mode 100644 index 0000000000..866c8a72b6 Binary files /dev/null and b/atlas-hub/pre-conf/ranger/lib/ranger-plugin-classloader-3.0.0-SNAPSHOT.jar differ diff --git a/atlas-hub/pre-conf/ranger/ranger_credential_helper.py b/atlas-hub/pre-conf/ranger/ranger_credential_helper.py new file mode 100755 index 0000000000..b36adeace1 --- /dev/null +++ b/atlas-hub/pre-conf/ranger/ranger_credential_helper.py @@ -0,0 +1,75 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os +from subprocess import Popen,PIPE +from optparse import OptionParser + +if os.getenv('JAVA_HOME') is None: + print "ERROR: JAVA_HOME environment property was not defined, exit." 
+ sys.exit(1) +else: + JAVA_BIN=os.path.join(os.getenv('JAVA_HOME'),'bin','java') +print "Using Java:" + str(JAVA_BIN) + +def main(): + + parser = OptionParser() + + parser.add_option("-l", "--libpath", dest="library_path", help="Path to folder where credential libs are present") + parser.add_option("-f", "--file", dest="jceks_file_path", help="Path to jceks file to use") + parser.add_option("-k", "--key", dest="key", help="Key to use") + parser.add_option("-v", "--value", dest="value", help="Value to use") + parser.add_option("-c", "--create", dest="create", help="Add a new alias") + + (options, args) = parser.parse_args() + library_path = options.library_path + jceks_file_path = options.jceks_file_path + key = options.key + value = options.value + getorcreate = 'create' if options.create else 'get' + call_keystore(library_path, jceks_file_path, key, value, getorcreate) + + +def call_keystore(libpath, filepath, aliasKey, aliasValue='', getorcreate='get'): + finalLibPath = libpath.replace('\\','/').replace('//','/') + finalFilePath = 'jceks://file/'+filepath.replace('\\','/').replace('//','/') + if getorcreate == 'create': + commandtorun = [JAVA_BIN, '-cp', finalLibPath, 'org.apache.ranger.credentialapi.buildks' ,'create', aliasKey, '-value', aliasValue, '-provider',finalFilePath] + p = Popen(commandtorun,stdin=PIPE, stdout=PIPE, stderr=PIPE) + output, error = p.communicate() + statuscode = p.returncode + if statuscode == 0: + print "Alias " + aliasKey + " created successfully!" + else : + print "Error creating Alias!! Error: " + str(error) + + elif getorcreate == 'get': + commandtorun = [JAVA_BIN, '-cp', finalLibPath, 'org.apache.ranger.credentialapi.buildks' ,'get', aliasKey, '-provider',finalFilePath] + p = Popen(commandtorun,stdin=PIPE, stdout=PIPE, stderr=PIPE) + output, error = p.communicate() + statuscode = p.returncode + if statuscode == 0: + print "Alias : " + aliasKey + " Value : " + str(output) + else : + print "Error getting value!! Error: " + str(error) + + else: + print 'Invalid Arguments!!' + +if __name__ == '__main__': + main() diff --git a/atlas-hub/pre-conf/solr/currency.xml b/atlas-hub/pre-conf/solr/currency.xml new file mode 100644 index 0000000000..3a9c58afee --- /dev/null +++ b/atlas-hub/pre-conf/solr/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/atlas-hub/pre-conf/solr/lang/stopwords_en.txt b/atlas-hub/pre-conf/solr/lang/stopwords_en.txt new file mode 100644 index 0000000000..2c164c0b2a --- /dev/null +++ b/atlas-hub/pre-conf/solr/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
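The ranger_credential_helper.py script added above is a thin wrapper around org.apache.ranger.credentialapi.buildks for creating or reading aliases in a JCEKS store. A usage sketch, assuming a Python 2 interpreter (the script uses print statements) and JAVA_HOME set; the library path and store location are hypothetical examples only:

    # Store a keystore password under the alias sslKeyStore; paths are hypothetical.
    python ranger_credential_helper.py \
        -l '/opt/ranger-atlas-plugin/install/lib/*' \
        -f /etc/ranger/atlasdev/cred.jceks \
        -k sslKeyStore -v myKeyFilePassword -c 1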
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/atlas-hub/pre-conf/solr/protwords.txt b/atlas-hub/pre-conf/solr/protwords.txt new file mode 100644 index 0000000000..1dfc0abecb --- /dev/null +++ b/atlas-hub/pre-conf/solr/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/atlas-hub/pre-conf/solr/schema.xml b/atlas-hub/pre-conf/solr/schema.xml new file mode 100644 index 0000000000..1d445b1500 --- /dev/null +++ b/atlas-hub/pre-conf/solr/schema.xml @@ -0,0 +1,534 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/atlas-hub/pre-conf/solr/solrconfig.xml b/atlas-hub/pre-conf/solr/solrconfig.xml new file mode 100644 index 0000000000..1d414f76e9 --- /dev/null +++ b/atlas-hub/pre-conf/solr/solrconfig.xml @@ -0,0 +1,625 @@ + + + + + + + + + 5.0.0 + + + ${solr.data.dir:} + + + + + + + + ${solr.hdfs.home:} + + ${solr.hdfs.confdir:} + + ${solr.hdfs.blockcache.enabled:true} + + ${solr.hdfs.blockcache.global:true} + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + true + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:15000} + false + + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + true + + + 20 + + + 200 + + + false + + + 2 + + + + + + + + + + + + + + + + + + + + explicit + 10 + + + + + + + + explicit + json + true + text + + + + + + + {!xport} + xsort + false + + + + query + + + + + + + text + + + + + + + + + + + + + + explicit + true + + + + + + + + + + + + + + true + false + + + terms + + + + + + *:* + + + + + + + timestamp + + + 5 + ttl + expire_at + + + expire_at_dt + + + + + diff --git a/atlas-hub/pre-conf/solr/stopwords.txt b/atlas-hub/pre-conf/solr/stopwords.txt new file mode 100644 index 0000000000..ae1e83eeb3 --- /dev/null +++ b/atlas-hub/pre-conf/solr/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under 
one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/atlas-hub/pre-conf/solr/synonyms.txt b/atlas-hub/pre-conf/solr/synonyms.txt new file mode 100644 index 0000000000..7f72128303 --- /dev/null +++ b/atlas-hub/pre-conf/solr/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/auth-agents-common/src/main/java/org/apache/atlas/plugin/util/RangerRESTClient.java b/auth-agents-common/src/main/java/org/apache/atlas/plugin/util/RangerRESTClient.java index 9394e882ed..4a265a8430 100644 --- a/auth-agents-common/src/main/java/org/apache/atlas/plugin/util/RangerRESTClient.java +++ b/auth-agents-common/src/main/java/org/apache/atlas/plugin/util/RangerRESTClient.java @@ -35,6 +35,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.atlas.authorization.hadoop.utils.RangerCredentialProvider; import org.apache.atlas.authorization.utils.StringUtil; import javax.net.ssl.HostnameVerifier; @@ -188,6 +189,25 @@ public Client getClient() { private Client buildClient() { Client client = null; + if (mIsSSL) { + KeyManager[] kmList = getKeyManagers(); + TrustManager[] tmList = getTrustManagers(); + SSLContext sslContext = getSSLContext(kmList, tmList); + ClientConfig config = new DefaultClientConfig(); + + config.getClasses().add(JacksonJsonProvider.class); // to handle List<> unmarshalling + + HostnameVerifier hv = new HostnameVerifier() { + public boolean verify(String urlHostName, SSLSession session) { + return session.getPeerHost().equals(urlHostName); + } + }; + + config.getProperties().put(HTTPSProperties.PROPERTY_HTTPS_PROPERTIES, new HTTPSProperties(hv, sslContext)); + + client = Client.create(config); + } + if(client == null) { ClientConfig config = new DefaultClientConfig(); @@ -249,6 +269,75 @@ private void init(Configuration config) { } } + private KeyManager[] getKeyManagers() { + KeyManager[] kmList = null; + + String keyStoreFilepwd = getCredential(mKeyStoreURL, mKeyStoreAlias); + + kmList = getKeyManagers(mKeyStoreFile,keyStoreFilepwd); + return kmList; + } + + public KeyManager[] getKeyManagers(String keyStoreFile, String keyStoreFilePwd) { + KeyManager[] kmList = null; + + if (StringUtils.isNotEmpty(keyStoreFile) && StringUtils.isNotEmpty(keyStoreFilePwd)) { + InputStream in = null; + + try { + in = getFileInputStream(keyStoreFile); + + if (in != null) { + KeyStore keyStore = KeyStore.getInstance(mKeyStoreType); + + keyStore.load(in, keyStoreFilePwd.toCharArray()); + + KeyManagerFactory keyManagerFactory = KeyManagerFactory.getInstance(RANGER_SSL_KEYMANAGER_ALGO_TYPE); + + keyManagerFactory.init(keyStore, keyStoreFilePwd.toCharArray()); + + kmList = keyManagerFactory.getKeyManagers(); + } else { + LOG.error("Unable to obtain keystore from file [" + keyStoreFile + "]"); + throw new IllegalStateException("Unable to find keystore file :" + keyStoreFile); + } + } catch (KeyStoreException e) { + LOG.error("Unable to obtain from KeyStore :" + e.getMessage(), e); + throw new IllegalStateException("Unable to init keystore:" + e.getMessage(), e); + } catch (NoSuchAlgorithmException e) { + LOG.error("SSL algorithm is NOT available in the environment", e); + throw new IllegalStateException("SSL algorithm is NOT available in the environment :" + e.getMessage(), e); + } catch (CertificateException e) { + LOG.error("Unable to obtain the requested certification ", e); + throw new IllegalStateException("Unable to obtain the requested certification :" + e.getMessage(), e); + } catch (FileNotFoundException e) { + LOG.error("Unable to find the necessary SSL Keystore Files", e); + throw new IllegalStateException("Unable to find keystore file :" + keyStoreFile + ", error :" + e.getMessage(), e); + 
} catch (IOException e) { + LOG.error("Unable to read the necessary SSL Keystore Files", e); + throw new IllegalStateException("Unable to read keystore file :" + keyStoreFile + ", error :" + e.getMessage(), e); + } catch (UnrecoverableKeyException e) { + LOG.error("Unable to recover the key from keystore", e); + throw new IllegalStateException("Unable to recover the key from keystore :" + keyStoreFile+", error :" + e.getMessage(), e); + } finally { + close(in, keyStoreFile); + } + } + + return kmList; + } + + private TrustManager[] getTrustManagers() { + TrustManager[] tmList = null; + if (StringUtils.isNotEmpty(mTrustStoreURL) && StringUtils.isNotEmpty(mTrustStoreAlias)) { + String trustStoreFilepwd = getCredential(mTrustStoreURL, mTrustStoreAlias); + if (StringUtils.isNotEmpty(trustStoreFilepwd)) { + tmList = getTrustManagers(mTrustStoreFile, trustStoreFilepwd); + } + } + return tmList; + } + public TrustManager[] getTrustManagers(String trustStoreFile, String trustStoreFilepwd) { TrustManager[] tmList = null; @@ -324,6 +413,9 @@ protected SSLContext getSSLContext(KeyManager[] kmList, TrustManager[] tmList) { } } + private String getCredential(String url, String alias) { + return RangerCredentialProvider.getInstance().getCredentialString(url, alias); + } private InputStream getFileInputStream(String fileName) throws IOException { InputStream in = null; diff --git a/auth-agents-common/src/main/java/org/apache/atlas/plugin/util/RangerSslHelper.java b/auth-agents-common/src/main/java/org/apache/atlas/plugin/util/RangerSslHelper.java index 8f0bbad374..187185b268 100644 --- a/auth-agents-common/src/main/java/org/apache/atlas/plugin/util/RangerSslHelper.java +++ b/auth-agents-common/src/main/java/org/apache/atlas/plugin/util/RangerSslHelper.java @@ -22,6 +22,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.atlas.authorization.hadoop.utils.RangerCredentialProvider; import org.apache.atlas.authorization.utils.StringUtil; import javax.net.ssl.HostnameVerifier; @@ -89,7 +90,14 @@ public RangerSslHelper(String sslConfigFileName) { } } - + + public SSLContext createContext() { + readConfig(); + KeyManager[] kmList = getKeyManagers(); + TrustManager[] tmList = getTrustManagers(); + SSLContext sslContext = getSSLContext(kmList, tmList); + return sslContext; + } public HostnameVerifier getHostnameVerifier() { return _Hv; @@ -129,6 +137,92 @@ void readConfig() { } } + private KeyManager[] getKeyManagers() { + KeyManager[] kmList = null; + + String keyStoreFilepwd = getCredential(mKeyStoreURL, mKeyStoreAlias); + + if (!StringUtil.isEmpty(mKeyStoreFile) && !StringUtil.isEmpty(keyStoreFilepwd)) { + InputStream in = null; + + try { + in = getFileInputStream(mKeyStoreFile); + + if (in != null) { + KeyStore keyStore = KeyStore.getInstance(mKeyStoreType); + + keyStore.load(in, keyStoreFilepwd.toCharArray()); + + KeyManagerFactory keyManagerFactory = KeyManagerFactory.getInstance(RANGER_SSL_KEYMANAGER_ALGO_TYPE); + + keyManagerFactory.init(keyStore, keyStoreFilepwd.toCharArray()); + + kmList = keyManagerFactory.getKeyManagers(); + } else { + LOG.error("Unable to obtain keystore from file [" + mKeyStoreFile + "]"); + } + } catch (KeyStoreException e) { + LOG.error("Unable to obtain from KeyStore", e); + } catch (NoSuchAlgorithmException e) { + LOG.error("SSL algorithm is available in the environment", e); + } catch (CertificateException e) { + LOG.error("Unable to obtain the requested certification ", e); + } 
catch (FileNotFoundException e) { + LOG.error("Unable to find the necessary SSL Keystore and TrustStore Files", e); + } catch (IOException e) { + LOG.error("Unable to read the necessary SSL Keystore and TrustStore Files", e); + } catch (UnrecoverableKeyException e) { + LOG.error("Unable to recover the key from keystore", e); + } finally { + close(in, mKeyStoreFile); + } + } + + return kmList; + } + + private TrustManager[] getTrustManagers() { + TrustManager[] tmList = null; + + String trustStoreFilepwd = getCredential(mTrustStoreURL, mTrustStoreAlias); + + if (!StringUtil.isEmpty(mTrustStoreFile) && !StringUtil.isEmpty(trustStoreFilepwd)) { + InputStream in = null; + + try { + in = getFileInputStream(mTrustStoreFile); + + if (in != null) { + KeyStore trustStore = KeyStore.getInstance(mTrustStoreType); + + trustStore.load(in, trustStoreFilepwd.toCharArray()); + + TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance(RANGER_SSL_TRUSTMANAGER_ALGO_TYPE); + + trustManagerFactory.init(trustStore); + + tmList = trustManagerFactory.getTrustManagers(); + } else { + LOG.error("Unable to obtain keystore from file [" + mTrustStoreFile + "]"); + } + } catch (KeyStoreException e) { + LOG.error("Unable to obtain from KeyStore", e); + } catch (NoSuchAlgorithmException e) { + LOG.error("SSL algorithm is available in the environment", e); + } catch (CertificateException e) { + LOG.error("Unable to obtain the requested certification ", e); + } catch (FileNotFoundException e) { + LOG.error("Unable to find the necessary SSL Keystore and TrustStore Files", e); + } catch (IOException e) { + LOG.error("Unable to read the necessary SSL Keystore and TrustStore Files", e); + } finally { + close(in, mTrustStoreFile); + } + } + + return tmList; + } + private SSLContext getSSLContext(KeyManager[] kmList, TrustManager[] tmList) { try { if(tmList != null) { @@ -147,6 +241,10 @@ private SSLContext getSSLContext(KeyManager[] kmList, TrustManager[] tmList) { return null; } + private String getCredential(String url, String alias) { + return RangerCredentialProvider.getInstance().getCredentialString(url, alias); + } + private InputStream getFileInputStream(String fileName) throws IOException { InputStream in = null; diff --git a/auth-audits/pom.xml b/auth-audits/pom.xml index 60037a3183..c19e686740 100644 --- a/auth-audits/pom.xml +++ b/auth-audits/pom.xml @@ -48,6 +48,46 @@ ${commons-lang.version} + + org.apache.solr + solr-solrj + ${solr.version} + + + io.netty + * + + + org.eclipse.jetty.http2 + * + + + org.apache.commons + commons-math3 + + + commons-io + commons-io + + + org.apache.httpcomponents + * + + + org.apache.zookeeper + * + + + org.codehaus.woodstox + * + + + org.eclipse.jetty + * + + + + org.elasticsearch.client elasticsearch-rest-high-level-client diff --git a/auth-audits/src/main/java/org/apache/atlas/audit/destination/ElasticSearchAuditDestination.java b/auth-audits/src/main/java/org/apache/atlas/audit/destination/ElasticSearchAuditDestination.java index b6c4ef22cf..114d3fe4eb 100644 --- a/auth-audits/src/main/java/org/apache/atlas/audit/destination/ElasticSearchAuditDestination.java +++ b/auth-audits/src/main/java/org/apache/atlas/audit/destination/ElasticSearchAuditDestination.java @@ -23,10 +23,17 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.HttpHost; +import org.apache.http.auth.AuthSchemeProvider; import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.config.AuthSchemes; +import 
org.apache.http.config.Lookup; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.impl.auth.SPNegoSchemeFactory; import org.apache.atlas.audit.model.AuditEventBase; import org.apache.atlas.audit.model.AuthzAuditEvent; import org.apache.atlas.audit.provider.MiscUtil; +import org.apache.atlas.authorization.credutils.CredentialsProviderUtil; +import org.apache.atlas.authorization.credutils.kerberos.KerberosCredentialsProvider; import org.elasticsearch.action.admin.indices.open.OpenIndexRequest; import org.elasticsearch.action.bulk.BulkItemResponse; import org.elasticsearch.action.bulk.BulkRequest; @@ -178,6 +185,21 @@ synchronized RestHighLevelClient getClient() { } } } + if (subject != null) { + KerberosTicket ticket = CredentialsProviderUtil.getTGT(subject); + try { + if (new Date().getTime() > ticket.getEndTime().getTime()){ + client = null; + CredentialsProviderUtil.ticketExpireTime80 = 0; + newClient(); + } else if (CredentialsProviderUtil.ticketWillExpire(ticket)) { + subject = CredentialsProviderUtil.login(user, password); + } + } catch (PrivilegedActionException e) { + LOG.error("PrivilegedActionException:", e); + throw new RuntimeException(e); + } + } return client; } @@ -189,15 +211,37 @@ public static RestClientBuilder getRestClientBuilder(String urls, String protoco .map(x -> new HttpHost(x, port, protocol)) .toArray(i -> new HttpHost[i]) ); - LOG.error("ElasticSearch Credentials not provided!!"); - final CredentialsProvider credentialsProvider = null; - restClientBuilder.setHttpClientConfigCallback(clientBuilder -> - clientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + if (StringUtils.isNotBlank(user) && StringUtils.isNotBlank(password) && !user.equalsIgnoreCase("NONE") && !password.equalsIgnoreCase("NONE")) { + if (password.contains("keytab") && new File(password).exists()) { + final KerberosCredentialsProvider credentialsProvider = + CredentialsProviderUtil.getKerberosCredentials(user, password); + Lookup authSchemeRegistry = RegistryBuilder.create() + .register(AuthSchemes.SPNEGO, new SPNegoSchemeFactory()).build(); + restClientBuilder.setHttpClientConfigCallback(clientBuilder -> { + clientBuilder.setDefaultCredentialsProvider(credentialsProvider); + clientBuilder.setDefaultAuthSchemeRegistry(authSchemeRegistry); + return clientBuilder; + }); + } else { + final CredentialsProvider credentialsProvider = + CredentialsProviderUtil.getBasicCredentials(user, password); + restClientBuilder.setHttpClientConfigCallback(clientBuilder -> + clientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + } + } else { + LOG.error("ElasticSearch Credentials not provided!!"); + final CredentialsProvider credentialsProvider = null; + restClientBuilder.setHttpClientConfigCallback(clientBuilder -> + clientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + } return restClientBuilder; } private RestHighLevelClient newClient() { try { + if (StringUtils.isNotBlank(user) && StringUtils.isNotBlank(password) && password.contains("keytab") && new File(password).exists()) { + subject = CredentialsProviderUtil.login(user, password); + } RestClientBuilder restClientBuilder = getRestClientBuilder(hosts, protocol, user, password, port); RestHighLevelClient restHighLevelClient = new RestHighLevelClient(restClientBuilder); diff --git a/auth-audits/src/main/java/org/apache/atlas/audit/destination/SolrAuditDestination.java b/auth-audits/src/main/java/org/apache/atlas/audit/destination/SolrAuditDestination.java new file mode 100644 index 
0000000000..7b42926ca9 --- /dev/null +++ b/auth-audits/src/main/java/org/apache/atlas/audit/destination/SolrAuditDestination.java @@ -0,0 +1,489 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.atlas.audit.destination; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.atlas.audit.model.AuditEventBase; +import org.apache.atlas.audit.model.AuthzAuditEvent; +import org.apache.atlas.audit.provider.MiscUtil; +import org.apache.atlas.audit.utils.InMemoryJAASConfiguration; +import org.apache.atlas.audit.utils.KerberosAction; +import org.apache.atlas.audit.utils.KerberosJAASConfigUser; +import org.apache.atlas.audit.utils.KerberosUser; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.impl.HttpClientUtil; +import org.apache.solr.client.solrj.impl.Krb5HttpClientBuilder; +import org.apache.solr.client.solrj.impl.LBHttpSolrClient; +import org.apache.solr.client.solrj.impl.SolrHttpClientBuilder; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; + +import javax.net.ssl.KeyManager; +import javax.net.ssl.KeyManagerFactory; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.TrustManagerFactory; +import javax.security.auth.login.LoginException; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.security.KeyManagementException; +import java.security.KeyStore; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.security.PrivilegedExceptionAction; +import java.security.SecureRandom; +import java.security.UnrecoverableKeyException; +import java.security.cert.CertificateException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.Properties; + + +public class SolrAuditDestination extends AuditDestination { + private static final Log LOG = LogFactory + .getLog(SolrAuditDestination.class); + + public static final String PROP_SOLR_URLS = "urls"; + public static final String PROP_SOLR_ZK = "zookeepers"; + public static final String PROP_SOLR_COLLECTION = "collection"; + public static final String PROP_SOLR_FORCE_USE_INMEMORY_JAAS_CONFIG = "force.use.inmemory.jaas.config"; + + public static final String DEFAULT_COLLECTION_NAME = "ranger_audits"; + public static final String PROP_JAVA_SECURITY_AUTH_LOGIN_CONFIG = 
"java.security.auth.login.config"; + + private volatile SolrClient solrClient = null; + private volatile KerberosUser kerberosUser = null; + + public SolrAuditDestination() { + } + + @Override + public void init(Properties props, String propPrefix) { + LOG.info("init() called"); + super.init(props, propPrefix); + init(); + connect(); + } + + @Override + public void stop() { + LOG.info("SolrAuditDestination.stop() called.."); + logStatus(); + + if (solrClient != null) { + try { + solrClient.close(); + } catch (IOException ioe) { + LOG.error("Error while stopping slor!", ioe); + } finally { + solrClient = null; + } + } + + if (kerberosUser != null) { + try { + kerberosUser.logout(); + } catch (LoginException excp) { + LOG.error("Error logging out keytab user", excp); + } finally { + kerberosUser = null; + } + } + } + + synchronized void connect() { + SolrClient me = solrClient; + if (me == null) { + synchronized(SolrAuditDestination.class) { + me = solrClient; + if (solrClient == null) { + KeyManager[] kmList = getKeyManagers(); + TrustManager[] tmList = getTrustManagers(); + SSLContext sslContext = getSSLContext(kmList, tmList); + if(sslContext != null) { + SSLContext.setDefault(sslContext); + } + String urls = MiscUtil.getStringProperty(props, propPrefix + + "." + PROP_SOLR_URLS); + if (urls != null) { + urls = urls.trim(); + } + if (urls != null && urls.equalsIgnoreCase("NONE")) { + urls = null; + } + List solrURLs = new ArrayList(); + String zkHosts = null; + solrURLs = MiscUtil.toArray(urls, ","); + zkHosts = MiscUtil.getStringProperty(props, propPrefix + "." + + PROP_SOLR_ZK); + if (zkHosts != null && zkHosts.equalsIgnoreCase("NONE")) { + zkHosts = null; + } + String collectionName = MiscUtil.getStringProperty(props, + propPrefix + "." + PROP_SOLR_COLLECTION); + if (collectionName == null + || collectionName.equalsIgnoreCase("none")) { + collectionName = DEFAULT_COLLECTION_NAME; + } + + LOG.info("Solr zkHosts=" + zkHosts + ", solrURLs=" + urls + + ", collectionName=" + collectionName); + + if (zkHosts != null && !zkHosts.isEmpty()) { + LOG.info("Connecting to solr cloud using zkHosts=" + + zkHosts); + try { + // Instantiate + Krb5HttpClientBuilder krbBuild = new Krb5HttpClientBuilder(); + SolrHttpClientBuilder kb = krbBuild.getBuilder(); + HttpClientUtil.setHttpClientBuilder(kb); + + final List zkhosts = new ArrayList(Arrays.asList(zkHosts.split(","))); + final CloudSolrClient solrCloudClient = MiscUtil.executePrivilegedAction(new PrivilegedExceptionAction() { + @Override + public CloudSolrClient run() throws Exception { + CloudSolrClient solrCloudClient = new CloudSolrClient.Builder(zkhosts, Optional.empty()).build(); + return solrCloudClient; + }; + }); + + solrCloudClient.setDefaultCollection(collectionName); + me = solrClient = solrCloudClient; + } catch (Throwable t) { + LOG.fatal("Can't connect to Solr server. 
ZooKeepers=" + + zkHosts, t); + } + } else if (solrURLs != null && !solrURLs.isEmpty()) { + try { + LOG.info("Connecting to Solr using URLs=" + solrURLs); + Krb5HttpClientBuilder krbBuild = new Krb5HttpClientBuilder(); + SolrHttpClientBuilder kb = krbBuild.getBuilder(); + HttpClientUtil.setHttpClientBuilder(kb); + final List solrUrls = solrURLs; + final LBHttpSolrClient lbSolrClient = MiscUtil.executePrivilegedAction(new PrivilegedExceptionAction() { + @Override + public LBHttpSolrClient run() throws Exception { + LBHttpSolrClient.Builder builder = new LBHttpSolrClient.Builder(); + builder.withBaseSolrUrl(solrUrls.get(0)); + builder.withConnectionTimeout(1000); + LBHttpSolrClient lbSolrClient = builder.build(); + return lbSolrClient; + }; + }); + + for (int i = 1; i < solrURLs.size(); i++) { + lbSolrClient.addSolrServer(solrURLs.get(i)); + } + me = solrClient = lbSolrClient; + } catch (Throwable t) { + LOG.fatal("Can't connect to Solr server. URL=" + + solrURLs, t); + } + } + } + } + } + } + + @Override + public boolean log(Collection events) { + boolean ret = false; + try { + logStatusIfRequired(); + addTotalCount(events.size()); + + if (solrClient == null) { + connect(); + if (solrClient == null) { + // Solr is still not initialized. So need return error + addDeferredCount(events.size()); + return ret; + } + } + + final Collection docs = new ArrayList(); + for (AuditEventBase event : events) { + AuthzAuditEvent authzEvent = (AuthzAuditEvent) event; + // Convert AuditEventBase to Solr document + SolrInputDocument document = toSolrDoc(authzEvent); + docs.add(document); + } + try { + final UpdateResponse response = addDocsToSolr(solrClient, docs); + + if (response.getStatus() != 0) { + addFailedCount(events.size()); + logFailedEvent(events, response.toString()); + } else { + addSuccessCount(events.size()); + ret = true; + } + } catch (SolrException ex) { + addFailedCount(events.size()); + logFailedEvent(events, ex); + } + } catch (Throwable t) { + addDeferredCount(events.size()); + logError("Error sending message to Solr", t); + } + return ret; + } + + /* + * (non-Javadoc) + * + * @see org.apache.ranger.audit.provider.AuditProvider#flush() + */ + @Override + public void flush() { + + } + + SolrInputDocument toSolrDoc(AuthzAuditEvent auditEvent) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", auditEvent.getEventId()); + doc.addField("access", auditEvent.getAccessType()); + doc.addField("enforcer", auditEvent.getAclEnforcer()); + doc.addField("agent", auditEvent.getAgentId()); + doc.addField("repo", auditEvent.getRepositoryName()); + doc.addField("sess", auditEvent.getSessionId()); + doc.addField("reqUser", auditEvent.getUser()); + doc.addField("reqData", auditEvent.getRequestData()); + doc.addField("resource", auditEvent.getResourcePath()); + doc.addField("cliIP", auditEvent.getClientIP()); + doc.addField("logType", auditEvent.getLogType()); + doc.addField("result", auditEvent.getAccessResult()); + doc.addField("policy", auditEvent.getPolicyId()); + doc.addField("repoType", auditEvent.getRepositoryType()); + doc.addField("resType", auditEvent.getResourceType()); + doc.addField("reason", auditEvent.getResultReason()); + doc.addField("action", auditEvent.getAction()); + doc.addField("evtTime", auditEvent.getEventTime()); + doc.addField("seq_num", auditEvent.getSeqNum()); + doc.setField("event_count", auditEvent.getEventCount()); + doc.setField("event_dur_ms", auditEvent.getEventDurationMS()); + doc.setField("tags", auditEvent.getTags()); + doc.setField("cluster", 
auditEvent.getClusterName()); + doc.setField("zoneName", auditEvent.getZoneName()); + doc.setField("agentHost", auditEvent.getAgentHostname()); + doc.setField("policyVersion", auditEvent.getPolicyVersion()); + + return doc; + } + + public boolean isAsync() { + return true; + } + + private void init() { + LOG.info("==>SolrAuditDestination.init()" ); + try { + // SolrJ requires "java.security.auth.login.config" property to be set to identify itself that it is kerberized. So using a dummy property for it + // Acutal solrclient JAAS configs are read from the ranger--audit.xml present in components conf folder and set by InMemoryJAASConfiguration + // Refer InMemoryJAASConfiguration doc for JAAS Configuration + String confFileName = System.getProperty(PROP_JAVA_SECURITY_AUTH_LOGIN_CONFIG); + LOG.info("In solrAuditDestination.init() : JAAS Configuration set as [" + confFileName + "]"); + if ( System.getProperty(PROP_JAVA_SECURITY_AUTH_LOGIN_CONFIG) == null ) { + if ( MiscUtil.getBooleanProperty(props, propPrefix + "." + PROP_SOLR_FORCE_USE_INMEMORY_JAAS_CONFIG,false) ) { + System.setProperty(PROP_JAVA_SECURITY_AUTH_LOGIN_CONFIG, "/dev/null"); + } else { + LOG.warn("No Client JAAS config present in solr audit config. Ranger Audit to Kerberized Solr will fail..."); + } + } + + LOG.info("Loading SolrClient JAAS config from Ranger audit config if present..."); + + InMemoryJAASConfiguration conf = InMemoryJAASConfiguration.init(props); + + KerberosUser kerberosUser = new KerberosJAASConfigUser("Client", conf); + + if (kerberosUser.getPrincipal() != null) { + this.kerberosUser = kerberosUser; + } + } catch (Exception e) { + LOG.error("ERROR: Unable to load SolrClient JAAS config from Audit config file. Audit to Kerberized Solr will fail...", e); + } finally { + String confFileName = System.getProperty(PROP_JAVA_SECURITY_AUTH_LOGIN_CONFIG); + LOG.info("In solrAuditDestination.init() (finally) : JAAS Configuration set as [" + confFileName + "]"); + } + LOG.info("<==SolrAuditDestination.init()" ); + } + + private KeyManager[] getKeyManagers() { + KeyManager[] kmList = null; + String credentialProviderPath = MiscUtil.getStringProperty(props, RANGER_POLICYMGR_CLIENT_KEY_FILE_CREDENTIAL); + String keyStoreAlias = RANGER_POLICYMGR_CLIENT_KEY_FILE_CREDENTIAL_ALIAS; + String keyStoreFile = MiscUtil.getStringProperty(props, RANGER_POLICYMGR_CLIENT_KEY_FILE); + String keyStoreFilepwd = MiscUtil.getCredentialString(credentialProviderPath, keyStoreAlias); + if (StringUtils.isNotEmpty(keyStoreFile) && StringUtils.isNotEmpty(keyStoreFilepwd)) { + InputStream in = null; + + try { + in = getFileInputStream(keyStoreFile); + + if (in != null) { + String keyStoreType = MiscUtil.getStringProperty(props, RANGER_POLICYMGR_CLIENT_KEY_FILE_TYPE); + keyStoreType = StringUtils.isNotEmpty(keyStoreType) ? 
keyStoreType : RANGER_POLICYMGR_CLIENT_KEY_FILE_TYPE_DEFAULT; + KeyStore keyStore = KeyStore.getInstance(keyStoreType); + + keyStore.load(in, keyStoreFilepwd.toCharArray()); + + KeyManagerFactory keyManagerFactory = KeyManagerFactory.getInstance(RANGER_SSL_KEYMANAGER_ALGO_TYPE); + + keyManagerFactory.init(keyStore, keyStoreFilepwd.toCharArray()); + + kmList = keyManagerFactory.getKeyManagers(); + } else { + LOG.error("Unable to obtain keystore from file [" + keyStoreFile + "]"); + } + } catch (KeyStoreException e) { + LOG.error("Unable to obtain from KeyStore :" + e.getMessage(), e); + } catch (NoSuchAlgorithmException e) { + LOG.error("SSL algorithm is NOT available in the environment", e); + } catch (CertificateException e) { + LOG.error("Unable to obtain the requested certification ", e); + } catch (FileNotFoundException e) { + LOG.error("Unable to find the necessary SSL Keystore Files", e); + } catch (IOException e) { + LOG.error("Unable to read the necessary SSL Keystore Files", e); + } catch (UnrecoverableKeyException e) { + LOG.error("Unable to recover the key from keystore", e); + } finally { + close(in, keyStoreFile); + } + } + + return kmList; + } + + private TrustManager[] getTrustManagers() { + TrustManager[] tmList = null; + String credentialProviderPath = MiscUtil.getStringProperty(props, RANGER_POLICYMGR_TRUSTSTORE_FILE_CREDENTIAL); + String trustStoreAlias = RANGER_POLICYMGR_TRUSTSTORE_FILE_CREDENTIAL_ALIAS; + String trustStoreFile = MiscUtil.getStringProperty(props, RANGER_POLICYMGR_TRUSTSTORE_FILE); + String trustStoreFilepwd = MiscUtil.getCredentialString(credentialProviderPath, trustStoreAlias); + if (StringUtils.isNotEmpty(trustStoreFile) && StringUtils.isNotEmpty(trustStoreFilepwd)) { + InputStream in = null; + + try { + in = getFileInputStream(trustStoreFile); + + if (in != null) { + String trustStoreType = MiscUtil.getStringProperty(props, RANGER_POLICYMGR_TRUSTSTORE_FILE_TYPE); + trustStoreType = StringUtils.isNotEmpty(trustStoreType) ? 
trustStoreType : RANGER_POLICYMGR_TRUSTSTORE_FILE_TYPE_DEFAULT; + KeyStore trustStore = KeyStore.getInstance(trustStoreType); + + trustStore.load(in, trustStoreFilepwd.toCharArray()); + + TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance(RANGER_SSL_TRUSTMANAGER_ALGO_TYPE); + + trustManagerFactory.init(trustStore); + + tmList = trustManagerFactory.getTrustManagers(); + } else { + LOG.error("Unable to obtain truststore from file [" + trustStoreFile + "]"); + } + } catch (KeyStoreException e) { + LOG.error("Unable to obtain from KeyStore", e); + } catch (NoSuchAlgorithmException e) { + LOG.error("SSL algorithm is NOT available in the environment :" + e.getMessage(), e); + } catch (CertificateException e) { + LOG.error("Unable to obtain the requested certification :" + e.getMessage(), e); + } catch (FileNotFoundException e) { + LOG.error("Unable to find the necessary SSL TrustStore File:" + trustStoreFile, e); + } catch (IOException e) { + LOG.error("Unable to read the necessary SSL TrustStore Files :" + trustStoreFile, e); + } finally { + close(in, trustStoreFile); + } + } + + return tmList; + } + + private SSLContext getSSLContext(KeyManager[] kmList, TrustManager[] tmList) { + SSLContext sslContext = null; + try { + sslContext = SSLContext.getInstance(RANGER_SSL_CONTEXT_ALGO_TYPE); + if (sslContext != null) { + sslContext.init(kmList, tmList, new SecureRandom()); + } + } catch (NoSuchAlgorithmException e) { + LOG.error("SSL algorithm is not available in the environment", e); + } catch (KeyManagementException e) { + LOG.error("Unable to initialise the SSLContext", e); + } + return sslContext; + } + + private UpdateResponse addDocsToSolr(final SolrClient solrClient, final Collection docs) throws Exception { + final UpdateResponse ret; + + try { + final PrivilegedExceptionAction action = () -> solrClient.add(docs); + + if (kerberosUser != null) { + // execute the privileged action as the given keytab user + final KerberosAction kerberosAction = new KerberosAction<>(kerberosUser, action, LOG); + + ret = (UpdateResponse) kerberosAction.execute(); + } else { + ret = action.run(); + } + } catch (Exception e) { + throw e; + } + + return ret; + } + + private InputStream getFileInputStream(String fileName) throws IOException { + InputStream in = null; + if (StringUtils.isNotEmpty(fileName)) { + File file = new File(fileName); + if (file != null && file.exists()) { + in = new FileInputStream(file); + } else { + in = ClassLoader.getSystemResourceAsStream(fileName); + } + } + return in; + } + + private void close(InputStream str, String filename) { + if (str != null) { + try { + str.close(); + } catch (IOException excp) { + LOG.error("Error while closing file: [" + filename + "]", excp); + } + } + } +} diff --git a/auth-audits/src/main/java/org/apache/atlas/audit/provider/AuditProviderFactory.java b/auth-audits/src/main/java/org/apache/atlas/audit/provider/AuditProviderFactory.java index 7b67e9c640..269da05423 100644 --- a/auth-audits/src/main/java/org/apache/atlas/audit/provider/AuditProviderFactory.java +++ b/auth-audits/src/main/java/org/apache/atlas/audit/provider/AuditProviderFactory.java @@ -23,6 +23,7 @@ import org.apache.hadoop.util.ShutdownHookManager; import org.apache.atlas.audit.destination.*; import org.apache.atlas.audit.provider.hdfs.HdfsAuditProvider; +import org.apache.atlas.audit.provider.solr.SolrAuditProvider; import org.apache.atlas.audit.queue.AuditAsyncQueue; import org.apache.atlas.audit.queue.AuditBatchQueue; import 
org.apache.atlas.audit.queue.AuditFileQueue; @@ -330,6 +331,19 @@ public synchronized void init(Properties props, String appType) { } }*/ + if (isAuditToSolrEnabled) { + LOG.info("SolrAuditProvider is enabled"); + SolrAuditProvider solrProvider = new SolrAuditProvider(); + solrProvider.init(props); + + if (solrProvider.isAsync()) { + AsyncAuditProvider asyncProvider = new AsyncAuditProvider( + "MySolrAuditProvider", 1000, 1000, solrProvider); + providers.add(asyncProvider); + } else { + providers.add(solrProvider); + } + } if (isAuditToLog4jEnabled) { Log4jAuditProvider log4jProvider = new Log4jAuditProvider(); @@ -402,6 +416,8 @@ private AuditHandler getProviderFromConfig(Properties props, provider = new FileAuditDestination(); } else if (providerName.equalsIgnoreCase("hdfs")) { provider = new HDFSAuditDestination(); + } else if (providerName.equalsIgnoreCase("solr")) { + provider = new SolrAuditDestination(); } else if (providerName.equalsIgnoreCase("elasticsearch")) { provider = new ElasticSearchAuditDestination(); } /*else if (providerName.equalsIgnoreCase("kafka")) { diff --git a/auth-audits/src/main/java/org/apache/atlas/audit/provider/MiscUtil.java b/auth-audits/src/main/java/org/apache/atlas/audit/provider/MiscUtil.java index c4b2045e7b..93974cb92b 100644 --- a/auth-audits/src/main/java/org/apache/atlas/audit/provider/MiscUtil.java +++ b/auth-audits/src/main/java/org/apache/atlas/audit/provider/MiscUtil.java @@ -25,6 +25,8 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.util.KerberosName; import org.apache.hadoop.security.authentication.util.KerberosUtil; +import org.apache.atlas.authorization.hadoop.utils.RangerCredentialProvider; + import javax.security.auth.Subject; import javax.security.auth.login.AppConfigurationEntry; import javax.security.auth.login.Configuration; @@ -442,6 +444,14 @@ public static List toArray(String destListStr, String delim) { return list; } + public static String getCredentialString(String url, String alias) { + if (url != null && alias != null) { + return RangerCredentialProvider.getInstance() + .getCredentialString(url, alias); + } + return null; + } + public static UserGroupInformation createUGIFromSubject(Subject subject) throws IOException { logger.info("SUBJECT " + (subject == null ? "not found" : "found")); diff --git a/auth-audits/src/main/java/org/apache/atlas/audit/provider/solr/SolrAuditProvider.java b/auth-audits/src/main/java/org/apache/atlas/audit/provider/solr/SolrAuditProvider.java new file mode 100644 index 0000000000..914f45685e --- /dev/null +++ b/auth-audits/src/main/java/org/apache/atlas/audit/provider/solr/SolrAuditProvider.java @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
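For context on the "solr" destination wired into AuditProviderFactory above: like the other destinations it is driven entirely by properties passed to init(props, propPrefix). The sketch below initializes the destination directly and is a minimal illustration only; the property prefix and Solr endpoint are assumptions in the Ranger naming style, not values fixed by this patch.

    import java.util.Properties;
    import org.apache.atlas.audit.destination.SolrAuditDestination;

    public class SolrAuditDestinationSketch {
        public static void main(String[] args) {
            // assumed Ranger-style prefix; the factory passes whatever prefix
            // the audit configuration actually uses into init(props, propPrefix)
            String prefix = "xasecure.audit.destination.solr";

            Properties props = new Properties();
            // PROP_SOLR_URLS ("urls") - placeholder Solr endpoint
            props.setProperty(prefix + ".urls", "http://solr-host:8983/solr");
            // PROP_SOLR_COLLECTION ("collection") - defaults to ranger_audits when unset
            props.setProperty(prefix + ".collection", "ranger_audits");

            SolrAuditDestination destination = new SolrAuditDestination();
            // init() reads the properties, then connect() builds the SolrClient
            destination.init(props, prefix);
        }
    }

When "zookeepers" is set instead of "urls", connect() takes the SolrCloud branch shown earlier and builds a CloudSolrClient against the given ZooKeeper ensemble.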
+ */ + +package org.apache.atlas.audit.provider.solr; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.atlas.audit.destination.AuditDestination; +import org.apache.atlas.audit.model.AuditEventBase; +import org.apache.atlas.audit.model.AuthzAuditEvent; +import org.apache.atlas.audit.provider.MiscUtil; +import org.apache.atlas.audit.utils.SolrAppUtil; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; + +import java.io.IOException; +import java.security.PrivilegedExceptionAction; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.Properties; + +public class SolrAuditProvider extends AuditDestination { + private static final Log LOG = LogFactory.getLog(SolrAuditProvider.class); + + public static final String AUDIT_MAX_QUEUE_SIZE_PROP = "xasecure.audit.solr.async.max.queue.size"; + public static final String AUDIT_MAX_FLUSH_INTERVAL_PROP = "xasecure.audit.solr.async.max.flush.interval.ms"; + public static final String AUDIT_RETRY_WAIT_PROP = "xasecure.audit.solr.retry.ms"; + + static final Object lock = new Object(); + volatile SolrClient solrClient = null; + Date lastConnectTime = null; + long lastFailTime = 0; + + int retryWaitTime = 30000; + + public SolrAuditProvider() { + } + + @Override + public void init(Properties props) { + LOG.info("init() called"); + super.init(props); + + retryWaitTime = MiscUtil.getIntProperty(props, + AUDIT_RETRY_WAIT_PROP, retryWaitTime); + } + + void connect() { + SolrClient me = solrClient; + if (me == null) { + synchronized (lock) { + me = solrClient; + if (me == null) { + final String solrURL = MiscUtil.getStringProperty(props, + "xasecure.audit.solr.solr_url"); + + if (lastConnectTime != null) { + // Let's wait for enough time before retrying + long diff = System.currentTimeMillis() + - lastConnectTime.getTime(); + if (diff < retryWaitTime) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ignore connecting to solr url=" + + solrURL + ", lastConnect=" + diff + + "ms"); + } + return; + } + } + lastConnectTime = new Date(); + + if (solrURL == null || solrURL.isEmpty()) { + LOG.fatal("Solr URL for Audit is empty"); + return; + } + + try { + // TODO: Need to support SolrCloud also + solrClient = MiscUtil.executePrivilegedAction(new PrivilegedExceptionAction() { + @Override + public SolrClient run() throws Exception { + HttpSolrClient.Builder builder = new HttpSolrClient.Builder(); + builder.withBaseSolrUrl(solrURL); + builder.allowCompression(true); + builder.withConnectionTimeout(1000); + HttpSolrClient httpSolrClient = builder.build(); + return httpSolrClient; + }; + }); + + me = solrClient; + } catch (Throwable t) { + LOG.fatal("Can't connect to Solr server. URL=" + + solrURL, t); + } + } + } + } + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.ranger.audit.provider.AuditProvider#log(org.apache.ranger. 
+ * audit.model.AuditEventBase) + */ + @Override + public boolean log(AuditEventBase event) { + if (!(event instanceof AuthzAuditEvent)) { + LOG.error(event.getClass().getName() + + " audit event class type is not supported"); + return false; + } + AuthzAuditEvent authzEvent = (AuthzAuditEvent) event; + // TODO: This should be done at a higher level + + if (authzEvent.getAgentHostname() == null) { + authzEvent.setAgentHostname(MiscUtil.getHostname()); + } + + if (authzEvent.getLogType() == null) { + authzEvent.setLogType("RangerAudit"); + } + + if (authzEvent.getEventId() == null) { + authzEvent.setEventId(MiscUtil.generateUniqueId()); + } + + try { + if (solrClient == null) { + connect(); + if (solrClient == null) { + // Solr is still not initialized. So need to throw error + return false; + } + } + + if (lastFailTime > 0) { + long diff = System.currentTimeMillis() - lastFailTime; + if (diff < retryWaitTime) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ignore sending audit. lastConnect=" + diff + + " ms"); + } + return false; + } + } + // Convert AuditEventBase to Solr document + final SolrInputDocument document = toSolrDoc(authzEvent); + final Collection docs = Collections.singletonList(document); + final UpdateResponse response = SolrAppUtil.addDocsToSolr(solrClient, docs); + + if (response.getStatus() != 0) { + lastFailTime = System.currentTimeMillis(); + + // System.out.println("Response=" + response.toString() + // + ", status= " + response.getStatus() + ", event=" + // + event); + // throw new Exception("Aborting. event=" + event + + // ", response=" + // + response.toString()); + } else { + lastFailTime = 0; + } + + } catch (Throwable t) { + LOG.error("Error sending message to Solr", t); + return false; + } + return true; + } + + @Override + public boolean log(Collection events) { + for (AuditEventBase event : events) { + log(event); + } + return true; + } + + @Override + public boolean logJSON(String event) { + AuditEventBase eventObj = MiscUtil.fromJson(event, + AuthzAuditEvent.class); + return log(eventObj); + } + + @Override + public boolean logJSON(Collection events) { + for (String event : events) { + logJSON(event); + } + return false; + } + + /* + * (non-Javadoc) + * + * @see org.apache.ranger.audit.provider.AuditProvider#start() + */ + @Override + public void start() { + connect(); + } + + /* + * (non-Javadoc) + * + * @see org.apache.ranger.audit.provider.AuditProvider#stop() + */ + @Override + public void stop() { + LOG.info("SolrAuditProvider.stop() called.."); + try { + if (solrClient != null) { + solrClient.close(); + } + } catch (IOException ioe) { + LOG.error("Error while stopping slor!", ioe); + } finally { + solrClient = null; + } + } + + /* + * (non-Javadoc) + * + * @see org.apache.ranger.audit.provider.AuditProvider#waitToComplete() + */ + @Override + public void waitToComplete() { + + } + + + @Override + public void waitToComplete(long timeout) { + + } + + /* + * (non-Javadoc) + * + * @see org.apache.ranger.audit.provider.AuditProvider#flush() + */ + @Override + public void flush() { + // TODO Auto-generated method stub + + } + + SolrInputDocument toSolrDoc(AuthzAuditEvent auditEvent) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", auditEvent.getEventId()); + doc.addField("access", auditEvent.getAccessType()); + doc.addField("enforcer", auditEvent.getAclEnforcer()); + doc.addField("agent", auditEvent.getAgentId()); + doc.addField("repo", auditEvent.getRepositoryName()); + doc.addField("sess", auditEvent.getSessionId()); + 
doc.addField("reqUser", auditEvent.getUser()); + doc.addField("reqData", auditEvent.getRequestData()); + doc.addField("resource", auditEvent.getResourcePath()); + doc.addField("cliIP", auditEvent.getClientIP()); + doc.addField("logType", auditEvent.getLogType()); + doc.addField("result", auditEvent.getAccessResult()); + doc.addField("policy", auditEvent.getPolicyId()); + doc.addField("repoType", auditEvent.getRepositoryType()); + doc.addField("resType", auditEvent.getResourceType()); + doc.addField("reason", auditEvent.getResultReason()); + doc.addField("action", auditEvent.getAction()); + doc.addField("evtTime", auditEvent.getEventTime()); + doc.addField("tags", auditEvent.getTags()); + doc.addField("cluster", auditEvent.getClusterName()); + doc.addField("zone", auditEvent.getZoneName()); + doc.addField("agentHost", auditEvent.getAgentHostname()); + return doc; + } + + public boolean isAsync() { + return true; + } + +} diff --git a/auth-audits/src/main/java/org/apache/atlas/audit/utils/SolrAppUtil.java b/auth-audits/src/main/java/org/apache/atlas/audit/utils/SolrAppUtil.java new file mode 100644 index 0000000000..c232b945fc --- /dev/null +++ b/auth-audits/src/main/java/org/apache/atlas/audit/utils/SolrAppUtil.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.atlas.audit.utils; + +import org.apache.atlas.audit.provider.MiscUtil; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.response.UpdateResponse; +import org.apache.solr.common.SolrInputDocument; + +import java.security.PrivilegedExceptionAction; +import java.util.Collection; + +public class SolrAppUtil { + public static UpdateResponse addDocsToSolr(final SolrClient solrClient, final Collection docs) throws Exception { + return MiscUtil.executePrivilegedAction(new PrivilegedExceptionAction() { + @Override + public UpdateResponse run() throws Exception { + return solrClient.add(docs); + } + }); + } +} diff --git a/common/pom.xml b/common/pom.xml index a6f4db3af7..ff4d502f12 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -60,6 +60,20 @@ + + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.version} + + + javax.servlet + servlet-api + + + + compile + + commons-collections commons-collections diff --git a/common/src/main/java/org/apache/atlas/utils/AtlasPathExtractorUtil.java b/common/src/main/java/org/apache/atlas/utils/AtlasPathExtractorUtil.java index ee1f05b2a2..01a67b7209 100644 --- a/common/src/main/java/org/apache/atlas/utils/AtlasPathExtractorUtil.java +++ b/common/src/main/java/org/apache/atlas/utils/AtlasPathExtractorUtil.java @@ -544,13 +544,17 @@ private static AtlasEntity addHDFSPathEntity(Path path, PathExtractorContext con LOG.debug("==> addHDFSPathEntity(strPath={})", strPath); } - String attrPath = strPath; + String nameServiceID = HdfsNameServiceResolver.getNameServiceIDForPath(strPath); + String attrPath = StringUtils.isEmpty(nameServiceID) ? strPath : HdfsNameServiceResolver.getPathWithNameServiceID(strPath); String pathQualifiedName = getQualifiedName(attrPath, context.getMetadataNamespace()); AtlasEntity ret = context.getEntity(pathQualifiedName); if (ret == null) { ret = new AtlasEntity(HDFS_TYPE_PATH); + if (StringUtils.isNotEmpty(nameServiceID)) { + ret.setAttribute(ATTRIBUTE_NAMESERVICE_ID, nameServiceID); + } String name = Path.getPathWithoutSchemeAndAuthority(path).toString(); @@ -597,6 +601,9 @@ private static String getOzoneBucketName(Path path) { } private static String getQualifiedName(String path, String metadataNamespace) { + if (path.startsWith(HdfsNameServiceResolver.HDFS_SCHEME)) { + return path + QNAME_SEP_METADATA_NAMESPACE + metadataNamespace; + } return path.toLowerCase(); } diff --git a/common/src/main/java/org/apache/atlas/utils/HdfsNameServiceResolver.java b/common/src/main/java/org/apache/atlas/utils/HdfsNameServiceResolver.java new file mode 100644 index 0000000000..4b9d0bef22 --- /dev/null +++ b/common/src/main/java/org/apache/atlas/utils/HdfsNameServiceResolver.java @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.utils; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + + +public class HdfsNameServiceResolver { + private static final Logger LOG = LoggerFactory.getLogger(HdfsNameServiceResolver.class); + + public static final String HDFS_SCHEME = "hdfs://"; + + private static final int DEFAULT_PORT = 8020; + private static final String HDFS_NAMESERVICE_PROPERTY_KEY = "dfs.nameservices"; + private static final String HDFS_INTERNAL_NAMESERVICE_PROPERTY_KEY = "dfs.internal.nameservices"; + private static final String HDFS_NAMENODES_HA_NODES_PREFIX = "dfs.ha.namenodes."; + private static final String HDFS_NAMENODE_HA_ADDRESS_TEMPLATE = "dfs.namenode.rpc-address.%s.%s"; + private static final String HDFS_NAMENODE_ADDRESS_TEMPLATE = "dfs.namenode.rpc-address.%s"; + + private static final Map hostToNameServiceMap = new HashMap<>(); + + static { + init(); + } + + public static String getPathWithNameServiceID(String path) { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HdfsNameServiceResolver.getPathWithNameServiceID({})", path); + } + + String ret = path; + + // Only handle URLs that begin with hdfs:// + if (path != null && path.indexOf(HDFS_SCHEME) == 0) { + URI uri = new Path(path).toUri(); + String nsId; + + if (uri.getPort() != -1) { + nsId = hostToNameServiceMap.get(uri.getAuthority()); + } else { + nsId = hostToNameServiceMap.get(uri.getHost() + ":" + DEFAULT_PORT); + } + + if (nsId != null) { + ret = StringUtils.replaceOnce(path, uri.getAuthority(), nsId); + } + } + + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HdfsNameServiceResolver.getPathWithNameServiceID({}) = {}", path, ret); + } + + return ret; + } + + public static String getNameServiceIDForPath(String path) { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HdfsNameServiceResolver.getNameServiceIDForPath({})", path); + } + + String ret = ""; + + // Only handle path URLs that begin with hdfs:// + if (path != null && path.indexOf(HDFS_SCHEME) == 0) { + try { + URI uri = new Path(path).toUri(); + + if (uri != null) { + // URI can contain host and port + if (uri.getPort() != -1) { + ret = getNameServiceID(uri.getHost(), uri.getPort()); + } else { + // No port information present, it means the path might contain only host or the nameservice id itself + // Try resolving using default port + ret = getNameServiceID(uri.getHost(), DEFAULT_PORT); + + // If not resolved yet, then the path must contain nameServiceId + if (StringUtils.isEmpty(ret) && hostToNameServiceMap.containsValue(uri.getHost())) { + ret = uri.getHost(); + } + } + } + } catch (IllegalArgumentException ignored) { + // No need to do anything + } + } + + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HdfsNameServiceResolver.getNameServiceIDForPath({}) = {}", path, ret); + } + + return ret; + } + + private static String getNameServiceID(String host, int port) { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HdfsNameServiceResolver.getNameServiceID({}, {})", host, port); + } + + String ret = 
hostToNameServiceMap.getOrDefault(host + ":" + port, ""); + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HdfsNameServiceResolver.getNameServiceID({}, {}) = {}", host, port, ret); + } + + return ret; + } + + private static void init() { + if (LOG.isDebugEnabled()) { + LOG.debug("==> HdfsNameServiceResolver.init()"); + } + HdfsConfiguration hdfsConfiguration = new HdfsConfiguration(true); + + // Determine all available nameServiceIDs + String[] nameServiceIDs = hdfsConfiguration.getTrimmedStrings(HDFS_INTERNAL_NAMESERVICE_PROPERTY_KEY); + if (Objects.isNull(nameServiceIDs) || nameServiceIDs.length == 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("NSID not found for {}, looking under {}", HDFS_INTERNAL_NAMESERVICE_PROPERTY_KEY, HDFS_NAMESERVICE_PROPERTY_KEY); + } + // Attempt another lookup using internal name service IDs key + nameServiceIDs = hdfsConfiguration.getTrimmedStrings(HDFS_NAMESERVICE_PROPERTY_KEY); + } + + if (Objects.nonNull(nameServiceIDs) && nameServiceIDs.length > 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("NSIDs = {}", nameServiceIDs); + } + + boolean isHA; + for (String nameServiceID : nameServiceIDs) { + // Find NameNode addresses and map to the NameServiceID + String[] nameNodes = hdfsConfiguration.getTrimmedStrings(HDFS_NAMENODES_HA_NODES_PREFIX + nameServiceID); + isHA = nameNodes != null && nameNodes.length > 0; + + String nameNodeMappingKey, nameNodeAddress; + if (isHA) { + if (LOG.isDebugEnabled()) { + LOG.debug("Processing HA node info"); + } + + for (String nameNode : nameNodes) { + nameNodeMappingKey = String.format(HDFS_NAMENODE_HA_ADDRESS_TEMPLATE, nameServiceID, nameNode); + nameNodeAddress = hdfsConfiguration.get(nameNodeMappingKey, ""); + + // Add a mapping only if found + if (StringUtils.isNotEmpty(nameNodeAddress)) { + hostToNameServiceMap.put(nameNodeAddress, nameServiceID); + } + } + } else { + nameNodeMappingKey = String.format(HDFS_NAMENODE_ADDRESS_TEMPLATE, nameServiceID); + nameNodeAddress = hdfsConfiguration.get(nameNodeMappingKey, ""); + + // Add a mapping only if found + if (StringUtils.isNotEmpty(nameNodeAddress)) { + hostToNameServiceMap.put(nameNodeAddress, nameServiceID); + } + } + } + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("No NSID could be resolved"); + } + } + + if (LOG.isDebugEnabled()) { + LOG.debug("<== HdfsNameServiceResolver.init()"); + } + } +} diff --git a/common/src/test/java/org/apache/atlas/utils/HdfsNameServiceResolverTest.java b/common/src/test/java/org/apache/atlas/utils/HdfsNameServiceResolverTest.java new file mode 100644 index 0000000000..30bc2e1a32 --- /dev/null +++ b/common/src/test/java/org/apache/atlas/utils/HdfsNameServiceResolverTest.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
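To make the intent of HdfsNameServiceResolver concrete, here is a minimal sketch of how a path is rewritten, assuming an HA hdfs-site.xml with a single nameservice; the hostnames and nameservice id are placeholders, and the test class that follows exercises the same behaviour against its own configuration.

    // assumed hdfs-site.xml on the classpath:
    //   dfs.nameservices                        = mycluster
    //   dfs.ha.namenodes.mycluster              = nn1
    //   dfs.namenode.rpc-address.mycluster.nn1  = nn1-host:8020
    String path = "hdfs://nn1-host:8020/warehouse/db1";

    // authority is replaced with the resolved nameservice id
    String qualified = HdfsNameServiceResolver.getPathWithNameServiceID(path);
    // -> "hdfs://mycluster/warehouse/db1"

    // the nameservice id itself, or "" when the authority cannot be resolved
    String nsId = HdfsNameServiceResolver.getNameServiceIDForPath(path);
    // -> "mycluster"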

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.utils; + +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + +public class HdfsNameServiceResolverTest { + + @Test + public void testResolution() { +// assertEquals(HdfsNameServiceResolver.getNameServiceID("test"), ""); +// assertEquals(HdfsNameServiceResolver.getNameServiceID("test1"), ""); +// assertEquals(HdfsNameServiceResolver.getNameServiceID("test", 8020), ""); +// assertEquals(HdfsNameServiceResolver.getNameServiceID("test1", 8020), ""); +// +// assertEquals(HdfsNameServiceResolver.getNameServiceID("ctr-e137-1514896590304-41888-01-000003"), "mycluster"); +// assertEquals(HdfsNameServiceResolver.getNameServiceID("ctr-e137-1514896590304-41888-01-000003", 8020), "mycluster"); +// assertEquals(HdfsNameServiceResolver.getNameServiceID("ctr-e137-1514896590304-41888-01-000004"), "mycluster"); +// assertEquals(HdfsNameServiceResolver.getNameServiceID("ctr-e137-1514896590304-41888-01-000004", 8020), "mycluster"); + + assertEquals(HdfsNameServiceResolver.getPathWithNameServiceID("hdfs://ctr-e137-1514896590304-41888-01-000004:8020/tmp/xyz"), "hdfs://mycluster/tmp/xyz"); + assertEquals(HdfsNameServiceResolver.getPathWithNameServiceID("hdfs://ctr-e137-1514896590304-41888-01-000004:8020/tmp/xyz/ctr-e137-1514896590304-41888-01-000004:8020"), "hdfs://mycluster/tmp/xyz/ctr-e137-1514896590304-41888-01-000004:8020"); + assertEquals(HdfsNameServiceResolver.getNameServiceIDForPath("hdfs://ctr-e137-1514896590304-41888-01-000004:8020/tmp/xyz"), "mycluster"); + + assertEquals(HdfsNameServiceResolver.getPathWithNameServiceID("hdfs://ctr-e137-1514896590304-41888-01-000003:8020/tmp/xyz"), "hdfs://mycluster/tmp/xyz"); + assertEquals(HdfsNameServiceResolver.getNameServiceIDForPath("hdfs://ctr-e137-1514896590304-41888-01-000003:8020/tmp/xyz"), "mycluster"); + + assertEquals(HdfsNameServiceResolver.getPathWithNameServiceID("hdfs://ctr-e137-1514896590304-41888-01-000003/tmp/xyz"), "hdfs://mycluster/tmp/xyz"); + assertEquals(HdfsNameServiceResolver.getNameServiceIDForPath("hdfs://ctr-e137-1514896590304-41888-01-000003/tmp/xyz"), "mycluster"); + + assertEquals(HdfsNameServiceResolver.getPathWithNameServiceID("hdfs://ctr-e137-1514896590304-41888-01-000003/tmp/xyz/ctr-e137-1514896590304-41888-01-000003"), "hdfs://mycluster/tmp/xyz/ctr-e137-1514896590304-41888-01-000003"); + assertEquals(HdfsNameServiceResolver.getNameServiceIDForPath("hdfs://ctr-e137-1514896590304-41888-01-000003/tmp/xyz/ctr-e137-1514896590304-41888-01-000003"), "mycluster"); + + assertEquals(HdfsNameServiceResolver.getPathWithNameServiceID("hdfs://mycluster/tmp/xyz"), "hdfs://mycluster/tmp/xyz"); + assertEquals(HdfsNameServiceResolver.getNameServiceIDForPath("hdfs://mycluster/tmp/xyz"), "mycluster"); + + } +} \ No newline at end of file diff --git a/graphdb/janus/pom.xml b/graphdb/janus/pom.xml index 3662d6b3b7..7ae163ec5e 100644 --- a/graphdb/janus/pom.xml +++ b/graphdb/janus/pom.xml @@ -203,6 +203,22 @@ + + + org.apache.solr + solr-solrj + ${solr.version} + + + org.codehaus.woodstox + woodstox-core-asl + + + io.netty + * + + + org.apache.lucene diff --git a/notification/pom.xml b/notification/pom.xml index 3cd4e07de9..4d7c810708 100644 --- a/notification/pom.xml 
+++ b/notification/pom.xml @@ -151,6 +151,11 @@ hadoop-common ${hadoop.version} + + org.apache.hadoop + hadoop-auth + ${hadoop.version} + org.slf4j slf4j-api diff --git a/pom.xml b/pom.xml index fdb9f3d5a3..6e23472641 100644 --- a/pom.xml +++ b/pom.xml @@ -709,7 +709,7 @@ 29.0-jre 4.1.0 ${hadoop.version} - 3.3.6 + 3.3.0 2.3.3 3.1.0 0.8.1 @@ -806,10 +806,23 @@ webapp docs + addons/hdfs-model plugin-classloader + addons/hive-bridge-shim + addons/hive-bridge + addons/falcon-bridge-shim + addons/falcon-bridge + addons/sqoop-bridge-shim + addons/sqoop-bridge + addons/hbase-bridge-shim + addons/hbase-bridge + addons/hbase-testing-util addons/kafka-bridge tools/classification-updater tools/atlas-index-repair + addons/impala-hook-api + addons/impala-bridge-shim + addons/impala-bridge distro atlas-examples @@ -1000,6 +1013,71 @@ + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + + + javax.servlet + * + + + tomcat + * + + + org.mortbay.jetty + * + + + com.google.guava + guava + + + org.codehaus.jackson + * + + + + + + org.apache.hadoop + hadoop-auth + ${hadoop.version} + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.mortbay.jetty + * + + + + + + org.apache.hadoop + hadoop-annotations + ${hadoop.version} + + + + org.apache.hadoop + hadoop-minikdc + ${hadoop.version} + + + junit + junit + + + test + + org.apache.curator diff --git a/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedSearchIndexer.java b/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedSearchIndexer.java index 550b69e4ad..3a730b0ce9 100755 --- a/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedSearchIndexer.java +++ b/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedSearchIndexer.java @@ -130,7 +130,7 @@ public GraphBackedSearchIndexer(AtlasTypeRegistry typeRegistry) throws AtlasExce this.typeRegistry = typeRegistry; //make sure solr index follows graph backed index listener - //addIndexListener(new SolrIndexHelper(typeRegistry)); + addIndexListener(new SolrIndexHelper(typeRegistry)); if (!HAConfiguration.isHAEnabled(configuration)) { initialize(provider.get()); diff --git a/webapp/pom.xml b/webapp/pom.xml index 3f918eb1ef..8783276f6d 100755 --- a/webapp/pom.xml +++ b/webapp/pom.xml @@ -183,6 +183,38 @@ + + org.apache.hadoop + hadoop-minikdc + + + + org.apache.hadoop + hadoop-hdfs + + + javax.servlet + servlet-api + + + org.eclipse.jetty + * + + + io.netty + netty-handler + + + io.netty + netty-transport-native-epoll + + + log4j + log4j + + + + org.apache.curator @@ -473,6 +505,18 @@ jna 5.2.0 + + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.version} + + + + + org.apache.hadoop + hadoop-aws + ${hadoop.version} + diff --git a/webapp/src/main/java/org/apache/atlas/util/AccessAuditLogsIndexCreator.java b/webapp/src/main/java/org/apache/atlas/util/AccessAuditLogsIndexCreator.java index 48b7498c8a..f5786cf7d3 100644 --- a/webapp/src/main/java/org/apache/atlas/util/AccessAuditLogsIndexCreator.java +++ b/webapp/src/main/java/org/apache/atlas/util/AccessAuditLogsIndexCreator.java @@ -17,6 +17,7 @@ package org.apache.atlas.util; import org.apache.atlas.AtlasConfiguration; +import org.apache.atlas.authorization.credutils.CredentialsProviderUtil; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.configuration.Configuration; import org.apache.commons.lang.StringUtils; @@ -203,12 +204,19 @@ public static RestClientBuilder getRestClientBuilder(String urls, String protoco .map(x -> new HttpHost(x, port, 
protocol)) .toArray(i -> new HttpHost[i]) ); + if (StringUtils.isNotBlank(user) && StringUtils.isNotBlank(password) && !user.equalsIgnoreCase("NONE") && !password.equalsIgnoreCase("NONE")) { - LOG.error("ElasticSearch Credentials not provided!!"); - final CredentialsProvider credentialsProvider = null; - restClientBuilder.setHttpClientConfigCallback(clientBuilder -> - clientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + final CredentialsProvider credentialsProvider = + CredentialsProviderUtil.getBasicCredentials(user, password); + restClientBuilder.setHttpClientConfigCallback(clientBuilder -> + clientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + } else { + LOG.error("ElasticSearch Credentials not provided!!"); + final CredentialsProvider credentialsProvider = null; + restClientBuilder.setHttpClientConfigCallback(clientBuilder -> + clientBuilder.setDefaultCredentialsProvider(credentialsProvider)); + } return restClientBuilder; } diff --git a/webapp/src/test/java/org/apache/atlas/web/filters/AtlasAuthenticationKerberosFilterTest.java b/webapp/src/test/java/org/apache/atlas/web/filters/AtlasAuthenticationKerberosFilterTest.java new file mode 100644 index 0000000000..5628b177e4 --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/filters/AtlasAuthenticationKerberosFilterTest.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
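The AccessAuditLogsIndexCreator change above mirrors the basic-auth branch added to ElasticSearchAuditDestination earlier in this patch: basic credentials when a user/password pair is configured, and the existing no-credentials fallback otherwise (the keytab/SPNEGO path is not added here). A minimal sketch of the basic-auth branch, with placeholder host, port and credentials:

    import org.apache.atlas.authorization.credutils.CredentialsProviderUtil;
    import org.apache.http.HttpHost;
    import org.apache.http.client.CredentialsProvider;
    import org.elasticsearch.client.RestClient;
    import org.elasticsearch.client.RestClientBuilder;
    import org.elasticsearch.client.RestHighLevelClient;

    public class EsBasicAuthSketch {
        static RestHighLevelClient buildClient() {
            // placeholder endpoint and credentials
            CredentialsProvider credentialsProvider =
                    CredentialsProviderUtil.getBasicCredentials("es-user", "es-password");

            RestClientBuilder builder = RestClient.builder(new HttpHost("es-host", 9200, "http"))
                    .setHttpClientConfigCallback(clientBuilder ->
                            clientBuilder.setDefaultCredentialsProvider(credentialsProvider));

            return new RestHighLevelClient(builder);
        }
    }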
+ */ +package org.apache.atlas.web.filters; + +import org.apache.atlas.RequestContext; +import org.apache.atlas.web.security.BaseSecurityTest; +import org.apache.atlas.web.service.EmbeddedServer; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdfs.web.URLConnectionFactory; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.webapp.WebAppContext; +import org.testng.annotations.Test; + +import javax.security.auth.Subject; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.auth.login.LoginContext; +import javax.security.auth.login.LoginException; +import java.io.File; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.security.PrivilegedExceptionAction; + +import static org.testng.Assert.assertEquals; + +/** + * + */ +public class AtlasAuthenticationKerberosFilterTest extends BaseSecurityTest { + public static final String TEST_USER_JAAS_SECTION = "TestUser"; + public static final String TESTUSER = "testuser"; + public static final String TESTPASS = "testpass"; + + private File userKeytabFile; + private File httpKeytabFile; + + class TestEmbeddedServer extends EmbeddedServer { + public TestEmbeddedServer(int port, String path) throws IOException { + super(ATLAS_DEFAULT_BIND_ADDRESS, port, path); + } + + Server getServer() { + return server; + } + + @Override + protected WebAppContext getWebAppContext(String path) { + WebAppContext application = new WebAppContext(path, "/"); + application.setDescriptor(System.getProperty("projectBaseDir") + "/webapp/src/test/webapp/WEB-INF/web.xml"); + application.setClassLoader(Thread.currentThread().getContextClassLoader()); + return application; + } + } + + @Test(enabled = false) + public void testKerberosBasedLogin() throws Exception { + String originalConf = System.getProperty("atlas.conf"); + + setupKDCAndPrincipals(); + TestEmbeddedServer server = null; + + try { + // setup the atlas-application.properties file + String confDirectory = generateKerberosTestProperties(); + System.setProperty("atlas.conf", confDirectory); + + // need to create the web application programmatically in order to control the injection of the test + // application properties + server = new TestEmbeddedServer(23000, "webapp/target/apache-atlas"); + + startEmbeddedServer(server.getServer()); + + final URLConnectionFactory connectionFactory = URLConnectionFactory.DEFAULT_SYSTEM_CONNECTION_FACTORY; + // attempt to hit server and get rejected + URL url = new URL("http://localhost:23000/"); + HttpURLConnection connection = (HttpURLConnection) connectionFactory.openConnection(url, false); + connection.setRequestMethod("GET"); + connection.connect(); + + assertEquals(connection.getResponseCode(), 401); + + // need to populate the ticket cache with a local user, so logging in... 
+ Subject subject = loginTestUser(); + + Subject.doAs(subject, new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + // attempt to hit server and get rejected + URL url = new URL("http://localhost:23000/"); + HttpURLConnection connection = (HttpURLConnection) connectionFactory.openConnection(url, true); + connection.setRequestMethod("GET"); + connection.connect(); + + assertEquals(connection.getResponseCode(), 200); + assertEquals(RequestContext.get().getUser(), TESTUSER); + return null; + } + }); + } finally { + server.getServer().stop(); + kdc.stop(); + + if (originalConf != null) { + System.setProperty("atlas.conf", originalConf); + } else { + System.clearProperty("atlas.conf"); + } + + } + } + + protected Subject loginTestUser() throws LoginException, IOException { + LoginContext lc = new LoginContext(TEST_USER_JAAS_SECTION, new CallbackHandler() { + + @Override + public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException { + for (Callback callback : callbacks) { + if (callback instanceof PasswordCallback) { + PasswordCallback passwordCallback = (PasswordCallback) callback; + passwordCallback.setPassword(TESTPASS.toCharArray()); + } + if (callback instanceof NameCallback) { + NameCallback nameCallback = (NameCallback) callback; + nameCallback.setName(TESTUSER); + } + } + } + }); + // attempt authentication + lc.login(); + return lc.getSubject(); + } + + protected String generateKerberosTestProperties() throws Exception { + PropertiesConfiguration props = new PropertiesConfiguration(); + props.setProperty("atlas.http.authentication.enabled", "true"); + props.setProperty("atlas.http.authentication.type", "kerberos"); + props.setProperty("atlas.http.authentication.kerberos.principal", "HTTP/localhost@" + kdc.getRealm()); + props.setProperty("atlas.http.authentication.kerberos.keytab", httpKeytabFile.getAbsolutePath()); + props.setProperty("atlas.http.authentication.kerberos.name.rules", + "RULE:[1:$1@$0](.*@EXAMPLE.COM)s/@.*//\nDEFAULT"); + + return writeConfiguration(props); + } + + public void setupKDCAndPrincipals() throws Exception { + // set up the KDC + File kdcWorkDir = startKDC(); + + userKeytabFile = createKeytab(kdc, kdcWorkDir, "dgi", "dgi.keytab"); + httpKeytabFile = createKeytab(kdc, kdcWorkDir, "HTTP", "spnego.service.keytab"); + + // create a test user principal + kdc.createPrincipal(TESTUSER, TESTPASS); + + StringBuilder jaas = new StringBuilder(1024); + jaas.append("TestUser {\n" + + " com.sun.security.auth.module.Krb5LoginModule required\nuseTicketCache=true;\n" + + "};\n"); + jaas.append(createJAASEntry("Client", "dgi", userKeytabFile)); + jaas.append(createJAASEntry("Server", "HTTP", httpKeytabFile)); + + File jaasFile = new File(kdcWorkDir, "jaas.txt"); + FileUtils.write(jaasFile, jaas.toString()); + bindJVMtoJAASFile(jaasFile); + } + +} diff --git a/webapp/src/test/java/org/apache/atlas/web/filters/AtlasAuthenticationSimpleFilterIT.java b/webapp/src/test/java/org/apache/atlas/web/filters/AtlasAuthenticationSimpleFilterIT.java new file mode 100644 index 0000000000..fc066cdfd4 --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/filters/AtlasAuthenticationSimpleFilterIT.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.web.filters; + +import org.apache.atlas.web.security.BaseSecurityTest; +import org.testng.Assert; +import org.testng.annotations.Test; +import org.apache.commons.codec.binary.Base64; +import javax.ws.rs.core.Response; +import java.net.HttpURLConnection; +import java.net.URL; + +import static org.testng.Assert.assertEquals; + +/** + * + */ +public class AtlasAuthenticationSimpleFilterIT extends BaseSecurityTest { + private Base64 enc = new Base64(); + + @Test(enabled = false) + public void testSimpleLoginForValidUser() throws Exception { + URL url = new URL("http://localhost:31000/api/atlas/admin/session"); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + String userpassword = "admin:admin"; // right password + String encodedAuthorization = enc.encodeToString(userpassword.getBytes()); + connection.setRequestProperty("Authorization", "Basic " + + encodedAuthorization); + connection.connect(); + + assertEquals(connection.getResponseCode(), Response.Status.OK.getStatusCode()); + } + + + @Test(enabled = true) + public void testAccessforUnauthenticatedResource() throws Exception { + + URL url = new URL("http://localhost:31000/api/atlas/admin/status"); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + connection.connect(); + assertEquals(connection.getResponseCode(), Response.Status.OK.getStatusCode()); + + } + + + + + @Test(enabled = false) + public void testSimpleLoginWithInvalidCrendentials() throws Exception { + + URL url = new URL("http://localhost:31000/api/atlas/admin/session"); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + String userpassword = "admin:admin1"; //wrong password + String encodedAuthorization = enc.encodeToString(userpassword.getBytes()); + connection.setRequestProperty("Authorization", "Basic " + + encodedAuthorization); + connection.connect(); + assertEquals(connection.getResponseCode(), 401); + } + +} diff --git a/webapp/src/test/java/org/apache/atlas/web/listeners/LoginProcessorIT.java b/webapp/src/test/java/org/apache/atlas/web/listeners/LoginProcessorIT.java new file mode 100644 index 0000000000..bb807381b7 --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/listeners/LoginProcessorIT.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.web.listeners; + +import org.apache.atlas.web.security.BaseSecurityTest; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.security.UserGroupInformation; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +/** + * + */ +public class LoginProcessorIT extends BaseSecurityTest { + + protected static final String kerberosRule = "RULE:[1:$1@$0](.*@EXAMPLE.COM)s/@.*//\nDEFAULT"; + + @Test + public void testDefaultSimpleLogin() throws Exception { + LoginProcessor processor = new LoginProcessor() { + @Override + protected org.apache.commons.configuration.Configuration getApplicationConfiguration() { + return new PropertiesConfiguration(); + } + }; + processor.login(); + + Assert.assertNotNull(UserGroupInformation.getCurrentUser()); + Assert.assertFalse(UserGroupInformation.isLoginKeytabBased()); + Assert.assertFalse(UserGroupInformation.isSecurityEnabled()); + } + + @Test + public void testKerberosLogin() throws Exception { + final File keytab = setupKDCAndPrincipals(); + + LoginProcessor processor = new LoginProcessor() { + @Override + protected org.apache.commons.configuration.Configuration getApplicationConfiguration() { + PropertiesConfiguration config = new PropertiesConfiguration(); + config.setProperty("atlas.authentication.method", "kerberos"); + config.setProperty("atlas.authentication.principal", "dgi@EXAMPLE.COM"); + config.setProperty("atlas.authentication.keytab", keytab.getAbsolutePath()); + return config; + } + + @Override + protected Configuration getHadoopConfiguration() { + Configuration config = new Configuration(false); + config.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); + config.setBoolean(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, true); + config.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL, kerberosRule); + + return config; + } + + @Override + protected boolean isHadoopCluster() { + return true; + } + }; + processor.login(); + + Assert.assertTrue(UserGroupInformation.getLoginUser().getShortUserName().endsWith("dgi")); + Assert.assertNotNull(UserGroupInformation.getCurrentUser()); + Assert.assertTrue(UserGroupInformation.isLoginKeytabBased()); + Assert.assertTrue(UserGroupInformation.isSecurityEnabled()); + + kdc.stop(); + + } + + private File setupKDCAndPrincipals() throws Exception { + // set up the KDC + File kdcWorkDir = startKDC(); + + Assert.assertNotNull(kdc.getRealm()); + + File keytabFile = createKeytab(kdc, kdcWorkDir, "dgi", "dgi.keytab"); + + return keytabFile; + } + +} diff --git a/webapp/src/test/java/org/apache/atlas/web/security/BaseSSLAndKerberosTest.java b/webapp/src/test/java/org/apache/atlas/web/security/BaseSSLAndKerberosTest.java new file mode 100644 index 0000000000..f59b7a4c76 --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/security/BaseSSLAndKerberosTest.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.web.security; + +import org.apache.atlas.security.SecurityProperties; +import org.apache.atlas.web.service.SecureEmbeddedServer; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.security.alias.CredentialProvider; +import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.webapp.WebAppContext; + +import java.io.File; +import java.io.IOException; + +/** + * + */ +public class BaseSSLAndKerberosTest extends BaseSecurityTest { + public static final String TEST_USER_JAAS_SECTION = "TestUser"; + public static final String TESTUSER = "testuser"; + public static final String TESTPASS = "testpass"; + protected static final String DGI_URL = "https://localhost:21443/"; + protected Path jksPath; + protected String providerUrl; + protected File httpKeytabFile; + protected File userKeytabFile; + + protected BaseSSLAndKerberosTest() { + System.setProperty("https.protocols", "TLSv1.2"); + } + + class TestSecureEmbeddedServer extends SecureEmbeddedServer { + + public TestSecureEmbeddedServer(int port, String path) throws IOException { + super(ATLAS_DEFAULT_BIND_ADDRESS, port, path); + } + + public Server getServer() { + return server; + } + + @Override + protected WebAppContext getWebAppContext(String path) { + WebAppContext application = new WebAppContext(path, "/"); + application.setDescriptor(System.getProperty("projectBaseDir") + "/webapp/src/test/webapp/WEB-INF/web.xml"); + application.setClassLoader(Thread.currentThread().getContextClassLoader()); + return application; + } + } + + protected void setupCredentials() throws Exception { + Configuration conf = new Configuration(false); + + File file = new File(jksPath.toUri().getPath()); + file.delete(); + conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, providerUrl); + + CredentialProvider provider = CredentialProviderFactory.getProviders(conf).get(0); + + // create new aliases + try { + + char[] storepass = {'k', 'e', 'y', 'p', 'a', 's', 's'}; + provider.createCredentialEntry(SecurityProperties.KEYSTORE_PASSWORD_KEY, storepass); + + char[] trustpass = {'k', 'e', 'y', 'p', 'a', 's', 's'}; + provider.createCredentialEntry(SecurityProperties.TRUSTSTORE_PASSWORD_KEY, trustpass); + + char[] trustpass2 = {'k', 'e', 'y', 'p', 'a', 's', 's'}; + provider.createCredentialEntry("ssl.client.truststore.password", trustpass2); + + char[] certpass = {'k', 'e', 'y', 'p', 'a', 's', 's'}; + provider.createCredentialEntry(SecurityProperties.SERVER_CERT_PASSWORD_KEY, certpass); + + // write out so that it can be found in checks + provider.flush(); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } + + public void 
setupKDCAndPrincipals() throws Exception { + // set up the KDC + File kdcWorkDir = startKDC(); + + userKeytabFile = createKeytab(kdc, kdcWorkDir, "dgi", "dgi.keytab"); + //createKeytab(kdc, kdcWorkDir, "zookeeper", "dgi.keytab"); + httpKeytabFile = createKeytab(kdc, kdcWorkDir, "HTTP", "spnego.service.keytab"); + + // create a test user principal + kdc.createPrincipal(TESTUSER, TESTPASS); + + StringBuilder jaas = new StringBuilder(1024); + jaas.append(TEST_USER_JAAS_SECTION + " {\n" + + " com.sun.security.auth.module.Krb5LoginModule required\nuseTicketCache=true;\n" + + "};\n"); + jaas.append(createJAASEntry("Client", "dgi", userKeytabFile)); + jaas.append(createJAASEntry("Server", "HTTP", httpKeytabFile)); + + File jaasFile = new File(kdcWorkDir, "jaas.txt"); + FileUtils.write(jaasFile, jaas.toString()); + bindJVMtoJAASFile(jaasFile); + } +} diff --git a/webapp/src/test/java/org/apache/atlas/web/security/BaseSecurityTest.java b/webapp/src/test/java/org/apache/atlas/web/security/BaseSecurityTest.java new file mode 100644 index 0000000000..55709c48a5 --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/security/BaseSecurityTest.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.atlas.web.security; + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.web.TestUtils; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.hadoop.minikdc.MiniKdc; +import org.apache.hadoop.security.ssl.SSLFactory; +import org.apache.hadoop.security.ssl.SSLHostnameVerifier; +import org.apache.zookeeper.Environment; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.webapp.WebAppContext; +import org.testng.Assert; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.net.URL; +import java.nio.file.Files; +import java.util.Locale; +import java.util.Properties; + +import static org.apache.atlas.security.SecurityProperties.CERT_STORES_CREDENTIAL_PROVIDER_PATH; +import static org.apache.atlas.security.SecurityProperties.KEYSTORE_FILE_KEY; +import static org.apache.atlas.security.SecurityProperties.TLS_ENABLED; +import static org.apache.atlas.security.SecurityProperties.TRUSTSTORE_FILE_KEY; +import static org.apache.atlas.security.SecurityProperties.CLIENT_AUTH_KEY; +import static org.apache.atlas.security.SecurityProperties.SSL_CLIENT_PROPERTIES; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.atlas.AtlasException; +import org.apache.hadoop.security.alias.CredentialProviderFactory; + +/** + * + */ +public class BaseSecurityTest { + private static final String JAAS_ENTRY = "%s { \n" + " %s required\n" + // kerberos module + + " keyTab=\"%s\"\n" + " debug=true\n" + " principal=\"%s\"\n" + " useKeyTab=true\n" + + " useTicketCache=false\n" + " doNotPrompt=true\n" + " storeKey=true;\n" + "}; \n"; + protected MiniKdc kdc; + + protected void generateTestProperties(Properties props) throws ConfigurationException, IOException { + PropertiesConfiguration config = + new PropertiesConfiguration(System.getProperty("user.dir") + + "/../src/conf/" + ApplicationProperties.APPLICATION_PROPERTIES); + for (String propName : props.stringPropertyNames()) { + config.setProperty(propName, props.getProperty(propName)); + } + File file = new File(System.getProperty("user.dir"), ApplicationProperties.APPLICATION_PROPERTIES); + file.deleteOnExit(); + Writer fileWriter = new FileWriter(file); + config.save(fileWriter); + } + + protected void startEmbeddedServer(Server server) throws Exception { + WebAppContext webapp = new WebAppContext(); + webapp.setContextPath("/"); + webapp.setWar(getWarPath()); + server.setHandler(webapp); + + server.start(); + } + + protected File startKDC() throws Exception { + File target = Files.createTempDirectory("sectest").toFile(); + File kdcWorkDir = new File(target, "kdc"); + Properties kdcConf = MiniKdc.createConf(); + kdcConf.setProperty(MiniKdc.DEBUG, "true"); + kdc = new MiniKdc(kdcConf, kdcWorkDir); + kdc.start(); + + Assert.assertNotNull(kdc.getRealm()); + return kdcWorkDir; + } + + public String createJAASEntry(String context, String principal, File keytab) { + String keytabpath = keytab.getAbsolutePath(); + // fix up for windows; no-op on unix + keytabpath = keytabpath.replace('\\', '/'); + return String.format(Locale.ENGLISH, JAAS_ENTRY, context, getKerberosAuthModuleForJVM(), keytabpath, principal); + } + + protected String getKerberosAuthModuleForJVM() { + if (System.getProperty("java.vendor").contains("IBM")) { + return "com.ibm.security.auth.module.Krb5LoginModule"; + } else { + return 
"com.sun.security.auth.module.Krb5LoginModule"; + } + } + + protected void bindJVMtoJAASFile(File jaasFile) { + String path = jaasFile.getAbsolutePath(); + System.setProperty(Environment.JAAS_CONF_KEY, path); + disableZookeeperSecurity(); + } + + /* We only want Atlas to work in secure mode for the tests + * for otherwise a lot more configuration is required to + * make other components like Kafka run in secure mode. + */ + private void disableZookeeperSecurity() { + System.setProperty("zookeeper.sasl.client", "false"); + System.setProperty("zookeeper.sasl.clientconfig", ""); + } + + protected File createKeytab(MiniKdc kdc, File kdcWorkDir, String principal, String filename) throws Exception { + File keytab = new File(kdcWorkDir, filename); + kdc.createPrincipal(keytab, principal, principal + "/localhost", principal + "/127.0.0.1"); + return keytab; + } + + protected String getWarPath() { + return TestUtils.getWarPath(); + } + + protected PropertiesConfiguration getSSLConfiguration(String providerUrl) { + String projectBaseDirectory = System.getProperty("projectBaseDir"); + final PropertiesConfiguration configuration = new PropertiesConfiguration(); + configuration.setProperty("atlas.services.enabled", false); + configuration.setProperty(TLS_ENABLED, true); + configuration.setProperty(TRUSTSTORE_FILE_KEY, projectBaseDirectory + "/webapp/target/atlas.keystore"); + configuration.setProperty(KEYSTORE_FILE_KEY, projectBaseDirectory + "/webapp/target/atlas.keystore"); + configuration.setProperty(CERT_STORES_CREDENTIAL_PROVIDER_PATH, providerUrl); + configuration.setProperty(SSLFactory.SSL_HOSTNAME_VERIFIER_KEY, + SSLHostnameVerifier.DEFAULT_AND_LOCALHOST.toString()); + return configuration; + } + + public static String writeConfiguration(final PropertiesConfiguration configuration) throws Exception { + String confLocation = System.getProperty("atlas.conf"); + URL url; + if (confLocation == null) { + url = BaseSecurityTest.class.getResource("/" + ApplicationProperties.APPLICATION_PROPERTIES); + } else { + url = new File(confLocation, ApplicationProperties.APPLICATION_PROPERTIES).toURI().toURL(); + } + PropertiesConfiguration configuredProperties = new PropertiesConfiguration(); + configuredProperties.load(url); + + configuredProperties.copy(configuration); + + String persistDir = TestUtils.getTempDirectory(); + configuredProperties.setProperty("atlas.authentication.method.file", "true"); + configuredProperties.setProperty("atlas.authentication.method.file.filename", persistDir + + "/users-credentials"); + configuredProperties.setProperty("atlas.auth.policy.file",persistDir + + "/policy-store.txt" ); + TestUtils.writeConfiguration(configuredProperties, persistDir + File.separator + + ApplicationProperties.APPLICATION_PROPERTIES); + setupUserCredential(persistDir); + setUpPolicyStore(persistDir); + ApplicationProperties.forceReload(); + return persistDir; + } + + public static void setupUserCredential(String tmpDir) throws Exception { + + StringBuilder credentialFileStr = new StringBuilder(1024); + credentialFileStr.append("admin=ADMIN::8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918\n"); + credentialFileStr.append("michael=DATA_SCIENTIST::95bfb24de17d285d734b9eaa9109bfe922adc85f20d2e5e66a78bddb4a4ebddb\n"); + credentialFileStr.append("paul=DATA_STEWARD::e7c0dcf5f8a93e93791e9bac1ae454a691c1d2a902fc4256d489e96c1b9ac68c\n"); + credentialFileStr.append("testuser=DATA_STEWARD::e7c0dcf5f8a93e93791e9bac1ae454a691c1d2a902fc4256d489e96c1b9ac68c\n"); + File credentialFile = new 
File(tmpDir, "users-credentials"); + FileUtils.write(credentialFile, credentialFileStr.toString()); + } + + public static void setUpPolicyStore(String tmpDir) throws Exception { + StringBuilder policyStr = new StringBuilder(1024); + policyStr.append("adminPolicy;;admin:rwud;;ROLE_ADMIN:rwud;;type:*,entity:*,operation:*\n"); + policyStr.append("dataStewardPolicy;;testuser:rwud;;DATA_STEWARD:rwu;;type:*,entity:*\n"); + + File policyFile = new File(tmpDir, "policy-store.txt"); + FileUtils.write(policyFile, policyStr.toString()); + } + + public static void persistSSLClientConfiguration(org.apache.commons.configuration.Configuration clientConfig) + throws AtlasException, IOException { + //trust settings + Configuration configuration = new Configuration(false); + File sslClientFile = getSSLClientFile(); + if (!sslClientFile.exists()) { + configuration.set("ssl.client.truststore.type", "jks"); + configuration.set("ssl.client.truststore.location", clientConfig.getString(TRUSTSTORE_FILE_KEY)); + if (clientConfig.getBoolean(CLIENT_AUTH_KEY, false)) { + // need to get client key properties + configuration.set("ssl.client.keystore.location", clientConfig.getString(KEYSTORE_FILE_KEY)); + configuration.set("ssl.client.keystore.type", "jks"); + } + // add the configured credential provider + configuration.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + clientConfig.getString(CERT_STORES_CREDENTIAL_PROVIDER_PATH)); + String hostnameVerifier = clientConfig.getString(SSLFactory.SSL_HOSTNAME_VERIFIER_KEY); + if (hostnameVerifier != null) { + configuration.set(SSLFactory.SSL_HOSTNAME_VERIFIER_KEY, hostnameVerifier); + } + + configuration.writeXml(new FileWriter(sslClientFile)); + } + } + + private static File getSSLClientFile() throws AtlasException { + File sslDir; + try { + String persistDir = null; + URL resource = BaseSecurityTest.class.getResource("/"); + if (resource != null) { + persistDir = resource.toURI().getPath(); + } + assert persistDir != null; + sslDir = new File(persistDir); + + // LOG.info("ssl-client.xml will be created in {}", sslDir); + } catch (Exception e) { + throw new AtlasException("Failed to find client configuration directory", e); + } + return new File(sslDir, SSL_CLIENT_PROPERTIES); + } +} diff --git a/webapp/src/test/java/org/apache/atlas/web/security/NegativeSSLAndKerberosTest.java b/webapp/src/test/java/org/apache/atlas/web/security/NegativeSSLAndKerberosTest.java new file mode 100755 index 0000000000..d3cf35ca0f --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/security/NegativeSSLAndKerberosTest.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.web.security; + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.web.TestUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.security.alias.JavaKeyStoreProvider; +import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.net.URL; +import java.nio.file.Files; + +import static org.apache.atlas.security.SecurityProperties.TLS_ENABLED; + +/** + * Perform all the necessary setup steps for client and server communication over SSL/Kerberos, but then don't establish a + * Kerberos user for the invocation. A separate test case is needed since the Jersey layer caches the URL connection handler, + * which indirectly caches the Kerberos delegation token. + */ +public class NegativeSSLAndKerberosTest extends BaseSSLAndKerberosTest { + + private TestSecureEmbeddedServer secureEmbeddedServer; + private String originalConf; + private AtlasClient dgiClient; + + //@BeforeClass + public void setUp() throws Exception { + jksPath = new Path(Files.createTempDirectory("tempproviders").toString(), "test.jks"); + providerUrl = JavaKeyStoreProvider.SCHEME_NAME + "://file/" + jksPath.toUri(); + + String persistDir = TestUtils.getTempDirectory(); + + setupKDCAndPrincipals(); + setupCredentials(); + + // the client will actually only leverage a subset of these properties + final PropertiesConfiguration configuration = getSSLConfiguration(providerUrl); + + persistSSLClientConfiguration(configuration); + + TestUtils.writeConfiguration(configuration, persistDir + File.separator + + ApplicationProperties.APPLICATION_PROPERTIES); + + String confLocation = System.getProperty("atlas.conf"); + URL url; + if (confLocation == null) { + url = NegativeSSLAndKerberosTest.class.getResource("/" + ApplicationProperties.APPLICATION_PROPERTIES); + } else { + url = new File(confLocation, ApplicationProperties.APPLICATION_PROPERTIES).toURI().toURL(); + } + configuration.load(url); + + configuration.setProperty(TLS_ENABLED, true); + configuration.setProperty("atlas.authentication.method.kerberos", "true"); + configuration.setProperty("atlas.authentication.keytab",userKeytabFile.getAbsolutePath()); + configuration.setProperty("atlas.authentication.principal","dgi/localhost@"+kdc.getRealm()); + + configuration.setProperty("atlas.authentication.method.file", "false"); + configuration.setProperty("atlas.authentication.method.kerberos", "true"); + configuration.setProperty("atlas.authentication.method.kerberos.principal", "HTTP/localhost@" + kdc.getRealm()); + configuration.setProperty("atlas.authentication.method.kerberos.keytab", httpKeytabFile.getAbsolutePath()); + configuration.setProperty("atlas.authentication.method.kerberos.name.rules", + "RULE:[1:$1@$0](.*@EXAMPLE.COM)s/@.*//\nDEFAULT"); + + configuration.setProperty("atlas.authentication.method.file", "true"); + configuration.setProperty("atlas.authentication.method.file.filename", persistDir + + "/users-credentials"); + configuration.setProperty("atlas.auth.policy.file",persistDir + + "/policy-store.txt" ); + configuration.setProperty("atlas.authentication.method.trustedproxy", "false"); + + TestUtils.writeConfiguration(configuration, persistDir + File.separator + + ApplicationProperties.APPLICATION_PROPERTIES); + + setupUserCredential(persistDir); + 
setUpPolicyStore(persistDir); + + // save original setting + originalConf = System.getProperty("atlas.conf"); + System.setProperty("atlas.conf", persistDir); + + dgiClient = new AtlasClient(configuration, DGI_URL); + + + secureEmbeddedServer = new TestSecureEmbeddedServer(21443, getWarPath()) { + @Override + public Configuration getConfiguration() { + return configuration; + } + }; + secureEmbeddedServer.getServer().start(); + } + + //@AfterClass + public void tearDown() throws Exception { + if (secureEmbeddedServer != null) { + secureEmbeddedServer.getServer().stop(); + } + + if (kdc != null) { + kdc.stop(); + } + + if (originalConf != null) { + System.setProperty("atlas.conf", originalConf); + } + } + + @Test(enabled = false) + public void testUnsecuredClient() throws Exception { + try { + dgiClient.listTypes(); + Assert.fail("Should have failed with GSSException"); + } catch(Exception e) { + e.printStackTrace(); + Assert.assertTrue(e.getMessage().contains("Mechanism level: Failed to find any Kerberos tgt")); + } + } +} diff --git a/webapp/src/test/java/org/apache/atlas/web/security/SSLAndKerberosTest.java b/webapp/src/test/java/org/apache/atlas/web/security/SSLAndKerberosTest.java new file mode 100755 index 0000000000..4384aa1535 --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/security/SSLAndKerberosTest.java @@ -0,0 +1,184 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.web.security; + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.AtlasClient; +import org.apache.atlas.web.TestUtils; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.alias.JavaKeyStoreProvider; +import org.testng.annotations.Test; +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.nio.file.Files; +import java.security.PrivilegedExceptionAction; + +import javax.security.auth.Subject; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.auth.login.LoginContext; +import javax.security.auth.login.LoginException; + +import static org.apache.atlas.security.SecurityProperties.TLS_ENABLED; + +public class SSLAndKerberosTest extends BaseSSLAndKerberosTest { + public static final String TEST_USER_JAAS_SECTION = "TestUser"; + public static final String TESTUSER = "testuser"; + public static final String TESTPASS = "testpass"; + + private static final String DGI_URL = "https://localhost:21443/"; + private AtlasClient dgiCLient; + private TestSecureEmbeddedServer secureEmbeddedServer; + private Subject subject; + private String originalConf; + private String originalHomeDir; + + //@BeforeClass + public void setUp() throws Exception { + jksPath = new Path(Files.createTempDirectory("tempproviders").toString(), "test.jks"); + providerUrl = JavaKeyStoreProvider.SCHEME_NAME + "://file/" + jksPath.toUri(); + + String persistDir = TestUtils.getTempDirectory(); + + setupKDCAndPrincipals(); + setupCredentials(); + + // client will actually only leverage subset of these properties + final PropertiesConfiguration configuration = getSSLConfiguration(providerUrl); + + persistSSLClientConfiguration(configuration); + + TestUtils.writeConfiguration(configuration, persistDir + File.separator + + ApplicationProperties.APPLICATION_PROPERTIES); + + String confLocation = System.getProperty("atlas.conf"); + URL url; + if (confLocation == null) { + url = SSLAndKerberosTest.class.getResource("/" + ApplicationProperties.APPLICATION_PROPERTIES); + } else { + url = new File(confLocation, ApplicationProperties.APPLICATION_PROPERTIES).toURI().toURL(); + } + configuration.load(url); + configuration.setProperty(TLS_ENABLED, true); + configuration.setProperty("atlas.authentication.method.kerberos", "true"); + configuration.setProperty("atlas.authentication.keytab",userKeytabFile.getAbsolutePath()); + configuration.setProperty("atlas.authentication.principal","dgi/localhost@"+kdc.getRealm()); + + configuration.setProperty("atlas.authentication.method.file", "false"); + configuration.setProperty("atlas.authentication.method.trustedproxy", "false"); + configuration.setProperty("atlas.authentication.method.kerberos", "true"); + configuration.setProperty("atlas.authentication.method.kerberos.principal", "HTTP/localhost@" + kdc.getRealm()); + configuration.setProperty("atlas.authentication.method.kerberos.keytab", httpKeytabFile.getAbsolutePath()); + configuration.setProperty("atlas.authentication.method.kerberos.name.rules", + "RULE:[1:$1@$0](.*@EXAMPLE.COM)s/@.*//\nDEFAULT"); + + configuration.setProperty("atlas.authentication.method.file", "true"); + 
configuration.setProperty("atlas.authentication.method.file.filename", persistDir + + "/users-credentials"); + configuration.setProperty("atlas.auth.policy.file",persistDir + + "/policy-store.txt" ); + TestUtils.writeConfiguration(configuration, persistDir + File.separator + + "atlas-application.properties"); + + setupUserCredential(persistDir); + setUpPolicyStore(persistDir); + + subject = loginTestUser(); + UserGroupInformation.loginUserFromSubject(subject); + UserGroupInformation proxyUser = UserGroupInformation.createProxyUser( + "testUser", + UserGroupInformation.getLoginUser()); + + // save original setting + originalConf = System.getProperty("atlas.conf"); + System.setProperty("atlas.conf", persistDir); + + originalHomeDir = System.getProperty("atlas.home"); + System.setProperty("atlas.home", TestUtils.getTargetDirectory()); + + dgiCLient = proxyUser.doAs(new PrivilegedExceptionAction() { + @Override + public AtlasClient run() throws Exception { + return new AtlasClient(configuration, DGI_URL); + } + }); + + + secureEmbeddedServer = new TestSecureEmbeddedServer(21443, getWarPath()) { + @Override + public PropertiesConfiguration getConfiguration() { + return configuration; + } + }; + secureEmbeddedServer.getServer().start(); + } + + //@AfterClass + public void tearDown() throws Exception { + if (secureEmbeddedServer != null) { + secureEmbeddedServer.getServer().stop(); + } + + if (kdc != null) { + kdc.stop(); + } + + if (originalConf != null) { + System.setProperty("atlas.conf", originalConf); + } + + if(originalHomeDir !=null){ + System.setProperty("atlas.home", originalHomeDir); + } + } + + protected Subject loginTestUser() throws LoginException, IOException { + LoginContext lc = new LoginContext(TEST_USER_JAAS_SECTION, new CallbackHandler() { + + @Override + public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException { + for (Callback callback : callbacks) { + if (callback instanceof PasswordCallback) { + PasswordCallback passwordCallback = (PasswordCallback) callback; + passwordCallback.setPassword(TESTPASS.toCharArray()); + } + if (callback instanceof NameCallback) { + NameCallback nameCallback = (NameCallback) callback; + nameCallback.setName(TESTUSER); + } + } + } + }); + // attempt authentication + lc.login(); + return lc.getSubject(); + } + + @Test(enabled = false) + public void testService() throws Exception { + dgiCLient.listTypes(); + } + +} diff --git a/webapp/src/test/java/org/apache/atlas/web/security/SSLTest.java b/webapp/src/test/java/org/apache/atlas/web/security/SSLTest.java new file mode 100755 index 0000000000..ee652f0570 --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/security/SSLTest.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.atlas.web.security; + +import org.apache.atlas.AtlasClient; +import org.apache.atlas.web.TestUtils; +import org.apache.atlas.web.service.SecureEmbeddedServer; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.security.alias.CredentialProvider; +import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.apache.hadoop.security.alias.JavaKeyStoreProvider; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.webapp.WebAppContext; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; + +import static org.apache.atlas.security.SecurityProperties.KEYSTORE_PASSWORD_KEY; +import static org.apache.atlas.security.SecurityProperties.SERVER_CERT_PASSWORD_KEY; +import static org.apache.atlas.security.SecurityProperties.TRUSTSTORE_PASSWORD_KEY; + +public class SSLTest extends BaseSSLAndKerberosTest { + private AtlasClient atlasClient; + private Path jksPath; + private String providerUrl; + private TestSecureEmbeddedServer secureEmbeddedServer; + private String originalConf; + private String originalHomeDir; + + class TestSecureEmbeddedServer extends SecureEmbeddedServer { + + public TestSecureEmbeddedServer(int port, String path) throws IOException { + super(ATLAS_DEFAULT_BIND_ADDRESS, port, path); + } + + public Server getServer() { + return server; + } + + @Override + protected WebAppContext getWebAppContext(String path) { + WebAppContext application = new WebAppContext(path, "/"); + application.setDescriptor(System.getProperty("projectBaseDir") + "/webapp/src/test/webapp/WEB-INF/web.xml"); + application.setClassLoader(Thread.currentThread().getContextClassLoader()); + return application; + } + } + + //@BeforeClass + public void setUp() throws Exception { + jksPath = new Path(Files.createTempDirectory("tempproviders").toString(), "test.jks"); + providerUrl = JavaKeyStoreProvider.SCHEME_NAME + "://file/" + jksPath.toUri(); + + setupCredentials(); + final PropertiesConfiguration configuration = getSSLConfiguration(providerUrl); + String persistDir = writeConfiguration(configuration); + persistSSLClientConfiguration(configuration); + + originalConf = System.getProperty("atlas.conf"); + System.setProperty("atlas.conf", persistDir); + + originalHomeDir = System.getProperty("atlas.home"); + System.setProperty("atlas.home", TestUtils.getTargetDirectory()); + + atlasClient = new AtlasClient(configuration, new String[]{DGI_URL},new String[]{"admin","admin"}); + + secureEmbeddedServer = new TestSecureEmbeddedServer(21443, getWarPath()) { + @Override + public org.apache.commons.configuration.Configuration getConfiguration() { + return configuration; + } + }; + secureEmbeddedServer.getServer().start(); + } + + //@AfterClass + public void tearDown() throws Exception { + if (secureEmbeddedServer != null) { + secureEmbeddedServer.getServer().stop(); + } + + if (originalConf != null) { + System.setProperty("atlas.conf", originalConf); + } + + if(originalHomeDir !=null){ + System.setProperty("atlas.home", originalHomeDir); + } + } + + protected void setupCredentials() throws Exception { + Configuration conf = new Configuration(false); + + File file = new File(jksPath.toUri().getPath()); + file.delete(); + conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, providerUrl); + + 
CredentialProvider provider = CredentialProviderFactory.getProviders(conf).get(0); + + // create new aliases + try { + + char[] storepass = {'k', 'e', 'y', 'p', 'a', 's', 's'}; + provider.createCredentialEntry(KEYSTORE_PASSWORD_KEY, storepass); + + char[] trustpass = {'k', 'e', 'y', 'p', 'a', 's', 's'}; + provider.createCredentialEntry(TRUSTSTORE_PASSWORD_KEY, trustpass); + + char[] trustpass2 = {'k', 'e', 'y', 'p', 'a', 's', 's'}; + provider.createCredentialEntry("ssl.client.truststore.password", trustpass2); + + char[] certpass = {'k', 'e', 'y', 'p', 'a', 's', 's'}; + provider.createCredentialEntry(SERVER_CERT_PASSWORD_KEY, certpass); + + // write out so that it can be found in checks + provider.flush(); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } + + //@Test + public void testService() throws Exception { + atlasClient.listTypes(); + } +} diff --git a/webapp/src/test/java/org/apache/atlas/web/service/SecureEmbeddedServerTest.java b/webapp/src/test/java/org/apache/atlas/web/service/SecureEmbeddedServerTest.java new file mode 100644 index 0000000000..f941941953 --- /dev/null +++ b/webapp/src/test/java/org/apache/atlas/web/service/SecureEmbeddedServerTest.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.atlas.web.service; + +import org.apache.atlas.ApplicationProperties; +import org.apache.atlas.web.TestUtils; +import org.apache.atlas.web.security.BaseSecurityTest; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.eclipse.jetty.webapp.WebAppContext; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.net.HttpURLConnection; +import java.net.URL; + +import static org.apache.atlas.security.SecurityProperties.CERT_STORES_CREDENTIAL_PROVIDER_PATH; + +public class SecureEmbeddedServerTest extends SecureEmbeddedServerTestBase { + @Test + public void testServerConfiguredUsingCredentialProvider() throws Exception { + // setup the configuration + final PropertiesConfiguration configuration = new PropertiesConfiguration(); + configuration.setProperty(CERT_STORES_CREDENTIAL_PROVIDER_PATH, providerUrl); + configuration.setProperty("atlas.services.enabled", false); + configuration.setProperty("atlas.notification.embedded", "false"); + // setup the credential provider + setupCredentials(); + + String persistDir = BaseSecurityTest.writeConfiguration(configuration); + String originalConf = System.getProperty("atlas.conf"); + System.setProperty("atlas.conf", persistDir); + + ApplicationProperties.forceReload(); + SecureEmbeddedServer secureEmbeddedServer = null; + try { + secureEmbeddedServer = new SecureEmbeddedServer(EmbeddedServer.ATLAS_DEFAULT_BIND_ADDRESS, + 21443, TestUtils.getWarPath()) { + @Override + protected PropertiesConfiguration getConfiguration() { + return configuration; + } + + @Override + protected WebAppContext getWebAppContext(String path) { + WebAppContext application = new WebAppContext(path, "/"); + application.setDescriptor( + System.getProperty("projectBaseDir") + "/webapp/src/test/webapp/WEB-INF/web.xml"); + application.setClassLoader(Thread.currentThread().getContextClassLoader()); + return application; + } + + }; + secureEmbeddedServer.server.start(); + + URL url = new URL("https://localhost:21443/api/atlas/admin/status"); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + connection.connect(); + + // test to see whether server is up and root page can be served + Assert.assertEquals(connection.getResponseCode(), 200); + } catch(Throwable e) { + Assert.fail("War deploy failed", e); + } finally { + secureEmbeddedServer.server.stop(); + + if (originalConf == null) { + System.clearProperty("atlas.conf"); + } else { + System.setProperty("atlas.conf", originalConf); + } + } + } +}