keyValueMap = new HashMap<>();
+
+ String[] tags = keyValueString.split(",");
+ for (String tag : tags) {
+ int index = tag.indexOf("=");
+ String tagKey = tag.substring(0, index).trim();
+ String tagValue = tag.substring(index + 1, tag.length()).trim();
+ keyValueMap.put(tagKey, tagValue);
+ }
+ return keyValueMap;
+ }
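+
+ // Illustrative example (hypothetical values) of convertKeyValueStringToMap, which parses the
+ // comma-separated key=value tag strings carried by Falcon entities:
+ //   convertKeyValueStringToMap("owner=producer@xyz.com, department=forecasting")
+ //     => {owner=producer@xyz.com, department=forecasting}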
+
+ public static String getUser() throws FalconException {
+ try {
+ return CurrentUser.getAuthenticatedUGI().getShortUserName();
+ } catch (Exception e) {
+ // Ignore failures to fetch the authenticated user; returning null lets the caller fall back to the login user
+ }
+ return null;
+ }
+}
diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/bridge/FalconBridge.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/bridge/FalconBridge.java
new file mode 100644
index 0000000000..cbf002f4fa
--- /dev/null
+++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/bridge/FalconBridge.java
@@ -0,0 +1,416 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.falcon.bridge;
+
+import org.apache.atlas.AtlasClient;
+import org.apache.atlas.AtlasConstants;
+import org.apache.atlas.falcon.Util.EventUtil;
+import org.apache.atlas.falcon.model.FalconDataTypes;
+import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
+import org.apache.atlas.hive.model.HiveDataTypes;
+import org.apache.atlas.v1.model.instance.Referenceable;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.CatalogStorage;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.FileSystemStorage;
+import org.apache.falcon.entity.ProcessHelper;
+import org.apache.falcon.entity.store.ConfigurationStore;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.feed.CatalogTable;
+import org.apache.falcon.entity.v0.feed.ClusterType;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.feed.Location;
+import org.apache.falcon.entity.v0.feed.LocationType;
+import org.apache.falcon.entity.v0.process.Cluster;
+import org.apache.falcon.entity.v0.process.Input;
+import org.apache.falcon.entity.v0.process.Output;
+import org.apache.falcon.entity.v0.process.Workflow;
+import org.apache.falcon.workflow.WorkflowExecutionArgs;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A Bridge Utility to register Falcon entities metadata to Atlas.
+ */
+public class FalconBridge {
+ private static final Logger LOG = LoggerFactory.getLogger(FalconBridge.class);
+
+ public static final String COLO = "colo";
+ public static final String TAGS = "tags";
+ public static final String GROUPS = "groups";
+ public static final String PIPELINES = "pipelines";
+ public static final String WFPROPERTIES = "workflow-properties";
+ public static final String RUNSON = "runs-on";
+ public static final String STOREDIN = "stored-in";
+ public static final String FREQUENCY = "frequency";
+ public static final String ATTRIBUTE_DB = "db";
+
+ /**
+ * Creates cluster entity
+ *
+ * @param cluster ClusterEntity
+ * @return cluster instance reference
+ */
+ public static Referenceable createClusterEntity(final org.apache.falcon.entity.v0.cluster.Cluster cluster) {
+ LOG.info("Creating cluster Entity : {}", cluster.getName());
+
+ Referenceable clusterRef = new Referenceable(FalconDataTypes.FALCON_CLUSTER.getName());
+
+ clusterRef.set(AtlasClient.NAME, cluster.getName());
+ clusterRef.set(AtlasClient.DESCRIPTION, cluster.getDescription());
+ clusterRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, cluster.getName());
+
+ clusterRef.set(FalconBridge.COLO, cluster.getColo());
+
+ if (cluster.getACL() != null) {
+ clusterRef.set(AtlasClient.OWNER, cluster.getACL().getGroup());
+ }
+
+ if (StringUtils.isNotEmpty(cluster.getTags())) {
+ clusterRef.set(FalconBridge.TAGS,
+ EventUtil.convertKeyValueStringToMap(cluster.getTags()));
+ }
+
+ return clusterRef;
+ }
+
+ private static Referenceable createFeedEntity(Feed feed, Referenceable clusterReferenceable) {
+ LOG.info("Creating feed dataset: {}", feed.getName());
+
+ Referenceable feedEntity = new Referenceable(FalconDataTypes.FALCON_FEED.getName());
+ feedEntity.set(AtlasClient.NAME, feed.getName());
+ feedEntity.set(AtlasClient.DESCRIPTION, feed.getDescription());
+ String feedQualifiedName =
+ getFeedQualifiedName(feed.getName(), (String) clusterReferenceable.get(AtlasClient.NAME));
+ feedEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, feedQualifiedName);
+ feedEntity.set(FalconBridge.FREQUENCY, feed.getFrequency().toString());
+ feedEntity.set(FalconBridge.STOREDIN, clusterReferenceable);
+ if (feed.getACL() != null) {
+ feedEntity.set(AtlasClient.OWNER, feed.getACL().getOwner());
+ }
+
+ if (StringUtils.isNotEmpty(feed.getTags())) {
+ feedEntity.set(FalconBridge.TAGS,
+ EventUtil.convertKeyValueStringToMap(feed.getTags()));
+ }
+
+ if (feed.getGroups() != null) {
+ feedEntity.set(FalconBridge.GROUPS, feed.getGroups());
+ }
+
+ return feedEntity;
+ }
+
+ public static List<Referenceable> createFeedCreationEntity(Feed feed, ConfigurationStore falconStore) throws FalconException, URISyntaxException {
+ LOG.info("Creating feed : {}", feed.getName());
+
+ List<Referenceable> entities = new ArrayList<>();
+
+ if (feed.getClusters() != null) {
+ List<Referenceable> replicationInputs = new ArrayList<>();
+ List<Referenceable> replicationOutputs = new ArrayList<>();
+
+ for (org.apache.falcon.entity.v0.feed.Cluster feedCluster : feed.getClusters().getClusters()) {
+ org.apache.falcon.entity.v0.cluster.Cluster cluster = falconStore.get(EntityType.CLUSTER,
+ feedCluster.getName());
+
+ // set cluster
+ Referenceable clusterReferenceable = getClusterEntityReference(cluster.getName(), cluster.getColo());
+ entities.add(clusterReferenceable);
+
+ // input as hive_table or hdfs_path, output as falcon_feed dataset
+ List<Referenceable> inputs = new ArrayList<>();
+ List<Referenceable> inputReferenceables = getInputEntities(cluster, feed);
+ if (inputReferenceables != null) {
+ entities.addAll(inputReferenceables);
+ inputs.add(inputReferenceables.get(inputReferenceables.size() - 1));
+ }
+
+ List<Referenceable> outputs = new ArrayList<>();
+ Referenceable feedEntity = createFeedEntity(feed, clusterReferenceable);
+ if (feedEntity != null) {
+ entities.add(feedEntity);
+ outputs.add(feedEntity);
+ }
+
+ if (!inputs.isEmpty() || !outputs.isEmpty()) {
+ Referenceable feedCreateEntity = new Referenceable(FalconDataTypes.FALCON_FEED_CREATION.getName());
+ String feedQualifiedName = getFeedQualifiedName(feed.getName(), cluster.getName());
+
+ feedCreateEntity.set(AtlasClient.NAME, feed.getName());
+ feedCreateEntity.set(AtlasClient.DESCRIPTION, "Feed creation - " + feed.getName());
+ feedCreateEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, feedQualifiedName);
+
+ if (!inputs.isEmpty()) {
+ feedCreateEntity.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, inputs);
+ }
+ if (!outputs.isEmpty()) {
+ feedCreateEntity.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, outputs);
+ }
+
+ feedCreateEntity.set(FalconBridge.STOREDIN, clusterReferenceable);
+ entities.add(feedCreateEntity);
+ }
+
+ if (ClusterType.SOURCE == feedCluster.getType()) {
+ replicationInputs.add(feedEntity);
+ } else if (ClusterType.TARGET == feedCluster.getType()) {
+ replicationOutputs.add(feedEntity);
+ }
+ }
+
+ if (!replicationInputs.isEmpty() && !replicationOutputs.isEmpty()) {
+ Referenceable feedReplicationEntity = new Referenceable(FalconDataTypes
+ .FALCON_FEED_REPLICATION.getName());
+
+ feedReplicationEntity.set(AtlasClient.NAME, feed.getName());
+ feedReplicationEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, feed.getName());
+
+ feedReplicationEntity.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, replicationInputs);
+ feedReplicationEntity.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, replicationOutputs);
+ entities.add(feedReplicationEntity);
+ }
+
+ }
+ return entities;
+ }
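+
+ // Rough sketch (illustrative, not additional code) of the lineage this method emits:
+ //   hive_table / hdfs_path  --inputs-->  falcon_feed_creation  --outputs-->  falcon_feed (stored-in falcon_cluster)
+ //   falcon_feed on source clusters --inputs--> falcon_feed_replication --outputs--> falcon_feed on target clusters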
+
+ /**
+ * Creates process entity
+ *
+ * @param process process entity
+ * @param falconStore config store
+ * @return process instance reference
+ *
+ * @throws FalconException if retrieving from the configuration store fails
+ */
+ public static List<Referenceable> createProcessEntity(org.apache.falcon.entity.v0.process.Process process,
+ ConfigurationStore falconStore) throws FalconException {
+ LOG.info("Creating process Entity : {}", process.getName());
+
+ // The requirement is for each cluster, create a process entity with name
+ // clustername.processname
+ List<Referenceable> entities = new ArrayList<>();
+
+ if (process.getClusters() != null) {
+
+ for (Cluster processCluster : process.getClusters().getClusters()) {
+ org.apache.falcon.entity.v0.cluster.Cluster cluster =
+ falconStore.get(EntityType.CLUSTER, processCluster.getName());
+ Referenceable clusterReferenceable = getClusterEntityReference(cluster.getName(), cluster.getColo());
+ entities.add(clusterReferenceable);
+
+ List<Referenceable> inputs = new ArrayList<>();
+ if (process.getInputs() != null) {
+ for (Input input : process.getInputs().getInputs()) {
+ Feed feed = falconStore.get(EntityType.FEED, input.getFeed());
+ Referenceable inputReferenceable = getFeedDataSetReference(feed, clusterReferenceable);
+ entities.add(inputReferenceable);
+ inputs.add(inputReferenceable);
+ }
+ }
+
+ List<Referenceable> outputs = new ArrayList<>();
+ if (process.getOutputs() != null) {
+ for (Output output : process.getOutputs().getOutputs()) {
+ Feed feed = falconStore.get(EntityType.FEED, output.getFeed());
+ Referenceable outputReferenceable = getFeedDataSetReference(feed, clusterReferenceable);
+ entities.add(outputReferenceable);
+ outputs.add(outputReferenceable);
+ }
+ }
+
+ if (!inputs.isEmpty() || !outputs.isEmpty()) {
+
+ Referenceable processEntity = new Referenceable(FalconDataTypes.FALCON_PROCESS.getName());
+ processEntity.set(AtlasClient.NAME, process.getName());
+ processEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ getProcessQualifiedName(process.getName(), cluster.getName()));
+ processEntity.set(FalconBridge.FREQUENCY, process.getFrequency().toString());
+
+ if (!inputs.isEmpty()) {
+ processEntity.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, inputs);
+ }
+ if (!outputs.isEmpty()) {
+ processEntity.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, outputs);
+ }
+
+ // set cluster
+ processEntity.set(FalconBridge.RUNSON, clusterReferenceable);
+
+ // Set user
+ if (process.getACL() != null) {
+ processEntity.set(AtlasClient.OWNER, process.getACL().getOwner());
+ }
+
+ if (StringUtils.isNotEmpty(process.getTags())) {
+ processEntity.set(FalconBridge.TAGS,
+ EventUtil.convertKeyValueStringToMap(process.getTags()));
+ }
+
+ if (process.getPipelines() != null) {
+ processEntity.set(FalconBridge.PIPELINES, process.getPipelines());
+ }
+
+ processEntity.set(FalconBridge.WFPROPERTIES,
+ getProcessEntityWFProperties(process.getWorkflow(),
+ process.getName()));
+
+ entities.add(processEntity);
+ }
+
+ }
+ }
+ return entities;
+ }
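+
+ // Rough sketch (illustrative) of the process lineage this method emits, one process entity per cluster:
+ //   falcon_feed (inputs)  --inputs-->  falcon_process  --outputs-->  falcon_feed (outputs), with the
+ //   process tied to its falcon_cluster through the "runs-on" attribute.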
+
+ private static List<Referenceable> getInputEntities(org.apache.falcon.entity.v0.cluster.Cluster cluster,
+ Feed feed) throws URISyntaxException {
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
+
+ if(feedCluster != null) {
+ final CatalogTable table = getTable(feedCluster, feed);
+ if (table != null) {
+ CatalogStorage storage = new CatalogStorage(cluster, table);
+ return createHiveTableInstance(cluster.getName(), storage.getDatabase().toLowerCase(),
+ storage.getTable().toLowerCase());
+ } else {
+ List<Location> locations = FeedHelper.getLocations(feedCluster, feed);
+ if (CollectionUtils.isNotEmpty(locations)) {
+ Location dataLocation = FileSystemStorage.getLocation(locations, LocationType.DATA);
+ if (dataLocation != null) {
+ final String pathUri = normalize(dataLocation.getPath());
+ LOG.info("Registering DFS Path {} ", pathUri);
+ return fillHDFSDataSet(pathUri, cluster.getName());
+ }
+ }
+ }
+ }
+
+ return null;
+ }
+
+ private static CatalogTable getTable(org.apache.falcon.entity.v0.feed.Cluster cluster, Feed feed) {
+ // check if table is overridden in cluster
+ if (cluster.getTable() != null) {
+ return cluster.getTable();
+ }
+
+ return feed.getTable();
+ }
+
+ private static List<Referenceable> fillHDFSDataSet(final String pathUri, final String clusterName) {
+ List<Referenceable> entities = new ArrayList<>();
+ Referenceable ref = new Referenceable(HiveMetaStoreBridge.HDFS_PATH);
+ ref.set("path", pathUri);
+ // Path path = new Path(pathUri);
+ // ref.set("name", path.getName());
+ //TODO - Fix after ATLAS-542 to shorter Name
+ Path path = new Path(pathUri);
+ ref.set(AtlasClient.NAME, Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
+ ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri);
+ ref.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);
+ entities.add(ref);
+ return entities;
+ }
+
+ private static Referenceable createHiveDatabaseInstance(String clusterName, String dbName) {
+ Referenceable dbRef = new Referenceable(HiveDataTypes.HIVE_DB.getName());
+ dbRef.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);
+ dbRef.set(AtlasClient.NAME, dbName);
+ dbRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ HiveMetaStoreBridge.getDBQualifiedName(clusterName, dbName));
+ return dbRef;
+ }
+
+ private static List<Referenceable> createHiveTableInstance(String clusterName, String dbName,
+ String tableName) {
+ List<Referenceable> entities = new ArrayList<>();
+ Referenceable dbRef = createHiveDatabaseInstance(clusterName, dbName);
+ entities.add(dbRef);
+
+ Referenceable tableRef = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
+ tableRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ HiveMetaStoreBridge.getTableQualifiedName(clusterName, dbName, tableName));
+ tableRef.set(AtlasClient.NAME, tableName.toLowerCase());
+ tableRef.set(ATTRIBUTE_DB, dbRef);
+ entities.add(tableRef);
+
+ return entities;
+ }
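+
+ // Note: the qualified names above come from HiveMetaStoreBridge helpers; they are assumed here to follow
+ // the usual "db@cluster" and "db.table@cluster" shapes (e.g. "salesdb.clicks@primary" - hypothetical values).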
+
+ private static Referenceable getClusterEntityReference(final String clusterName,
+ final String colo) {
+ LOG.info("Getting reference for entity {}", clusterName);
+ Referenceable clusterRef = new Referenceable(FalconDataTypes.FALCON_CLUSTER.getName());
+ clusterRef.set(AtlasClient.NAME, String.format("%s", clusterName));
+ clusterRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, clusterName);
+ clusterRef.set(FalconBridge.COLO, colo);
+ return clusterRef;
+ }
+
+
+ private static Referenceable getFeedDataSetReference(Feed feed, Referenceable clusterReference) {
+ LOG.info("Getting reference for entity {}", feed.getName());
+ Referenceable feedDatasetRef = new Referenceable(FalconDataTypes.FALCON_FEED.getName());
+ feedDatasetRef.set(AtlasClient.NAME, feed.getName());
+ feedDatasetRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, getFeedQualifiedName(feed.getName(),
+ (String) clusterReference.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME)));
+ feedDatasetRef.set(FalconBridge.STOREDIN, clusterReference);
+ feedDatasetRef.set(FalconBridge.FREQUENCY, feed.getFrequency());
+ return feedDatasetRef;
+ }
+
+ private static Map<String, String> getProcessEntityWFProperties(final Workflow workflow,
+ final String processName) {
+ Map<String, String> wfProperties = new HashMap<>();
+ wfProperties.put(WorkflowExecutionArgs.USER_WORKFLOW_NAME.getName(),
+ ProcessHelper.getProcessWorkflowName(workflow.getName(), processName));
+ wfProperties.put(WorkflowExecutionArgs.USER_WORKFLOW_VERSION.getName(),
+ workflow.getVersion());
+ wfProperties.put(WorkflowExecutionArgs.USER_WORKFLOW_ENGINE.getName(),
+ workflow.getEngine().value());
+
+ return wfProperties;
+ }
+
+ public static String getFeedQualifiedName(final String feedName, final String clusterName) {
+ return String.format("%s@%s", feedName, clusterName);
+ }
+
+ public static String getProcessQualifiedName(final String processName, final String clusterName) {
+ return String.format("%s@%s", processName, clusterName);
+ }
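+
+ // Example (hypothetical names): getFeedQualifiedName("rawEmailFeed", "primaryCluster") returns
+ // "rawEmailFeed@primaryCluster"; process qualified names follow the same "<name>@<cluster>" pattern.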
+
+ public static String normalize(final String str) {
+ if (StringUtils.isBlank(str)) {
+ return null;
+ }
+ return str.toLowerCase().trim();
+ }
+}
diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/event/FalconEvent.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/event/FalconEvent.java
new file mode 100644
index 0000000000..51db894ab6
--- /dev/null
+++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/event/FalconEvent.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.falcon.event;
+
+import org.apache.falcon.entity.v0.Entity;
+
+/**
+ * Falcon event to interface with Atlas Service.
+ */
+public class FalconEvent {
+ protected String user;
+ protected OPERATION operation;
+ protected Entity entity;
+
+ public FalconEvent(String doAsUser, OPERATION falconOperation, Entity entity) {
+ this.user = doAsUser;
+ this.operation = falconOperation;
+ this.entity = entity;
+ }
+
+ public enum OPERATION {
+ ADD_CLUSTER,
+ UPDATE_CLUSTER,
+ ADD_FEED,
+ UPDATE_FEED,
+ ADD_PROCESS,
+ UPDATE_PROCESS,
+ }
+
+ public String getUser() {
+ return user;
+ }
+
+ public OPERATION getOperation() {
+ return operation;
+ }
+
+ public Entity getEntity() {
+ return entity;
+ }
+}
diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/hook/FalconHook.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/hook/FalconHook.java
new file mode 100644
index 0000000000..b8a73cbe63
--- /dev/null
+++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/hook/FalconHook.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.falcon.hook;
+
+import org.apache.atlas.falcon.bridge.FalconBridge;
+import org.apache.atlas.falcon.event.FalconEvent;
+import org.apache.atlas.falcon.publisher.FalconEventPublisher;
+import org.apache.atlas.hook.AtlasHook;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.v1.model.instance.Referenceable;
+import org.apache.atlas.v1.model.notification.HookNotificationV1.EntityCreateRequest;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.store.ConfigurationStore;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.process.Process;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+import static org.apache.atlas.repository.Constants.FALCON_SOURCE;
+
+/**
+ * Falcon hook sends lineage information to the Atlas Service.
+ */
+public class FalconHook extends AtlasHook implements FalconEventPublisher {
+ private static final Logger LOG = LoggerFactory.getLogger(FalconHook.class);
+
+ private static ConfigurationStore STORE;
+
+ @Override
+ public String getMessageSource() {
+ return FALCON_SOURCE;
+ }
+
+ private enum Operation {
+ ADD,
+ UPDATE
+ }
+
+ static {
+ try {
+ STORE = ConfigurationStore.get();
+ } catch (Exception e) {
+ LOG.error("Caught exception initializing the falcon hook.", e);
+ }
+
+ LOG.info("Created Atlas Hook for Falcon");
+ }
+
+ @Override
+ public void publish(final Data data) {
+ final FalconEvent event = data.getEvent();
+ try {
+ fireAndForget(event);
+ } catch (Throwable t) {
+ LOG.warn("Error in processing data {}", data, t);
+ }
+ }
+
+ private void fireAndForget(FalconEvent event) throws FalconException, URISyntaxException {
+ LOG.info("Entered Atlas hook for Falcon hook operation {}", event.getOperation());
+ List<HookNotification> messages = new ArrayList<>();
+
+ Operation op = getOperation(event.getOperation());
+ String user = getUser(event.getUser());
+ LOG.info("fireAndForget user:{}", user);
+ switch (op) {
+ case ADD:
+ messages.add(new EntityCreateRequest(user, createEntities(event, user)));
+ break;
+
+ }
+ notifyEntities(messages, null);
+ }
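+
+ // notifyEntities() is inherited from AtlasHook and hands these HookNotification messages to the Atlas
+ // notification framework (typically the ATLAS_HOOK topic); the null argument means no explicit
+ // UserGroupInformation is supplied for the send.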
+
+ private List<Referenceable> createEntities(FalconEvent event, String user) throws FalconException, URISyntaxException {
+ List<Referenceable> entities = new ArrayList<>();
+
+ switch (event.getOperation()) {
+ case ADD_CLUSTER:
+ entities.add(FalconBridge
+ .createClusterEntity((org.apache.falcon.entity.v0.cluster.Cluster) event.getEntity()));
+ break;
+
+ case ADD_PROCESS:
+ entities.addAll(FalconBridge.createProcessEntity((Process) event.getEntity(), STORE));
+ break;
+
+ case ADD_FEED:
+ entities.addAll(FalconBridge.createFeedCreationEntity((Feed) event.getEntity(), STORE));
+ break;
+
+ case UPDATE_CLUSTER:
+ case UPDATE_FEED:
+ case UPDATE_PROCESS:
+ default:
+ LOG.info("Falcon operation {} is not valid or supported", event.getOperation());
+ }
+
+ return entities;
+ }
+
+ private static Operation getOperation(final FalconEvent.OPERATION op) throws FalconException {
+ switch (op) {
+ case ADD_CLUSTER:
+ case ADD_FEED:
+ case ADD_PROCESS:
+ return Operation.ADD;
+
+ case UPDATE_CLUSTER:
+ case UPDATE_FEED:
+ case UPDATE_PROCESS:
+ return Operation.UPDATE;
+
+ default:
+ throw new FalconException("Falcon operation " + op + " is not valid or supported");
+ }
+ }
+}
+
diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/model/FalconDataTypes.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/model/FalconDataTypes.java
new file mode 100644
index 0000000000..e36ff23aff
--- /dev/null
+++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/model/FalconDataTypes.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.falcon.model;
+
+/**
+ * Falcon Data Types for model and bridge.
+ */
+public enum FalconDataTypes {
+ // Classes
+ FALCON_CLUSTER,
+ FALCON_FEED_CREATION,
+ FALCON_FEED,
+ FALCON_FEED_REPLICATION,
+ FALCON_PROCESS;
+
+ public String getName() {
+ return name().toLowerCase();
+ }
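+
+ // getName() lower-cases the constant, so the enum values map directly to Atlas type names,
+ // e.g. FALCON_FEED.getName() returns "falcon_feed" and FALCON_FEED_CREATION.getName() returns "falcon_feed_creation".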
+
+}
diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/publisher/FalconEventPublisher.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/publisher/FalconEventPublisher.java
new file mode 100644
index 0000000000..a01ec14beb
--- /dev/null
+++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/publisher/FalconEventPublisher.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.falcon.publisher;
+
+
+import org.apache.atlas.falcon.event.FalconEvent;
+
+/**
+ * Falcon publisher for Atlas
+ */
+public interface FalconEventPublisher {
+ class Data {
+ private FalconEvent event;
+
+ public Data(FalconEvent event) {
+ this.event = event;
+ }
+
+ public FalconEvent getEvent() {
+ return event;
+ }
+ }
+
+ void publish(final Data data);
+}
diff --git a/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/service/AtlasService.java b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/service/AtlasService.java
new file mode 100644
index 0000000000..7482ba7b82
--- /dev/null
+++ b/addons/falcon-bridge/src/main/java/org/apache/atlas/falcon/service/AtlasService.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.falcon.service;
+
+import org.apache.atlas.falcon.Util.EventUtil;
+import org.apache.atlas.falcon.event.FalconEvent;
+import org.apache.atlas.falcon.hook.FalconHook;
+import org.apache.atlas.falcon.publisher.FalconEventPublisher;
+import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.service.ConfigurationChangeListener;
+import org.apache.falcon.service.FalconService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Atlas service to publish Falcon events
+ */
+public class AtlasService implements FalconService, ConfigurationChangeListener {
+
+ private static final Logger LOG = LoggerFactory.getLogger(AtlasService.class);
+ private FalconEventPublisher publisher;
+
+ /**
+ * Constant for the service name.
+ */
+ public static final String SERVICE_NAME = AtlasService.class.getSimpleName();
+
+ @Override
+ public String getName() {
+ return SERVICE_NAME;
+ }
+
+ @Override
+ public void init() throws FalconException {
+ publisher = new FalconHook();
+ }
+
+ @Override
+ public void destroy() throws FalconException {
+ }
+
+ @Override
+ public void onAdd(Entity entity) throws FalconException {
+ try {
+ EntityType entityType = entity.getEntityType();
+ switch (entityType) {
+ case CLUSTER:
+ addEntity(entity, FalconEvent.OPERATION.ADD_CLUSTER);
+ break;
+
+ case PROCESS:
+ addEntity(entity, FalconEvent.OPERATION.ADD_PROCESS);
+ break;
+
+ case FEED:
+ addEntity(entity, FalconEvent.OPERATION.ADD_FEED);
+ break;
+
+ default:
+ LOG.debug("Entity type not processed {}", entityType);
+ }
+ } catch(Throwable t) {
+ LOG.warn("Error handling entity {}", entity, t);
+ }
+ }
+
+ @Override
+ public void onRemove(Entity entity) throws FalconException {
+ }
+
+ @Override
+ public void onChange(Entity oldEntity, Entity newEntity) throws FalconException {
+ /**
+ * Skipping update for now - update uses full update currently and this might result in all attributes wiped for hive entities
+ EntityType entityType = newEntity.getEntityType();
+ switch (entityType) {
+ case CLUSTER:
+ addEntity(newEntity, FalconEvent.OPERATION.UPDATE_CLUSTER);
+ break;
+
+ case PROCESS:
+ addEntity(newEntity, FalconEvent.OPERATION.UPDATE_PROCESS);
+ break;
+
+ case FEED:
+ FalconEvent.OPERATION operation = isReplicationFeed((Feed) newEntity) ?
+ FalconEvent.OPERATION.UPDATE_REPLICATION_FEED :
+ FalconEvent.OPERATION.UPDATE_FEED;
+ addEntity(newEntity, operation);
+ break;
+
+ default:
+ LOG.debug("Entity type not processed {}", entityType);
+ }
+ **/
+ }
+
+ @Override
+ public void onReload(Entity entity) throws FalconException {
+ //Since there is no import script that can import existing falcon entities to atlas, adding on falcon service start
+ onAdd(entity);
+ }
+
+ private void addEntity(Entity entity, FalconEvent.OPERATION operation) throws FalconException {
+ LOG.info("Adding {} entity to Atlas: {}", entity.getEntityType().name(), entity.getName());
+
+ try {
+ FalconEvent event =
+ new FalconEvent(EventUtil.getUser(), operation, entity);
+ FalconEventPublisher.Data data = new FalconEventPublisher.Data(event);
+ publisher.publish(data);
+ } catch (Exception ex) {
+ throw new FalconException("Unable to publish data to publisher " + ex.getMessage(), ex);
+ }
+ }
+}
diff --git a/addons/falcon-bridge/src/test/java/org/apache/atlas/falcon/hook/FalconHookIT.java b/addons/falcon-bridge/src/test/java/org/apache/atlas/falcon/hook/FalconHookIT.java
new file mode 100644
index 0000000000..24f36168c9
--- /dev/null
+++ b/addons/falcon-bridge/src/test/java/org/apache/atlas/falcon/hook/FalconHookIT.java
@@ -0,0 +1,351 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.falcon.hook;
+
+import com.sun.jersey.api.client.ClientResponse;
+import org.apache.atlas.ApplicationProperties;
+import org.apache.atlas.AtlasClient;
+import org.apache.atlas.AtlasServiceException;
+import org.apache.atlas.falcon.bridge.FalconBridge;
+import org.apache.atlas.falcon.model.FalconDataTypes;
+import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
+import org.apache.atlas.hive.model.HiveDataTypes;
+import org.apache.atlas.v1.model.instance.Id;
+import org.apache.atlas.v1.model.instance.Referenceable;
+import org.apache.atlas.v1.typesystem.types.utils.TypesUtil;
+import org.apache.atlas.utils.AuthenticationUtil;
+import org.apache.atlas.utils.ParamChecker;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.atlas.falcon.service.AtlasService;
+import org.apache.falcon.entity.FeedHelper;
+import org.apache.falcon.entity.FileSystemStorage;
+import org.apache.falcon.entity.store.ConfigurationStore;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.cluster.Cluster;
+import org.apache.falcon.entity.v0.feed.Feed;
+import org.apache.falcon.entity.v0.feed.Location;
+import org.apache.falcon.entity.v0.feed.LocationType;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.security.CurrentUser;
+import org.slf4j.Logger;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import javax.xml.bind.JAXBException;
+import java.util.List;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.fail;
+
+public class FalconHookIT {
+ public static final Logger LOG = org.slf4j.LoggerFactory.getLogger(FalconHookIT.class);
+
+ public static final String CLUSTER_RESOURCE = "/cluster.xml";
+ public static final String FEED_RESOURCE = "/feed.xml";
+ public static final String FEED_HDFS_RESOURCE = "/feed-hdfs.xml";
+ public static final String FEED_REPLICATION_RESOURCE = "/feed-replication.xml";
+ public static final String PROCESS_RESOURCE = "/process.xml";
+
+ private AtlasClient atlasClient;
+
+ private static final ConfigurationStore STORE = ConfigurationStore.get();
+
+ @BeforeClass
+ public void setUp() throws Exception {
+ Configuration atlasProperties = ApplicationProperties.get();
+ if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) {
+ atlasClient = new AtlasClient(atlasProperties.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT), new String[]{"admin", "admin"});
+ } else {
+ atlasClient = new AtlasClient(atlasProperties.getStringArray(HiveMetaStoreBridge.ATLAS_ENDPOINT));
+ }
+
+ AtlasService service = new AtlasService();
+ service.init();
+ STORE.registerListener(service);
+ CurrentUser.authenticate(System.getProperty("user.name"));
+ }
+
+ private boolean isDataModelAlreadyRegistered() throws Exception {
+ try {
+ atlasClient.getType(FalconDataTypes.FALCON_PROCESS.getName());
+ LOG.info("Hive data model is already registered!");
+ return true;
+ } catch(AtlasServiceException ase) {
+ if (ase.getStatus() == ClientResponse.Status.NOT_FOUND) {
+ return false;
+ }
+ throw ase;
+ }
+ }
+
+ private <T extends Entity> T loadEntity(EntityType type, String resource, String name) throws JAXBException {
+ Entity entity = (Entity) type.getUnmarshaller().unmarshal(this.getClass().getResourceAsStream(resource));
+ switch (entity.getEntityType()) {
+ case CLUSTER:
+ ((Cluster) entity).setName(name);
+ break;
+
+ case FEED:
+ ((Feed) entity).setName(name);
+ break;
+
+ case PROCESS:
+ ((Process) entity).setName(name);
+ break;
+ }
+ return (T)entity;
+ }
+
+ private String random() {
+ return RandomStringUtils.randomAlphanumeric(10);
+ }
+
+ private String getTableUri(String dbName, String tableName) {
+ return String.format("catalog:%s:%s#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}", dbName, tableName);
+ }
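+
+ // Example (hypothetical names): getTableUri("salesdb", "clicks") returns
+ // "catalog:salesdb:clicks#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}".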
+
+ @Test
+ public void testCreateProcess() throws Exception {
+ Cluster cluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random());
+ STORE.publish(EntityType.CLUSTER, cluster);
+ assertClusterIsRegistered(cluster);
+
+ Feed infeed = getTableFeed(FEED_RESOURCE, cluster.getName(), null);
+ String infeedId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getFeedQualifiedName(infeed.getName(), cluster.getName())).getId()._getId();
+
+ Feed outfeed = getTableFeed(FEED_RESOURCE, cluster.getName());
+ String outFeedId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getFeedQualifiedName(outfeed.getName(), cluster.getName())).getId()._getId();
+
+ Process process = loadEntity(EntityType.PROCESS, PROCESS_RESOURCE, "process" + random());
+ process.getClusters().getClusters().get(0).setName(cluster.getName());
+ process.getInputs().getInputs().get(0).setFeed(infeed.getName());
+ process.getOutputs().getOutputs().get(0).setFeed(outfeed.getName());
+ STORE.publish(EntityType.PROCESS, process);
+
+ String pid = assertProcessIsRegistered(process, cluster.getName());
+ Referenceable processEntity = atlasClient.getEntity(pid);
+ assertNotNull(processEntity);
+ assertEquals(processEntity.get(AtlasClient.NAME), process.getName());
+ assertEquals(((List<Id>)processEntity.get("inputs")).get(0)._getId(), infeedId);
+ assertEquals(((List<Id>)processEntity.get("outputs")).get(0)._getId(), outFeedId);
+ }
+
+ private String assertProcessIsRegistered(Process process, String clusterName) throws Exception {
+ return assertEntityIsRegistered(FalconDataTypes.FALCON_PROCESS.getName(),
+ AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getProcessQualifiedName(process.getName(), clusterName));
+ }
+
+ private String assertClusterIsRegistered(Cluster cluster) throws Exception {
+ return assertEntityIsRegistered(FalconDataTypes.FALCON_CLUSTER.getName(),
+ AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, cluster.getName());
+ }
+
+ private TypesUtil.Pair<String, Feed> getHDFSFeed(String feedResource, String clusterName) throws Exception {
+ Feed feed = loadEntity(EntityType.FEED, feedResource, "feed" + random());
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster = feed.getClusters().getClusters().get(0);
+ feedCluster.setName(clusterName);
+ STORE.publish(EntityType.FEED, feed);
+ String feedId = assertFeedIsRegistered(feed, clusterName);
+ assertFeedAttributes(feedId);
+
+ String processId = assertEntityIsRegistered(FalconDataTypes.FALCON_FEED_CREATION.getName(),
+ AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getFeedQualifiedName(feed.getName(), clusterName));
+ Referenceable processEntity = atlasClient.getEntity(processId);
+ assertEquals(((List<Id>)processEntity.get("outputs")).get(0).getId(), feedId);
+
+ String inputId = ((List<Id>) processEntity.get("inputs")).get(0).getId();
+ Referenceable pathEntity = atlasClient.getEntity(inputId);
+ assertEquals(pathEntity.getTypeName(), HiveMetaStoreBridge.HDFS_PATH);
+
+ List<Location> locations = FeedHelper.getLocations(feedCluster, feed);
+ Location dataLocation = FileSystemStorage.getLocation(locations, LocationType.DATA);
+ assertEquals(pathEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
+ FalconBridge.normalize(dataLocation.getPath()));
+
+ return TypesUtil.Pair.of(feedId, feed);
+ }
+
+ private Feed getTableFeed(String feedResource, String clusterName) throws Exception {
+ return getTableFeed(feedResource, clusterName, null);
+ }
+
+ private Feed getTableFeed(String feedResource, String clusterName, String secondClusterName) throws Exception {
+ Feed feed = loadEntity(EntityType.FEED, feedResource, "feed" + random());
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster = feed.getClusters().getClusters().get(0);
+ feedCluster.setName(clusterName);
+ String dbName = "db" + random();
+ String tableName = "table" + random();
+ feedCluster.getTable().setUri(getTableUri(dbName, tableName));
+
+ String dbName2 = "db" + random();
+ String tableName2 = "table" + random();
+
+ if (secondClusterName != null) {
+ org.apache.falcon.entity.v0.feed.Cluster feedCluster2 = feed.getClusters().getClusters().get(1);
+ feedCluster2.setName(secondClusterName);
+ feedCluster2.getTable().setUri(getTableUri(dbName2, tableName2));
+ }
+
+ STORE.publish(EntityType.FEED, feed);
+ String feedId = assertFeedIsRegistered(feed, clusterName);
+ assertFeedAttributes(feedId);
+ verifyFeedLineage(feed.getName(), clusterName, feedId, dbName, tableName);
+
+ if (secondClusterName != null) {
+ String feedId2 = assertFeedIsRegistered(feed, secondClusterName);
+ assertFeedAttributes(feedId2);
+ verifyFeedLineage(feed.getName(), secondClusterName, feedId2, dbName2, tableName2);
+ }
+ return feed;
+ }
+
+ private void assertFeedAttributes(String feedId) throws Exception {
+ Referenceable feedEntity = atlasClient.getEntity(feedId);
+ assertEquals(feedEntity.get(AtlasClient.OWNER), "testuser");
+ assertEquals(feedEntity.get(FalconBridge.FREQUENCY), "hours(1)");
+ assertEquals(feedEntity.get(AtlasClient.DESCRIPTION), "test input");
+ }
+
+ private void verifyFeedLineage(String feedName, String clusterName, String feedId, String dbName, String tableName)
+ throws Exception{
+ //verify that lineage from hive table to falcon feed is created
+ String processId = assertEntityIsRegistered(FalconDataTypes.FALCON_FEED_CREATION.getName(),
+ AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getFeedQualifiedName(feedName, clusterName));
+ Referenceable processEntity = atlasClient.getEntity(processId);
+ assertEquals(((List<Id>)processEntity.get("outputs")).get(0).getId(), feedId);
+
+ String inputId = ((List<Id>) processEntity.get("inputs")).get(0).getId();
+ Referenceable tableEntity = atlasClient.getEntity(inputId);
+ assertEquals(tableEntity.getTypeName(), HiveDataTypes.HIVE_TABLE.getName());
+ assertEquals(tableEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
+ HiveMetaStoreBridge.getTableQualifiedName(clusterName, dbName, tableName));
+
+ }
+
+ private String assertFeedIsRegistered(Feed feed, String clusterName) throws Exception {
+ return assertEntityIsRegistered(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getFeedQualifiedName(feed.getName(), clusterName));
+ }
+
+ @Test
+ public void testReplicationFeed() throws Exception {
+ Cluster srcCluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random());
+ STORE.publish(EntityType.CLUSTER, srcCluster);
+ assertClusterIsRegistered(srcCluster);
+
+ Cluster targetCluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random());
+ STORE.publish(EntityType.CLUSTER, targetCluster);
+ assertClusterIsRegistered(targetCluster);
+
+ Feed feed = getTableFeed(FEED_REPLICATION_RESOURCE, srcCluster.getName(), targetCluster.getName());
+ String inId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getFeedQualifiedName(feed.getName(), srcCluster.getName())).getId()._getId();
+ String outId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getFeedQualifiedName(feed.getName(), targetCluster.getName())).getId()._getId();
+
+
+ String processId = assertEntityIsRegistered(FalconDataTypes.FALCON_FEED_REPLICATION.getName(),
+ AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, feed.getName());
+ Referenceable process = atlasClient.getEntity(processId);
+ assertEquals(((List<Id>)process.get("inputs")).get(0)._getId(), inId);
+ assertEquals(((List<Id>)process.get("outputs")).get(0)._getId(), outId);
+ }
+
+ @Test
+ public void testCreateProcessWithHDFSFeed() throws Exception {
+ Cluster cluster = loadEntity(EntityType.CLUSTER, CLUSTER_RESOURCE, "cluster" + random());
+ STORE.publish(EntityType.CLUSTER, cluster);
+
+ TypesUtil.Pair<String, Feed> result = getHDFSFeed(FEED_HDFS_RESOURCE, cluster.getName());
+ Feed infeed = result.right;
+ String infeedId = result.left;
+
+ Feed outfeed = getTableFeed(FEED_RESOURCE, cluster.getName());
+ String outfeedId = atlasClient.getEntity(FalconDataTypes.FALCON_FEED.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ FalconBridge.getFeedQualifiedName(outfeed.getName(), cluster.getName())).getId()._getId();
+
+ Process process = loadEntity(EntityType.PROCESS, PROCESS_RESOURCE, "process" + random());
+ process.getClusters().getClusters().get(0).setName(cluster.getName());
+ process.getInputs().getInputs().get(0).setFeed(infeed.getName());
+ process.getOutputs().getOutputs().get(0).setFeed(outfeed.getName());
+ STORE.publish(EntityType.PROCESS, process);
+
+ String pid = assertProcessIsRegistered(process, cluster.getName());
+ Referenceable processEntity = atlasClient.getEntity(pid);
+ assertEquals(processEntity.get(AtlasClient.NAME), process.getName());
+ assertEquals(processEntity.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
+ FalconBridge.getProcessQualifiedName(process.getName(), cluster.getName()));
+ assertEquals(((List<Id>)processEntity.get("inputs")).get(0)._getId(), infeedId);
+ assertEquals(((List<Id>)processEntity.get("outputs")).get(0)._getId(), outfeedId);
+ }
+
+ private String assertEntityIsRegistered(final String typeName, final String property, final String value) throws Exception {
+ waitFor(80000, new Predicate() {
+ @Override
+ public void evaluate() throws Exception {
+ Referenceable entity = atlasClient.getEntity(typeName, property, value);
+ assertNotNull(entity);
+ }
+ });
+ Referenceable entity = atlasClient.getEntity(typeName, property, value);
+ return entity.getId()._getId();
+ }
+
+ public interface Predicate {
+ /**
+ * Perform a predicate evaluation.
+ *
+ * @throws Exception thrown if the predicate is not yet satisfied or could not be evaluated.
+ */
+ void evaluate() throws Exception;
+ }
+
+ /**
+ * Wait for a condition, expressed via a {@link Predicate} to become true.
+ *
+ * @param timeout maximum time in milliseconds to wait for the predicate to become true.
+ * @param predicate predicate waiting on.
+ */
+ protected void waitFor(int timeout, Predicate predicate) throws Exception {
+ ParamChecker.notNull(predicate, "predicate");
+ long mustEnd = System.currentTimeMillis() + timeout;
+
+ while (true) {
+ try {
+ predicate.evaluate();
+ return;
+ } catch(Error | Exception e) {
+ if (System.currentTimeMillis() >= mustEnd) {
+ fail("Assertions failed. Failing after waiting for timeout " + timeout + " msecs", e);
+ }
+ LOG.debug("Waiting up to {} msec as assertion failed", mustEnd - System.currentTimeMillis(), e);
+ Thread.sleep(400);
+ }
+ }
+ }
+}
diff --git a/addons/falcon-bridge/src/test/resources/atlas-application.properties b/addons/falcon-bridge/src/test/resources/atlas-application.properties
new file mode 100644
index 0000000000..3b12e5fb33
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/atlas-application.properties
@@ -0,0 +1,125 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+######### Atlas Server Configs #########
+atlas.rest.address=http://localhost:31000
+
+######### Graph Database Configs #########
+
+
+# Graph database implementation. Value inserted by maven.
+atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase
+atlas.graph.index.search.solr.wait-searcher=true
+
+# Graph Storage
+atlas.graph.storage.backend=berkeleyje
+
+# Entity repository implementation
+atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.InMemoryEntityAuditRepository
+
+# Graph Search Index Backend
+atlas.graph.index.search.backend=solr
+
+#Berkeley storage directory
+atlas.graph.storage.directory=${sys:atlas.data}/berkley
+
+#hbase
+#For standalone mode , specify localhost
+#for distributed mode, specify zookeeper quorum here
+
+atlas.graph.storage.hostname=${graph.storage.hostname}
+atlas.graph.storage.hbase.regions-per-server=1
+atlas.graph.storage.lock.wait-time=10000
+
+#ElasticSearch
+atlas.graph.index.search.directory=${sys:atlas.data}/es
+atlas.graph.index.search.elasticsearch.client-only=false
+atlas.graph.index.search.elasticsearch.local-mode=true
+atlas.graph.index.search.elasticsearch.create.sleep=2000
+
+# Solr cloud mode properties
+atlas.graph.index.search.solr.mode=cloud
+atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address}
+atlas.graph.index.search.solr.embedded=true
+atlas.graph.index.search.max-result-set-size=150
+
+
+######### Notification Configs #########
+atlas.notification.embedded=true
+
+atlas.kafka.zookeeper.connect=localhost:19026
+atlas.kafka.bootstrap.servers=localhost:19027
+atlas.kafka.data=${sys:atlas.data}/kafka
+atlas.kafka.zookeeper.session.timeout.ms=4000
+atlas.kafka.zookeeper.sync.time.ms=20
+atlas.kafka.consumer.timeout.ms=4000
+atlas.kafka.auto.commit.interval.ms=100
+atlas.kafka.hook.group.id=atlas
+atlas.kafka.entities.group.id=atlas_entities
+#atlas.kafka.auto.commit.enable=false
+
+atlas.kafka.enable.auto.commit=false
+atlas.kafka.auto.offset.reset=earliest
+atlas.kafka.session.timeout.ms=30000
+atlas.kafka.offsets.topic.replication.factor=1
+
+
+
+######### Entity Audit Configs #########
+atlas.audit.hbase.tablename=ATLAS_ENTITY_AUDIT_EVENTS
+atlas.audit.zookeeper.session.timeout.ms=1000
+atlas.audit.hbase.zookeeper.quorum=localhost
+atlas.audit.hbase.zookeeper.property.clientPort=19026
+
+######### Security Properties #########
+
+# SSL config
+atlas.enableTLS=false
+atlas.server.https.port=31443
+
+######### Security Properties #########
+
+hbase.security.authentication=simple
+
+atlas.hook.falcon.synchronous=true
+
+######### JAAS Configuration ########
+
+atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule
+atlas.jaas.KafkaClient.loginModuleControlFlag = required
+atlas.jaas.KafkaClient.option.useKeyTab = true
+atlas.jaas.KafkaClient.option.storeKey = true
+atlas.jaas.KafkaClient.option.serviceName = kafka
+atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab
+atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM
+
+######### High Availability Configuration ########
+atlas.server.ha.enabled=false
+#atlas.server.ids=id1
+#atlas.server.address.id1=localhost:21000
+
+######### Atlas Authorization #########
+atlas.authorizer.impl=none
+# atlas.authorizer.impl=simple
+# atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json
+
+######### Atlas Authentication #########
+atlas.authentication.method.file=true
+atlas.authentication.method.ldap.type=none
+atlas.authentication.method.kerberos=false
+# atlas.authentication.method.file.filename=users-credentials.properties
diff --git a/addons/falcon-bridge/src/test/resources/atlas-log4j.xml b/addons/falcon-bridge/src/test/resources/atlas-log4j.xml
new file mode 100755
index 0000000000..262a710f7a
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/atlas-log4j.xml
@@ -0,0 +1,137 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/falcon-bridge/src/test/resources/cluster.xml b/addons/falcon-bridge/src/test/resources/cluster.xml
new file mode 100644
index 0000000000..b183847db3
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/cluster.xml
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/falcon-bridge/src/test/resources/feed-hdfs.xml b/addons/falcon-bridge/src/test/resources/feed-hdfs.xml
new file mode 100644
index 0000000000..435db07451
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/feed-hdfs.xml
@@ -0,0 +1,39 @@
+
+
+
+ online,bi
+
+ hours(1)
+ UTC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/falcon-bridge/src/test/resources/feed-replication.xml b/addons/falcon-bridge/src/test/resources/feed-replication.xml
new file mode 100644
index 0000000000..dcd427b180
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/feed-replication.xml
@@ -0,0 +1,43 @@
+
+
+
+ online,bi
+
+ hours(1)
+ UTC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/falcon-bridge/src/test/resources/feed.xml b/addons/falcon-bridge/src/test/resources/feed.xml
new file mode 100644
index 0000000000..473c745ce8
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/feed.xml
@@ -0,0 +1,38 @@
+
+
+
+ online,bi
+
+ hours(1)
+ UTC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/falcon-bridge/src/test/resources/hive-site.xml b/addons/falcon-bridge/src/test/resources/hive-site.xml
new file mode 100644
index 0000000000..f058c2edc2
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/hive-site.xml
@@ -0,0 +1,48 @@
+
+
+
+
+
+
+ hive.exec.post.hooks
+ org.apache.atlas.hive.hook.HiveHook
+
+
+
+ hive.support.concurrency
+ false
+
+
+
+ hive.metastore.warehouse.dir
+ ${user.dir}/target/metastore
+
+
+
+ javax.jdo.option.ConnectionURL
+ jdbc:derby:${user.dir}/target/metastore_db;create=true
+
+
+
+ atlas.hook.hive.synchronous
+ true
+
+
+
+ fs.pfile.impl
+ org.apache.hadoop.fs.ProxyLocalFileSystem
+
+
\ No newline at end of file
diff --git a/addons/falcon-bridge/src/test/resources/process.xml b/addons/falcon-bridge/src/test/resources/process.xml
new file mode 100644
index 0000000000..b94d0a8470
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/process.xml
@@ -0,0 +1,53 @@
+
+
+
+
+ consumer=consumer@xyz.com, owner=producer@xyz.com, department=forecasting
+
+
+
+
+
+
+
+ 1
+ FIFO
+ days(1)
+ UTC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/falcon-bridge/src/test/resources/startup.properties b/addons/falcon-bridge/src/test/resources/startup.properties
new file mode 100644
index 0000000000..9623470396
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/startup.properties
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+*.domain=debug
+*.config.store.persist=false
+*.config.store.uri=target/config_store
\ No newline at end of file
diff --git a/addons/falcon-bridge/src/test/resources/users-credentials.properties b/addons/falcon-bridge/src/test/resources/users-credentials.properties
new file mode 100644
index 0000000000..da69923502
--- /dev/null
+++ b/addons/falcon-bridge/src/test/resources/users-credentials.properties
@@ -0,0 +1,3 @@
+#username=group::sha256+salt-password
+admin=ADMIN::a4a88c0872bf652bb9ed803ece5fd6e82354838a9bf59ab4babb1dab322154e1
+rangertagsync=RANGER_TAG_SYNC::0afe7a1968b07d4c3ff4ed8c2d809a32ffea706c66cd795ead9048e81cfaf034
diff --git a/addons/hbase-bridge-shim/pom.xml b/addons/hbase-bridge-shim/pom.xml
new file mode 100644
index 0000000000..eb1b2e9493
--- /dev/null
+++ b/addons/hbase-bridge-shim/pom.xml
@@ -0,0 +1,60 @@
+
+
+
+
+ 4.0.0
+
+ apache-atlas
+ org.apache.atlas
+ 3.0.0-SNAPSHOT
+ ../../
+
+ hbase-bridge-shim
+ Apache Atlas Hbase Bridge Shim Module
+ Apache Atlas Hbase Bridge Shim
+ jar
+
+
+
+
+ org.apache.atlas
+ atlas-plugin-classloader
+
+
+ org.apache.hbase
+ hbase-server
+ ${hbase.version}
+ provided
+
+
+ javax.servlet
+ servlet-api
+
+
+ com.github.stephenc.findbugs
+ findbugs-annotations
+
+
+ javax.ws.rs
+ *
+
+
+
+
+
diff --git a/addons/hbase-bridge-shim/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java b/addons/hbase-bridge-shim/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java
new file mode 100755
index 0000000000..0b69104b17
--- /dev/null
+++ b/addons/hbase-bridge-shim/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java
@@ -0,0 +1,277 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.hbase.hook;
+
+
+import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.MasterObserver;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.coprocessor.RegionServerObserver;
+
+import java.io.IOException;
+import java.util.Optional;
+
+
+public class HBaseAtlasCoprocessor implements MasterCoprocessor, MasterObserver, RegionObserver, RegionServerObserver {
+ public static final Log LOG = LogFactory.getLog(HBaseAtlasCoprocessor.class);
+
+ private static final String ATLAS_PLUGIN_TYPE = "hbase";
+ private static final String ATLAS_HBASE_HOOK_IMPL_CLASSNAME = "org.apache.atlas.hbase.hook.HBaseAtlasCoprocessor";
+
+ private AtlasPluginClassLoader atlasPluginClassLoader = null;
+ private Object impl = null;
+ private MasterObserver implMasterObserver = null;
+ private RegionObserver implRegionObserver = null;
+ private RegionServerObserver implRegionServerObserver = null;
+ private MasterCoprocessor implMasterCoprocessor = null;
+
+ public HBaseAtlasCoprocessor() {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.HBaseAtlasCoprocessor()");
+ }
+
+ this.init();
+
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.HBaseAtlasCoprocessor()");
+ }
+ }
+
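+ // Loads the real hook implementation (same class name, packaged in the plugin-impl jar) via
+ // AtlasPluginClassLoader so Atlas dependencies stay isolated from the HBase server classpath;
+ // the observer callbacks below simply delegate to that implementation.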
+ private void init(){
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.init()");
+ }
+
+ try {
+ atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass());
+
+ @SuppressWarnings("unchecked")
+ Class<?> cls = Class.forName(ATLAS_HBASE_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader);
+
+ activatePluginClassLoader();
+
+ impl = cls.newInstance();
+ implMasterObserver = (MasterObserver)impl;
+ implRegionObserver = (RegionObserver)impl;
+ implRegionServerObserver = (RegionServerObserver)impl;
+ implMasterCoprocessor = (MasterCoprocessor)impl;
+
+ } catch (Exception e) {
+ // TODO: determine how initialization failures should be handled
+ LOG.error("Error enabling HBaseAtlasCoprocessor", e);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.init()");
+ }
+ }
+
+ @Override
+ public Optional<MasterObserver> getMasterObserver() {
+ return Optional.of(this);
+ }
+
+ @Override
+ public void start(CoprocessorEnvironment env) throws IOException {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.start()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ if (env instanceof MasterCoprocessorEnvironment) {
+ implMasterCoprocessor.start(env);
+ }
+ } finally {
+ deactivatePluginClassLoader();
+ }
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.start()");
+ }
+ }
+
+ @Override
+ public void postCreateTable(ObserverContext<MasterCoprocessorEnvironment> ctx, TableDescriptor desc, RegionInfo[] regions) throws IOException {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postCreateTable()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ implMasterObserver.postCreateTable(ctx, desc, regions);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postCreateTable()");
+ }
+ }
+
+ @Override
+ public void postModifyTable(ObserverContext<MasterCoprocessorEnvironment> ctx, TableName tableName, TableDescriptor htd) throws IOException {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postModifyTable()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ implMasterObserver.postModifyTable(ctx, tableName, htd);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postModifyTable()");
+ }
+ }
+
+ @Override
+ public void postDeleteTable(ObserverContext<MasterCoprocessorEnvironment> ctx, TableName tableName) throws IOException {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postDeleteTable()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ implMasterObserver.postDeleteTable(ctx, tableName);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postDeleteTable()");
+ }
+ }
+
+ @Override
+ public void postCreateNamespace(ObserverContext<MasterCoprocessorEnvironment> ctx, NamespaceDescriptor ns) throws IOException {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postCreateNamespace()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ implMasterObserver.postCreateNamespace(ctx, ns);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postCreateNamespace()");
+ }
+ }
+
+ @Override
+ public void postDeleteNamespace(ObserverContext<MasterCoprocessorEnvironment> ctx, String ns) throws IOException {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postDeleteNamespace()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ implMasterObserver.postDeleteNamespace(ctx, ns);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postDeleteNamespace()");
+ }
+ }
+
+ @Override
+ public void postModifyNamespace(ObserverContext<MasterCoprocessorEnvironment> ctx, NamespaceDescriptor ns) throws IOException {
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postModifyNamespace()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ implMasterObserver.postModifyNamespace(ctx, ns);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postModifyNamespace()");
+ }
+ }
+
+ @Override
+ public void postCloneSnapshot(ObserverContext<MasterCoprocessorEnvironment> observerContext, SnapshotDescription snapshot, TableDescriptor tableDescriptor) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postCloneSnapshot()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ implMasterObserver.postCloneSnapshot(observerContext,snapshot,tableDescriptor);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postCloneSnapshot()");
+ }
+ }
+
+ @Override
+ public void postRestoreSnapshot(ObserverContext<MasterCoprocessorEnvironment> observerContext, SnapshotDescription snapshot, TableDescriptor tableDescriptor) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postRestoreSnapshot()");
+ }
+
+ try {
+ activatePluginClassLoader();
+ implMasterObserver.postRestoreSnapshot(observerContext,snapshot,tableDescriptor);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postRestoreSnapshot()");
+ }
+ }
+
+ private void activatePluginClassLoader() {
+ if(atlasPluginClassLoader != null) {
+ atlasPluginClassLoader.activate();
+ }
+ }
+
+ private void deactivatePluginClassLoader() {
+ if(atlasPluginClassLoader != null) {
+ atlasPluginClassLoader.deactivate();
+ }
+ }
+
+}
diff --git a/addons/hbase-bridge/pom.xml b/addons/hbase-bridge/pom.xml
new file mode 100644
index 0000000000..a6ed51421f
--- /dev/null
+++ b/addons/hbase-bridge/pom.xml
@@ -0,0 +1,568 @@
+
+
+
+
+ 4.0.0
+
+ apache-atlas
+ org.apache.atlas
+ 3.0.0-SNAPSHOT
+ ../../
+
+ hbase-bridge
+ Apache Atlas Hbase Bridge Module
+ Apache Atlas Hbase Bridge
+ jar
+
+
+ 3.0.3
+ 9.3.14.v20161028
+
+
+
+
+
+ org.apache.hbase
+ hbase-server
+ ${hbase.version}
+ provided
+
+
+ javax.servlet
+ servlet-api
+
+
+ org.mortbay.jetty
+ servlet-api-2.5
+
+
+
+
+
+ org.apache.atlas
+ atlas-notification
+
+
+
+
+ com.sun.jersey
+ jersey-bundle
+ 1.19
+ test
+
+
+
+ org.apache.atlas
+ atlas-webapp
+ war
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${hadoop.version}
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs-client
+ ${hadoop.version}
+
+
+
+ org.apache.hadoop
+ hadoop-annotations
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ ${hadoop.version}
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+
+ org.testng
+ testng
+
+
+
+ org.mockito
+ mockito-all
+
+
+
+ org.apache.httpcomponents
+ httpcore
+ ${httpcomponents-httpcore.version}
+
+
+
+ org.eclipse.jetty
+ jetty-webapp
+ ${jetty.version}
+ compile
+
+
+
+ org.eclipse.jetty
+ jetty-server
+ test
+
+
+
+ org.apache.hbase
+ hbase-server
+ ${hbase.version}
+ test-jar
+ test
+
+
+ org.mortbay.jetty
+ servlet-api-2.5
+
+
+ org.eclipse.jetty
+ jetty-server
+
+
+
+
+
+ junit
+ junit
+ test
+ 4.12
+
+
+
+ org.apache.hbase
+ hbase-client
+ ${hbase.version}
+
+
+ org.apache.hbase
+ hbase-common
+ ${hbase.version}
+
+
+ com.github.stephenc.findbugs
+ findbugs-annotations
+
+
+
+
+ org.apache.hbase
+ hbase-hadoop2-compat
+ ${hbase.version}
+ test-jar
+ test
+
+
+ org.apache.hbase
+ hbase-hadoop-compat
+ ${hbase.version}
+ test-jar
+ test
+
+
+ com.google.guava
+ guava
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+ compile
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+ org.apache.hadoop
+ hadoop-auth
+ ${hadoop.version}
+ compile
+
+
+ org.apache.atlas
+ atlas-client-v2
+ ${project.version}
+
+
+ org.apache.hbase
+ hbase-zookeeper
+ test-jar
+ test
+ ${hbase.version}
+
+
+ org.apache.hbase
+ hbase-common
+ test-jar
+ ${hbase.version}
+ test
+
+
+
+
+ org.apache.hbase
+ hbase-testing-util
+ ${hbase.version}
+
+
+
+
+
+
+ dist
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ copy-hook
+ package
+
+ copy
+
+
+ ${project.build.directory}/dependency/hook/hbase/atlas-hbase-plugin-impl
+ false
+ false
+ true
+
+
+ ${project.groupId}
+ ${project.artifactId}
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-client-common
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-client-v2
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-intg
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-notification
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-common
+ ${project.version}
+
+
+ org.apache.kafka
+ kafka_${kafka.scala.binary.version}
+ ${kafka.version}
+
+
+ org.apache.kafka
+ kafka-clients
+ ${kafka.version}
+
+
+ com.sun.jersey.contribs
+ jersey-multipart
+ ${jersey.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ ${jackson.databind.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+ ${jackson.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+ ${jackson.version}
+
+
+ commons-configuration
+ commons-configuration
+ ${commons-conf.version}
+
+
+ com.sun.jersey
+ jersey-json
+ ${jersey.version}
+
+
+ javax.ws.rs
+ jsr311-api
+ ${jsr.version}
+
+
+
+
+
+ copy-hook-shim
+ package
+
+ copy
+
+
+ ${project.build.directory}/dependency/hook/hbase
+ false
+ false
+ true
+
+
+ ${project.groupId}
+ hbase-bridge-shim
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-plugin-classloader
+ ${project.version}
+
+
+
+
+
+
+
+
+
+
+
+
+
+ org.eclipse.jetty
+ jetty-maven-plugin
+ ${jetty.version}
+
+ ${skipTests}
+
+
+ 31000
+ 60000
+
+ ../../webapp/target/atlas-webapp-${project.version}.war
+ true
+ ../../webapp/src/main/webapp
+
+ /
+ ${project.basedir}/../../webapp/src/main/webapp/WEB-INF/web.xml
+
+ true
+
+ true
+
+ atlas.home
+ ${project.build.directory}
+
+
+ atlas.conf
+ ${project.build.directory}/test-classes
+
+
+ atlas.data
+ ${project.build.directory}/data
+
+
+ atlas.log.dir
+ ${project.build.directory}/logs
+
+
+ atlas.log.file
+ application.log
+
+
+ log4j.configuration
+ file:///${project.build.directory}/test-classes/atlas-log4j.xml
+
+
+ atlas.graphdb.backend
+ ${graphdb.backend.impl}
+
+
+ embedded.solr.directory
+ ${project.build.directory}
+
+
+ solr.log.dir
+ ${project.build.directory}/logs
+
+
+ org.eclipse.jetty.annotations.maxWait
+ 5000
+
+
+ atlas-stop
+ 31001
+ ${jetty-maven-plugin.stopWait}
+ jar
+
+
+
+ org.apache.curator
+ curator-client
+ ${curator.version}
+
+
+
+ org.apache.zookeeper
+ zookeeper
+ ${zookeeper.version}
+
+
+
+
+ start-jetty
+ pre-integration-test
+
+
+ stop
+ deploy-war
+
+
+ true
+
+
+
+ stop-jetty
+ post-integration-test
+
+ stop
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-site-plugin
+
+
+ org.apache.maven.doxia
+ doxia-module-twiki
+ ${doxia.version}
+
+
+ org.apache.maven.doxia
+ doxia-core
+ ${doxia.version}
+
+
+
+
+
+ site
+
+ prepare-package
+
+
+
+ false
+ false
+
+
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+ 1.2.1
+ false
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+
+
+ copy-resources
+ validate
+
+ copy-resources
+
+
+ ${basedir}/target/models
+
+
+ ${basedir}/../models
+
+ 0000-Area0/**
+ 1000-Hadoop/**
+
+
+
+
+
+
+ copy-solr-resources
+ validate
+
+ copy-resources
+
+
+ ${project.build.directory}/solr
+
+
+ ${basedir}/../../test-tools/src/main/resources/solr
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/hbase-bridge/src/bin/import-hbase.sh b/addons/hbase-bridge/src/bin/import-hbase.sh
new file mode 100644
index 0000000000..a343036faf
--- /dev/null
+++ b/addons/hbase-bridge/src/bin/import-hbase.sh
@@ -0,0 +1,162 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+#
+
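+# Usage (illustrative): import-hbase.sh [-n <namespace>] [-t <table>] [-f <file>]
+# where <file> lists entries to import, one "namespace:table" pair per line.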
+# resolve links - $0 may be a softlink
+PRG="${0}"
+
+[[ `uname -s` == *"CYGWIN"* ]] && CYGWIN=true
+
+while [ -h "${PRG}" ]; do
+ ls=`ls -ld "${PRG}"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "${PRG}"`/"$link"
+ fi
+done
+
+echo ">>>>> $PRG"
+
+BASEDIR=`dirname ${PRG}`
+BASEDIR=`cd ${BASEDIR}/..;pwd`
+
+echo ">>>>> $BASEDIR"
+
+if test -z "${JAVA_HOME}"
+then
+ JAVA_BIN=`which java`
+ JAR_BIN=`which jar`
+else
+ JAVA_BIN="${JAVA_HOME}/bin/java"
+ JAR_BIN="${JAVA_HOME}/bin/jar"
+fi
+export JAVA_BIN
+
+if [ ! -e "${JAVA_BIN}" ] || [ ! -e "${JAR_BIN}" ]; then
+ echo "$JAVA_BIN and/or $JAR_BIN not found on the system. Please make sure java and jar commands are available."
+ exit 1
+fi
+
+# Construct Atlas classpath using jars from hook/hbase/atlas-hbase-plugin-impl/ directory.
+for i in "${BASEDIR}/hook/hbase/atlas-hbase-plugin-impl/"*.jar; do
+ ATLASCPPATH="${ATLASCPPATH}:$i"
+done
+
+if [ -z "${ATLAS_CONF_DIR}" ] && [ -e /etc/atlas/conf ];then
+ ATLAS_CONF_DIR=/etc/atlas/conf
+fi
+ATLASCPPATH=${ATLASCPPATH}:${ATLAS_CONF_DIR}
+
+# log dir for applications
+ATLAS_LOG_DIR="${ATLAS_LOG_DIR:-/var/log/atlas}"
+export ATLAS_LOG_DIR
+LOGFILE="$ATLAS_LOG_DIR/import-hbase.log"
+
+TIME=`date +%Y%m%d%H%M%S`
+
+#Add HBase conf in classpath
+if [ ! -z "$HBASE_CONF_DIR" ]; then
+ HBASE_CONF=$HBASE_CONF_DIR
+elif [ ! -z "$HBASE_HOME" ]; then
+ HBASE_CONF="$HBASE_HOME/conf"
+elif [ -e /etc/hbase/conf ]; then
+ HBASE_CONF="/etc/hbase/conf"
+else
+ echo "Could not find a valid HBASE configuration"
+ exit 1
+fi
+
+echo Using HBase configuration directory "[$HBASE_CONF]"
+
+
+if [ -f "${HBASE_CONF}/hbase-env.sh" ]; then
+ . "${HBASE_CONF}/hbase-env.sh"
+fi
+
+if [ -z "$HBASE_HOME" ]; then
+ if [ -d "${BASEDIR}/../hbase" ]; then
+ HBASE_HOME=${BASEDIR}/../hbase
+ else
+ echo "Please set HBASE_HOME to the root of HBase installation"
+ exit 1
+ fi
+fi
+
+HBASE_CP="${HBASE_CONF}"
+
+for i in "${HBASE_HOME}/lib/"*.jar; do
+ HBASE_CP="${HBASE_CP}:$i"
+done
+
+#Add hadoop conf in classpath
+if [ ! -z "$HADOOP_CLASSPATH" ]; then
+ HADOOP_CP=$HADOOP_CLASSPATH
+elif [ ! -z "$HADOOP_HOME" ]; then
+ HADOOP_CP=`$HADOOP_HOME/bin/hadoop classpath`
+elif [ $(command -v hadoop) ]; then
+ HADOOP_CP=`hadoop classpath`
+ echo $HADOOP_CP
+else
+ echo "Environment variable HADOOP_CLASSPATH or HADOOP_HOME need to be set"
+ exit 1
+fi
+
+CP="${HBASE_CP}:${HADOOP_CP}:${ATLASCPPATH}"
+
+# If running in cygwin, convert pathnames and classpath to Windows format.
+if [ "${CYGWIN}" == "true" ]
+then
+ ATLAS_LOG_DIR=`cygpath -w ${ATLAS_LOG_DIR}`
+ LOGFILE=`cygpath -w ${LOGFILE}`
+ HBASE_CP=`cygpath -w ${HBASE_CP}`
+ HADOOP_CP=`cygpath -w ${HADOOP_CP}`
+ CP=`cygpath -w -p ${CP}`
+fi
+
+JAVA_PROPERTIES="$ATLAS_OPTS -Datlas.log.dir=$ATLAS_LOG_DIR -Datlas.log.file=import-hbase.log
+-Dlog4j.configuration=atlas-hbase-import-log4j.xml"
+
+IMPORT_ARGS=
+JVM_ARGS=
+
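+# Recognized import options are forwarded to HBaseBridge; any other argument is passed to the JVM.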
+while true
+do
+ option=$1
+ shift
+
+ case "$option" in
+ -n) IMPORT_ARGS="$IMPORT_ARGS -n $1"; shift;;
+ -t) IMPORT_ARGS="$IMPORT_ARGS -t $1"; shift;;
+ -f) IMPORT_ARGS="$IMPORT_ARGS -f $1"; shift;;
+ --namespace) IMPORT_ARGS="$IMPORT_ARGS --namespace $1"; shift;;
+ --table) IMPORT_ARGS="$IMPORT_ARGS --table $1"; shift;;
+ --filename) IMPORT_ARGS="$IMPORT_ARGS --filename $1"; shift;;
+ "") break;;
+ *) JVM_ARGS="$JVM_ARGS $option"
+ esac
+done
+
+JAVA_PROPERTIES="${JAVA_PROPERTIES} ${JVM_ARGS}"
+
+echo "Log file for import is $LOGFILE"
+
+"${JAVA_BIN}" ${JAVA_PROPERTIES} -cp "${CP}" org.apache.atlas.hbase.bridge.HBaseBridge $IMPORT_ARGS
+
+RETVAL=$?
+[ $RETVAL -eq 0 ] && echo HBase Data Model imported successfully!!!
+[ $RETVAL -ne 0 ] && echo Failed to import HBase Data Model!!!
+
+exit $RETVAL
diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseAtlasHook.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseAtlasHook.java
new file mode 100644
index 0000000000..8e6c57dba3
--- /dev/null
+++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseAtlasHook.java
@@ -0,0 +1,678 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hbase.bridge;
+
+import org.apache.atlas.AtlasConstants;
+import org.apache.atlas.hbase.model.HBaseOperationContext;
+import org.apache.atlas.hbase.model.HBaseDataTypes;
+import org.apache.atlas.hook.AtlasHook;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.instance.AtlasObjectId;
+import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
+import org.apache.atlas.model.notification.HookNotification.EntityDeleteRequestV2;
+import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
+import org.apache.atlas.type.AtlasTypeUtil;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.atlas.repository.Constants.HBASE_SOURCE;
+
+// Sends HBase entity metadata (namespaces, tables and column families) to Atlas as hook notifications
+public class HBaseAtlasHook extends AtlasHook {
+ private static final Logger LOG = LoggerFactory.getLogger(HBaseAtlasHook.class);
+
+
+ public static final String ATTR_DESCRIPTION = "description";
+ public static final String ATTR_ATLAS_ENDPOINT = "atlas.rest.address";
+ public static final String ATTR_PARAMETERS = "parameters";
+ public static final String ATTR_URI = "uri";
+ public static final String ATTR_NAMESPACE = "namespace";
+ public static final String ATTR_TABLE = "table";
+ public static final String ATTR_COLUMNFAMILIES = "column_families";
+ public static final String ATTR_CREATE_TIME = "createTime";
+ public static final String ATTR_MODIFIED_TIME = "modifiedTime";
+ public static final String ATTR_OWNER = "owner";
+ public static final String ATTR_NAME = "name";
+
+ // column addition metadata
+ public static final String ATTR_TABLE_MAX_FILESIZE = "maxFileSize";
+ public static final String ATTR_TABLE_ISREADONLY = "isReadOnly";
+ public static final String ATTR_TABLE_ISCOMPACTION_ENABLED = "isCompactionEnabled";
+ public static final String ATTR_TABLE_ISNORMALIZATION_ENABLED = "isNormalizationEnabled";
+ public static final String ATTR_TABLE_REPLICATION_PER_REGION = "replicasPerRegion";
+ public static final String ATTR_TABLE_DURABLILITY = "durability";
+ public static final String ATTR_TABLE_NORMALIZATION_ENABLED = "isNormalizationEnabled";
+
+ // column family additional metadata
+ public static final String ATTR_CF_BLOOMFILTER_TYPE = "bloomFilterType";
+ public static final String ATTR_CF_COMPRESSION_TYPE = "compressionType";
+ public static final String ATTR_CF_COMPACTION_COMPRESSION_TYPE = "compactionCompressionType";
+ public static final String ATTR_CF_ENCRYPTION_TYPE = "encryptionType";
+ public static final String ATTR_CF_INMEMORY_COMPACTION_POLICY = "inMemoryCompactionPolicy";
+ public static final String ATTR_CF_KEEP_DELETE_CELLS = "keepDeletedCells";
+ public static final String ATTR_CF_MAX_VERSIONS = "maxVersions";
+ public static final String ATTR_CF_MIN_VERSIONS = "minVersions";
+ public static final String ATTR_CF_DATA_BLOCK_ENCODING = "dataBlockEncoding";
+ public static final String ATTR_CF_STORAGE_POLICY = "StoragePolicy";
+ public static final String ATTR_CF_TTL = "ttl";
+ public static final String ATTR_CF_BLOCK_CACHE_ENABLED = "blockCacheEnabled";
+ public static final String ATTR_CF_CACHED_BLOOM_ON_WRITE = "cacheBloomsOnWrite";
+ public static final String ATTR_CF_CACHED_DATA_ON_WRITE = "cacheDataOnWrite";
+ public static final String ATTR_CF_CACHED_INDEXES_ON_WRITE = "cacheIndexesOnWrite";
+ public static final String ATTR_CF_EVICT_BLOCK_ONCLOSE = "evictBlocksOnClose";
+ public static final String ATTR_CF_PREFETCH_BLOCK_ONOPEN = "prefetchBlocksOnOpen";
+ public static final String ATTR_CF_NEW_VERSION_BEHAVIOR = "newVersionBehavior";
+ public static final String ATTR_CF_MOB_ENABLED = "isMobEnabled";
+ public static final String ATTR_CF_MOB_COMPATCTPARTITION_POLICY = "mobCompactPartitionPolicy";
+
+ public static final String HBASE_NAMESPACE_QUALIFIED_NAME = "%s@%s";
+ public static final String HBASE_TABLE_QUALIFIED_NAME_FORMAT = "%s:%s@%s";
+ public static final String HBASE_COLUMN_FAMILY_QUALIFIED_NAME_FORMAT = "%s:%s.%s@%s";
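+ // e.g. namespace "default@cm1", table "default:webtable@cm1", column family "default:webtable.cf1@cm1" (illustrative values)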
+
+ private static final String REFERENCEABLE_ATTRIBUTE_NAME = "qualifiedName";
+
+ public static final String RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES = "hbase_table_column_families";
+ public static final String RELATIONSHIP_HBASE_TABLE_NAMESPACE = "hbase_table_namespace";
+
+ private static volatile HBaseAtlasHook me;
+
+ public enum OPERATION {
+ CREATE_NAMESPACE("create_namespace"),
+ ALTER_NAMESPACE("alter_namespace"),
+ DELETE_NAMESPACE("delete_namespace"),
+ CREATE_TABLE("create_table"),
+ ALTER_TABLE("alter_table"),
+ DELETE_TABLE("delete_table"),
+ CREATE_COLUMN_FAMILY("create_column_Family"),
+ ALTER_COLUMN_FAMILY("alter_column_Family"),
+ DELETE_COLUMN_FAMILY("delete_column_Family");
+
+ private final String name;
+
+ OPERATION(String s) {
+ name = s;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
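+ // lazily creates the singleton hook instance using double-checked locking on the volatile field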
+ public static HBaseAtlasHook getInstance() {
+ HBaseAtlasHook ret = me;
+
+ if (ret == null) {
+ try {
+ synchronized (HBaseAtlasHook.class) {
+ ret = me;
+
+ if (ret == null) {
+ me = ret = new HBaseAtlasHook();
+ }
+ }
+ } catch (Exception e) {
+ LOG.error("Caught exception instantiating the Atlas HBase hook.", e);
+ }
+ }
+
+ return ret;
+ }
+
+ public HBaseAtlasHook() {
+ }
+
+ public void createAtlasInstances(HBaseOperationContext hbaseOperationContext) {
+ OPERATION operation = hbaseOperationContext.getOperation();
+
+ LOG.info("HBaseAtlasHook(operation={})", operation);
+
+ switch (operation) {
+ case CREATE_NAMESPACE:
+ case ALTER_NAMESPACE:
+ createOrUpdateNamespaceInstance(hbaseOperationContext);
+ break;
+ case DELETE_NAMESPACE:
+ deleteNameSpaceInstance(hbaseOperationContext);
+ break;
+ case CREATE_TABLE:
+ case ALTER_TABLE:
+ createOrUpdateTableInstance(hbaseOperationContext);
+ break;
+ case DELETE_TABLE:
+ deleteTableInstance(hbaseOperationContext);
+ break;
+ case CREATE_COLUMN_FAMILY:
+ case ALTER_COLUMN_FAMILY:
+ createOrUpdateColumnFamilyInstance(hbaseOperationContext);
+ break;
+ case DELETE_COLUMN_FAMILY:
+ deleteColumnFamilyInstance(hbaseOperationContext);
+ break;
+ }
+ }
+
+ private void createOrUpdateNamespaceInstance(HBaseOperationContext hbaseOperationContext) {
+ AtlasEntity nameSpace = buildNameSpace(hbaseOperationContext);
+
+ switch (hbaseOperationContext.getOperation()) {
+ case CREATE_NAMESPACE:
+ LOG.info("Create NameSpace {}", nameSpace.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME));
+
+ hbaseOperationContext.addMessage(new EntityCreateRequestV2(hbaseOperationContext.getUser(), new AtlasEntitiesWithExtInfo(nameSpace)));
+ break;
+
+ case ALTER_NAMESPACE:
+ LOG.info("Modify NameSpace {}", nameSpace.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME));
+
+ hbaseOperationContext.addMessage(new EntityUpdateRequestV2(hbaseOperationContext.getUser(), new AtlasEntitiesWithExtInfo(nameSpace)));
+ break;
+ }
+ }
+
+ private void deleteNameSpaceInstance(HBaseOperationContext hbaseOperationContext) {
+ String nameSpaceQName = getNameSpaceQualifiedName(getMetadataNamespace(), hbaseOperationContext.getNameSpace());
+ AtlasObjectId nameSpaceId = new AtlasObjectId(HBaseDataTypes.HBASE_NAMESPACE.getName(), REFERENCEABLE_ATTRIBUTE_NAME, nameSpaceQName);
+
+ LOG.info("Delete NameSpace {}", nameSpaceQName);
+
+ hbaseOperationContext.addMessage(new EntityDeleteRequestV2(hbaseOperationContext.getUser(), Collections.singletonList(nameSpaceId)));
+ }
+
+ private void createOrUpdateTableInstance(HBaseOperationContext hbaseOperationContext) {
+ AtlasEntity nameSpace = buildNameSpace(hbaseOperationContext);
+ AtlasEntity table = buildTable(hbaseOperationContext, nameSpace);
+ List<AtlasEntity> columnFamilies = buildColumnFamilies(hbaseOperationContext, nameSpace, table);
+
+ table.setRelationshipAttribute(ATTR_COLUMNFAMILIES, AtlasTypeUtil.getAtlasRelatedObjectIds(columnFamilies, RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES));
+
+ AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo(table);
+
+ entities.addReferredEntity(nameSpace);
+
+ if (CollectionUtils.isNotEmpty(columnFamilies)) {
+ for (AtlasEntity columnFamily : columnFamilies) {
+ entities.addReferredEntity(columnFamily);
+ }
+ }
+
+ switch (hbaseOperationContext.getOperation()) {
+ case CREATE_TABLE:
+ LOG.info("Create Table {}", table.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME));
+
+ hbaseOperationContext.addMessage(new EntityCreateRequestV2(hbaseOperationContext.getUser(), entities));
+ break;
+
+ case ALTER_TABLE:
+ LOG.info("Modify Table {}", table.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME));
+
+ hbaseOperationContext.addMessage(new EntityUpdateRequestV2(hbaseOperationContext.getUser(), entities));
+ break;
+ }
+ }
+
+ private void deleteTableInstance(HBaseOperationContext hbaseOperationContext) {
+ TableName tableName = hbaseOperationContext.getTableName();
+ String nameSpaceName = tableName.getNamespaceAsString();
+
+ if (nameSpaceName == null) {
+ nameSpaceName = tableName.getNameWithNamespaceInclAsString();
+ }
+
+ String tableNameStr = tableName.getNameAsString();
+ String tableQName = getTableQualifiedName(getMetadataNamespace(), nameSpaceName, tableNameStr);
+ AtlasObjectId tableId = new AtlasObjectId(HBaseDataTypes.HBASE_TABLE.getName(), REFERENCEABLE_ATTRIBUTE_NAME, tableQName);
+
+ LOG.info("Delete Table {}", tableQName);
+
+ hbaseOperationContext.addMessage(new EntityDeleteRequestV2(hbaseOperationContext.getUser(), Collections.singletonList(tableId)));
+ }
+
+ private void createOrUpdateColumnFamilyInstance(HBaseOperationContext hbaseOperationContext) {
+ AtlasEntity nameSpace = buildNameSpace(hbaseOperationContext);
+ AtlasEntity table = buildTable(hbaseOperationContext, nameSpace);
+ AtlasEntity columnFamily = buildColumnFamily(hbaseOperationContext, hbaseOperationContext.gethColumnDescriptor(), nameSpace, table);
+
+ AtlasEntitiesWithExtInfo entities = new AtlasEntitiesWithExtInfo(columnFamily);
+
+ entities.addReferredEntity(nameSpace);
+ entities.addReferredEntity(table);
+
+ switch (hbaseOperationContext.getOperation()) {
+ case CREATE_COLUMN_FAMILY:
+ LOG.info("Create ColumnFamily {}", columnFamily.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME));
+
+ hbaseOperationContext.addMessage(new EntityCreateRequestV2(hbaseOperationContext.getUser(), entities));
+ break;
+
+ case ALTER_COLUMN_FAMILY:
+ LOG.info("Alter ColumnFamily {}", columnFamily.getAttribute(REFERENCEABLE_ATTRIBUTE_NAME));
+
+ hbaseOperationContext.addMessage(new EntityUpdateRequestV2(hbaseOperationContext.getUser(), entities));
+ break;
+ }
+ }
+
+ private void deleteColumnFamilyInstance(HBaseOperationContext hbaseOperationContext) {
+ TableName tableName = hbaseOperationContext.getTableName();
+ String nameSpaceName = tableName.getNamespaceAsString();
+
+ if (nameSpaceName == null) {
+ nameSpaceName = tableName.getNameWithNamespaceInclAsString();
+ }
+
+ String tableNameStr = tableName.getNameAsString();
+ String columnFamilyName = hbaseOperationContext.getColummFamily();
+ String columnFamilyQName = getColumnFamilyQualifiedName(getMetadataNamespace(), nameSpaceName, tableNameStr, columnFamilyName);
+ AtlasObjectId columnFamilyId = new AtlasObjectId(HBaseDataTypes.HBASE_COLUMN_FAMILY.getName(), REFERENCEABLE_ATTRIBUTE_NAME, columnFamilyQName);
+
+ LOG.info("Delete ColumnFamily {}", columnFamilyQName);
+
+ hbaseOperationContext.addMessage(new EntityDeleteRequestV2(hbaseOperationContext.getUser(), Collections.singletonList(columnFamilyId)));
+ }
+
+
+ /**
+ * Construct the qualified name used to uniquely identify a ColumnFamily instance in Atlas.
+ *
+ * @param metadataNamespace Metadata namespace of the cluster to which the HBase component belongs
+ * @param nameSpace Name of the HBase database to which the Table belongs
+ * @param tableName Name of the HBase table
+ * @param columnFamily Name of the ColumnFamily
+ * @return Unique qualified name to identify the Table instance in Atlas.
+ */
+ public static String getColumnFamilyQualifiedName(String metadataNamespace, String nameSpace, String tableName, String columnFamily) {
+ if (metadataNamespace == null || nameSpace == null || tableName == null || columnFamily == null) {
+ return null;
+ } else {
+ return String.format(HBASE_COLUMN_FAMILY_QUALIFIED_NAME_FORMAT, nameSpace, stripNameSpace(tableName), columnFamily, metadataNamespace);
+ }
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a Table instance in Atlas.
+ *
+ * @param metadataNamespace Metadata namespace of the cluster to which the HBase component belongs
+ * @param nameSpace Name of the HBase database to which the Table belongs
+ * @param tableName Name of the HBase table
+ * @return Unique qualified name to identify the Table instance in Atlas.
+ */
+ public static String getTableQualifiedName(String metadataNamespace, String nameSpace, String tableName) {
+ if (metadataNamespace == null || nameSpace == null || tableName == null) {
+ return null;
+ } else {
+ return String.format(HBASE_TABLE_QUALIFIED_NAME_FORMAT, nameSpace, stripNameSpace(tableName), metadataNamespace);
+ }
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a HBase NameSpace instance in Atlas.
+ *
+ * @param metadataNamespace Metadata namespace of the cluster to which the HBase component belongs
+ * @param nameSpace
+ * @return Unique qualified name to identify the HBase NameSpace instance in Atlas.
+ */
+ public static String getNameSpaceQualifiedName(String metadataNamespace, String nameSpace) {
+ if (metadataNamespace == null || nameSpace == null) {
+ return null;
+ } else {
+ return String.format(HBASE_NAMESPACE_QUALIFIED_NAME, nameSpace, metadataNamespace);
+ }
+ }
+
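+ // strips the "<namespace>:" prefix from a fully-qualified HBase table name, if present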
+ private static String stripNameSpace(String tableName) {
+ return tableName.substring(tableName.indexOf(":") + 1);
+ }
+
+ private AtlasEntity buildNameSpace(HBaseOperationContext hbaseOperationContext) {
+ AtlasEntity nameSpace = new AtlasEntity(HBaseDataTypes.HBASE_NAMESPACE.getName());
+ NamespaceDescriptor nameSpaceDesc = hbaseOperationContext.getNamespaceDescriptor();
+ String nameSpaceName = nameSpaceDesc == null ? null : hbaseOperationContext.getNamespaceDescriptor().getName();
+
+ if (nameSpaceName == null) {
+ nameSpaceName = hbaseOperationContext.getNameSpace();
+ }
+
+ Date now = new Date(System.currentTimeMillis());
+
+ nameSpace.setAttribute(ATTR_NAME, nameSpaceName);
+ nameSpace.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, getNameSpaceQualifiedName(getMetadataNamespace(), nameSpaceName));
+ nameSpace.setAttribute(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, getMetadataNamespace());
+ nameSpace.setAttribute(ATTR_DESCRIPTION, nameSpaceName);
+ nameSpace.setAttribute(ATTR_PARAMETERS, hbaseOperationContext.getHbaseConf());
+ nameSpace.setAttribute(ATTR_OWNER, hbaseOperationContext.getOwner());
+ nameSpace.setAttribute(ATTR_MODIFIED_TIME, now);
+
+ if (OPERATION.CREATE_NAMESPACE.equals(hbaseOperationContext.getOperation())) {
+ nameSpace.setAttribute(ATTR_CREATE_TIME, now);
+ }
+
+ return nameSpace;
+ }
+
+ private AtlasEntity buildTable(HBaseOperationContext hbaseOperationContext, AtlasEntity nameSpace) {
+ AtlasEntity table = new AtlasEntity(HBaseDataTypes.HBASE_TABLE.getName());
+ String tableName = getTableName(hbaseOperationContext);
+ String nameSpaceName = (String) nameSpace.getAttribute(ATTR_NAME);
+ String tableQName = getTableQualifiedName(getMetadataNamespace(), nameSpaceName, tableName);
+ OPERATION operation = hbaseOperationContext.getOperation();
+ Date now = new Date(System.currentTimeMillis());
+
+ table.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, tableQName);
+ table.setAttribute(ATTR_NAME, tableName);
+ table.setAttribute(ATTR_URI, tableName);
+ table.setAttribute(ATTR_OWNER, hbaseOperationContext.getOwner());
+ table.setAttribute(ATTR_DESCRIPTION, tableName);
+ table.setAttribute(ATTR_PARAMETERS, hbaseOperationContext.getHbaseConf());
+ table.setRelationshipAttribute(ATTR_NAMESPACE, AtlasTypeUtil.getAtlasRelatedObjectId(nameSpace, RELATIONSHIP_HBASE_TABLE_NAMESPACE));
+
+ TableDescriptor tableDescriptor = hbaseOperationContext.gethTableDescriptor();
+ if (tableDescriptor != null) {
+ table.setAttribute(ATTR_TABLE_MAX_FILESIZE, tableDescriptor.getMaxFileSize());
+ table.setAttribute(ATTR_TABLE_REPLICATION_PER_REGION, tableDescriptor.getRegionReplication());
+ table.setAttribute(ATTR_TABLE_ISREADONLY, tableDescriptor.isReadOnly());
+ table.setAttribute(ATTR_TABLE_ISNORMALIZATION_ENABLED, tableDescriptor.isNormalizationEnabled());
+ table.setAttribute(ATTR_TABLE_ISCOMPACTION_ENABLED, tableDescriptor.isCompactionEnabled());
+ table.setAttribute(ATTR_TABLE_DURABLILITY, (tableDescriptor.getDurability() != null ? tableDescriptor.getDurability().name() : null));
+ table.setAttribute(ATTR_TABLE_NORMALIZATION_ENABLED, tableDescriptor.isNormalizationEnabled());
+ }
+
+ switch (operation) {
+ case CREATE_TABLE:
+ table.setAttribute(ATTR_CREATE_TIME, now);
+ table.setAttribute(ATTR_MODIFIED_TIME, now);
+ break;
+ case CREATE_COLUMN_FAMILY:
+ table.setAttribute(ATTR_MODIFIED_TIME, now);
+ break;
+ case ALTER_TABLE:
+ case ALTER_COLUMN_FAMILY:
+ table.setAttribute(ATTR_MODIFIED_TIME, now);
+ break;
+ default:
+ break;
+ }
+
+ return table;
+ }
+
+ private List<AtlasEntity> buildColumnFamilies(HBaseOperationContext hbaseOperationContext, AtlasEntity nameSpace, AtlasEntity table) {
+ List<AtlasEntity> columnFamilies = new ArrayList<>();
+ ColumnFamilyDescriptor[] columnFamilyDescriptors = hbaseOperationContext.gethColumnDescriptors();
+
+ if (columnFamilyDescriptors != null) {
+ for (ColumnFamilyDescriptor columnFamilyDescriptor : columnFamilyDescriptors) {
+ AtlasEntity columnFamily = buildColumnFamily(hbaseOperationContext, columnFamilyDescriptor, nameSpace, table);
+
+ columnFamilies.add(columnFamily);
+ }
+ }
+
+ return columnFamilies;
+ }
+
+ private AtlasEntity buildColumnFamily(HBaseOperationContext hbaseOperationContext, ColumnFamilyDescriptor columnFamilyDescriptor, AtlasEntity nameSpace, AtlasEntity table) {
+ AtlasEntity columnFamily = new AtlasEntity(HBaseDataTypes.HBASE_COLUMN_FAMILY.getName());
+ String columnFamilyName = columnFamilyDescriptor.getNameAsString();
+ String tableName = (String) table.getAttribute(ATTR_NAME);
+ String nameSpaceName = (String) nameSpace.getAttribute(ATTR_NAME);
+ String columnFamilyQName = getColumnFamilyQualifiedName(getMetadataNamespace(), nameSpaceName, tableName, columnFamilyName);
+ Date now = new Date(System.currentTimeMillis());
+
+ columnFamily.setAttribute(ATTR_NAME, columnFamilyName);
+ columnFamily.setAttribute(ATTR_DESCRIPTION, columnFamilyName);
+ columnFamily.setAttribute(REFERENCEABLE_ATTRIBUTE_NAME, columnFamilyQName);
+ columnFamily.setAttribute(ATTR_OWNER, hbaseOperationContext.getOwner());
+ columnFamily.setRelationshipAttribute(ATTR_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(table, RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES));
+
+ if (columnFamilyDescriptor != null) {
+ columnFamily.setAttribute(ATTR_CF_BLOCK_CACHE_ENABLED, columnFamilyDescriptor.isBlockCacheEnabled());
+ columnFamily.setAttribute(ATTR_CF_BLOOMFILTER_TYPE, (columnFamilyDescriptor.getBloomFilterType() != null ? columnFamilyDescriptor.getBloomFilterType().name():null));
+ columnFamily.setAttribute(ATTR_CF_CACHED_BLOOM_ON_WRITE, columnFamilyDescriptor.isCacheBloomsOnWrite());
+ columnFamily.setAttribute(ATTR_CF_CACHED_DATA_ON_WRITE, columnFamilyDescriptor.isCacheDataOnWrite());
+ columnFamily.setAttribute(ATTR_CF_CACHED_INDEXES_ON_WRITE, columnFamilyDescriptor.isCacheIndexesOnWrite());
+ columnFamily.setAttribute(ATTR_CF_COMPACTION_COMPRESSION_TYPE, (columnFamilyDescriptor.getCompactionCompressionType() != null ? columnFamilyDescriptor.getCompactionCompressionType().name():null));
+ columnFamily.setAttribute(ATTR_CF_COMPRESSION_TYPE, (columnFamilyDescriptor.getCompressionType() != null ? columnFamilyDescriptor.getCompressionType().name():null));
+ columnFamily.setAttribute(ATTR_CF_DATA_BLOCK_ENCODING, (columnFamilyDescriptor.getDataBlockEncoding() != null ? columnFamilyDescriptor.getDataBlockEncoding().name():null));
+ columnFamily.setAttribute(ATTR_CF_ENCRYPTION_TYPE, columnFamilyDescriptor.getEncryptionType());
+ columnFamily.setAttribute(ATTR_CF_EVICT_BLOCK_ONCLOSE, columnFamilyDescriptor.isEvictBlocksOnClose());
+ columnFamily.setAttribute(ATTR_CF_INMEMORY_COMPACTION_POLICY, (columnFamilyDescriptor.getInMemoryCompaction() != null ? columnFamilyDescriptor.getInMemoryCompaction().name():null));
+ columnFamily.setAttribute(ATTR_CF_KEEP_DELETE_CELLS, ( columnFamilyDescriptor.getKeepDeletedCells() != null ? columnFamilyDescriptor.getKeepDeletedCells().name():null));
+ columnFamily.setAttribute(ATTR_CF_MAX_VERSIONS, columnFamilyDescriptor.getMaxVersions());
+ columnFamily.setAttribute(ATTR_CF_MIN_VERSIONS, columnFamilyDescriptor.getMinVersions());
+ columnFamily.setAttribute(ATTR_CF_NEW_VERSION_BEHAVIOR, columnFamilyDescriptor.isNewVersionBehavior());
+ columnFamily.setAttribute(ATTR_CF_MOB_ENABLED, columnFamilyDescriptor.isMobEnabled());
+ columnFamily.setAttribute(ATTR_CF_MOB_COMPATCTPARTITION_POLICY, ( columnFamilyDescriptor.getMobCompactPartitionPolicy() != null ? columnFamilyDescriptor.getMobCompactPartitionPolicy().name():null));
+ columnFamily.setAttribute(ATTR_CF_PREFETCH_BLOCK_ONOPEN, columnFamilyDescriptor.isPrefetchBlocksOnOpen());
+ columnFamily.setAttribute(ATTR_CF_STORAGE_POLICY, columnFamilyDescriptor.getStoragePolicy());
+ columnFamily.setAttribute(ATTR_CF_TTL, columnFamilyDescriptor.getTimeToLive());
+ }
+
+ switch (hbaseOperationContext.getOperation()) {
+ case CREATE_COLUMN_FAMILY:
+ case CREATE_TABLE:
+ columnFamily.setAttribute(ATTR_CREATE_TIME, now);
+ columnFamily.setAttribute(ATTR_MODIFIED_TIME, now);
+ break;
+
+ case ALTER_COLUMN_FAMILY:
+ columnFamily.setAttribute(ATTR_MODIFIED_TIME, now);
+ break;
+
+ default:
+ break;
+ }
+
+ return columnFamily;
+ }
+
+ public String getMessageSource() {
+ return HBASE_SOURCE;
+ }
+
+ private String getTableName(HBaseOperationContext hbaseOperationContext) {
+ final String ret;
+
+ TableName tableName = hbaseOperationContext.getTableName();
+
+ if (tableName != null) {
+ ret = tableName.getNameAsString();
+ } else {
+ TableDescriptor tableDescriptor = hbaseOperationContext.gethTableDescriptor();
+
+ ret = (tableDescriptor != null) ? tableDescriptor.getTableName().getNameAsString() : null;
+ }
+
+ return ret;
+ }
+
+ public void sendHBaseNameSpaceOperation(final NamespaceDescriptor namespaceDescriptor, final String nameSpace, final OPERATION operation, ObserverContext<MasterCoprocessorEnvironment> ctx) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasHook.sendHBaseNameSpaceOperation()");
+ }
+
+ try {
+ final UserGroupInformation ugi = getUGI(ctx);
+ final User user = getActiveUser(ctx);
+ final String userName = (user != null) ? user.getShortName() : null;
+ HBaseOperationContext hbaseOperationContext = handleHBaseNameSpaceOperation(namespaceDescriptor, nameSpace, operation, ugi, userName);
+
+ sendNotification(hbaseOperationContext);
+ } catch (Throwable t) {
+ LOG.error("HBaseAtlasHook.sendHBaseNameSpaceOperation(): failed to send notification", t);
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasHook.sendHBaseNameSpaceOperation()");
+ }
+ }
+
+ public void sendHBaseTableOperation(TableDescriptor tableDescriptor, final TableName tableName, final OPERATION operation, ObserverContext<MasterCoprocessorEnvironment> ctx) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasHook.sendHBaseTableOperation()");
+ }
+
+ try {
+ final UserGroupInformation ugi = getUGI(ctx);
+ final User user = getActiveUser(ctx);
+ final String userName = (user != null) ? user.getShortName() : null;
+ HBaseOperationContext hbaseOperationContext = handleHBaseTableOperation(tableDescriptor, tableName, operation, ugi, userName);
+
+ sendNotification(hbaseOperationContext);
+ } catch (Throwable t) {
+ LOG.error("HBaseAtlasHook.sendHBaseTableOperation(): failed to send notification", t);
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasHook.sendHBaseTableOperation()");
+ }
+ }
+
+ private void sendNotification(HBaseOperationContext hbaseOperationContext) {
+ UserGroupInformation ugi = hbaseOperationContext.getUgi();
+
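+ // for proxy users, notify Atlas as the real (login) user rather than the proxied user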
+ if (ugi != null && ugi.getRealUser() != null) {
+ ugi = ugi.getRealUser();
+ }
+
+ notifyEntities(hbaseOperationContext.getMessages(), ugi);
+ }
+
+ private HBaseOperationContext handleHBaseNameSpaceOperation(NamespaceDescriptor namespaceDescriptor, String nameSpace, OPERATION operation, UserGroupInformation ugi, String userName) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasHook.handleHBaseNameSpaceOperation()");
+ }
+
+ HBaseOperationContext hbaseOperationContext = new HBaseOperationContext(namespaceDescriptor, nameSpace, operation, ugi, userName, userName);
+ createAtlasInstances(hbaseOperationContext);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasHook.handleHBaseNameSpaceOperation(): {}", hbaseOperationContext);
+ }
+
+ return hbaseOperationContext;
+ }
+
+ private HBaseOperationContext handleHBaseTableOperation(TableDescriptor tableDescriptor, TableName tableName, OPERATION operation, UserGroupInformation ugi, String userName) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasHook.handleHBaseTableOperation()");
+ }
+
+ Map<String, String> hbaseConf = null;
+ String owner = null;
+ String tableNameSpace = null;
+ TableName hbaseTableName = null;
+ ColumnFamilyDescriptor[] columnFamilyDescriptors = null;
+
+ if (tableDescriptor != null) {
+ owner = tableDescriptor.getOwnerString();
+ hbaseConf = null;
+ hbaseTableName = tableDescriptor.getTableName();
+ if (hbaseTableName != null) {
+ tableNameSpace = hbaseTableName.getNamespaceAsString();
+ if (tableNameSpace == null) {
+ tableNameSpace = hbaseTableName.getNameWithNamespaceInclAsString();
+ }
+ }
+ }
+
+ if (owner == null) {
+ owner = userName;
+ }
+
+ if (tableDescriptor != null) {
+ columnFamilyDescriptors = tableDescriptor.getColumnFamilies();
+ }
+
+ HBaseOperationContext hbaseOperationContext = new HBaseOperationContext(tableNameSpace, tableDescriptor, tableName, columnFamilyDescriptors, operation, ugi, userName, owner, hbaseConf);
+ createAtlasInstances(hbaseOperationContext);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasHook.handleHBaseTableOperation(): {}", hbaseOperationContext);
+ }
+ return hbaseOperationContext;
+ }
+
+ private HBaseOperationContext handleHBaseColumnFamilyOperation(ColumnFamilyDescriptor columnFamilyDescriptor, TableName tableName, String columnFamily, OPERATION operation, UserGroupInformation ugi, String userName) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasHook.handleHBaseColumnFamilyOperation()");
+ }
+
+ String owner = userName;
+ Map<String, String> hbaseConf = new HashMap<>();
+
+ String tableNameSpace = tableName.getNamespaceAsString();
+ if (tableNameSpace == null) {
+ tableNameSpace = tableName.getNameWithNamespaceInclAsString();
+ }
+
+ if (columnFamilyDescriptor != null) {
+ hbaseConf = columnFamilyDescriptor.getConfiguration();
+ }
+
+ HBaseOperationContext hbaseOperationContext = new HBaseOperationContext(tableNameSpace, tableName, columnFamilyDescriptor, columnFamily, operation, ugi, userName, owner, hbaseConf);
+ createAtlasInstances(hbaseOperationContext);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasHook.handleHBaseColumnFamilyOperation(): {}", hbaseOperationContext);
+ }
+ return hbaseOperationContext;
+ }
+
+ private UserGroupInformation getUGI(ObserverContext<?> ctx) {
+ UserGroupInformation ugi = null;
+ User user = null;
+ try {
+ user = getActiveUser(ctx);
+ ugi = UserGroupInformation.getLoginUser();
+ } catch (Exception e) {
+ // not setting the UGI here
+ }
+
+ if (ugi == null) {
+ if (user != null) {
+ ugi = user.getUGI();
+ }
+ }
+
+ LOG.info("HBaseAtlasHook: UGI: {}", ugi);
+ return ugi;
+ }
+
+ private User getActiveUser(ObserverContext<?> ctx) throws IOException {
+ return (User)ctx.getCaller().orElse(User.getCurrent());
+ }
+}
diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseBridge.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseBridge.java
new file mode 100644
index 0000000000..f5a1d2ad51
--- /dev/null
+++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/bridge/HBaseBridge.java
@@ -0,0 +1,720 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hbase.bridge;
+
+import org.apache.atlas.ApplicationProperties;
+import org.apache.atlas.AtlasClientV2;
+import org.apache.atlas.hbase.model.HBaseDataTypes;
+import org.apache.atlas.hook.AtlasHookException;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
+import org.apache.atlas.model.instance.AtlasEntityHeader;
+import org.apache.atlas.model.instance.AtlasObjectId;
+import org.apache.atlas.model.instance.EntityMutationResponse;
+import org.apache.atlas.type.AtlasTypeUtil;
+import org.apache.atlas.utils.AtlasConfigurationUtil;
+import org.apache.atlas.utils.AuthenticationUtil;
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class HBaseBridge {
+ private static final Logger LOG = LoggerFactory.getLogger(HBaseBridge.class);
+
+ private static final int EXIT_CODE_SUCCESS = 0;
+ private static final int EXIT_CODE_FAILED = 1;
+ private static final String ATLAS_ENDPOINT = "atlas.rest.address";
+ private static final String DEFAULT_ATLAS_URL = "http://localhost:21000/";
+ private static final String CLUSTER_NAME_KEY = "atlas.cluster.name";
+ private static final String DEFAULT_CLUSTER_NAME = "primary";
+ private static final String HBASE_METADATA_NAMESPACE = "atlas.metadata.namespace";
+ private static final String QUALIFIED_NAME = "qualifiedName";
+ private static final String NAME = "name";
+ private static final String URI = "uri";
+ private static final String OWNER = "owner";
+ private static final String DESCRIPTION_ATTR = "description";
+ private static final String CLUSTERNAME = "clusterName";
+ private static final String NAMESPACE = "namespace";
+ private static final String TABLE = "table";
+ private static final String COLUMN_FAMILIES = "column_families";
+
+ // table metadata
+ private static final String ATTR_TABLE_MAX_FILESIZE = "maxFileSize";
+ private static final String ATTR_TABLE_ISREADONLY = "isReadOnly";
+ private static final String ATTR_TABLE_ISCOMPACTION_ENABLED = "isCompactionEnabled";
+ private static final String ATTR_TABLE_REPLICATION_PER_REGION = "replicasPerRegion";
+ private static final String ATTR_TABLE_DURABLILITY = "durability";
+ private static final String ATTR_TABLE_NORMALIZATION_ENABLED = "isNormalizationEnabled";
+
+ // column family metadata
+ private static final String ATTR_CF_BLOOMFILTER_TYPE = "bloomFilterType";
+ private static final String ATTR_CF_COMPRESSION_TYPE = "compressionType";
+ private static final String ATTR_CF_COMPACTION_COMPRESSION_TYPE = "compactionCompressionType";
+ private static final String ATTR_CF_ENCRYPTION_TYPE = "encryptionType";
+ private static final String ATTR_CF_KEEP_DELETE_CELLS = "keepDeletedCells";
+ private static final String ATTR_CF_MAX_VERSIONS = "maxVersions";
+ private static final String ATTR_CF_MIN_VERSIONS = "minVersions";
+ private static final String ATTR_CF_DATA_BLOCK_ENCODING = "dataBlockEncoding";
+ private static final String ATTR_CF_TTL = "ttl";
+ private static final String ATTR_CF_BLOCK_CACHE_ENABLED = "blockCacheEnabled";
+ private static final String ATTR_CF_CACHED_BLOOM_ON_WRITE = "cacheBloomsOnWrite";
+ private static final String ATTR_CF_CACHED_DATA_ON_WRITE = "cacheDataOnWrite";
+ private static final String ATTR_CF_CACHED_INDEXES_ON_WRITE = "cacheIndexesOnWrite";
+ private static final String ATTR_CF_EVICT_BLOCK_ONCLOSE = "evictBlocksOnClose";
+ private static final String ATTR_CF_PREFETCH_BLOCK_ONOPEN = "prefetchBlocksOnOpen";
+ private static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName";
+ private static final String ATTR_CF_INMEMORY_COMPACTION_POLICY = "inMemoryCompactionPolicy";
+ private static final String ATTR_CF_MOB_COMPATCTPARTITION_POLICY = "mobCompactPartitionPolicy";
+ private static final String ATTR_CF_MOB_ENABLED = "isMobEnabled";
+ private static final String ATTR_CF_NEW_VERSION_BEHAVIOR = "newVersionBehavior";
+
+ private static final String HBASE_NAMESPACE_QUALIFIED_NAME = "%s@%s";
+ private static final String HBASE_TABLE_QUALIFIED_NAME_FORMAT = "%s:%s@%s";
+ private static final String HBASE_COLUMN_FAMILY_QUALIFIED_NAME_FORMAT = "%s:%s.%s@%s";
+
+ private final String metadataNamespace;
+ private final AtlasClientV2 atlasClientV2;
+ private final Admin hbaseAdmin;
+
+
+ public static void main(String[] args) {
+ int exitCode = EXIT_CODE_FAILED;
+ AtlasClientV2 atlasClientV2 = null;
+
+ try {
+ Options options = new Options();
+ options.addOption("n","namespace", true, "namespace");
+ options.addOption("t", "table", true, "tablename");
+ options.addOption("f", "filename", true, "filename");
+
+ CommandLineParser parser = new BasicParser();
+ CommandLine cmd = parser.parse(options, args);
+ String namespaceToImport = cmd.getOptionValue("n");
+ String tableToImport = cmd.getOptionValue("t");
+ String fileToImport = cmd.getOptionValue("f");
+ Configuration atlasConf = ApplicationProperties.get();
+ String[] urls = atlasConf.getStringArray(ATLAS_ENDPOINT);
+
+ if (urls == null || urls.length == 0) {
+ urls = new String[] { DEFAULT_ATLAS_URL };
+ }
+
+
+ if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) {
+ String[] basicAuthUsernamePassword = AuthenticationUtil.getBasicAuthenticationInput();
+
+ atlasClientV2 = new AtlasClientV2(urls, basicAuthUsernamePassword);
+ } else {
+ UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
+
+ atlasClientV2 = new AtlasClientV2(ugi, ugi.getShortUserName(), urls);
+ }
+
+ HBaseBridge importer = new HBaseBridge(atlasConf, atlasClientV2);
+
+ if (StringUtils.isNotEmpty(fileToImport)) {
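+ // each line of the file is expected to be "<namespace>:<table>"; the table part is optional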
+ File f = new File(fileToImport);
+
+ if (f.exists() && f.canRead()) {
+ BufferedReader br = new BufferedReader(new FileReader(f));
+ String line = null;
+
+ while((line = br.readLine()) != null) {
+ String val[] = line.split(":");
+
+ if (ArrayUtils.isNotEmpty(val)) {
+ namespaceToImport = val[0];
+
+ if (val.length > 1) {
+ tableToImport = val[1];
+ } else {
+ tableToImport = "";
+ }
+
+ importer.importHBaseEntities(namespaceToImport, tableToImport);
+ }
+ }
+
+ exitCode = EXIT_CODE_SUCCESS;
+ } else {
+ LOG.error("Failed to read the file");
+ }
+ } else {
+ importer.importHBaseEntities(namespaceToImport, tableToImport);
+
+ exitCode = EXIT_CODE_SUCCESS;
+ }
+ } catch (ParseException e) {
+ LOG.error("Failed to parse arguments. Error: {}", e.getMessage());
+ printUsage();
+ } catch(Exception e) {
+ System.out.println("ImportHBaseEntities failed. Please check the log file for the detailed error message");
+
+ LOG.error("ImportHBaseEntities failed", e);
+ } finally {
+ if (atlasClientV2 != null) {
+ atlasClientV2.close();
+ }
+ }
+
+ System.exit(exitCode);
+ }
+
+ public HBaseBridge(Configuration atlasConf, AtlasClientV2 atlasClientV2) throws Exception {
+ this.atlasClientV2 = atlasClientV2;
+ this.metadataNamespace = getMetadataNamespace(atlasConf);
+
+ org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
+
+ LOG.info("checking HBase availability..");
+
+ HBaseAdmin.available(conf);
+
+ LOG.info("HBase is available");
+
+ Connection conn = ConnectionFactory.createConnection(conf);
+
+ hbaseAdmin = conn.getAdmin();
+ }
+
+ private String getMetadataNamespace(Configuration config) {
+ return AtlasConfigurationUtil.getRecentString(config, HBASE_METADATA_NAMESPACE, getClusterName(config));
+ }
+
+ private String getClusterName(Configuration config) {
+ return config.getString(CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME);
+ }
+
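+ /**
+ * Imports entities based on the supplied filters: everything when both arguments are empty,
+ * a namespace (optionally restricted to a table) when the namespace is given, or a table
+ * lookup when only the table is given. Returns true when an import was triggered.
+ */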
+ private boolean importHBaseEntities(String namespaceToImport, String tableToImport) throws Exception {
+ boolean ret = false;
+
+ if (StringUtils.isEmpty(namespaceToImport) && StringUtils.isEmpty(tableToImport)) {
+ // when both NameSpace and Table options are not present
+ importNameSpaceAndTable();
+ ret = true;
+ } else if (StringUtils.isNotEmpty(namespaceToImport)) {
+ // When Namespace option is present or both namespace and table options are present
+ importNameSpaceWithTable(namespaceToImport, tableToImport);
+ ret = true;
+ } else if (StringUtils.isNotEmpty(tableToImport)) {
+ importTable(tableToImport);
+ ret = true;
+ }
+
+ return ret;
+ }
+
+ public void importNameSpace(final String nameSpace) throws Exception {
+ List<NamespaceDescriptor> matchingNameSpaceDescriptors = getMatchingNameSpaces(nameSpace);
+
+ if (CollectionUtils.isNotEmpty(matchingNameSpaceDescriptors)) {
+ for (NamespaceDescriptor namespaceDescriptor : matchingNameSpaceDescriptors) {
+ createOrUpdateNameSpace(namespaceDescriptor);
+ }
+ } else {
+ throw new AtlasHookException("No NameSpace found for the given criteria. NameSpace = " + nameSpace);
+ }
+ }
+
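+ /**
+ * Imports all tables whose name matches the given expression; the name may be qualified with a
+ * namespace ("ns:table") or given as a regex, in which case every matching table is registered.
+ */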
+ public void importTable(final String tableName) throws Exception {
+ String tableNameStr = null;
+ TableDescriptor[] htds = hbaseAdmin.listTables(Pattern.compile(tableName));
+
+ if (ArrayUtils.isNotEmpty(htds)) {
+ for (TableDescriptor htd : htds) {
+ String tblNameWithNameSpace = htd.getTableName().getNameWithNamespaceInclAsString();
+ String tblNameWithOutNameSpace = htd.getTableName().getNameAsString();
+
+ if (tableName.equals(tblNameWithNameSpace)) {
+ tableNameStr = tblNameWithNameSpace;
+ } else if (tableName.equals(tblNameWithOutNameSpace)) {
+ tableNameStr = tblNameWithOutNameSpace;
+ } else {
+ // when wild cards are used in table name
+ if (tblNameWithNameSpace != null) {
+ tableNameStr = tblNameWithNameSpace;
+ } else if (tblNameWithOutNameSpace != null) {
+ tableNameStr = tblNameWithOutNameSpace;
+ }
+ }
+
+ byte[] nsByte = htd.getTableName().getNamespace();
+ String nsName = new String(nsByte);
+ NamespaceDescriptor nsDescriptor = hbaseAdmin.getNamespaceDescriptor(nsName);
+ AtlasEntityWithExtInfo entity = createOrUpdateNameSpace(nsDescriptor);
+ ColumnFamilyDescriptor[] hcdts = htd.getColumnFamilies();
+
+ createOrUpdateTable(nsName, tableNameStr, entity.getEntity(), htd, hcdts);
+ }
+ } else {
+ throw new AtlasHookException("No Table found for the given criteria. Table = " + tableName);
+ }
+ }
+
+ private void importNameSpaceAndTable() throws Exception {
+ NamespaceDescriptor[] namespaceDescriptors = hbaseAdmin.listNamespaceDescriptors();
+
+ if (ArrayUtils.isNotEmpty(namespaceDescriptors)) {
+ for (NamespaceDescriptor namespaceDescriptor : namespaceDescriptors) {
+ String namespace = namespaceDescriptor.getName();
+
+ importNameSpace(namespace);
+ }
+ }
+
+ TableDescriptor[] htds = hbaseAdmin.listTables();
+
+ if (ArrayUtils.isNotEmpty(htds)) {
+ for (TableDescriptor htd : htds) {
+ String tableName = htd.getTableName().getNameAsString();
+
+ importTable(tableName);
+ }
+ }
+ }
+
+ private void importNameSpaceWithTable(String namespaceToImport, String tableToImport) throws Exception {
+ importNameSpace(namespaceToImport);
+
+ List<TableDescriptor> hTableDescriptors = new ArrayList<>();
+
+ if (StringUtils.isEmpty(tableToImport)) {
+ List<NamespaceDescriptor> matchingNameSpaceDescriptors = getMatchingNameSpaces(namespaceToImport);
+
+ if (CollectionUtils.isNotEmpty(matchingNameSpaceDescriptors)) {
+ hTableDescriptors = getTableDescriptors(matchingNameSpaceDescriptors);
+ }
+ } else {
+ tableToImport = namespaceToImport + ":" + tableToImport;
+
+ TableDescriptor[] htds = hbaseAdmin.listTables(Pattern.compile(tableToImport));
+
+ hTableDescriptors.addAll(Arrays.asList(htds));
+ }
+
+ if (CollectionUtils.isNotEmpty(hTableDescriptors)) {
+ for (TableDescriptor htd : hTableDescriptors) {
+ String tblName = htd.getTableName().getNameAsString();
+
+ importTable(tblName);
+ }
+ }
+ }
+
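+ // Treats the argument as a regular expression and returns every namespace whose name matches it.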
+ private List<NamespaceDescriptor> getMatchingNameSpaces(String nameSpace) throws Exception {
+ List<NamespaceDescriptor> ret = new ArrayList<>();
+ NamespaceDescriptor[] namespaceDescriptors = hbaseAdmin.listNamespaceDescriptors();
+ Pattern pattern = Pattern.compile(nameSpace);
+
+ for (NamespaceDescriptor namespaceDescriptor : namespaceDescriptors) {
+ String nmSpace = namespaceDescriptor.getName();
+ Matcher matcher = pattern.matcher(nmSpace);
+
+ if (matcher.find()) {
+ ret.add(namespaceDescriptor);
+ }
+ }
+
+ return ret;
+ }
+
+ private List<TableDescriptor> getTableDescriptors(List<NamespaceDescriptor> namespaceDescriptors) throws Exception {
+ List<TableDescriptor> ret = new ArrayList<>();
+
+ for (NamespaceDescriptor namespaceDescriptor : namespaceDescriptors) {
+ TableDescriptor[] tableDescriptors = hbaseAdmin.listTableDescriptorsByNamespace(namespaceDescriptor.getName());
+
+ ret.addAll(Arrays.asList(tableDescriptors));
+ }
+
+ return ret;
+ }
+
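+ /**
+ * Looks up the namespace by its qualifiedName and either creates it in Atlas or updates the
+ * existing entity; the same lookup-then-create-or-update pattern is used for tables and
+ * column families below.
+ */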
+ protected AtlasEntityWithExtInfo createOrUpdateNameSpace(NamespaceDescriptor namespaceDescriptor) throws Exception {
+ String nsName = namespaceDescriptor.getName();
+ String nsQualifiedName = getNameSpaceQualifiedName(metadataNamespace, nsName);
+ AtlasEntityWithExtInfo nsEntity = findNameSpaceEntityInAtlas(nsQualifiedName);
+
+ if (nsEntity == null) {
+ LOG.info("Importing NameSpace: " + nsQualifiedName);
+
+ AtlasEntity entity = getNameSpaceEntity(nsName, null);
+
+ nsEntity = createEntityInAtlas(new AtlasEntityWithExtInfo(entity));
+ } else {
+ LOG.info("NameSpace already present in Atlas. Updating it..: " + nsQualifiedName);
+
+ AtlasEntity entity = getNameSpaceEntity(nsName, nsEntity.getEntity());
+
+ nsEntity.setEntity(entity);
+
+ nsEntity = updateEntityInAtlas(nsEntity);
+ }
+ return nsEntity;
+ }
+
+ protected AtlasEntityWithExtInfo createOrUpdateTable(String nameSpace, String tableName, AtlasEntity nameSpaceEntity, TableDescriptor htd, ColumnFamilyDescriptor[] hcdts) throws Exception {
+ String owner = htd.getOwnerString();
+ String tblQualifiedName = getTableQualifiedName(metadataNamespace, nameSpace, tableName);
+ AtlasEntityWithExtInfo ret = findTableEntityInAtlas(tblQualifiedName);
+
+ if (ret == null) {
+ LOG.info("Importing Table: " + tblQualifiedName);
+
+ AtlasEntity entity = getTableEntity(nameSpace, tableName, owner, nameSpaceEntity, htd, null);
+
+ ret = createEntityInAtlas(new AtlasEntityWithExtInfo(entity));
+ } else {
+ LOG.info("Table already present in Atlas. Updating it..: " + tblQualifiedName);
+
+ AtlasEntity entity = getTableEntity(nameSpace, tableName, owner, nameSpaceEntity, htd, ret.getEntity());
+
+ ret.setEntity(entity);
+
+ ret = updateEntityInAtlas(ret);
+ }
+
+ AtlasEntity tableEntity = ret.getEntity();
+
+ if (tableEntity != null) {
+ List<AtlasEntityWithExtInfo> cfEntities = createOrUpdateColumnFamilies(nameSpace, tableName, owner, hcdts, tableEntity);
+
+ List<AtlasObjectId> cfIDs = new ArrayList<>();
+
+ if (CollectionUtils.isNotEmpty(cfEntities)) {
+ for (AtlasEntityWithExtInfo cfEntity : cfEntities) {
+ cfIDs.add(AtlasTypeUtil.getAtlasObjectId(cfEntity.getEntity()));
+ }
+ }
+ tableEntity.setRelationshipAttribute(COLUMN_FAMILIES, AtlasTypeUtil.getAtlasRelatedObjectIdList(cfIDs, HBaseAtlasHook.RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES));
+ }
+
+ return ret;
+ }
+
+ protected List<AtlasEntityWithExtInfo> createOrUpdateColumnFamilies(String nameSpace, String tableName, String owner, ColumnFamilyDescriptor[] hcdts, AtlasEntity tableEntity) throws Exception {
+ List<AtlasEntityWithExtInfo> ret = new ArrayList<>();
+
+ if (hcdts != null) {
+ AtlasObjectId tableId = AtlasTypeUtil.getAtlasObjectId(tableEntity);
+
+ for (ColumnFamilyDescriptor columnFamilyDescriptor : hcdts) {
+ String cfName = columnFamilyDescriptor.getNameAsString();
+ String cfQualifiedName = getColumnFamilyQualifiedName(metadataNamespace, nameSpace, tableName, cfName);
+ AtlasEntityWithExtInfo cfEntity = findColumnFamilyEntityInAtlas(cfQualifiedName);
+
+ if (cfEntity == null) {
+ LOG.info("Importing Column-family: " + cfQualifiedName);
+
+ AtlasEntity entity = getColumnFamilyEntity(nameSpace, tableName, owner, columnFamilyDescriptor, tableId, null);
+
+ cfEntity = createEntityInAtlas(new AtlasEntityWithExtInfo(entity));
+ } else {
+ LOG.info("ColumnFamily already present in Atlas. Updating it..: " + cfQualifiedName);
+
+ AtlasEntity entity = getColumnFamilyEntity(nameSpace, tableName, owner, columnFamilyDescriptor, tableId, cfEntity.getEntity());
+
+ cfEntity.setEntity(entity);
+
+ cfEntity = updateEntityInAtlas(cfEntity);
+ }
+
+ ret.add(cfEntity);
+ }
+ }
+
+ return ret;
+ }
+
+ private AtlasEntityWithExtInfo findNameSpaceEntityInAtlas(String nsQualifiedName) {
+ AtlasEntityWithExtInfo ret = null;
+
+ try {
+ ret = findEntityInAtlas(HBaseDataTypes.HBASE_NAMESPACE.getName(), nsQualifiedName);
+ clearRelationshipAttributes(ret);
+ } catch (Exception e) {
+ ret = null; // entity doesn't exist in Atlas
+ }
+
+ return ret;
+ }
+
+ private AtlasEntityWithExtInfo findTableEntityInAtlas(String tableQualifiedName) {
+ AtlasEntityWithExtInfo ret = null;
+
+ try {
+ ret = findEntityInAtlas(HBaseDataTypes.HBASE_TABLE.getName(), tableQualifiedName);
+ clearRelationshipAttributes(ret);
+ } catch (Exception e) {
+ ret = null; // entity doesn't exist in Atlas
+ }
+
+ return ret;
+ }
+
+ private AtlasEntityWithExtInfo findColumnFamilyEntityInAtlas(String columnFamilyQualifiedName) {
+ AtlasEntityWithExtInfo ret = null;
+
+ try {
+ ret = findEntityInAtlas(HBaseDataTypes.HBASE_COLUMN_FAMILY.getName(), columnFamilyQualifiedName);
+ clearRelationshipAttributes(ret);
+ } catch (Exception e) {
+ ret = null; // entity doesn't exist in Atlas
+ }
+
+ return ret;
+ }
+
+ private AtlasEntityWithExtInfo findEntityInAtlas(String typeName, String qualifiedName) throws Exception {
+ Map<String, String> attributes = Collections.singletonMap(QUALIFIED_NAME, qualifiedName);
+
+ return atlasClientV2.getEntityByAttribute(typeName, attributes);
+ }
+
+ private AtlasEntity getNameSpaceEntity(String nameSpace, AtlasEntity nsEntity) {
+ AtlasEntity ret = null;
+
+ if (nsEntity == null) {
+ ret = new AtlasEntity(HBaseDataTypes.HBASE_NAMESPACE.getName());
+ } else {
+ ret = nsEntity;
+ }
+
+ String qualifiedName = getNameSpaceQualifiedName(metadataNamespace, nameSpace);
+
+ ret.setAttribute(QUALIFIED_NAME, qualifiedName);
+ ret.setAttribute(CLUSTERNAME, metadataNamespace);
+ ret.setAttribute(NAME, nameSpace);
+ ret.setAttribute(DESCRIPTION_ATTR, nameSpace);
+
+ return ret;
+ }
+
+ private AtlasEntity getTableEntity(String nameSpace, String tableName, String owner, AtlasEntity nameSpaceEntity, TableDescriptor htd, AtlasEntity atlasEntity) {
+ AtlasEntity ret = null;
+
+ if (atlasEntity == null) {
+ ret = new AtlasEntity(HBaseDataTypes.HBASE_TABLE.getName());
+ } else {
+ ret = atlasEntity;
+ }
+
+ String tableQualifiedName = getTableQualifiedName(metadataNamespace, nameSpace, tableName);
+
+ ret.setAttribute(QUALIFIED_NAME, tableQualifiedName);
+ ret.setAttribute(CLUSTERNAME, metadataNamespace);
+ ret.setRelationshipAttribute(NAMESPACE, AtlasTypeUtil.getAtlasRelatedObjectId(nameSpaceEntity, HBaseAtlasHook.RELATIONSHIP_HBASE_TABLE_NAMESPACE));
+ ret.setAttribute(NAME, tableName);
+ ret.setAttribute(DESCRIPTION_ATTR, tableName);
+ ret.setAttribute(OWNER, owner);
+ ret.setAttribute(URI, tableName);
+ ret.setAttribute(ATTR_TABLE_MAX_FILESIZE, htd.getMaxFileSize());
+ ret.setAttribute(ATTR_TABLE_REPLICATION_PER_REGION, htd.getRegionReplication());
+ ret.setAttribute(ATTR_TABLE_ISREADONLY, htd.isReadOnly());
+ ret.setAttribute(ATTR_TABLE_ISCOMPACTION_ENABLED, htd.isCompactionEnabled());
+ ret.setAttribute(ATTR_TABLE_DURABLILITY, (htd.getDurability() != null ? htd.getDurability().name() : null));
+ ret.setAttribute(ATTR_TABLE_NORMALIZATION_ENABLED, htd.isNormalizationEnabled());
+
+ return ret;
+ }
+
+ private AtlasEntity getColumnFamilyEntity(String nameSpace, String tableName, String owner, ColumnFamilyDescriptor hcdt, AtlasObjectId tableId, AtlasEntity atlasEntity){
+ AtlasEntity ret = null;
+
+ if (atlasEntity == null) {
+ ret = new AtlasEntity(HBaseDataTypes.HBASE_COLUMN_FAMILY.getName());
+ } else {
+ ret = atlasEntity;
+ }
+
+ String cfName = hcdt.getNameAsString();
+ String cfQualifiedName = getColumnFamilyQualifiedName(metadataNamespace, nameSpace, tableName, cfName);
+
+ ret.setAttribute(QUALIFIED_NAME, cfQualifiedName);
+ ret.setAttribute(CLUSTERNAME, metadataNamespace);
+ ret.setRelationshipAttribute(TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(tableId, HBaseAtlasHook.RELATIONSHIP_HBASE_TABLE_COLUMN_FAMILIES));
+ ret.setAttribute(NAME, cfName);
+ ret.setAttribute(DESCRIPTION_ATTR, cfName);
+ ret.setAttribute(OWNER, owner);
+ ret.setAttribute(ATTR_CF_BLOCK_CACHE_ENABLED, hcdt.isBlockCacheEnabled());
+ ret.setAttribute(ATTR_CF_BLOOMFILTER_TYPE, (hcdt.getBloomFilterType() != null ? hcdt.getBloomFilterType().name():null));
+ ret.setAttribute(ATTR_CF_CACHED_BLOOM_ON_WRITE, hcdt.isCacheBloomsOnWrite());
+ ret.setAttribute(ATTR_CF_CACHED_DATA_ON_WRITE, hcdt.isCacheDataOnWrite());
+ ret.setAttribute(ATTR_CF_CACHED_INDEXES_ON_WRITE, hcdt.isCacheIndexesOnWrite());
+ ret.setAttribute(ATTR_CF_COMPACTION_COMPRESSION_TYPE, (hcdt.getCompactionCompressionType() != null ? hcdt.getCompactionCompressionType().name():null));
+ ret.setAttribute(ATTR_CF_COMPRESSION_TYPE, (hcdt.getCompressionType() != null ? hcdt.getCompressionType().name():null));
+ ret.setAttribute(ATTR_CF_DATA_BLOCK_ENCODING, (hcdt.getDataBlockEncoding() != null ? hcdt.getDataBlockEncoding().name():null));
+ ret.setAttribute(ATTR_CF_ENCRYPTION_TYPE, hcdt.getEncryptionType());
+ ret.setAttribute(ATTR_CF_EVICT_BLOCK_ONCLOSE, hcdt.isEvictBlocksOnClose());
+ ret.setAttribute(ATTR_CF_KEEP_DELETE_CELLS, ( hcdt.getKeepDeletedCells() != null ? hcdt.getKeepDeletedCells().name():null));
+ ret.setAttribute(ATTR_CF_MAX_VERSIONS, hcdt.getMaxVersions());
+ ret.setAttribute(ATTR_CF_MIN_VERSIONS, hcdt.getMinVersions());
+ ret.setAttribute(ATTR_CF_PREFETCH_BLOCK_ONOPEN, hcdt.isPrefetchBlocksOnOpen());
+ ret.setAttribute(ATTR_CF_TTL, hcdt.getTimeToLive());
+ ret.setAttribute(ATTR_CF_INMEMORY_COMPACTION_POLICY, (hcdt.getInMemoryCompaction() != null ? hcdt.getInMemoryCompaction().name():null));
+ ret.setAttribute(ATTR_CF_MOB_COMPATCTPARTITION_POLICY, ( hcdt.getMobCompactPartitionPolicy() != null ? hcdt.getMobCompactPartitionPolicy().name():null));
+ ret.setAttribute(ATTR_CF_MOB_ENABLED,hcdt.isMobEnabled());
+ ret.setAttribute(ATTR_CF_NEW_VERSION_BEHAVIOR,hcdt.isNewVersionBehavior());
+
+ return ret;
+ }
+
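+ // Creates the entity and re-reads it by GUID so the returned instance reflects what the server stored.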
+ private AtlasEntityWithExtInfo createEntityInAtlas(AtlasEntityWithExtInfo entity) throws Exception {
+ AtlasEntityWithExtInfo ret = null;
+ EntityMutationResponse response = atlasClientV2.createEntity(entity);
+ List<AtlasEntityHeader> entities = response.getCreatedEntities();
+
+ if (CollectionUtils.isNotEmpty(entities)) {
+ AtlasEntityWithExtInfo getByGuidResponse = atlasClientV2.getEntityByGuid(entities.get(0).getGuid());
+
+ ret = getByGuidResponse;
+
+ LOG.info("Created {} entity: name={}, guid={}", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid());
+ }
+ return ret;
+ }
+
+ private AtlasEntityWithExtInfo updateEntityInAtlas(AtlasEntityWithExtInfo entity) throws Exception {
+ AtlasEntityWithExtInfo ret = null;
+ EntityMutationResponse response = atlasClientV2.updateEntity(entity);
+
+ if (response != null) {
+ List<AtlasEntityHeader> entities = response.getUpdatedEntities();
+
+ if (CollectionUtils.isNotEmpty(entities)) {
+ AtlasEntityWithExtInfo getByGuidResponse = atlasClientV2.getEntityByGuid(entities.get(0).getGuid());
+
+ ret = getByGuidResponse;
+
+ LOG.info("Updated {} entity: name={}, guid={} ", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid());
+ } else {
+ LOG.info("Entity: name={} ", entity.toString() + " not updated as it is unchanged from what is in Atlas" );
+ ret = entity;
+ }
+ } else {
+ LOG.info("Entity: name={} ", entity.toString() + " not updated as it is unchanged from what is in Atlas" );
+ ret = entity;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a ColumnFamily instance in Atlas.
+ * @param metadataNamespace Metadata namespace of the cluster to which the Hbase component belongs
+ * @param nameSpace Name of the Hbase database to which the Table belongs
+ * @param tableName Name of the Hbase table
+ * @param columnFamily Name of the ColumnFamily
+ * @return Unique qualified name to identify the ColumnFamily instance in Atlas.
+ */
+ private static String getColumnFamilyQualifiedName(String metadataNamespace, String nameSpace, String tableName, String columnFamily) {
+ tableName = stripNameSpace(tableName);
+ return String.format(HBASE_COLUMN_FAMILY_QUALIFIED_NAME_FORMAT, nameSpace, tableName, columnFamily, metadataNamespace);
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a Table instance in Atlas.
+ * @param metadataNamespace Metadata namespace of the cluster to which the Hbase component belongs
+ * @param nameSpace Name of the Hbase database to which the Table belongs
+ * @param tableName Name of the Hbase table
+ * @return Unique qualified name to identify the Table instance in Atlas.
+ */
+ private static String getTableQualifiedName(String metadataNamespace, String nameSpace, String tableName) {
+ tableName = stripNameSpace(tableName);
+ return String.format(HBASE_TABLE_QUALIFIED_NAME_FORMAT, nameSpace, tableName, metadataNamespace);
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a Hbase NameSpace instance in Atlas.
+ * @param metadataNamespace Metadata namespace of the cluster to which the Hbase component belongs
+ * @param nameSpace Name of the NameSpace
+ * @return Unique qualified name to identify the HBase NameSpace instance in Atlas.
+ */
+ private static String getNameSpaceQualifiedName(String metadataNamespace, String nameSpace) {
+ return String.format(HBASE_NAMESPACE_QUALIFIED_NAME, nameSpace, metadataNamespace);
+ }
+
+ private static String stripNameSpace(String tableName){
+ tableName = tableName.substring(tableName.indexOf(":")+1);
+
+ return tableName;
+ }
+
+ private static void printUsage() {
+ System.out.println("Usage 1: import-hbase.sh [-n OR --namespace ] [-t OR --table ]");
+ System.out.println("Usage 2: import-hbase.sh [-f ]" );
+ System.out.println(" Format:");
+ System.out.println(" namespace1:tbl1");
+ System.out.println(" namespace1:tbl2");
+ System.out.println(" namespace2:tbl1");
+ }
+
+ private void clearRelationshipAttributes(AtlasEntityWithExtInfo entity) {
+ if (entity != null) {
+ clearRelationshipAttributes(entity.getEntity());
+
+ if (entity.getReferredEntities() != null) {
+ clearRelationshipAttributes(entity.getReferredEntities().values());
+ }
+ }
+ }
+
+ private void clearRelationshipAttributes(Collection<AtlasEntity> entities) {
+ if (entities != null) {
+ for (AtlasEntity entity : entities) {
+ clearRelationshipAttributes(entity);
+ }
+ }
+ }
+
+ private void clearRelationshipAttributes(AtlasEntity entity) {
+ if (entity != null && entity.getRelationshipAttributes() != null) {
+ entity.getRelationshipAttributes().clear();
+ }
+ }
+}
diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java
new file mode 100644
index 0000000000..313132de6c
--- /dev/null
+++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/hook/HBaseAtlasCoprocessor.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hbase.hook;
+
+
+import org.apache.atlas.hbase.bridge.HBaseAtlasHook;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
+import org.apache.hadoop.hbase.coprocessor.BulkLoadObserver;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.MasterObserver;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.coprocessor.RegionServerObserver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
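+/**
+ * HBase master coprocessor that forwards DDL events (create/delete/modify of tables and
+ * namespaces, snapshot clone/restore) to {@link HBaseAtlasHook} so the corresponding Atlas
+ * entities are kept up to date.
+ */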
+public class HBaseAtlasCoprocessor implements MasterCoprocessor, MasterObserver, RegionObserver, RegionServerObserver {
+ private static final Logger LOG = LoggerFactory.getLogger(HBaseAtlasCoprocessor.class);
+
+ final HBaseAtlasHook hbaseAtlasHook;
+
+ public HBaseAtlasCoprocessor() {
+ hbaseAtlasHook = HBaseAtlasHook.getInstance();
+ }
+
+ @Override
+ public void postCreateTable(ObserverContext<MasterCoprocessorEnvironment> observerContext, TableDescriptor tableDescriptor, RegionInfo[] hRegionInfos) throws IOException {
+ LOG.info("==> HBaseAtlasCoprocessor.postCreateTable()");
+
+ hbaseAtlasHook.sendHBaseTableOperation(tableDescriptor, null, HBaseAtlasHook.OPERATION.CREATE_TABLE, observerContext);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postCreateTable()");
+ }
+ }
+
+ @Override
+ public void postDeleteTable(ObserverContext<MasterCoprocessorEnvironment> observerContext, TableName tableName) throws IOException {
+ LOG.info("==> HBaseAtlasCoprocessor.postDeleteTable()");
+ hbaseAtlasHook.sendHBaseTableOperation(null, tableName, HBaseAtlasHook.OPERATION.DELETE_TABLE, observerContext);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postDeleteTable()");
+ }
+ }
+
+ @Override
+ public void postModifyTable(ObserverContext<MasterCoprocessorEnvironment> observerContext, TableName tableName, TableDescriptor tableDescriptor) throws IOException {
+ LOG.info("==> HBaseAtlasCoprocessor.postModifyTable()");
+ hbaseAtlasHook.sendHBaseTableOperation(tableDescriptor, tableName, HBaseAtlasHook.OPERATION.ALTER_TABLE, observerContext);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postModifyTable()");
+ }
+ }
+
+ @Override
+ public void postCreateNamespace(ObserverContext<MasterCoprocessorEnvironment> observerContext, NamespaceDescriptor namespaceDescriptor) throws IOException {
+ LOG.info("==> HBaseAtlasCoprocessor.postCreateNamespace()");
+
+ hbaseAtlasHook.sendHBaseNameSpaceOperation(namespaceDescriptor, null, HBaseAtlasHook.OPERATION.CREATE_NAMESPACE, observerContext);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postCreateNamespace()");
+ }
+ }
+
+ @Override
+ public void postDeleteNamespace(ObserverContext<MasterCoprocessorEnvironment> observerContext, String s) throws IOException {
+ LOG.info("==> HBaseAtlasCoprocessor.postDeleteNamespace()");
+
+ hbaseAtlasHook.sendHBaseNameSpaceOperation(null, s, HBaseAtlasHook.OPERATION.DELETE_NAMESPACE, observerContext);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HBaseAtlasCoprocessor.postDeleteNamespace()");
+ }
+ }
+
+ @Override
+ public void postModifyNamespace(ObserverContext<MasterCoprocessorEnvironment> observerContext, NamespaceDescriptor namespaceDescriptor) throws IOException {
+ LOG.info("==> HBaseAtlasCoprocessor.postModifyNamespace()");
+
+ hbaseAtlasHook.sendHBaseNameSpaceOperation(namespaceDescriptor, null, HBaseAtlasHook.OPERATION.ALTER_NAMESPACE, observerContext);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postModifyNamespace()");
+ }
+ }
+
+ @Override
+ public void postCloneSnapshot(ObserverContext<MasterCoprocessorEnvironment> observerContext, SnapshotDescription snapshot, TableDescriptor tableDescriptor) throws IOException {
+ LOG.info("==> HBaseAtlasCoprocessor.postCloneSnapshot()");
+
+ hbaseAtlasHook.sendHBaseTableOperation(tableDescriptor, null, HBaseAtlasHook.OPERATION.CREATE_TABLE, observerContext);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postCloneSnapshot()");
+ }
+ }
+
+ @Override
+ public void postRestoreSnapshot(ObserverContext<MasterCoprocessorEnvironment> observerContext, SnapshotDescription snapshot, TableDescriptor tableDescriptor) throws IOException {
+ LOG.info("==> HBaseAtlasCoprocessor.postRestoreSnapshot()");
+
+ hbaseAtlasHook.sendHBaseTableOperation(tableDescriptor, snapshot.getTableName(), HBaseAtlasHook.OPERATION.ALTER_TABLE, observerContext);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HBaseAtlasCoprocessor.postRestoreSnapshot()");
+ }
+ }
+
+}
+
+
diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseDataTypes.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseDataTypes.java
new file mode 100644
index 0000000000..b83e1b54ba
--- /dev/null
+++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseDataTypes.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hbase.model;
+
+/**
+ * HBASE Data Types for model and bridge.
+ */
+public enum HBaseDataTypes {
+ // Classes
+ HBASE_NAMESPACE,
+ HBASE_TABLE,
+ HBASE_COLUMN_FAMILY,
+ HBASE_COLUMN;
+
+ public String getName() {
+ return name().toLowerCase();
+ }
+}
diff --git a/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseOperationContext.java b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseOperationContext.java
new file mode 100644
index 0000000000..1ef7c07dec
--- /dev/null
+++ b/addons/hbase-bridge/src/main/java/org/apache/atlas/hbase/model/HBaseOperationContext.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hbase.model;
+
+import org.apache.atlas.hbase.bridge.HBaseAtlasHook;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import java.util.Map;
+
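+/**
+ * Captures the details of a single HBase DDL operation (descriptors, names, user and
+ * configuration) along with the hook notifications generated for it.
+ */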
+public class HBaseOperationContext {
+ private final UserGroupInformation ugi;
+ private final Map<String, String> hbaseConf;
+ private final HBaseAtlasHook.OPERATION operation;
+ private final String user;
+ private final NamespaceDescriptor namespaceDescriptor;
+ private final TableDescriptor tableDescriptor;
+ private final ColumnFamilyDescriptor[] columnFamilyDescriptors;
+ private final TableName tableName;
+ private final String nameSpace;
+ private final String columnFamily;
+ private final String owner;
+ private final ColumnFamilyDescriptor columnFamilyDescriptor;
+
+ public HBaseOperationContext(NamespaceDescriptor namespaceDescriptor, String nameSpace, TableDescriptor tableDescriptor, TableName tableName, ColumnFamilyDescriptor[] columnFamilyDescriptors,
+ ColumnFamilyDescriptor columnFamilyDescriptor, String columnFamily, HBaseAtlasHook.OPERATION operation, UserGroupInformation ugi , String user, String owner,
+ Map<String, String> hbaseConf) {
+ this.namespaceDescriptor = namespaceDescriptor;
+ this.nameSpace = nameSpace;
+ this.tableDescriptor = tableDescriptor;
+ this.tableName = tableName;
+ this.columnFamilyDescriptors = columnFamilyDescriptors;
+ this.columnFamilyDescriptor = columnFamilyDescriptor;
+ this.columnFamily = columnFamily;
+ this.operation = operation;
+ this.ugi = ugi;
+ this.user = user;
+ this.owner = owner;
+ this.hbaseConf = hbaseConf;
+ }
+
+ public HBaseOperationContext(NamespaceDescriptor namespaceDescriptor, String nameSpace, HBaseAtlasHook.OPERATION operation, UserGroupInformation ugi , String user, String owner) {
+ this(namespaceDescriptor, nameSpace, null, null, null, null, null, operation, ugi, user, owner, null);
+ }
+
+ public HBaseOperationContext(String nameSpace, TableDescriptor tableDescriptor, TableName tableName, ColumnFamilyDescriptor[] columnFamilyDescriptors, HBaseAtlasHook.OPERATION operation, UserGroupInformation ugi, String user, String owner, Map<String, String> hbaseConf) {
+ this(null, nameSpace, tableDescriptor, tableName, columnFamilyDescriptors, null, null, operation, ugi, user, owner, hbaseConf);
+ }
+
+ public HBaseOperationContext(String nameSpace, TableName tableName, ColumnFamilyDescriptor columnFamilyDescriptor, String columnFamily, HBaseAtlasHook.OPERATION operation, UserGroupInformation ugi, String user, String owner, Map<String, String> hbaseConf) {
+ this(null, nameSpace, null, tableName, null, columnFamilyDescriptor, columnFamily, operation, ugi, user, owner, hbaseConf);
+ }
+
+ private List<HookNotification> messages = new ArrayList<>();
+
+ public UserGroupInformation getUgi() {
+ return ugi;
+ }
+
+ public Map<String, String> getHbaseConf() {
+ return hbaseConf;
+ }
+
+ public String getUser() {
+ return user;
+ }
+
+ public HBaseAtlasHook.OPERATION getOperation() {
+ return operation;
+ }
+
+ public NamespaceDescriptor getNamespaceDescriptor() {
+ return namespaceDescriptor;
+ }
+
+ public TableDescriptor gethTableDescriptor() {
+ return tableDescriptor;
+ }
+
+ public ColumnFamilyDescriptor[] gethColumnDescriptors() {
+ return columnFamilyDescriptors;
+ }
+
+ public TableName getTableName() {
+ return tableName;
+ }
+
+ public String getNameSpace() {
+ return nameSpace;
+ }
+
+ public ColumnFamilyDescriptor gethColumnDescriptor() {
+ return columnFamilyDescriptor;
+ }
+
+ public String getColummFamily() {
+ return columnFamily;
+ }
+
+ public void addMessage(HookNotification message) {
+ messages.add(message);
+ }
+
+ public String getOwner() {
+ return owner;
+ }
+
+ public List<HookNotification> getMessages() {
+ return messages;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ toString(sb);
+ return sb.toString();
+ }
+
+ public StringBuilder toString(StringBuilder sb) {
+ sb.append("HBaseOperationContext={");
+ sb.append("Operation={").append(operation).append("} ");
+ sb.append("User ={").append(user).append("} ");
+ if (nameSpace != null ) {
+ sb.append("NameSpace={").append(nameSpace).append("}");
+ } else {
+ if (namespaceDescriptor != null) {
+ sb.append("NameSpace={").append(namespaceDescriptor.toString()).append("}");
+ }
+ }
+ if (tableName != null) {
+ sb.append("Table={").append(tableName).append("}");
+ } else {
+ if (tableDescriptor != null) {
+ sb.append("Table={").append(tableDescriptor.toString()).append("}");
+ }
+ }
+ if (columnFamily != null) {
+ sb.append("Column Family={").append(columnFamily).append("}");
+ } else {
+ if (columnFamilyDescriptor != null) {
+ sb.append("Column Family={").append(columnFamilyDescriptor.toString()).append("}");
+ }
+ }
+ sb.append("Message ={").append(getMessages()).append("} ");
+ sb.append(" }");
+ return sb;
+ }
+
+}
diff --git a/addons/hbase-bridge/src/main/resources/atlas-hbase-import-log4j.xml b/addons/hbase-bridge/src/main/resources/atlas-hbase-import-log4j.xml
new file mode 100644
index 0000000000..3fc2dcf9c3
--- /dev/null
+++ b/addons/hbase-bridge/src/main/resources/atlas-hbase-import-log4j.xml
@@ -0,0 +1,55 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/hbase-bridge/src/test/java/org/apache/atlas/hbase/HBaseAtlasHookIT.java b/addons/hbase-bridge/src/test/java/org/apache/atlas/hbase/HBaseAtlasHookIT.java
new file mode 100644
index 0000000000..e346788713
--- /dev/null
+++ b/addons/hbase-bridge/src/test/java/org/apache/atlas/hbase/HBaseAtlasHookIT.java
@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hbase;
+
+import org.apache.atlas.ApplicationProperties;
+import org.apache.atlas.AtlasClient;
+import org.apache.atlas.AtlasClientV2;
+import org.apache.atlas.hbase.bridge.HBaseAtlasHook;
+import org.apache.atlas.hbase.model.HBaseDataTypes;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
+import org.apache.atlas.utils.AuthenticationUtil;
+import org.apache.atlas.utils.ParamChecker;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.net.ServerSocket;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.fail;
+import static org.testng.AssertJUnit.assertFalse;
+
+
+public class HBaseAtlasHookIT {
+ private static final Logger LOG = LoggerFactory.getLogger(HBaseAtlasHookIT.class);
+ protected static final String ATLAS_URL = "http://localhost:31000/";
+ protected static final String CLUSTER_NAME = "primary";
+
+ private HBaseTestingUtility utility;
+ private int port;
+ private AtlasClientV2 atlasClient;
+
+
+ @BeforeClass
+ public void setUp() {
+ try {
+ createHBaseCluster();
+ createAtlasClient();
+ } catch (Exception e) {
+ LOG.error("Unable to create Hbase Admin for Testing ", e);
+ }
+ }
+
+ @AfterClass
+ public void cleanup() throws Exception {
+ LOG.info("Stopping mini cluster.. ");
+ utility.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testGetMetaTableRows() throws Exception {
+ List<byte[]> results = utility.getMetaTableRows();
+ assertFalse("results should have some entries but is empty.", results.isEmpty());
+ }
+
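+ // The following tests are disabled by default; they exercise the coprocessor end-to-end and
+ // expect a reachable Atlas server (see ATLAS_URL) to receive the hook notifications.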
+ @Test (enabled = false)
+ public void testCreateNamespace() throws Exception {
+ final Configuration conf = HBaseConfiguration.create();
+
+ conf.set("hbase.zookeeper.quorum", "localhost");
+ conf.set("hbase.zookeeper.property.clientPort", String.valueOf(port));
+ conf.set("zookeeper.znode.parent", "/hbase-unsecure");
+
+ Connection conn = ConnectionFactory.createConnection(conf);
+ Admin admin = conn.getAdmin();
+ NamespaceDescriptor ns = NamespaceDescriptor.create("test_namespace").build();
+
+ admin.createNamespace(ns);
+
+ //assert on qualified name
+ String nameSpace = assertNameSpaceIsRegistered(ns.getName());
+ AtlasClientV2 atlasClient = getAtlasClient();
+
+ if (atlasClient != null) {
+ AtlasEntityWithExtInfo nameSpaceRef = atlasClient.getEntityByGuid(nameSpace);
+ String nameSpaceQualifiedName = HBaseAtlasHook.getNameSpaceQualifiedName(CLUSTER_NAME, ns.getName());
+
+ Assert.assertEquals(nameSpaceRef.getEntity().getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), nameSpaceQualifiedName);
+ } else {
+ Assert.fail("Unable to create AtlasClient for Testing");
+ }
+ }
+
+ @Test (enabled = false)
+ public void testCreateTable() throws Exception {
+ final Configuration conf = HBaseConfiguration.create();
+
+ conf.set("hbase.zookeeper.quorum", "localhost");
+ conf.set("hbase.zookeeper.property.clientPort", String.valueOf(port));
+ conf.set("zookeeper.znode.parent", "/hbase-unsecure");
+
+ Connection conn = ConnectionFactory.createConnection(conf);
+ Admin admin = conn.getAdmin();
+ String namespace = "test_namespace1";
+ String tablename = "test_table";
+
+ // Create a table
+ if (!admin.tableExists(TableName.valueOf(namespace, tablename))) {
+ NamespaceDescriptor ns = NamespaceDescriptor.create(namespace).build();
+
+ admin.createNamespace(ns);
+
+ HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf(namespace, tablename));
+
+ tableDescriptor.addFamily(new HColumnDescriptor("colfam1"));
+
+ admin.createTable(tableDescriptor);
+ }
+
+ //assert on qualified name
+ String table = assertTableIsRegistered(namespace, tablename);
+ AtlasClientV2 atlasClient = getAtlasClient();
+
+ if (atlasClient != null) {
+ AtlasEntityWithExtInfo tableRef = atlasClient.getEntityByGuid(table);
+ String entityName = HBaseAtlasHook.getTableQualifiedName(CLUSTER_NAME, namespace, tablename);
+
+ Assert.assertEquals(tableRef.getEntity().getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), entityName);
+ } else {
+ Assert.fail("Unable to create AtlasClient for Testing");
+ }
+ }
+
+ // Methods for creating HBase
+
+ private void createAtlasClient() {
+ try {
+ org.apache.commons.configuration.Configuration configuration = ApplicationProperties.get();
+ String[] atlasEndPoint = configuration.getStringArray(HBaseAtlasHook.ATTR_ATLAS_ENDPOINT);
+
+ configuration.setProperty("atlas.cluster.name", CLUSTER_NAME);
+
+ if (atlasEndPoint == null || atlasEndPoint.length == 0) {
+ atlasEndPoint = new String[]{ATLAS_URL};
+ }
+
+ Iterator<String> keys = configuration.getKeys();
+ while (keys.hasNext()) {
+ String key = keys.next();
+ LOG.info("{} = {} ", key, configuration.getString(key));
+ }
+
+ if (AuthenticationUtil.isKerberosAuthenticationEnabled()) {
+ atlasClient = new AtlasClientV2(atlasEndPoint);
+ } else {
+ atlasClient = new AtlasClientV2(configuration, atlasEndPoint, new String[]{"admin", "admin"});
+ }
+ } catch (Exception e) {
+ LOG.error("Unable to create AtlasClient for Testing ", e);
+ }
+ }
+
+ private static int getFreePort() throws IOException {
+ ServerSocket serverSocket = new ServerSocket(0);
+ int port = serverSocket.getLocalPort();
+
+ serverSocket.close();
+
+ return port;
+ }
+
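+ // Starts an in-process HBase mini cluster on free ports with HBaseAtlasCoprocessor registered
+ // as a master coprocessor, so DDL operations performed by the tests go through the hook.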
+ private void createHBaseCluster() throws Exception {
+ LOG.info("Creating Hbase Admin...");
+
+ port = getFreePort();
+ utility = new HBaseTestingUtility();
+
+ utility.getConfiguration().set("test.hbase.zookeeper.property.clientPort", String.valueOf(port));
+ utility.getConfiguration().set("hbase.master.port", String.valueOf(getFreePort()));
+ utility.getConfiguration().set("hbase.master.info.port", String.valueOf(getFreePort()));
+ utility.getConfiguration().set("hbase.regionserver.port", String.valueOf(getFreePort()));
+ utility.getConfiguration().set("hbase.regionserver.info.port", String.valueOf(getFreePort()));
+ utility.getConfiguration().set("zookeeper.znode.parent", "/hbase-unsecure");
+ utility.getConfiguration().set("hbase.table.sanity.checks", "false");
+ utility.getConfiguration().set("hbase.coprocessor.master.classes", "org.apache.atlas.hbase.hook.HBaseAtlasCoprocessor");
+
+ utility.startMiniCluster();
+ }
+
+
+ public AtlasClientV2 getAtlasClient() {
+ AtlasClientV2 ret = null;
+ if (atlasClient != null) {
+ ret = atlasClient;
+ }
+ return ret;
+ }
+
+ protected String assertNameSpaceIsRegistered(String nameSpace) throws Exception {
+ return assertNameSpaceIsRegistered(nameSpace, null);
+ }
+
+ protected String assertNameSpaceIsRegistered(String nameSpace, HBaseAtlasHookIT.AssertPredicate assertPredicate) throws Exception {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Searching for nameSpace {}", nameSpace);
+ }
+ String nameSpaceQualifiedName = HBaseAtlasHook.getNameSpaceQualifiedName(CLUSTER_NAME, nameSpace);
+ return assertEntityIsRegistered(HBaseDataTypes.HBASE_NAMESPACE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+ nameSpaceQualifiedName, assertPredicate);
+ }
+
+ protected String assertTableIsRegistered(String nameSpace, String tableName) throws Exception {
+ return assertTableIsRegistered(nameSpace, tableName, null);
+ }
+
+ protected String assertTableIsRegistered(String nameSpace, String tableName, HBaseAtlasHookIT.AssertPredicate assertPredicate) throws Exception {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Searching for nameSpace:Table {} {}", nameSpace, tableName);
+ }
+ String tableQualifiedName = HBaseAtlasHook.getTableQualifiedName(CLUSTER_NAME, nameSpace, tableName);
+ return assertEntityIsRegistered(HBaseDataTypes.HBASE_TABLE.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName,
+ assertPredicate);
+ }
+
+ public interface AssertPredicate {
+ void assertOnEntity(AtlasEntity entity) throws Exception;
+ }
+
+ public interface Predicate {
+ /**
+ * Perform a predicate evaluation.
+ *
+ * @return the boolean result of the evaluation.
+ * @throws Exception thrown if the predicate evaluation could not evaluate.
+ */
+ void evaluate() throws Exception;
+ }
+
+
+ protected String assertEntityIsRegistered(final String typeName, final String property, final String value,
+ final HBaseAtlasHookIT.AssertPredicate assertPredicate) throws Exception {
+ waitFor(30000, new HBaseAtlasHookIT.Predicate() {
+ @Override
+ public void evaluate() throws Exception {
+ AtlasEntityWithExtInfo entity = atlasClient.getEntityByAttribute(typeName, Collections.singletonMap(property, value));
+
+ assertNotNull(entity);
+
+ if (assertPredicate != null) {
+ assertPredicate.assertOnEntity(entity.getEntity());
+ }
+ }
+ });
+
+ AtlasEntityWithExtInfo entity = atlasClient.getEntityByAttribute(typeName, Collections.singletonMap(property, value));
+
+ return entity.getEntity().getGuid();
+ }
+
+ /**
+ * Wait for a condition, expressed via a {@link HBaseAtlasHookIT.Predicate} to become true.
+ *
+ * @param timeout maximum time in milliseconds to wait for the predicate to become true.
+ * @param predicate predicate waiting on.
+ */
+ protected void waitFor(int timeout, HBaseAtlasHookIT.Predicate predicate) throws Exception {
+ ParamChecker.notNull(predicate, "predicate");
+ long mustEnd = System.currentTimeMillis() + timeout;
+
+ while (true) {
+ try {
+ predicate.evaluate();
+ return;
+ } catch (Error | Exception e) {
+ if (System.currentTimeMillis() >= mustEnd) {
+ fail("Assertions failed. Failing after waiting for timeout " + timeout + " msecs", e);
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Waiting up to {} msec as assertion failed", mustEnd - System.currentTimeMillis(), e);
+ }
+ Thread.sleep(5000);
+ }
+ }
+ }
+
+
+}
diff --git a/addons/hbase-bridge/src/test/resources/atlas-application.properties b/addons/hbase-bridge/src/test/resources/atlas-application.properties
new file mode 100644
index 0000000000..3b12e5fb33
--- /dev/null
+++ b/addons/hbase-bridge/src/test/resources/atlas-application.properties
@@ -0,0 +1,125 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+######### Atlas Server Configs #########
+atlas.rest.address=http://localhost:31000
+
+######### Graph Database Configs #########
+
+
+# Graph database implementation. Value inserted by maven.
+atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase
+atlas.graph.index.search.solr.wait-searcher=true
+
+# Graph Storage
+atlas.graph.storage.backend=berkeleyje
+
+# Entity repository implementation
+atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.InMemoryEntityAuditRepository
+
+# Graph Search Index Backend
+atlas.graph.index.search.backend=solr
+
+#Berkeley storage directory
+atlas.graph.storage.directory=${sys:atlas.data}/berkley
+
+#hbase
+#For standalone mode , specify localhost
+#for distributed mode, specify zookeeper quorum here
+
+atlas.graph.storage.hostname=${graph.storage.hostname}
+atlas.graph.storage.hbase.regions-per-server=1
+atlas.graph.storage.lock.wait-time=10000
+
+#ElasticSearch
+atlas.graph.index.search.directory=${sys:atlas.data}/es
+atlas.graph.index.search.elasticsearch.client-only=false
+atlas.graph.index.search.elasticsearch.local-mode=true
+atlas.graph.index.search.elasticsearch.create.sleep=2000
+
+# Solr cloud mode properties
+atlas.graph.index.search.solr.mode=cloud
+atlas.graph.index.search.solr.zookeeper-url=${solr.zk.address}
+atlas.graph.index.search.solr.embedded=true
+atlas.graph.index.search.max-result-set-size=150
+
+
+######### Notification Configs #########
+atlas.notification.embedded=true
+
+atlas.kafka.zookeeper.connect=localhost:19026
+atlas.kafka.bootstrap.servers=localhost:19027
+atlas.kafka.data=${sys:atlas.data}/kafka
+atlas.kafka.zookeeper.session.timeout.ms=4000
+atlas.kafka.zookeeper.sync.time.ms=20
+atlas.kafka.consumer.timeout.ms=4000
+atlas.kafka.auto.commit.interval.ms=100
+atlas.kafka.hook.group.id=atlas
+atlas.kafka.entities.group.id=atlas_entities
+#atlas.kafka.auto.commit.enable=false
+
+atlas.kafka.enable.auto.commit=false
+atlas.kafka.auto.offset.reset=earliest
+atlas.kafka.session.timeout.ms=30000
+atlas.kafka.offsets.topic.replication.factor=1
+
+
+
+######### Entity Audit Configs #########
+atlas.audit.hbase.tablename=ATLAS_ENTITY_AUDIT_EVENTS
+atlas.audit.zookeeper.session.timeout.ms=1000
+atlas.audit.hbase.zookeeper.quorum=localhost
+atlas.audit.hbase.zookeeper.property.clientPort=19026
+
+######### Security Properties #########
+
+# SSL config
+atlas.enableTLS=false
+atlas.server.https.port=31443
+
+######### Security Properties #########
+
+hbase.security.authentication=simple
+
+atlas.hook.falcon.synchronous=true
+
+######### JAAS Configuration ########
+
+atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule
+atlas.jaas.KafkaClient.loginModuleControlFlag = required
+atlas.jaas.KafkaClient.option.useKeyTab = true
+atlas.jaas.KafkaClient.option.storeKey = true
+atlas.jaas.KafkaClient.option.serviceName = kafka
+atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab
+atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM
+
+######### High Availability Configuration ########
+atlas.server.ha.enabled=false
+#atlas.server.ids=id1
+#atlas.server.address.id1=localhost:21000
+
+######### Atlas Authorization #########
+atlas.authorizer.impl=none
+# atlas.authorizer.impl=simple
+# atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json
+
+######### Atlas Authentication #########
+atlas.authentication.method.file=true
+atlas.authentication.method.ldap.type=none
+atlas.authentication.method.kerberos=false
+# atlas.authentication.method.file.filename=users-credentials.properties
diff --git a/addons/hbase-bridge/src/test/resources/atlas-log4j.xml b/addons/hbase-bridge/src/test/resources/atlas-log4j.xml
new file mode 100755
index 0000000000..2c9815ff54
--- /dev/null
+++ b/addons/hbase-bridge/src/test/resources/atlas-log4j.xml
@@ -0,0 +1,143 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/hbase-bridge/src/test/resources/users-credentials.properties b/addons/hbase-bridge/src/test/resources/users-credentials.properties
new file mode 100644
index 0000000000..5046dbaf64
--- /dev/null
+++ b/addons/hbase-bridge/src/test/resources/users-credentials.properties
@@ -0,0 +1,3 @@
+#username=group::sha256-password
+admin=ADMIN::a4a88c0872bf652bb9ed803ece5fd6e82354838a9bf59ab4babb1dab322154e1
+rangertagsync=RANGER_TAG_SYNC::0afe7a1968b07d4c3ff4ed8c2d809a32ffea706c66cd795ead9048e81cfaf034
diff --git a/addons/hbase-testing-util/pom.xml b/addons/hbase-testing-util/pom.xml
new file mode 100644
index 0000000000..982e9c85ea
--- /dev/null
+++ b/addons/hbase-testing-util/pom.xml
@@ -0,0 +1,228 @@
+
+
+
+ 4.0.0
+
+ apache-atlas
+ org.apache.atlas
+ 3.0.0-SNAPSHOT
+ ../../
+
+ hbase-testing-util
+ Apache HBase - Testing Util
+ HBase Testing Utilities.
+ jar
+
+
+ 3.0.3
+ 9.3.14.v20161028
+
+
+
+
+ org.testng
+ testng
+
+
+
+ org.apache.hbase
+ hbase-server
+ ${hbase.version}
+ compile
+
+
+
+ org.apache.hbase
+ hbase-server
+ ${hbase.version}
+ test-jar
+ compile
+
+
+
+ org.apache.hbase
+ hbase-zookeeper
+ ${hbase.version}
+ jar
+ compile
+
+
+
+ org.apache.hbase
+ hbase-zookeeper
+ ${hbase.version}
+ test-jar
+ compile
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${hadoop.version}
+ compile
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${hadoop.version}
+ test-jar
+ compile
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+
+
+ junit
+ junit
+
+
+
+
+
+ junit
+ junit
+ ${junit.version}
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ ${hadoop.version}
+ compile
+
+
+ org.apache.htrace
+ htrace-core
+
+
+
+
+ org.apache.hadoop
+ hadoop-minikdc
+ ${hadoop.version}
+
+
+
+ org.apache.hbase
+ hbase-hadoop-compat
+ ${hbase.version}
+ jar
+ compile
+
+
+
+ org.apache.hbase
+ hbase-hadoop-compat
+ ${hbase.version}
+ test-jar
+ compile
+
+
+
+ org.apache.hbase
+ hbase-hadoop2-compat
+ ${hbase.version}
+ jar
+ compile
+
+
+ com.github.stephenc.findbugs
+ findbugs-annotations
+
+
+
+
+
+ org.apache.hbase
+ hbase-hadoop2-compat
+ ${hbase.version}
+ test-jar
+ compile
+
+
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+
+ org.apache.hbase
+ hbase-common
+ ${hbase.version}
+ jar
+ compile
+
+
+ com.github.stephenc.findbugs
+ findbugs-annotations
+
+
+
+
+
+ org.apache.hbase
+ hbase-common
+ ${hbase.version}
+ test-jar
+ compile
+
+
+
+ org.apache.hbase
+ hbase-annotations
+ ${hbase.version}
+ test-jar
+ compile
+
+
+ jdk.tools
+ jdk.tools
+
+
+
+
+
+ org.apache.hbase
+ hbase-protocol
+ ${hbase.version}
+ jar
+ compile
+
+
+
+ org.apache.hbase
+ hbase-client
+ ${hbase.version}
+ jar
+ compile
+
+
+
+
+
+
+
+
diff --git a/addons/hbase-testing-util/src/test/java/org/apache/atlas/hbase/TestHBaseTestingUtilSpinup.java b/addons/hbase-testing-util/src/test/java/org/apache/atlas/hbase/TestHBaseTestingUtilSpinup.java
new file mode 100644
index 0000000000..0beb035521
--- /dev/null
+++ b/addons/hbase-testing-util/src/test/java/org/apache/atlas/hbase/TestHBaseTestingUtilSpinup.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.hbase;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+import static org.testng.AssertJUnit.assertFalse;
+
+
+/**
+ * Make sure we can spin up a HBTU without a hbase-site.xml
+ */
+public class TestHBaseTestingUtilSpinup {
+ private static final Logger LOG = LoggerFactory.getLogger(TestHBaseTestingUtilSpinup.class);
+ private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ UTIL.startMiniCluster();
+ if (!UTIL.getHBaseCluster().waitForActiveAndReadyMaster(30000)) {
+ throw new RuntimeException("Active master not ready");
+ }
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testGetMetaTableRows() throws Exception {
+ List<byte[]> results = UTIL.getMetaTableRows();
+ assertFalse("results should have some entries but is empty.", results.isEmpty());
+ }
+
+}
diff --git a/addons/hbase-testing-util/src/test/resources/atlas-log4j.xml b/addons/hbase-testing-util/src/test/resources/atlas-log4j.xml
new file mode 100755
index 0000000000..47d4d5c7ce
--- /dev/null
+++ b/addons/hbase-testing-util/src/test/resources/atlas-log4j.xml
@@ -0,0 +1,130 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/hdfs-model/pom.xml b/addons/hdfs-model/pom.xml
new file mode 100644
index 0000000000..05ba173684
--- /dev/null
+++ b/addons/hdfs-model/pom.xml
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under the Apache License, Version 2.0 -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>apache-atlas</artifactId>
+        <groupId>org.apache.atlas</groupId>
+        <version>3.0.0-SNAPSHOT</version>
+        <relativePath>../../</relativePath>
+    </parent>
+    <artifactId>hdfs-model</artifactId>
+    <description>Apache Atlas FileSystem Model</description>
+    <name>Apache Atlas FileSystem Model</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-log4j12</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.atlas</groupId>
+            <artifactId>atlas-client-v1</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.atlas</groupId>
+            <artifactId>atlas-notification</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-all</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.testng</groupId>
+            <artifactId>testng</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.atlas</groupId>
+            <artifactId>atlas-graphdb-impls</artifactId>
+            <type>pom</type>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.atlas</groupId>
+            <artifactId>atlas-repository</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-site-plugin</artifactId>
+                <dependencies>
+                    <dependency>
+                        <groupId>org.apache.maven.doxia</groupId>
+                        <artifactId>doxia-module-twiki</artifactId>
+                        <version>${doxia.version}</version>
+                    </dependency>
+                    <dependency>
+                        <groupId>org.apache.maven.doxia</groupId>
+                        <artifactId>doxia-core</artifactId>
+                        <version>${doxia.version}</version>
+                    </dependency>
+                </dependencies>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>site</goal>
+                        </goals>
+                        <phase>prepare-package</phase>
+                    </execution>
+                </executions>
+                <configuration>
+                    <generateProjectInfo>false</generateProjectInfo>
+                    <generateReports>false</generateReports>
+                </configuration>
+            </plugin>
+
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <version>1.2.1</version>
+                <inherited>false</inherited>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/addons/hive-bridge-shim/pom.xml b/addons/hive-bridge-shim/pom.xml
new file mode 100755
index 0000000000..849ca2a8f6
--- /dev/null
+++ b/addons/hive-bridge-shim/pom.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under the Apache License, Version 2.0 -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>apache-atlas</artifactId>
+        <groupId>org.apache.atlas</groupId>
+        <version>3.0.0-SNAPSHOT</version>
+        <relativePath>../../</relativePath>
+    </parent>
+    <artifactId>hive-bridge-shim</artifactId>
+    <description>Apache Atlas Hive Bridge Shim Module</description>
+    <name>Apache Atlas Hive Bridge Shim</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.atlas</groupId>
+            <artifactId>atlas-plugin-classloader</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hive</groupId>
+            <artifactId>hive-exec</artifactId>
+            <version>${hive.version}</version>
+            <scope>provided</scope>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
new file mode 100755
index 0000000000..2a4d067e59
--- /dev/null
+++ b/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook;
+
+
+import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader;
+import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
+import org.apache.hadoop.hive.ql.hooks.HookContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Hive hook used for atlas entity registration.
+ */
+public class HiveHook implements ExecuteWithHookContext {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class);
+
+ private static final String ATLAS_PLUGIN_TYPE = "hive";
+ private static final String ATLAS_HIVE_HOOK_IMPL_CLASSNAME = "org.apache.atlas.hive.hook.HiveHook";
+
+ private AtlasPluginClassLoader atlasPluginClassLoader = null;
+ private ExecuteWithHookContext hiveHookImpl = null;
+
+ public HiveHook() {
+ this.initialize();
+ }
+
+ @Override
+ public void run(final HookContext hookContext) throws Exception {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveHook.run({})", hookContext);
+ }
+
+ try {
+ activatePluginClassLoader();
+ hiveHookImpl.run(hookContext);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveHook.run({})", hookContext);
+ }
+ }
+
+ private void initialize() {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveHook.initialize()");
+ }
+
+ try {
+ atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass());
+
+ @SuppressWarnings("unchecked")
+ Class<ExecuteWithHookContext> cls = (Class<ExecuteWithHookContext>) Class
+ .forName(ATLAS_HIVE_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader);
+
+ activatePluginClassLoader();
+
+ hiveHookImpl = cls.newInstance();
+ } catch (Exception excp) {
+ LOG.error("Error instantiating Atlas hook implementation", excp);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveHook.initialize()");
+ }
+ }
+
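+ // Note: this shim only locates and instantiates the real hook implementation through
+ // AtlasPluginClassLoader; activate()/deactivate() below presumably swap the thread's context
+ // class loader so Atlas client jars stay isolated from Hive's own classpath (the class loader
+ // implementation itself is not part of this patch).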
+ private void activatePluginClassLoader() {
+ if (atlasPluginClassLoader != null) {
+ atlasPluginClassLoader.activate();
+ }
+ }
+
+ private void deactivatePluginClassLoader() {
+ if (atlasPluginClassLoader != null) {
+ atlasPluginClassLoader.deactivate();
+ }
+ }
+}
diff --git a/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHook.java b/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHook.java
new file mode 100644
index 0000000000..2894e99bdd
--- /dev/null
+++ b/addons/hive-bridge-shim/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHook.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.hive.hook;
+
+import org.apache.atlas.plugin.classloader.AtlasPluginClassLoader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.MetaStoreEventListener;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.events.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Hive Metastore hook to capture DDL operations for atlas entity registration.
+ */
+public class HiveMetastoreHook extends MetaStoreEventListener {
+ private static final String ATLAS_PLUGIN_TYPE = "hive";
+ private static final String ATLAS_HIVE_METASTORE_HOOK_IMPL_CLASSNAME = "org.apache.atlas.hive.hook.HiveMetastoreHookImpl";
+ public static final Logger LOG = LoggerFactory.getLogger(HiveMetastoreHook.class);
+
+ private AtlasPluginClassLoader atlasPluginClassLoader = null;
+ private MetaStoreEventListener atlasMetastoreHookImpl = null;
+ private Configuration config;
+
+ public HiveMetastoreHook(Configuration config) {
+ super(config);
+
+ this.config = config;
+
+ this.initialize();
+ }
+
+ private void initialize() {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveMetastoreHook.initialize()");
+ }
+
+ try {
+ atlasPluginClassLoader = AtlasPluginClassLoader.getInstance(ATLAS_PLUGIN_TYPE, this.getClass());
+
+ @SuppressWarnings("unchecked")
+ Class<MetaStoreEventListener> cls = (Class<MetaStoreEventListener>)
+ Class.forName(ATLAS_HIVE_METASTORE_HOOK_IMPL_CLASSNAME, true, atlasPluginClassLoader);
+
+ activatePluginClassLoader();
+
+ atlasMetastoreHookImpl = cls.getDeclaredConstructor(Configuration.class).newInstance(config);
+ } catch (Exception ex) {
+ LOG.error("Error instantiating Atlas hook implementation", ex);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveMetastoreHook.initialize()");
+ }
+ }
+
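+ // Each metastore callback below follows the same pattern: activate the plugin class loader,
+ // delegate to the hook implementation loaded in initialize(), and deactivate in a finally block
+ // so the metastore thread is never left on the plugin class loader.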
+ @Override
+ public void onCreateTable(CreateTableEvent tableEvent) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveMetastoreHook.onCreateTable()");
+ }
+
+ try {
+ activatePluginClassLoader();
+
+ atlasMetastoreHookImpl.onCreateTable(tableEvent);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveMetastoreHook.onCreateTable()");
+ }
+ }
+
+ @Override
+ public void onDropTable(DropTableEvent tableEvent) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveMetastoreHook.onDropTable()");
+ }
+
+ try {
+ activatePluginClassLoader();
+
+ atlasMetastoreHookImpl.onDropTable(tableEvent);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveMetastoreHook.onDropTable()");
+ }
+ }
+
+ @Override
+ public void onAlterTable(AlterTableEvent tableEvent) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveMetastoreHook.onAlterTable()");
+ }
+
+ try {
+ activatePluginClassLoader();
+
+ atlasMetastoreHookImpl.onAlterTable(tableEvent);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveMetastoreHook.onAlterTable()");
+ }
+ }
+
+ @Override
+ public void onCreateDatabase(CreateDatabaseEvent dbEvent) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveMetastoreHook.onCreateDatabase()");
+ }
+
+ try {
+ activatePluginClassLoader();
+
+ atlasMetastoreHookImpl.onCreateDatabase(dbEvent);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveMetastoreHook.onCreateDatabase()");
+ }
+ }
+
+ @Override
+ public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveMetastoreHook.onDropDatabase()");
+ }
+
+ try {
+ activatePluginClassLoader();
+
+ atlasMetastoreHookImpl.onDropDatabase(dbEvent);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveMetastoreHook.onDropDatabase()");
+ }
+ }
+
+ @Override
+ public void onAlterDatabase(AlterDatabaseEvent dbEvent) throws MetaException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveMetastoreHook.onAlterDatabase()");
+ }
+
+ try {
+ activatePluginClassLoader();
+
+ atlasMetastoreHookImpl.onAlterDatabase(dbEvent);
+ } finally {
+ deactivatePluginClassLoader();
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveMetastoreHook.onAlterDatabase()");
+ }
+ }
+
+ private void activatePluginClassLoader() {
+ if (atlasPluginClassLoader != null) {
+ atlasPluginClassLoader.activate();
+ }
+ }
+
+ private void deactivatePluginClassLoader() {
+ if (atlasPluginClassLoader != null) {
+ atlasPluginClassLoader.deactivate();
+ }
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/pom.xml b/addons/hive-bridge/pom.xml
new file mode 100755
index 0000000000..8c3636e5d6
--- /dev/null
+++ b/addons/hive-bridge/pom.xml
@@ -0,0 +1,535 @@
+
+
+
+
+ 4.0.0
+
+ apache-atlas
+ org.apache.atlas
+ 3.0.0-SNAPSHOT
+ ../../
+
+ hive-bridge
+ Apache Atlas Hive Bridge Module
+ Apache Atlas Hive Bridge
+ jar
+
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+
+ org.apache.hive
+ hive-metastore
+ ${hive.version}
+ provided
+
+
+ org.mortbay.jetty
+ *
+
+
+ org.eclipse.jetty
+ *
+
+
+ com.github.stephenc.findbugs
+ findbugs-annotations
+
+
+
+
+
+
+ org.apache.hive
+ hive-exec
+ ${hive.version}
+ provided
+
+
+ javax.servlet
+ *
+
+
+
+
+
+ org.apache.hive
+ hive-jdbc
+ ${hive.version}
+ test
+
+
+ javax.servlet
+ *
+
+
+ javax.ws.rs
+ *
+
+
+ org.eclipse.jetty
+ *
+
+
+
+
+
+ org.apache.hive
+ hive-cli
+ ${hive.version}
+ test
+
+
+ javax.servlet
+ *
+
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+
+
+
+
+ org.apache.atlas
+ atlas-client-v1
+
+
+
+ org.apache.atlas
+ atlas-client-v2
+ ${project.version}
+
+
+
+ org.apache.atlas
+ atlas-notification
+
+
+
+
+ org.apache.atlas
+ atlas-webapp
+ war
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ javax.servlet
+ servlet-api
+
+
+ org.eclipse.jetty
+ *
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-annotations
+
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ ${jackson.databind.version}
+
+
+
+ org.testng
+ testng
+
+
+
+ org.mockito
+ mockito-all
+
+
+
+ org.eclipse.jetty
+ jetty-server
+ test
+
+
+
+ org.apache.atlas
+ atlas-graphdb-impls
+ pom
+ test
+
+
+
+ org.apache.atlas
+ atlas-intg
+ tests
+ test
+
+
+
+ org.apache.atlas
+ atlas-repository
+ tests
+ test
+
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+ ${jackson.version}
+ test
+
+
+
+
+
+ dist
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ copy-hook
+ package
+
+ copy
+
+
+ ${project.build.directory}/dependency/hook/hive/atlas-hive-plugin-impl
+ false
+ false
+ true
+
+
+ ${project.groupId}
+ ${project.artifactId}
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-client-common
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-client-v1
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-client-v2
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-intg
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-notification
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-common
+ ${project.version}
+
+
+ org.apache.kafka
+ kafka_${kafka.scala.binary.version}
+ ${kafka.version}
+
+
+ org.apache.kafka
+ kafka-clients
+ ${kafka.version}
+
+
+ com.sun.jersey.contribs
+ jersey-multipart
+ ${jersey.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ ${jackson.databind.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+ ${jackson.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+ ${jackson.version}
+
+
+ commons-configuration
+ commons-configuration
+ ${commons-conf.version}
+
+
+ com.sun.jersey
+ jersey-json
+ ${jersey.version}
+
+
+ javax.ws.rs
+ jsr311-api
+ ${jsr.version}
+
+
+
+
+
+ copy-hook-shim
+ package
+
+ copy
+
+
+ ${project.build.directory}/dependency/hook/hive
+ false
+ false
+ true
+
+
+ ${project.groupId}
+ hive-bridge-shim
+ ${project.version}
+
+
+ ${project.groupId}
+ atlas-plugin-classloader
+ ${project.version}
+
+
+
+
+
+
+
+
+
+
+
+
+
+ org.eclipse.jetty
+ jetty-maven-plugin
+ ${jetty.version}
+
+ ${skipTests}
+
+
+ 31000
+ 60000
+
+ ../../webapp/target/atlas-webapp-${project.version}.war
+ true
+ ../../webapp/src/main/webapp
+
+ /
+ ${project.basedir}/../../webapp/src/main/webapp/WEB-INF/web.xml
+
+ true
+
+ true
+
+ atlas.home
+ ${project.build.directory}
+
+
+ atlas.conf
+ ${project.build.directory}/test-classes
+
+
+ atlas.data
+ ${project.build.directory}/data
+
+
+ atlas.log.dir
+ ${project.build.directory}/logs
+
+
+ atlas.log.file
+ application.log
+
+
+ log4j.configuration
+ file:///${project.build.directory}/test-classes/atlas-log4j.xml
+
+
+ atlas.graphdb.backend
+ ${graphdb.backend.impl}
+
+
+ embedded.solr.directory
+ ${project.build.directory}
+
+
+ solr.log.dir
+ ${project.build.directory}/logs
+
+
+ org.eclipse.jetty.annotations.maxWait
+ 5000
+
+
+ atlas-stop
+ 31001
+ ${jetty-maven-plugin.stopWait}
+ ${debug.jetty.daemon}
+ ${project.build.testOutputDirectory}
+ true
+ jar
+
+
+
+ org.apache.curator
+ curator-client
+ ${curator.version}
+
+
+
+ org.apache.zookeeper
+ zookeeper
+ ${zookeeper.version}
+
+
+
+
+ start-jetty
+ pre-integration-test
+
+
+ stop
+ deploy-war
+
+
+
+ stop-jetty
+ post-integration-test
+
+ stop
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-site-plugin
+
+
+ org.apache.maven.doxia
+ doxia-module-twiki
+ ${doxia.version}
+
+
+ org.apache.maven.doxia
+ doxia-core
+ ${doxia.version}
+
+
+
+
+
+ site
+
+ prepare-package
+
+
+
+ false
+ false
+
+
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+ 1.2.1
+ false
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+
+
+ copy-resources
+ validate
+
+ copy-resources
+
+
+ ${basedir}/target/models
+
+
+ ${basedir}/../models
+
+ 0000-Area0/0010-base_model.json
+ 1000-Hadoop/**
+
+
+
+
+
+
+ copy-solr-resources
+ validate
+
+ copy-resources
+
+
+ ${project.build.directory}/solr
+
+
+ ${basedir}/../../test-tools/src/main/resources/solr
+
+
+
+
+
+
+
+
+
+
diff --git a/addons/hive-bridge/src/bin/import-hive.sh b/addons/hive-bridge/src/bin/import-hive.sh
new file mode 100755
index 0000000000..ebe6976f0e
--- /dev/null
+++ b/addons/hive-bridge/src/bin/import-hive.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+#
+# resolve links - $0 may be a softlink
+PRG="${0}"
+
+[[ `uname -s` == *"CYGWIN"* ]] && CYGWIN=true
+
+while [ -h "${PRG}" ]; do
+ ls=`ls -ld "${PRG}"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "${PRG}"`/"$link"
+ fi
+done
+
+BASEDIR=`dirname ${PRG}`
+BASEDIR=`cd ${BASEDIR}/..;pwd`
+
+if test -z "${JAVA_HOME}"
+then
+ JAVA_BIN=`which java`
+ JAR_BIN=`which jar`
+else
+ JAVA_BIN="${JAVA_HOME}/bin/java"
+ JAR_BIN="${JAVA_HOME}/bin/jar"
+fi
+export JAVA_BIN
+
+if [ ! -e "${JAVA_BIN}" ] || [ ! -e "${JAR_BIN}" ]; then
+ echo "$JAVA_BIN and/or $JAR_BIN not found on the system. Please make sure java and jar commands are available."
+ exit 1
+fi
+
+# Construct Atlas classpath using jars from hook/hive/atlas-hive-plugin-impl/ directory.
+for i in "${BASEDIR}/hook/hive/atlas-hive-plugin-impl/"*.jar; do
+ ATLASCPPATH="${ATLASCPPATH}:$i"
+done
+
+if [ -z "${ATLAS_CONF_DIR}" ] && [ -e /etc/atlas/conf ];then
+ ATLAS_CONF_DIR=/etc/atlas/conf
+fi
+ATLASCPPATH=${ATLASCPPATH}:${ATLAS_CONF_DIR}
+
+# log dir for applications
+ATLAS_LOG_DIR="${ATLAS_LOG_DIR:-/var/log/atlas}"
+export ATLAS_LOG_DIR
+LOGFILE="$ATLAS_LOG_DIR/import-hive.log"
+
+TIME=`date +%Y%m%d%H%M%s`
+
+#Add hive conf in classpath
+if [ ! -z "$HIVE_CONF_DIR" ]; then
+ HIVE_CONF=$HIVE_CONF_DIR
+elif [ ! -z "$HIVE_HOME" ]; then
+ HIVE_CONF="$HIVE_HOME/conf"
+elif [ -e /etc/hive/conf ]; then
+ HIVE_CONF="/etc/hive/conf"
+else
+ echo "Could not find a valid HIVE configuration"
+ exit 1
+fi
+
+echo Using Hive configuration directory ["$HIVE_CONF"]
+
+
+if [ -f "${HIVE_CONF}/hive-env.sh" ]; then
+ . "${HIVE_CONF}/hive-env.sh"
+fi
+
+if [ -z "$HIVE_HOME" ]; then
+ if [ -d "${BASEDIR}/../hive" ]; then
+ HIVE_HOME=${BASEDIR}/../hive
+ else
+ echo "Please set HIVE_HOME to the root of Hive installation"
+ exit 1
+ fi
+fi
+
+HIVE_CP="${HIVE_CONF}"
+# Multiple jars in HIVE_CP_EXCLUDE_LIST can be added using "\|" separator
+# Ex: HIVE_CP_EXCLUDE_LIST="javax.ws.rs-api\|jersey-multipart"
+# exclude log4j libs from hive classpath to avoid conflict
+HIVE_CP_EXCLUDE_LIST="javax.ws.rs-api\|log4j-slf4j-impl\|log4j-1.2-api\|log4j-api\|log4j-core\|log4j-web"
+
+for i in $(find "${HIVE_HOME}/lib/" -name "*.jar" | grep -v "$HIVE_CP_EXCLUDE_LIST"); do
+ HIVE_CP="${HIVE_CP}:$i"
+done
+
+#Add hadoop conf in classpath
+if [ ! -z "$HADOOP_CLASSPATH" ]; then
+ HADOOP_CP=$HADOOP_CLASSPATH
+elif [ ! -z "$HADOOP_HOME" ]; then
+ HADOOP_CP=`$HADOOP_HOME/bin/hadoop classpath`
+elif [ $(command -v hadoop) ]; then
+ HADOOP_CP=`hadoop classpath`
+ echo $HADOOP_CP
+else
+ echo "Environment variable HADOOP_CLASSPATH or HADOOP_HOME need to be set"
+ exit 1
+fi
+
+CP="${HIVE_CP}:${HADOOP_CP}:${ATLASCPPATH}"
+
+# If running in cygwin, convert pathnames and classpath to Windows format.
+if [ "${CYGWIN}" == "true" ]
+then
+ ATLAS_LOG_DIR=`cygpath -w ${ATLAS_LOG_DIR}`
+ LOGFILE=`cygpath -w ${LOGFILE}`
+ HIVE_CP=`cygpath -w ${HIVE_CP}`
+ HADOOP_CP=`cygpath -w ${HADOOP_CP}`
+ CP=`cygpath -w -p ${CP}`
+fi
+
+JAVA_PROPERTIES="$ATLAS_OPTS -Datlas.log.dir=$ATLAS_LOG_DIR -Datlas.log.file=import-hive.log
+-Dlog4j.configuration=atlas-hive-import-log4j.xml"
+
+IMPORT_ARGS=
+JVM_ARGS=
+
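+# Translate command-line options into arguments for HiveMetaStoreBridge below; any option not
+# listed here is passed through unchanged and left for the Java side to validate.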
+while true
+do
+ option=$1
+ shift
+
+ case "$option" in
+ -d) IMPORT_ARGS="$IMPORT_ARGS -d $1"; shift;;
+ -t) IMPORT_ARGS="$IMPORT_ARGS -t $1"; shift;;
+ -f) IMPORT_ARGS="$IMPORT_ARGS -f $1"; shift;;
+ -o) IMPORT_ARGS="$IMPORT_ARGS -o $1"; shift;;
+ -i) IMPORT_ARGS="$IMPORT_ARGS -i";;
+ -h) export HELP_OPTION="true"; IMPORT_ARGS="$IMPORT_ARGS -h";;
+ --database) IMPORT_ARGS="$IMPORT_ARGS --database $1"; shift;;
+ --table) IMPORT_ARGS="$IMPORT_ARGS --table $1"; shift;;
+ --filename) IMPORT_ARGS="$IMPORT_ARGS --filename $1"; shift;;
+ --output) IMPORT_ARGS="$IMPORT_ARGS --output $1"; shift;;
+ --ignoreBulkImport) IMPORT_ARGS="$IMPORT_ARGS --ignoreBulkImport";;
+ --help) export HELP_OPTION="true"; IMPORT_ARGS="$IMPORT_ARGS --help";;
+ -deleteNonExisting) IMPORT_ARGS="$IMPORT_ARGS -deleteNonExisting";;
+ "") break;;
+ *) IMPORT_ARGS="$IMPORT_ARGS $option"
+ esac
+done
+
+JAVA_PROPERTIES="${JAVA_PROPERTIES} ${JVM_ARGS}"
+
+if [ -z ${HELP_OPTION} ]; then
+ echo "Log file for import is $LOGFILE"
+fi
+
+"${JAVA_BIN}" ${JAVA_PROPERTIES} -cp "${CP}" org.apache.atlas.hive.bridge.HiveMetaStoreBridge $IMPORT_ARGS
+
+RETVAL=$?
+if [ -z ${HELP_OPTION} ]; then
+ [ $RETVAL -eq 0 ] && echo Hive Meta Data imported successfully!
+ [ $RETVAL -eq 1 ] && echo Failed to import Hive Meta Data! Check logs at: $LOGFILE for details.
+fi
+
+exit $RETVAL
+
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
new file mode 100755
index 0000000000..28365bc5c3
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
@@ -0,0 +1,1264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.bridge;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.sun.jersey.api.client.ClientResponse;
+import org.apache.atlas.type.AtlasTypeUtil;
+import org.apache.atlas.ApplicationProperties;
+import org.apache.atlas.AtlasClientV2;
+import org.apache.atlas.AtlasServiceException;
+import org.apache.atlas.hive.hook.events.BaseHiveEvent;
+import org.apache.atlas.hive.model.HiveDataTypes;
+import org.apache.atlas.hook.AtlasHookException;
+import org.apache.atlas.model.discovery.AtlasSearchResult;
+import org.apache.atlas.model.discovery.SearchParameters;
+import org.apache.atlas.model.instance.AtlasEntityHeader;
+import org.apache.atlas.model.instance.EntityMutationResponse;
+import org.apache.atlas.model.instance.EntityMutations;
+import org.apache.atlas.utils.AtlasPathExtractorUtil;
+import org.apache.atlas.utils.AuthenticationUtil;
+import org.apache.atlas.utils.HdfsNameServiceResolver;
+import org.apache.atlas.utils.AtlasConfigurationUtil;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.instance.AtlasObjectId;
+import org.apache.atlas.model.instance.AtlasStruct;
+import org.apache.atlas.utils.PathExtractorContext;
+
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.MissingArgumentException;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.collections.MapUtils;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.atlas.hive.hook.events.BaseHiveEvent.*;
+
+/**
+ * A Bridge Utility that imports metadata from the Hive Meta Store
+ * and registers it in Atlas.
+ */
+
+public class HiveMetaStoreBridge {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);
+
+ public static final String CONF_PREFIX = "atlas.hook.hive.";
+ public static final String CLUSTER_NAME_KEY = "atlas.cluster.name";
+ public static final String HIVE_USERNAME = "atlas.hook.hive.default.username";
+ public static final String HIVE_METADATA_NAMESPACE = "atlas.metadata.namespace";
+ public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase";
+ public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION = CONF_PREFIX + "aws_s3.atlas.model.version";
+ public static final String DEFAULT_CLUSTER_NAME = "primary";
+ public static final String TEMP_TABLE_PREFIX = "_temp-";
+ public static final String ATLAS_ENDPOINT = "atlas.rest.address";
+ public static final String SEP = ":".intern();
+ public static final String HDFS_PATH = "hdfs_path";
+ public static final String DEFAULT_METASTORE_CATALOG = "hive";
+ public static final String HIVE_TABLE_DB_EDGE_LABEL = "__hive_table.db";
+ public static final String HOOK_HIVE_PAGE_LIMIT = CONF_PREFIX + "page.limit";
+
+ static final String OPTION_OUTPUT_FILEPATH_SHORT = "o";
+ static final String OPTION_OUTPUT_FILEPATH_LONG = "output";
+ static final String OPTION_IGNORE_BULK_IMPORT_SHORT = "i";
+ static final String OPTION_IGNORE_BULK_IMPORT_LONG = "ignoreBulkImport";
+ static final String OPTION_DATABASE_SHORT = "d";
+ static final String OPTION_DATABASE_LONG = "database";
+ static final String OPTION_TABLE_SHORT = "t";
+ static final String OPTION_TABLE_LONG = "table";
+ static final String OPTION_IMPORT_DATA_FILE_SHORT = "f";
+ static final String OPTION_IMPORT_DATA_FILE_LONG = "filename";
+ static final String OPTION_FAIL_ON_ERROR = "failOnError";
+ static final String OPTION_DELETE_NON_EXISTING = "deleteNonExisting";
+ static final String OPTION_HELP_SHORT = "h";
+ static final String OPTION_HELP_LONG = "help";
+
+ public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2 = "v2";
+
+ private static final int EXIT_CODE_SUCCESS = 0;
+ private static final int EXIT_CODE_FAILED = 1;
+ private static final int EXIT_CODE_INVALID_ARG = 2;
+
+ private static final String DEFAULT_ATLAS_URL = "http://localhost:21000/";
+ private static int pageLimit = 10000;
+
+ private final String metadataNamespace;
+ private final Hive hiveClient;
+ private final AtlasClientV2 atlasClientV2;
+ private final boolean convertHdfsPathToLowerCase;
+
+ private String awsS3AtlasModelVersion = null;
+
+ public static void main(String[] args) {
+ int exitCode = EXIT_CODE_FAILED;
+ AtlasClientV2 atlasClientV2 = null;
+ Options acceptedCliOptions = prepareCommandLineOptions();
+
+ try {
+ CommandLine cmd = new BasicParser().parse(acceptedCliOptions, args);
+ List<String> argsNotProcessed = cmd.getArgList();
+
+ if (argsNotProcessed != null && argsNotProcessed.size() > 0) {
+ throw new ParseException("Unrecognized arguments.");
+ }
+
+ if (cmd.hasOption(OPTION_HELP_SHORT)) {
+ printUsage(acceptedCliOptions);
+ exitCode = EXIT_CODE_SUCCESS;
+ } else {
+ Configuration atlasConf = ApplicationProperties.get();
+ String[] atlasEndpoint = atlasConf.getStringArray(ATLAS_ENDPOINT);
+
+ if (atlasEndpoint == null || atlasEndpoint.length == 0) {
+ atlasEndpoint = new String[] { DEFAULT_ATLAS_URL };
+ }
+
+ if (!AuthenticationUtil.isKerberosAuthenticationEnabled()) {
+ String[] basicAuthUsernamePassword = AuthenticationUtil.getBasicAuthenticationInput();
+
+ atlasClientV2 = new AtlasClientV2(atlasEndpoint, basicAuthUsernamePassword);
+ } else {
+ UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
+
+ atlasClientV2 = new AtlasClientV2(ugi, ugi.getShortUserName(), atlasEndpoint);
+ }
+
+ boolean createZip = cmd.hasOption(OPTION_OUTPUT_FILEPATH_LONG);
+
+ if (createZip) {
+ HiveMetaStoreBridgeV2 hiveMetaStoreBridgeV2 = new HiveMetaStoreBridgeV2(atlasConf, new HiveConf(), atlasClientV2);
+
+ if (hiveMetaStoreBridgeV2.exportDataToZipAndRunAtlasImport(cmd)) {
+ exitCode = EXIT_CODE_SUCCESS;
+ }
+ } else {
+ HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(atlasConf, new HiveConf(), atlasClientV2);
+
+ if (hiveMetaStoreBridge.importDataDirectlyToAtlas(cmd)) {
+ exitCode = EXIT_CODE_SUCCESS;
+ }
+ }
+ }
+ } catch(ParseException e) {
+ LOG.error("Invalid argument. Error: {}", e.getMessage());
+ System.out.println("Invalid argument. Error: " + e.getMessage());
+ exitCode = EXIT_CODE_INVALID_ARG;
+
+ if (!(e instanceof MissingArgumentException)) {
+ printUsage(acceptedCliOptions);
+ }
+ } catch(Exception e) {
+ LOG.error("Import Failed", e);
+ } finally {
+ if( atlasClientV2 !=null) {
+ atlasClientV2.close();
+ }
+ }
+
+ System.exit(exitCode);
+ }
+
+ private static Options prepareCommandLineOptions() {
+ Options acceptedCliOptions = new Options();
+
+ return acceptedCliOptions.addOption(OPTION_OUTPUT_FILEPATH_SHORT, OPTION_OUTPUT_FILEPATH_LONG, true, "Output path or file for Zip import")
+ .addOption(OPTION_IGNORE_BULK_IMPORT_SHORT, OPTION_IGNORE_BULK_IMPORT_LONG, false, "Ignore bulk Import for Zip import")
+ .addOption(OPTION_DATABASE_SHORT, OPTION_DATABASE_LONG, true, "Database name")
+ .addOption(OPTION_TABLE_SHORT, OPTION_TABLE_LONG, true, "Table name")
+ .addOption(OPTION_IMPORT_DATA_FILE_SHORT, OPTION_IMPORT_DATA_FILE_LONG, true, "Filename")
+ .addOption(OPTION_FAIL_ON_ERROR, false, "failOnError")
+ .addOption(OPTION_DELETE_NON_EXISTING, false, "Delete database and table entities in Atlas if not present in Hive")
+ .addOption(OPTION_HELP_SHORT, OPTION_HELP_LONG, false, "Print this help message");
+ }
+
+ private static void printUsage(Options options) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("import-hive.sh", options);
+ System.out.println();
+ System.out.println("Usage options:");
+ System.out.println(" Usage 1: import-hive.sh [-d OR --database ] " );
+ System.out.println(" Imports specified database and its tables ...");
+ System.out.println();
+ System.out.println(" Usage 2: import-hive.sh [-d OR --database ] [-t OR --table ]");
+ System.out.println(" Imports specified table within that database ...");
+ System.out.println();
+ System.out.println(" Usage 3: import-hive.sh");
+ System.out.println(" Imports all databases and tables...");
+ System.out.println();
+ System.out.println(" Usage 4: import-hive.sh -f ");
+ System.out.println(" Imports all databases and tables in the file...");
+ System.out.println(" Format:");
+ System.out.println(" database1:tbl1");
+ System.out.println(" database1:tbl2");
+ System.out.println(" database2:tbl2");
+ System.out.println();
+ System.out.println(" Usage 5: import-hive.sh [-deleteNonExisting] " );
+ System.out.println(" Deletes databases and tables which are not in Hive ...");
+ System.out.println();
+ System.out.println(" Usage 6: import-hive.sh -o [-f ] [-d OR --database ] [-t OR --table ]");
+ System.out.println(" To create zip file with exported data and import the zip file at Atlas ...");
+ System.out.println();
+ System.out.println(" Usage 7: import-hive.sh -i -o [-f ] [-d OR --database ] [-t OR --table ]");
+ System.out.println(" To create zip file with exported data without importing to Atlas which can be imported later ...");
+ System.out.println();
+ }
+
+ /**
+ * Construct a HiveMetaStoreBridge.
+ * @param hiveConf {@link HiveConf} for Hive component in the cluster
+ */
+ public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf, AtlasClientV2 atlasClientV2) throws Exception {
+ this.metadataNamespace = getMetadataNamespace(atlasProperties);
+ this.hiveClient = Hive.get(hiveConf);
+ this.atlasClientV2 = atlasClientV2;
+ this.convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false);
+ this.awsS3AtlasModelVersion = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2);
+ if (atlasProperties != null) {
+ pageLimit = atlasProperties.getInteger(HOOK_HIVE_PAGE_LIMIT, 10000);
+ }
+ }
+
+ /**
+ * Construct a HiveMetaStoreBridge.
+ * @param hiveConf {@link HiveConf} for Hive component in the cluster
+ */
+ public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf) throws Exception {
+ this(atlasProperties, hiveConf, null);
+ }
+
+ HiveMetaStoreBridge(String metadataNamespace, Hive hiveClient, AtlasClientV2 atlasClientV2) {
+ this(metadataNamespace, hiveClient, atlasClientV2, true);
+ }
+
+ HiveMetaStoreBridge(String metadataNamespace, Hive hiveClient, AtlasClientV2 atlasClientV2, boolean convertHdfsPathToLowerCase) {
+ this.metadataNamespace = metadataNamespace;
+ this.hiveClient = hiveClient;
+ this.atlasClientV2 = atlasClientV2;
+ this.convertHdfsPathToLowerCase = convertHdfsPathToLowerCase;
+ }
+
+ public String getMetadataNamespace(Configuration config) {
+ return AtlasConfigurationUtil.getRecentString(config, HIVE_METADATA_NAMESPACE, getClusterName(config));
+ }
+
+ private String getClusterName(Configuration config) {
+ return config.getString(CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME);
+ }
+
+ public String getMetadataNamespace() {
+ return metadataNamespace;
+ }
+
+ public Hive getHiveClient() {
+ return hiveClient;
+ }
+
+ public boolean isConvertHdfsPathToLowerCase() {
+ return convertHdfsPathToLowerCase;
+ }
+
+ public boolean importDataDirectlyToAtlas(CommandLine cmd) throws Exception {
+ LOG.info("Importing Hive metadata");
+ boolean ret = false;
+
+ String databaseToImport = cmd.getOptionValue(OPTION_DATABASE_SHORT);
+ String tableToImport = cmd.getOptionValue(OPTION_TABLE_SHORT);
+ String fileToImport = cmd.getOptionValue(OPTION_IMPORT_DATA_FILE_SHORT);
+
+ boolean failOnError = cmd.hasOption(OPTION_FAIL_ON_ERROR);
+ boolean deleteNonExisting = cmd.hasOption(OPTION_DELETE_NON_EXISTING);
+
+ LOG.info("delete non existing flag : {} ", deleteNonExisting);
+
+ if (deleteNonExisting) {
+ deleteEntitiesForNonExistingHiveMetadata(failOnError);
+ ret = true;
+ } else if (StringUtils.isNotEmpty(fileToImport)) {
+ File f = new File(fileToImport);
+
+ if (f.exists() && f.canRead()) {
+ BufferedReader br = new BufferedReader(new FileReader(f));
+ String line = null;
+
+ while((line = br.readLine()) != null) {
+ String val[] = line.split(":");
+
+ if (ArrayUtils.isNotEmpty(val)) {
+ databaseToImport = val[0];
+
+ if (val.length > 1) {
+ tableToImport = val[1];
+ } else {
+ tableToImport = "";
+ }
+
+ importDatabases(failOnError, databaseToImport, tableToImport);
+ }
+ }
+ ret = true;
+ } else {
+ LOG.error("Failed to read the input file: " + fileToImport);
+ }
+ } else {
+ importDatabases(failOnError, databaseToImport, tableToImport);
+ ret = true;
+ }
+ return ret;
+ }
+
+ @VisibleForTesting
+ public void importHiveMetadata(String databaseToImport, String tableToImport, boolean failOnError) throws Exception {
+ LOG.info("Importing Hive metadata");
+
+ importDatabases(failOnError, databaseToImport, tableToImport);
+ }
+
+ private void importDatabases(boolean failOnError, String databaseToImport, String tableToImport) throws Exception {
+ List<String> databaseNames = null;
+
+ if (StringUtils.isEmpty(databaseToImport) && StringUtils.isEmpty(tableToImport)) {
+ //when both database and table to import are empty, import all
+ databaseNames = hiveClient.getAllDatabases();
+ } else if (StringUtils.isEmpty(databaseToImport) && StringUtils.isNotEmpty(tableToImport)) {
+ //when database is empty and table is not, then check table has database name in it and import that db and table
+ if (isTableWithDatabaseName(tableToImport)) {
+ String val[] = tableToImport.split("\\.");
+ if (val.length > 1) {
+ databaseToImport = val[0];
+ tableToImport = val[1];
+ }
+ databaseNames = hiveClient.getDatabasesByPattern(databaseToImport);
+ } else {
+ databaseNames = hiveClient.getAllDatabases();
+ }
+ } else {
+ //when database to import has some value then, import that db and all table under it.
+ databaseNames = hiveClient.getDatabasesByPattern(databaseToImport);
+ }
+
+ if(!CollectionUtils.isEmpty(databaseNames)) {
+ LOG.info("Found {} databases", databaseNames.size());
+
+ for (String databaseName : databaseNames) {
+ AtlasEntityWithExtInfo dbEntity = registerDatabase(databaseName);
+
+ if (dbEntity != null) {
+ importTables(dbEntity.getEntity(), databaseName, tableToImport, failOnError);
+ }
+ }
+ } else {
+ LOG.error("No database found");
+ System.exit(EXIT_CODE_FAILED);
+ }
+ }
+
+ /**
+ * Imports all tables for the given db
+ * @param dbEntity
+ * @param databaseName
+ * @param failOnError
+ * @throws Exception
+ */
+ private int importTables(AtlasEntity dbEntity, String databaseName, String tblName, final boolean failOnError) throws Exception {
+ int tablesImported = 0;
+
+ final List<String> tableNames;
+
+ if (StringUtils.isEmpty(tblName)) {
+ tableNames = hiveClient.getAllTables(databaseName);
+ } else {
+ tableNames = hiveClient.getTablesByPattern(databaseName, tblName);
+ }
+
+ if(!CollectionUtils.isEmpty(tableNames)) {
+ LOG.info("Found {} tables to import in database {}", tableNames.size(), databaseName);
+
+ try {
+ for (String tableName : tableNames) {
+ int imported = importTable(dbEntity, databaseName, tableName, failOnError);
+
+ tablesImported += imported;
+ }
+ } finally {
+ if (tablesImported == tableNames.size()) {
+ LOG.info("Successfully imported {} tables from database {}", tablesImported, databaseName);
+ } else {
+ LOG.error("Imported {} of {} tables from database {}. Please check logs for errors during import", tablesImported, tableNames.size(), databaseName);
+ }
+ }
+ } else {
+ LOG.error("No tables to import in database {}", databaseName);
+ }
+
+ return tablesImported;
+ }
+
+ @VisibleForTesting
+ public int importTable(AtlasEntity dbEntity, String databaseName, String tableName, final boolean failOnError) throws Exception {
+ try {
+ Table table = hiveClient.getTable(databaseName, tableName);
+ AtlasEntityWithExtInfo tableEntity = registerTable(dbEntity, table);
+
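+ // For external tables, also register a hive_process lineage entity linking the table's storage
+ // path (HDFS/S3, resolved via AtlasPathExtractorUtil) to the hive_table, unless a process with
+ // the same qualified name (table qualified name plus create time) is already registered.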
+ if (table.getTableType() == TableType.EXTERNAL_TABLE) {
+ String processQualifiedName = getTableProcessQualifiedName(metadataNamespace, table);
+ AtlasEntityWithExtInfo processEntity = findProcessEntity(processQualifiedName);
+
+ if (processEntity == null) {
+ String tableLocationString = isConvertHdfsPathToLowerCase() ? lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
+ Path location = table.getDataLocation();
+ String query = getCreateTableString(table, tableLocationString);
+
+ PathExtractorContext pathExtractorCtx = new PathExtractorContext(getMetadataNamespace(), isConvertHdfsPathToLowerCase(), awsS3AtlasModelVersion);
+ AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(location, pathExtractorCtx);
+ AtlasEntity pathInst = entityWithExtInfo.getEntity();
+ AtlasEntity tableInst = tableEntity.getEntity();
+ AtlasEntity processInst = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName());
+
+ long now = System.currentTimeMillis();
+
+ processInst.setAttribute(ATTRIBUTE_QUALIFIED_NAME, processQualifiedName);
+ processInst.setAttribute(ATTRIBUTE_NAME, query);
+ processInst.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace);
+ processInst.setRelationshipAttribute(ATTRIBUTE_INPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(pathInst, RELATIONSHIP_DATASET_PROCESS_INPUTS)));
+ processInst.setRelationshipAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(tableInst, RELATIONSHIP_PROCESS_DATASET_OUTPUTS)));
+ String userName = table.getOwner();
+ if (StringUtils.isEmpty(userName)) {
+ userName = ApplicationProperties.get().getString(HIVE_USERNAME, "hive");
+ }
+ processInst.setAttribute(ATTRIBUTE_USER_NAME, userName);
+ processInst.setAttribute(ATTRIBUTE_START_TIME, now);
+ processInst.setAttribute(ATTRIBUTE_END_TIME, now);
+ processInst.setAttribute(ATTRIBUTE_OPERATION_TYPE, "CREATETABLE");
+ processInst.setAttribute(ATTRIBUTE_QUERY_TEXT, query);
+ processInst.setAttribute(ATTRIBUTE_QUERY_ID, query);
+ processInst.setAttribute(ATTRIBUTE_QUERY_PLAN, "{}");
+ processInst.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(query));
+
+ AtlasEntitiesWithExtInfo createTableProcess = new AtlasEntitiesWithExtInfo();
+
+ createTableProcess.addEntity(processInst);
+
+ if (pathExtractorCtx.getKnownEntities() != null) {
+ pathExtractorCtx.getKnownEntities().values().forEach(entity -> createTableProcess.addEntity(entity));
+ } else {
+ createTableProcess.addEntity(pathInst);
+ }
+
+ registerInstances(createTableProcess);
+ } else {
+ LOG.info("Process {} is already registered", processQualifiedName);
+ }
+ }
+
+ return 1;
+ } catch (Exception e) {
+ LOG.error("Import failed for hive_table {}", tableName, e);
+
+ if (failOnError) {
+ throw e;
+ }
+
+ return 0;
+ }
+ }
+
+ /**
+ * Checks if db is already registered, else creates and registers db entity
+ * @param databaseName
+ * @return
+ * @throws Exception
+ */
+ private AtlasEntityWithExtInfo registerDatabase(String databaseName) throws Exception {
+ AtlasEntityWithExtInfo ret = null;
+ Database db = hiveClient.getDatabase(databaseName);
+
+ if (db != null) {
+ ret = findDatabase(metadataNamespace, databaseName);
+
+ if (ret == null) {
+ ret = registerInstance(new AtlasEntityWithExtInfo(toDbEntity(db)));
+ } else {
+ LOG.info("Database {} is already registered - id={}. Updating it.", databaseName, ret.getEntity().getGuid());
+
+ ret.setEntity(toDbEntity(db, ret.getEntity()));
+
+ updateInstance(ret);
+ }
+ }
+
+ return ret;
+ }
+
+ private AtlasEntityWithExtInfo registerTable(AtlasEntity dbEntity, Table table) throws AtlasHookException {
+ try {
+ AtlasEntityWithExtInfo ret;
+ AtlasEntityWithExtInfo tableEntity = findTableEntity(table);
+
+ if (tableEntity == null) {
+ tableEntity = toTableEntity(dbEntity, table);
+
+ ret = registerInstance(tableEntity);
+ } else {
+ LOG.info("Table {}.{} is already registered with id {}. Updating entity.", table.getDbName(), table.getTableName(), tableEntity.getEntity().getGuid());
+
+ ret = toTableEntity(dbEntity, table, tableEntity);
+
+ updateInstance(ret);
+ }
+
+ return ret;
+ } catch (Exception e) {
+ throw new AtlasHookException("HiveMetaStoreBridge.registerTable() failed.", e);
+ }
+ }
+
+ /**
+ * Registers an entity in atlas
+ * @param entity
+ * @return
+ * @throws Exception
+ */
+ private AtlasEntityWithExtInfo registerInstance(AtlasEntityWithExtInfo entity) throws Exception {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("creating {} entity: {}", entity.getEntity().getTypeName(), entity);
+ }
+
+ AtlasEntityWithExtInfo ret = null;
+ EntityMutationResponse response = atlasClientV2.createEntity(entity);
+ List<AtlasEntityHeader> createdEntities = response.getEntitiesByOperation(EntityMutations.EntityOperation.CREATE);
+
+ if (CollectionUtils.isNotEmpty(createdEntities)) {
+ for (AtlasEntityHeader createdEntity : createdEntities) {
+ if (ret == null) {
+ ret = atlasClientV2.getEntityByGuid(createdEntity.getGuid());
+
+ LOG.info("Created {} entity: name={}, guid={}", ret.getEntity().getTypeName(), ret.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), ret.getEntity().getGuid());
+ } else if (ret.getEntity(createdEntity.getGuid()) == null) {
+ AtlasEntityWithExtInfo newEntity = atlasClientV2.getEntityByGuid(createdEntity.getGuid());
+
+ ret.addReferredEntity(newEntity.getEntity());
+
+ if (MapUtils.isNotEmpty(newEntity.getReferredEntities())) {
+ for (Map.Entry<String, AtlasEntity> entry : newEntity.getReferredEntities().entrySet()) {
+ ret.addReferredEntity(entry.getKey(), entry.getValue());
+ }
+ }
+
+ LOG.info("Created {} entity: name={}, guid={}", newEntity.getEntity().getTypeName(), newEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), newEntity.getEntity().getGuid());
+ }
+ }
+ }
+
+ clearRelationshipAttributes(ret);
+
+ return ret;
+ }
+
+ /**
+ * Registers entities in Atlas
+ * @param entities
+ * @return
+ * @throws Exception
+ */
+ private AtlasEntitiesWithExtInfo registerInstances(AtlasEntitiesWithExtInfo entities) throws Exception {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("creating {} entities: {}", entities.getEntities().size(), entities);
+ }
+
+ AtlasEntitiesWithExtInfo ret = null;
+ EntityMutationResponse response = atlasClientV2.createEntities(entities);
+ List<AtlasEntityHeader> createdEntities = response.getEntitiesByOperation(EntityMutations.EntityOperation.CREATE);
+
+ if (CollectionUtils.isNotEmpty(createdEntities)) {
+ ret = new AtlasEntitiesWithExtInfo();
+
+ for (AtlasEntityHeader createdEntity : createdEntities) {
+ AtlasEntityWithExtInfo entity = atlasClientV2.getEntityByGuid(createdEntity.getGuid());
+
+ ret.addEntity(entity.getEntity());
+
+ if (MapUtils.isNotEmpty(entity.getReferredEntities())) {
+ for (Map.Entry<String, AtlasEntity> entry : entity.getReferredEntities().entrySet()) {
+ ret.addReferredEntity(entry.getKey(), entry.getValue());
+ }
+ }
+
+ LOG.info("Created {} entity: name={}, guid={}", entity.getEntity().getTypeName(), entity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), entity.getEntity().getGuid());
+ }
+ }
+
+ clearRelationshipAttributes(ret);
+
+ return ret;
+ }
+
+ private void updateInstance(AtlasEntityWithExtInfo entity) throws AtlasServiceException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("updating {} entity: {}", entity.getEntity().getTypeName(), entity);
+ }
+
+ atlasClientV2.updateEntity(entity);
+
+ LOG.info("Updated {} entity: name={}, guid={}", entity.getEntity().getTypeName(), entity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), entity.getEntity().getGuid());
+ }
+
+ /**
+ * Create a Hive Database entity
+ * @param hiveDB The Hive {@link Database} object from which to map properties
+ * @return new Hive Database AtlasEntity
+ * @throws HiveException
+ */
+ private AtlasEntity toDbEntity(Database hiveDB) throws HiveException {
+ return toDbEntity(hiveDB, null);
+ }
+
+ private AtlasEntity toDbEntity(Database hiveDB, AtlasEntity dbEntity) {
+ if (dbEntity == null) {
+ dbEntity = new AtlasEntity(HiveDataTypes.HIVE_DB.getName());
+ }
+
+ String dbName = getDatabaseName(hiveDB);
+
+ dbEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getDBQualifiedName(metadataNamespace, dbName));
+ dbEntity.setAttribute(ATTRIBUTE_NAME, dbName);
+ dbEntity.setAttribute(ATTRIBUTE_DESCRIPTION, hiveDB.getDescription());
+ dbEntity.setAttribute(ATTRIBUTE_OWNER, hiveDB.getOwnerName());
+
+ dbEntity.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace);
+ dbEntity.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(hiveDB.getLocationUri()));
+ dbEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveDB.getParameters());
+
+ if (hiveDB.getOwnerType() != null) {
+ dbEntity.setAttribute(ATTRIBUTE_OWNER_TYPE, OWNER_TYPE_TO_ENUM_VALUE.get(hiveDB.getOwnerType().getValue()));
+ }
+
+ return dbEntity;
+ }
+
+ public static String getDatabaseName(Database hiveDB) {
+ String dbName = hiveDB.getName().toLowerCase();
+ String catalogName = hiveDB.getCatalogName() != null ? hiveDB.getCatalogName().toLowerCase() : null;
+
+ if (StringUtils.isNotEmpty(catalogName) && !StringUtils.equals(catalogName, DEFAULT_METASTORE_CATALOG)) {
+ dbName = catalogName + SEP + dbName;
+ }
+
+ return dbName;
+ }
+
+ /**
+ * Create a new table instance in Atlas
+ * @param database AtlasEntity for Hive {@link AtlasEntity} to which this table belongs
+ * @param hiveTable reference to the Hive {@link Table} from which to map properties
+ * @return Newly created Hive AtlasEntity
+ * @throws Exception
+ */
+ private AtlasEntityWithExtInfo toTableEntity(AtlasEntity database, Table hiveTable) throws AtlasHookException {
+ return toTableEntity(database, hiveTable, null);
+ }
+
+ private AtlasEntityWithExtInfo toTableEntity(AtlasEntity database, final Table hiveTable, AtlasEntityWithExtInfo table) throws AtlasHookException {
+ if (table == null) {
+ table = new AtlasEntityWithExtInfo(new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName()));
+ }
+
+ AtlasEntity tableEntity = table.getEntity();
+ String tableQualifiedName = getTableQualifiedName(metadataNamespace, hiveTable);
+ long createTime = BaseHiveEvent.getTableCreateTime(hiveTable);
+ long lastAccessTime = hiveTable.getLastAccessTime() > 0 ? hiveTable.getLastAccessTime() : createTime;
+
+ tableEntity.setRelationshipAttribute(ATTRIBUTE_DB, AtlasTypeUtil.getAtlasRelatedObjectId(database, RELATIONSHIP_HIVE_TABLE_DB));
+ tableEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName);
+ tableEntity.setAttribute(ATTRIBUTE_NAME, hiveTable.getTableName().toLowerCase());
+ tableEntity.setAttribute(ATTRIBUTE_OWNER, hiveTable.getOwner());
+
+ tableEntity.setAttribute(ATTRIBUTE_CREATE_TIME, createTime);
+ tableEntity.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime);
+ tableEntity.setAttribute(ATTRIBUTE_RETENTION, hiveTable.getRetention());
+ tableEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveTable.getParameters());
+ tableEntity.setAttribute(ATTRIBUTE_COMMENT, hiveTable.getParameters().get(ATTRIBUTE_COMMENT));
+ tableEntity.setAttribute(ATTRIBUTE_TABLE_TYPE, hiveTable.getTableType().name());
+ tableEntity.setAttribute(ATTRIBUTE_TEMPORARY, hiveTable.isTemporary());
+
+ if (hiveTable.getViewOriginalText() != null) {
+ tableEntity.setAttribute(ATTRIBUTE_VIEW_ORIGINAL_TEXT, hiveTable.getViewOriginalText());
+ }
+
+ if (hiveTable.getViewExpandedText() != null) {
+ tableEntity.setAttribute(ATTRIBUTE_VIEW_EXPANDED_TEXT, hiveTable.getViewExpandedText());
+ }
+
+ AtlasEntity sdEntity = toStorageDescEntity(hiveTable.getSd(), tableQualifiedName, getStorageDescQFName(tableQualifiedName), AtlasTypeUtil.getObjectId(tableEntity));
+ List<AtlasEntity> partKeys = toColumns(hiveTable.getPartitionKeys(), tableEntity, RELATIONSHIP_HIVE_TABLE_PART_KEYS);
+ List<AtlasEntity> columns = toColumns(hiveTable.getCols(), tableEntity, RELATIONSHIP_HIVE_TABLE_COLUMNS);
+
+ tableEntity.setRelationshipAttribute(ATTRIBUTE_STORAGEDESC, AtlasTypeUtil.getAtlasRelatedObjectId(sdEntity, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC));
+ tableEntity.setRelationshipAttribute(ATTRIBUTE_PARTITION_KEYS, AtlasTypeUtil.getAtlasRelatedObjectIds(partKeys, RELATIONSHIP_HIVE_TABLE_PART_KEYS));
+ tableEntity.setRelationshipAttribute(ATTRIBUTE_COLUMNS, AtlasTypeUtil.getAtlasRelatedObjectIds(columns, RELATIONSHIP_HIVE_TABLE_COLUMNS));
+
+ table.addReferredEntity(database);
+ table.addReferredEntity(sdEntity);
+
+ if (partKeys != null) {
+ for (AtlasEntity partKey : partKeys) {
+ table.addReferredEntity(partKey);
+ }
+ }
+
+ if (columns != null) {
+ for (AtlasEntity column : columns) {
+ table.addReferredEntity(column);
+ }
+ }
+
+ table.setEntity(tableEntity);
+
+ return table;
+ }
+
+ private AtlasEntity toStorageDescEntity(StorageDescriptor storageDesc, String tableQualifiedName, String sdQualifiedName, AtlasObjectId tableId ) throws AtlasHookException {
+ AtlasEntity ret = new AtlasEntity(HiveDataTypes.HIVE_STORAGEDESC.getName());
+
+ ret.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(tableId, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC));
+ ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName);
+ ret.setAttribute(ATTRIBUTE_PARAMETERS, storageDesc.getParameters());
+ ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(storageDesc.getLocation()));
+ ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, storageDesc.getInputFormat());
+ ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, storageDesc.getOutputFormat());
+ ret.setAttribute(ATTRIBUTE_COMPRESSED, storageDesc.isCompressed());
+ ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, storageDesc.getNumBuckets());
+ ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, storageDesc.isStoredAsSubDirectories());
+
+ if (storageDesc.getBucketCols().size() > 0) {
+ ret.setAttribute(ATTRIBUTE_BUCKET_COLS, storageDesc.getBucketCols());
+ }
+
+ if (storageDesc.getSerdeInfo() != null) {
+ SerDeInfo serdeInfo = storageDesc.getSerdeInfo();
+
+ LOG.debug("serdeInfo = {}", serdeInfo);
+ // SkewedInfo skewedInfo = storageDesc.getSkewedInfo();
+
+ AtlasStruct serdeInfoStruct = new AtlasStruct(HiveDataTypes.HIVE_SERDE.getName());
+
+ serdeInfoStruct.setAttribute(ATTRIBUTE_NAME, serdeInfo.getName());
+ serdeInfoStruct.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, serdeInfo.getSerializationLib());
+ serdeInfoStruct.setAttribute(ATTRIBUTE_PARAMETERS, serdeInfo.getParameters());
+
+ ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfoStruct);
+ }
+
+ if (CollectionUtils.isNotEmpty(storageDesc.getSortCols())) {
+ List<AtlasStruct> sortColsStruct = new ArrayList<>();
+
+ for (Order sortcol : storageDesc.getSortCols()) {
+ String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName();
+ AtlasStruct colStruct = new AtlasStruct(hiveOrderName);
+ colStruct.setAttribute("col", sortcol.getCol());
+ colStruct.setAttribute("order", sortcol.getOrder());
+
+ sortColsStruct.add(colStruct);
+ }
+
+ ret.setAttribute(ATTRIBUTE_SORT_COLS, sortColsStruct);
+ }
+
+ return ret;
+ }
+
+ private List<AtlasEntity> toColumns(List<FieldSchema> schemaList, AtlasEntity table, String relationshipType) throws AtlasHookException {
+ List<AtlasEntity> ret = new ArrayList<>();
+
+ int columnPosition = 0;
+ for (FieldSchema fs : schemaList) {
+ LOG.debug("Processing field {}", fs);
+
+ AtlasEntity column = new AtlasEntity(HiveDataTypes.HIVE_COLUMN.getName());
+
+ column.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(table, relationshipType));
+ column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getColumnQualifiedName((String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), fs.getName()));
+ column.setAttribute(ATTRIBUTE_NAME, fs.getName());
+ column.setAttribute(ATTRIBUTE_OWNER, table.getAttribute(ATTRIBUTE_OWNER));
+ column.setAttribute(ATTRIBUTE_COL_TYPE, fs.getType());
+ column.setAttribute(ATTRIBUTE_COL_POSITION, columnPosition++);
+ column.setAttribute(ATTRIBUTE_COMMENT, fs.getComment());
+
+ ret.add(column);
+ }
+ return ret;
+ }
+
+ /**
+ * Gets the atlas entity for the database
+ * @param databaseName database Name
+ * @param metadataNamespace cluster name
+ * @return AtlasEntity for database if exists, else null
+ * @throws Exception
+ */
+ private AtlasEntityWithExtInfo findDatabase(String metadataNamespace, String databaseName) throws Exception {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Searching Atlas for database {}", databaseName);
+ }
+
+ String typeName = HiveDataTypes.HIVE_DB.getName();
+
+ return findEntity(typeName, getDBQualifiedName(metadataNamespace, databaseName), true, true);
+ }
+
+ /**
+ * Gets Atlas Entity for the table
+ *
+ * @param hiveTable
+ * @return table entity from Atlas if exists, else null
+ * @throws Exception
+ */
+ private AtlasEntityWithExtInfo findTableEntity(Table hiveTable) throws Exception {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Searching Atlas for table {}.{}", hiveTable.getDbName(), hiveTable.getTableName());
+ }
+
+ String typeName = HiveDataTypes.HIVE_TABLE.getName();
+ String tblQualifiedName = getTableQualifiedName(getMetadataNamespace(), hiveTable.getDbName(), hiveTable.getTableName());
+
+ return findEntity(typeName, tblQualifiedName, true, true);
+ }
+
+ private AtlasEntityWithExtInfo findProcessEntity(String qualifiedName) throws Exception{
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Searching Atlas for process {}", qualifiedName);
+ }
+
+ String typeName = HiveDataTypes.HIVE_PROCESS.getName();
+
+ return findEntity(typeName, qualifiedName , true , true);
+ }
+
+ private AtlasEntityWithExtInfo findEntity(final String typeName, final String qualifiedName , boolean minExtInfo, boolean ignoreRelationship) throws AtlasServiceException {
+ AtlasEntityWithExtInfo ret = null;
+
+ try {
+ ret = atlasClientV2.getEntityByAttribute(typeName, Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, qualifiedName), minExtInfo, ignoreRelationship);
+ } catch (AtlasServiceException e) {
+ if(e.getStatus() == ClientResponse.Status.NOT_FOUND) {
+ return null;
+ }
+
+ throw e;
+ }
+
+ return ret;
+ }
+
+ private String getCreateTableString(Table table, String location) {
+ String colString = "";
+ List<FieldSchema> colList = table.getAllCols();
+
+ if (colList != null) {
+ for (FieldSchema col : colList) {
+ colString += col.getName() + " " + col.getType() + ",";
+ }
+
+ if (colList.size() > 0) {
+ colString = colString.substring(0, colString.length() - 1);
+ colString = "(" + colString + ")";
+ }
+ }
+
+ String query = "create external table " + table.getTableName() + colString + " location '" + location + "'";
+
+ return query;
+ }
+
+ private String lower(String str) {
+ if (StringUtils.isEmpty(str)) {
+ return "";
+ }
+
+ return str.toLowerCase().trim();
+ }
+
+
+ /**
+ * Construct the qualified name used to uniquely identify a Table instance in Atlas.
+ * @param metadataNamespace Metadata namespace of the cluster to which the Hive component belongs
+ * @param table hive table for which the qualified name is needed
+ * @return Unique qualified name to identify the Table instance in Atlas.
+ */
+ private static String getTableQualifiedName(String metadataNamespace, Table table) {
+ return getTableQualifiedName(metadataNamespace, table.getDbName(), table.getTableName(), table.isTemporary());
+ }
+
+ private String getHdfsPathQualifiedName(String hdfsPath) {
+ return String.format("%s@%s", hdfsPath, metadataNamespace);
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a Database instance in Atlas.
+ * @param metadataNamespace Name of the cluster to which the Hive component belongs
+ * @param dbName Name of the Hive database
+ * @return Unique qualified name to identify the Database instance in Atlas.
+ */
+ public static String getDBQualifiedName(String metadataNamespace, String dbName) {
+ return String.format("%s@%s", dbName.toLowerCase(), metadataNamespace);
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a Table instance in Atlas.
+ * @param metadataNamespace Name of the cluster to which the Hive component belongs
+ * @param dbName Name of the Hive database to which the Table belongs
+ * @param tableName Name of the Hive table
+ * @param isTemporaryTable is this a temporary table
+ * @return Unique qualified name to identify the Table instance in Atlas.
+ */
+ public static String getTableQualifiedName(String metadataNamespace, String dbName, String tableName, boolean isTemporaryTable) {
+ String tableTempName = tableName;
+
+ if (isTemporaryTable) {
+ if (SessionState.get() != null && SessionState.get().getSessionId() != null) {
+ tableTempName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId();
+ } else {
+ tableTempName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10);
+ }
+ }
+
+ return String.format("%s.%s@%s", dbName.toLowerCase(), tableTempName.toLowerCase(), metadataNamespace);
+ }
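+
+ // Illustrative example (hypothetical values, assuming TEMP_TABLE_PREFIX is "_temp-" as in HiveMetaStoreBridgeV2):
+ //   getTableQualifiedName("cm", "Sales", "Orders", false)  -> "sales.orders@cm"
+ //   a temporary table would instead yield "sales.orders_temp-<sessionId>@cm"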
+
+ public static String getTableProcessQualifiedName(String metadataNamespace, Table table) {
+ String tableQualifiedName = getTableQualifiedName(metadataNamespace, table);
+ long createdTime = getTableCreatedTime(table);
+
+ return tableQualifiedName + SEP + createdTime;
+ }
+
+
+ /**
+ * Construct the qualified name used to uniquely identify a Table instance in Atlas.
+ * @param metadataNamespace Metadata namespace of the cluster to which the Hive component belongs
+ * @param dbName Name of the Hive database to which the Table belongs
+ * @param tableName Name of the Hive table
+ * @return Unique qualified name to identify the Table instance in Atlas.
+ */
+ public static String getTableQualifiedName(String metadataNamespace, String dbName, String tableName) {
+ return getTableQualifiedName(metadataNamespace, dbName, tableName, false);
+ }
+ public static String getStorageDescQFName(String tableQualifiedName) {
+ return tableQualifiedName + "_storage";
+ }
+
+ public static String getColumnQualifiedName(final String tableQualifiedName, final String colName) {
+ final String[] parts = tableQualifiedName.split("@");
+ final String tableName = parts[0];
+ final String metadataNamespace = parts[1];
+
+ return String.format("%s.%s@%s", tableName, colName.toLowerCase(), metadataNamespace);
+ }
+
+ public static long getTableCreatedTime(Table table) {
+ return table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR;
+ }
+
+ private void clearRelationshipAttributes(AtlasEntitiesWithExtInfo entities) {
+ if (entities != null) {
+ if (entities.getEntities() != null) {
+ for (AtlasEntity entity : entities.getEntities()) {
+ clearRelationshipAttributes(entity);
+ }
+ }
+
+ if (entities.getReferredEntities() != null) {
+ clearRelationshipAttributes(entities.getReferredEntities().values());
+ }
+ }
+ }
+
+ private void clearRelationshipAttributes(AtlasEntityWithExtInfo entity) {
+ if (entity != null) {
+ clearRelationshipAttributes(entity.getEntity());
+
+ if (entity.getReferredEntities() != null) {
+ clearRelationshipAttributes(entity.getReferredEntities().values());
+ }
+ }
+ }
+
+ private void clearRelationshipAttributes(Collection<AtlasEntity> entities) {
+ if (entities != null) {
+ for (AtlasEntity entity : entities) {
+ clearRelationshipAttributes(entity);
+ }
+ }
+ }
+
+ private void clearRelationshipAttributes(AtlasEntity entity) {
+ if (entity != null && entity.getRelationshipAttributes() != null) {
+ entity.getRelationshipAttributes().clear();
+ }
+ }
+
+ private boolean isTableWithDatabaseName(String tableName) {
+ boolean ret = false;
+ if (tableName.contains(".")) {
+ ret = true;
+ }
+ return ret;
+ }
+
+ private List<AtlasEntityHeader> getAllDatabaseInCluster() throws AtlasServiceException {
+
+ List<AtlasEntityHeader> entities = new ArrayList<>();
+ final int pageSize = pageLimit;
+
+ SearchParameters.FilterCriteria fc = new SearchParameters.FilterCriteria();
+ fc.setAttributeName(ATTRIBUTE_CLUSTER_NAME);
+ fc.setAttributeValue(metadataNamespace);
+ fc.setOperator(SearchParameters.Operator.EQ);
+
+ for (int i = 0; ; i++) {
+ int offset = pageSize * i;
+ LOG.info("Retrieving databases: offset={}, pageSize={}", offset, pageSize);
+
+ AtlasSearchResult searchResult = atlasClientV2.basicSearch(HIVE_TYPE_DB, fc, null, null, true, pageSize, offset);
+
+ List<AtlasEntityHeader> entityHeaders = searchResult == null ? null : searchResult.getEntities();
+ int dbCount = entityHeaders == null ? 0 : entityHeaders.size();
+
+ LOG.info("Retrieved {} databases of {} cluster", dbCount, metadataNamespace);
+
+ if (dbCount > 0) {
+ entities.addAll(entityHeaders);
+ }
+
+ if (dbCount < pageSize) { // last page
+ break;
+ }
+ }
+
+ return entities;
+ }
+
+ private List<AtlasEntityHeader> getAllTablesInDb(String databaseGuid) throws AtlasServiceException {
+
+ List<AtlasEntityHeader> entities = new ArrayList<>();
+ final int pageSize = pageLimit;
+
+ for (int i = 0; ; i++) {
+ int offset = pageSize * i;
+ LOG.info("Retrieving tables: offset={}, pageSize={}", offset, pageSize);
+
+ AtlasSearchResult searchResult = atlasClientV2.relationshipSearch(databaseGuid, HIVE_TABLE_DB_EDGE_LABEL, null, null, true, pageSize, offset);
+
+ List<AtlasEntityHeader> entityHeaders = searchResult == null ? null : searchResult.getEntities();
+ int tableCount = entityHeaders == null ? 0 : entityHeaders.size();
+
+ LOG.info("Retrieved {} tables of {} database", tableCount, databaseGuid);
+
+ if (tableCount > 0) {
+ entities.addAll(entityHeaders);
+ }
+
+ if (tableCount < pageSize) { // last page
+ break;
+ }
+ }
+
+ return entities;
+ }
+
+ public String getHiveDatabaseName(String qualifiedName) {
+
+ if (StringUtils.isNotEmpty(qualifiedName)) {
+ String[] split = qualifiedName.split("@");
+ if (split.length > 0) {
+ return split[0];
+ }
+ }
+ return null;
+ }
+
+
+ public String getHiveTableName(String qualifiedName, boolean isTemporary) {
+
+ if (StringUtils.isNotEmpty(qualifiedName)) {
+ String tableName = StringUtils.substringBetween(qualifiedName, ".", "@");
+ if (!isTemporary) {
+ return tableName;
+ } else {
+ if (StringUtils.isNotEmpty(tableName)) {
+ String[] splitTemp = tableName.split(TEMP_TABLE_PREFIX);
+ if (splitTemp.length > 0) {
+ return splitTemp[0];
+ }
+ }
+ }
+ }
+ return null;
+ }
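+
+ // Illustrative example (hypothetical values): for qualifiedName "sales.orders@cm",
+ //   getHiveDatabaseName(...) returns "sales" and getHiveTableName(..., false) returns "orders";
+ //   with isTemporary=true, any "_temp-<sessionId>" suffix is stripped from the returned table name.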
+
+ private void deleteByGuid(List<String> guidsToDelete) throws AtlasServiceException {
+
+ if (CollectionUtils.isNotEmpty(guidsToDelete)) {
+
+ for (String guid : guidsToDelete) {
+ EntityMutationResponse response = atlasClientV2.deleteEntityByGuid(guid);
+
+ if (response.getDeletedEntities().size() < 1) {
+ LOG.info("Entity with guid : {} is not deleted", guid);
+ } else {
+ LOG.info("Entity with guid : {} is deleted", guid);
+ }
+ }
+ } else {
+ LOG.info("No Entity to delete from Atlas");
+ }
+ }
+
+ public void deleteEntitiesForNonExistingHiveMetadata(boolean failOnError) throws Exception {
+
+ //fetch databases from Atlas
+ List<AtlasEntityHeader> dbs = null;
+ try {
+ dbs = getAllDatabaseInCluster();
+ LOG.info("Total Databases in cluster {} : {} ", metadataNamespace, dbs.size());
+ } catch (AtlasServiceException e) {
+ LOG.error("Failed to retrieve database entities for cluster {} from Atlas", metadataNamespace, e);
+ if (failOnError) {
+ throw e;
+ }
+ }
+
+ if (CollectionUtils.isNotEmpty(dbs)) {
+ //iterate all dbs to check if exists in hive
+ for (AtlasEntityHeader db : dbs) {
+
+ String dbGuid = db.getGuid();
+ String hiveDbName = getHiveDatabaseName((String) db.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+
+ if (StringUtils.isEmpty(hiveDbName)) {
+ LOG.error("Failed to get database from qualifiedName: {}, guid: {} ", db.getAttribute(ATTRIBUTE_QUALIFIED_NAME), dbGuid);
+ continue;
+ }
+
+ List<AtlasEntityHeader> tables;
+ try {
+ tables = getAllTablesInDb(dbGuid);
+ LOG.info("Total Tables in database {} : {} ", hiveDbName, tables.size());
+ } catch (AtlasServiceException e) {
+ LOG.error("Failed to retrieve table entities for database {} from Atlas", hiveDbName, e);
+ if (failOnError) {
+ throw e;
+ }
+ continue;
+ }
+
+ List<String> guidsToDelete = new ArrayList<>();
+ if (!hiveClient.databaseExists(hiveDbName)) {
+
+ //table guids
+ if (CollectionUtils.isNotEmpty(tables)) {
+ for (AtlasEntityHeader table : tables) {
+ guidsToDelete.add(table.getGuid());
+ }
+ }
+
+ //db guid
+ guidsToDelete.add(db.getGuid());
+ LOG.info("Added database {}.{} and its {} tables to delete", metadataNamespace, hiveDbName, tables.size());
+
+ } else {
+ //iterate all table of db to check if it exists
+ if (CollectionUtils.isNotEmpty(tables)) {
+ for (AtlasEntityHeader table : tables) {
+ String hiveTableName = getHiveTableName((String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), true);
+
+ if (StringUtils.isEmpty(hiveTableName)) {
+ LOG.error("Failed to get table from qualifiedName: {}, guid: {} ", table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), table.getGuid());
+ continue;
+ }
+
+ try {
+ hiveClient.getTable(hiveDbName, hiveTableName, true);
+ } catch (InvalidTableException e) { //table doesn't exist
+ LOG.info("Added table {}.{} to delete", hiveDbName, hiveTableName);
+
+ guidsToDelete.add(table.getGuid());
+ } catch (HiveException e) {
+ LOG.error("Failed to get table {}.{} from Hive", hiveDbName, hiveTableName, e);
+
+ if (failOnError) {
+ throw e;
+ }
+ }
+ }
+ }
+ }
+
+ //delete entities
+ if (CollectionUtils.isNotEmpty(guidsToDelete)) {
+ try {
+ deleteByGuid(guidsToDelete);
+ } catch (AtlasServiceException e) {
+ LOG.error("Failed to delete Atlas entities for database {}", hiveDbName, e);
+
+ if (failOnError) {
+ throw e;
+ }
+ }
+
+ }
+ }
+
+ } else {
+ LOG.info("No database found in service.");
+ }
+
+ }
+}
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeV2.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeV2.java
new file mode 100644
index 0000000000..0627c0e095
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridgeV2.java
@@ -0,0 +1,1036 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.bridge;
+
+import org.apache.atlas.AtlasClientV2;
+import org.apache.atlas.AtlasServiceException;
+import org.apache.atlas.exception.AtlasBaseException;
+import org.apache.atlas.model.impexp.AtlasImportRequest;
+import org.apache.atlas.model.impexp.AtlasImportResult;
+import org.apache.atlas.model.typedef.AtlasTypesDef;
+import org.apache.atlas.type.AtlasType;
+import org.apache.atlas.type.AtlasTypeUtil;
+import org.apache.atlas.ApplicationProperties;
+import org.apache.atlas.hive.hook.events.BaseHiveEvent;
+import org.apache.atlas.hive.model.HiveDataTypes;
+import org.apache.atlas.hook.AtlasHookException;
+import org.apache.atlas.utils.AtlasPathExtractorUtil;
+import org.apache.atlas.utils.HdfsNameServiceResolver;
+import org.apache.atlas.utils.AtlasConfigurationUtil;
+import org.apache.atlas.utils.PathExtractorContext;
+import org.apache.atlas.utils.LruCache;
+import org.apache.atlas.model.instance.AtlasObjectId;
+import org.apache.atlas.model.instance.AtlasStruct;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.commons.cli.MissingArgumentException;
+import org.apache.commons.collections.CollectionUtils;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.collections.MapUtils;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.OutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import static org.apache.atlas.hive.hook.events.BaseHiveEvent.*;
+
+/**
+ * A bridge utility that exports metadata from the Hive Metastore into a zip file,
+ * which can then be imported into Atlas.
+ */
+public class HiveMetaStoreBridgeV2 {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridgeV2.class);
+
+ private static final String OPTION_DATABASE_SHORT = "d";
+ private static final String OPTION_TABLE_SHORT = "t";
+ private static final String OPTION_IMPORT_DATA_FILE_SHORT = "f";
+ private static final String OPTION_OUTPUT_FILEPATH_SHORT = "o";
+ private static final String OPTION_IGNORE_BULK_IMPORT_SHORT = "i";
+
+ public static final String CONF_PREFIX = "atlas.hook.hive.";
+ public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase";
+ public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION = CONF_PREFIX + "aws_s3.atlas.model.version";
+
+ public static final String CLUSTER_NAME_KEY = "atlas.cluster.name";
+ public static final String HIVE_USERNAME = "atlas.hook.hive.default.username";
+ public static final String HIVE_METADATA_NAMESPACE = "atlas.metadata.namespace";
+ public static final String DEFAULT_CLUSTER_NAME = "primary";
+ public static final String TEMP_TABLE_PREFIX = "_temp-";
+ public static final String SEP = ":".intern();
+ public static final String DEFAULT_METASTORE_CATALOG = "hive";
+ public static final String HOOK_HIVE_PAGE_LIMIT = CONF_PREFIX + "page.limit";
+
+ private static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2 = "v2";
+ private static final String ZIP_FILE_COMMENT_FORMAT = "{\"entitiesCount\":%d, \"total\":%d}";
+ private static final int DEFAULT_PAGE_LIMIT = 10000;
+ private static final String DEFAULT_ZIP_FILE_NAME = "import-hive-output.zip";
+ private static final String ZIP_ENTRY_ENTITIES = "entities.json";
+ private static final String TYPES_DEF_JSON = "atlas-typesdef.json";
+
+ private static final String JSON_ARRAY_START = "[";
+ private static final String JSON_COMMA = ",";
+ private static final String JSON_EMPTY_OBJECT = "{}";
+ private static final String JSON_ARRAY_END = "]";
+
+ private static int pageLimit = DEFAULT_PAGE_LIMIT;
+ private String awsS3AtlasModelVersion = null;
+
+ private final String metadataNamespace;
+ private final Hive hiveClient;
+ private final AtlasClientV2 atlasClientV2;
+ private final boolean convertHdfsPathToLowerCase;
+
+ private ZipOutputStream zipOutputStream;
+ private String outZipFileName;
+ private int totalProcessedEntities = 0;
+
+ private final Map<String, AtlasEntityWithExtInfo> entityLRUCache = new LruCache<>(10000, 0);
+ private final Map<Table, AtlasEntity> hiveTablesAndAtlasEntity = new HashMap<>();
+ private final Map<String, AtlasEntity> dbEntities = new HashMap<>();
+ private final List<Map<String, String>> databaseAndTableListToImport = new ArrayList<>();
+ private final Map<String, String> qualifiedNameGuidMap = new HashMap<>();
+
+ /**
+ * Construct a HiveMetaStoreBridgeV2.
+ * @param atlasProperties Atlas configuration properties
+ * @param hiveConf {@link HiveConf} for the Hive component in the cluster
+ * @param atlasClientV2 Atlas client used to look up existing entities and run the import
+ */
+ public HiveMetaStoreBridgeV2(Configuration atlasProperties, HiveConf hiveConf, AtlasClientV2 atlasClientV2) throws Exception {
+ this.metadataNamespace = getMetadataNamespace(atlasProperties);
+ this.hiveClient = Hive.get(hiveConf);
+ this.atlasClientV2 = atlasClientV2;
+ this.convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false);
+ this.awsS3AtlasModelVersion = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2);
+
+ if (atlasProperties != null) {
+ pageLimit = atlasProperties.getInteger(HOOK_HIVE_PAGE_LIMIT, DEFAULT_PAGE_LIMIT);
+ }
+ }
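+
+ // Example configuration read here (property names follow from the constants above; the values are
+ // illustrative only, not defaults mandated by this change):
+ //   atlas.metadata.namespace=cm
+ //   atlas.hook.hive.hdfs_path.convert_to_lowercase=false
+ //   atlas.hook.hive.aws_s3.atlas.model.version=v2
+ //   atlas.hook.hive.page.limit=10000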
+
+ public boolean exportDataToZipAndRunAtlasImport(CommandLine cmd) throws MissingArgumentException, IOException, HiveException, AtlasBaseException {
+ boolean ret = true;
+ boolean failOnError = cmd.hasOption("failOnError");
+
+ String databaseToImport = cmd.getOptionValue(OPTION_DATABASE_SHORT);
+ String tableToImport = cmd.getOptionValue(OPTION_TABLE_SHORT);
+ String importDataFile = cmd.getOptionValue(OPTION_IMPORT_DATA_FILE_SHORT);
+ String outputFileOrPath = cmd.getOptionValue(OPTION_OUTPUT_FILEPATH_SHORT);
+
+ boolean ignoreBulkImport = cmd.hasOption(OPTION_IGNORE_BULK_IMPORT_SHORT);
+
+ validateOutputFileOrPath(outputFileOrPath);
+
+ try {
+ initializeZipStream();
+
+ if (isValidImportDataFile(importDataFile)) {
+ File f = new File(importDataFile);
+
+ // use try-with-resources so the reader is always closed, even if an import fails
+ try (BufferedReader br = new BufferedReader(new FileReader(f))) {
+ String line;
+
+ while ((line = br.readLine()) != null) {
+ String[] val = line.split(":");
+
+ if (ArrayUtils.isNotEmpty(val)) {
+ databaseToImport = val[0];
+
+ if (val.length > 1) {
+ tableToImport = val[1];
+ } else {
+ tableToImport = "";
+ }
+
+ importHiveDatabases(databaseToImport, tableToImport, failOnError);
+ }
+ }
+ }
+ } else {
+ importHiveDatabases(databaseToImport, tableToImport, failOnError);
+ }
+
+ importHiveTables(failOnError);
+ importHiveColumns(failOnError);
+ } finally {
+ endWritingAndZipStream();
+ }
+
+ if (!ignoreBulkImport) {
+ runAtlasImport();
+ }
+
+ return ret;
+ }
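+
+ // Note on the import data file (-f): based on the split(":") above, each line is expected to be
+ // either "<database>" or "<database>:<table>". An illustrative file (hypothetical names):
+ //   sales
+ //   finance:invoices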
+
+ private void validateOutputFileOrPath(String outputFileOrPath) throws MissingArgumentException {
+ if (StringUtils.isBlank(outputFileOrPath)) {
+ throw new MissingArgumentException("Output Path/File can't be empty");
+ }
+
+ File fileOrDirToImport = new File(outputFileOrPath);
+ if (fileOrDirToImport.exists()) {
+ if (fileOrDirToImport.isDirectory()) {
+ this.outZipFileName = outputFileOrPath + File.separator + DEFAULT_ZIP_FILE_NAME;
+ LOG.info("The default output zip file {} will be created at {}", DEFAULT_ZIP_FILE_NAME, outputFileOrPath);
+ } else {
+ throw new MissingArgumentException("output file: " + outputFileOrPath + " already present");
+ }
+ } else if (fileOrDirToImport.getParentFile().isDirectory() && outputFileOrPath.endsWith(".zip")) {
+ LOG.info("The mentioned output zip file {} will be created", outputFileOrPath);
+ this.outZipFileName = outputFileOrPath;
+ } else {
+ throw new MissingArgumentException("Invalid File/Path");
+ }
+ }
+
+ private boolean isValidImportDataFile(String importDataFile) throws MissingArgumentException {
+ boolean ret = false;
+ if (StringUtils.isNotBlank(importDataFile)) {
+ File dataFile = new File(importDataFile);
+
+ if (!dataFile.exists() || !dataFile.canRead()) {
+ throw new MissingArgumentException("Invalid import data file");
+ }
+ ret = true;
+ }
+
+ return ret;
+ }
+
+ private void initializeZipStream() throws IOException, AtlasBaseException {
+ this.zipOutputStream = new ZipOutputStream(getOutputStream(this.outZipFileName));
+
+ storeTypesDefToZip(new AtlasTypesDef());
+
+ startWritingEntitiesToZip();
+ }
+
+ private void storeTypesDefToZip(AtlasTypesDef typesDef) throws AtlasBaseException {
+ String jsonData = AtlasType.toJson(typesDef);
+ saveToZip(TYPES_DEF_JSON, jsonData);
+ }
+
+ private void saveToZip(String fileName, String jsonData) throws AtlasBaseException {
+ try {
+ ZipEntry e = new ZipEntry(fileName);
+ zipOutputStream.putNextEntry(e);
+ writeBytes(jsonData);
+ zipOutputStream.closeEntry();
+ } catch (IOException e) {
+ throw new AtlasBaseException(String.format("Error writing file %s.", fileName), e);
+ }
+ }
+
+ private void startWritingEntitiesToZip() throws IOException {
+ zipOutputStream.putNextEntry(new ZipEntry(ZIP_ENTRY_ENTITIES));
+ writeBytes(JSON_ARRAY_START);
+ }
+
+ private String getDatabaseToImport(String tableWithDatabase) {
+ String ret = null;
+ String[] val = tableWithDatabase.split("\\.");
+ if (val.length > 1) {
+ ret = val[0];
+ }
+ return ret;
+ }
+
+ private String getTableToImport(String tableWithDatabase) {
+ String ret = null;
+ String[] val = tableWithDatabase.split("\\.");
+ if (val.length > 1) {
+ ret = val[1];
+ }
+ return ret;
+ }
+
+ private void importHiveDatabases(String databaseToImport, String tableWithDatabaseToImport, boolean failOnError) throws HiveException, AtlasBaseException {
+ LOG.info("Importing Hive Databases");
+
+ List<String> databaseNames = null;
+
+ if (StringUtils.isEmpty(databaseToImport) && StringUtils.isNotEmpty(tableWithDatabaseToImport)) {
+ if (isTableWithDatabaseName(tableWithDatabaseToImport)) {
+ databaseToImport = getDatabaseToImport(tableWithDatabaseToImport);
+ tableWithDatabaseToImport = getTableToImport(tableWithDatabaseToImport);
+ }
+ }
+
+ if (StringUtils.isEmpty(databaseToImport)) {
+ //when database to import is empty, import all
+ databaseNames = hiveClient.getAllDatabases();
+ } else {
+ //when database to import is specified, import that db and all tables under it
+ databaseNames = hiveClient.getDatabasesByPattern(databaseToImport);
+ }
+
+ if (!CollectionUtils.isEmpty(databaseNames)) {
+ LOG.info("Found {} databases", databaseNames.size());
+ for (String databaseName : databaseNames) {
+ try {
+ if (!dbEntities.containsKey(databaseName)) {
+ LOG.info("Importing Hive Database {}", databaseName);
+ AtlasEntityWithExtInfo dbEntity = writeDatabase(databaseName);
+ if (dbEntity != null) {
+ dbEntities.put(databaseName, dbEntity.getEntity());
+ }
+ }
+ databaseAndTableListToImport.add(Collections.singletonMap(databaseName, tableWithDatabaseToImport));
+ } catch (IOException e) {
+ LOG.error("Import failed for hive database {}", databaseName, e);
+
+ if (failOnError) {
+ throw new AtlasBaseException(e.getMessage(), e);
+ }
+ }
+ }
+ } else {
+ LOG.error("No database found");
+ if (failOnError) {
+ throw new AtlasBaseException("No database found");
+ }
+ }
+ }
+
+ private void writeEntity(AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) throws IOException {
+ if (MapUtils.isNotEmpty(entityWithExtInfo.getReferredEntities())) {
+ Iterator<Map.Entry<String, AtlasEntity>> itr = entityWithExtInfo.getReferredEntities().entrySet().iterator();
+ while (itr.hasNext()) {
+ Map.Entry<String, AtlasEntity> eachEntity = itr.next();
+ if (eachEntity.getValue().getTypeName().equalsIgnoreCase(HiveDataTypes.HIVE_DB.getName())) {
+ itr.remove();
+ }
+ }
+ }
+
+ if (!entityLRUCache.containsKey(entityWithExtInfo.getEntity().getGuid())) {
+ entityLRUCache.put(entityWithExtInfo.getEntity().getGuid(), entityWithExtInfo);
+ writeBytes(AtlasType.toJson(entityWithExtInfo) + JSON_COMMA);
+ }
+ totalProcessedEntities++;
+ }
+
+ private void endWritingAndZipStream() throws IOException {
+ writeBytes(JSON_EMPTY_OBJECT);
+ writeBytes(JSON_ARRAY_END);
+ setStreamSize(totalProcessedEntities);
+ close();
+ }
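+
+ // Resulting zip layout, as produced by initializeZipStream/saveToZip and the methods above:
+ // "atlas-typesdef.json" followed by "entities.json", which holds a JSON array of entities
+ // terminated by an empty object; the processed-entity count is recorded in the zip comment
+ // (see setStreamSize).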
+
+ private void flush() {
+ try {
+ zipOutputStream.flush();
+ } catch (IOException e) {
+ LOG.error("Error: Flush: ", e);
+ }
+ }
+
+ private void close() throws IOException {
+ zipOutputStream.flush();
+ zipOutputStream.closeEntry();
+ zipOutputStream.close();
+ }
+
+ private void writeBytes(String payload) throws IOException {
+ zipOutputStream.write(payload.getBytes());
+ }
+
+ private OutputStream getOutputStream(String fileToWrite) throws IOException {
+ return FileUtils.openOutputStream(new File(fileToWrite));
+ }
+
+ public String getMetadataNamespace(Configuration config) {
+ return AtlasConfigurationUtil.getRecentString(config, HIVE_METADATA_NAMESPACE, getClusterName(config));
+ }
+
+ private String getClusterName(Configuration config) {
+ return config.getString(CLUSTER_NAME_KEY, DEFAULT_CLUSTER_NAME);
+ }
+
+ public String getMetadataNamespace() {
+ return metadataNamespace;
+ }
+
+ public boolean isConvertHdfsPathToLowerCase() {
+ return convertHdfsPathToLowerCase;
+ }
+
+ /**
+ * Imports Hive tables for the databases collected in databaseAndTableListToImport.
+ * @param failOnError whether to abort the import on the first error
+ * @throws HiveException if the Hive metastore cannot be queried
+ * @throws AtlasBaseException if the import fails and failOnError is set
+ */
+ public void importHiveTables(boolean failOnError) throws HiveException, AtlasBaseException {
+ LOG.info("Importing Hive Tables");
+
+ int tablesImported = 0;
+
+ if (CollectionUtils.isNotEmpty(databaseAndTableListToImport) && MapUtils.isNotEmpty(dbEntities)) {
+ for (Map<String, String> eachEntry : databaseAndTableListToImport) {
+ final List<Table> tableObjects;
+
+ String databaseName = eachEntry.keySet().iterator().next();
+
+ if (StringUtils.isEmpty(eachEntry.values().iterator().next())) {
+ tableObjects = hiveClient.getAllTableObjects(databaseName);
+
+ populateQualifiedNameGuidMap(HiveDataTypes.HIVE_DB.getName(), (String) dbEntities.get(databaseName).getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+ } else {
+ List<String> tableNames = hiveClient.getTablesByPattern(databaseName, eachEntry.values().iterator().next());
+ tableObjects = new ArrayList<>();
+
+ for (String tableName : tableNames) {
+ Table table = hiveClient.getTable(databaseName, tableName);
+ tableObjects.add(table);
+ populateQualifiedNameGuidMap(HiveDataTypes.HIVE_TABLE.getName(), getTableQualifiedName(metadataNamespace, table));
+ }
+ }
+
+ if (!CollectionUtils.isEmpty(tableObjects)) {
+ LOG.info("Found {} tables to import in database {}", tableObjects.size(), databaseName);
+
+ try {
+ for (Table table : tableObjects) {
+ int imported = importTable(dbEntities.get(databaseName), table, failOnError);
+
+ tablesImported += imported;
+ }
+ } finally {
+ if (tablesImported == tableObjects.size()) {
+ LOG.info("Successfully imported {} tables from database {}", tablesImported, databaseName);
+ } else {
+ LOG.error("Imported {} of {} tables from database {}. Please check logs for errors during import",
+ tablesImported, tableObjects.size(), databaseName);
+ }
+ }
+ } else {
+ LOG.error("No tables to import in database {}", databaseName);
+ if (failOnError) {
+ throw new AtlasBaseException("No tables to import in database - " + databaseName);
+ }
+ }
+ }
+ }
+
+ dbEntities.clear();
+ }
+
+ private void populateQualifiedNameGuidMap(String typeName, String qualifiedName) {
+ try {
+ AtlasEntitiesWithExtInfo entitiesWithExtInfo = atlasClientV2.getEntitiesByAttribute(typeName, Collections.singletonList(Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, qualifiedName)), true, false);
+
+ if (entitiesWithExtInfo != null && entitiesWithExtInfo.getEntities() != null) {
+ for (AtlasEntity entity : entitiesWithExtInfo.getEntities()) {
+ qualifiedNameGuidMap.put((String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), entity.getGuid());
+
+ for (Map.Entry<String, AtlasEntity> eachEntry : entitiesWithExtInfo.getReferredEntities().entrySet()) {
+ qualifiedNameGuidMap.put((String) eachEntry.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME), eachEntry.getKey());
+ }
+
+ if (typeName.equals(HiveDataTypes.HIVE_DB.getName())) {
+ for (String eachRelatedGuid : getAllRelatedGuids(entity)) {
+ AtlasEntityWithExtInfo relatedEntity = atlasClientV2.getEntityByGuid(eachRelatedGuid, true, false);
+
+ qualifiedNameGuidMap.put((String) relatedEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME), relatedEntity.getEntity().getGuid());
+ for (Map.Entry<String, AtlasEntity> eachEntry : relatedEntity.getReferredEntities().entrySet()) {
+ qualifiedNameGuidMap.put((String) eachEntry.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME), eachEntry.getKey());
+ }
+ }
+ }
+ }
+ }
+ } catch (AtlasServiceException e) {
+ LOG.info("Unable to load the related entities for type {} and qualified name {} from Atlas", typeName, qualifiedName, e);
+ }
+ }
+
+ private Set<String> getAllRelatedGuids(AtlasEntity entity) {
+ Set<String> relGuidsSet = new HashSet<>();
+
+ for (Object o : entity.getRelationshipAttributes().values()) {
+ if (o instanceof AtlasObjectId) {
+ relGuidsSet.add(((AtlasObjectId) o).getGuid());
+ } else if (o instanceof List) {
+ for (Object id : (List) o) {
+ if (id instanceof AtlasObjectId) {
+ relGuidsSet.add(((AtlasObjectId) id).getGuid());
+ }
+ if (id instanceof Map) {
+ relGuidsSet.add((String) ((Map) id).get("guid"));
+ }
+ }
+ }
+ }
+
+ return relGuidsSet;
+ }
+
+ public void importHiveColumns(boolean failOnError) throws AtlasBaseException {
+ LOG.info("Importing Hive Columns");
+
+ if (MapUtils.isEmpty(hiveTablesAndAtlasEntity)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("No hive table present to import columns");
+ }
+
+ return;
+ }
+
+ for (Map.Entry<Table, AtlasEntity> eachTable : hiveTablesAndAtlasEntity.entrySet()) {
+ int columnsImported = 0;
+ List<AtlasEntity> columnEntities = new ArrayList<>();
+
+ try {
+ List<AtlasEntity> partKeys = toColumns(eachTable.getKey().getPartitionKeys(), eachTable.getValue(), RELATIONSHIP_HIVE_TABLE_PART_KEYS);
+ List<AtlasEntity> columns = toColumns(eachTable.getKey().getCols(), eachTable.getValue(), RELATIONSHIP_HIVE_TABLE_COLUMNS);
+
+ columnEntities.addAll(partKeys);
+ columnEntities.addAll(columns);
+
+ for (AtlasEntity eachColumnEntity : columnEntities) {
+ writeEntityToZip(new AtlasEntityWithExtInfo(eachColumnEntity));
+ columnsImported++;
+ }
+ } catch (IOException e) {
+ LOG.error("Column Import failed for hive table {}", eachTable.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME), e);
+
+ if (failOnError) {
+ throw new AtlasBaseException(e.getMessage(), e);
+ }
+ } finally {
+ if (columnsImported == columnEntities.size()) {
+ LOG.info("Successfully imported {} columns for table {}", columnsImported, eachTable.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+ } else {
+ LOG.error("Imported {} of {} columns for table {}. Please check logs for errors during import", columnsImported, columnEntities.size(), eachTable.getValue().getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+ }
+ }
+ }
+
+ }
+
+ private void runAtlasImport() {
+ AtlasImportRequest request = new AtlasImportRequest();
+ request.setOption(AtlasImportRequest.UPDATE_TYPE_DEFINITION_KEY, "false");
+ request.setOption(AtlasImportRequest.OPTION_KEY_FORMAT, AtlasImportRequest.OPTION_KEY_FORMAT_ZIP_DIRECT);
+
+ try {
+ AtlasImportResult importResult = atlasClientV2.importData(request, this.outZipFileName);
+
+ if (importResult.getOperationStatus() == AtlasImportResult.OperationStatus.SUCCESS) {
+ LOG.info("Successfully imported the zip file {} at Atlas and imported {} entities. Number of entities to be imported {}.", this.outZipFileName, importResult.getProcessedEntities().size(), totalProcessedEntities);
+ } else {
+ LOG.error("Failed to import or get the status of import for the zip file {} at Atlas. Number of entities to be imported {}.", this.outZipFileName, totalProcessedEntities);
+ }
+ } catch (AtlasServiceException e) {
+ LOG.error("Failed to import or get the status of import for the zip file {} at Atlas. Number of entities to be imported {}.", this.outZipFileName, totalProcessedEntities, e);
+ }
+ }
+
+ public int importTable(AtlasEntity dbEntity, Table table, final boolean failOnError) throws AtlasBaseException {
+ try {
+ AtlasEntityWithExtInfo tableEntity = writeTable(dbEntity, table);
+
+ hiveTablesAndAtlasEntity.put(table, tableEntity.getEntity());
+
+ if (table.getTableType() == TableType.EXTERNAL_TABLE) {
+ String processQualifiedName = getTableProcessQualifiedName(metadataNamespace, table);
+ String tableLocationString = isConvertHdfsPathToLowerCase() ? lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
+ Path location = table.getDataLocation();
+ String query = getCreateTableString(table, tableLocationString);
+
+ PathExtractorContext pathExtractorCtx = new PathExtractorContext(getMetadataNamespace(), isConvertHdfsPathToLowerCase(), awsS3AtlasModelVersion);
+ AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(location, pathExtractorCtx);
+ AtlasEntity pathInst = entityWithExtInfo.getEntity();
+ AtlasEntity tableInst = tableEntity.getEntity();
+ AtlasEntity processInst = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName());
+
+ long now = System.currentTimeMillis();
+
+ processInst.setGuid(getGuid(processQualifiedName));
+ processInst.setAttribute(ATTRIBUTE_QUALIFIED_NAME, processQualifiedName);
+ processInst.setAttribute(ATTRIBUTE_NAME, query);
+ processInst.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace);
+ processInst.setRelationshipAttribute(ATTRIBUTE_INPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(pathInst, RELATIONSHIP_DATASET_PROCESS_INPUTS)));
+ processInst.setRelationshipAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(tableInst, RELATIONSHIP_PROCESS_DATASET_OUTPUTS)));
+ String userName = table.getOwner();
+ if (StringUtils.isEmpty(userName)) {
+ userName = ApplicationProperties.get().getString(HIVE_USERNAME, "hive");
+ }
+ processInst.setAttribute(ATTRIBUTE_USER_NAME, userName);
+ processInst.setAttribute(ATTRIBUTE_START_TIME, now);
+ processInst.setAttribute(ATTRIBUTE_END_TIME, now);
+ processInst.setAttribute(ATTRIBUTE_OPERATION_TYPE, "CREATETABLE");
+ processInst.setAttribute(ATTRIBUTE_QUERY_TEXT, query);
+ processInst.setAttribute(ATTRIBUTE_QUERY_ID, query);
+ processInst.setAttribute(ATTRIBUTE_QUERY_PLAN, "{}");
+ processInst.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(query));
+
+ AtlasEntitiesWithExtInfo createTableProcess = new AtlasEntitiesWithExtInfo();
+
+ createTableProcess.addEntity(processInst);
+
+ if (pathExtractorCtx.getKnownEntities() != null) {
+ pathExtractorCtx.getKnownEntities().values().forEach(entity -> createTableProcess.addEntity(entity));
+ } else {
+ createTableProcess.addEntity(pathInst);
+ }
+
+ writeEntitiesToZip(createTableProcess);
+ }
+
+ return 1;
+ } catch (Exception e) {
+ LOG.error("Import failed for hive_table {}", table.getTableName(), e);
+
+ if (failOnError) {
+ throw new AtlasBaseException(e.getMessage(), e);
+ }
+
+ return 0;
+ }
+ }
+
+ /**
+ * Writes the database entity to the zip file.
+ * @param databaseName name of the Hive database
+ * @return the database entity that was written, or null if the database was not found
+ * @throws HiveException if the database cannot be fetched from the metastore
+ * @throws IOException if writing to the zip file fails
+ */
+ private AtlasEntityWithExtInfo writeDatabase(String databaseName) throws HiveException, IOException {
+ AtlasEntityWithExtInfo ret = null;
+ Database db = hiveClient.getDatabase(databaseName);
+
+ if (db != null) {
+ ret = new AtlasEntityWithExtInfo(toDbEntity(db));
+ writeEntityToZip(ret);
+ }
+
+ return ret;
+ }
+
+ private AtlasEntityWithExtInfo writeTable(AtlasEntity dbEntity, Table table) throws AtlasHookException {
+ try {
+ AtlasEntityWithExtInfo tableEntity = toTableEntity(dbEntity, table);
+ writeEntityToZip(tableEntity);
+
+ return tableEntity;
+ } catch (Exception e) {
+ throw new AtlasHookException("HiveMetaStoreBridgeV2.registerTable() failed.", e);
+ }
+ }
+
+ /**
+ * Writes an entity to the zip file.
+ * @param entity entity to write
+ * @throws IOException if writing to the zip file fails
+ */
+ private void writeEntityToZip(AtlasEntityWithExtInfo entity) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Writing {} entity: {}", entity.getEntity().getTypeName(), entity);
+ }
+
+ writeEntity(entity);
+ clearRelationshipAttributes(entity.getEntity());
+ flush();
+ }
+
+ /**
+ * Writes a set of entities to the zip file.
+ * @param entities entities to write
+ * @throws IOException if writing to the zip file fails
+ */
+ private void writeEntitiesToZip(AtlasEntitiesWithExtInfo entities) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Writing {} entities: {}", entities.getEntities().size(), entities);
+ }
+
+ for (AtlasEntity entity : entities.getEntities()) {
+ writeEntity(new AtlasEntityWithExtInfo(entity));
+ }
+
+ flush();
+ clearRelationshipAttributes(entities);
+ }
+
+ /**
+ * Creates a Hive database entity.
+ * @param hiveDB the Hive {@link Database} object from which to map properties
+ * @return new Hive database AtlasEntity
+ */
+ private AtlasEntity toDbEntity(Database hiveDB) {
+ return toDbEntity(hiveDB, null);
+ }
+
+ private AtlasEntity toDbEntity(Database hiveDB, AtlasEntity dbEntity) {
+ if (dbEntity == null) {
+ dbEntity = new AtlasEntity(HiveDataTypes.HIVE_DB.getName());
+ }
+
+ String dbName = getDatabaseName(hiveDB);
+
+ String qualifiedName = getDBQualifiedName(metadataNamespace, dbName);
+ dbEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, qualifiedName);
+
+ dbEntity.setGuid(getGuid(true, qualifiedName));
+
+ dbEntity.setAttribute(ATTRIBUTE_NAME, dbName);
+ dbEntity.setAttribute(ATTRIBUTE_DESCRIPTION, hiveDB.getDescription());
+ dbEntity.setAttribute(ATTRIBUTE_OWNER, hiveDB.getOwnerName());
+
+ dbEntity.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace);
+ dbEntity.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(hiveDB.getLocationUri()));
+ dbEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveDB.getParameters());
+
+ if (hiveDB.getOwnerType() != null) {
+ dbEntity.setAttribute(ATTRIBUTE_OWNER_TYPE, OWNER_TYPE_TO_ENUM_VALUE.get(hiveDB.getOwnerType().getValue()));
+ }
+
+ return dbEntity;
+ }
+
+ private String getDBGuidFromAtlas(String dBQualifiedName) {
+ String guid = null;
+ try {
+ guid = atlasClientV2.getEntityHeaderByAttribute(HiveDataTypes.HIVE_DB.getName(), Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, dBQualifiedName)).getGuid();
+ } catch (AtlasServiceException e) {
+ LOG.warn("Failed to get DB guid from Atlas with qualified name {}", dBQualifiedName, e);
+ }
+ return guid;
+ }
+
+ public static String getDatabaseName(Database hiveDB) {
+ String dbName = hiveDB.getName().toLowerCase();
+ String catalogName = hiveDB.getCatalogName() != null ? hiveDB.getCatalogName().toLowerCase() : null;
+
+ if (StringUtils.isNotEmpty(catalogName) && !StringUtils.equals(catalogName, DEFAULT_METASTORE_CATALOG)) {
+ dbName = catalogName + SEP + dbName;
+ }
+
+ return dbName;
+ }
+
+ /**
+ * Creates a new table entity.
+ * @param database AtlasEntity of the Hive database to which this table belongs
+ * @param hiveTable reference to the Hive {@link Table} from which to map properties
+ * @return newly created Hive table AtlasEntity
+ * @throws AtlasHookException if the table entity cannot be created
+ */
+ private AtlasEntityWithExtInfo toTableEntity(AtlasEntity database, final Table hiveTable) throws AtlasHookException {
+ AtlasEntityWithExtInfo table = new AtlasEntityWithExtInfo(new AtlasEntity(HiveDataTypes.HIVE_TABLE.getName()));
+
+ AtlasEntity tableEntity = table.getEntity();
+ String tableQualifiedName = getTableQualifiedName(metadataNamespace, hiveTable);
+ long createTime = BaseHiveEvent.getTableCreateTime(hiveTable);
+ long lastAccessTime = hiveTable.getLastAccessTime() > 0 ? hiveTable.getLastAccessTime() : createTime;
+
+ tableEntity.setGuid(getGuid(tableQualifiedName));
+ tableEntity.setRelationshipAttribute(ATTRIBUTE_DB, AtlasTypeUtil.getAtlasRelatedObjectId(database, RELATIONSHIP_HIVE_TABLE_DB));
+ tableEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName);
+ tableEntity.setAttribute(ATTRIBUTE_NAME, hiveTable.getTableName().toLowerCase());
+ tableEntity.setAttribute(ATTRIBUTE_OWNER, hiveTable.getOwner());
+
+ tableEntity.setAttribute(ATTRIBUTE_CREATE_TIME, createTime);
+ tableEntity.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime);
+ tableEntity.setAttribute(ATTRIBUTE_RETENTION, hiveTable.getRetention());
+ tableEntity.setAttribute(ATTRIBUTE_PARAMETERS, hiveTable.getParameters());
+ tableEntity.setAttribute(ATTRIBUTE_COMMENT, hiveTable.getParameters().get(ATTRIBUTE_COMMENT));
+ tableEntity.setAttribute(ATTRIBUTE_TABLE_TYPE, hiveTable.getTableType().name());
+ tableEntity.setAttribute(ATTRIBUTE_TEMPORARY, hiveTable.isTemporary());
+
+ if (hiveTable.getViewOriginalText() != null) {
+ tableEntity.setAttribute(ATTRIBUTE_VIEW_ORIGINAL_TEXT, hiveTable.getViewOriginalText());
+ }
+
+ if (hiveTable.getViewExpandedText() != null) {
+ tableEntity.setAttribute(ATTRIBUTE_VIEW_EXPANDED_TEXT, hiveTable.getViewExpandedText());
+ }
+
+ AtlasEntity sdEntity = toStorageDescEntity(hiveTable.getSd(), getStorageDescQFName(tableQualifiedName), AtlasTypeUtil.getObjectId(tableEntity));
+
+ tableEntity.setRelationshipAttribute(ATTRIBUTE_STORAGEDESC, AtlasTypeUtil.getAtlasRelatedObjectId(sdEntity, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC));
+
+ table.addReferredEntity(database);
+ table.addReferredEntity(sdEntity);
+ table.setEntity(tableEntity);
+
+ return table;
+ }
+
+ private AtlasEntity toStorageDescEntity(StorageDescriptor storageDesc, String sdQualifiedName, AtlasObjectId tableId) {
+ AtlasEntity ret = new AtlasEntity(HiveDataTypes.HIVE_STORAGEDESC.getName());
+
+ ret.setGuid(getGuid(sdQualifiedName));
+ ret.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(tableId, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC));
+ ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName);
+ ret.setAttribute(ATTRIBUTE_PARAMETERS, storageDesc.getParameters());
+ ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(storageDesc.getLocation()));
+ ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, storageDesc.getInputFormat());
+ ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, storageDesc.getOutputFormat());
+ ret.setAttribute(ATTRIBUTE_COMPRESSED, storageDesc.isCompressed());
+ ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, storageDesc.getNumBuckets());
+ ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, storageDesc.isStoredAsSubDirectories());
+
+ if (storageDesc.getBucketCols().size() > 0) {
+ ret.setAttribute(ATTRIBUTE_BUCKET_COLS, storageDesc.getBucketCols());
+ }
+
+ if (storageDesc.getSerdeInfo() != null) {
+ SerDeInfo serdeInfo = storageDesc.getSerdeInfo();
+
+ LOG.info("serdeInfo = {}", serdeInfo);
+ AtlasStruct serdeInfoStruct = new AtlasStruct(HiveDataTypes.HIVE_SERDE.getName());
+
+ serdeInfoStruct.setAttribute(ATTRIBUTE_NAME, serdeInfo.getName());
+ serdeInfoStruct.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, serdeInfo.getSerializationLib());
+ serdeInfoStruct.setAttribute(ATTRIBUTE_PARAMETERS, serdeInfo.getParameters());
+
+ ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfoStruct);
+ }
+
+ if (CollectionUtils.isNotEmpty(storageDesc.getSortCols())) {
+ List<AtlasStruct> sortColsStruct = new ArrayList<>();
+
+ for (Order sortcol : storageDesc.getSortCols()) {
+ String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName();
+ AtlasStruct colStruct = new AtlasStruct(hiveOrderName);
+ colStruct.setAttribute("col", sortcol.getCol());
+ colStruct.setAttribute("order", sortcol.getOrder());
+
+ sortColsStruct.add(colStruct);
+ }
+
+ ret.setAttribute(ATTRIBUTE_SORT_COLS, sortColsStruct);
+ }
+
+ return ret;
+ }
+
+ private List<AtlasEntity> toColumns(List<FieldSchema> schemaList, AtlasEntity table, String relationshipType) {
+ List<AtlasEntity> ret = new ArrayList<>();
+
+ int columnPosition = 0;
+ for (FieldSchema fs : schemaList) {
+ LOG.debug("Processing field {}", fs);
+
+ AtlasEntity column = new AtlasEntity(HiveDataTypes.HIVE_COLUMN.getName());
+
+ String columnQualifiedName = getColumnQualifiedName((String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME), fs.getName());
+
+ column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, columnQualifiedName);
+ column.setGuid(getGuid(columnQualifiedName));
+
+ column.setRelationshipAttribute(ATTRIBUTE_TABLE, AtlasTypeUtil.getAtlasRelatedObjectId(table, relationshipType));
+
+ column.setAttribute(ATTRIBUTE_NAME, fs.getName());
+ column.setAttribute(ATTRIBUTE_OWNER, table.getAttribute(ATTRIBUTE_OWNER));
+ column.setAttribute(ATTRIBUTE_COL_TYPE, fs.getType());
+ column.setAttribute(ATTRIBUTE_COL_POSITION, columnPosition++);
+ column.setAttribute(ATTRIBUTE_COMMENT, fs.getComment());
+
+ ret.add(column);
+ }
+ return ret;
+ }
+
+ private String getCreateTableString(Table table, String location) {
+ String colString = "";
+ List<FieldSchema> colList = table.getAllCols();
+
+ if (colList != null) {
+ for (FieldSchema col : colList) {
+ colString += col.getName() + " " + col.getType() + ",";
+ }
+
+ if (colList.size() > 0) {
+ colString = colString.substring(0, colString.length() - 1);
+ colString = "(" + colString + ")";
+ }
+ }
+
+ String query = "create external table " + table.getTableName() + colString + " location '" + location + "'";
+
+ return query;
+ }
+
+ private String lower(String str) {
+ if (StringUtils.isEmpty(str)) {
+ return "";
+ }
+
+ return str.toLowerCase().trim();
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a Table instance in Atlas.
+ * @param metadataNamespace Metadata namespace of the cluster to which the Hive component belongs
+ * @param table hive table for which the qualified name is needed
+ * @return Unique qualified name to identify the Table instance in Atlas.
+ */
+ private static String getTableQualifiedName(String metadataNamespace, Table table) {
+ return getTableQualifiedName(metadataNamespace, table.getDbName(), table.getTableName(), table.isTemporary());
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a Database instance in Atlas.
+ * @param metadataNamespace Name of the cluster to which the Hive component belongs
+ * @param dbName Name of the Hive database
+ * @return Unique qualified name to identify the Database instance in Atlas.
+ */
+ public static String getDBQualifiedName(String metadataNamespace, String dbName) {
+ return String.format("%s@%s", dbName.toLowerCase(), metadataNamespace);
+ }
+
+ /**
+ * Construct the qualified name used to uniquely identify a Table instance in Atlas.
+ * @param metadataNamespace Name of the cluster to which the Hive component belongs
+ * @param dbName Name of the Hive database to which the Table belongs
+ * @param tableName Name of the Hive table
+ * @param isTemporaryTable is this a temporary table
+ * @return Unique qualified name to identify the Table instance in Atlas.
+ */
+ public static String getTableQualifiedName(String metadataNamespace, String dbName, String tableName, boolean isTemporaryTable) {
+ String tableTempName = tableName;
+
+ if (isTemporaryTable) {
+ if (SessionState.get() != null && SessionState.get().getSessionId() != null) {
+ tableTempName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId();
+ } else {
+ tableTempName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10);
+ }
+ }
+
+ return String.format("%s.%s@%s", dbName.toLowerCase(), tableTempName.toLowerCase(), metadataNamespace);
+ }
+
+ public static String getTableProcessQualifiedName(String metadataNamespace, Table table) {
+ String tableQualifiedName = getTableQualifiedName(metadataNamespace, table);
+ long createdTime = getTableCreatedTime(table);
+
+ return tableQualifiedName + SEP + createdTime;
+ }
+
+ public static String getStorageDescQFName(String tableQualifiedName) {
+ return tableQualifiedName + "_storage";
+ }
+
+ public static String getColumnQualifiedName(final String tableQualifiedName, final String colName) {
+ final String[] parts = tableQualifiedName.split("@");
+ final String tableName = parts[0];
+ final String metadataNamespace = parts[1];
+
+ return String.format("%s.%s@%s", tableName, colName.toLowerCase(), metadataNamespace);
+ }
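+
+ // Illustrative example (hypothetical values): for tableQualifiedName "sales.orders@cm" and
+ // column "Amount", this returns "sales.orders.amount@cm".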
+
+ public static long getTableCreatedTime(Table table) {
+ return table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR;
+ }
+
+ private void clearRelationshipAttributes(AtlasEntitiesWithExtInfo entities) {
+ if (entities != null) {
+ if (entities.getEntities() != null) {
+ for (AtlasEntity entity : entities.getEntities()) {
+ clearRelationshipAttributes(entity);
+ }
+ }
+
+ if (entities.getReferredEntities() != null) {
+ clearRelationshipAttributes(entities.getReferredEntities().values());
+ }
+ }
+ }
+
+ private void clearRelationshipAttributes(Collection<AtlasEntity> entities) {
+ if (entities != null) {
+ for (AtlasEntity entity : entities) {
+ clearRelationshipAttributes(entity);
+ }
+ }
+ }
+
+ private void clearRelationshipAttributes(AtlasEntity entity) {
+ if (entity != null && entity.getRelationshipAttributes() != null) {
+ entity.getRelationshipAttributes().clear();
+ }
+ }
+
+ private boolean isTableWithDatabaseName(String tableName) {
+ boolean ret = false;
+ if (tableName.contains(".")) {
+ ret = true;
+ }
+ return ret;
+ }
+
+ private String getGuid(String qualifiedName) {
+ return getGuid(false, qualifiedName);
+ }
+
+ private String getGuid(boolean isDBType, String qualifiedName) {
+ String guid = null;
+
+ if (qualifiedNameGuidMap.containsKey(qualifiedName)) {
+ guid = qualifiedNameGuidMap.get(qualifiedName);
+ } else if (isDBType) {
+ guid = getDBGuidFromAtlas(qualifiedName);
+ }
+
+ if (StringUtils.isBlank(guid)) {
+ guid = generateGuid();
+ }
+
+ return guid;
+ }
+
+ private String generateGuid() {
+ return UUID.randomUUID().toString();
+ }
+
+ public void setStreamSize(long size) {
+ zipOutputStream.setComment(String.format(ZIP_FILE_COMMENT_FORMAT, size, -1));
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java
new file mode 100644
index 0000000000..14cc2f2017
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook;
+
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.hive.hook.HiveMetastoreHookImpl.HiveMetastoreHook;
+import org.apache.atlas.hive.hook.HiveHook.PreprocessAction;
+import org.apache.atlas.hive.hook.HiveHook.HiveHookObjectNamesCache;
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.hadoop.hive.metastore.IHMSHandler;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.events.*;
+import org.apache.hadoop.hive.ql.hooks.*;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+import java.util.*;
+
+import static org.apache.atlas.hive.bridge.HiveMetaStoreBridge.getDatabaseName;
+import static org.apache.atlas.hive.hook.events.BaseHiveEvent.toTable;
+
+
+public class AtlasHiveHookContext {
+ public static final char QNAME_SEP_METADATA_NAMESPACE = '@';
+ public static final char QNAME_SEP_ENTITY_NAME = '.';
+ public static final char QNAME_SEP_PROCESS = ':';
+ public static final String TEMP_TABLE_PREFIX = "_temp-";
+ public static final String CREATE_OPERATION = "CREATE";
+ public static final String ALTER_OPERATION = "ALTER";
+
+ private final HiveHook hook;
+ private final HiveOperation hiveOperation;
+ private final HookContext hiveContext;
+ private final Hive hive;
+ private final Map<String, AtlasEntity> qNameEntityMap = new HashMap<>();
+ private final HiveHookObjectNamesCache knownObjects;
+ private final HiveMetastoreHook metastoreHook;
+ private final ListenerEvent metastoreEvent;
+ private final IHMSHandler metastoreHandler;
+
+ private boolean isSkippedInputEntity;
+ private boolean isSkippedOutputEntity;
+ private boolean skipTempTables;
+
+ public AtlasHiveHookContext(HiveHook hook, HiveOperation hiveOperation, HookContext hiveContext,
+ HiveHookObjectNamesCache knownObjects, boolean skipTempTables) throws Exception {
+ this(hook, hiveOperation, hiveContext, knownObjects, null, null, skipTempTables);
+ }
+
+ public AtlasHiveHookContext(HiveHook hook, HiveOperation hiveOperation, HiveHookObjectNamesCache knownObjects,
+ HiveMetastoreHook metastoreHook, ListenerEvent listenerEvent, boolean skipTempTables) throws Exception {
+ this(hook, hiveOperation, null, knownObjects, metastoreHook, listenerEvent, skipTempTables);
+ }
+
+ public AtlasHiveHookContext(HiveHook hook, HiveOperation hiveOperation, HookContext hiveContext, HiveHookObjectNamesCache knownObjects,
+ HiveMetastoreHook metastoreHook, ListenerEvent listenerEvent, boolean skipTempTables) throws Exception {
+ this.hook = hook;
+ this.hiveOperation = hiveOperation;
+ this.hiveContext = hiveContext;
+ this.hive = hiveContext != null ? Hive.get(hiveContext.getConf()) : null;
+ this.knownObjects = knownObjects;
+ this.metastoreHook = metastoreHook;
+ this.metastoreEvent = listenerEvent;
+ this.metastoreHandler = (listenerEvent != null) ? metastoreEvent.getIHMSHandler() : null;
+ this.skipTempTables = skipTempTables;
+
+ init();
+ }
+
+ public boolean isMetastoreHook() {
+ return metastoreHook != null;
+ }
+
+ public ListenerEvent getMetastoreEvent() {
+ return metastoreEvent;
+ }
+
+ public IHMSHandler getMetastoreHandler() {
+ return metastoreHandler;
+ }
+
+ public Set<ReadEntity> getInputs() {
+ return hiveContext != null ? hiveContext.getInputs() : Collections.emptySet();
+ }
+
+ public Set<WriteEntity> getOutputs() {
+ return hiveContext != null ? hiveContext.getOutputs() : Collections.emptySet();
+ }
+
+ public boolean isSkippedInputEntity() {
+ return isSkippedInputEntity;
+ }
+
+ public boolean isSkippedOutputEntity() {
+ return isSkippedOutputEntity;
+ }
+
+ public void registerSkippedEntity(Entity entity) {
+ if (entity instanceof ReadEntity) {
+ registerSkippedInputEntity();
+ } else if (entity instanceof WriteEntity) {
+ registerSkippedOutputEntity();
+ }
+ }
+
+ public void registerSkippedInputEntity() {
+ if (!isSkippedInputEntity) {
+ isSkippedInputEntity = true;
+ }
+ }
+
+ public void registerSkippedOutputEntity() {
+ if (!isSkippedOutputEntity) {
+ isSkippedOutputEntity = true;
+ }
+ }
+
+ public boolean isSkipTempTables() {
+ return skipTempTables;
+ }
+
+ public LineageInfo getLineageInfo() {
+ return hiveContext != null ? hiveContext.getLinfo() : null;
+ }
+
+ public HookContext getHiveContext() {
+ return hiveContext;
+ }
+
+ public Hive getHive() {
+ return hive;
+ }
+
+ public HiveOperation getHiveOperation() {
+ return hiveOperation;
+ }
+
+ public void putEntity(String qualifiedName, AtlasEntity entity) {
+ qNameEntityMap.put(qualifiedName, entity);
+ }
+
+ public AtlasEntity getEntity(String qualifiedName) {
+ return qNameEntityMap.get(qualifiedName);
+ }
+
+ public Collection<AtlasEntity> getEntities() { return qNameEntityMap.values(); }
+
+ public Map<String, AtlasEntity> getQNameToEntityMap() { return qNameEntityMap; }
+
+ public String getMetadataNamespace() {
+ return hook.getMetadataNamespace();
+ }
+
+ public String getHostName() { return hook.getHostName(); }
+
+ public boolean isConvertHdfsPathToLowerCase() {
+ return hook.isConvertHdfsPathToLowerCase();
+ }
+
+ public String getAwsS3AtlasModelVersion() {
+ return hook.getAwsS3AtlasModelVersion();
+ }
+
+ public boolean getSkipHiveColumnLineageHive20633() {
+ return hook.getSkipHiveColumnLineageHive20633();
+ }
+
+ public int getSkipHiveColumnLineageHive20633InputsThreshold() {
+ return hook.getSkipHiveColumnLineageHive20633InputsThreshold();
+ }
+
+ public PreprocessAction getPreprocessActionForHiveTable(String qualifiedName) {
+ return hook.getPreprocessActionForHiveTable(qualifiedName);
+ }
+
+ public List<String> getIgnoreDummyDatabaseName() {
+ return hook.getIgnoreDummyDatabaseName();
+ }
+
+ public List<String> getIgnoreDummyTableName() {
+ return hook.getIgnoreDummyTableName();
+ }
+
+ public String getIgnoreValuesTmpTableNamePrefix() {
+ return hook.getIgnoreValuesTmpTableNamePrefix();
+ }
+
+ public String getQualifiedName(Database db) {
+ return getDatabaseName(db) + QNAME_SEP_METADATA_NAMESPACE + getMetadataNamespace();
+ }
+
+ public String getQualifiedName(Table table) {
+ String tableName = table.getTableName();
+
+ if (table.isTemporary()) {
+ if (SessionState.get() != null && SessionState.get().getSessionId() != null) {
+ tableName = tableName + TEMP_TABLE_PREFIX + SessionState.get().getSessionId();
+ } else {
+ tableName = tableName + TEMP_TABLE_PREFIX + RandomStringUtils.random(10);
+ }
+ }
+
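+ // Illustrative result (assuming the separators and TEMP_TABLE_PREFIX defined earlier in this class are '.', '@' and "_temp-"):
+ //   table 'customers' in db 'sales' with metadata namespace 'cm' -> "sales.customers@cm"
+ //   temporary tables get a "_temp-<sessionId>" suffix appended to the table name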
+ return (table.getDbName() + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + getMetadataNamespace();
+ }
+
+ public boolean isKnownDatabase(String dbQualifiedName) {
+ return knownObjects != null && dbQualifiedName != null ? knownObjects.isKnownDatabase(dbQualifiedName) : false;
+ }
+
+ public boolean isKnownTable(String tblQualifiedName) {
+ return knownObjects != null && tblQualifiedName != null ? knownObjects.isKnownTable(tblQualifiedName) : false;
+ }
+
+ public void addToKnownEntities(Collection<AtlasEntity> entities) {
+ if (knownObjects != null && entities != null) {
+ knownObjects.addToKnownEntities(entities);
+ }
+ }
+
+ public void removeFromKnownDatabase(String dbQualifiedName) {
+ if (knownObjects != null && dbQualifiedName != null) {
+ knownObjects.removeFromKnownDatabase(dbQualifiedName);
+ }
+ }
+
+ public void removeFromKnownTable(String tblQualifiedName) {
+ if (knownObjects != null && tblQualifiedName != null) {
+ knownObjects.removeFromKnownTable(tblQualifiedName);
+ }
+ }
+
+ public boolean isHiveProcessPopulateDeprecatedAttributes() {
+ return hook.isHiveProcessPopulateDeprecatedAttributes();
+ }
+
+ private void init() {
+ if (hiveOperation == null) {
+ return;
+ }
+
+ String operation = hiveOperation.getOperationName();
+
+ if (knownObjects == null || !isCreateAlterOperation(operation)) {
+ return;
+ }
+
+ List<Database> databases = new ArrayList<>();
+ List<Table> tables = new ArrayList<>();
+
+ if (isMetastoreHook()) {
+ switch (hiveOperation) {
+ case CREATEDATABASE:
+ databases.add(((CreateDatabaseEvent) metastoreEvent).getDatabase());
+ break;
+ case ALTERDATABASE:
+ databases.add(((AlterDatabaseEvent) metastoreEvent).getOldDatabase());
+ databases.add(((AlterDatabaseEvent) metastoreEvent).getNewDatabase());
+ break;
+ case CREATETABLE:
+ tables.add(toTable(((CreateTableEvent) metastoreEvent).getTable()));
+ break;
+ case ALTERTABLE_PROPERTIES:
+ case ALTERTABLE_RENAME:
+ case ALTERTABLE_RENAMECOL:
+ tables.add(toTable(((AlterTableEvent) metastoreEvent).getOldTable()));
+ tables.add(toTable(((AlterTableEvent) metastoreEvent).getNewTable()));
+ break;
+ }
+ } else {
+ if (getOutputs() != null) {
+ for (WriteEntity output : hiveContext.getOutputs()) {
+ switch (output.getType()) {
+ case DATABASE:
+ databases.add(output.getDatabase());
+ break;
+ case TABLE:
+ tables.add(output.getTable());
+ break;
+ }
+ }
+ }
+ }
+
+ for (Database database : databases) {
+ knownObjects.removeFromKnownDatabase(getQualifiedName(database));
+ }
+
+ for (Table table : tables) {
+ knownObjects.removeFromKnownTable(getQualifiedName(table));
+ }
+ }
+
+ private static boolean isCreateAlterOperation(String operationName) {
+ return operationName != null && (operationName.startsWith(CREATE_OPERATION) || operationName.startsWith(ALTER_OPERATION));
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
new file mode 100644
index 0000000000..6ea48482ab
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -0,0 +1,428 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook;
+
+import org.apache.atlas.hive.hook.events.*;
+import org.apache.atlas.hive.hook.utils.ActiveEntityFilter;
+import org.apache.atlas.hook.AtlasHook;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.utils.LruCache;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
+import org.apache.hadoop.hive.ql.hooks.HookContext;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.shims.Utils;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME;
+import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_DB;
+import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_TABLE;
+import static org.apache.atlas.repository.Constants.HS2_SOURCE;
+
+public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveHook.class);
+
+ public enum PreprocessAction { NONE, IGNORE, PRUNE }
+
+ public static final String CONF_PREFIX = "atlas.hook.hive.";
+ public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase";
+ public static final String HOOK_NAME_CACHE_ENABLED = CONF_PREFIX + "name.cache.enabled";
+ public static final String HOOK_NAME_CACHE_DATABASE_COUNT = CONF_PREFIX + "name.cache.database.count";
+ public static final String HOOK_NAME_CACHE_TABLE_COUNT = CONF_PREFIX + "name.cache.table.count";
+ public static final String HOOK_NAME_CACHE_REBUID_INTERVAL_SEC = CONF_PREFIX + "name.cache.rebuild.interval.seconds";
+ public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION = CONF_PREFIX + "aws_s3.atlas.model.version";
+ public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2 = "v2";
+ public static final String HOOK_HIVE_PROCESS_POPULATE_DEPRECATED_ATTRIBUTES = CONF_PREFIX + "hive_process.populate.deprecated.attributes";
+ public static final String HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633 = CONF_PREFIX + "skip.hive_column_lineage.hive-20633";
+ public static final String HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633_INPUTS_THRESHOLD = CONF_PREFIX + "skip.hive_column_lineage.hive-20633.inputs.threshold";
+ public static final String HOOK_HIVE_TABLE_IGNORE_PATTERN = CONF_PREFIX + "hive_table.ignore.pattern";
+ public static final String HOOK_HIVE_TABLE_PRUNE_PATTERN = CONF_PREFIX + "hive_table.prune.pattern";
+ public static final String HOOK_HIVE_TABLE_CACHE_SIZE = CONF_PREFIX + "hive_table.cache.size";
+ public static final String HOOK_HIVE_IGNORE_DDL_OPERATIONS = CONF_PREFIX + "hs2.ignore.ddl.operations";
+ public static final String HOOK_HIVE_FILTER_ENTITY_ADDITIONAL_TYPES_TO_RETAIN = CONF_PREFIX + "hs2.filter.entity.additional.types.to.retain";
+ public static final String HOOK_HIVE_SKIP_TEMP_TABLES = CONF_PREFIX + "skip.temp.tables";
+ public static final String DEFAULT_HOST_NAME = "localhost";
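+ // Illustrative atlas-application.properties entries for the options above (example values only, not necessarily the defaults):
+ //   atlas.hook.hive.hdfs_path.convert_to_lowercase=false
+ //   atlas.hook.hive.name.cache.enabled=true
+ //   atlas.hook.hive.hive_table.ignore.pattern=tmp_.*
+ //   atlas.hook.hive.skip.temp.tables=true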
+
+ private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
+
+ private static final boolean convertHdfsPathToLowerCase;
+ private static final boolean nameCacheEnabled;
+ private static final int nameCacheDatabaseMaxCount;
+ private static final int nameCacheTableMaxCount;
+ private static final int nameCacheRebuildIntervalSeconds;
+ private static final String awsS3AtlasModelVersion;
+
+ private static final boolean skipHiveColumnLineageHive20633;
+ private static final int skipHiveColumnLineageHive20633InputsThreshold;
+ private static final List<Pattern> hiveTablesToIgnore = new ArrayList<>();
+ private static final List<Pattern> hiveTablesToPrune = new ArrayList<>();
+ private static final Map<String, PreprocessAction> hiveTablesCache;
+ private static final List<String> ignoreDummyDatabaseName;
+ private static final List<String> ignoreDummyTableName;
+ private static final String ignoreValuesTmpTableNamePrefix;
+ private static final boolean hiveProcessPopulateDeprecatedAttributes;
+ private static HiveHookObjectNamesCache knownObjects = null;
+ private static String hostName;
+ private static boolean skipTempTables = true;
+
+ static {
+ for (HiveOperation hiveOperation : HiveOperation.values()) {
+ OPERATION_MAP.put(hiveOperation.getOperationName(), hiveOperation);
+ }
+
+ convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false);
+ nameCacheEnabled = atlasProperties.getBoolean(HOOK_NAME_CACHE_ENABLED, true);
+ nameCacheDatabaseMaxCount = atlasProperties.getInt(HOOK_NAME_CACHE_DATABASE_COUNT, 10000);
+ nameCacheTableMaxCount = atlasProperties.getInt(HOOK_NAME_CACHE_TABLE_COUNT, 10000);
+ nameCacheRebuildIntervalSeconds = atlasProperties.getInt(HOOK_NAME_CACHE_REBUID_INTERVAL_SEC, 60 * 60); // 60 minutes default
+ awsS3AtlasModelVersion = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2);
+ skipHiveColumnLineageHive20633 = atlasProperties.getBoolean(HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633, false);
+ skipHiveColumnLineageHive20633InputsThreshold = atlasProperties.getInt(HOOK_SKIP_HIVE_COLUMN_LINEAGE_HIVE_20633_INPUTS_THRESHOLD, 15); // skip if avg # of inputs is > 15
+ hiveProcessPopulateDeprecatedAttributes = atlasProperties.getBoolean(HOOK_HIVE_PROCESS_POPULATE_DEPRECATED_ATTRIBUTES, false);
+ String[] patternHiveTablesToIgnore = atlasProperties.getStringArray(HOOK_HIVE_TABLE_IGNORE_PATTERN);
+ String[] patternHiveTablesToPrune = atlasProperties.getStringArray(HOOK_HIVE_TABLE_PRUNE_PATTERN);
+
+ if (patternHiveTablesToIgnore != null) {
+ for (String pattern : patternHiveTablesToIgnore) {
+ try {
+ hiveTablesToIgnore.add(Pattern.compile(pattern));
+
+ LOG.info("{}={}", HOOK_HIVE_TABLE_IGNORE_PATTERN, pattern);
+ } catch (Throwable t) {
+ LOG.warn("failed to compile pattern {}", pattern, t);
+ LOG.warn("Ignoring invalid pattern in configuration {}: {}", HOOK_HIVE_TABLE_IGNORE_PATTERN, pattern);
+ }
+ }
+ }
+
+ if (patternHiveTablesToPrune != null) {
+ for (String pattern : patternHiveTablesToPrune) {
+ try {
+ hiveTablesToPrune.add(Pattern.compile(pattern));
+
+ LOG.info("{}={}", HOOK_HIVE_TABLE_PRUNE_PATTERN, pattern);
+ } catch (Throwable t) {
+ LOG.warn("failed to compile pattern {}", pattern, t);
+ LOG.warn("Ignoring invalid pattern in configuration {}: {}", HOOK_HIVE_TABLE_PRUNE_PATTERN, pattern);
+ }
+ }
+ }
+
+ if (!hiveTablesToIgnore.isEmpty() || !hiveTablesToPrune.isEmpty()) {
+ hiveTablesCache = new LruCache<>(atlasProperties.getInt(HOOK_HIVE_TABLE_CACHE_SIZE, 10000), 0);
+ } else {
+ hiveTablesCache = Collections.emptyMap();
+ }
+
+ knownObjects = nameCacheEnabled ? new HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds) : null;
+
+ List<String> defaultDummyDatabase = new ArrayList<>();
+ List<String> defaultDummyTable = new ArrayList<>();
+
+ defaultDummyDatabase.add(SemanticAnalyzer.DUMMY_DATABASE);
+ defaultDummyTable.add(SemanticAnalyzer.DUMMY_TABLE);
+
+ ignoreDummyDatabaseName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name", defaultDummyDatabase);
+ ignoreDummyTableName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name", defaultDummyTable);
+ ignoreValuesTmpTableNamePrefix = atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix", "Values__Tmp__Table__");
+ skipTempTables = atlasProperties.getBoolean(HOOK_HIVE_SKIP_TEMP_TABLES, true);
+
+ try {
+ hostName = InetAddress.getLocalHost().getHostName();
+ } catch (UnknownHostException e) {
+ LOG.warn("No hostname found. Setting the hostname to default value {}", DEFAULT_HOST_NAME, e);
+ hostName = DEFAULT_HOST_NAME;
+ }
+
+ ActiveEntityFilter.init(atlasProperties);
+ }
+
+
+ public HiveHook() {
+ }
+
+ public HiveHook(String name) {
+ super(name);
+ }
+
+ public String getMessageSource() {
+ return HS2_SOURCE;
+ }
+
+ @Override
+ public void run(HookContext hookContext) throws Exception {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("==> HiveHook.run({})", hookContext.getOperationName());
+ }
+
+ try {
+ HiveOperation oper = OPERATION_MAP.get(hookContext.getOperationName());
+ AtlasHiveHookContext context = new AtlasHiveHookContext(this, oper, hookContext, getKnownObjects(), isSkipTempTables());
+ BaseHiveEvent event = null;
+
+ switch (oper) {
+ case CREATEDATABASE:
+ event = new CreateDatabase(context);
+ break;
+
+ case DROPDATABASE:
+ event = new DropDatabase(context);
+ break;
+
+ case ALTERDATABASE:
+ case ALTERDATABASE_OWNER:
+ case ALTERDATABASE_LOCATION:
+ event = new AlterDatabase(context);
+ break;
+
+ case CREATETABLE:
+ event = new CreateTable(context);
+ break;
+
+ case DROPTABLE:
+ case DROPVIEW:
+ case DROP_MATERIALIZED_VIEW:
+ event = new DropTable(context);
+ break;
+
+ case CREATETABLE_AS_SELECT:
+ case CREATE_MATERIALIZED_VIEW:
+ case CREATEVIEW:
+ case ALTERVIEW_AS:
+ case LOAD:
+ case EXPORT:
+ case IMPORT:
+ case QUERY:
+ event = new CreateHiveProcess(context);
+ break;
+
+ case ALTERTABLE_FILEFORMAT:
+ case ALTERTABLE_CLUSTER_SORT:
+ case ALTERTABLE_BUCKETNUM:
+ case ALTERTABLE_PROPERTIES:
+ case ALTERVIEW_PROPERTIES:
+ case ALTERTABLE_SERDEPROPERTIES:
+ case ALTERTABLE_SERIALIZER:
+ case ALTERTABLE_ADDCOLS:
+ case ALTERTABLE_REPLACECOLS:
+ case ALTERTABLE_PARTCOLTYPE:
+ case ALTERTABLE_LOCATION:
+ event = new AlterTable(context);
+ break;
+
+ case ALTERTABLE_RENAME:
+ case ALTERVIEW_RENAME:
+ event = new AlterTableRename(context);
+ break;
+
+ case ALTERTABLE_RENAMECOL:
+ event = new AlterTableRenameCol(context);
+ break;
+
+ default:
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("HiveHook.run({}): operation ignored", hookContext.getOperationName());
+ }
+ break;
+ }
+
+ if (event != null) {
+ final UserGroupInformation ugi = hookContext.getUgi() == null ? Utils.getUGI() : hookContext.getUgi();
+
+ super.notifyEntities(ActiveEntityFilter.apply(event.getNotificationMessages()), ugi);
+ }
+ } catch (Throwable t) {
+ LOG.error("HiveHook.run(): failed to process operation {}", hookContext.getOperationName(), t);
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("<== HiveHook.run({})", hookContext.getOperationName());
+ }
+ }
+
+ public boolean isConvertHdfsPathToLowerCase() {
+ return convertHdfsPathToLowerCase;
+ }
+
+ public String getAwsS3AtlasModelVersion() {
+ return awsS3AtlasModelVersion;
+ }
+
+ public boolean getSkipHiveColumnLineageHive20633() {
+ return skipHiveColumnLineageHive20633;
+ }
+
+ public int getSkipHiveColumnLineageHive20633InputsThreshold() {
+ return skipHiveColumnLineageHive20633InputsThreshold;
+ }
+
+ public List<String> getIgnoreDummyDatabaseName() {
+ return ignoreDummyDatabaseName;
+ }
+
+ public List<String> getIgnoreDummyTableName() {
+ return ignoreDummyTableName;
+ }
+
+ public String getIgnoreValuesTmpTableNamePrefix() {
+ return ignoreValuesTmpTableNamePrefix;
+ }
+
+ public boolean isHiveProcessPopulateDeprecatedAttributes() {
+ return hiveProcessPopulateDeprecatedAttributes;
+ }
+
+ public static boolean isSkipTempTables() {
+ return skipTempTables;
+ }
+
+ public PreprocessAction getPreprocessActionForHiveTable(String qualifiedName) {
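+ // consult the ignore/prune patterns only when at least one is configured; the decision is cached per qualifiedName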
+ PreprocessAction ret = PreprocessAction.NONE;
+
+ if (qualifiedName != null && (CollectionUtils.isNotEmpty(hiveTablesToIgnore) || CollectionUtils.isNotEmpty(hiveTablesToPrune))) {
+ ret = hiveTablesCache.get(qualifiedName);
+
+ if (ret == null) {
+ if (isMatch(qualifiedName, hiveTablesToIgnore)) {
+ ret = PreprocessAction.IGNORE;
+ } else if (isMatch(qualifiedName, hiveTablesToPrune)) {
+ ret = PreprocessAction.PRUNE;
+ } else {
+ ret = PreprocessAction.NONE;
+ }
+
+ hiveTablesCache.put(qualifiedName, ret);
+ }
+ }
+
+ return ret;
+ }
+
+ private boolean isMatch(String name, List<Pattern> patterns) {
+ boolean ret = false;
+
+ for (Pattern p : patterns) {
+ if (p.matcher(name).matches()) {
+ ret = true;
+
+ break;
+ }
+ }
+
+ return ret;
+ }
+
+ public static HiveHookObjectNamesCache getKnownObjects() {
+ if (knownObjects != null && knownObjects.isCacheExpired()) {
+ LOG.info("HiveHook.run(): purging cached databaseNames ({}) and tableNames ({})", knownObjects.getCachedDbCount(), knownObjects.getCachedTableCount());
+
+ knownObjects = new HiveHook.HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds);
+ }
+
+ return knownObjects;
+ }
+
+ public String getHostName() {
+ return hostName;
+ }
+
+ public static class HiveHookObjectNamesCache {
+ private final int dbMaxCacheCount;
+ private final int tblMaxCacheCount;
+ private final long cacheExpiryTimeMs;
+ private final Set<String> knownDatabases;
+ private final Set<String> knownTables;
+
+ public HiveHookObjectNamesCache(int dbMaxCacheCount, int tblMaxCacheCount, long nameCacheRebuildIntervalSeconds) {
+ this.dbMaxCacheCount = dbMaxCacheCount;
+ this.tblMaxCacheCount = tblMaxCacheCount;
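+ // a non-positive rebuild interval disables expiry; otherwise the cache expires nameCacheRebuildIntervalSeconds after construction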
+ this.cacheExpiryTimeMs = nameCacheRebuildIntervalSeconds <= 0 ? Long.MAX_VALUE : (System.currentTimeMillis() + (nameCacheRebuildIntervalSeconds * 1000));
+ this.knownDatabases = Collections.synchronizedSet(new HashSet<>());
+ this.knownTables = Collections.synchronizedSet(new HashSet<>());
+ }
+
+ public int getCachedDbCount() {
+ return knownDatabases.size();
+ }
+
+ public int getCachedTableCount() {
+ return knownTables.size();
+ }
+
+ public boolean isCacheExpired() {
+ return System.currentTimeMillis() > cacheExpiryTimeMs;
+ }
+
+ public boolean isKnownDatabase(String dbQualifiedName) {
+ return knownDatabases.contains(dbQualifiedName);
+ }
+
+ public boolean isKnownTable(String tblQualifiedName) {
+ return knownTables.contains(tblQualifiedName);
+ }
+
+ public void addToKnownEntities(Collection<AtlasEntity> entities) {
+ for (AtlasEntity entity : entities) {
+ if (StringUtils.equalsIgnoreCase(entity.getTypeName(), HIVE_TYPE_DB)) {
+ addToKnownDatabase((String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+ } else if (StringUtils.equalsIgnoreCase(entity.getTypeName(), HIVE_TYPE_TABLE)) {
+ addToKnownTable((String) entity.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+ }
+ }
+ }
+
+ public void addToKnownDatabase(String dbQualifiedName) {
+ if (knownDatabases.size() < dbMaxCacheCount) {
+ knownDatabases.add(dbQualifiedName);
+ }
+ }
+
+ public void addToKnownTable(String tblQualifiedName) {
+ if (knownTables.size() < tblMaxCacheCount) {
+ knownTables.add(tblQualifiedName);
+ }
+ }
+
+ public void removeFromKnownDatabase(String dbQualifiedName) {
+ knownDatabases.remove(dbQualifiedName);
+ }
+
+ public void removeFromKnownTable(String tblQualifiedName) {
+ if (tblQualifiedName != null) {
+ knownTables.remove(tblQualifiedName);
+ }
+ }
+ }
+}
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHookImpl.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHookImpl.java
new file mode 100644
index 0000000000..33266ce0b3
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveMetastoreHookImpl.java
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.hive.hook;
+
+import org.apache.atlas.hive.hook.events.*;
+import org.apache.atlas.hook.AtlasHook;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.MetaStoreEventListener;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.events.*;
+import org.apache.hadoop.hive.metastore.utils.SecurityUtils;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.shims.Utils;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+import static org.apache.atlas.hive.hook.events.AlterTableRenameCol.findRenamedColumn;
+import static org.apache.atlas.hive.hook.events.BaseHiveEvent.toTable;
+import static org.apache.atlas.repository.Constants.HMS_SOURCE;
+import static org.apache.hadoop.hive.ql.plan.HiveOperation.*;
+
+public class HiveMetastoreHookImpl extends MetaStoreEventListener {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveMetastoreHookImpl.class);
+ private final HiveHook hiveHook;
+ private final HiveMetastoreHook hook;
+
+ public HiveMetastoreHookImpl(Configuration config) {
+ super(config);
+
+ this.hiveHook = new HiveHook(this.getClass().getSimpleName());
+ this.hook = new HiveMetastoreHook();
+ }
+
+ @Override
+ public void onCreateDatabase(CreateDatabaseEvent dbEvent) {
+ HiveOperationContext context = new HiveOperationContext(CREATEDATABASE, dbEvent);
+
+ hook.handleEvent(context);
+ }
+
+ @Override
+ public void onDropDatabase(DropDatabaseEvent dbEvent) {
+ HiveOperationContext context = new HiveOperationContext(DROPDATABASE, dbEvent);
+
+ hook.handleEvent(context);
+ }
+
+ @Override
+ public void onAlterDatabase(AlterDatabaseEvent dbEvent) {
+ HiveOperationContext context = new HiveOperationContext(ALTERDATABASE, dbEvent);
+
+ hook.handleEvent(context);
+ }
+
+ @Override
+ public void onCreateTable(CreateTableEvent tableEvent) {
+ HiveOperationContext context = new HiveOperationContext(CREATETABLE, tableEvent);
+
+ hook.handleEvent(context);
+ }
+
+ @Override
+ public void onDropTable(DropTableEvent tableEvent) {
+ HiveOperationContext context = new HiveOperationContext(DROPTABLE, tableEvent);
+
+ hook.handleEvent(context);
+ }
+
+ @Override
+ public void onAlterTable(AlterTableEvent tableEvent) {
+ HiveOperationContext context = new HiveOperationContext(tableEvent);
+ Table oldTable = toTable(tableEvent.getOldTable());
+ Table newTable = toTable(tableEvent.getNewTable());
+
+ if (isTableRename(oldTable, newTable)) {
+ context.setOperation(ALTERTABLE_RENAME);
+ } else if (isColumnRename(oldTable, newTable, context)) {
+ context.setOperation(ALTERTABLE_RENAMECOL);
+ } else if(isAlterTableProperty(tableEvent, "last_modified_time") ||
+ isAlterTableProperty(tableEvent, "transient_lastDdlTime")) {
+ context.setOperation(ALTERTABLE_PROPERTIES); // map other alter table operations to ALTERTABLE_PROPERTIES
+ }
+
+ hook.handleEvent(context);
+ }
+
+ public class HiveMetastoreHook extends AtlasHook {
+ public HiveMetastoreHook() {
+ }
+
+ @Override
+ public String getMessageSource() {
+ return HMS_SOURCE;
+ }
+
+ public void handleEvent(HiveOperationContext operContext) {
+ ListenerEvent listenerEvent = operContext.getEvent();
+
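+ // skip notification for operations that did not complete successfully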
+ if (!listenerEvent.getStatus()) {
+ return;
+ }
+
+ try {
+ HiveOperation oper = operContext.getOperation();
+ AtlasHiveHookContext context = new AtlasHiveHookContext(hiveHook, oper, hiveHook.getKnownObjects(), this, listenerEvent, hiveHook.isSkipTempTables());
+ BaseHiveEvent event = null;
+
+ switch (oper) {
+ case CREATEDATABASE:
+ event = new CreateDatabase(context);
+ break;
+
+ case DROPDATABASE:
+ event = new DropDatabase(context);
+ break;
+
+ case ALTERDATABASE:
+ event = new AlterDatabase(context);
+ break;
+
+ case CREATETABLE:
+ event = new CreateTable(context);
+ break;
+
+ case DROPTABLE:
+ event = new DropTable(context);
+ break;
+
+ case ALTERTABLE_PROPERTIES:
+ event = new AlterTable(context);
+ break;
+
+ case ALTERTABLE_RENAME:
+ event = new AlterTableRename(context);
+ break;
+
+ case ALTERTABLE_RENAMECOL:
+ FieldSchema columnOld = operContext.getColumnOld();
+ FieldSchema columnNew = operContext.getColumnNew();
+
+ event = new AlterTableRenameCol(columnOld, columnNew, context);
+ break;
+
+ default:
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("HiveMetastoreHook.handleEvent({}): operation ignored.", listenerEvent);
+ }
+ break;
+ }
+
+ if (event != null) {
+ final UserGroupInformation ugi = SecurityUtils.getUGI() == null ? Utils.getUGI() : SecurityUtils.getUGI();
+
+ super.notifyEntities(event.getNotificationMessages(), ugi);
+ }
+ } catch (Throwable t) {
+ LOG.error("HiveMetastoreHook.handleEvent({}): failed to process operation {}", listenerEvent, t);
+ }
+ }
+ }
+
+ private static boolean isTableRename(Table oldTable, Table newTable) {
+ String oldTableName = oldTable.getTableName();
+ String newTableName = newTable.getTableName();
+
+ return !StringUtils.equalsIgnoreCase(oldTableName, newTableName);
+ }
+
+ private static boolean isColumnRename(Table oldTable, Table newTable, HiveOperationContext context) {
+ FieldSchema columnOld = findRenamedColumn(oldTable, newTable);
+ FieldSchema columnNew = findRenamedColumn(newTable, oldTable);
+ boolean isColumnRename = columnOld != null && columnNew != null;
+
+ if (isColumnRename) {
+ context.setColumnOld(columnOld);
+ context.setColumnNew(columnNew);
+ }
+
+ return isColumnRename;
+ }
+
+ private boolean isAlterTableProperty(AlterTableEvent tableEvent, String propertyToCheck) {
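+ // returns true when the value of the given table parameter differs between the old and new table definitions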
+ final boolean ret;
+ String oldTableModifiedTime = tableEvent.getOldTable().getParameters().get(propertyToCheck);
+ String newTableModifiedTime = tableEvent.getNewTable().getParameters().get(propertyToCheck);
+
+
+ if (oldTableModifiedTime == null) {
+ ret = newTableModifiedTime != null;
+ } else {
+ ret = !oldTableModifiedTime.equals(newTableModifiedTime);
+ }
+
+ return ret;
+
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveOperationContext.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveOperationContext.java
new file mode 100644
index 0000000000..23ea4be690
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveOperationContext.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.hive.hook;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.events.ListenerEvent;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+
+public class HiveOperationContext {
+ HiveOperation operation;
+ ListenerEvent event;
+ FieldSchema columnOld;
+ FieldSchema columnNew;
+
+ public HiveOperationContext(ListenerEvent event) {
+ this(null, event);
+ }
+
+ public HiveOperationContext(HiveOperation operation, ListenerEvent event) {
+ setOperation(operation);
+ setEvent(event);
+ setColumnOld(null);
+ setColumnNew(null);
+ }
+
+ public ListenerEvent getEvent() {
+ return event;
+ }
+
+ public void setEvent(ListenerEvent event) {
+ this.event = event;
+ }
+
+ public HiveOperation getOperation() {
+ return operation;
+ }
+
+ public void setOperation(HiveOperation operation) {
+ this.operation = operation;
+ }
+
+ public FieldSchema getColumnOld() {
+ return columnOld;
+ }
+
+ public void setColumnOld(FieldSchema columnOld) {
+ this.columnOld = columnOld;
+ }
+
+ public FieldSchema getColumnNew() {
+ return columnNew;
+ }
+
+ public void setColumnNew(FieldSchema columnNew) {
+ this.columnNew = columnNew;
+ }
+}
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterDatabase.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterDatabase.java
new file mode 100644
index 0000000000..d2623b3636
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterDatabase.java
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook.events;
+
+import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.events.AlterDatabaseEvent;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.List;
+
+public class AlterDatabase extends CreateDatabase {
+ private static final Logger LOG = LoggerFactory.getLogger(AlterDatabase.class);
+
+ public AlterDatabase(AtlasHiveHookContext context) {
+ super(context);
+ }
+
+ @Override
+ public List<HookNotification> getNotificationMessages() throws Exception {
+ List<HookNotification> ret = null;
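+ // when invoked from the metastore listener, build entities from the HMS event; otherwise read them from the HS2 hook context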
+ AtlasEntitiesWithExtInfo entities = context.isMetastoreHook() ? getHiveMetastoreEntities() : getHiveEntities();
+
+ if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) {
+ ret = Collections.singletonList(new EntityUpdateRequestV2(getUserName(), entities));
+ }
+
+ return ret;
+ }
+
+ public AtlasEntitiesWithExtInfo getHiveMetastoreEntities() throws Exception {
+ AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo();
+ AlterDatabaseEvent dbEvent = (AlterDatabaseEvent) context.getMetastoreEvent();
+ Database oldDb = dbEvent.getOldDatabase();
+ Database newDb = dbEvent.getNewDatabase();
+
+ if (newDb != null) {
+ AtlasEntity dbEntity = toDbEntity(newDb);
+
+ ret.addEntity(dbEntity);
+
+ addLocationEntities(dbEntity, ret);
+ } else {
+ LOG.error("AlterDatabase.getEntities(): failed to retrieve db");
+ }
+
+ addProcessedEntities(ret);
+
+ return ret;
+ }
+
+ public AtlasEntitiesWithExtInfo getHiveEntities() throws Exception {
+ return super.getHiveEntities();
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTable.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTable.java
new file mode 100644
index 0000000000..d2f09cc108
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTable.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook.events;
+
+import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
+import org.apache.commons.collections.CollectionUtils;
+
+import java.util.Collections;
+import java.util.List;
+
+public class AlterTable extends CreateTable {
+ public AlterTable(AtlasHiveHookContext context) {
+ super(context);
+ }
+
+ @Override
+ public List<HookNotification> getNotificationMessages() throws Exception {
+ List<HookNotification> ret = null;
+ AtlasEntitiesWithExtInfo entities = context.isMetastoreHook() ? getHiveMetastoreEntities() : getHiveEntities();
+
+ if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) {
+ ret = Collections.singletonList(new EntityUpdateRequestV2(getUserName(), entities));
+ }
+
+ return ret;
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java
new file mode 100644
index 0000000000..6961fa7c2c
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook.events;
+
+import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
+import org.apache.atlas.model.instance.AtlasObjectId;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.model.notification.HookNotification.EntityPartialUpdateRequestV2;
+import org.apache.atlas.model.notification.HookNotification.EntityUpdateRequestV2;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.hive.metastore.events.AlterTableEvent;
+import org.apache.hadoop.hive.ql.hooks.Entity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.*;
+
+public class AlterTableRename extends BaseHiveEvent {
+ private static final Logger LOG = LoggerFactory.getLogger(AlterTableRename.class);
+
+ public AlterTableRename(AtlasHiveHookContext context) {
+ super(context);
+ }
+
+ @Override
+ public List<HookNotification> getNotificationMessages() throws Exception {
+ return context.isMetastoreHook() ? getHiveMetastoreMessages() : getHiveMessages();
+ }
+
+ public List<HookNotification> getHiveMetastoreMessages() throws Exception {
+ List<HookNotification> ret = new ArrayList<>();
+ AlterTableEvent tblEvent = (AlterTableEvent) context.getMetastoreEvent();
+ Table oldTable = toTable(tblEvent.getOldTable());
+ Table newTable = toTable(tblEvent.getNewTable());
+
+ if (newTable == null) {
+ LOG.error("AlterTableRename: renamed table not found in outputs list");
+
+ return ret;
+ }
+
+ processTables(oldTable, newTable, ret);
+
+ return ret;
+ }
+
+ public List<HookNotification> getHiveMessages() throws Exception {
+ List<HookNotification> ret = new ArrayList<>();
+ Table oldTable;
+ Table newTable;
+
+ if (CollectionUtils.isEmpty(getInputs())) {
+ LOG.error("AlterTableRename: old-table not found in inputs list");
+
+ return ret;
+ }
+
+ oldTable = getInputs().iterator().next().getTable();
+ newTable = null;
+
+ if (CollectionUtils.isNotEmpty(getOutputs())) {
+ for (WriteEntity entity : getOutputs()) {
+ if (entity.getType() == Entity.Type.TABLE) {
+ newTable = entity.getTable();
+
+ // Hive includes both the old and new table names in the outputs; skip the entry matching the old name with the check below
+ if (StringUtils.equalsIgnoreCase(newTable.getDbName(), oldTable.getDbName()) &&
+ StringUtils.equalsIgnoreCase(newTable.getTableName(), oldTable.getTableName())) {
+ newTable = null;
+
+ continue;
+ }
+
+ newTable = getHive().getTable(newTable.getDbName(), newTable.getTableName());
+
+ break;
+ }
+ }
+ }
+
+ if (newTable == null) {
+ LOG.error("AlterTableRename: renamed table not found in outputs list");
+
+ return ret;
+ }
+
+ processTables(oldTable, newTable, ret);
+
+ return ret;
+ }
+
+ private void processTables(Table oldTable, Table newTable, List<HookNotification> ret) throws Exception {
+ AtlasEntityWithExtInfo oldTableEntity = toTableEntity(oldTable);
+ AtlasEntityWithExtInfo renamedTableEntity = toTableEntity(newTable);
+
+ if (oldTableEntity == null || renamedTableEntity == null) {
+ return;
+ }
+
+ // update qualifiedName for all columns, partitionKeys, storageDesc
+ String renamedTableQualifiedName = (String) renamedTableEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME);
+
+ renameColumns((List<AtlasObjectId>) oldTableEntity.getEntity().getRelationshipAttribute(ATTRIBUTE_COLUMNS), oldTableEntity, renamedTableQualifiedName, ret);
+ renameColumns((List<AtlasObjectId>) oldTableEntity.getEntity().getRelationshipAttribute(ATTRIBUTE_PARTITION_KEYS), oldTableEntity, renamedTableQualifiedName, ret);
+ renameStorageDesc(oldTableEntity, renamedTableEntity, ret);
+
+ // set previous name as the alias
+ renamedTableEntity.getEntity().setAttribute(ATTRIBUTE_ALIASES, Collections.singletonList(oldTable.getTableName()));
+
+ // make a copy of renamedTableEntity to send as partial-update with no relationship attributes
+ AtlasEntity renamedTableEntityForPartialUpdate = new AtlasEntity(renamedTableEntity.getEntity());
+ renamedTableEntityForPartialUpdate.setRelationshipAttributes(null);
+
+ String oldTableQualifiedName = (String) oldTableEntity.getEntity().getAttribute(ATTRIBUTE_QUALIFIED_NAME);
+ AtlasObjectId oldTableId = new AtlasObjectId(oldTableEntity.getEntity().getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldTableQualifiedName);
+
+ // update qualifiedName and other attributes (like params - which include lastModifiedTime, lastModifiedBy) of the table
+ ret.add(new EntityPartialUpdateRequestV2(getUserName(), oldTableId, new AtlasEntityWithExtInfo(renamedTableEntityForPartialUpdate)));
+
+ // to handle cases where Atlas didn't have the oldTable, send a full update
+ ret.add(new EntityUpdateRequestV2(getUserName(), new AtlasEntitiesWithExtInfo(renamedTableEntity)));
+
+ // partial update relationship attribute ddl
+ if (!context.isMetastoreHook()) {
+ AtlasEntity ddlEntity = createHiveDDLEntity(renamedTableEntity.getEntity(), true);
+
+ if (ddlEntity != null) {
+ ret.add(new HookNotification.EntityCreateRequestV2(getUserName(), new AtlasEntitiesWithExtInfo(ddlEntity)));
+ }
+ }
+
+ context.removeFromKnownTable(oldTableQualifiedName);
+ }
+
+ private void renameColumns(List<AtlasObjectId> columns, AtlasEntityExtInfo oldEntityExtInfo, String newTableQualifiedName, List<HookNotification> notifications) {
+ if (CollectionUtils.isNotEmpty(columns)) {
+ for (AtlasObjectId columnId : columns) {
+ AtlasEntity oldColumn = oldEntityExtInfo.getEntity(columnId.getGuid());
+ AtlasObjectId oldColumnId = new AtlasObjectId(oldColumn.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldColumn.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+ AtlasEntity newColumn = new AtlasEntity(oldColumn.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, getColumnQualifiedName(newTableQualifiedName, (String) oldColumn.getAttribute(ATTRIBUTE_NAME)));
+
+ notifications.add(new EntityPartialUpdateRequestV2(getUserName(), oldColumnId, new AtlasEntityWithExtInfo(newColumn)));
+ }
+ }
+ }
+
+ private void renameStorageDesc(AtlasEntityWithExtInfo oldEntityExtInfo, AtlasEntityWithExtInfo newEntityExtInfo, List<HookNotification> notifications) {
+ AtlasEntity oldSd = getStorageDescEntity(oldEntityExtInfo);
+ AtlasEntity newSd = new AtlasEntity(getStorageDescEntity(newEntityExtInfo)); // make a copy of newSd, since we will be setting relationshipAttributes to 'null' below
+ // and we need relationship attributes later during entity full update
+
+ if (oldSd != null && newSd != null) {
+ AtlasObjectId oldSdId = new AtlasObjectId(oldSd.getTypeName(), ATTRIBUTE_QUALIFIED_NAME, oldSd.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+
+ newSd.removeAttribute(ATTRIBUTE_TABLE);
+ newSd.setRelationshipAttributes(null);
+
+ notifications.add(new EntityPartialUpdateRequestV2(getUserName(), oldSdId, new AtlasEntityWithExtInfo(newSd)));
+ }
+ }
+
+ private AtlasEntity getStorageDescEntity(AtlasEntityWithExtInfo tableEntity) {
+ AtlasEntity ret = null;
+
+ if (tableEntity != null && tableEntity.getEntity() != null) {
+ Object attrSdId = tableEntity.getEntity().getRelationshipAttribute(ATTRIBUTE_STORAGEDESC);
+
+ if (attrSdId instanceof AtlasObjectId) {
+ ret = tableEntity.getReferredEntity(((AtlasObjectId) attrSdId).getGuid());
+ }
+ }
+
+ return ret;
+ }
+}
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRenameCol.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRenameCol.java
new file mode 100644
index 0000000000..29ca920c64
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRenameCol.java
@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook.events;
+
+import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
+import org.apache.atlas.model.instance.AtlasObjectId;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.model.notification.HookNotification.EntityPartialUpdateRequestV2;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.events.AlterTableEvent;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class AlterTableRenameCol extends AlterTable {
+ private static final Logger LOG = LoggerFactory.getLogger(AlterTableRenameCol.class);
+ private final FieldSchema columnOld;
+ private final FieldSchema columnNew;
+
+ public AlterTableRenameCol(AtlasHiveHookContext context) {
+ this(null, null, context);
+ }
+
+ public AlterTableRenameCol(FieldSchema columnOld, FieldSchema columnNew, AtlasHiveHookContext context) {
+ super(context);
+
+ this.columnOld = columnOld;
+ this.columnNew = columnNew;
+ }
+
+ @Override
+ public List<HookNotification> getNotificationMessages() throws Exception {
+ return context.isMetastoreHook() ? getHiveMetastoreMessages() : getHiveMessages();
+ }
+
+ public List<HookNotification> getHiveMetastoreMessages() throws Exception {
+ List<HookNotification> baseMsgs = super.getNotificationMessages();
+ List<HookNotification> ret = new ArrayList<>(baseMsgs);
+ AlterTableEvent tblEvent = (AlterTableEvent) context.getMetastoreEvent();
+ Table oldTable = toTable(tblEvent.getOldTable());
+ Table newTable = toTable(tblEvent.getNewTable());
+
+ processColumns(oldTable, newTable, ret);
+
+ return ret;
+ }
+
+ public List<HookNotification> getHiveMessages() throws Exception {
+ List<HookNotification> baseMsgs = super.getNotificationMessages();
+
+ if (CollectionUtils.isEmpty(getInputs())) {
+ LOG.error("AlterTableRenameCol: old-table not found in inputs list");
+
+ return null;
+ }
+
+ if (CollectionUtils.isEmpty(getOutputs())) {
+ LOG.error("AlterTableRenameCol: new-table not found in outputs list");
+
+ return null;
+ }
+
+ if (CollectionUtils.isEmpty(baseMsgs)) {
+ LOG.debug("Skipped processing of column-rename (on a temporary table?)");
+
+ return null;
+ }
+
+ List<HookNotification> ret = new ArrayList<>(baseMsgs);
+ Table oldTable = getInputs().iterator().next().getTable();
+ Table newTable = getOutputs().iterator().next().getTable();
+
+ if (newTable != null) {
+ newTable = getHive().getTable(newTable.getDbName(), newTable.getTableName());
+ }
+
+ processColumns(oldTable, newTable, ret);
+
+ return ret;
+ }
+
+ private void processColumns(Table oldTable, Table newTable, List<HookNotification> ret) {
+ FieldSchema changedColumnOld = (columnOld == null) ? findRenamedColumn(oldTable, newTable) : columnOld;
+ FieldSchema changedColumnNew = (columnNew == null) ? findRenamedColumn(newTable, oldTable) : columnNew;
+
+ if (changedColumnOld != null && changedColumnNew != null) {
+ AtlasObjectId oldColumnId = new AtlasObjectId(HIVE_TYPE_COLUMN, ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(oldTable, changedColumnOld));
+ AtlasEntity newColumn = new AtlasEntity(HIVE_TYPE_COLUMN);
+
+ newColumn.setAttribute(ATTRIBUTE_NAME, changedColumnNew.getName());
+ newColumn.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getQualifiedName(newTable, changedColumnNew));
+
+ ret.add(0, new EntityPartialUpdateRequestV2(getUserName(), oldColumnId, new AtlasEntityWithExtInfo(newColumn)));
+ } else {
+ LOG.error("AlterTableRenameCol: no renamed column detected");
+ }
+ }
+
+ public static FieldSchema findRenamedColumn(Table inputTable, Table outputTable) {
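+ // the first column present in inputTable but missing from outputTable is treated as the renamed column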
+ FieldSchema ret = null;
+ List<FieldSchema> inputColumns = inputTable.getCols();
+ List<FieldSchema> outputColumns = outputTable.getCols();
+
+ for (FieldSchema inputColumn : inputColumns) {
+ if (!outputColumns.contains(inputColumn)) {
+ ret = inputColumn;
+
+ break;
+ }
+ }
+
+ return ret;
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java
new file mode 100644
index 0000000000..3f358139be
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java
@@ -0,0 +1,1189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook.events;
+
+import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.hive.hook.HiveHook.PreprocessAction;
+import org.apache.atlas.utils.PathExtractorContext;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityExtInfo;
+import org.apache.atlas.model.instance.AtlasObjectId;
+import org.apache.atlas.model.instance.AtlasRelatedObjectId;
+import org.apache.atlas.model.instance.AtlasStruct;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.type.AtlasTypeUtil;
+import org.apache.atlas.utils.AtlasPathExtractorUtil;
+import org.apache.atlas.utils.HdfsNameServiceResolver;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.collections.MapUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.utils.SecurityUtils;
+import org.apache.hadoop.hive.ql.hooks.*;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.*;
+
+import static org.apache.atlas.hive.bridge.HiveMetaStoreBridge.getDatabaseName;
+import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_METADATA_NAMESPACE;
+import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_ENTITY_NAME;
+import static org.apache.atlas.hive.hook.AtlasHiveHookContext.QNAME_SEP_PROCESS;
+
+public abstract class BaseHiveEvent {
+ private static final Logger LOG = LoggerFactory.getLogger(BaseHiveEvent.class);
+
+ public static final String HIVE_TYPE_DB = "hive_db";
+ public static final String HIVE_TYPE_TABLE = "hive_table";
+ public static final String HIVE_TYPE_STORAGEDESC = "hive_storagedesc";
+ public static final String HIVE_TYPE_COLUMN = "hive_column";
+ public static final String HIVE_TYPE_PROCESS = "hive_process";
+ public static final String HIVE_TYPE_COLUMN_LINEAGE = "hive_column_lineage";
+ public static final String HIVE_TYPE_SERDE = "hive_serde";
+ public static final String HIVE_TYPE_ORDER = "hive_order";
+ public static final String HIVE_TYPE_PROCESS_EXECUTION = "hive_process_execution";
+ public static final String HIVE_DB_DDL = "hive_db_ddl";
+ public static final String HIVE_TABLE_DDL = "hive_table_ddl";
+ public static final String HBASE_TYPE_TABLE = "hbase_table";
+ public static final String HBASE_TYPE_NAMESPACE = "hbase_namespace";
+ public static final String ATTRIBUTE_QUALIFIED_NAME = "qualifiedName";
+ public static final String ATTRIBUTE_NAME = "name";
+ public static final String ATTRIBUTE_DESCRIPTION = "description";
+ public static final String ATTRIBUTE_OWNER = "owner";
+ public static final String ATTRIBUTE_CLUSTER_NAME = "clusterName";
+ public static final String ATTRIBUTE_LOCATION = "location";
+ public static final String ATTRIBUTE_LOCATION_PATH = "locationPath";
+ public static final String ATTRIBUTE_PARAMETERS = "parameters";
+ public static final String ATTRIBUTE_OWNER_TYPE = "ownerType";
+ public static final String ATTRIBUTE_COMMENT = "comment";
+ public static final String ATTRIBUTE_CREATE_TIME = "createTime";
+ public static final String ATTRIBUTE_LAST_ACCESS_TIME = "lastAccessTime";
+ public static final String ATTRIBUTE_VIEW_ORIGINAL_TEXT = "viewOriginalText";
+ public static final String ATTRIBUTE_VIEW_EXPANDED_TEXT = "viewExpandedText";
+ public static final String ATTRIBUTE_TABLE_TYPE = "tableType";
+ public static final String ATTRIBUTE_TEMPORARY = "temporary";
+ public static final String ATTRIBUTE_RETENTION = "retention";
+ public static final String ATTRIBUTE_DB = "db";
+ public static final String ATTRIBUTE_HIVE_DB = "hiveDb";
+ public static final String ATTRIBUTE_STORAGEDESC = "sd";
+ public static final String ATTRIBUTE_PARTITION_KEYS = "partitionKeys";
+ public static final String ATTRIBUTE_COLUMNS = "columns";
+ public static final String ATTRIBUTE_INPUT_FORMAT = "inputFormat";
+ public static final String ATTRIBUTE_OUTPUT_FORMAT = "outputFormat";
+ public static final String ATTRIBUTE_COMPRESSED = "compressed";
+ public static final String ATTRIBUTE_BUCKET_COLS = "bucketCols";
+ public static final String ATTRIBUTE_NUM_BUCKETS = "numBuckets";
+ public static final String ATTRIBUTE_STORED_AS_SUB_DIRECTORIES = "storedAsSubDirectories";
+ public static final String ATTRIBUTE_TABLE = "table";
+ public static final String ATTRIBUTE_SERDE_INFO = "serdeInfo";
+ public static final String ATTRIBUTE_SERIALIZATION_LIB = "serializationLib";
+ public static final String ATTRIBUTE_SORT_COLS = "sortCols";
+ public static final String ATTRIBUTE_COL_TYPE = "type";
+ public static final String ATTRIBUTE_COL_POSITION = "position";
+ public static final String ATTRIBUTE_PATH = "path";
+ public static final String ATTRIBUTE_NAMESERVICE_ID = "nameServiceId";
+ public static final String ATTRIBUTE_INPUTS = "inputs";
+ public static final String ATTRIBUTE_OUTPUTS = "outputs";
+ public static final String ATTRIBUTE_OPERATION_TYPE = "operationType";
+ public static final String ATTRIBUTE_START_TIME = "startTime";
+ public static final String ATTRIBUTE_USER_NAME = "userName";
+ public static final String ATTRIBUTE_QUERY_TEXT = "queryText";
+ public static final String ATTRIBUTE_PROCESS = "process";
+ public static final String ATTRIBUTE_PROCESS_EXECUTIONS = "processExecutions";
+ public static final String ATTRIBUTE_QUERY_ID = "queryId";
+ public static final String ATTRIBUTE_QUERY_PLAN = "queryPlan";
+ public static final String ATTRIBUTE_END_TIME = "endTime";
+ public static final String ATTRIBUTE_RECENT_QUERIES = "recentQueries";
+ public static final String ATTRIBUTE_QUERY = "query";
+ public static final String ATTRIBUTE_DEPENDENCY_TYPE = "depenendencyType";
+ public static final String ATTRIBUTE_EXPRESSION = "expression";
+ public static final String ATTRIBUTE_ALIASES = "aliases";
+ public static final String ATTRIBUTE_URI = "uri";
+ public static final String ATTRIBUTE_STORAGE_HANDLER = "storage_handler";
+ public static final String ATTRIBUTE_NAMESPACE = "namespace";
+ public static final String ATTRIBUTE_HOSTNAME = "hostName";
+ public static final String ATTRIBUTE_EXEC_TIME = "execTime";
+ public static final String ATTRIBUTE_DDL_QUERIES = "ddlQueries";
+ public static final String ATTRIBUTE_SERVICE_TYPE = "serviceType";
+ public static final String ATTRIBUTE_GUID = "guid";
+ public static final String ATTRIBUTE_UNIQUE_ATTRIBUTES = "uniqueAttributes";
+ public static final String HBASE_STORAGE_HANDLER_CLASS = "org.apache.hadoop.hive.hbase.HBaseStorageHandler";
+ public static final String HBASE_DEFAULT_NAMESPACE = "default";
+ public static final String HBASE_NAMESPACE_TABLE_DELIMITER = ":";
+ public static final String HBASE_PARAM_TABLE_NAME = "hbase.table.name";
+ public static final long MILLIS_CONVERT_FACTOR = 1000;
+ public static final String HDFS_PATH_PREFIX = "hdfs://";
+ public static final String EMPTY_ATTRIBUTE_VALUE = "";
+
+ public static final String RELATIONSHIP_DATASET_PROCESS_INPUTS = "dataset_process_inputs";
+ public static final String RELATIONSHIP_PROCESS_DATASET_OUTPUTS = "process_dataset_outputs";
+ public static final String RELATIONSHIP_HIVE_PROCESS_COLUMN_LINEAGE = "hive_process_column_lineage";
+ public static final String RELATIONSHIP_HIVE_TABLE_DB = "hive_table_db";
+ public static final String RELATIONSHIP_HIVE_TABLE_PART_KEYS = "hive_table_partitionkeys";
+ public static final String RELATIONSHIP_HIVE_TABLE_COLUMNS = "hive_table_columns";
+ public static final String RELATIONSHIP_HIVE_TABLE_STORAGE_DESC = "hive_table_storagedesc";
+ public static final String RELATIONSHIP_HIVE_PROCESS_PROCESS_EXE = "hive_process_process_executions";
+ public static final String RELATIONSHIP_HIVE_DB_DDL_QUERIES = "hive_db_ddl_queries";
+ public static final String RELATIONSHIP_HIVE_DB_LOCATION = "hive_db_location";
+ public static final String RELATIONSHIP_HIVE_TABLE_DDL_QUERIES = "hive_table_ddl_queries";
+ public static final String RELATIONSHIP_HBASE_TABLE_NAMESPACE = "hbase_table_namespace";
+
+
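+ // maps Hive owner-type ordinal values (1=USER, 2=ROLE, 3=GROUP) to the enum strings used in the Atlas hive_db model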
+ public static final Map<Integer, String> OWNER_TYPE_TO_ENUM_VALUE = new HashMap<>();
+
+ protected final boolean skipTempTables;
+
+ static {
+ OWNER_TYPE_TO_ENUM_VALUE.put(1, "USER");
+ OWNER_TYPE_TO_ENUM_VALUE.put(2, "ROLE");
+ OWNER_TYPE_TO_ENUM_VALUE.put(3, "GROUP");
+ }
+
+ protected final AtlasHiveHookContext context;
+
+
+ protected BaseHiveEvent(AtlasHiveHookContext context) {
+ this.context = context;
+ this.skipTempTables = context.isSkipTempTables();
+ }
+
+ public AtlasHiveHookContext getContext() {
+ return context;
+ }
+
+ public List<HookNotification> getNotificationMessages() throws Exception {
+ return null;
+ }
+
+ public static long getTableCreateTime(Table table) {
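+ // Hive stores createTime in seconds; convert to milliseconds for the Atlas attribute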
+ return table.getTTable() != null ? (table.getTTable().getCreateTime() * MILLIS_CONVERT_FACTOR) : System.currentTimeMillis();
+ }
+
+ public static String getTableOwner(Table table) {
+ return table.getTTable() != null ? table.getOwner() : "";
+ }
+
+
+ public static List<AtlasObjectId> getObjectIds(List<AtlasEntity> entities) {
+ final List<AtlasObjectId> ret;
+
+ if (CollectionUtils.isNotEmpty(entities)) {
+ ret = new ArrayList<>(entities.size());
+
+ for (AtlasEntity entity : entities) {
+ ret.add(AtlasTypeUtil.getObjectId(entity));
+ }
+ } else {
+ ret = Collections.emptyList();
+ }
+
+ return ret;
+ }
+
+
+ protected void addProcessedEntities(AtlasEntitiesWithExtInfo entitiesWithExtInfo) {
+ for (AtlasEntity entity : context.getEntities()) {
+ entitiesWithExtInfo.addReferredEntity(entity);
+ }
+
+ entitiesWithExtInfo.compact();
+
+ context.addToKnownEntities(entitiesWithExtInfo.getEntities());
+
+ if (entitiesWithExtInfo.getReferredEntities() != null) {
+ context.addToKnownEntities(entitiesWithExtInfo.getReferredEntities().values());
+ }
+ }
+
+ protected AtlasEntity getInputOutputEntity(Entity entity, AtlasEntityExtInfo entityExtInfo, boolean skipTempTables) throws Exception {
+ AtlasEntity ret = null;
+
+ switch(entity.getType()) {
+ case TABLE:
+ case PARTITION:
+ case DFS_DIR:
+ case LOCAL_DIR: {
+ ret = toAtlasEntity(entity, entityExtInfo, skipTempTables);
+ }
+ break;
+ }
+
+ return ret;
+ }
+
+ protected AtlasEntity toAtlasEntity(Entity entity, AtlasEntityExtInfo entityExtInfo, boolean skipTempTables) throws Exception {
+ AtlasEntity ret = null;
+
+ switch (entity.getType()) {
+ case DATABASE: {
+ String dbName = getDatabaseName(entity.getDatabase());
+
+ if (!context.getIgnoreDummyDatabaseName().contains(dbName)) {
+ Database db = getHive().getDatabase(dbName);
+
+ ret = toDbEntity(db);
+ }
+ }
+ break;
+
+ case TABLE:
+ case PARTITION: {
+ String dbName = entity.getTable().getDbName();
+ String tableName = entity.getTable().getTableName();
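+ // skip tables whose name matches the configured temp-table prefix, known dummy table/database names, or Hive temporary tables when skipTempTables is set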
+ boolean skipTable = StringUtils.isNotEmpty(context.getIgnoreValuesTmpTableNamePrefix()) && tableName.toLowerCase().startsWith(context.getIgnoreValuesTmpTableNamePrefix());
+
+ if (!skipTable) {
+ skipTable = context.getIgnoreDummyTableName().contains(tableName) && context.getIgnoreDummyDatabaseName().contains(dbName);
+ }
+
+ if (!skipTable) {
+ skipTable = skipTempTables && entity.getTable().isTemporary();
+ }
+
+ if (!skipTable) {
+ Table table = getHive().getTable(dbName, tableName);
+
+ ret = toTableEntity(table, entityExtInfo);
+ } else {
+ context.registerSkippedEntity(entity);
+ }
+ }
+ break;
+
+ case DFS_DIR:
+ case LOCAL_DIR: {
+ URI location = entity.getLocation();
+
+ if (location != null) {
+ ret = getPathEntity(new Path(entity.getLocation()), entityExtInfo);
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return ret;
+ }
+
+ protected AtlasEntity toDbEntity(Database db) throws Exception {
+ String dbName = getDatabaseName(db);
+ String dbQualifiedName = getQualifiedName(db);
+ boolean isKnownDatabase = context.isKnownDatabase(dbQualifiedName);
+ AtlasEntity ret = context.getEntity(dbQualifiedName);
+
+ if (ret == null) {
+ ret = new AtlasEntity(HIVE_TYPE_DB);
+
+ // if this DB was sent in an earlier notification, set 'guid' to null - which will:
+ // - result in this entity not being included in 'referredEntities'
+ // - cause Atlas server to resolve the entity by its qualifiedName
+ if (isKnownDatabase) {
+ ret.setGuid(null);
+ }
+
+ ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, dbQualifiedName);
+ ret.setAttribute(ATTRIBUTE_NAME, dbName);
+
+ if (StringUtils.isNotEmpty(db.getDescription())) {
+ ret.setAttribute(ATTRIBUTE_DESCRIPTION, db.getDescription());
+ }
+
+ ret.setAttribute(ATTRIBUTE_OWNER, db.getOwnerName());
+
+ ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, getMetadataNamespace());
+ ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(db.getLocationUri()));
+ ret.setAttribute(ATTRIBUTE_PARAMETERS, db.getParameters());
+
+ if (db.getOwnerType() != null) {
+ ret.setAttribute(ATTRIBUTE_OWNER_TYPE, OWNER_TYPE_TO_ENUM_VALUE.get(db.getOwnerType().getValue()));
+ }
+
+ context.putEntity(dbQualifiedName, ret);
+ }
+
+ return ret;
+ }
+
+ protected AtlasEntityWithExtInfo toTableEntity(Table table) throws Exception {
+ AtlasEntityWithExtInfo ret = new AtlasEntityWithExtInfo();
+
+ AtlasEntity entity = toTableEntity(table, ret);
+
+ if (entity != null) {
+ ret.setEntity(entity);
+ } else {
+ ret = null;
+ }
+
+ return ret;
+ }
+
+ protected AtlasEntity toTableEntity(Table table, AtlasEntitiesWithExtInfo entities) throws Exception {
+ AtlasEntity ret = toTableEntity(table, (AtlasEntityExtInfo) entities);
+
+ if (ret != null) {
+ entities.addEntity(ret);
+ }
+
+ return ret;
+ }
+
+ protected AtlasEntity toTableEntity(Table table, AtlasEntityExtInfo entityExtInfo) throws Exception {
+ Database db = getDatabases(table.getDbName());
+ AtlasEntity dbEntity = toDbEntity(db);
+
+ if (entityExtInfo != null) {
+ if (dbEntity != null) {
+ entityExtInfo.addReferredEntity(dbEntity);
+ }
+ }
+
+ AtlasEntity ret = toTableEntity(AtlasTypeUtil.getObjectId(dbEntity), table, entityExtInfo);
+
+ return ret;
+ }
+
+ protected AtlasEntity toTableEntity(AtlasObjectId dbId, Table table, AtlasEntityExtInfo entityExtInfo) throws Exception {
+ String tblQualifiedName = getQualifiedName(table);
+ boolean isKnownTable = context.isKnownTable(tblQualifiedName);
+
+ AtlasEntity ret = context.getEntity(tblQualifiedName);
+
+ if (ret == null) {
+ PreprocessAction action = context.getPreprocessActionForHiveTable(tblQualifiedName);
+
+ if (action == PreprocessAction.IGNORE) {
+ LOG.info("ignoring table {}", tblQualifiedName);
+ } else {
+ ret = new AtlasEntity(HIVE_TYPE_TABLE);
+
+ // if this table was sent in an earlier notification, set 'guid' to null - which will:
+ // - result in this entity not being included in 'referredEntities'
+ // - cause Atlas server to resolve the entity by its qualifiedName
+ if (isKnownTable && !isAlterTableOperation()) {
+ ret.setGuid(null);
+ }
+
+ long createTime = getTableCreateTime(table);
+ long lastAccessTime = table.getLastAccessTime() > 0 ? (table.getLastAccessTime() * MILLIS_CONVERT_FACTOR) : createTime;
+
+ AtlasRelatedObjectId dbRelatedObject = new AtlasRelatedObjectId(dbId, RELATIONSHIP_HIVE_TABLE_DB);
+
+ ret.setRelationshipAttribute(ATTRIBUTE_DB, dbRelatedObject);
+ ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, tblQualifiedName);
+ ret.setAttribute(ATTRIBUTE_NAME, table.getTableName().toLowerCase());
+ ret.setAttribute(ATTRIBUTE_OWNER, table.getOwner());
+ ret.setAttribute(ATTRIBUTE_CREATE_TIME, createTime);
+ ret.setAttribute(ATTRIBUTE_LAST_ACCESS_TIME, lastAccessTime);
+ ret.setAttribute(ATTRIBUTE_RETENTION, table.getRetention());
+ ret.setAttribute(ATTRIBUTE_PARAMETERS, table.getParameters());
+ ret.setAttribute(ATTRIBUTE_COMMENT, table.getParameters().get(ATTRIBUTE_COMMENT));
+ ret.setAttribute(ATTRIBUTE_TABLE_TYPE, table.getTableType().name());
+ ret.setAttribute(ATTRIBUTE_TEMPORARY, table.isTemporary());
+
+ if (table.getViewOriginalText() != null) {
+ ret.setAttribute(ATTRIBUTE_VIEW_ORIGINAL_TEXT, table.getViewOriginalText());
+ }
+
+ if (table.getViewExpandedText() != null) {
+ ret.setAttribute(ATTRIBUTE_VIEW_EXPANDED_TEXT, table.getViewExpandedText());
+ }
+
+ boolean pruneTable = table.isTemporary() || action == PreprocessAction.PRUNE;
+
+ if (pruneTable) {
+ LOG.info("ignoring details of table {}", tblQualifiedName);
+ } else {
+ AtlasObjectId tableId = AtlasTypeUtil.getObjectId(ret);
+ AtlasEntity sd = getStorageDescEntity(tableId, table);
+ List<AtlasEntity> partitionKeys = getColumnEntities(tableId, table, table.getPartitionKeys(), RELATIONSHIP_HIVE_TABLE_PART_KEYS);
+ List<AtlasEntity> columns = getColumnEntities(tableId, table, table.getCols(), RELATIONSHIP_HIVE_TABLE_COLUMNS);
+
+ if (entityExtInfo != null) {
+ entityExtInfo.addReferredEntity(sd);
+
+ if (partitionKeys != null) {
+ for (AtlasEntity partitionKey : partitionKeys) {
+ entityExtInfo.addReferredEntity(partitionKey);
+ }
+ }
+
+ if (columns != null) {
+ for (AtlasEntity column : columns) {
+ entityExtInfo.addReferredEntity(column);
+ }
+ }
+ }
+
+
+ ret.setRelationshipAttribute(ATTRIBUTE_STORAGEDESC, AtlasTypeUtil.getAtlasRelatedObjectId(sd, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC));
+ ret.setRelationshipAttribute(ATTRIBUTE_PARTITION_KEYS, AtlasTypeUtil.getAtlasRelatedObjectIds(partitionKeys, RELATIONSHIP_HIVE_TABLE_PART_KEYS));
+ ret.setRelationshipAttribute(ATTRIBUTE_COLUMNS, AtlasTypeUtil.getAtlasRelatedObjectIds(columns, RELATIONSHIP_HIVE_TABLE_COLUMNS));
+ }
+
+ context.putEntity(tblQualifiedName, ret);
+ }
+ }
+
+ return ret;
+ }
+
+ protected AtlasEntity getStorageDescEntity(AtlasObjectId tableId, Table table) {
+ String sdQualifiedName = getQualifiedName(table, table.getSd());
+ boolean isKnownTable = tableId.getGuid() == null;
+
+ AtlasEntity ret = context.getEntity(sdQualifiedName);
+
+ if (ret == null) {
+ ret = new AtlasEntity(HIVE_TYPE_STORAGEDESC);
+
+ // if sd's table was sent in an earlier notification, set 'guid' to null - which will:
+ // - result in this entity not being included in 'referredEntities'
+ // - cause Atlas server to resolve the entity by its qualifiedName
+ if (isKnownTable) {
+ ret.setGuid(null);
+ }
+
+ StorageDescriptor sd = table.getSd();
+
+ AtlasRelatedObjectId tableRelatedObject = new AtlasRelatedObjectId(tableId, RELATIONSHIP_HIVE_TABLE_STORAGE_DESC);
+
+ ret.setRelationshipAttribute(ATTRIBUTE_TABLE, tableRelatedObject);
+ ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, sdQualifiedName);
+ ret.setAttribute(ATTRIBUTE_PARAMETERS, sd.getParameters());
+ ret.setAttribute(ATTRIBUTE_LOCATION, HdfsNameServiceResolver.getPathWithNameServiceID(sd.getLocation()));
+ ret.setAttribute(ATTRIBUTE_INPUT_FORMAT, sd.getInputFormat());
+ ret.setAttribute(ATTRIBUTE_OUTPUT_FORMAT, sd.getOutputFormat());
+ ret.setAttribute(ATTRIBUTE_COMPRESSED, sd.isCompressed());
+ ret.setAttribute(ATTRIBUTE_NUM_BUCKETS, sd.getNumBuckets());
+ ret.setAttribute(ATTRIBUTE_STORED_AS_SUB_DIRECTORIES, sd.isStoredAsSubDirectories());
+
+ if (sd.getBucketCols() != null && sd.getBucketCols().size() > 0) {
+ ret.setAttribute(ATTRIBUTE_BUCKET_COLS, sd.getBucketCols());
+ }
+
+ if (sd.getSerdeInfo() != null) {
+ AtlasStruct serdeInfo = new AtlasStruct(HIVE_TYPE_SERDE);
+ SerDeInfo sdSerDeInfo = sd.getSerdeInfo();
+
+ serdeInfo.setAttribute(ATTRIBUTE_NAME, sdSerDeInfo.getName());
+ serdeInfo.setAttribute(ATTRIBUTE_SERIALIZATION_LIB, sdSerDeInfo.getSerializationLib());
+ serdeInfo.setAttribute(ATTRIBUTE_PARAMETERS, sdSerDeInfo.getParameters());
+
+ ret.setAttribute(ATTRIBUTE_SERDE_INFO, serdeInfo);
+ }
+
+ if (CollectionUtils.isNotEmpty(sd.getSortCols())) {
+ List<AtlasStruct> sortCols = new ArrayList<>(sd.getSortCols().size());
+
+ for (Order sdSortCol : sd.getSortCols()) {
+ AtlasStruct sortcol = new AtlasStruct(HIVE_TYPE_ORDER);
+
+ sortcol.setAttribute("col", sdSortCol.getCol());
+ sortcol.setAttribute("order", sdSortCol.getOrder());
+
+ sortCols.add(sortcol);
+ }
+
+ ret.setAttribute(ATTRIBUTE_SORT_COLS, sortCols);
+ }
+
+ context.putEntity(sdQualifiedName, ret);
+ }
+
+ return ret;
+ }
+
+ protected List<AtlasEntity> getColumnEntities(AtlasObjectId tableId, Table table, List<FieldSchema> fieldSchemas, String relationshipType) {
+ List<AtlasEntity> ret = new ArrayList<>();
+ boolean isKnownTable = tableId.getGuid() == null;
+ int columnPosition = 0;
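+ // position reflects the column's order within the supplied field-schema list; partition keys and data columns are numbered separately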
+
+ if (CollectionUtils.isNotEmpty(fieldSchemas)) {
+ for (FieldSchema fieldSchema : fieldSchemas) {
+ String colQualifiedName = getQualifiedName(table, fieldSchema);
+ AtlasEntity column = context.getEntity(colQualifiedName);
+
+ if (column == null) {
+ column = new AtlasEntity(HIVE_TYPE_COLUMN);
+
+ // if column's table was sent in an earlier notification, set 'guid' to null - which will:
+ // - result in this entity not being included in 'referredEntities'
+ // - cause Atlas server to resolve the entity by its qualifiedName
+ if (isKnownTable) {
+ column.setGuid(null);
+ }
+ AtlasRelatedObjectId relatedObjectId = new AtlasRelatedObjectId(tableId, relationshipType);
+ column.setRelationshipAttribute(ATTRIBUTE_TABLE, (relatedObjectId));
+ column.setAttribute(ATTRIBUTE_QUALIFIED_NAME, colQualifiedName);
+ column.setAttribute(ATTRIBUTE_NAME, fieldSchema.getName());
+ column.setAttribute(ATTRIBUTE_OWNER, table.getOwner());
+ column.setAttribute(ATTRIBUTE_COL_TYPE, fieldSchema.getType());
+ column.setAttribute(ATTRIBUTE_COL_POSITION, columnPosition++);
+ column.setAttribute(ATTRIBUTE_COMMENT, fieldSchema.getComment());
+
+ context.putEntity(colQualifiedName, column);
+ }
+
+ ret.add(column);
+ }
+ }
+
+ return ret;
+ }
+
+ protected AtlasEntity getPathEntity(Path path, AtlasEntityExtInfo extInfo) {
+ String strPath = path.toString();
+ String metadataNamespace = getMetadataNamespace();
+ boolean isConvertPathToLowerCase = strPath.startsWith(HDFS_PATH_PREFIX) && context.isConvertHdfsPathToLowerCase();
+ PathExtractorContext pathExtractorContext = new PathExtractorContext(metadataNamespace, context.getQNameToEntityMap(),
+ isConvertPathToLowerCase, context.getAwsS3AtlasModelVersion());
+
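+ // AtlasPathExtractorUtil builds the appropriate path entity (hdfs_path or a cloud-storage type) along with any referred entities, which are copied into extInfo below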
+ AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(path, pathExtractorContext);
+
+ if (entityWithExtInfo.getReferredEntities() != null){
+ for (AtlasEntity entity : entityWithExtInfo.getReferredEntities().values()) {
+ extInfo.addReferredEntity(entity);
+ }
+ }
+
+ return entityWithExtInfo.getEntity();
+ }
+
+ protected AtlasEntity getHiveProcessEntity(List<AtlasEntity> inputs, List<AtlasEntity> outputs) throws Exception {
+ AtlasEntity ret = new AtlasEntity(HIVE_TYPE_PROCESS);
+ String queryStr = getQueryString();
+ String qualifiedName = getQualifiedName(inputs, outputs);
+
+ if (queryStr != null) {
+ queryStr = queryStr.toLowerCase().trim();
+ }
+
+ ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, getOperationName());
+
+ if (context.isMetastoreHook()) {
+ HiveOperation operation = context.getHiveOperation();
+
+ if (operation == HiveOperation.CREATETABLE || operation == HiveOperation.CREATETABLE_AS_SELECT) {
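+ // for CREATE TABLE / CTAS handled via the metastore hook, derive the process qualifiedName from the created table's qualifiedName and createTime, and use a generated dummy process name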
+ AtlasEntity table = outputs.get(0);
+ long createTime = Long.valueOf((Long)table.getAttribute(ATTRIBUTE_CREATE_TIME));
+ qualifiedName = (String) table.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + QNAME_SEP_PROCESS + createTime;
+
+ ret.setAttribute(ATTRIBUTE_NAME, "dummyProcess:" + UUID.randomUUID());
+ ret.setAttribute(ATTRIBUTE_OPERATION_TYPE, operation.getOperationName());
+ }
+ }
+
+ ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, qualifiedName);
+ ret.setAttribute(ATTRIBUTE_NAME, qualifiedName);
+ ret.setRelationshipAttribute(ATTRIBUTE_INPUTS, AtlasTypeUtil.getAtlasRelatedObjectIds(inputs, RELATIONSHIP_DATASET_PROCESS_INPUTS));
+ ret.setRelationshipAttribute(ATTRIBUTE_OUTPUTS, AtlasTypeUtil.getAtlasRelatedObjectIds(outputs, RELATIONSHIP_PROCESS_DATASET_OUTPUTS));
+
+ // These values are now captured by the hive_process_execution entity type; they are still mandatory on
+ // hive_process, so placeholder values are set here.
+ ret.setAttribute(ATTRIBUTE_START_TIME, System.currentTimeMillis());
+ ret.setAttribute(ATTRIBUTE_END_TIME, System.currentTimeMillis());
+
+ if (context.isHiveProcessPopulateDeprecatedAttributes()) {
+ ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
+ ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr);
+ ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId());
+ } else {
+ ret.setAttribute(ATTRIBUTE_USER_NAME, EMPTY_ATTRIBUTE_VALUE);
+ ret.setAttribute(ATTRIBUTE_QUERY_TEXT, EMPTY_ATTRIBUTE_VALUE);
+ ret.setAttribute(ATTRIBUTE_QUERY_ID, EMPTY_ATTRIBUTE_VALUE);
+ }
+
+ ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
+ ret.setAttribute(ATTRIBUTE_RECENT_QUERIES, Collections.singletonList(queryStr));
+ ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, getMetadataNamespace());
+
+ return ret;
+ }
+
+ protected AtlasEntity getHiveProcessExecutionEntity(AtlasEntity hiveProcess) throws Exception {
+ AtlasEntity ret = new AtlasEntity(HIVE_TYPE_PROCESS_EXECUTION);
+ String queryStr = getQueryString();
+
+ if (queryStr != null) {
+ queryStr = queryStr.toLowerCase().trim();
+ }
+
+ Long endTime = System.currentTimeMillis();
+ ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString() +
+ QNAME_SEP_PROCESS + getQueryStartTime().toString() +
+ QNAME_SEP_PROCESS + endTime.toString());
+ ret.setAttribute(ATTRIBUTE_NAME, ret.getAttribute(ATTRIBUTE_QUALIFIED_NAME));
+ ret.setAttribute(ATTRIBUTE_START_TIME, getQueryStartTime());
+ ret.setAttribute(ATTRIBUTE_END_TIME, endTime);
+ ret.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
+ ret.setAttribute(ATTRIBUTE_QUERY_TEXT, queryStr);
+ ret.setAttribute(ATTRIBUTE_QUERY_ID, getQueryId());
+ ret.setAttribute(ATTRIBUTE_QUERY_PLAN, "Not Supported");
+ ret.setAttribute(ATTRIBUTE_HOSTNAME, getContext().getHostName());
+ AtlasRelatedObjectId hiveProcessRelationObjectId = AtlasTypeUtil.toAtlasRelatedObjectId(hiveProcess, RELATIONSHIP_HIVE_PROCESS_PROCESS_EXE);
+ ret.setRelationshipAttribute(ATTRIBUTE_PROCESS, hiveProcessRelationObjectId);
+ return ret;
+ }
+
+ protected AtlasEntity createHiveDDLEntity(AtlasEntity dbOrTable) {
+ return createHiveDDLEntity(dbOrTable, false);
+ }
+
+ protected AtlasEntity createHiveDDLEntity(AtlasEntity dbOrTable, boolean excludeEntityGuid) {
+ AtlasObjectId objId = AtlasTypeUtil.getObjectId(dbOrTable);
+ AtlasEntity hiveDDL = null;
+
+ if (excludeEntityGuid) {
+ objId.setGuid(null);
+ }
+ AtlasRelatedObjectId objIdRelatedObject = new AtlasRelatedObjectId(objId);
+
+ if (StringUtils.equals(objId.getTypeName(), HIVE_TYPE_DB)) {
+ hiveDDL = new AtlasEntity(HIVE_DB_DDL);
+ objIdRelatedObject.setRelationshipType(RELATIONSHIP_HIVE_DB_DDL_QUERIES);
+ hiveDDL.setRelationshipAttribute(ATTRIBUTE_DB, objIdRelatedObject);
+ } else if (StringUtils.equals(objId.getTypeName(), HIVE_TYPE_TABLE)) {
+ hiveDDL = new AtlasEntity(HIVE_TABLE_DDL);
+ objIdRelatedObject.setRelationshipType(RELATIONSHIP_HIVE_TABLE_DDL_QUERIES);
+ hiveDDL.setRelationshipAttribute(ATTRIBUTE_TABLE, objIdRelatedObject);
+ }
+
+ if (hiveDDL != null) {
+ hiveDDL.setAttribute(ATTRIBUTE_SERVICE_TYPE, "hive");
+ hiveDDL.setAttribute(ATTRIBUTE_EXEC_TIME, getQueryStartTime());
+ hiveDDL.setAttribute(ATTRIBUTE_QUERY_TEXT, getQueryString());
+ hiveDDL.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
+ hiveDDL.setAttribute(ATTRIBUTE_NAME, getQueryString());
+ hiveDDL.setAttribute(ATTRIBUTE_QUALIFIED_NAME, dbOrTable.getAttribute(ATTRIBUTE_QUALIFIED_NAME).toString()
+ + QNAME_SEP_PROCESS + getQueryStartTime().toString());
+ }
+
+ return hiveDDL;
+ }
+
+ protected AtlasEntity createHiveLocationEntity(AtlasEntity dbEntity, AtlasEntitiesWithExtInfo extInfoEntity) {
+ AtlasEntity ret = null;
+ String locationUri = (String)dbEntity.getAttribute(ATTRIBUTE_LOCATION);
+
+ if (StringUtils.isNotEmpty(locationUri)) {
+ Path path = null;
+
+ try {
+ path = new Path(locationUri);
+ } catch (IllegalArgumentException excp) {
+ LOG.warn("failed to create Path from locationUri {}", locationUri, excp);
+ }
+
+ if (path != null) {
+ ret = getPathEntity(path, extInfoEntity);
+
+ if (ret != null) {
+ AtlasRelatedObjectId dbRelatedObjectId = AtlasTypeUtil.getAtlasRelatedObjectId(dbEntity, RELATIONSHIP_HIVE_DB_LOCATION);
+
+ ret.setRelationshipAttribute(ATTRIBUTE_HIVE_DB, dbRelatedObjectId);
+ }
+ }
+ }
+
+ return ret;
+ }
+
+ protected String getMetadataNamespace() {
+ return context.getMetadataNamespace();
+ }
+
+ protected Database getDatabases(String dbName) throws Exception {
+ return context.isMetastoreHook() ? context.getMetastoreHandler().get_database(dbName) :
+ context.getHive().getDatabase(dbName);
+ }
+
+ protected Hive getHive() {
+ return context.getHive();
+ }
+
+ protected Set<ReadEntity> getInputs() {
+ return context != null ? context.getInputs() : Collections.emptySet();
+ }
+
+ protected Set<WriteEntity> getOutputs() {
+ return context != null ? context.getOutputs() : Collections.emptySet();
+ }
+
+ protected LineageInfo getLineageInfo() {
+ return context != null ? context.getLineageInfo() : null;
+ }
+
+ protected String getQueryString() {
+ return isHiveContextValid() ? context.getHiveContext().getQueryPlan().getQueryStr() : null;
+ }
+
+ protected String getOperationName() {
+ return isHiveContextValid() ? context.getHiveContext().getOperationName() : null;
+ }
+
+ protected String getHiveUserName() {
+ return isHiveContextValid() ? context.getHiveContext().getUserName() : null;
+ }
+
+ protected UserGroupInformation getUgi() {
+ return isHiveContextValid() ? context.getHiveContext().getUgi() : null;
+ }
+
+ protected Long getQueryStartTime() {
+ return isHiveContextValid() ? context.getHiveContext().getQueryPlan().getQueryStartTime() : System.currentTimeMillis();
+ }
+
+ protected String getQueryId() {
+ return isHiveContextValid() ? context.getHiveContext().getQueryPlan().getQueryId() : null;
+ }
+
+ private boolean isHiveContextValid() {
+ return context != null && context.getHiveContext() != null;
+ }
+
+ protected String getUserName() {
+ String ret = null;
+ UserGroupInformation ugi = null;
+
+ if (context.isMetastoreHook()) {
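+ // the metastore hook has no Hive session context, so resolve the user from the Hadoop UGI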
+ try {
+ ugi = SecurityUtils.getUGI();
+ } catch (Exception e) {
+ //do nothing
+ }
+ } else {
+ ret = getHiveUserName();
+
+ if (StringUtils.isEmpty(ret)) {
+ ugi = getUgi();
+ }
+ }
+
+ if (ugi != null) {
+ ret = ugi.getShortUserName();
+ }
+
+ if (StringUtils.isEmpty(ret)) {
+ try {
+ ret = UserGroupInformation.getCurrentUser().getShortUserName();
+ } catch (IOException e) {
+ LOG.warn("Failed for UserGroupInformation.getCurrentUser() ", e);
+
+ ret = System.getProperty("user.name");
+ }
+ }
+
+ return ret;
+ }
+
+ protected String getQualifiedName(Entity entity) throws Exception {
+ switch (entity.getType()) {
+ case DATABASE:
+ return getQualifiedName(entity.getDatabase());
+
+ case TABLE:
+ case PARTITION:
+ return getQualifiedName(entity.getTable());
+
+ case DFS_DIR:
+ case LOCAL_DIR:
+ return getQualifiedName(entity.getLocation());
+ }
+
+ return null;
+ }
+
+ protected String getQualifiedName(Database db) {
+ return context.getQualifiedName(db);
+ }
+
+ protected String getQualifiedName(Table table) {
+ return context.getQualifiedName(table);
+ }
+
+ protected String getQualifiedName(Table table, StorageDescriptor sd) {
+ return getQualifiedName(table) + "_storage";
+ }
+
+ protected String getQualifiedName(Table table, FieldSchema column) {
+ String tblQualifiedName = getQualifiedName(table);
+
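+ // splice the column name in before the metadata-namespace suffix, producing db.table.column@namespace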
+ int sepPos = tblQualifiedName.lastIndexOf(QNAME_SEP_METADATA_NAMESPACE);
+
+ if (sepPos == -1) {
+ return tblQualifiedName + QNAME_SEP_ENTITY_NAME + column.getName().toLowerCase();
+ } else {
+ return tblQualifiedName.substring(0, sepPos) + QNAME_SEP_ENTITY_NAME + column.getName().toLowerCase() + tblQualifiedName.substring(sepPos);
+ }
+ }
+
+ protected String getQualifiedName(DependencyKey column) {
+ String dbName = column.getDataContainer().getTable().getDbName();
+ String tableName = column.getDataContainer().getTable().getTableName();
+ String colName = column.getFieldSchema().getName();
+
+ return getQualifiedName(dbName, tableName, colName);
+ }
+
+ protected String getQualifiedName(BaseColumnInfo column) {
+ String dbName = column.getTabAlias().getTable().getDbName();
+ String tableName = column.getTabAlias().getTable().getTableName();
+ String colName = column.getColumn() != null ? column.getColumn().getName() : null;
+ String metadataNamespace = getMetadataNamespace();
+
+ if (colName == null) {
+ return (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + metadataNamespace;
+ } else {
+ return (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_ENTITY_NAME + colName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + metadataNamespace;
+ }
+ }
+
+ protected String getQualifiedName(String dbName, String tableName, String colName) {
+ return (dbName + QNAME_SEP_ENTITY_NAME + tableName + QNAME_SEP_ENTITY_NAME + colName + QNAME_SEP_METADATA_NAMESPACE).toLowerCase() + getMetadataNamespace();
+ }
+
+ protected String getQualifiedName(URI location) {
+ String strPath = new Path(location).toString();
+
+ if (strPath.startsWith(HDFS_PATH_PREFIX) && context.isConvertHdfsPathToLowerCase()) {
+ strPath = strPath.toLowerCase();
+ }
+
+ String nameServiceID = HdfsNameServiceResolver.getNameServiceIDForPath(strPath);
+ String attrPath = StringUtils.isEmpty(nameServiceID) ? strPath : HdfsNameServiceResolver.getPathWithNameServiceID(strPath);
+
+ return getQualifiedName(attrPath);
+ }
+
+ protected String getQualifiedName(String path) {
+ if (path.startsWith(HdfsNameServiceResolver.HDFS_SCHEME)) {
+ return path + QNAME_SEP_METADATA_NAMESPACE + getMetadataNamespace();
+ }
+
+ return path.toLowerCase();
+ }
+
+ protected String getColumnQualifiedName(String tblQualifiedName, String columnName) {
+ int sepPos = tblQualifiedName.lastIndexOf(QNAME_SEP_METADATA_NAMESPACE);
+
+ if (sepPos == -1) {
+ return tblQualifiedName + QNAME_SEP_ENTITY_NAME + columnName.toLowerCase();
+ } else {
+ return tblQualifiedName.substring(0, sepPos) + QNAME_SEP_ENTITY_NAME + columnName.toLowerCase() + tblQualifiedName.substring(sepPos);
+ }
+
+ }
+
+ protected String getQualifiedName(List<AtlasEntity> inputs, List<AtlasEntity> outputs) throws Exception {
+ HiveOperation operation = context.getHiveOperation();
+
+ if (operation == HiveOperation.CREATETABLE ||
+ operation == HiveOperation.CREATETABLE_AS_SELECT ||
+ operation == HiveOperation.CREATEVIEW ||
+ operation == HiveOperation.ALTERVIEW_AS ||
+ operation == HiveOperation.ALTERTABLE_LOCATION) {
+ List<? extends Entity> sortedEntities = new ArrayList<>(getOutputs());
+
+ Collections.sort(sortedEntities, entityComparator);
+
+ for (Entity entity : sortedEntities) {
+ if (entity.getType() == Entity.Type.TABLE) {
+ Table table = entity.getTable();
+
+ table = getHive().getTable(table.getDbName(), table.getTableName());
+
+ long createTime = getTableCreateTime(table);
+
+ return getQualifiedName(table) + QNAME_SEP_PROCESS + createTime;
+ }
+ }
+ }
+
+ String qualifiedName = null;
+ String operationName = getOperationName();
+
+ if (operationName != null) {
+ StringBuilder sb = new StringBuilder(operationName);
+
+ boolean ignoreHDFSPaths = ignoreHDFSPathsinProcessQualifiedName();
+
+ addToProcessQualifiedName(sb, getInputs(), ignoreHDFSPaths);
+ sb.append("->");
+ addToProcessQualifiedName(sb, getOutputs(), ignoreHDFSPaths);
+
+ qualifiedName = sb.toString();
+ }
+
+
+ return qualifiedName;
+ }
+
+ protected AtlasEntity toReferencedHBaseTable(Table table, AtlasEntitiesWithExtInfo entities) {
+ AtlasEntity ret = null;
+ HBaseTableInfo hBaseTableInfo = new HBaseTableInfo(table);
+ String hbaseNameSpace = hBaseTableInfo.getHbaseNameSpace();
+ String hbaseTableName = hBaseTableInfo.getHbaseTableName();
+ String metadataNamespace = getMetadataNamespace();
+
+ if (hbaseTableName != null) {
+ AtlasEntity nsEntity = new AtlasEntity(HBASE_TYPE_NAMESPACE);
+ nsEntity.setAttribute(ATTRIBUTE_NAME, hbaseNameSpace);
+ nsEntity.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace);
+ nsEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHBaseNameSpaceQualifiedName(metadataNamespace, hbaseNameSpace));
+
+ ret = new AtlasEntity(HBASE_TYPE_TABLE);
+
+ ret.setAttribute(ATTRIBUTE_NAME, hbaseTableName);
+ ret.setAttribute(ATTRIBUTE_URI, hbaseTableName);
+
+ AtlasRelatedObjectId objIdRelatedObject = new AtlasRelatedObjectId(AtlasTypeUtil.getObjectId(nsEntity), RELATIONSHIP_HBASE_TABLE_NAMESPACE);
+
+ ret.setRelationshipAttribute(ATTRIBUTE_NAMESPACE, objIdRelatedObject);
+ ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHBaseTableQualifiedName(metadataNamespace, hbaseNameSpace, hbaseTableName));
+
+ entities.addReferredEntity(nsEntity);
+ entities.addEntity(ret);
+ }
+
+ return ret;
+ }
+
+ protected boolean isHBaseStore(Table table) {
+ boolean ret = false;
+ Map<String, String> parameters = table.getParameters();
+
+ if (MapUtils.isNotEmpty(parameters)) {
+ String storageHandler = parameters.get(ATTRIBUTE_STORAGE_HANDLER);
+
+ ret = (storageHandler != null && storageHandler.equals(HBASE_STORAGE_HANDLER_CLASS));
+ }
+
+ return ret;
+ }
+
+ private static String getHBaseTableQualifiedName(String metadataNamespace, String nameSpace, String tableName) {
+ return String.format("%s:%s@%s", nameSpace.toLowerCase(), tableName.toLowerCase(), metadataNamespace);
+ }
+
+ private static String getHBaseNameSpaceQualifiedName(String metadataNamespace, String nameSpace) {
+ return String.format("%s@%s", nameSpace.toLowerCase(), metadataNamespace);
+ }
+
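+ // the process qualifiedName omits HDFS path entities for QUERY operations, and for LOAD/IMPORT/EXPORT when a partition entity already identifies the dataset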
+ private boolean ignoreHDFSPathsinProcessQualifiedName() {
+ switch (context.getHiveOperation()) {
+ case LOAD:
+ case IMPORT:
+ return hasPartitionEntity(getOutputs());
+ case EXPORT:
+ return hasPartitionEntity(getInputs());
+ case QUERY:
+ return true;
+ }
+
+ return false;
+ }
+
+ private boolean hasPartitionEntity(Collection<? extends Entity> entities) {
+ if (entities != null) {
+ for (Entity entity : entities) {
+ if (entity.getType() == Entity.Type.PARTITION) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+ private void addToProcessQualifiedName(StringBuilder processQualifiedName, Collection<? extends Entity> entities, boolean ignoreHDFSPaths) {
+ if (entities == null) {
+ return;
+ }
+
+ List<? extends Entity> sortedEntities = new ArrayList<>(entities);
+
+ Collections.sort(sortedEntities, entityComparator);
+
+ Set<String> dataSetsProcessed = new HashSet<>();
+ Map<String, Table> tableMap = new HashMap<>();
+
+ for (Entity entity : sortedEntities) {
+ if (ignoreHDFSPaths && (Entity.Type.DFS_DIR.equals(entity.getType()) || Entity.Type.LOCAL_DIR.equals(entity.getType()))) {
+ continue;
+ }
+
+ String qualifiedName = null;
+ long createTime = 0;
+
+ try {
+ if (entity.getType() == Entity.Type.PARTITION || entity.getType() == Entity.Type.TABLE) {
+ String tableKey = entity.getTable().getDbName() + "." + entity.getTable().getTableName();
+ Table table = tableMap.get(tableKey);
+
+ if (table == null) {
+ table = getHive().getTable(entity.getTable().getDbName(), entity.getTable().getTableName());
+
+ tableMap.put(tableKey, table); //since there could be several partitions in a table, store it to avoid hive calls.
+ }
+ if (table != null) {
+ createTime = getTableCreateTime(table);
+ qualifiedName = getQualifiedName(table);
+ }
+ } else {
+ qualifiedName = getQualifiedName(entity);
+ }
+ } catch (Exception excp) {
+ LOG.error("error while computing qualifiedName for process", excp);
+ }
+
+ if (qualifiedName == null || !dataSetsProcessed.add(qualifiedName)) {
+ continue;
+ }
+
+ if (entity instanceof WriteEntity) { // output entity
+ WriteEntity writeEntity = (WriteEntity) entity;
+
+ if (writeEntity.getWriteType() != null && HiveOperation.QUERY.equals(context.getHiveOperation())) {
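+ // for QUERY operations, append the write type (INSERT, INSERT_OVERWRITE, UPDATE, DELETE, or non-local PATH_WRITE) so different write modes yield distinct process qualifiedNames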
+ boolean addWriteType = false;
+
+ switch (((WriteEntity) entity).getWriteType()) {
+ case INSERT:
+ case INSERT_OVERWRITE:
+ case UPDATE:
+ case DELETE:
+ addWriteType = true;
+ break;
+
+ case PATH_WRITE:
+ addWriteType = !Entity.Type.LOCAL_DIR.equals(entity.getType());
+ break;
+ }
+
+ if (addWriteType) {
+ processQualifiedName.append(QNAME_SEP_PROCESS).append(writeEntity.getWriteType().name());
+ }
+ }
+ }
+
+ processQualifiedName.append(QNAME_SEP_PROCESS).append(qualifiedName.toLowerCase().replaceAll("/", ""));
+
+ if (createTime != 0) {
+ processQualifiedName.append(QNAME_SEP_PROCESS).append(createTime);
+ }
+ }
+ }
+
+ private boolean isAlterTableOperation() {
+ switch (context.getHiveOperation()) {
+ case ALTERTABLE_FILEFORMAT:
+ case ALTERTABLE_CLUSTER_SORT:
+ case ALTERTABLE_BUCKETNUM:
+ case ALTERTABLE_PROPERTIES:
+ case ALTERTABLE_SERDEPROPERTIES:
+ case ALTERTABLE_SERIALIZER:
+ case ALTERTABLE_ADDCOLS:
+ case ALTERTABLE_REPLACECOLS:
+ case ALTERTABLE_PARTCOLTYPE:
+ case ALTERTABLE_LOCATION:
+ case ALTERTABLE_RENAME:
+ case ALTERTABLE_RENAMECOL:
+ case ALTERVIEW_PROPERTIES:
+ case ALTERVIEW_RENAME:
+ case ALTERVIEW_AS:
+ return true;
+ }
+
+ return false;
+ }
+
+ static final class EntityComparator implements Comparator<Entity> {
+ @Override
+ public int compare(Entity entity1, Entity entity2) {
+ String name1 = entity1.getName();
+ String name2 = entity2.getName();
+
+ if (name1 == null || name2 == null) {
+ name1 = entity1.getD().toString();
+ name2 = entity2.getD().toString();
+ }
+
+ return name1.toLowerCase().compareTo(name2.toLowerCase());
+ }
+ }
+
+ static final Comparator<Entity> entityComparator = new EntityComparator();
+
+ static final class HBaseTableInfo {
+ String hbaseNameSpace = null;
+ String hbaseTableName = null;
+
+ HBaseTableInfo(Table table) {
+ Map<String, String> parameters = table.getParameters();
+
+ if (MapUtils.isNotEmpty(parameters)) {
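+ // hbase.table.name may be qualified as "namespace:table"; split it, otherwise fall back to the 'default' namespace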
+ hbaseNameSpace = HBASE_DEFAULT_NAMESPACE;
+ hbaseTableName = parameters.get(HBASE_PARAM_TABLE_NAME);
+
+ if (hbaseTableName != null) {
+ if (hbaseTableName.contains(HBASE_NAMESPACE_TABLE_DELIMITER)) {
+ String[] hbaseTableInfo = hbaseTableName.split(HBASE_NAMESPACE_TABLE_DELIMITER);
+
+ if (hbaseTableInfo.length > 1) {
+ hbaseNameSpace = hbaseTableInfo[0];
+ hbaseTableName = hbaseTableInfo[1];
+ }
+ }
+ }
+ }
+ }
+
+ public String getHbaseNameSpace() {
+ return hbaseNameSpace;
+ }
+
+ public String getHbaseTableName() {
+ return hbaseTableName;
+ }
+ }
+
+ public static Table toTable(org.apache.hadoop.hive.metastore.api.Table table) {
+ return new Table(table);
+ }
+}
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java
new file mode 100644
index 0000000000..bf5f5620e7
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook.events;
+
+import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent;
+import org.apache.hadoop.hive.ql.hooks.Entity;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.List;
+
+import static org.apache.atlas.hive.bridge.HiveMetaStoreBridge.getDatabaseName;
+import static org.apache.hadoop.hive.ql.hooks.Entity.Type.DATABASE;
+
+public class CreateDatabase extends BaseHiveEvent {
+ private static final Logger LOG = LoggerFactory.getLogger(CreateDatabase.class);
+
+ public CreateDatabase(AtlasHiveHookContext context) {
+ super(context);
+ }
+
+ @Override
+ public List<HookNotification> getNotificationMessages() throws Exception {
+ List<HookNotification> ret = null;
+ AtlasEntitiesWithExtInfo entities = context.isMetastoreHook() ? getHiveMetastoreEntities() : getHiveEntities();
+
+ if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) {
+ ret = Collections.singletonList(new EntityCreateRequestV2(getUserName(), entities));
+ }
+
+ return ret;
+ }
+
+ public AtlasEntitiesWithExtInfo getHiveMetastoreEntities() throws Exception {
+ AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo();
+ CreateDatabaseEvent dbEvent = (CreateDatabaseEvent) context.getMetastoreEvent();
+ Database db = dbEvent.getDatabase();
+
+ if (db != null) {
+ db = context.getMetastoreHandler().get_database(db.getName());
+ }
+
+ if (db != null) {
+ AtlasEntity dbEntity = toDbEntity(db);
+
+ ret.addEntity(dbEntity);
+
+ addLocationEntities(dbEntity, ret);
+ } else {
+ LOG.error("CreateDatabase.getEntities(): failed to retrieve db");
+ }
+
+ addProcessedEntities(ret);
+
+ return ret;
+ }
+
+ public AtlasEntitiesWithExtInfo getHiveEntities() throws Exception {
+ AtlasEntitiesWithExtInfo ret = new AtlasEntitiesWithExtInfo();
+
+ for (Entity entity : getOutputs()) {
+ if (entity.getType() == DATABASE) {
+ Database db = entity.getDatabase();
+
+ if (db != null) {
+ db = getHive().getDatabase(getDatabaseName(db));
+ }
+
+ if (db != null) {
+ AtlasEntity dbEntity = toDbEntity(db);
+ AtlasEntity dbDDLEntity = createHiveDDLEntity(dbEntity);
+
+ ret.addEntity(dbEntity);
+
+ if (dbDDLEntity != null) {
+ ret.addEntity(dbDDLEntity);
+ }
+
+ addLocationEntities(dbEntity, ret);
+ } else {
+ LOG.error("CreateDatabase.getEntities(): failed to retrieve db");
+ }
+ }
+ }
+
+ addProcessedEntities(ret);
+
+ return ret;
+ }
+
+ public void addLocationEntities(AtlasEntity dbEntity, AtlasEntitiesWithExtInfo ret) {
+ AtlasEntity dbLocationEntity = createHiveLocationEntity(dbEntity, ret);
+
+ if (dbLocationEntity != null) {
+ ret.addEntity(dbLocationEntity);
+ }
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java
new file mode 100644
index 0000000000..5787c9365a
--- /dev/null
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateHiveProcess.java
@@ -0,0 +1,295 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.atlas.hive.hook.events;
+
+import org.apache.atlas.type.AtlasTypeUtil;
+import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.model.instance.AtlasEntity;
+import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
+import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.model.notification.HookNotification.EntityCreateRequestV2;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.hive.ql.hooks.Entity;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+
+public class CreateHiveProcess extends BaseHiveEvent {
+ private static final Logger LOG = LoggerFactory.getLogger(CreateHiveProcess.class);
+
+ public CreateHiveProcess(AtlasHiveHookContext context) {
+ super(context);
+ }
+
+ @Override
+ public List<HookNotification> getNotificationMessages() throws Exception {
+ List<HookNotification> ret = null;
+ AtlasEntitiesWithExtInfo entities = getEntities();
+
+ if (entities != null && CollectionUtils.isNotEmpty(entities.getEntities())) {
+ ret = Collections.singletonList(new EntityCreateRequestV2(getUserName(), entities));
+ }
+
+ return ret;
+ }
+
+ public AtlasEntitiesWithExtInfo getEntities() throws Exception {
+ AtlasEntitiesWithExtInfo ret = null;
+
+ if (!skipProcess()) {
+ List<AtlasEntity> inputs = new ArrayList<>();
+ List<AtlasEntity> outputs = new ArrayList<>();
+ Set<String> processedNames = new HashSet<>();
+
+ ret = new AtlasEntitiesWithExtInfo();
+
+ if (getInputs() != null) {
+ for (ReadEntity input : getInputs()) {
+ String qualifiedName = getQualifiedName(input);
+
+ if (qualifiedName == null || !processedNames.add(qualifiedName)) {
+ continue;
+ }
+
+ AtlasEntity entity = getInputOutputEntity(input, ret, skipTempTables);
+
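+ // indirect read entities (e.g. tables accessed via a view) are resolved above, but only direct inputs are added as process inputs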
+ if (!input.isDirect()) {
+ continue;
+ }
+
+ if (entity != null) {
+ inputs.add(entity);
+ }
+ }
+ }
+
+ if (getOutputs() != null) {
+ for (WriteEntity output : getOutputs()) {
+ String qualifiedName = getQualifiedName(output);
+
+ if (qualifiedName == null || !processedNames.add(qualifiedName)) {
+ continue;
+ }
+
+ AtlasEntity entity = getInputOutputEntity(output, ret, skipTempTables);
+
+ if (entity != null) {
+ outputs.add(entity);
+ }
+
+ if (isDdlOperation(entity)) {
+
+ AtlasEntity ddlEntity = createHiveDDLEntity(entity);
+
+ if (ddlEntity != null) {
+ ret.addEntity(ddlEntity);
+ }
+ }
+ }
+ }
+
+ boolean skipProcess = inputs.isEmpty() && outputs.isEmpty();
+
+ if (!skipProcess) {
+ if (inputs.isEmpty() && context.isSkippedInputEntity()) {
+ skipProcess = true;
+ } else if (outputs.isEmpty() && context.isSkippedOutputEntity()) {
+ skipProcess = true;
+ }
+ }
+
+ if (!skipProcess && !context.isMetastoreHook()) {
+ AtlasEntity process = getHiveProcessEntity(inputs, outputs);
+
+ ret.addEntity(process);
+
+ AtlasEntity processExecution = getHiveProcessExecutionEntity(process);
+ ret.addEntity(processExecution);
+
+ processColumnLineage(process, ret);
+
+ addProcessedEntities(ret);
+ } else {
+ ret = null;
+ }
+ }
+
+ return ret;
+ }
+
+ private void processColumnLineage(AtlasEntity hiveProcess, AtlasEntitiesWithExtInfo entities) {
+ LineageInfo lineageInfo = getLineageInfo();
+
+ if (lineageInfo == null || CollectionUtils.isEmpty(lineageInfo.entrySet())) {
+ return;
+ }
+
+ final List<AtlasEntity> columnLineages = new ArrayList<>();
+ int lineageInputsCount = 0;
+ final Set<String> processedOutputCols = new HashSet<>();
+
+ for (Map.Entry<DependencyKey, Dependency> entry : lineageInfo.entrySet()) {
+ String outputColName = getQualifiedName(entry.getKey());
+ AtlasEntity outputColumn = context.getEntity(outputColName);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("processColumnLineage(): DependencyKey={}; Dependency={}", entry.getKey(), entry.getValue());
+ }
+
+ if (outputColumn == null) {
+ LOG.warn("column-lineage: non-existing output-column {}", outputColName);
+
+ continue;
+ }
+
+ if (processedOutputCols.contains(outputColName)) {
+ LOG.warn("column-lineage: duplicate for output-column {}", outputColName);
+
+ continue;
+ } else {
+ processedOutputCols.add(outputColName);
+ }
+
+ List<AtlasEntity> inputColumns = new ArrayList<>();
+
+ for (BaseColumnInfo baseColumn : getBaseCols(entry.getValue())) {
+ String inputColName = getQualifiedName(baseColumn);
+ AtlasEntity inputColumn = context.getEntity(inputColName);
+
+ if (inputColumn == null) {
+ LOG.warn("column-lineage: non-existing input-column {} for output-column={}", inputColName, outputColName);
+
+ continue;
+ }
+
+ inputColumns.add(inputColumn);
+ }
+
+ if (inputColumns.isEmpty()) {
+ continue;
+ }
+
+ lineageInputsCount += inputColumns.size();
+
+ AtlasEntity columnLineageProcess = new AtlasEntity(HIVE_TYPE_COLUMN_LINEAGE);
+
+ columnLineageProcess.setAttribute(ATTRIBUTE_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME));
+ columnLineageProcess.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME));
+ columnLineageProcess.setRelationshipAttribute(ATTRIBUTE_INPUTS, AtlasTypeUtil.getAtlasRelatedObjectIds(inputColumns, BaseHiveEvent.RELATIONSHIP_DATASET_PROCESS_INPUTS));
+ columnLineageProcess.setRelationshipAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(AtlasTypeUtil.getAtlasRelatedObjectId(outputColumn, BaseHiveEvent.RELATIONSHIP_PROCESS_DATASET_OUTPUTS)));
+ columnLineageProcess.setRelationshipAttribute(ATTRIBUTE_QUERY, AtlasTypeUtil.getAtlasRelatedObjectId(hiveProcess, BaseHiveEvent.RELATIONSHIP_HIVE_PROCESS_COLUMN_LINEAGE));
+ columnLineageProcess.setAttribute(ATTRIBUTE_DEPENDENCY_TYPE, entry.getValue().getType());
+ columnLineageProcess.setAttribute(ATTRIBUTE_EXPRESSION, entry.getValue().getExpr());
+
+ columnLineages.add(columnLineageProcess);
+ }
+
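+ // guard against HIVE-20633: if the average number of input columns per lineage entry exceeds the configured threshold, skip emitting hive_column_lineage entities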
+ float avgInputsCount = columnLineages.size() > 0 ? (((float) lineageInputsCount) / columnLineages.size()) : 0;
+ boolean skipColumnLineage = context.getSkipHiveColumnLineageHive20633() && avgInputsCount > context.getSkipHiveColumnLineageHive20633InputsThreshold();
+
+ if (!skipColumnLineage) {
+ for (AtlasEntity columnLineage : columnLineages) {
+ entities.addEntity(columnLineage);
+ }
+ } else {
+ LOG.warn("skipped {} hive_column_lineage entities. Average # of inputs={}, threshold={}, total # of inputs={}", columnLineages.size(), avgInputsCount, context.getSkipHiveColumnLineageHive20633InputsThreshold(), lineageInputsCount);
+ }
+ }
+
+ private Collection<BaseColumnInfo> getBaseCols(Dependency lInfoDep) {
+ Collection<BaseColumnInfo> ret = Collections.emptyList();
+
+ if (lInfoDep != null) {
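+ // Dependency.getBaseCols() is invoked reflectively to tolerate differences in its declared return type across Hive versions; any Collection result is accepted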
+ try {
+ Method getBaseColsMethod = lInfoDep.getClass().getMethod("getBaseCols");
+
+ Object retGetBaseCols = getBaseColsMethod.invoke(lInfoDep);
+
+ if (retGetBaseCols != null) {
+ if (retGetBaseCols instanceof Collection) {
+ ret = (Collection<BaseColumnInfo>) retGetBaseCols;
+ } else {
+ LOG.warn("{}: unexpected return type from LineageInfo.Dependency.getBaseCols(), expected type {}",
+ retGetBaseCols.getClass().getName(), "Collection");
+ }
+ }
+ } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException ex) {
+ LOG.warn("getBaseCols()", ex);
+ }
+ }
+
+ return ret;
+ }
+
+
+ private boolean skipProcess() {
+ Set<ReadEntity> inputs = getInputs();
+ Set