diff --git a/.travis.yml b/.travis.yml index a776739e..670938b8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,4 +29,4 @@ before_install: # Build install: -- build/gimel -Dmaven.test.skip=true -pl gimel-dataapi/gimel-tools -am | egrep -v "Download|Copy|Including|WARNING" +- sh -x build/gimel -Dmaven.test.skip=true -pl gimel-dataapi/gimel-tools -am | egrep -v "Download|Copy|Including|WARNING" diff --git a/build/gimel b/build/gimel index 0d4bf254..2b9ddfef 100755 --- a/build/gimel +++ b/build/gimel @@ -15,9 +15,81 @@ # See the License for the specific language governing permissions and # limitations under the License. +echo "Present Working Dir" +echo $PWD + this_dir=`dirname build/install_dependencies` -# Source reusable functions -source ${this_dir}/gimel_functions + +#----------------------------function will check for error code & exit if failure, else proceed further----------------------------# + +#usage : check_error <$?> +#Example: Check_error < pass $? from the shell command > < Custom Message for errorcode -gt 0 > + +check_error() +{ + cmd_error_code=$1 + custom_message=$2 + if [ ${cmd_error_code} -gt 0 ]; then + write_log "Error | Stage | ${custom_message}" + exit ${cmd_error_code} + else + write_log "Success | Stage | ${custom_message}" + fi +} + +#----------------------------function will check for error code & warn if failure----------------------------# + +#usage : check_warning <$?> +#Example: Check_warning < pass $? from the shell command > < Custom Message for errorcode -gt 0 > + + +check_warning() +{ + + cmd_error_code=$1 + pgm_exit_code=$2 + pgm_exit_msg=$3 + if [ ${cmd_error_code} -gt 0 ]; then + write_log "WARNING ! ${cmd_error_code} ${pgm_exit_code} ${pgm_exit_msg}" + else + echo "" + fi +} + + + +#----------------------------function will write the message to Console / Log File----------------------------# + +#Usage : write_log < Whatever message you need to log > + +write_log() +{ + msg=$1 + to_be_logged="$(date '+%Y%m%d %H:%M:%S') | $msg" + echo ${to_be_logged} +} + +#-----------------------------------Executes a Command--------------------------------------------------------# + + + +#Usage : run_cmd < The command to execute > + +run_cmd() +{ + cmd=$1 + if [ -z $2 ]; then + fail_on_error="break_code" + else + fail_on_error=$2 + fi + write_log "Executing Command --> $1" + $cmd + error_code=$? + if [ ! $fail_on_error = "ignore_errors" ]; then + check_error $error_code "$cmd" + fi +} #--------------------------------Begin execution of Steps------------------------------------------------# @@ -34,9 +106,29 @@ else fi -build/install_dependencies -check_error $? "build/install_dependencies" +write_log "Installing dependencies [sh -x build/install_dependencies]" + +# All jars below are not present in maven central or any public repository, thats why they are added manually while building gimel. + +mvn install:install-file -DgroupId=qubole-hive-JDBC -DartifactId=qubole-hive-JDBC -Dversion=0.0.7 -Dpackaging=jar -Dfile=${this_dir}/../lib/qubole-hive-JDBC.jar 1>>/dev/null 2>&1 +check_error $? "install qubole-hive-JDBC" + +mvn install:install-file -DgroupId=com.hortonworks -DartifactId=shc-core -Dversion=1.1.2-2.3-s_2.11 -Dpackaging=jar -Dfile=${this_dir}/../lib/shc-core.jar 1>>/dev/null 2>&1 +check_error $? "install shc-core" + +mvn install:install-file -DgroupId=com.osscube -DartifactId=aerospike-spark -Dversion=0.3-SNAPSHOT -Dpackaging=jar -Dfile=${this_dir}/../lib/aerospike-spark.jar 1>>/dev/null 2>&1 +check_error $? "install aerospike-spark" + +mvn install:install-file -DgroupId=com.twitter -DartifactId=zookeeper-client_2.10 -Dversion=2.0.0_fs-b -Dpackaging=jar -Dfile=${this_dir}/../lib/zookeeper-client_2.10-2.0.0_fs-b.jar 1>>/dev/null 2>&1 +check_error $? "install zookeeper-client" + +mvn install:install-file -DgroupId=com.teradata.jdbc -DartifactId=terajdbc4 -Dversion=15.10.00.22 -Dpackaging=jar -Dfile=${this_dir}/../lib/terajdbc4-15.10.00.22.jar 1>>/dev/null 2>&1 +check_error $? "install terajdbc4" + +mvn install:install-file -DgroupId=com.teradata.jdbc -DartifactId=tdgssconfig -Dversion=15.10.00.22 -Dpackaging=jar -Dfile=${this_dir}/../lib/tdgssconfig-15.10.00.22.jar 1>>/dev/null 2>&1 +check_error $? "install tdgssconfig" +write_log "Building the project [mvn install ${user_args}]" mvn install "$@" check_error $? "mvn install $@" diff --git a/build/install_dependencies b/build/install_dependencies index 90159c1d..5f7c185a 100755 --- a/build/install_dependencies +++ b/build/install_dependencies @@ -18,8 +18,77 @@ this_script=`pwd`/$BASH_SOURCE this_dir=`dirname $this_script` -# Source reusable functions -source ${this_dir}/gimel_functions + +#----------------------------function will check for error code & exit if failure, else proceed further----------------------------# + +#usage : check_error <$?> +#Example: Check_error < pass $? from the shell command > < Custom Message for errorcode -gt 0 > + +check_error() +{ + cmd_error_code=$1 + custom_message=$2 + if [ ${cmd_error_code} -gt 0 ]; then + write_log "Error | Stage | ${custom_message}" + exit ${cmd_error_code} + else + write_log "Success | Stage | ${custom_message}" + fi +} + +#----------------------------function will check for error code & warn if failure----------------------------# + +#usage : check_warning <$?> +#Example: Check_warning < pass $? from the shell command > < Custom Message for errorcode -gt 0 > + + +check_warning() +{ + + cmd_error_code=$1 + pgm_exit_code=$2 + pgm_exit_msg=$3 + if [ ${cmd_error_code} -gt 0 ]; then + write_log "WARNING ! ${cmd_error_code} ${pgm_exit_code} ${pgm_exit_msg}" + else + echo "" + fi +} + + + +#----------------------------function will write the message to Console / Log File----------------------------# + +#Usage : write_log < Whatever message you need to log > + +write_log() +{ + msg=$1 + to_be_logged="$(date '+%Y%m%d %H:%M:%S') | $msg" + echo ${to_be_logged} +} + +#-----------------------------------Executes a Command--------------------------------------------------------# + + + +#Usage : run_cmd < The command to execute > + +run_cmd() +{ + cmd=$1 + if [ -z $2 ]; then + fail_on_error="break_code" + else + fail_on_error=$2 + fi + write_log "Executing Command --> $1" + $cmd + error_code=$? + if [ ! $fail_on_error = "ignore_errors" ]; then + check_error $error_code "$cmd" + fi +} #--------------------------------Begin execution of Steps------------------------------------------------# diff --git a/gimel-dataapi/gimel-common/pom.xml b/gimel-dataapi/gimel-common/pom.xml index 52834328..b0c3a867 100644 --- a/gimel-dataapi/gimel-common/pom.xml +++ b/gimel-dataapi/gimel-common/pom.xml @@ -55,9 +55,9 @@ under the License. ${packaging.scope} - org.scala-lang - scala-xml - 2.11.0-M4 + org.scala-lang.modules + scala-xml_${scala.binary.version} + ${scala.xml.version} ${scala.packaging.scope} @@ -82,12 +82,6 @@ under the License. com.paypal.gimel gimel-logger ${gimel.version}-SNAPSHOT - - - org.apache.kafka - kafka-clients - - org.apache.kafka @@ -97,7 +91,7 @@ under the License. com.paypal.gimel serde-common - 1.0-SNAPSHOT + 2.4.7-SNAPSHOT ${packaging.scope} @@ -106,12 +100,6 @@ under the License. ${kafka.version} ${packaging.scope} - - com.databricks - spark-avro_${scala.binary.version} - 4.0.0 - ${packaging.scope} - org.apache.curator curator-framework @@ -162,7 +150,7 @@ under the License. org.apache.spark - spark-streaming-kafka-${kafka.binary.version}_${scala.binary.version} + spark-streaming-kafka-${spark.kafka.connector.version} ${spark.version} ${spark.packaging.scope} @@ -197,7 +185,7 @@ under the License. ${confluent.version} test - + io.netty netty @@ -211,7 +199,7 @@ under the License. test - net.jpountz.lz4 + net.jpountz.lz4 lz4 1.3.0 test @@ -220,16 +208,19 @@ under the License. com.fasterxml.jackson.core jackson-core ${jackson.version} + compile com.fasterxml.jackson.core jackson-annotations ${jackson.version} + compile com.fasterxml.jackson.core jackson-databind ${jackson.version} + compile @@ -277,7 +268,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.2.1 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/pom.xml b/gimel-dataapi/gimel-connectors/gimel-aerospike/pom.xml similarity index 97% rename from gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/pom.xml rename to gimel-dataapi/gimel-connectors/gimel-aerospike/pom.xml index d0071ded..bd3e9355 100644 --- a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-aerospike/pom.xml @@ -28,7 +28,7 @@ under the License. 4.0.0 - gimel-aerospike-3.14 + gimel-aerospike 2.4.7-SNAPSHOT @@ -58,7 +58,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/DataSet.scala b/gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/DataSet.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/DataSet.scala rename to gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/DataSet.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/conf/AerospikeClientConfiguration.scala b/gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/conf/AerospikeClientConfiguration.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/conf/AerospikeClientConfiguration.scala rename to gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/conf/AerospikeClientConfiguration.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/conf/AerospikeConfigs.scala b/gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/conf/AerospikeConfigs.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/conf/AerospikeConfigs.scala rename to gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/conf/AerospikeConfigs.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/reader/AerospikeReader.scala b/gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/reader/AerospikeReader.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/reader/AerospikeReader.scala rename to gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/reader/AerospikeReader.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/utilities/AeroSparkConnector.scala b/gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/utilities/AeroSparkConnector.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/utilities/AeroSparkConnector.scala rename to gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/utilities/AeroSparkConnector.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/utilities/AerospikeUtilities.scala b/gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/utilities/AerospikeUtilities.scala similarity index 98% rename from gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/utilities/AerospikeUtilities.scala rename to gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/utilities/AerospikeUtilities.scala index b34d1dff..ace5118f 100644 --- a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/utilities/AerospikeUtilities.scala +++ b/gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/utilities/AerospikeUtilities.scala @@ -57,7 +57,7 @@ object AerospikeUtilities { val aerospikeSet = conf.aerospikeSet val aerospikeRowKey = conf.aerospikeRowKey // For each partition of Dataframe, aerospike client is created and is used to write data to aerospike - dataFrame.foreachPartition { partition => + dataFrame.foreachPartition { partition: Iterator[Row] => val client = AerospikeAdminClient.createClientConnection(aerospikeHosts, aerospikePort.toInt) partition.foreach { row => val bins = columns.map(eachCol => new Bin(eachCol.toString, row.getAs(eachCol).toString)).toArray diff --git a/gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/writer/AerospikeWriter.scala b/gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/writer/AerospikeWriter.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-aerospike-3.14/src/main/scala/com/paypal/gimel/aerospike/writer/AerospikeWriter.scala rename to gimel-dataapi/gimel-connectors/gimel-aerospike/src/main/scala/com/paypal/gimel/aerospike/writer/AerospikeWriter.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/pom.xml b/gimel-dataapi/gimel-connectors/gimel-cassandra/pom.xml similarity index 92% rename from gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/pom.xml rename to gimel-dataapi/gimel-connectors/gimel-cassandra/pom.xml index 6e275bd0..fd4dc3d3 100644 --- a/gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-cassandra/pom.xml @@ -28,7 +28,7 @@ under the License. 4.0.0 - gimel-cassandra-2.0 + gimel-cassandra 2.4.7-SNAPSHOT @@ -42,11 +42,6 @@ under the License. spark-cassandra-connector_${scala.binary.version} ${cassandra.spark.version} - - com.datastax.spark - spark-cassandra-connector-java_${scala.binary.version} - 1.5.2 - org.scalatest scalatest_${scala.binary.version} @@ -62,7 +57,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/DataSet.scala b/gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/DataSet.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/DataSet.scala rename to gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/DataSet.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/conf/CassandraClientConfiguration.scala b/gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/conf/CassandraClientConfiguration.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/conf/CassandraClientConfiguration.scala rename to gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/conf/CassandraClientConfiguration.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/conf/CassandraConfigs.scala b/gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/conf/CassandraConfigs.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/conf/CassandraConfigs.scala rename to gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/conf/CassandraConfigs.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/reader/CassandraReader.scala b/gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/reader/CassandraReader.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/reader/CassandraReader.scala rename to gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/reader/CassandraReader.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/writer/CassandraWriter.scala b/gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/writer/CassandraWriter.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-cassandra-2.0/src/main/scala/com/paypal/gimel/cassandra/writer/CassandraWriter.scala rename to gimel-dataapi/gimel-connectors/gimel-cassandra/src/main/scala/com/paypal/gimel/cassandra/writer/CassandraWriter.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/pom.xml b/gimel-dataapi/gimel-connectors/gimel-druid/pom.xml similarity index 97% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/pom.xml rename to gimel-dataapi/gimel-connectors/gimel-druid/pom.xml index ce5d1582..939557b8 100644 --- a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-druid/pom.xml @@ -40,13 +40,17 @@ under the License. io.druid - tranquility-core_${scala.binary.version} + tranquility-core_2.11 ${tranquility.version} com.fasterxml.jackson.core jackson-databind + + org.scala-lang + * + com.fasterxml.jackson.core jackson-core diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/DataSet.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/DataSet.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/DataSet.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/DataSet.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/conf/DruidClientConfiguration.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/conf/DruidClientConfiguration.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/conf/DruidClientConfiguration.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/conf/DruidClientConfiguration.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/conf/DruidConfigs.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/conf/DruidConfigs.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/conf/DruidConfigs.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/conf/DruidConfigs.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/conf/DruidConstants.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/conf/DruidConstants.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/conf/DruidConstants.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/conf/DruidConstants.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/model/DruidDimension.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/model/DruidDimension.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/model/DruidDimension.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/model/DruidDimension.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/model/DruidMetric.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/model/DruidMetric.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/model/DruidMetric.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/model/DruidMetric.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/reader/DruidReader.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/reader/DruidReader.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/reader/DruidReader.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/reader/DruidReader.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/util/DruidEventBeam.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/util/DruidEventBeam.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/util/DruidEventBeam.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/util/DruidEventBeam.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/util/DruidUtility.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/util/DruidUtility.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/util/DruidUtility.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/util/DruidUtility.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/writer/DruidRealtimeWriter.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/writer/DruidRealtimeWriter.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/writer/DruidRealtimeWriter.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/writer/DruidRealtimeWriter.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/writer/DruidWriter.scala b/gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/writer/DruidWriter.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-druid-0.82/src/main/scala/com/paypal/gimel/druid/writer/DruidWriter.scala rename to gimel-dataapi/gimel-connectors/gimel-druid/src/main/scala/com/paypal/gimel/druid/writer/DruidWriter.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/pom.xml b/gimel-dataapi/gimel-connectors/gimel-elasticsearch/pom.xml similarity index 86% rename from gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/pom.xml rename to gimel-dataapi/gimel-connectors/gimel-elasticsearch/pom.xml index 41196140..e39db857 100644 --- a/gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-elasticsearch/pom.xml @@ -28,7 +28,7 @@ under the License. 4.0.0 - gimel-elasticsearch-6.2 + gimel-elasticsearch 2.4.7-SNAPSHOT @@ -45,9 +45,23 @@ under the License. org.elasticsearch - elasticsearch-spark-20_${scala.binary.version} + elasticsearch-spark-20_${elastic.scala.binary.version} ${elasticsearch.version} ${packaging.scope} + + + org.apache.spark + * + + + org.slf4j + log4j-over-slf4j + + + org.apache.hive + * + + org.elasticsearch @@ -78,7 +92,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/src/main/scala/com/paypal/gimel/elasticsearch/DataSet.scala b/gimel-dataapi/gimel-connectors/gimel-elasticsearch/src/main/scala/com/paypal/gimel/elasticsearch/DataSet.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/src/main/scala/com/paypal/gimel/elasticsearch/DataSet.scala rename to gimel-dataapi/gimel-connectors/gimel-elasticsearch/src/main/scala/com/paypal/gimel/elasticsearch/DataSet.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/src/main/scala/com/paypal/gimel/elasticsearch/conf/ElasticSearchConfigs.scala b/gimel-dataapi/gimel-connectors/gimel-elasticsearch/src/main/scala/com/paypal/gimel/elasticsearch/conf/ElasticSearchConfigs.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/src/main/scala/com/paypal/gimel/elasticsearch/conf/ElasticSearchConfigs.scala rename to gimel-dataapi/gimel-connectors/gimel-elasticsearch/src/main/scala/com/paypal/gimel/elasticsearch/conf/ElasticSearchConfigs.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/src/main/scala/com/paypal/gimel/elasticsearch/conf/ElasticSearchConstants.scala b/gimel-dataapi/gimel-connectors/gimel-elasticsearch/src/main/scala/com/paypal/gimel/elasticsearch/conf/ElasticSearchConstants.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/src/main/scala/com/paypal/gimel/elasticsearch/conf/ElasticSearchConstants.scala rename to gimel-dataapi/gimel-connectors/gimel-elasticsearch/src/main/scala/com/paypal/gimel/elasticsearch/conf/ElasticSearchConstants.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/src/main/scala/com/paypal/gimel/elasticsearch/utilities/ElasticSearchUtilities.scala b/gimel-dataapi/gimel-connectors/gimel-elasticsearch/src/main/scala/com/paypal/gimel/elasticsearch/utilities/ElasticSearchUtilities.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-elasticsearch-6.2/src/main/scala/com/paypal/gimel/elasticsearch/utilities/ElasticSearchUtilities.scala rename to gimel-dataapi/gimel-connectors/gimel-elasticsearch/src/main/scala/com/paypal/gimel/elasticsearch/utilities/ElasticSearchUtilities.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/pom.xml b/gimel-dataapi/gimel-connectors/gimel-hbase/pom.xml similarity index 94% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/pom.xml rename to gimel-dataapi/gimel-connectors/gimel-hbase/pom.xml index 7bf7cada..d0165507 100644 --- a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-hbase/pom.xml @@ -47,8 +47,18 @@ under the License. com.hortonworks shc-core - ${hortonworks.shc.version}-${spark.binary.version}-s_${scala.binary.version} + 1.1.1-2.1-s_2.11 ${packaging.scope} + + + org.apache.spark + * + + + org.scala-lang + * + + org.apache.hbase @@ -179,7 +189,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/DataSet.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/DataSet.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/DataSet.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/DataSet.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/conf/HbaseClientConfiguration.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/conf/HbaseClientConfiguration.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/conf/HbaseClientConfiguration.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/conf/HbaseClientConfiguration.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/conf/HbaseConfigs.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/conf/HbaseConfigs.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/conf/HbaseConfigs.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/conf/HbaseConfigs.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/conf/HbaseConstants.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/conf/HbaseConstants.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/conf/HbaseConstants.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/conf/HbaseConstants.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseCatalog.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseCatalog.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseCatalog.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseCatalog.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseLookUp.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseLookUp.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseLookUp.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseLookUp.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBasePut.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBasePut.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBasePut.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBasePut.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseScanner.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseScanner.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseScanner.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseScanner.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseSparkConnector.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseSparkConnector.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseSparkConnector.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseSparkConnector.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseUtilities.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseUtilities.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseUtilities.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/main/scala/com/paypal/gimel/hbase/utilities/HBaseUtilities.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/DataSetTest.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/DataSetTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/DataSetTest.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/DataSetTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseCatalogTest.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseCatalogTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseCatalogTest.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseCatalogTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseLocalClient.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseLocalClient.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseLocalClient.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseLocalClient.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseLookUpTest.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseLookUpTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseLookUpTest.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseLookUpTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBasePutTest.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBasePutTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBasePutTest.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBasePutTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseScannerTest.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseScannerTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseScannerTest.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseScannerTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseSparkConnectorTest.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseSparkConnectorTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseSparkConnectorTest.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseSparkConnectorTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseUtilitiesTest.scala b/gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseUtilitiesTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hbase-1.2/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseUtilitiesTest.scala rename to gimel-dataapi/gimel-connectors/gimel-hbase/src/test/scala/com/paypal/gimel/hbase/utilities/HBaseUtilitiesTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/pom.xml b/gimel-dataapi/gimel-connectors/gimel-hive/pom.xml similarity index 98% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/pom.xml rename to gimel-dataapi/gimel-connectors/gimel-hive/pom.xml index db91c22b..10125332 100644 --- a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-hive/pom.xml @@ -28,7 +28,7 @@ under the License. 4.0.0 - gimel-hive-1.2 + gimel-hive 2.4.7-SNAPSHOT @@ -125,7 +125,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.2.1 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/DataSet.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/DataSet.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/DataSet.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/DataSet.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsClientConfiguration.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsClientConfiguration.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsClientConfiguration.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsClientConfiguration.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsConfigs.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsConfigs.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsConfigs.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsConfigs.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsConstants.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsConstants.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsConstants.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/conf/HdfsConstants.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/utilities/HDFSUtilities.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/utilities/HDFSUtilities.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hdfs/utilities/HDFSUtilities.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hdfs/utilities/HDFSUtilities.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/DataSet.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/DataSet.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/DataSet.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/DataSet.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/conf/HiveConfigs.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/conf/HiveConfigs.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/conf/HiveConfigs.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/conf/HiveConfigs.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/conf/HiveConstants.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/conf/HiveConstants.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/conf/HiveConstants.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/conf/HiveConstants.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/utilities/HiveJDBCUtils.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/utilities/HiveJDBCUtils.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/utilities/HiveJDBCUtils.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/utilities/HiveJDBCUtils.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/utilities/HiveSchemaTemplates.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/utilities/HiveSchemaTemplates.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/utilities/HiveSchemaTemplates.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/utilities/HiveSchemaTemplates.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/utilities/HiveSchemaUtils.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/utilities/HiveSchemaUtils.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/utilities/HiveSchemaUtils.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/utilities/HiveSchemaUtils.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/utilities/HiveUtils.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/utilities/HiveUtils.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/main/scala/com/paypal/gimel/hive/utilities/HiveUtils.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/main/scala/com/paypal/gimel/hive/utilities/HiveUtils.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.avro b/gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.avro similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.avro rename to gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.avro diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.csv b/gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.csv similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.csv rename to gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.csv diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.json b/gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.json similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.json rename to gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.json diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.parquet b/gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.parquet similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.parquet rename to gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.parquet diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.seq b/gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.seq similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.seq rename to gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.seq diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.txt b/gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.txt similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.txt rename to gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.txt diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.txt.gz b/gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.txt.gz similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/resources/hdfs_test.txt.gz rename to gimel-dataapi/gimel-connectors/gimel-hive/src/test/resources/hdfs_test.txt.gz diff --git a/gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/scala/com/paypal/gimel/hdfs/DataSetTest.scala b/gimel-dataapi/gimel-connectors/gimel-hive/src/test/scala/com/paypal/gimel/hdfs/DataSetTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-hive-1.2/src/test/scala/com/paypal/gimel/hdfs/DataSetTest.scala rename to gimel-dataapi/gimel-connectors/gimel-hive/src/test/scala/com/paypal/gimel/hdfs/DataSetTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-jdbc/pom.xml b/gimel-dataapi/gimel-connectors/gimel-jdbc/pom.xml index 26d7c30f..8c6088ea 100644 --- a/gimel-dataapi/gimel-connectors/gimel-jdbc/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-jdbc/pom.xml @@ -83,7 +83,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-jdbc/src/main/scala/com/paypal/gimel/jdbc/utilities/ExtendedJdbcRDD.scala b/gimel-dataapi/gimel-connectors/gimel-jdbc/src/main/scala/com/paypal/gimel/jdbc/utilities/ExtendedJdbcRDD.scala index 9e7e3ecb..537b4eb6 100644 --- a/gimel-dataapi/gimel-connectors/gimel-jdbc/src/main/scala/com/paypal/gimel/jdbc/utilities/ExtendedJdbcRDD.scala +++ b/gimel-dataapi/gimel-connectors/gimel-jdbc/src/main/scala/com/paypal/gimel/jdbc/utilities/ExtendedJdbcRDD.scala @@ -98,7 +98,11 @@ class ExtendedJdbcRDD[T: ClassTag]( protected var finished = false val logger = Logger(this.getClass.getName) - context.addTaskCompletionListener { context => closeIfNeeded() } + context.addTaskCompletionListener{ + (context: org.apache.spark.TaskContext) => + closeIfNeeded() + "dummy" + } val conn: Connection = getConnection() diff --git a/gimel-dataapi/gimel-connectors/gimel-jdbc/src/main/scala/com/paypal/gimel/jdbc/utilities/JDBCUtilities.scala b/gimel-dataapi/gimel-connectors/gimel-jdbc/src/main/scala/com/paypal/gimel/jdbc/utilities/JDBCUtilities.scala index 62b8ed8b..1e02efc8 100644 --- a/gimel-dataapi/gimel-connectors/gimel-jdbc/src/main/scala/com/paypal/gimel/jdbc/utilities/JDBCUtilities.scala +++ b/gimel-dataapi/gimel-connectors/gimel-jdbc/src/main/scala/com/paypal/gimel/jdbc/utilities/JDBCUtilities.scala @@ -349,6 +349,9 @@ class JDBCUtilities(sparkSession: SparkSession) extends Serializable { st } +// private def handlePartition(partition:Iterator[Row]):Unit = { +// +// } /** * This method inserts into given table in given mode * @@ -374,10 +377,8 @@ class JDBCUtilities(sparkSession: SparkSession) extends Serializable { } case _ => // do nothing } - - // For each partition create a temp table to insert - dataFrame.foreachPartition { batch => + dataFrame.foreachPartition { batch: Iterator[Row] => // create logger inside the executor val logger = Logger(this.getClass.getName) @@ -572,7 +573,7 @@ class JDBCUtilities(sparkSession: SparkSession) extends Serializable { */ private def updateTable(dataFrame: DataFrame, jdbcConnectionUtility: JDBCConnectionUtility, jdbcHolder: JDBCArgsHolder) { - dataFrame.foreachPartition { batch => + dataFrame.foreachPartition { batch: Iterator[Row] => if (batch.nonEmpty) { // create logger inside the executor val logger = Logger(this.getClass.getName) @@ -630,7 +631,7 @@ class JDBCUtilities(sparkSession: SparkSession) extends Serializable { */ private def upsertTable(dataFrame: DataFrame, jDBCConnectionUtility: JDBCConnectionUtility, jdbcHolder: JDBCArgsHolder) { - dataFrame.foreachPartition { batch => + dataFrame.foreachPartition { batch: Iterator[Row] => // create logger inside the executor val logger = Logger(this.getClass.getName) if (batch.nonEmpty) { diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/pom.xml b/gimel-dataapi/gimel-connectors/gimel-kafka/pom.xml similarity index 89% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/pom.xml rename to gimel-dataapi/gimel-connectors/gimel-kafka/pom.xml index 2dae625c..f6bba1b5 100644 --- a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-kafka/pom.xml @@ -28,7 +28,7 @@ under the License. 4.0.0 - gimel-kafka-2.2 + gimel-kafka 2.4.7-SNAPSHOT @@ -37,6 +37,14 @@ under the License. gimel-common ${gimel.version}-SNAPSHOT + + org.apache.hbase + * + + + org.apache.hadoop + * + com.fasterxml.jackson.core jackson-core @@ -51,23 +59,11 @@ under the License. - - com.databricks - spark-avro_${scala.binary.version} - 3.2.0 - ${packaging.scope} - org.apache.spark spark-sql-kafka-${spark.kafka.connector.version} ${spark.version} ${packaging.scope} - - - org.apache.kafka - kafka-clients - - @@ -75,21 +71,6 @@ under the License. kafka-clients ${kafka.version} - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - @@ -163,12 +144,6 @@ under the License. ${confluent.version} test - - org.apache.avro - avro - 1.7.7 - test - io.netty @@ -234,7 +209,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.2.1 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/DataSet.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/DataSet.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/DataSet.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/DataSet.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/DataStream.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/DataStream.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/DataStream.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/DataStream.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaClientConfiguration.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaClientConfiguration.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaClientConfiguration.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaClientConfiguration.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaConfigs.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaConfigs.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaConfigs.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaConfigs.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaConstants.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaConstants.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaConstants.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaConstants.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaJsonProtocol.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaJsonProtocol.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaJsonProtocol.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/conf/KafkaJsonProtocol.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/reader/KafkaBatchConsumer.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/reader/KafkaBatchConsumer.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/reader/KafkaBatchConsumer.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/reader/KafkaBatchConsumer.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/reader/KafkaStreamConsumer.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/reader/KafkaStreamConsumer.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/reader/KafkaStreamConsumer.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/reader/KafkaStreamConsumer.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/utilities/ImplicitKafkaConverters.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/utilities/ImplicitKafkaConverters.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/utilities/ImplicitKafkaConverters.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/utilities/ImplicitKafkaConverters.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/utilities/ImplicitZKCheckPointers.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/utilities/ImplicitZKCheckPointers.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/utilities/ImplicitZKCheckPointers.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/utilities/ImplicitZKCheckPointers.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/utilities/KafkaOptionsLoaderUtils.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/utilities/KafkaOptionsLoaderUtils.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/utilities/KafkaOptionsLoaderUtils.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/utilities/KafkaOptionsLoaderUtils.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/utilities/KafkaUtilities.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/utilities/KafkaUtilities.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/utilities/KafkaUtilities.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/utilities/KafkaUtilities.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/writer/KafkaBatchProducer.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/writer/KafkaBatchProducer.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/writer/KafkaBatchProducer.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/writer/KafkaBatchProducer.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/writer/KafkaStreamProducer.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/writer/KafkaStreamProducer.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/main/scala/com/paypal/gimel/kafka2/writer/KafkaStreamProducer.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/main/scala/com/paypal/gimel/kafka2/writer/KafkaStreamProducer.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/DataSetTest.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/DataSetTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/DataSetTest.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/DataSetTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/reader/KafkaBatchConsumerTest.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/reader/KafkaBatchConsumerTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/reader/KafkaBatchConsumerTest.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/reader/KafkaBatchConsumerTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/ImplicitKafkaConvertersTest.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/ImplicitKafkaConvertersTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/ImplicitKafkaConvertersTest.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/ImplicitKafkaConvertersTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/ImplicitZKCheckPointersTest.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/ImplicitZKCheckPointersTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/ImplicitZKCheckPointersTest.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/ImplicitZKCheckPointersTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/KafkaOptionsLoaderUtilsTest.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/KafkaOptionsLoaderUtilsTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/KafkaOptionsLoaderUtilsTest.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/KafkaOptionsLoaderUtilsTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/KafkaUtilitiesTest.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/KafkaUtilitiesTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/KafkaUtilitiesTest.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/KafkaUtilitiesTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/MockKafkaoptionsLoader.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/MockKafkaoptionsLoader.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/utilities/MockKafkaoptionsLoader.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/utilities/MockKafkaoptionsLoader.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/writer/KafkaBatchProducerTest.scala b/gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/writer/KafkaBatchProducerTest.scala similarity index 100% rename from gimel-dataapi/gimel-connectors/gimel-kafka-2.2/src/test/scala/com/paypal/gimel/kafka2/writer/KafkaBatchProducerTest.scala rename to gimel-dataapi/gimel-connectors/gimel-kafka/src/test/scala/com/paypal/gimel/kafka2/writer/KafkaBatchProducerTest.scala diff --git a/gimel-dataapi/gimel-connectors/gimel-restapi/pom.xml b/gimel-dataapi/gimel-connectors/gimel-restapi/pom.xml index eb6b509c..4d5d19a6 100644 --- a/gimel-dataapi/gimel-connectors/gimel-restapi/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-restapi/pom.xml @@ -47,12 +47,11 @@ under the License. src/main/scala - src/test/scala org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-s3/pom.xml b/gimel-dataapi/gimel-connectors/gimel-s3/pom.xml index 2ab73f2e..db84305b 100644 --- a/gimel-dataapi/gimel-connectors/gimel-s3/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-s3/pom.xml @@ -69,7 +69,6 @@ under the License. src/main/scala - src/test/scala net.alchim31.maven @@ -111,7 +110,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-connectors/gimel-sftp/pom.xml b/gimel-dataapi/gimel-connectors/gimel-sftp/pom.xml index 9d07b5cf..5c2c66b0 100644 --- a/gimel-dataapi/gimel-connectors/gimel-sftp/pom.xml +++ b/gimel-dataapi/gimel-connectors/gimel-sftp/pom.xml @@ -39,9 +39,19 @@ under the License. com.springml - spark-sftp_${scala.binary.version} + spark-sftp_${springml.scala.binary.version} ${spark.sftp.version} ${packaging.scope} + + + org.apache.spark + * + + + org.scala-lang + * + + org.scalatest @@ -53,12 +63,11 @@ under the License. src/main/scala - src/test/scala org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-core/pom.xml b/gimel-dataapi/gimel-core/pom.xml index 7026f5c5..f8c36242 100644 --- a/gimel-dataapi/gimel-core/pom.xml +++ b/gimel-dataapi/gimel-core/pom.xml @@ -87,12 +87,12 @@ under the License. com.paypal.gimel - gimel-kafka-2.2 + gimel-kafka ${gimel.version}-SNAPSHOT com.paypal.gimel - gimel-elasticsearch-6.2 + gimel-elasticsearch ${gimel.version}-SNAPSHOT @@ -102,37 +102,32 @@ under the License. com.paypal.gimel - gimel-hbase-1.2 + gimel-aerospike ${gimel.version}-SNAPSHOT com.paypal.gimel - gimel-aerospike-3.14 + gimel-hive ${gimel.version}-SNAPSHOT com.paypal.gimel - gimel-hive-1.2 + gimel-cassandra ${gimel.version}-SNAPSHOT com.paypal.gimel - gimel-cassandra-2.0 - ${gimel.version}-SNAPSHOT - - - com.paypal.gimel - gimel-druid-0.82 + gimel-sftp ${gimel.version}-SNAPSHOT com.paypal.gimel - gimel-sftp + gimel-restapi ${gimel.version}-SNAPSHOT com.paypal.gimel - gimel-restapi + gimel-s3 ${gimel.version}-SNAPSHOT @@ -229,18 +224,18 @@ under the License. src/main/scala - - org.apache.avro - avro-maven-plugin - - - String - - + + + + + + + + org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataSet.scala b/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataSet.scala index 0f7e09c9..645b10af 100644 --- a/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataSet.scala +++ b/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataSet.scala @@ -38,9 +38,8 @@ import com.paypal.gimel.common.utilities.BindToFieldsUtils._ import com.paypal.gimel.common.utilities.DataSetUtils.propStringToMap import com.paypal.gimel.datasetfactory.GimelDataSet import com.paypal.gimel.elasticsearch.conf.ElasticSearchConfigs -import com.paypal.gimel.hbase.conf.HbaseConfigs import com.paypal.gimel.jdbc.conf.JdbcConfigs -import com.paypal.gimel.kafka.conf.{KafkaConfigs, KafkaConstants} +import com.paypal.gimel.kafka2.conf.{KafkaConfigs, KafkaConstants} import com.paypal.gimel.logger.Logger class DataSet(val sparkSession: SparkSession) { @@ -160,7 +159,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -190,7 +189,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -293,7 +292,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -324,7 +323,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -411,7 +410,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -440,7 +439,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -597,7 +596,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -627,7 +626,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -727,7 +726,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -758,7 +757,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -855,7 +854,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -886,7 +885,7 @@ class DataSet(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkAppName , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeBatch + , "BATCH" , clusterName , user , appTag.replaceAllLiterally("/", "_") @@ -983,10 +982,6 @@ object DataSetUtils { def getDataSet(sparkSession: SparkSession, sourceType: DataSetType.SystemType): GimelDataSet = { sourceType match { - case DataSetType.KAFKA => - new com.paypal.gimel.kafka.DataSet(sparkSession) - case DataSetType.HBASE => - new com.paypal.gimel.hbase.DataSet(sparkSession) case DataSetType.HDFS => new com.paypal.gimel.hdfs.DataSet(sparkSession) case DataSetType.ES => @@ -999,12 +994,8 @@ object DataSetUtils { new com.paypal.gimel.cassandra.DataSet(sparkSession) case DataSetType.AEROSPIKE => new com.paypal.gimel.aerospike.DataSet(sparkSession) - case DataSetType.HDFS => - new com.paypal.gimel.hdfs.DataSet(sparkSession) case DataSetType.RESTAPI => new com.paypal.gimel.restapi.DataSet(sparkSession) - case DataSetType.DRUID => - new com.paypal.gimel.druid.DataSet(sparkSession) case DataSetType.SFTP => new com.paypal.gimel.sftp.DataSet(sparkSession) case DataSetType.KAFKA2 => @@ -1022,8 +1013,8 @@ object DataSetUtils { def getLatestKafkaDataSetReader(dataSet: DataSet): Option[GimelDataSet] = { Try { dataSet.latestDataSetReader.get match { - case kafka: com.paypal.gimel.kafka.DataSet => - kafka +// case kafka: com.paypal.gimel.kafka.DataSet => +// kafka case kafka2: com.paypal.gimel.kafka2.DataSet => kafka2 } diff --git a/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataStream.scala b/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataStream.scala deleted file mode 100644 index 37f90269..00000000 --- a/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataStream.scala +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright 2018 PayPal Inc. - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.paypal.gimel - -import scala.collection.immutable.Map -import scala.language.implicitConversions -import scala.util.{Success, Try} - -import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.sql.SQLContext -import org.apache.spark.sql.hive.HiveContext -import org.apache.spark.streaming.{Seconds, StreamingContext} - -import com.paypal.gimel.common.catalog.{CatalogProvider, DataSetProperties} -import com.paypal.gimel.common.conf.{CatalogProviderConfigs, CatalogProviderConstants, GimelConstants} -import com.paypal.gimel.common.utilities.Timer -import com.paypal.gimel.datastreamfactory.{GimelDataStream, StreamingResult} -import com.paypal.gimel.kafka.conf.{KafkaConfigs, KafkaConstants} -import com.paypal.gimel.logger.Logger - -object DataStreamType extends Enumeration { - type SystemType = Value - val KAFKA = Value -} - -class DataStream(val streamingContext: StreamingContext) { - - import com.paypal.gimel.common.utilities.DataSetUtils._ - - val user: String = sys.env(GimelConstants.USER) - val sparkAppName: String = streamingContext.sparkContext.getConf.get(GimelConstants.SPARK_APP_NAME) - val appTag: String = getAppTag(streamingContext.sparkContext) - val sparkContext: SparkContext = streamingContext.sparkContext - val logger = Logger() - logger.setSparkVersion(streamingContext.sparkContext.version) - val latestDataStreamReader: Option[GimelDataStream] = None - var datasetSystemType: String = "KAFKA" - var additionalPropsToLog = scala.collection.mutable.Map[String, String]() - - // get gimel timer object - val gimelTimer = Timer() - - import DataStreamUtils._ - - def latestKafkaDataStreamReader: Option[com.paypal.gimel.kafka.DataStream] = { - getLatestKafkaDataStreamReader(this) - } - - /** - * Provides DStream for a given configuration - * - * @param sourceType DataStreamType.Type - * @param sourceName Kafka Topic Name - * @param props Map of K->V kafka Properties - * @return StreamingResult - */ - private def read(sourceType: DataStreamType.SystemType - , sourceName: String, props: Any): StreamingResult = { - val propsMap: Map[String, Any] = getProps(props) - val dataStream = DataStreamUtils.getDataStream(streamingContext, sourceType) - dataStream.read(sourceName, propsMap) - } - - /** - * Provides DStream for a given configuration - * - * @param dataSet Kafka Topic Name - * @param props Map of K->V kafka Properties - * @return StreamingResult - */ - def read(dataSet: String, props: Any = Map[String, Any]()): StreamingResult = { - def MethodName: String = new Exception().getStackTrace.apply(1).getMethodName - - // get start time - val startTime = gimelTimer.start.get - - try { - - // Get catalog provider from run time hive context (1st Preference) - // if not available - check user props (2nd Preference) - // if not available - check Primary Provider of Catalog (Default) - val formattedProps: Map[String, Any] = - Map(CatalogProviderConfigs.CATALOG_PROVIDER -> CatalogProviderConstants.PRIMARY_CATALOG_PROVIDER, - GimelConstants.SPARK_APP_ID -> streamingContext.sparkContext.getConf.get(GimelConstants.SPARK_APP_ID), - GimelConstants.SPARK_APP_NAME -> streamingContext.sparkContext.getConf.get(GimelConstants.SPARK_APP_NAME), - GimelConstants.APP_TAG -> appTag) ++ - getProps(props) - val dataSetProperties: DataSetProperties = - CatalogProvider.getDataSetProperties(dataSet, formattedProps) - // dataSetProperties. - // val (systemType, hiveTableProps) = getSystemType(dataSet) - // val systemType = getSystemType1(dataSetProperties) - val newProps: Map[String, Any] = getProps(props) ++ Map( - GimelConstants.DATASET_PROPS -> dataSetProperties - , GimelConstants.DATASET -> dataSet - , GimelConstants.RESOLVED_HIVE_TABLE -> resolveDataSetName(dataSet) - , GimelConstants.APP_TAG -> appTag) - - // Why are we doing this? Elastic Search Cannot Accept "." in keys - val dataSetProps = dataSetProperties.props.map { case (k, v) => - k.replaceAllLiterally(".", "~") -> v - } - - val propsToLog = scala.collection.mutable.Map[String, String]() - dataSetProps.foreach(x => propsToLog.put(x._1, x._2)) - // additionalPropsToLog = propsToLog - - val data = this.read(DataStreamType.KAFKA, dataSet, newProps) - - - // update log variables to push logs - val endTime = gimelTimer.endTime.get - val executionTime: Double = gimelTimer.endWithMillSecRunTime - - // post audit logs to KAFKA - logger.logApiAccess(streamingContext.sparkContext.getConf.getAppId - , streamingContext.sparkContext.getConf.get("spark.app.name") - , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeStream - , getYarnClusterName() - , user - , appTag.replaceAllLiterally("/", "_") - , MethodName - , dataSet - , datasetSystemType - , "" - , additionalPropsToLog - , GimelConstants.SUCCESS - , GimelConstants.EMPTY_STRING - , GimelConstants.EMPTY_STRING - , startTime - , endTime - , executionTime - ) - - data - } - catch { - case e: Throwable => - - logger.info(s"Pushing to logs: Error Description\n dataset=${dataSet}\n method=${MethodName}\n Error: ${e.printStackTrace()}") - - // update log variables to push logs - val endTime = System.currentTimeMillis() - val executionTime = endTime - startTime - - // post audit logs to KAFKA - logger.logApiAccess(streamingContext.sparkContext.getConf.getAppId - , streamingContext.sparkContext.getConf.get("spark.app.name") - , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeStream - , getYarnClusterName() - , user - , appTag.replaceAllLiterally("/", "_") - , MethodName - , dataSet - , datasetSystemType - , "" - , additionalPropsToLog - , GimelConstants.FAILURE - , e.toString + "\n" + e.getStackTraceString - , GimelConstants.UNKNOWN_STRING - , startTime - , endTime - , executionTime - ) - - - // throw error to console - logger.throwError(e.toString) - - val msg = s"Error in DataSet ${MethodName} Operation: ${e.printStackTrace()}" - throw new DataSetOperationException(msg, e) - } - - } - -} - -/** - * Client API for initiating datastreams - */ - -object DataStream { - - val defaultBatchInterval = 25 - - import DataStreamUtils._ - - /** - * Client calls for a DataStream with SparkContext - * , we internally create an HiveContext & provide DataStream - * - * @param sparkContext SparkContext - * @return DataStream - */ - def apply(sparkContext: SparkContext): DataStream = { - // todo ADD LOGGING .... WARN USER of default value or pass a specific one explicitly - getOrCreateLogger(sparkContext).warning("Initiating New Spark Context. " + - "Please provide HiveContext if you already have One.") - val allConfs = sparkContext.getConf.getAll.toMap - val batchWindowSec = allConfs.getOrElse(KafkaConfigs.defaultBatchInterval - , defaultBatchInterval.toString).toInt - val ssc = new StreamingContext(sparkContext, Seconds(batchWindowSec)) - this (ssc) - } - - /** - * Client calls for a DataStream with already available HiveContext - * , we provide a DataStream API with the same HiveConext - * - * @param hiveContext HiveContext - * @return DataStream - */ - def apply(hiveContext: HiveContext): DataStream = { - getOrCreateLogger(hiveContext.sparkContext).warning("Initiating New Spark Context" + - ". Please provide HiveContext if you already have One.") - this (hiveContext.sparkContext) - } - - /** - * Client calls for a DataStream without any context (spark or hive) - * , we provide a DataStream API with the same HiveConext - * - * @return DataStream - */ - def apply(): DataStream = { - val sparkConf = new SparkConf().setAppName(sys.env(GimelConstants.USER) + "PCataLog-DataSet") - val sc = new SparkContext(sparkConf) - getOrCreateLogger(sc).warning("Initiating New Spark Context" + - ". Please provide HiveContext if you already have One.") - this (sc) - } - - /** - * Client calls for a DataStream with already available SQLContext - * , we provide a DataStream API with the equivalent HiveConext - * - * @param sqlContext SQLContext - * @return DataStream - */ - def apply(sqlContext: SQLContext): DataStream = { - getOrCreateLogger(sqlContext.sparkContext).warning("Initiating New Spark Context. " + - "Please provide HiveContext if you already have One.") - this (sqlContext.sparkContext) - } - - /** - * Client calls for a DataStream with SparkContext, - * we internally create an HiveContext & provide DataStream - * - * @param streamingContext StreamingContext - * @return DataStream - */ - def apply(streamingContext: StreamingContext): DataStream = { - new DataStream(streamingContext) - } - -} - -/** - * Custom Exception for DataStream initiation errors - * - * @param message Message to Throw - * @param cause A Throwable Cause - */ -private class DataStreamInitializationException(message: String, cause: Throwable) - extends RuntimeException(message) { - if (cause != null) { - initCause(cause) - } - - def this(message: String) = this(message, null) -} - - -/** - * Private Functionalities required for DataStream Initiation Operations - * Do Not Expose to Client - */ - -private object DataStreamUtils { - - - /** - * Convenience Method to Get or Create Logger - * - * @param sparkContext SparkContext - * @return Logger - */ - def getOrCreateLogger(sparkContext: SparkContext): Logger = { - val user = sys.env(GimelConstants.USER) - val sparkAppName = sparkContext.getConf.get(GimelConstants.SPARK_APP_NAME) - val appTag = s"${user}-${sparkAppName}" - val logger = Logger(appTag) - logger - } - - /** - * provides an appropriate DataStream - * - * @param sparkStreamingContext - * @param sourceType Type of System. Example - KAFKA - * @return DataStream - */ - - def getDataStream(sparkStreamingContext: StreamingContext - , sourceType: DataStreamType.SystemType): GimelDataStream = { - sourceType match { - case DataStreamType.KAFKA => - new com.paypal.gimel.kafka.DataStream(sparkStreamingContext) - } - } - - /** - * Gets the last user Kafka KafkaDataStream reader (if already use), else Returns None - * - * @param dataStream DataStream - * @return Option[KafkaDataStream] - */ - - def getLatestKafkaDataStreamReader(dataStream: DataStream) - : Option[com.paypal.gimel.kafka.DataStream] = { - val kafkaReader = Try { - dataStream.latestDataStreamReader.get.asInstanceOf[com.paypal.gimel.kafka.DataStream] - } - kafkaReader match { - case Success(x) => - Some(x) - case _ => - None - } - } - -} diff --git a/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataStream2.scala b/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataStream2.scala index 6f67ca92..92bba7d1 100644 --- a/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataStream2.scala +++ b/gimel-dataapi/gimel-core/src/main/scala/com/paypal/gimel/DataStream2.scala @@ -184,7 +184,7 @@ class DataStream2(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkContext.getConf.get("spark.app.name") , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeStream + , "STREAM" , getYarnClusterName() , user , appTag.replaceAllLiterally("/", "_") @@ -300,7 +300,7 @@ class DataStream2(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkContext.getConf.get("spark.app.name") , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeStream + , "STREAM" , getYarnClusterName() , user , appTag.replaceAllLiterally("/", "_") @@ -330,7 +330,7 @@ class DataStream2(val sparkSession: SparkSession) { logger.logApiAccess(sparkSession.sparkContext.getConf.getAppId , sparkContext.getConf.get("spark.app.name") , this.getClass.getName - , KafkaConstants.gimelAuditRunTypeStream + , "STREAM" , getYarnClusterName() , user , appTag.replaceAllLiterally("/", "_") diff --git a/gimel-dataapi/gimel-examples/pom.xml b/gimel-dataapi/gimel-examples/pom.xml index 498f8fcc..f0f9eec6 100644 --- a/gimel-dataapi/gimel-examples/pom.xml +++ b/gimel-dataapi/gimel-examples/pom.xml @@ -45,7 +45,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-dataapi/gimel-logger/pom.xml b/gimel-dataapi/gimel-logger/pom.xml index 924f466c..e0688787 100644 --- a/gimel-dataapi/gimel-logger/pom.xml +++ b/gimel-dataapi/gimel-logger/pom.xml @@ -50,33 +50,6 @@ under the License. ${scala.version} ${scala.packaging.scope} - - com.paypal.gimel - gimel-logging_${gimel.logging.spark.binary.version} - ${gimel.logging.version} - - - org.slf4j - slf4j-log4j12 - - - log4j - log4j - - - org.apache.kafka - * - - - org.apache.kafka - kafka-log4j-appender - - - com.googlecode.protobuf-java-format - protobuf-java-format - - - src/main/scala diff --git a/gimel-dataapi/pom.xml b/gimel-dataapi/pom.xml index c1f7c61d..dd742063 100644 --- a/gimel-dataapi/pom.xml +++ b/gimel-dataapi/pom.xml @@ -39,25 +39,20 @@ under the License. gimel-logger gimel-common gimel-connectors/gimel-sftp - gimel-connectors/gimel-elasticsearch-6.2 - gimel-connectors/gimel-jdbc - gimel-connectors/gimel-hive-1.2 - gimel-connectors/gimel-hbase-1.2 - gimel-connectors/gimel-cassandra-2.0 - gimel-connectors/gimel-aerospike-3.14 - gimel-connectors/gimel-kafka-2.2 - gimel-connectors/gimel-druid-0.82 + gimel-connectors/gimel-elasticsearch + gimel-connectors/gimel-hive gimel-connectors/gimel-restapi gimel-connectors/gimel-s3 + gimel-connectors/gimel-kafka + gimel-connectors/gimel-jdbc + gimel-connectors/gimel-cassandra gimel-core - gimel-sql - gimel-tools gimel-examples 3.1.5 - 2.0.6 + 2.4.3 3.3.0 6.2.1 16.0 @@ -67,20 +62,20 @@ under the License. 2.6.7 1.8 1.19.4 - 1.3.2 - 2.2.1 - 0-10 + + 2.1.1 + 1.4 2.3 0.4.3-SNAPSHOT - 0.8.2 + 0.8.3 2.13.0 0.0.7 1.3.3 15.10.00.22 3.4.13 2.13.0 - 0-10_2.11 + 0-10_2.12 1.1.3 3.9.9.Final @@ -125,14 +120,6 @@ under the License. - - org.apache.avro - avro-maven-plugin - - - String - - org.apache.maven.plugins maven-surefire-plugin @@ -214,7 +201,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-parser/pom.xml b/gimel-parser/pom.xml index 85a1b352..680399f9 100644 --- a/gimel-parser/pom.xml +++ b/gimel-parser/pom.xml @@ -15,9 +15,9 @@ - org.scala-lang - scala-xml - 2.11.0-M4 + org.scala-lang.modules + scala-xml_${scala.binary.version} + ${scala.xml.version} ${scala.packaging.scope} @@ -109,7 +109,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-serde/gimel-deserializers/generic-deserializers/pom.xml b/gimel-serde/gimel-deserializers/generic-deserializers/pom.xml index a9df87a5..34a6231b 100644 --- a/gimel-serde/gimel-deserializers/generic-deserializers/pom.xml +++ b/gimel-serde/gimel-deserializers/generic-deserializers/pom.xml @@ -18,7 +18,7 @@ com.paypal.gimel serde-common - 1.0-SNAPSHOT + 2.4.7-SNAPSHOT org.scala-lang @@ -43,7 +43,7 @@ com.paypal.gimel serde-common - 1.0-SNAPSHOT + 2.4.7-SNAPSHOT test-jar test @@ -85,24 +85,6 @@ test - - - - - - - - - - - - - - - - - - org.apache.curator curator-test diff --git a/gimel-serde/gimel-serializers/generic-serializers/pom.xml b/gimel-serde/gimel-serializers/generic-serializers/pom.xml index b0c2c148..ec62b3a4 100644 --- a/gimel-serde/gimel-serializers/generic-serializers/pom.xml +++ b/gimel-serde/gimel-serializers/generic-serializers/pom.xml @@ -18,7 +18,7 @@ com.paypal.gimel serde-common - 1.0-SNAPSHOT + 2.4.7-SNAPSHOT org.scala-lang @@ -49,7 +49,7 @@ com.paypal.gimel serde-common - 1.0-SNAPSHOT + 2.4.7-SNAPSHOT test-jar test diff --git a/gimel-serde/pom.xml b/gimel-serde/pom.xml index c775d59e..c93ec474 100644 --- a/gimel-serde/pom.xml +++ b/gimel-serde/pom.xml @@ -38,7 +38,7 @@ 3.9.9.Final 4.1.17.Final - 2.2.1 + ${kafka.version} @@ -97,14 +97,6 @@ - - org.apache.avro - avro-maven-plugin - - - String - - org.apache.maven.plugins maven-surefire-plugin @@ -164,7 +156,7 @@ org.apache.maven.plugins maven-shade-plugin - 3.0.0 + ${maven.shade.plugin.version} diff --git a/gimel-serde/serde-common/pom.xml b/gimel-serde/serde-common/pom.xml index 9b5d1edc..6e4d522d 100644 --- a/gimel-serde/serde-common/pom.xml +++ b/gimel-serde/serde-common/pom.xml @@ -50,12 +50,12 @@ - - com.databricks - spark-avro_${scala.binary.version} - 4.0.0 - ${packaging.scope} - + + org.apache.spark + spark-avro_2.12 + ${spark.version} + ${packaging.scope} + io.spray spray-json_${scala.binary.version} diff --git a/gimel-serde/serde-common/src/main/scala/com/paypal/gimel/serde/common/avro/AvroUtils.scala b/gimel-serde/serde-common/src/main/scala/com/paypal/gimel/serde/common/avro/AvroUtils.scala index 9173ec9d..0d0bf0c1 100644 --- a/gimel-serde/serde-common/src/main/scala/com/paypal/gimel/serde/common/avro/AvroUtils.scala +++ b/gimel-serde/serde-common/src/main/scala/com/paypal/gimel/serde/common/avro/AvroUtils.scala @@ -25,16 +25,15 @@ import scala.collection.JavaConverters._ import scala.collection.immutable.Map import scala.collection.mutable -import com.databricks.spark.avro.SchemaConverters._ import org.apache.avro.Schema import org.apache.avro.generic.{GenericData, GenericRecord} import org.apache.avro.io.DecoderFactory import org.apache.avro.io.EncoderFactory import org.apache.avro.specific.SpecificDatumWriter import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Row, SparkSession} -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.avro._ +import org.apache.spark.sql.functions._ import spray.json._ import spray.json.DefaultJsonProtocol._ import spray.json.JsValue @@ -109,6 +108,26 @@ object AvroUtils extends Serializable { newGenericRec } + /** + * Takes An Avro Schema String and Returns the list of field names in the "fields" list + * @param schemaString + * @return List(fieldNames) + */ + def getTopLevelFieldNamesFromAvro(schemaString: String): Seq[String] = { + // Parse as JsValue + val schemaAsJsVal = schemaString.parseJson + // Convert to JsObject + val schemaAsJsObject = schemaAsJsVal.asJsObject + // Get the Map of each element & Value + val schemaElementsMap: Map[String, JsValue] = schemaAsJsObject.fields + // These fields will be added with "to-add" fields + val schemaFields: Seq[JsValue] = schemaAsJsObject.getFields("fields").head.convertTo[Seq[JsValue]] + schemaFields.map{ x => + x.asJsObject.fields.head._2.toString().replace(""""""", "") + } + + } + /** * Adds additional fields to the Avro Schema * @@ -189,39 +208,24 @@ object AvroUtils extends Serializable { */ def getDeserializedDataFrame(dataframe: DataFrame, columnToDeserialize: String, avroSchemaString: String): DataFrame = { val originalFields: Array[String] = dataframe.columns.filter(field => field != columnToDeserialize) - val newAvroSchemaString = addAdditionalFieldsToSchema(originalFields.toList, avroSchemaString) - try { - dataframe.map { eachRow => - val recordToDeserialize: Array[Byte] = eachRow.getAs(columnToDeserialize).asInstanceOf[Array[Byte]] - val originalColumnsMap = originalFields.map { - field => { - val index = eachRow.fieldIndex(field) - if (eachRow.isNullAt(index)) { - (field -> "null") - } else { - (field -> eachRow.getAs(field).toString) - } - } - } - val deserializedGenericRecord: GenericRecord = bytesToGenericRecordWithSchemaRecon(recordToDeserialize, avroSchemaString, avroSchemaString) - val newDeserializedGenericRecord: GenericRecord = copyToGenericRecord(deserializedGenericRecord, avroSchemaString, newAvroSchemaString) - originalColumnsMap.foreach { kv => newDeserializedGenericRecord.put(kv._1, kv._2) } - val avroSchemaObj: Schema = (new Schema.Parser).parse(newAvroSchemaString) - val converter = AvroToSQLSchemaConverter.createConverterToSQL(avroSchemaObj) - converter(newDeserializedGenericRecord).asInstanceOf[Row] - } { - val avroSchema: Schema = (new Schema.Parser).parse(newAvroSchemaString) - val schemaType: SchemaType = toSqlType(avroSchema) - val encoder = RowEncoder(schemaType.dataType.asInstanceOf[StructType]) - encoder - }.toDF - } catch { - case ex: Throwable => { - ex.printStackTrace() - throw ex - } - } + logger.debug(s"Original Fields \n${originalFields}") + logger.debug(s"schema \n${avroSchemaString}") + val fieldsInAvro = getTopLevelFieldNamesFromAvro(avroSchemaString ) + logger.debug(s"Avro Fields \n${fieldsInAvro}") + logger.debug(s"**************** schema before deserialize ************************") + dataframe.printSchema() + val op = dataframe.withColumn("avro", from_avro(col(columnToDeserialize), avroSchemaString) ) + logger.debug(s"**************** schema after deserialize ************************") + op.printSchema() + op.show(2) + logger.debug(s"**************** Fields in avro that will be projected in dataFrame ************************") + logger.debug(fieldsInAvro.mkString(",")) + val colsToSelect: Seq[String] = fieldsInAvro.map{ x => s"avro.${x}"} + logger.debug(colsToSelect.mkString(",")) + val k = op.select(colsToSelect.head, colsToSelect.tail: _*) + k + } /** diff --git a/pom.xml b/pom.xml index 3635f3ea..1d5c6342 100644 --- a/pom.xml +++ b/pom.xml @@ -61,10 +61,10 @@ under the License. - general + dataproc_1.5x - general + dataproc_1.5x true @@ -75,18 +75,23 @@ under the License. provided provided 3.4.13 - 2.11 + 2.12 3.0.1 4.1.0 2.4.7 - 2.11.8 - 2.3 - 2.3.0 - 2.7.3 - 1.2.1 - 1.1.2 + 2.12.10 + 1.3.0 + 2.4 + 2.4.7 + 2.10.0 + 2.3.7 + 1.5.0 1.8 + 0.17.1 2.8.0 + 2.11 + 2.11 + 3.2.4