From fb2aec726978784a02349cbe22980646b5d11cc0 Mon Sep 17 00:00:00 2001 From: Marek Horst Date: Mon, 16 Oct 2023 17:16:57 +0200 Subject: [PATCH] Closes #1426: Run IIS experiments by relying on spark 3.4 version WIP. Introducing required workflow.xml fixes for various workflows relying on spark3 to let their integration tests succeed: * setting `spark.extraListeners` and `spark.sql.queryExecutionListeners` explicitly to empty values in order to avoid relying on incompatible, spark2 compliant, cloudera listeners * setting `spark.shuffle.useOldFetchProtocol=true` in order to address `2.4 to 3.0 migration guide` requirement regarding backward compatibility of the protocol for fetching shuffle blocks (and avoiding `IllegalArgumentException: Unexpected message type: ` kind of errors) The following modules were covered with workflow.xml related changes which resulted in successful integration tests execution: * `iis-wf-documentssimilarity` (explicitly excluded `hadoop-mapreduce-client-app` is still among spark342 sharelib dependencies, which causes test failures) * `iis-wf-import` (infospace importer still fails due to spark3 regression, more details in #8941#note-35) --- iis-wf/iis-wf-documentssimilarity/pom.xml | 20 +++++++++++++++++++ .../sampletest/oozie_app/workflow.xml | 15 +++----------- .../core_parquet/oozie_app/workflow.xml | 15 +++----------- .../importer/infospace/oozie_app/workflow.xml | 15 +++----------- .../wf/importer/patent/oozie_app/workflow.xml | 15 +++----------- 5 files changed, 32 insertions(+), 48 deletions(-) diff --git a/iis-wf/iis-wf-documentssimilarity/pom.xml b/iis-wf/iis-wf-documentssimilarity/pom.xml index f91b992c8..22ddb8b59 100644 --- a/iis-wf/iis-wf-documentssimilarity/pom.xml +++ b/iis-wf/iis-wf-documentssimilarity/pom.xml @@ -84,6 +84,13 @@ org.apache.hadoop hadoop-mapreduce-client-core + + + + org.apache.hadoop + hadoop-yarn-api + + @@ -91,6 +98,19 @@ document-similarity-oap-uberworkflow tar.gz oozie-job + + + + org.apache.hadoop + hadoop-hdfs 
+ + + + + org.apache.hadoop + hadoop-mapreduce-client-app + + diff --git a/iis-wf/iis-wf-documentssimilarity/src/test/resources/eu/dnetlib/iis/wf/documentssimilarity/avro_to_protobuf/sampletest/oozie_app/workflow.xml b/iis-wf/iis-wf-documentssimilarity/src/test/resources/eu/dnetlib/iis/wf/documentssimilarity/avro_to_protobuf/sampletest/oozie_app/workflow.xml index c69311b6a..d88365ce2 100644 --- a/iis-wf/iis-wf-documentssimilarity/src/test/resources/eu/dnetlib/iis/wf/documentssimilarity/avro_to_protobuf/sampletest/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-documentssimilarity/src/test/resources/eu/dnetlib/iis/wf/documentssimilarity/avro_to_protobuf/sampletest/oozie_app/workflow.xml @@ -18,16 +18,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -110,8 +100,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/content_url/core_parquet/oozie_app/workflow.xml b/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/content_url/core_parquet/oozie_app/workflow.xml index 0805a45ab..825a37f2c 100644 --- 
a/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/content_url/core_parquet/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/content_url/core_parquet/oozie_app/workflow.xml @@ -39,16 +39,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -91,8 +81,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/infospace/oozie_app/workflow.xml b/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/infospace/oozie_app/workflow.xml index 299984373..274d6921f 100644 --- a/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/infospace/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/infospace/oozie_app/workflow.xml @@ -135,16 +135,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - 
spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -187,8 +177,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/patent/oozie_app/workflow.xml b/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/patent/oozie_app/workflow.xml index 6a91a7096..0764ffe4b 100644 --- a/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/patent/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-import/src/main/resources/eu/dnetlib/iis/wf/importer/patent/oozie_app/workflow.xml @@ -34,16 +34,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -86,8 +76,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}