From 32d2ccc8930a93e0fef3468e26e71e125de5bf14 Mon Sep 17 00:00:00 2001 From: Marek Horst Date: Fri, 13 Oct 2023 18:17:38 +0200 Subject: [PATCH] Closes #1426: Run IIS experiments by relying on spark 3.4 version WIP. Introducing required workflow.xml fixes for various workflows relying on spark3 so that their integration tests succeed: * setting `spark.extraListeners` and `spark.sql.queryExecutionListeners` explicitly to empty values in order to avoid relying on incompatible, spark2-compliant Cloudera listeners * setting `spark.shuffle.useOldFetchProtocol=true` in order to address the `2.4 to 3.0 migration guide` requirement regarding backward compatibility of the protocol for fetching shuffle blocks (and avoiding `IllegalArgumentException: Unexpected message type: ` kind of errors) The following modules were covered with workflow.xml-related changes which resulted in successful integration test execution: * `iis-wf-referenceextraction` --- .../community/main/oozie_app/workflow.xml | 15 ++-------- .../oozie_app/workflow.xml | 15 ++-------- .../covid19/main/oozie_app/workflow.xml | 20 ++++--------- .../patent/main/oozie_app/workflow.xml | 30 ++++++++----------- .../funder_report/oozie_app/workflow.xml | 15 ++-------- .../project/tara_main/oozie_app/workflow.xml | 15 ++-------- .../main/oozie_app/workflow.xml | 20 ++++--------- .../softwareurl/main/oozie_app/workflow.xml | 15 ++-------- 8 files changed, 39 insertions(+), 106 deletions(-) diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main/oozie_app/workflow.xml b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main/oozie_app/workflow.xml index 3a57797df..1e7810a52 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main/oozie_app/workflow.xml +++ 
b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main/oozie_app/workflow.xml @@ -30,16 +30,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -74,8 +64,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/concept/root_conceptid_report/oozie_app/workflow.xml b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/concept/root_conceptid_report/oozie_app/workflow.xml index 075b49295..9b07b4c0f 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/concept/root_conceptid_report/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/concept/root_conceptid_report/oozie_app/workflow.xml @@ -36,16 +36,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 
2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -77,8 +67,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/covid19/main/oozie_app/workflow.xml b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/covid19/main/oozie_app/workflow.xml index caf32aa40..308d141ea 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/covid19/main/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/covid19/main/oozie_app/workflow.xml @@ -36,16 +36,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -102,8 +92,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf 
spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} @@ -194,8 +185,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/patent/main/oozie_app/workflow.xml b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/patent/main/oozie_app/workflow.xml index dcfe52dd0..734d070ce 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/patent/main/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/patent/main/oozie_app/workflow.xml @@ -38,16 +38,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -169,8 +159,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf 
spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} @@ -234,8 +225,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} @@ -258,8 +250,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=1 --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} @@ -301,8 +294,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/project/funder_report/oozie_app/workflow.xml 
b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/project/funder_report/oozie_app/workflow.xml index ef1073550..d83f5e326 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/project/funder_report/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/project/funder_report/oozie_app/workflow.xml @@ -46,16 +46,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -86,8 +76,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/project/tara_main/oozie_app/workflow.xml b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/project/tara_main/oozie_app/workflow.xml index 3b246eb8f..5a76f9082 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/project/tara_main/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/project/tara_main/oozie_app/workflow.xml @@ -44,16 +44,6 @@ 
oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -88,8 +78,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=${sparkSqlShufflePartitions} diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/researchinitiative/main/oozie_app/workflow.xml b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/researchinitiative/main/oozie_app/workflow.xml index a44334674..2cb6ad81b 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/researchinitiative/main/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/researchinitiative/main/oozie_app/workflow.xml @@ -42,16 +42,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - spark2YarnHistoryServerAddress spark 2.* yarn history server 
address @@ -86,8 +76,9 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.yarn.executor.memoryOverhead=${sparkExecutorOverhead} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} @@ -116,8 +107,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/softwareurl/main/oozie_app/workflow.xml b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/softwareurl/main/oozie_app/workflow.xml index bc12a4976..6ee4ba856 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/softwareurl/main/oozie_app/workflow.xml +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/softwareurl/main/oozie_app/workflow.xml @@ -81,16 +81,6 @@ oozieActionShareLibForSpark2 oozie action sharelib for spark 2.* - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners 
classname - spark2YarnHistoryServerAddress spark 2.* yarn history server address @@ -212,8 +202,9 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=1 --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.extraListeners= + --conf spark.sql.queryExecutionListeners= + --conf spark.shuffle.useOldFetchProtocol=true --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}