diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 019780d0e2c..1897b176c9f 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -23,6 +23,7 @@ import java.util.Properties import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.sys.process._ import scala.util.Try @@ -353,20 +354,29 @@ object RapidsPluginUtils extends Logging { val resourceName = "spark-rapids-extra-plugins" val classLoader = RapidsPluginUtils.getClass.getClassLoader val resourceUrls = classLoader.getResources(resourceName) - val resourceUrlArray = resourceUrls.asScala.toArray + // Somehow, it is possible that the definition of same Plugin occurs multiple times in the + // resourceUrls. Therefore, deduplication work is essential in case of loading some plugins + // repeatedly. + val distinctResources = mutable.HashSet.empty[URL] + while (resourceUrls.hasMoreElements) { + val url = resourceUrls.nextElement() + if (distinctResources.contains(url)) { + logWarning(s"Found duplicated definition of ExtraPlugin: $url! Discarded it.") + } else { + distinctResources.add(url) + } + } - if (resourceUrlArray.isEmpty) { + if (distinctResources.isEmpty) { logDebug(s"Could not find file $resourceName in the classpath, not loading extra plugins") Seq.empty } else { - val plugins = scala.collection.mutable.ListBuffer[SparkPlugin]() - for (resourceUrl <- resourceUrlArray) { + distinctResources.iterator.flatMap { resourceUrl => val source = scala.io.Source.fromURL(resourceUrl) val pluginClasses = source.getLines().toList source.close() - plugins ++= loadExtensions(classOf[SparkPlugin], pluginClasses) - } - plugins.toSeq + loadExtensions(classOf[SparkPlugin], pluginClasses) + }.toList } }