From 609a0fbea3c7b9d9dafcb9523eee7fb9f647d83e Mon Sep 17 00:00:00 2001
From: Michael Brachmann <mrb24@buffalo.edu>
Date: Wed, 2 Oct 2019 12:21:03 -0400
Subject: [PATCH 1/2] minor updates to enable building for scala 2.12

---
 build.sbt | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/build.sbt b/build.sbt
index c01d73a..5d9e666 100644
--- a/build.sbt
+++ b/build.sbt
@@ -2,11 +2,11 @@ name := "spark-google-spreadsheets"
 
 organization := "com.github.potix2"
 
-scalaVersion := "2.11.12"
+scalaVersion := "2.12.10"
 
-crossScalaVersions := Seq("2.11.12")
+crossScalaVersions := Seq("2.12.10")
 
-version := "0.6.4-SNAPSHOT"
+version := "0.6.4"
 
 spName := "potix2/spark-google-spreadsheets"
 
@@ -16,7 +16,7 @@ spIncludeMaven := true
 
 spIgnoreProvided := true
 
-sparkVersion := "2.3.3"
+sparkVersion := "2.4.4"
 
 val testSparkVersion = settingKey[String]("The version of Spark to test against.")
 
@@ -26,7 +26,7 @@ sparkComponents := Seq("sql")
 
 libraryDependencies ++= Seq(
   "org.slf4j" % "slf4j-api" % "1.7.5" % "provided",
-  "org.scalatest" %% "scalatest" % "2.2.1" % "test",
+  "org.scalatest" %% "scalatest" % "3.0.8" % "test",
   ("com.google.api-client" % "google-api-client" % "1.22.0").
     exclude("com.google.guava", "guava-jdk5"),
   "com.google.oauth-client" % "google-oauth-client-jetty" % "1.22.0",
@@ -55,6 +55,9 @@ publishArtifact in Test := false
 
 pomIncludeRepository := { _ => false }
 
+//publishMavenStyle := true
+//publishTo := Some(Resolver.file("file",  new File(Path.userHome.absolutePath+"/.m2/repository")))
+
 publishTo := {
   val nexus = "https://oss.sonatype.org/"
   if (version.value.endsWith("SNAPSHOT"))

From 7c1f310c6f9e98741385bcbf9b7313b0716bf2e4 Mon Sep 17 00:00:00 2001
From: Michael Brachmann <mrb24@buffalo.edu>
Date: Tue, 26 Nov 2019 11:00:31 -0500
Subject: [PATCH 2/2] Make user provided schema work and sanitize inferred
 schema field names

---
 .../spreadsheets/SpreadsheetRelation.scala    | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/main/scala/com/github/potix2/spark/google/spreadsheets/SpreadsheetRelation.scala b/src/main/scala/com/github/potix2/spark/google/spreadsheets/SpreadsheetRelation.scala
index db19bf3..e7b1a99 100644
--- a/src/main/scala/com/github/potix2/spark/google/spreadsheets/SpreadsheetRelation.scala
+++ b/src/main/scala/com/github/potix2/spark/google/spreadsheets/SpreadsheetRelation.scala
@@ -29,6 +29,7 @@ case class SpreadsheetRelation protected[spark] (
 
   import com.github.potix2.spark.google.spreadsheets.SparkSpreadsheetService._
 
+  private val fieldMap = scala.collection.mutable.Map[String, String]()
   override def schema: StructType = userSchema.getOrElse(inferSchema())
 
   private lazy val aWorksheet: SparkWorksheet =
@@ -47,6 +48,7 @@ case class SpreadsheetRelation protected[spark] (
 
   override def buildScan(): RDD[Row] = {
     val aSchema = schema
+    val schemaMap = fieldMap.toMap
     sqlContext.sparkContext.makeRDD(rows).mapPartitions { iter =>
       iter.map { m =>
         var index = 0
@@ -55,6 +57,8 @@ case class SpreadsheetRelation protected[spark] (
           val field = aSchema.fields(index)
           rowArray(index) = if (m.contains(field.name)) {
             TypeCast.castTo(m(field.name), field.dataType, field.nullable)
+          } else if (schemaMap.contains(field.name) && m.contains(schemaMap(field.name))) {
+            TypeCast.castTo(m(schemaMap(field.name)), field.dataType, field.nullable)
           } else {
             null
           }
@@ -78,9 +82,19 @@ case class SpreadsheetRelation protected[spark] (
     }
   }
 
+  /** Turns a sheet header into an identifier-style column name; the result is never empty. */
+  def sanitizeColumnName(name: String): String = {
+    // Collapse each run of non-alphanumeric characters to "_", then drop trailing underscores.
+    val collapsed = name.replaceAll("[^a-zA-Z0-9]+", "_").replaceAll("_+$", "")
+    val stripped = collapsed.replaceAll("^[0-9_]+", "") // drop leading digits and underscores
+    if (stripped.isEmpty) "_" + collapsed else stripped // "2019" -> "_2019" rather than ""
+  }
+
   private def inferSchema(): StructType =
-    StructType(aWorksheet.headers.toList.map { fieldName =>
-      StructField(fieldName, StringType, nullable = true)
-    })
+    StructType(aWorksheet.headers.toList.map { header =>
+      val columnName = sanitizeColumnName(header) // schema exposes the sanitized name
+      fieldMap.put(columnName, header)            // remember the original header for buildScan lookups
+      StructField(columnName, StringType, nullable = true)
+    })
 
 }