# Skip to content

# Merge pull request #62 from mlverse/ml #57

# Merge pull request #62 from mlverse/ml

# Merge pull request #62 from mlverse/ml #57

# Workflow file for this run

# CI workflow: for every Spark/PySpark pair in the matrix, install R, Python,
# Spark and Scala (cached where possible), build the PySpark virtual
# environment, and run the package test suite with devtools.
name: Spark-Connect

on:
  push:
    branches: main
  pull_request:
    branches: main

jobs:
  Spark-Tests:
    runs-on: ubuntu-latest
    name: ${{ matrix.config.name }}
    strategy:
      # Keep the remaining matrix entries running when one of them fails.
      fail-fast: false
      matrix:
        config:
          - {spark: '3.5.0', pyspark: '3.5', hadoop: '3', name: 'PySpark 3.5'}
          - {spark: '3.4.1', pyspark: '3.4', hadoop: '3', name: 'PySpark 3.4'}
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      # Quoted so the value stays the string "yes" under YAML 1.1 parsers.
      R_KEEP_PKG_SOURCE: 'yes'
      # The matrix values are exposed as environment variables so the R
      # snippets below can read them with Sys.getenv().
      SPARK_VERSION: ${{ matrix.config.spark }}
      HADOOP_VERSION: ${{ matrix.config.hadoop }}
      PYSPARK_VERSION: ${{ matrix.config.pyspark }}
    steps:
      - uses: actions/checkout@v3

      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: 'release'
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::devtools
          needs: check

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          # Must stay quoted: unquoted 3.10 would be read as the float 3.1.
          python-version: '3.10'

      - name: Install Venv
        # Refresh the package index first and pass -y so the install cannot
        # stall or abort on a confirmation prompt in the non-interactive runner.
        run: |
          sudo apt-get update
          sudo apt-get install -y python3-venv

      - name: Cache Spark
        id: cache-spark
        uses: actions/cache@v3
        with:
          path: /home/runner/spark/spark-${{ matrix.config.spark }}-bin-hadoop${{ matrix.config.hadoop }}
          key: sparklyr-spark-${{ matrix.config.spark }}-bin-hadoop${{ matrix.config.hadoop }}

      - name: Install Spark (via sparklyr)
        # Only download Spark when the cache step above did not restore it.
        if: steps.cache-spark.outputs.cache-hit != 'true'
        run: |
          sparklyr::spark_install(version = Sys.getenv("SPARK_VERSION"))
        shell: Rscript {0}

      - name: Cache Scala
        id: cache-scala
        uses: actions/cache@v3
        with:
          path: /home/runner/scala/
          key: scala-2

      - name: Install Scala (via sparklyr)
        # Only download scalac when the cache step above did not restore it.
        if: steps.cache-scala.outputs.cache-hit != 'true'
        run: |
          sparklyr::download_scalac()
        shell: Rscript {0}

      # The next three steps only report diagnostic information about the
      # R session used by the job.
      - name: R Session Info
        run: sessionInfo()
        shell: Rscript {0}

      - name: R Environment Variables
        run: Sys.getenv()
        shell: Rscript {0}

      - name: R Installed Packages
        run: |
          m_pkgs <- installed.packages()
          t_pkgs <- as.data.frame(m_pkgs, row.names = FALSE)
          print(t_pkgs[, c("Package", "Version")])
        shell: Rscript {0}

      - name: Cache Environment
        id: cache-venv
        uses: actions/cache@v3
        with:
          path: /home/runner/.virtualenvs/r-sparklyr-pyspark-${{ matrix.config.pyspark }}
          key: sparklyr-virtualenv-${{ matrix.config.pyspark }}

      - name: Virtual Environment
        # The cache-hit guard is intentionally left disabled, so
        # install_pyspark() runs on every job even when the cache was restored.
        # if: steps.cache-venv.outputs.cache-hit != 'true'
        run: |
          devtools::load_all()
          install_pyspark(
            Sys.getenv("SPARK_VERSION"),
            python = Sys.which("python")
          )
        shell: Rscript {0}

      - name: R Tests
        run: |
          devtools::load_all()
          library(sparklyr)
          sv <- Sys.getenv("SPARK_VERSION")
          # Compare as version numbers, not as strings: a plain string
          # comparison would order "3.10.0" before "3.5".
          if (numeric_version(sv) >= "3.5") {
            env_name <- use_envname(version = sv)
            loc <- paste0("/home/runner/.virtualenvs/", env_name, "/bin/python")
            # NOTE(review): PYTHON_VERSION_MISMATCH looks like the name of a
            # PySpark error rather than a variable PySpark reads; confirm
            # whether PYSPARK_PYTHON was intended here.
            Sys.setenv("PYTHON_VERSION_MISMATCH" = loc)
            Sys.setenv("PYSPARK_DRIVER_PYTHON" = loc)
          }
          # Report the Java/Python settings in effect before running the tests.
          Sys.getenv("JAVA_HOME")
          Sys.getenv("PYTHON_VERSION_MISMATCH")
          Sys.getenv("PYSPARK_DRIVER_PYTHON")
          devtools::test(reporter = sparklyr_reporter())
        shell: Rscript {0}