From 46773c930bc99d12e22121d42247688f28972d33 Mon Sep 17 00:00:00 2001
From: Brendan Walsh <brwals@microsoft.com>
Date: Thu, 15 Aug 2024 15:24:28 -0700
Subject: [PATCH 1/6] chore: optimize git checkouts

---
 pipeline.yaml          | 13 +++++++++++++
 templates/checkout.yml |  6 ++++++
 2 files changed, 19 insertions(+)
 create mode 100644 templates/checkout.yml

diff --git a/pipeline.yaml b/pipeline.yaml
index 0e75e509c1..fb46411bab 100644
--- a/pipeline.yaml
+++ b/pipeline.yaml
@@ -64,6 +64,7 @@ jobs:
   pool:
     vmImage: ubuntu-20.04
   steps:
+    - template: templates/checkout.yml
     - task: AzureCLI@2
       displayName: 'Scala Style Check'
       inputs:
@@ -147,6 +148,7 @@ jobs:
 #        synapse-internal:
 #          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests"
   steps:
+    - template: templates/checkout.yml
     #- template: templates/ivy_cache.yml
     - template: templates/update_cli.yml
     - template: templates/conda.yml
@@ -176,6 +178,7 @@ jobs:
 #  pool:
 #    vmImage: ubuntu-20.04
 #  steps:
+#    - template: templates/checkout.yml
 #    - task: AzureCLI@2
 #      displayName: 'Get Docker Tag + Version'
 #      inputs:
@@ -250,6 +253,7 @@ jobs:
   pool:
     vmImage: ubuntu-20.04
   steps:
+    - template: templates/checkout.yml
     - template: templates/update_cli.yml
     - bash: |
         echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD)
@@ -369,6 +373,7 @@ jobs:
       cognitive:
         PACKAGE: "cognitive"
   steps:
+    - template: templates/checkout.yml
     #- template: templates/ivy_cache.yml
     - template: templates/update_cli.yml
     - template: templates/conda.yml
@@ -438,6 +443,7 @@ jobs:
       cognitive:
         PACKAGE: "cognitive"
   steps:
+    - template: templates/checkout.yml
     #- template: templates/ivy_cache_2.yml
     - template: templates/update_cli.yml
     - template: templates/conda.yml
@@ -496,6 +502,7 @@ jobs:
   pool:
     vmImage: ubuntu-20.04
   steps:
+    - template: templates/checkout.yml
     - template: templates/conda.yml
     - bash: df -H
 
@@ -505,6 +512,7 @@ jobs:
   pool:
     vmImage: ubuntu-20.04
   steps:
+    - template: templates/checkout.yml
     #- template: templates/ivy_cache.yml
     - template: templates/update_cli.yml
     - template: templates/conda.yml
@@ -545,6 +553,10 @@ jobs:
     vmImage: ubuntu-20.04
   steps:
     - checkout: self
+      fetchDepth: 1
+      clean: true
+      submodules: false
+      lfs: false
       persistCredentials: true
     - template: templates/update_cli.yml
     - template: templates/conda.yml
@@ -692,6 +704,7 @@ jobs:
       vw:
         PACKAGE: "vw"
   steps:
+    - template: templates/checkout.yml
     #- template: templates/ivy_cache.yml
     - template: templates/update_cli.yml
     - task: AzureCLI@2
diff --git a/templates/checkout.yml b/templates/checkout.yml
new file mode 100644
index 0000000000..977867297b
--- /dev/null
+++ b/templates/checkout.yml
@@ -0,0 +1,6 @@
+steps:
+  - checkout: self
+    fetchDepth: 1
+    clean: true
+    submodules: false
+    lfs: false

From d80be72bfd51311488d81d903ec769fbd9c56c22 Mon Sep 17 00:00:00 2001
From: Brendan Walsh <brwals@microsoft.com>
Date: Thu, 15 Aug 2024 15:30:12 -0700
Subject: [PATCH 2/6] Add checkout template to the Publish job missed in 1/6

---
 pipeline.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pipeline.yaml b/pipeline.yaml
index fb46411bab..a6d5489b8d 100644
--- a/pipeline.yaml
+++ b/pipeline.yaml
@@ -84,6 +84,7 @@ jobs:
   pool:
     vmImage: ubuntu-20.04
   steps:
+    - template: templates/checkout.yml
     #- template: templates/ivy_cache.yml
     - template: templates/update_cli.yml
     - template: templates/conda.yml

From fe09064ab31a572224f7a81e3a55a360baaa71a4 Mon Sep 17 00:00:00 2001
From: Brendan Walsh <brwals@microsoft.com>
Date: Thu, 15 Aug 2024 20:13:02 -0700
Subject: [PATCH 3/6] refactor conda caching and specify cache folder

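---
Review note (text in this area is ignored by `git am`): the job-level
`condition:` added to the Release job reads variables['tag'], but `tag` is
only set by the 'Get Git Tag' step inside that same job. Job conditions are
evaluated before any step runs, so unless `tag` is also supplied from outside
the job, the condition is false and Release is always skipped. Consider
keeping the per-step conditions, or computing the tag in an earlier job and
consuming it as an output variable.
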
 pipeline.yaml | 31 ++++---------------------------
 1 file changed, 4 insertions(+), 27 deletions(-)

diff --git a/pipeline.yaml b/pipeline.yaml
index a6d5489b8d..d098339ed7 100644
--- a/pipeline.yaml
+++ b/pipeline.yaml
@@ -53,7 +53,8 @@ parameters:
 
 variables:
   runTests: True
-  CONDA_CACHE_DIR: /usr/share/miniconda/envs
+  CONDA_ENV: synapseml
+  CONDA_CACHE_DIR: /usr/share/miniconda/envs/$(CONDA_ENV)/
   ComponentDetection.Timeout: 900
   isMaster: $[eq(variables['Build.SourceBranch'], 'refs/heads/master')]
 
@@ -253,6 +254,7 @@ jobs:
   cancelTimeoutInMinutes: 0
   pool:
     vmImage: ubuntu-20.04
+  condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
   steps:
     - template: templates/checkout.yml
     - template: templates/update_cli.yml
@@ -264,9 +266,7 @@ jobs:
         wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64
         chmod +x git-chglog_linux_amd64
         ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
     - task: GitHubRelease@0
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
       inputs:
         gitHubConnection: 'MMLSpark Github'
         repositoryName: '$(Build.Repository.Name)'
@@ -275,28 +275,8 @@ jobs:
         tagSource: 'auto'
         releaseNotesFile: 'CHANGELOG.md'
         isDraft: true
-    - bash: echo "##vso[task.prependpath]$CONDA/bin"
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
-      displayName: Add conda to PATH
-    - bash: sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR)
-      displayName: Fix directory permissions
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
-    - task: Cache@2
-      displayName: Use cached Anaconda environment
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
-      inputs:
-        key: 'conda | "$(Agent.OS)" | environment.yml'
-        restoreKeys: |
-          python | "$(Agent.OS)"
-          python
-        path: $(CONDA_CACHE_DIR)
-        cacheHitVar: CONDA_CACHE_RESTORED
-    - bash: |
-        conda env create --force -f environment.yml -v
-      condition: and(eq(variables.isMaster, true), and(startsWith(variables['tag'], 'v'), eq(variables.CONDA_CACHE_RESTORED, 'false')))
-      displayName: Create Anaconda environment
+    - template: templates/conda.yml
     - task: AzureKeyVault@1
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
       inputs:
         azureSubscription: 'SynapseML Build'
         keyVaultName: mmlspark-keys
@@ -304,7 +284,6 @@ jobs:
         set -e
         source activate synapseml
         sbt publishPypi
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
       env:
         STORAGE-KEY: $(storage-key)
         NEXUS-UN: $(nexus-un)
@@ -320,7 +299,6 @@ jobs:
         source activate synapseml
         sbt publishLocalSigned
         python tools/esrp/prepare_jar.py
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
       env:
         STORAGE-KEY: $(storage-key)
         NEXUS-UN: $(nexus-un)
@@ -350,7 +328,6 @@ jobs:
         DomainTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47'
         waitforreleasecompletion: true
       displayName: 'ESRP Publish Package'
-      condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
 
 
 - job: PythonTests

From 750bafc4555684c401271b746beabdad0f43c8ca Mon Sep 17 00:00:00 2001
From: Brendan Walsh <brwals@microsoft.com>
Date: Thu, 15 Aug 2024 22:42:35 -0700
Subject: [PATCH 4/6] ensure conda env dir exists

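---
Review note (text in this area is ignored by `git am`): `mkdir -p` runs
without sudo while the chown on the next line needs sudo. If the parent of
$(CONDA_CACHE_DIR) is not writable by the agent user, the step fails before
chown runs. Please verify on a fresh agent, or use `sudo mkdir -p`.
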
 templates/conda.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/templates/conda.yml b/templates/conda.yml
index 8c36f95929..771daaf4ce 100644
--- a/templates/conda.yml
+++ b/templates/conda.yml
@@ -2,7 +2,9 @@ steps:
   - bash: echo "##vso[task.prependpath]$CONDA/bin"
     displayName: Add conda to PATH
     retryCountOnTaskFailure: 1
-  - bash: sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR)
+  - bash: |
+      mkdir -p $(CONDA_CACHE_DIR)
+      sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR)
     displayName: Fix directory permissions
   - task: Cache@2
     displayName: Use cached Anaconda environment

From 6200d6eb5378485afb1c75029b332e6894f1b4f8 Mon Sep 17 00:00:00 2001
From: Brendan Walsh <brwals@microsoft.com>
Date: Thu, 15 Aug 2024 23:24:27 -0700
Subject: [PATCH 5/6] separated jobs into stages, added disk usage (DU) reporting

---
 pipeline.yaml       | 1315 ++++++++++++++++++++++---------------------
 templates/conda.yml |    6 +
 2 files changed, 668 insertions(+), 653 deletions(-)

diff --git a/pipeline.yaml b/pipeline.yaml
index d098339ed7..a1505f0c2c 100644
--- a/pipeline.yaml
+++ b/pipeline.yaml
@@ -58,673 +58,682 @@ variables:
   ComponentDetection.Timeout: 900
   isMaster: $[eq(variables['Build.SourceBranch'], 'refs/heads/master')]
 
-jobs:
-- job: Style
-  cancelTimeoutInMinutes: 0
-  condition: eq(variables.runTests, 'True')
-  pool:
-    vmImage: ubuntu-20.04
-  steps:
-    - template: templates/checkout.yml
-    - task: AzureCLI@2
-      displayName: 'Scala Style Check'
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: 'sbt scalastyle test:scalastyle'
-    - template: templates/conda.yml
-    - bash: |
-        set -e
-        source activate synapseml
-        black --diff --color . && black --check -q .
-      displayName: 'Python Style Check'
-
-- job: Publish
-  cancelTimeoutInMinutes: 0
-  pool:
-    vmImage: ubuntu-20.04
-  steps:
-    - template: templates/checkout.yml
-    #- template: templates/ivy_cache.yml
-    - template: templates/update_cli.yml
-    - template: templates/conda.yml
-    - template: templates/kv.yml
-    - task: MavenAuthenticate@0
-      name: mavenAuthPublicPackages
-      displayName: Authenticate SynapseML_PublicPackages
-      inputs:
-        artifactsFeeds: SynapseML_PublicPackages
-        mavenServiceConnections: SynapseML_PublicPackages-Feed-Connection
-    - task: AzureCLI@2
-      displayName: 'Publish Artifacts'
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
+stages:
+- stage: Lint
+  displayName: "Lint"
+  jobs:
+  - job: Style
+    cancelTimeoutInMinutes: 0
+    condition: eq(variables.runTests, 'True')
+    pool:
+      vmImage: ubuntu-20.04
+    steps:
+      - template: templates/checkout.yml
+      - task: AzureCLI@2
+        displayName: 'Scala Style Check'
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: 'sbt scalastyle test:scalastyle'
+      - template: templates/conda.yml
+      - bash: |
           set -e
-          sudo apt-get install graphviz doxygen -y
           source activate synapseml
-          sbt packagePython uploadNotebooks
-          sbt -DskipCodegen=true publishBlob publishDocs publishR publishPython
-          sbt genBuildInfo
-          echo "##vso[task.uploadsummary]$(pwd)/target/Build.md"
-          sbt -DskipCodegen=true publishLocalSigned
-          python tools/esrp/prepare_jar.py
-      env:
-        NEXUS-UN: $(nexus-un)
-        NEXUS-PW: $(nexus-pw)
-        PGP-PRIVATE: $(pgp-private)
-        PGP-PUBLIC: $(pgp-public)
-        PGP-PW: $(pgp-pw)
-        SYNAPSEML_ENABLE_PUBLISH: true
-    - task: AzureCLI@2
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
-          set -e
-          sbt publishBadges
-      condition: and(succeeded(), eq(variables.isMaster, true))
-      displayName: Publish Badges
+          black --diff --color . && black --check -q .
+        displayName: 'Python Style Check'
 
+- stage: Build
+  displayName: "Build and Publish"
+  jobs:
+  - job: BuildAndCacheCondaEnv
+    cancelTimeoutInMinutes: 0
+    condition: eq(variables.runTests, 'True')
+    pool:
+      vmImage: ubuntu-20.04
+    steps:
+      - template: templates/checkout.yml
+      - template: templates/conda.yml
+      - bash: df -H
 
-- job: E2E
-  timeoutInMinutes: 120
-  cancelTimeoutInMinutes: 0
-  pool:
-    vmImage: ubuntu-20.04
-  strategy:
-    matrix:
-      databricks-cpu:
-        TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests"
-      databricks-gpu:
-        TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests"
-      databricks-rapids:
-        TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests"
-      synapse:
-        TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests"
-#      ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}:
-#        synapse-internal:
-#          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests"
-  steps:
-    - template: templates/checkout.yml
-    #- template: templates/ivy_cache.yml
-    - template: templates/update_cli.yml
-    - template: templates/conda.yml
-    - template: templates/kv.yml
-    - template: templates/publish.yml
-    - task: AzureCLI@2
-      displayName: 'E2E'
-      inputs:
-        azureSubscription:  'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
-          set -e
-          source activate synapseml
-          sbt "testOnly $(TEST-CLASS)"
-      condition: and(succeeded(), eq(variables.runTests, 'True'))
-    - task: PublishTestResults@2
-      displayName: 'Publish Test Results'
-      inputs:
-        testResultsFiles: '**/test-reports/TEST-*.xml'
-        failTaskOnFailedTests: true
-      condition: and(eq(variables.runTests, 'True'), succeededOrFailed())
+  - job: Publish
+    cancelTimeoutInMinutes: 0
+    pool:
+      vmImage: ubuntu-20.04
+    steps:
+      - template: templates/checkout.yml
+      #- template: templates/ivy_cache.yml
+      - template: templates/update_cli.yml
+      - template: templates/conda.yml
+      - template: templates/kv.yml
+      - task: MavenAuthenticate@0
+        name: mavenAuthPublicPackages
+        displayName: Authenticate SynapseML_PublicPackages
+        inputs:
+          artifactsFeeds: SynapseML_PublicPackages
+          mavenServiceConnections: SynapseML_PublicPackages-Feed-Connection
+      - task: AzureCLI@2
+        displayName: 'Publish Artifacts'
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            set -e
+            sudo apt-get install graphviz doxygen -y
+            source activate synapseml
+            sbt packagePython uploadNotebooks
+            sbt -DskipCodegen=true publishBlob publishDocs publishR publishPython
+            sbt genBuildInfo
+            echo "##vso[task.uploadsummary]$(pwd)/target/Build.md"
+            sbt -DskipCodegen=true publishLocalSigned
+            python tools/esrp/prepare_jar.py
+        env:
+          NEXUS-UN: $(nexus-un)
+          NEXUS-PW: $(nexus-pw)
+          PGP-PRIVATE: $(pgp-private)
+          PGP-PUBLIC: $(pgp-public)
+          PGP-PW: $(pgp-pw)
+          SYNAPSEML_ENABLE_PUBLISH: true
+      - task: AzureCLI@2
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            set -e
+            sbt publishBadges
+        condition: and(succeeded(), eq(variables.isMaster, true))
+        displayName: Publish Badges
 
-#
-#- job: PublishDocker
-#  displayName: PublishDocker
-#  pool:
-#    vmImage: ubuntu-20.04
-#  steps:
-#    - template: templates/checkout.yml
-#    - task: AzureCLI@2
-#      displayName: 'Get Docker Tag + Version'
-#      inputs:
-#        azureSubscription: 'SynapseML Build'
-#        scriptLocation: inlineScript
-#        scriptType: bash
-#        inlineScript: |
-#          VERSION=$(sbt "core/version" | tail -1 |  cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g')
-#          echo '##vso[task.setvariable variable=version]'$VERSION
-#          echo '##vso[task.setvariable variable=gittag]'$(git tag -l --points-at HEAD)
-#    - task: Docker@2
-#      displayName: Demo Image Build
-#      inputs:
-#        containerRegistry: 'SynapseML MCR MSI'
-#        repository: 'public/mmlspark/build-demo'
-#        command: 'build'
-#        buildContext: "."
-#        Dockerfile: 'tools/docker/demo/Dockerfile'
-#        tags: $(version)
-#        arguments: --build-arg SYNAPSEML_VERSION=$(version)
-#    - task: Docker@2
-#      displayName: Demo Image Push
-#      inputs:
-#        containerRegistry: 'SynapseML MCR MSI'
-#        repository: 'public/mmlspark/build-demo'
-#        command: 'push'
-#        tags: $(version)
-#    - task: Docker@2
-#      displayName: Minimal Image Build
-#      inputs:
-#        containerRegistry: 'SynapseML MCR MSI'
-#        repository: 'public/mmlspark/build-minimal'
-#        command: 'build'
-#        buildContext: "."
-#        Dockerfile: 'tools/docker/minimal/Dockerfile'
-#        tags: $(version)
-#        arguments: --build-arg SYNAPSEML_VERSION=$(version)
-#    - task: Docker@2
-#      displayName: Minimal Image Push
-#      inputs:
-#        containerRegistry: 'SynapseML MCR MSI'
-#        repository: 'public/mmlspark/build-minimal'
-#        command: 'push'
-#        tags: $(version)
-#    - task: Docker@2
-#      condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v'))
-#      displayName: Release Image Build
-#      inputs:
-#        containerRegistry: 'SynapseML MCR MSI'
-#        repository: 'public/mmlspark/release'
-#        command: 'build'
-#        buildContext: "."
-#        Dockerfile: 'tools/docker/demo/Dockerfile'
-#        tags: |
-#          $(version)
-#          latest
-#        arguments: --build-arg SYNAPSEML_VERSION=$(version)
-#    - task: Docker@2
-#      condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v'))
-#      displayName: Release Image Push
-#      inputs:
-#        containerRegistry: 'SynapseML MCR MSI'
-#        repository: 'public/mmlspark/release'
-#        command: 'push'
-#        tags: |
-#          $(version)
-#          latest
-#    - task: ComponentGovernanceComponentDetection@0
+  - job: WebsiteAutoDeployment
+    cancelTimeoutInMinutes: 0
+    pool:
+      vmImage: ubuntu-20.04
+    steps:
+      - checkout: self
+        fetchDepth: 1
+        clean: true
+        submodules: false
+        lfs: false
+        persistCredentials: true
+      - template: templates/update_cli.yml
+      - template: templates/conda.yml
+      - template: templates/kv.yml
+      - task: NodeTool@0
+        inputs:
+          versionSpec: '16.x'
+        displayName: 'Install Node.js'
+      - task: AzureCLI@2
+        displayName: 'Convert notebooks to markdowns'
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            source activate synapseml
+            sbt convertNotebooks
+      - bash: |
+          set -e
+          yarn install
+          cd website
+          yarn
+          yarn build
+        displayName: 'yarn install and build'
+      - bash: |
+          set -e
+          git config --global user.name "${GH_NAME}"
+          git config --global user.email "${GH_EMAIL}"
+          git checkout -b main
+          echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc
+          cd website
+          GIT_USER="${GH_NAME}" yarn deploy
+        condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master'))
+        env:
+          GH_NAME: $(gh-name)
+          GH_EMAIL: $(gh-email)
+          GH_TOKEN: $(gh-token)
+        displayName: 'yarn deploy'
 
-- job: Release
-  cancelTimeoutInMinutes: 0
-  pool:
-    vmImage: ubuntu-20.04
-  condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
-  steps:
-    - template: templates/checkout.yml
-    - template: templates/update_cli.yml
-    - bash: |
-        echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD)
-      displayName: 'Get Git Tag'
-    - bash: |
-        set -e
-        wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64
-        chmod +x git-chglog_linux_amd64
-        ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG
-    - task: GitHubRelease@0
-      inputs:
-        gitHubConnection: 'MMLSpark Github'
-        repositoryName: '$(Build.Repository.Name)'
-        action: 'create'
-        target: '$(Build.SourceVersion)'
-        tagSource: 'auto'
-        releaseNotesFile: 'CHANGELOG.md'
-        isDraft: true
-    - template: templates/conda.yml
-    - task: AzureKeyVault@1
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        keyVaultName: mmlspark-keys
-    - bash: |
-        set -e
-        source activate synapseml
-        sbt publishPypi
-      env:
-        STORAGE-KEY: $(storage-key)
-        NEXUS-UN: $(nexus-un)
-        NEXUS-PW: $(nexus-pw)
-        PGP-PRIVATE: $(pgp-private)
-        PGP-PUBLIC: $(pgp-public)
-        PGP-PW: $(pgp-pw)
-        PYPI-API-TOKEN: $(pypi-api-token)
-        SYNAPSEML_ENABLE_PUBLISH: true
-      displayName: 'publish python package to pypi'
-    - bash: |
-        set -e
-        source activate synapseml
-        sbt publishLocalSigned
-        python tools/esrp/prepare_jar.py
-      env:
-        STORAGE-KEY: $(storage-key)
-        NEXUS-UN: $(nexus-un)
-        NEXUS-PW: $(nexus-pw)
-        PGP-PRIVATE: $(pgp-private)
-        PGP-PUBLIC: $(pgp-public)
-        PGP-PW: $(pgp-pw)
-        SYNAPSEML_ENABLE_PUBLISH: true
-      displayName: 'publish jar package to maven central'
-    - task: EsrpRelease@7
-      inputs:
-        ConnectedServiceName: 'DataScienceESRPRelease2024'
-        # The keyvault hosting the certs https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/13842c9d-5a2d-4da1-84a8-3383f543d9ba/resourceGroups/esrp/providers/Microsoft.KeyVault/vaults/synapseml-esrp-kv/overview
-        keyvaultname: 'synapseml-esrp-kv'
-        authcertname: 'ReleaseAutomation'
-        signcertname: 'ESRPReqSignCA'
-        # The entra app https://ms.portal.azure.com/#view/Microsoft_AAD_RegisteredApps/ApplicationMenuBlade/~/Manifest/appId/1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6/isMSAApp~/false
-        clientid: '1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6'
-        Intent: 'PackageDistribution'
-        ContentType: 'Maven'
-        contentsource: 'Folder'
-        folderlocation: '/home/vsts/.ivy2/local/com.microsoft.azure/'
-        Owners: 'richwyd@microsoft.com,taniaarya@microsoft.com,marcozo@microsoft.com,romanbat@microsoft.com'
-        Approvers: 'romanbat@microsoft.com,markus.weimer@microsoft.com,negust@microsoft.com'
-        ServiceEndpointUrl: 'https://api.esrp.microsoft.com'
-        MainPublisher: 'synapseml'
-        DomainTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47'
-        waitforreleasecompletion: true
-      displayName: 'ESRP Publish Package'
+  #
+  #- job: PublishDocker
+  #  displayName: PublishDocker
+  #  pool:
+  #    vmImage: ubuntu-20.04
+  #  steps:
+  #    - template: templates/checkout.yml
+  #    - task: AzureCLI@2
+  #      displayName: 'Get Docker Tag + Version'
+  #      inputs:
+  #        azureSubscription: 'SynapseML Build'
+  #        scriptLocation: inlineScript
+  #        scriptType: bash
+  #        inlineScript: |
+  #          VERSION=$(sbt "core/version" | tail -1 |  cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g')
+  #          echo '##vso[task.setvariable variable=version]'$VERSION
+  #          echo '##vso[task.setvariable variable=gittag]'$(git tag -l --points-at HEAD)
+  #    - task: Docker@2
+  #      displayName: Demo Image Build
+  #      inputs:
+  #        containerRegistry: 'SynapseML MCR MSI'
+  #        repository: 'public/mmlspark/build-demo'
+  #        command: 'build'
+  #        buildContext: "."
+  #        Dockerfile: 'tools/docker/demo/Dockerfile'
+  #        tags: $(version)
+  #        arguments: --build-arg SYNAPSEML_VERSION=$(version)
+  #    - task: Docker@2
+  #      displayName: Demo Image Push
+  #      inputs:
+  #        containerRegistry: 'SynapseML MCR MSI'
+  #        repository: 'public/mmlspark/build-demo'
+  #        command: 'push'
+  #        tags: $(version)
+  #    - task: Docker@2
+  #      displayName: Minimal Image Build
+  #      inputs:
+  #        containerRegistry: 'SynapseML MCR MSI'
+  #        repository: 'public/mmlspark/build-minimal'
+  #        command: 'build'
+  #        buildContext: "."
+  #        Dockerfile: 'tools/docker/minimal/Dockerfile'
+  #        tags: $(version)
+  #        arguments: --build-arg SYNAPSEML_VERSION=$(version)
+  #    - task: Docker@2
+  #      displayName: Minimal Image Push
+  #      inputs:
+  #        containerRegistry: 'SynapseML MCR MSI'
+  #        repository: 'public/mmlspark/build-minimal'
+  #        command: 'push'
+  #        tags: $(version)
+  #    - task: Docker@2
+  #      condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v'))
+  #      displayName: Release Image Build
+  #      inputs:
+  #        containerRegistry: 'SynapseML MCR MSI'
+  #        repository: 'public/mmlspark/release'
+  #        command: 'build'
+  #        buildContext: "."
+  #        Dockerfile: 'tools/docker/demo/Dockerfile'
+  #        tags: |
+  #          $(version)
+  #          latest
+  #        arguments: --build-arg SYNAPSEML_VERSION=$(version)
+  #    - task: Docker@2
+  #      condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v'))
+  #      displayName: Release Image Push
+  #      inputs:
+  #        containerRegistry: 'SynapseML MCR MSI'
+  #        repository: 'public/mmlspark/release'
+  #        command: 'push'
+  #        tags: |
+  #          $(version)
+  #          latest
+  #    - task: ComponentGovernanceComponentDetection@0
 
+- stage: E2E
+  displayName: "End to End Tests"
+  jobs:
+  - job: E2E
+    timeoutInMinutes: 120
+    cancelTimeoutInMinutes: 0
+    pool:
+      vmImage: ubuntu-20.04
+    strategy:
+      matrix:
+        databricks-cpu:
+          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests"
+        databricks-gpu:
+          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests"
+        databricks-rapids:
+          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests"
+        synapse:
+          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests"
+  #      ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}:
+  #        synapse-internal:
+  #          TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests"
+    steps:
+      - template: templates/checkout.yml
+      #- template: templates/ivy_cache.yml
+      - template: templates/update_cli.yml
+      - template: templates/conda.yml
+      - template: templates/kv.yml
+      - template: templates/publish.yml
+      - task: AzureCLI@2
+        displayName: 'E2E'
+        inputs:
+          azureSubscription:  'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            set -e
+            source activate synapseml
+            sbt "testOnly $(TEST-CLASS)"
+        condition: and(succeeded(), eq(variables.runTests, 'True'))
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results'
+        inputs:
+          testResultsFiles: '**/test-reports/TEST-*.xml'
+          failTaskOnFailedTests: true
+        condition: and(eq(variables.runTests, 'True'), succeededOrFailed())
 
-- job: PythonTests
-  timeoutInMinutes: 120
-  cancelTimeoutInMinutes: 0
-  condition: eq(variables.runTests, 'True')
-  pool:
-    vmImage: ubuntu-22.04
-  strategy:
-    matrix:
-      core:
-        PACKAGE: "core"
-      deep-learning:
-        PACKAGE: "deepLearning"
-      lightgbm:
-        PACKAGE: "lightgbm"
-      opencv:
-        PACKAGE: "opencv"
-      vw:
-        PACKAGE: "vw"
-      cognitive:
-        PACKAGE: "cognitive"
-  steps:
-    - template: templates/checkout.yml
-    #- template: templates/ivy_cache.yml
-    - template: templates/update_cli.yml
-    - template: templates/conda.yml
-    - template: templates/kv.yml
-    - task: AzureCLI@2
-      displayName: 'Install and package deps'
-      timeoutInMinutes: 40
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
-          source activate synapseml
-          sbt coverage getDatasets installPipPackage
-          sbt publishM2
-    - task: AzureCLI@2
-      displayName: 'Test Python Code'
-      retryCountOnTaskFailure: 1
-      timeoutInMinutes: 40
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
+- stage: Release
+  displayName: "Release"
+  jobs:
+  - job: Release
+    cancelTimeoutInMinutes: 0
+    pool:
+      vmImage: ubuntu-20.04
+    condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v'))
+    steps:
+      - template: templates/checkout.yml
+      - template: templates/update_cli.yml
+      - bash: |
+          echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD)
+        displayName: 'Get Git Tag'
+      - bash: |
           set -e
-          source activate synapseml
-          export SBT_OPTS="-XX:+UseG1GC"
-          echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS"
-          echo "SBT_OPTS=$SBT_OPTS"
-          (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython)
-    - task: PublishTestResults@2
-      displayName: 'Publish Test Results'
-      inputs:
-        testResultsFiles: '**/python-test-*.xml'
-        failTaskOnFailedTests: true
-      condition: succeededOrFailed()
-    - task: AzureCLI@2
-      displayName: 'Generate Codecov report'
-      retryCountOnTaskFailure: 1
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: 'sbt coverageReport'
-      condition: succeededOrFailed()
-    - template: templates/codecov.yml
-
-
-- job: RTests
-  timeoutInMinutes: 60
-  cancelTimeoutInMinutes: 0
-  condition: eq(variables.runTests, 'True')
-  pool:
-    vmImage: ubuntu-20.04
-  strategy:
-    matrix:
-      core:
-        PACKAGE: "core"
-      deep-learning:
-        PACKAGE: "deepLearning"
-      lightgbm:
-        PACKAGE: "lightgbm"
-      opencv:
-        PACKAGE: "opencv"
-      vw:
-        PACKAGE: "vw"
-      cognitive:
-        PACKAGE: "cognitive"
-  steps:
-    - template: templates/checkout.yml
-    #- template: templates/ivy_cache_2.yml
-    - template: templates/update_cli.yml
-    - template: templates/conda.yml
-    - template: templates/kv.yml
-    - task: AzureCLI@2
-      displayName: 'Prepare for tests'
-      retryCountOnTaskFailure: 1
-      timeoutInMinutes: 60
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
+          wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64
+          chmod +x git-chglog_linux_amd64
+          ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG
+      - task: GitHubRelease@0
+        inputs:
+          gitHubConnection: 'MMLSpark Github'
+          repositoryName: '$(Build.Repository.Name)'
+          action: 'create'
+          target: '$(Build.SourceVersion)'
+          tagSource: 'auto'
+          releaseNotesFile: 'CHANGELOG.md'
+          isDraft: true
+      - template: templates/conda.yml
+      - task: AzureKeyVault@1
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          keyVaultName: mmlspark-keys
+      - bash: |
           set -e
-          export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M  -Duser.timezone=GMT"
           source activate synapseml
-          (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
-          sbt codegen
-          sbt publishM2
-          SPARK_VERSION=3.4.1
-          HADOOP_VERSION=3
-          wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
-    - task: AzureCLI@2
-      displayName: 'Test R Code'
-      retryCountOnTaskFailure: 3
-      timeoutInMinutes: 20
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
+          sbt publishPypi
+        env:
+          STORAGE-KEY: $(storage-key)
+          NEXUS-UN: $(nexus-un)
+          NEXUS-PW: $(nexus-pw)
+          PGP-PRIVATE: $(pgp-private)
+          PGP-PUBLIC: $(pgp-public)
+          PGP-PW: $(pgp-pw)
+          PYPI-API-TOKEN: $(pypi-api-token)
+          SYNAPSEML_ENABLE_PUBLISH: true
+        displayName: 'publish python package to pypi'
+      - bash: |
           set -e
-          export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M  -Duser.timezone=GMT"
           source activate synapseml
-          timeout 20m sbt -DskipCodegen=true "project $(PACKAGE)" coverage testR
-    - task: PublishTestResults@2
-      displayName: 'Publish Test Results'
-      inputs:
-        testResultsFiles: '**/r-test-*.xml'
-        failTaskOnFailedTests: true
-      condition: succeededOrFailed()
-    - task: AzureCLI@2
-      retryCountOnTaskFailure: 1
-      displayName: 'Generate Codecov report'
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: 'sbt coverageReport'
-      condition: succeededOrFailed()
-    - template: templates/codecov.yml
-
-- job: BuildAndCacheCondaEnv
-  cancelTimeoutInMinutes: 0
-  condition: eq(variables.runTests, 'True')
-  pool:
-    vmImage: ubuntu-20.04
-  steps:
-    - template: templates/checkout.yml
-    - template: templates/conda.yml
-    - bash: df -H
-
-- job: WebsiteSamplesTests
-  cancelTimeoutInMinutes: 0
-  condition: eq(variables.runTests, 'True')
-  pool:
-    vmImage: ubuntu-20.04
-  steps:
-    - template: templates/checkout.yml
-    #- template: templates/ivy_cache.yml
-    - template: templates/update_cli.yml
-    - template: templates/conda.yml
-    - template: templates/kv.yml
-    - template: templates/publish.yml
-    - task: AzureCLI@2
-      displayName: 'Test Website Samples'
-      timeoutInMinutes: 30
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
-          (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
-          (sbt coverage testWebsiteDocs)
-    - task: PublishTestResults@2
-      displayName: 'Publish Test Results'
-      inputs:
-        testResultsFiles: '**/website-test-result.xml'
-        failTaskOnFailedTests: true
-      condition: succeededOrFailed()
-    - task: AzureCLI@2
-      displayName: 'Generate Codecov report'
-      retryCountOnTaskFailure: 1
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: 'sbt coverageReport'
-      condition: succeededOrFailed()
-    - template: templates/codecov.yml
-
+          sbt publishLocalSigned
+          python tools/esrp/prepare_jar.py
+        env:
+          STORAGE-KEY: $(storage-key)
+          NEXUS-UN: $(nexus-un)
+          NEXUS-PW: $(nexus-pw)
+          PGP-PRIVATE: $(pgp-private)
+          PGP-PUBLIC: $(pgp-public)
+          PGP-PW: $(pgp-pw)
+          SYNAPSEML_ENABLE_PUBLISH: true
+        displayName: 'publish jar package to maven central'
+      - task: EsrpRelease@7
+        inputs:
+          ConnectedServiceName: 'DataScienceESRPRelease2024'
+          # The keyvault hosting the certs https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/13842c9d-5a2d-4da1-84a8-3383f543d9ba/resourceGroups/esrp/providers/Microsoft.KeyVault/vaults/synapseml-esrp-kv/overview
+          keyvaultname: 'synapseml-esrp-kv'
+          authcertname: 'ReleaseAutomation'
+          signcertname: 'ESRPReqSignCA'
+          # The entra app https://ms.portal.azure.com/#view/Microsoft_AAD_RegisteredApps/ApplicationMenuBlade/~/Manifest/appId/1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6/isMSAApp~/false
+          clientid: '1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6'
+          Intent: 'PackageDistribution'
+          ContentType: 'Maven'
+          contentsource: 'Folder'
+          folderlocation: '/home/vsts/.ivy2/local/com.microsoft.azure/'
+          Owners: 'richwyd@microsoft.com,taniaarya@microsoft.com,marcozo@microsoft.com,romanbat@microsoft.com'
+          Approvers: 'romanbat@microsoft.com,markus.weimer@microsoft.com,negust@microsoft.com'
+          ServiceEndpointUrl: 'https://api.esrp.microsoft.com'
+          MainPublisher: 'synapseml'
+          DomainTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47'
+          waitforreleasecompletion: true
+        displayName: 'ESRP Publish Package'
 
+- stage: Tests
+  displayName: "Unit Tests"
+  jobs:
+  - job: PythonTests
+    timeoutInMinutes: 120
+    cancelTimeoutInMinutes: 0
+    condition: eq(variables.runTests, 'True')
+    pool:
+      vmImage: ubuntu-22.04
+    strategy:
+      matrix:
+        core:
+          PACKAGE: "core"
+        deep-learning:
+          PACKAGE: "deepLearning"
+        lightgbm:
+          PACKAGE: "lightgbm"
+        opencv:
+          PACKAGE: "opencv"
+        vw:
+          PACKAGE: "vw"
+        cognitive:
+          PACKAGE: "cognitive"
+    steps:
+      - template: templates/checkout.yml
+      #- template: templates/ivy_cache.yml
+      - template: templates/update_cli.yml
+      - template: templates/conda.yml
+      - template: templates/kv.yml
+      - task: AzureCLI@2
+        displayName: 'Install and package deps'
+        timeoutInMinutes: 40
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            source activate synapseml
+            sbt coverage getDatasets installPipPackage
+            sbt publishM2
+      - task: AzureCLI@2
+        displayName: 'Test Python Code'
+        retryCountOnTaskFailure: 1
+        timeoutInMinutes: 40
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            set -e
+            source activate synapseml
+            export SBT_OPTS="-XX:+UseG1GC"
+            echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS"
+            echo "SBT_OPTS=$SBT_OPTS"
+            (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython)
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results'
+        inputs:
+          testResultsFiles: '**/python-test-*.xml'
+          failTaskOnFailedTests: true
+        condition: succeededOrFailed()
+      - task: AzureCLI@2
+        displayName: 'Generate Codecov report'
+        retryCountOnTaskFailure: 1
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: 'sbt coverageReport'
+        condition: succeededOrFailed()
+      - template: templates/codecov.yml
 
-- job: WebsiteAutoDeployment
-  cancelTimeoutInMinutes: 0
-  pool:
-    vmImage: ubuntu-20.04
-  steps:
-    - checkout: self
-      fetchDepth: 1
-      clean: true
-      submodules: false
-      lfs: false
-      persistCredentials: true
-    - template: templates/update_cli.yml
-    - template: templates/conda.yml
-    - template: templates/kv.yml
-    - task: NodeTool@0
-      inputs:
-        versionSpec: '16.x'
-      displayName: 'Install Node.js'
-    - task: AzureCLI@2
-      displayName: 'Convert notebooks to markdowns'
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
-          source activate synapseml
-          sbt convertNotebooks
-    - bash: |
-        set -e
-        yarn install
-        cd website
-        yarn
-        yarn build
-      displayName: 'yarn install and build'
-    - bash: |
-        set -e
-        git config --global user.name "${GH_NAME}"
-        git config --global user.email "${GH_EMAIL}"
-        git checkout -b main
-        echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc
-        cd website
-        GIT_USER="${GH_NAME}" yarn deploy
-      condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master'))
-      env:
-        GH_NAME: $(gh-name)
-        GH_EMAIL: $(gh-email)
-        GH_TOKEN: $(gh-token)
-      displayName: 'yarn deploy'
+  - job: RTests
+    timeoutInMinutes: 60
+    cancelTimeoutInMinutes: 0
+    condition: eq(variables.runTests, 'True')
+    pool:
+      vmImage: ubuntu-20.04
+    strategy:
+      matrix:
+        core:
+          PACKAGE: "core"
+        deep-learning:
+          PACKAGE: "deepLearning"
+        lightgbm:
+          PACKAGE: "lightgbm"
+        opencv:
+          PACKAGE: "opencv"
+        vw:
+          PACKAGE: "vw"
+        cognitive:
+          PACKAGE: "cognitive"
+    steps:
+      - template: templates/checkout.yml
+      #- template: templates/ivy_cache_2.yml
+      - template: templates/update_cli.yml
+      - template: templates/conda.yml
+      - template: templates/kv.yml
+      - task: AzureCLI@2
+        displayName: 'Prepare for tests'
+        retryCountOnTaskFailure: 1
+        timeoutInMinutes: 60
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            set -e
+            export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M  -Duser.timezone=GMT"
+            source activate synapseml
+            (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
+            sbt codegen
+            sbt publishM2
+            SPARK_VERSION=3.4.1
+            HADOOP_VERSION=3
+            wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
+      - task: AzureCLI@2
+        displayName: 'Test R Code'
+        retryCountOnTaskFailure: 3
+        timeoutInMinutes: 20
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            set -e
+            export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M  -Duser.timezone=GMT"
+            source activate synapseml
+            timeout 20m sbt -DskipCodegen=true "project $(PACKAGE)" coverage testR
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results'
+        inputs:
+          testResultsFiles: '**/r-test-*.xml'
+          failTaskOnFailedTests: true
+        condition: succeededOrFailed()
+      - task: AzureCLI@2
+        retryCountOnTaskFailure: 1
+        displayName: 'Generate Codecov report'
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: 'sbt coverageReport'
+        condition: succeededOrFailed()
+      - template: templates/codecov.yml
 
+  - job: WebsiteSamplesTests
+    cancelTimeoutInMinutes: 0
+    condition: eq(variables.runTests, 'True')
+    pool:
+      vmImage: ubuntu-20.04
+    steps:
+      - template: templates/checkout.yml
+      #- template: templates/ivy_cache.yml
+      - template: templates/update_cli.yml
+      - template: templates/conda.yml
+      - template: templates/kv.yml
+      - template: templates/publish.yml
+      - task: AzureCLI@2
+        displayName: 'Test Website Samples'
+        timeoutInMinutes: 30
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
+            (sbt coverage testWebsiteDocs)
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results'
+        inputs:
+          testResultsFiles: '**/website-test-result.xml'
+          failTaskOnFailedTests: true
+        condition: succeededOrFailed()
+      - task: AzureCLI@2
+        displayName: 'Generate Codecov report'
+        retryCountOnTaskFailure: 1
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: 'sbt coverageReport'
+        condition: succeededOrFailed()
+      - template: templates/codecov.yml
 
-- job: UnitTests
-  cancelTimeoutInMinutes: 1
-  timeoutInMinutes: 80
-  condition: eq(variables.runTests, 'True')
-  pool:
-    vmImage: ubuntu-20.04
-  strategy:
-    matrix:
-      automl:
-        PACKAGE: "automl"
-      causal:
-        PACKAGE: "causal"
-      onnx:
-        PACKAGE: "onnx"
-      geospatial:
-        PACKAGE: "services.geospatial"
-      anomaly:
-        PACKAGE: "services.anomaly"
-        FLAKY: "true"
-      bing:
-        PACKAGE: "services.bing"
-        FLAKY: "true"
-      face:
-        PACKAGE: "services.face"
-        FLAKY: "true"
-      form:
-        PACKAGE: "services.form"
-        FLAKY: "true"
-      language:
-        PACKAGE: "services.language"
-        FLAKY: "true"
-      openai:
-        PACKAGE: "services.openai"
-        FLAKY: "true"
-      search:
-        PACKAGE: "services.search"
-        FFMPEG: "true"
-        FLAKY: "true"
-      speech:
-        PACKAGE: "services.speech"
-        FFMPEG: "true"
-        FLAKY: "true"
-      text:
-        PACKAGE: "services.text"
-        FLAKY: "true"
-      translate:
-        PACKAGE: "services.translate"
-        FLAKY: "true"
-      vision:
-        PACKAGE: "services.vision"
-        FLAKY: "true"
-      core:
-        PACKAGE: "core"
-      explainers1:
-        PACKAGE: "explainers.split1"
-      explainers2:
-        PACKAGE: "explainers.split2"
-      explainers3:
-        PACKAGE: "explainers.split3"
-      exploratory:
-        PACKAGE: "exploratory"
-      featurize:
-        PACKAGE: "featurize"
-      image:
-        PACKAGE: "image"
-      io1:
-        PACKAGE: "io.split1"
-        FLAKY: "true"
-      io2:
-        PACKAGE: "io.split2"
-        FLAKY: "true"
-      isolationforest:
-        PACKAGE: "isolationforest"
-      flaky:
-        PACKAGE: "flaky"           #TODO fix flaky test so isolation is not needed
-        FLAKY: "true"
-      lightgbm1:
-        PACKAGE: "lightgbm.split1" #TODO speed up LGBM Tests and remove split
-        FLAKY: "true"
-      lightgbm2:
-        PACKAGE: "lightgbm.split2"
-        FLAKY: "true"
-      lightgbm3:
-        PACKAGE: "lightgbm.split3"
-        FLAKY: "true"
-      lightgbm4:
-        PACKAGE: "lightgbm.split4"
-        FLAKY: "true"
-      lightgbm5:
-        PACKAGE: "lightgbm.split5"
-        FLAKY: "true"
-      lightgbm6:
-        PACKAGE: "lightgbm.split6"
-        FLAKY: "true"
-      opencv:
-        PACKAGE: "opencv"
-      recommendation:
-        PACKAGE: "recommendation"
-      stages:
-        PACKAGE: "stages"
-      nn:
-        PACKAGE: "nn"
-      train:
-        PACKAGE: "train"
-      vw:
-        PACKAGE: "vw"
-  steps:
-    - template: templates/checkout.yml
-    #- template: templates/ivy_cache.yml
-    - template: templates/update_cli.yml
-    - task: AzureCLI@2
-      displayName: 'Setup repo'
-      retryCountOnTaskFailure: 1
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
-          (timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests)
-          (${FFMPEG:-false} && sudo apt-get update && \
-          sudo apt-get install ffmpeg libgstreamer1.0-0 \
-          gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-ugly -y)
-          (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
-    - task: AzureCLI@2
-      displayName: 'Unit Test'
-      retryCountOnTaskFailure: 1
-      timeoutInMinutes: 90
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: |
-          ulimit -c unlimited
-          export SBT_OPTS="-Xmx2G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xss2M  -Duser.timezone=GMT"
-          (timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") ||
-          (${FLAKY:-false} && timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**")
-    - task: PublishTestResults@2
-      displayName: 'Publish Test Results'
-      inputs:
-        testResultsFiles: '**/test-reports/TEST-*.xml'
-        failTaskOnFailedTests: true
-      condition: succeededOrFailed()
-    - task: AzureCLI@2
-      displayName: 'Generate Codecov report'
-      retryCountOnTaskFailure: 1
-      inputs:
-        azureSubscription: 'SynapseML Build'
-        scriptLocation: inlineScript
-        scriptType: bash
-        inlineScript: 'sbt coverageReport'
-      condition: succeededOrFailed()
-    - template: templates/kv.yml
-    - template: templates/codecov.yml
+  - job: UnitTests
+    cancelTimeoutInMinutes: 1
+    timeoutInMinutes: 80
+    condition: eq(variables.runTests, 'True')
+    pool:
+      vmImage: ubuntu-20.04
+    strategy:
+      matrix:
+        automl:
+          PACKAGE: "automl"
+        causal:
+          PACKAGE: "causal"
+        onnx:
+          PACKAGE: "onnx"
+        geospatial:
+          PACKAGE: "services.geospatial"
+        anomaly:
+          PACKAGE: "services.anomaly"
+          FLAKY: "true"
+        bing:
+          PACKAGE: "services.bing"
+          FLAKY: "true"
+        face:
+          PACKAGE: "services.face"
+          FLAKY: "true"
+        form:
+          PACKAGE: "services.form"
+          FLAKY: "true"
+        language:
+          PACKAGE: "services.language"
+          FLAKY: "true"
+        openai:
+          PACKAGE: "services.openai"
+          FLAKY: "true"
+        search:
+          PACKAGE: "services.search"
+          FFMPEG: "true"
+          FLAKY: "true"
+        speech:
+          PACKAGE: "services.speech"
+          FFMPEG: "true"
+          FLAKY: "true"
+        text:
+          PACKAGE: "services.text"
+          FLAKY: "true"
+        translate:
+          PACKAGE: "services.translate"
+          FLAKY: "true"
+        vision:
+          PACKAGE: "services.vision"
+          FLAKY: "true"
+        core:
+          PACKAGE: "core"
+        explainers1:
+          PACKAGE: "explainers.split1"
+        explainers2:
+          PACKAGE: "explainers.split2"
+        explainers3:
+          PACKAGE: "explainers.split3"
+        exploratory:
+          PACKAGE: "exploratory"
+        featurize:
+          PACKAGE: "featurize"
+        image:
+          PACKAGE: "image"
+        io1:
+          PACKAGE: "io.split1"
+          FLAKY: "true"
+        io2:
+          PACKAGE: "io.split2"
+          FLAKY: "true"
+        isolationforest:
+          PACKAGE: "isolationforest"
+        flaky:
+          PACKAGE: "flaky"           #TODO fix flaky test so isolation is not needed
+          FLAKY: "true"
+        lightgbm1:
+          PACKAGE: "lightgbm.split1" #TODO speed up LGBM Tests and remove split
+          FLAKY: "true"
+        lightgbm2:
+          PACKAGE: "lightgbm.split2"
+          FLAKY: "true"
+        lightgbm3:
+          PACKAGE: "lightgbm.split3"
+          FLAKY: "true"
+        lightgbm4:
+          PACKAGE: "lightgbm.split4"
+          FLAKY: "true"
+        lightgbm5:
+          PACKAGE: "lightgbm.split5"
+          FLAKY: "true"
+        lightgbm6:
+          PACKAGE: "lightgbm.split6"
+          FLAKY: "true"
+        opencv:
+          PACKAGE: "opencv"
+        recommendation:
+          PACKAGE: "recommendation"
+        stages:
+          PACKAGE: "stages"
+        nn:
+          PACKAGE: "nn"
+        train:
+          PACKAGE: "train"
+        vw:
+          PACKAGE: "vw"
+    steps:
+      - template: templates/checkout.yml
+      #- template: templates/ivy_cache.yml
+      - template: templates/update_cli.yml
+      - task: AzureCLI@2
+        displayName: 'Setup repo'
+        retryCountOnTaskFailure: 1
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            (timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests)
+            (${FFMPEG:-false} && sudo apt-get update && \
+            sudo apt-get install ffmpeg libgstreamer1.0-0 \
+            gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-ugly -y)
+            (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
+      - task: AzureCLI@2
+        displayName: 'Unit Test'
+        retryCountOnTaskFailure: 1
+        timeoutInMinutes: 90
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: |
+            ulimit -c unlimited
+            export SBT_OPTS="-Xmx2G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xss2M  -Duser.timezone=GMT"
+            (timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") ||
+            (${FLAKY:-false} && timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**")
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results'
+        inputs:
+          testResultsFiles: '**/test-reports/TEST-*.xml'
+          failTaskOnFailedTests: true
+        condition: succeededOrFailed()
+      - task: AzureCLI@2
+        displayName: 'Generate Codecov report'
+        retryCountOnTaskFailure: 1
+        inputs:
+          azureSubscription: 'SynapseML Build'
+          scriptLocation: inlineScript
+          scriptType: bash
+          inlineScript: 'sbt coverageReport'
+        condition: succeededOrFailed()
+      - template: templates/kv.yml
+      - template: templates/codecov.yml
diff --git a/templates/conda.yml b/templates/conda.yml
index 771daaf4ce..a9ad3d063f 100644
--- a/templates/conda.yml
+++ b/templates/conda.yml
@@ -22,3 +22,10 @@ steps:
     displayName: Create Anaconda environment
     retryCountOnTaskFailure: 1
     condition: eq(variables.CONDA_CACHE_RESTORED, 'false')
+  # Diagnostic only: log overall disk usage and the size of the conda cache directory.
+  - bash: |
+      echo "system usage:"
+      sudo df -h
+      echo "conda cache usage:"
+      sudo du -h --max-depth=1 "$(CONDA_CACHE_DIR)"
+    displayName: Display disk usage

From 35f513ba605a73e4a02ddb1563fae4b0498b6051 Mon Sep 17 00:00:00 2001
From: Brendan Walsh <brwals@microsoft.com>
Date: Thu, 15 Aug 2024 23:27:01 -0700
Subject: [PATCH 6/6] fix stage dependencies

---
 pipeline.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pipeline.yaml b/pipeline.yaml
index a1505f0c2c..d99ef33ed3 100644
--- a/pipeline.yaml
+++ b/pipeline.yaml
@@ -61,6 +61,7 @@ variables:
 stages:
 - stage: Lint
   displayName: "Lint"
+  dependsOn: []  # explicit empty list (not a bare null): no upstream stage dependencies
   jobs:
   - job: Style
     cancelTimeoutInMinutes: 0
@@ -85,6 +86,7 @@ stages:
 
 - stage: Build
   displayName: "Build and Publish"
+  dependsOn: []  # explicit empty list (not a bare null): no upstream stage dependencies
   jobs:
   - job: BuildAndCacheCondaEnv
     cancelTimeoutInMinutes: 0
@@ -273,6 +275,7 @@ stages:
 
 - stage: E2E
   displayName: "End to End Tests"
+  dependsOn: []  # explicit empty list (not a bare null): no upstream stage dependencies
   jobs:
   - job: E2E
     timeoutInMinutes: 120
@@ -319,6 +322,7 @@ stages:
 
 - stage: Release
   displayName: "Release"
+  dependsOn: []  # explicit empty list (not a bare null): no upstream stage dependencies
   jobs:
   - job: Release
     cancelTimeoutInMinutes: 0
@@ -401,6 +405,7 @@ stages:
 
 - stage: Tests
   displayName: "Unit Tests"
+  dependsOn: []  # explicit empty list (not a bare null): no upstream stage dependencies
   jobs:
   - job: PythonTests
     timeoutInMinutes: 120