From 46773c930bc99d12e22121d42247688f28972d33 Mon Sep 17 00:00:00 2001 From: Brendan Walsh <brwals@microsoft.com> Date: Thu, 15 Aug 2024 15:24:28 -0700 Subject: [PATCH 1/6] chore: optimize git checkouts --- pipeline.yaml | 13 +++++++++++++ templates/checkout.yml | 6 ++++++ 2 files changed, 19 insertions(+) create mode 100644 templates/checkout.yml diff --git a/pipeline.yaml b/pipeline.yaml index 0e75e509c1..fb46411bab 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -64,6 +64,7 @@ jobs: pool: vmImage: ubuntu-20.04 steps: + - template: templates/checkout.yml - task: AzureCLI@2 displayName: 'Scala Style Check' inputs: @@ -147,6 +148,7 @@ jobs: # synapse-internal: # TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests" steps: + - template: templates/checkout.yml #- template: templates/ivy_cache.yml - template: templates/update_cli.yml - template: templates/conda.yml @@ -176,6 +178,7 @@ jobs: # pool: # vmImage: ubuntu-20.04 # steps: +# - template: templates/checkout.yml # - task: AzureCLI@2 # displayName: 'Get Docker Tag + Version' # inputs: @@ -250,6 +253,7 @@ jobs: pool: vmImage: ubuntu-20.04 steps: + - template: templates/checkout.yml - template: templates/update_cli.yml - bash: | echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD) @@ -369,6 +373,7 @@ jobs: cognitive: PACKAGE: "cognitive" steps: + - template: templates/checkout.yml #- template: templates/ivy_cache.yml - template: templates/update_cli.yml - template: templates/conda.yml @@ -438,6 +443,7 @@ jobs: cognitive: PACKAGE: "cognitive" steps: + - template: templates/checkout.yml #- template: templates/ivy_cache_2.yml - template: templates/update_cli.yml - template: templates/conda.yml @@ -496,6 +502,7 @@ jobs: pool: vmImage: ubuntu-20.04 steps: + - template: templates/checkout.yml - template: templates/conda.yml - bash: df -H @@ -505,6 +512,7 @@ jobs: pool: vmImage: ubuntu-20.04 steps: + - template: templates/checkout.yml #- template: templates/ivy_cache.yml - template: templates/update_cli.yml - template: templates/conda.yml @@ -545,6 +553,10 @@ jobs: vmImage: ubuntu-20.04 steps: - checkout: self + fetchDepth: 1 + clean: true + submodules: false + lfs: false persistCredentials: true - template: templates/update_cli.yml - template: templates/conda.yml @@ -692,6 +704,7 @@ jobs: vw: PACKAGE: "vw" steps: + - template: templates/checkout.yml #- template: templates/ivy_cache.yml - template: templates/update_cli.yml - task: AzureCLI@2 diff --git a/templates/checkout.yml b/templates/checkout.yml new file mode 100644 index 0000000000..977867297b --- /dev/null +++ b/templates/checkout.yml @@ -0,0 +1,6 @@ +steps: + - checkout: self + fetchDepth: 1 + clean: true + submodules: false + lfs: false From d80be72bfd51311488d81d903ec769fbd9c56c22 Mon Sep 17 00:00:00 2001 From: Brendan Walsh <brwals@microsoft.com> Date: Thu, 15 Aug 2024 15:30:12 -0700 Subject: [PATCH 2/6] Add missing job --- pipeline.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/pipeline.yaml b/pipeline.yaml index fb46411bab..a6d5489b8d 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -84,6 +84,7 @@ jobs: pool: vmImage: ubuntu-20.04 steps: + - template: templates/checkout.yml #- template: templates/ivy_cache.yml - template: templates/update_cli.yml - template: templates/conda.yml From fe09064ab31a572224f7a81e3a55a360baaa71a4 Mon Sep 17 00:00:00 2001 From: Brendan Walsh <brwals@microsoft.com> Date: Thu, 15 Aug 2024 20:13:02 -0700 Subject: [PATCH 3/6] refactor conda caching and specify cache folder --- pipeline.yaml | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/pipeline.yaml b/pipeline.yaml index a6d5489b8d..d098339ed7 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -53,7 +53,8 @@ parameters: variables: runTests: True - CONDA_CACHE_DIR: /usr/share/miniconda/envs + CONDA_ENV: synapseml + CONDA_CACHE_DIR: /usr/share/miniconda/envs/$(CONDA_ENV)/ ComponentDetection.Timeout: 900 isMaster: $[eq(variables['Build.SourceBranch'], 'refs/heads/master')] @@ -253,6 +254,7 @@ jobs: cancelTimeoutInMinutes: 0 pool: vmImage: ubuntu-20.04 + condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) steps: - template: templates/checkout.yml - template: templates/update_cli.yml @@ -264,9 +266,7 @@ jobs: wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64 chmod +x git-chglog_linux_amd64 ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - task: GitHubRelease@0 - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) inputs: gitHubConnection: 'MMLSpark Github' repositoryName: '$(Build.Repository.Name)' @@ -275,28 +275,8 @@ jobs: tagSource: 'auto' releaseNotesFile: 'CHANGELOG.md' isDraft: true - - bash: echo "##vso[task.prependpath]$CONDA/bin" - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - displayName: Add conda to PATH - - bash: sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR) - displayName: Fix directory permissions - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - - task: Cache@2 - displayName: Use cached Anaconda environment - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - inputs: - key: 'conda | "$(Agent.OS)" | environment.yml' - restoreKeys: | - python | "$(Agent.OS)" - python - path: $(CONDA_CACHE_DIR) - cacheHitVar: CONDA_CACHE_RESTORED - - bash: | - conda env create --force -f environment.yml -v - condition: and(eq(variables.isMaster, true), and(startsWith(variables['tag'], 'v'), eq(variables.CONDA_CACHE_RESTORED, 'false'))) - displayName: Create Anaconda environment + - template: templates/conda.yml - task: AzureKeyVault@1 - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) inputs: azureSubscription: 'SynapseML Build' keyVaultName: mmlspark-keys @@ -304,7 +284,6 @@ jobs: set -e source activate synapseml sbt publishPypi - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) env: STORAGE-KEY: $(storage-key) NEXUS-UN: $(nexus-un) @@ -320,7 +299,6 @@ jobs: source activate synapseml sbt publishLocalSigned python tools/esrp/prepare_jar.py - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) env: STORAGE-KEY: $(storage-key) NEXUS-UN: $(nexus-un) @@ -350,7 +328,6 @@ jobs: DomainTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' waitforreleasecompletion: true displayName: 'ESRP Publish Package' - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - job: PythonTests From 750bafc4555684c401271b746beabdad0f43c8ca Mon Sep 17 00:00:00 2001 From: Brendan Walsh <brwals@microsoft.com> Date: Thu, 15 Aug 2024 22:42:35 -0700 Subject: [PATCH 4/6] ensure conda env dir exists --- templates/conda.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/templates/conda.yml b/templates/conda.yml index 8c36f95929..771daaf4ce 100644 --- a/templates/conda.yml +++ b/templates/conda.yml @@ -2,7 +2,9 @@ steps: - bash: echo "##vso[task.prependpath]$CONDA/bin" displayName: Add conda to PATH retryCountOnTaskFailure: 1 - - bash: sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR) + - bash: | + mkdir -p $(CONDA_CACHE_DIR) + sudo chown -R $(whoami):$(id -ng) $(CONDA_CACHE_DIR) displayName: Fix directory permissions - task: Cache@2 displayName: Use cached Anaconda environment From 6200d6eb5378485afb1c75029b332e6894f1b4f8 Mon Sep 17 00:00:00 2001 From: Brendan Walsh <brwals@microsoft.com> Date: Thu, 15 Aug 2024 23:24:27 -0700 Subject: [PATCH 5/6] separated jobs into stages, added DU reporting --- pipeline.yaml | 1315 ++++++++++++++++++++++--------------------- templates/conda.yml | 6 + 2 files changed, 668 insertions(+), 653 deletions(-) diff --git a/pipeline.yaml b/pipeline.yaml index d098339ed7..a1505f0c2c 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -58,673 +58,682 @@ variables: ComponentDetection.Timeout: 900 isMaster: $[eq(variables['Build.SourceBranch'], 'refs/heads/master')] -jobs: -- job: Style - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - steps: - - template: templates/checkout.yml - - task: AzureCLI@2 - displayName: 'Scala Style Check' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt scalastyle test:scalastyle' - - template: templates/conda.yml - - bash: | - set -e - source activate synapseml - black --diff --color . && black --check -q . - displayName: 'Python Style Check' - -- job: Publish - cancelTimeoutInMinutes: 0 - pool: - vmImage: ubuntu-20.04 - steps: - - template: templates/checkout.yml - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - task: MavenAuthenticate@0 - name: mavenAuthPublicPackages - displayName: Authenticate SynapseML_PublicPackages - inputs: - artifactsFeeds: SynapseML_PublicPackages - mavenServiceConnections: SynapseML_PublicPackages-Feed-Connection - - task: AzureCLI@2 - displayName: 'Publish Artifacts' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | +stages: +- stage: Lint + displayName: "Lint" + jobs: + - job: Style + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + steps: + - template: templates/checkout.yml + - task: AzureCLI@2 + displayName: 'Scala Style Check' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt scalastyle test:scalastyle' + - template: templates/conda.yml + - bash: | set -e - sudo apt-get install graphviz doxygen -y source activate synapseml - sbt packagePython uploadNotebooks - sbt -DskipCodegen=true publishBlob publishDocs publishR publishPython - sbt genBuildInfo - echo "##vso[task.uploadsummary]$(pwd)/target/Build.md" - sbt -DskipCodegen=true publishLocalSigned - python tools/esrp/prepare_jar.py - env: - NEXUS-UN: $(nexus-un) - NEXUS-PW: $(nexus-pw) - PGP-PRIVATE: $(pgp-private) - PGP-PUBLIC: $(pgp-public) - PGP-PW: $(pgp-pw) - SYNAPSEML_ENABLE_PUBLISH: true - - task: AzureCLI@2 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - set -e - sbt publishBadges - condition: and(succeeded(), eq(variables.isMaster, true)) - displayName: Publish Badges + black --diff --color . && black --check -q . + displayName: 'Python Style Check' +- stage: Build + displayName: "Build and Publish" + jobs: + - job: BuildAndCacheCondaEnv + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + steps: + - template: templates/checkout.yml + - template: templates/conda.yml + - bash: df -H -- job: E2E - timeoutInMinutes: 120 - cancelTimeoutInMinutes: 0 - pool: - vmImage: ubuntu-20.04 - strategy: - matrix: - databricks-cpu: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests" - databricks-gpu: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests" - databricks-rapids: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests" - synapse: - TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests" -# ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}: -# synapse-internal: -# TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests" - steps: - - template: templates/checkout.yml - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - template: templates/publish.yml - - task: AzureCLI@2 - displayName: 'E2E' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - set -e - source activate synapseml - sbt "testOnly $(TEST-CLASS)" - condition: and(succeeded(), eq(variables.runTests, 'True')) - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/test-reports/TEST-*.xml' - failTaskOnFailedTests: true - condition: and(eq(variables.runTests, 'True'), succeededOrFailed()) + - job: Publish + cancelTimeoutInMinutes: 0 + pool: + vmImage: ubuntu-20.04 + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - task: MavenAuthenticate@0 + name: mavenAuthPublicPackages + displayName: Authenticate SynapseML_PublicPackages + inputs: + artifactsFeeds: SynapseML_PublicPackages + mavenServiceConnections: SynapseML_PublicPackages-Feed-Connection + - task: AzureCLI@2 + displayName: 'Publish Artifacts' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + sudo apt-get install graphviz doxygen -y + source activate synapseml + sbt packagePython uploadNotebooks + sbt -DskipCodegen=true publishBlob publishDocs publishR publishPython + sbt genBuildInfo + echo "##vso[task.uploadsummary]$(pwd)/target/Build.md" + sbt -DskipCodegen=true publishLocalSigned + python tools/esrp/prepare_jar.py + env: + NEXUS-UN: $(nexus-un) + NEXUS-PW: $(nexus-pw) + PGP-PRIVATE: $(pgp-private) + PGP-PUBLIC: $(pgp-public) + PGP-PW: $(pgp-pw) + SYNAPSEML_ENABLE_PUBLISH: true + - task: AzureCLI@2 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + sbt publishBadges + condition: and(succeeded(), eq(variables.isMaster, true)) + displayName: Publish Badges -# -#- job: PublishDocker -# displayName: PublishDocker -# pool: -# vmImage: ubuntu-20.04 -# steps: -# - template: templates/checkout.yml -# - task: AzureCLI@2 -# displayName: 'Get Docker Tag + Version' -# inputs: -# azureSubscription: 'SynapseML Build' -# scriptLocation: inlineScript -# scriptType: bash -# inlineScript: | -# VERSION=$(sbt "core/version" | tail -1 | cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g') -# echo '##vso[task.setvariable variable=version]'$VERSION -# echo '##vso[task.setvariable variable=gittag]'$(git tag -l --points-at HEAD) -# - task: Docker@2 -# displayName: Demo Image Build -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/build-demo' -# command: 'build' -# buildContext: "." -# Dockerfile: 'tools/docker/demo/Dockerfile' -# tags: $(version) -# arguments: --build-arg SYNAPSEML_VERSION=$(version) -# - task: Docker@2 -# displayName: Demo Image Push -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/build-demo' -# command: 'push' -# tags: $(version) -# - task: Docker@2 -# displayName: Minimal Image Build -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/build-minimal' -# command: 'build' -# buildContext: "." -# Dockerfile: 'tools/docker/minimal/Dockerfile' -# tags: $(version) -# arguments: --build-arg SYNAPSEML_VERSION=$(version) -# - task: Docker@2 -# displayName: Minimal Image Push -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/build-minimal' -# command: 'push' -# tags: $(version) -# - task: Docker@2 -# condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v')) -# displayName: Release Image Build -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/release' -# command: 'build' -# buildContext: "." -# Dockerfile: 'tools/docker/demo/Dockerfile' -# tags: | -# $(version) -# latest -# arguments: --build-arg SYNAPSEML_VERSION=$(version) -# - task: Docker@2 -# condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v')) -# displayName: Release Image Push -# inputs: -# containerRegistry: 'SynapseML MCR MSI' -# repository: 'public/mmlspark/release' -# command: 'push' -# tags: | -# $(version) -# latest -# - task: ComponentGovernanceComponentDetection@0 + - job: WebsiteAutoDeployment + cancelTimeoutInMinutes: 0 + pool: + vmImage: ubuntu-20.04 + steps: + - checkout: self + fetchDepth: 1 + clean: true + submodules: false + lfs: false + persistCredentials: true + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - task: NodeTool@0 + inputs: + versionSpec: '16.x' + displayName: 'Install Node.js' + - task: AzureCLI@2 + displayName: 'Convert notebooks to markdowns' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + source activate synapseml + sbt convertNotebooks + - bash: | + set -e + yarn install + cd website + yarn + yarn build + displayName: 'yarn install and build' + - bash: | + set -e + git config --global user.name "${GH_NAME}" + git config --global user.email "${GH_EMAIL}" + git checkout -b main + echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc + cd website + GIT_USER="${GH_NAME}" yarn deploy + condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master')) + env: + GH_NAME: $(gh-name) + GH_EMAIL: $(gh-email) + GH_TOKEN: $(gh-token) + displayName: 'yarn deploy' -- job: Release - cancelTimeoutInMinutes: 0 - pool: - vmImage: ubuntu-20.04 - condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) - steps: - - template: templates/checkout.yml - - template: templates/update_cli.yml - - bash: | - echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD) - displayName: 'Get Git Tag' - - bash: | - set -e - wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64 - chmod +x git-chglog_linux_amd64 - ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG - - task: GitHubRelease@0 - inputs: - gitHubConnection: 'MMLSpark Github' - repositoryName: '$(Build.Repository.Name)' - action: 'create' - target: '$(Build.SourceVersion)' - tagSource: 'auto' - releaseNotesFile: 'CHANGELOG.md' - isDraft: true - - template: templates/conda.yml - - task: AzureKeyVault@1 - inputs: - azureSubscription: 'SynapseML Build' - keyVaultName: mmlspark-keys - - bash: | - set -e - source activate synapseml - sbt publishPypi - env: - STORAGE-KEY: $(storage-key) - NEXUS-UN: $(nexus-un) - NEXUS-PW: $(nexus-pw) - PGP-PRIVATE: $(pgp-private) - PGP-PUBLIC: $(pgp-public) - PGP-PW: $(pgp-pw) - PYPI-API-TOKEN: $(pypi-api-token) - SYNAPSEML_ENABLE_PUBLISH: true - displayName: 'publish python package to pypi' - - bash: | - set -e - source activate synapseml - sbt publishLocalSigned - python tools/esrp/prepare_jar.py - env: - STORAGE-KEY: $(storage-key) - NEXUS-UN: $(nexus-un) - NEXUS-PW: $(nexus-pw) - PGP-PRIVATE: $(pgp-private) - PGP-PUBLIC: $(pgp-public) - PGP-PW: $(pgp-pw) - SYNAPSEML_ENABLE_PUBLISH: true - displayName: 'publish jar package to maven central' - - task: EsrpRelease@7 - inputs: - ConnectedServiceName: 'DataScienceESRPRelease2024' - # The keyvault hosting the certs https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/13842c9d-5a2d-4da1-84a8-3383f543d9ba/resourceGroups/esrp/providers/Microsoft.KeyVault/vaults/synapseml-esrp-kv/overview - keyvaultname: 'synapseml-esrp-kv' - authcertname: 'ReleaseAutomation' - signcertname: 'ESRPReqSignCA' - # The entra app https://ms.portal.azure.com/#view/Microsoft_AAD_RegisteredApps/ApplicationMenuBlade/~/Manifest/appId/1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6/isMSAApp~/false - clientid: '1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6' - Intent: 'PackageDistribution' - ContentType: 'Maven' - contentsource: 'Folder' - folderlocation: '/home/vsts/.ivy2/local/com.microsoft.azure/' - Owners: 'richwyd@microsoft.com,taniaarya@microsoft.com,marcozo@microsoft.com,romanbat@microsoft.com' - Approvers: 'romanbat@microsoft.com,markus.weimer@microsoft.com,negust@microsoft.com' - ServiceEndpointUrl: 'https://api.esrp.microsoft.com' - MainPublisher: 'synapseml' - DomainTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' - waitforreleasecompletion: true - displayName: 'ESRP Publish Package' + # + #- job: PublishDocker + # displayName: PublishDocker + # pool: + # vmImage: ubuntu-20.04 + # steps: + # - template: templates/checkout.yml + # - task: AzureCLI@2 + # displayName: 'Get Docker Tag + Version' + # inputs: + # azureSubscription: 'SynapseML Build' + # scriptLocation: inlineScript + # scriptType: bash + # inlineScript: | + # VERSION=$(sbt "core/version" | tail -1 | cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g') + # echo '##vso[task.setvariable variable=version]'$VERSION + # echo '##vso[task.setvariable variable=gittag]'$(git tag -l --points-at HEAD) + # - task: Docker@2 + # displayName: Demo Image Build + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/build-demo' + # command: 'build' + # buildContext: "." + # Dockerfile: 'tools/docker/demo/Dockerfile' + # tags: $(version) + # arguments: --build-arg SYNAPSEML_VERSION=$(version) + # - task: Docker@2 + # displayName: Demo Image Push + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/build-demo' + # command: 'push' + # tags: $(version) + # - task: Docker@2 + # displayName: Minimal Image Build + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/build-minimal' + # command: 'build' + # buildContext: "." + # Dockerfile: 'tools/docker/minimal/Dockerfile' + # tags: $(version) + # arguments: --build-arg SYNAPSEML_VERSION=$(version) + # - task: Docker@2 + # displayName: Minimal Image Push + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/build-minimal' + # command: 'push' + # tags: $(version) + # - task: Docker@2 + # condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v')) + # displayName: Release Image Build + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/release' + # command: 'build' + # buildContext: "." + # Dockerfile: 'tools/docker/demo/Dockerfile' + # tags: | + # $(version) + # latest + # arguments: --build-arg SYNAPSEML_VERSION=$(version) + # - task: Docker@2 + # condition: and(eq(variables.isMaster, true), startsWith(variables['gittag'], 'v')) + # displayName: Release Image Push + # inputs: + # containerRegistry: 'SynapseML MCR MSI' + # repository: 'public/mmlspark/release' + # command: 'push' + # tags: | + # $(version) + # latest + # - task: ComponentGovernanceComponentDetection@0 +- stage: E2E + displayName: "End to End Tests" + jobs: + - job: E2E + timeoutInMinutes: 120 + cancelTimeoutInMinutes: 0 + pool: + vmImage: ubuntu-20.04 + strategy: + matrix: + databricks-cpu: + TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksCPUTests" + databricks-gpu: + TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksGPUTests" + databricks-rapids: + TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.DatabricksRapidsTests" + synapse: + TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseTests" + # ${{ if eq(parameters.runSynapseExtensionE2ETests, true) }}: + # synapse-internal: + # TEST-CLASS: "com.microsoft.azure.synapse.ml.nbtest.SynapseExtension.SynapseExtensionsTests" + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - template: templates/publish.yml + - task: AzureCLI@2 + displayName: 'E2E' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + source activate synapseml + sbt "testOnly $(TEST-CLASS)" + condition: and(succeeded(), eq(variables.runTests, 'True')) + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/test-reports/TEST-*.xml' + failTaskOnFailedTests: true + condition: and(eq(variables.runTests, 'True'), succeededOrFailed()) -- job: PythonTests - timeoutInMinutes: 120 - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-22.04 - strategy: - matrix: - core: - PACKAGE: "core" - deep-learning: - PACKAGE: "deepLearning" - lightgbm: - PACKAGE: "lightgbm" - opencv: - PACKAGE: "opencv" - vw: - PACKAGE: "vw" - cognitive: - PACKAGE: "cognitive" - steps: - - template: templates/checkout.yml - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - task: AzureCLI@2 - displayName: 'Install and package deps' - timeoutInMinutes: 40 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - source activate synapseml - sbt coverage getDatasets installPipPackage - sbt publishM2 - - task: AzureCLI@2 - displayName: 'Test Python Code' - retryCountOnTaskFailure: 1 - timeoutInMinutes: 40 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | +- stage: Release + displayName: "Release" + jobs: + - job: Release + cancelTimeoutInMinutes: 0 + pool: + vmImage: ubuntu-20.04 + condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) + steps: + - template: templates/checkout.yml + - template: templates/update_cli.yml + - bash: | + echo '##vso[task.setvariable variable=tag]'$(git tag -l --points-at HEAD) + displayName: 'Get Git Tag' + - bash: | set -e - source activate synapseml - export SBT_OPTS="-XX:+UseG1GC" - echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS" - echo "SBT_OPTS=$SBT_OPTS" - (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/python-test-*.xml' - failTaskOnFailedTests: true - condition: succeededOrFailed() - - task: AzureCLI@2 - displayName: 'Generate Codecov report' - retryCountOnTaskFailure: 1 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt coverageReport' - condition: succeededOrFailed() - - template: templates/codecov.yml - - -- job: RTests - timeoutInMinutes: 60 - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - strategy: - matrix: - core: - PACKAGE: "core" - deep-learning: - PACKAGE: "deepLearning" - lightgbm: - PACKAGE: "lightgbm" - opencv: - PACKAGE: "opencv" - vw: - PACKAGE: "vw" - cognitive: - PACKAGE: "cognitive" - steps: - - template: templates/checkout.yml - #- template: templates/ivy_cache_2.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - task: AzureCLI@2 - displayName: 'Prepare for tests' - retryCountOnTaskFailure: 1 - timeoutInMinutes: 60 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | + wget https://github.com/git-chglog/git-chglog/releases/download/0.8.0/git-chglog_linux_amd64 + chmod +x git-chglog_linux_amd64 + ./git-chglog_linux_amd64 -o CHANGELOG.md $TAG + - task: GitHubRelease@0 + inputs: + gitHubConnection: 'MMLSpark Github' + repositoryName: '$(Build.Repository.Name)' + action: 'create' + target: '$(Build.SourceVersion)' + tagSource: 'auto' + releaseNotesFile: 'CHANGELOG.md' + isDraft: true + - template: templates/conda.yml + - task: AzureKeyVault@1 + inputs: + azureSubscription: 'SynapseML Build' + keyVaultName: mmlspark-keys + - bash: | set -e - export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT" source activate synapseml - (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) - sbt codegen - sbt publishM2 - SPARK_VERSION=3.4.1 - HADOOP_VERSION=3 - wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz - - task: AzureCLI@2 - displayName: 'Test R Code' - retryCountOnTaskFailure: 3 - timeoutInMinutes: 20 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | + sbt publishPypi + env: + STORAGE-KEY: $(storage-key) + NEXUS-UN: $(nexus-un) + NEXUS-PW: $(nexus-pw) + PGP-PRIVATE: $(pgp-private) + PGP-PUBLIC: $(pgp-public) + PGP-PW: $(pgp-pw) + PYPI-API-TOKEN: $(pypi-api-token) + SYNAPSEML_ENABLE_PUBLISH: true + displayName: 'publish python package to pypi' + - bash: | set -e - export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT" source activate synapseml - timeout 20m sbt -DskipCodegen=true "project $(PACKAGE)" coverage testR - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/r-test-*.xml' - failTaskOnFailedTests: true - condition: succeededOrFailed() - - task: AzureCLI@2 - retryCountOnTaskFailure: 1 - displayName: 'Generate Codecov report' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt coverageReport' - condition: succeededOrFailed() - - template: templates/codecov.yml - -- job: BuildAndCacheCondaEnv - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - steps: - - template: templates/checkout.yml - - template: templates/conda.yml - - bash: df -H - -- job: WebsiteSamplesTests - cancelTimeoutInMinutes: 0 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - steps: - - template: templates/checkout.yml - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - template: templates/publish.yml - - task: AzureCLI@2 - displayName: 'Test Website Samples' - timeoutInMinutes: 30 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) - (sbt coverage testWebsiteDocs) - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/website-test-result.xml' - failTaskOnFailedTests: true - condition: succeededOrFailed() - - task: AzureCLI@2 - displayName: 'Generate Codecov report' - retryCountOnTaskFailure: 1 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt coverageReport' - condition: succeededOrFailed() - - template: templates/codecov.yml - + sbt publishLocalSigned + python tools/esrp/prepare_jar.py + env: + STORAGE-KEY: $(storage-key) + NEXUS-UN: $(nexus-un) + NEXUS-PW: $(nexus-pw) + PGP-PRIVATE: $(pgp-private) + PGP-PUBLIC: $(pgp-public) + PGP-PW: $(pgp-pw) + SYNAPSEML_ENABLE_PUBLISH: true + displayName: 'publish jar package to maven central' + - task: EsrpRelease@7 + inputs: + ConnectedServiceName: 'DataScienceESRPRelease2024' + # The keyvault hosting the certs https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/13842c9d-5a2d-4da1-84a8-3383f543d9ba/resourceGroups/esrp/providers/Microsoft.KeyVault/vaults/synapseml-esrp-kv/overview + keyvaultname: 'synapseml-esrp-kv' + authcertname: 'ReleaseAutomation' + signcertname: 'ESRPReqSignCA' + # The entra app https://ms.portal.azure.com/#view/Microsoft_AAD_RegisteredApps/ApplicationMenuBlade/~/Manifest/appId/1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6/isMSAApp~/false + clientid: '1fc1c0d1-5a85-4081-8f1e-12a8c225b9a6' + Intent: 'PackageDistribution' + ContentType: 'Maven' + contentsource: 'Folder' + folderlocation: '/home/vsts/.ivy2/local/com.microsoft.azure/' + Owners: 'richwyd@microsoft.com,taniaarya@microsoft.com,marcozo@microsoft.com,romanbat@microsoft.com' + Approvers: 'romanbat@microsoft.com,markus.weimer@microsoft.com,negust@microsoft.com' + ServiceEndpointUrl: 'https://api.esrp.microsoft.com' + MainPublisher: 'synapseml' + DomainTenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' + waitforreleasecompletion: true + displayName: 'ESRP Publish Package' +- stage: Tests + displayName: "Unit Tests" + jobs: + - job: PythonTests + timeoutInMinutes: 120 + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-22.04 + strategy: + matrix: + core: + PACKAGE: "core" + deep-learning: + PACKAGE: "deepLearning" + lightgbm: + PACKAGE: "lightgbm" + opencv: + PACKAGE: "opencv" + vw: + PACKAGE: "vw" + cognitive: + PACKAGE: "cognitive" + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - task: AzureCLI@2 + displayName: 'Install and package deps' + timeoutInMinutes: 40 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + source activate synapseml + sbt coverage getDatasets installPipPackage + sbt publishM2 + - task: AzureCLI@2 + displayName: 'Test Python Code' + retryCountOnTaskFailure: 1 + timeoutInMinutes: 40 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + source activate synapseml + export SBT_OPTS="-XX:+UseG1GC" + echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS" + echo "SBT_OPTS=$SBT_OPTS" + (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/python-test-*.xml' + failTaskOnFailedTests: true + condition: succeededOrFailed() + - task: AzureCLI@2 + displayName: 'Generate Codecov report' + retryCountOnTaskFailure: 1 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt coverageReport' + condition: succeededOrFailed() + - template: templates/codecov.yml -- job: WebsiteAutoDeployment - cancelTimeoutInMinutes: 0 - pool: - vmImage: ubuntu-20.04 - steps: - - checkout: self - fetchDepth: 1 - clean: true - submodules: false - lfs: false - persistCredentials: true - - template: templates/update_cli.yml - - template: templates/conda.yml - - template: templates/kv.yml - - task: NodeTool@0 - inputs: - versionSpec: '16.x' - displayName: 'Install Node.js' - - task: AzureCLI@2 - displayName: 'Convert notebooks to markdowns' - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - source activate synapseml - sbt convertNotebooks - - bash: | - set -e - yarn install - cd website - yarn - yarn build - displayName: 'yarn install and build' - - bash: | - set -e - git config --global user.name "${GH_NAME}" - git config --global user.email "${GH_EMAIL}" - git checkout -b main - echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc - cd website - GIT_USER="${GH_NAME}" yarn deploy - condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master')) - env: - GH_NAME: $(gh-name) - GH_EMAIL: $(gh-email) - GH_TOKEN: $(gh-token) - displayName: 'yarn deploy' + - job: RTests + timeoutInMinutes: 60 + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + strategy: + matrix: + core: + PACKAGE: "core" + deep-learning: + PACKAGE: "deepLearning" + lightgbm: + PACKAGE: "lightgbm" + opencv: + PACKAGE: "opencv" + vw: + PACKAGE: "vw" + cognitive: + PACKAGE: "cognitive" + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache_2.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - task: AzureCLI@2 + displayName: 'Prepare for tests' + retryCountOnTaskFailure: 1 + timeoutInMinutes: 60 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT" + source activate synapseml + (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) + sbt codegen + sbt publishM2 + SPARK_VERSION=3.4.1 + HADOOP_VERSION=3 + wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz + - task: AzureCLI@2 + displayName: 'Test R Code' + retryCountOnTaskFailure: 3 + timeoutInMinutes: 20 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + export SBT_OPTS="-Xms2G -Xmx4G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=4G -Xss5M -Duser.timezone=GMT" + source activate synapseml + timeout 20m sbt -DskipCodegen=true "project $(PACKAGE)" coverage testR + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/r-test-*.xml' + failTaskOnFailedTests: true + condition: succeededOrFailed() + - task: AzureCLI@2 + retryCountOnTaskFailure: 1 + displayName: 'Generate Codecov report' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt coverageReport' + condition: succeededOrFailed() + - template: templates/codecov.yml + - job: WebsiteSamplesTests + cancelTimeoutInMinutes: 0 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - template: templates/conda.yml + - template: templates/kv.yml + - template: templates/publish.yml + - task: AzureCLI@2 + displayName: 'Test Website Samples' + timeoutInMinutes: 30 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) + (sbt coverage testWebsiteDocs) + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/website-test-result.xml' + failTaskOnFailedTests: true + condition: succeededOrFailed() + - task: AzureCLI@2 + displayName: 'Generate Codecov report' + retryCountOnTaskFailure: 1 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt coverageReport' + condition: succeededOrFailed() + - template: templates/codecov.yml -- job: UnitTests - cancelTimeoutInMinutes: 1 - timeoutInMinutes: 80 - condition: eq(variables.runTests, 'True') - pool: - vmImage: ubuntu-20.04 - strategy: - matrix: - automl: - PACKAGE: "automl" - causal: - PACKAGE: "causal" - onnx: - PACKAGE: "onnx" - geospatial: - PACKAGE: "services.geospatial" - anomaly: - PACKAGE: "services.anomaly" - FLAKY: "true" - bing: - PACKAGE: "services.bing" - FLAKY: "true" - face: - PACKAGE: "services.face" - FLAKY: "true" - form: - PACKAGE: "services.form" - FLAKY: "true" - language: - PACKAGE: "services.language" - FLAKY: "true" - openai: - PACKAGE: "services.openai" - FLAKY: "true" - search: - PACKAGE: "services.search" - FFMPEG: "true" - FLAKY: "true" - speech: - PACKAGE: "services.speech" - FFMPEG: "true" - FLAKY: "true" - text: - PACKAGE: "services.text" - FLAKY: "true" - translate: - PACKAGE: "services.translate" - FLAKY: "true" - vision: - PACKAGE: "services.vision" - FLAKY: "true" - core: - PACKAGE: "core" - explainers1: - PACKAGE: "explainers.split1" - explainers2: - PACKAGE: "explainers.split2" - explainers3: - PACKAGE: "explainers.split3" - exploratory: - PACKAGE: "exploratory" - featurize: - PACKAGE: "featurize" - image: - PACKAGE: "image" - io1: - PACKAGE: "io.split1" - FLAKY: "true" - io2: - PACKAGE: "io.split2" - FLAKY: "true" - isolationforest: - PACKAGE: "isolationforest" - flaky: - PACKAGE: "flaky" #TODO fix flaky test so isolation is not needed - FLAKY: "true" - lightgbm1: - PACKAGE: "lightgbm.split1" #TODO speed up LGBM Tests and remove split - FLAKY: "true" - lightgbm2: - PACKAGE: "lightgbm.split2" - FLAKY: "true" - lightgbm3: - PACKAGE: "lightgbm.split3" - FLAKY: "true" - lightgbm4: - PACKAGE: "lightgbm.split4" - FLAKY: "true" - lightgbm5: - PACKAGE: "lightgbm.split5" - FLAKY: "true" - lightgbm6: - PACKAGE: "lightgbm.split6" - FLAKY: "true" - opencv: - PACKAGE: "opencv" - recommendation: - PACKAGE: "recommendation" - stages: - PACKAGE: "stages" - nn: - PACKAGE: "nn" - train: - PACKAGE: "train" - vw: - PACKAGE: "vw" - steps: - - template: templates/checkout.yml - #- template: templates/ivy_cache.yml - - template: templates/update_cli.yml - - task: AzureCLI@2 - displayName: 'Setup repo' - retryCountOnTaskFailure: 1 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - (timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests) - (${FFMPEG:-false} && sudo apt-get update && \ - sudo apt-get install ffmpeg libgstreamer1.0-0 \ - gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-ugly -y) - (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) - - task: AzureCLI@2 - displayName: 'Unit Test' - retryCountOnTaskFailure: 1 - timeoutInMinutes: 90 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: | - ulimit -c unlimited - export SBT_OPTS="-Xmx2G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xss2M -Duser.timezone=GMT" - (timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") || - (${FLAKY:-false} && timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") - - task: PublishTestResults@2 - displayName: 'Publish Test Results' - inputs: - testResultsFiles: '**/test-reports/TEST-*.xml' - failTaskOnFailedTests: true - condition: succeededOrFailed() - - task: AzureCLI@2 - displayName: 'Generate Codecov report' - retryCountOnTaskFailure: 1 - inputs: - azureSubscription: 'SynapseML Build' - scriptLocation: inlineScript - scriptType: bash - inlineScript: 'sbt coverageReport' - condition: succeededOrFailed() - - template: templates/kv.yml - - template: templates/codecov.yml + - job: UnitTests + cancelTimeoutInMinutes: 1 + timeoutInMinutes: 80 + condition: eq(variables.runTests, 'True') + pool: + vmImage: ubuntu-20.04 + strategy: + matrix: + automl: + PACKAGE: "automl" + causal: + PACKAGE: "causal" + onnx: + PACKAGE: "onnx" + geospatial: + PACKAGE: "services.geospatial" + anomaly: + PACKAGE: "services.anomaly" + FLAKY: "true" + bing: + PACKAGE: "services.bing" + FLAKY: "true" + face: + PACKAGE: "services.face" + FLAKY: "true" + form: + PACKAGE: "services.form" + FLAKY: "true" + language: + PACKAGE: "services.language" + FLAKY: "true" + openai: + PACKAGE: "services.openai" + FLAKY: "true" + search: + PACKAGE: "services.search" + FFMPEG: "true" + FLAKY: "true" + speech: + PACKAGE: "services.speech" + FFMPEG: "true" + FLAKY: "true" + text: + PACKAGE: "services.text" + FLAKY: "true" + translate: + PACKAGE: "services.translate" + FLAKY: "true" + vision: + PACKAGE: "services.vision" + FLAKY: "true" + core: + PACKAGE: "core" + explainers1: + PACKAGE: "explainers.split1" + explainers2: + PACKAGE: "explainers.split2" + explainers3: + PACKAGE: "explainers.split3" + exploratory: + PACKAGE: "exploratory" + featurize: + PACKAGE: "featurize" + image: + PACKAGE: "image" + io1: + PACKAGE: "io.split1" + FLAKY: "true" + io2: + PACKAGE: "io.split2" + FLAKY: "true" + isolationforest: + PACKAGE: "isolationforest" + flaky: + PACKAGE: "flaky" #TODO fix flaky test so isolation is not needed + FLAKY: "true" + lightgbm1: + PACKAGE: "lightgbm.split1" #TODO speed up LGBM Tests and remove split + FLAKY: "true" + lightgbm2: + PACKAGE: "lightgbm.split2" + FLAKY: "true" + lightgbm3: + PACKAGE: "lightgbm.split3" + FLAKY: "true" + lightgbm4: + PACKAGE: "lightgbm.split4" + FLAKY: "true" + lightgbm5: + PACKAGE: "lightgbm.split5" + FLAKY: "true" + lightgbm6: + PACKAGE: "lightgbm.split6" + FLAKY: "true" + opencv: + PACKAGE: "opencv" + recommendation: + PACKAGE: "recommendation" + stages: + PACKAGE: "stages" + nn: + PACKAGE: "nn" + train: + PACKAGE: "train" + vw: + PACKAGE: "vw" + steps: + - template: templates/checkout.yml + #- template: templates/ivy_cache.yml + - template: templates/update_cli.yml + - task: AzureCLI@2 + displayName: 'Setup repo' + retryCountOnTaskFailure: 1 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + (timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests) + (${FFMPEG:-false} && sudo apt-get update && \ + sudo apt-get install ffmpeg libgstreamer1.0-0 \ + gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-ugly -y) + (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) + - task: AzureCLI@2 + displayName: 'Unit Test' + retryCountOnTaskFailure: 1 + timeoutInMinutes: 90 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + ulimit -c unlimited + export SBT_OPTS="-Xmx2G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xss2M -Duser.timezone=GMT" + (timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") || + (${FLAKY:-false} && timeout 30m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") + - task: PublishTestResults@2 + displayName: 'Publish Test Results' + inputs: + testResultsFiles: '**/test-reports/TEST-*.xml' + failTaskOnFailedTests: true + condition: succeededOrFailed() + - task: AzureCLI@2 + displayName: 'Generate Codecov report' + retryCountOnTaskFailure: 1 + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: 'sbt coverageReport' + condition: succeededOrFailed() + - template: templates/kv.yml + - template: templates/codecov.yml diff --git a/templates/conda.yml b/templates/conda.yml index 771daaf4ce..a9ad3d063f 100644 --- a/templates/conda.yml +++ b/templates/conda.yml @@ -22,3 +22,9 @@ steps: displayName: Create Anaconda environment retryCountOnTaskFailure: 1 condition: eq(variables.CONDA_CACHE_RESTORED, 'false') + - bash: | + echo "system usage:" + sudo df -h + echo "conda cache usage:" + sudo du -h --max-depth=1 $(CONDA_CACHE_DIR) + displayName: Display disk usage From 35f513ba605a73e4a02ddb1563fae4b0498b6051 Mon Sep 17 00:00:00 2001 From: Brendan Walsh <brwals@microsoft.com> Date: Thu, 15 Aug 2024 23:27:01 -0700 Subject: [PATCH 6/6] fix stage dependencies --- pipeline.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pipeline.yaml b/pipeline.yaml index a1505f0c2c..d99ef33ed3 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -61,6 +61,7 @@ variables: stages: - stage: Lint displayName: "Lint" + dependsOn: jobs: - job: Style cancelTimeoutInMinutes: 0 @@ -85,6 +86,7 @@ stages: - stage: Build displayName: "Build and Publish" + dependsOn: jobs: - job: BuildAndCacheCondaEnv cancelTimeoutInMinutes: 0 @@ -273,6 +275,7 @@ stages: - stage: E2E displayName: "End to End Tests" + dependsOn: jobs: - job: E2E timeoutInMinutes: 120 @@ -319,6 +322,7 @@ stages: - stage: Release displayName: "Release" + dependsOn: jobs: - job: Release cancelTimeoutInMinutes: 0 @@ -401,6 +405,7 @@ stages: - stage: Tests displayName: "Unit Tests" + dependsOn: jobs: - job: PythonTests timeoutInMinutes: 120