Skip to content

Commit

Permalink
Run all scripts as job with timeout. (#3971)
Browse files Browse the repository at this point in the history
* Run all scripts as job with timeout.

* compress KM trace file.

* refactored common code.
  • Loading branch information
shankarseal authored Nov 5, 2024
1 parent d43fcd2 commit a3edec5
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 146 deletions.
111 changes: 72 additions & 39 deletions scripts/cleanup_ebpf_cicd_tests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -6,54 +6,87 @@ param ([parameter(Mandatory=$false)][string] $Target = "TEST_VM",
[parameter(Mandatory=$false)][string] $LogFileName = "TestLog.log",
[parameter(Mandatory=$false)][string] $WorkingDirectory = $pwd.ToString(),
[parameter(Mandatory=$false)][string] $TestExecutionJsonFileName = "test_execution.json",
[parameter(Mandatory=$false)][string] $SelfHostedRunnerName)
[parameter(Mandatory=$false)][string] $SelfHostedRunnerName = [System.Net.Dns]::GetHostName(),
[Parameter(Mandatory = $false)][int] $TestJobTimeout = (30*60))

Push-Location $WorkingDirectory

$TestVMCredential = Get-StoredCredential -Target $Target -ErrorAction Stop

# Load other utility modules.
Import-Module .\common.psm1 -Force -ArgumentList ($LogFileName) -WarningAction SilentlyContinue
Import-Module .\config_test_vm.psm1 -Force -ArgumentList ($TestVMCredential.UserName, $TestVMCredential.Password, $WorkingDirectory, $LogFileName) -WarningAction SilentlyContinue
Import-Module .\install_ebpf.psm1 -ArgumentList ($WorkingDirectory, $LogFileName) -Force -WarningAction SilentlyContinue

$TestVMCredential = Get-StoredCredential -Target $Target -ErrorAction Stop

# Read the test execution json.
$TestExecutionConfig = Get-Content ("{0}\{1}" -f $PSScriptRoot, $TestExecutionJsonFileName) | ConvertFrom-Json
$VMList = $TestExecutionConfig.VMMap.$SelfHostedRunnerName

# Wait for all VMs to be in ready state, in case the test run caused any VM to crash.
Wait-AllVMsToInitialize `
-VMList $VMList `
-UserName $TestVMCredential.UserName `
-AdminPassword $TestVMCredential.Password

# Check if we're here after a crash (we are if c:\windows\memory.dmp exists on the VM). If so,
# we need to skip the stopping of the drivers as they may be in a wedged state as a result of the
# crash. We will be restoring the VM's 'baseline' snapshot next, so the step is redundant anyway.
foreach ($VM in $VMList) {
$VMName = $VM.Name
$DumpFound = Invoke-Command `
-VMName $VMName `
-Credential $TestVMCredential `
-ScriptBlock {
Test-Path -Path "c:\windows\memory.dmp" -PathType leaf
}
$Config = Get-Content ("{0}\{1}" -f $PSScriptRoot, $TestExecutionJsonFileName) | ConvertFrom-Json

if ($DumpFound -eq $True) {
Write-Log "Post-crash reboot detected on VM $VMName"
} else {
# Stop eBPF Components on the test VM. (Un-install is not necessary.)
# We *MUST* be able to stop all drivers cleanly after a test. Failure to do so indicates a fatal bug in
# one/some of the ebpf driver-set.
Stop-eBPFComponentsOnVM -VMName $VMname -ErrorAction Stop
$Job = Start-Job -ScriptBlock {
param ([Parameter(Mandatory = $True)] [PSCredential] $TestVMCredential,
[Parameter(Mandatory = $true)] [PSCustomObject] $Config,
[Parameter(Mandatory = $true)] [string] $SelfHostedRunnerName,
[parameter(Mandatory = $true)] [string] $LogFileName,
[parameter(Mandatory = $true)] [string] $WorkingDirectory = $pwd.ToString(),
[parameter(Mandatory = $true)] [bool] $KmTracing
)
Push-Location $WorkingDirectory

# Load other utility modules.
Import-Module .\common.psm1 -Force -ArgumentList ($LogFileName) -WarningAction SilentlyContinue
Import-Module .\config_test_vm.psm1 -Force -ArgumentList ($TestVMCredential.UserName, $TestVMCredential.Password, $WorkingDirectory, $LogFileName) -WarningAction SilentlyContinue

$VMList = $Config.VMMap.$SelfHostedRunnerName
# Wait for all VMs to be in ready state, in case the test run caused any VM to crash.
Wait-AllVMsToInitialize `
-VMList $VMList `
-UserName $TestVMCredential.UserName `
-AdminPassword $TestVMCredential.Password

# Check if we're here after a crash (we are if c:\windows\memory.dmp exists on the VM). If so,
# we need to skip the stopping of the drivers as they may be in a wedged state as a result of the
# crash. We will be restoring the VM's 'baseline' snapshot next, so the step is redundant anyway.
foreach ($VM in $VMList) {
$VMName = $VM.Name
$DumpFound = Invoke-Command `
-VMName $VMName `
-Credential $TestVMCredential `
-ScriptBlock {
Test-Path -Path "c:\windows\memory.dmp" -PathType leaf
}

if ($DumpFound -eq $True) {
Write-Log "Post-crash reboot detected on VM $VMName"
}
}
}

# Import logs from VMs.
Import-ResultsFromVM -VMList $VMList -KmTracing $KmTracing
# Import logs from VMs.
Import-ResultsFromVM -VMList $VMList -KmTracing $KmTracing

# Stop the VMs.
Stop-AllVMs -VMList $VMList

Pop-Location

} -ArgumentList (
$TestVMCredential,
$Config,
$SelfHostedRunnerName,
$LogFileName,
$WorkingDirectory,
$KmTracing)


# Stop the VMs.
Stop-AllVMs -VMList $VMList
Restore-AllVMs -VMList $VMList
# Wait for the job to complete
$JobTimedOut = `
Wait-TestJobToComplete -Job $Job `
-Config $Config `
-SelfHostedRunnerName $SelfHostedRunnerName `
-TestJobTimeout $TestJobTimeout `
-CheckpointPrefix "Cleanup"

# Clean up
Remove-Job -Job $Job -Force

Pop-Location

if ($JobTimedOut) {
exit 1
}

70 changes: 70 additions & 0 deletions scripts/common.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,73 @@ function New-Credential
$Credential = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList @($UserName, $AdminPassword)
return $Credential
}


function Compress-File
{
    <#
    .SYNOPSIS
    Compresses a file (or wildcard set of files) into a zip archive, retrying on failure.

    .DESCRIPTION
    Calls Compress-Archive up to three times, backing off 5 and then 10 seconds between
    attempts. The retries mitigate the transient error "The process cannot access the file
    <filename> because it is being used by another process."

    This is a best-effort helper: it never throws. Failure messages are written to the output
    stream; callers that need certainty should check that the destination archive exists.

    .PARAMETER SourcePath
    Path (wildcards allowed) of the file(s) to compress.

    .PARAMETER DestinationPath
    Path of the zip archive to create. An existing archive is overwritten (-Force).
    #>
    param ([Parameter(Mandatory = $True)] [string] $SourcePath,
           [Parameter(Mandatory = $True)] [string] $DestinationPath
    )

    Write-Log "Compressing $SourcePath -> $DestinationPath"

    $MaxAttempts = 3
    for ($Attempt = 1; $Attempt -le $MaxAttempts; $Attempt++) {
        try {
            # -ErrorAction Stop turns non-terminating errors into exceptions so that every
            # failure is observed here without clearing/inspecting the global $error list.
            Compress-Archive `
                -Path $SourcePath `
                -DestinationPath $DestinationPath `
                -CompressionLevel Fastest `
                -Force `
                -ErrorAction Stop

            # Compression succeeded.
            return
        } catch {
            $Reason = $_.Exception.Message
            if ($Attempt -lt $MaxAttempts) {
                Write-Output "*** ERROR *** Failed to compress $($SourcePath): $Reason. Retrying $Attempt"
                # Linear back-off before the next attempt.
                Start-Sleep -Seconds (5 * $Attempt)
            } else {
                # Out of attempts: report the final failure instead of sleeping for nothing.
                Write-Output "*** ERROR *** Failed to compress $SourcePath after $MaxAttempts attempts: $Reason"
            }
        }
    }
}

function Wait-TestJobToComplete
{
    <#
    .SYNOPSIS
    Streams a background job's output until it finishes or a timeout expires.

    .DESCRIPTION
    Polls the job every two seconds while it is in the 'Running' state, echoing any new job
    output to the host. If the job is still running once the accumulated polling time exceeds
    TestJobTimeout, a checkpoint of the first VM mapped to the runner is taken (so the hung
    state can be inspected later) and polling stops. The job itself is NOT stopped or removed
    here; that is left to the caller.

    .PARAMETER Job
    The background job to monitor.

    .PARAMETER Config
    Test execution configuration; Config.VMMap.<SelfHostedRunnerName> is read to find the VM.

    .PARAMETER SelfHostedRunnerName
    Key into Config.VMMap identifying this runner's VM list.

    .PARAMETER TestJobTimeout
    Timeout in seconds. Elapsed time is counted from sleep intervals only, so it is approximate.

    .PARAMETER CheckpointPrefix
    Prefix for the checkpoint name taken on timeout; also used to label the timeout message.

    .OUTPUTS
    [bool] $true if the job timed out, $false otherwise.
    #>
    param([Parameter(Mandatory = $true)] [System.Management.Automation.Job] $Job,
          [Parameter(Mandatory = $true)] [PSCustomObject] $Config,
          [Parameter(Mandatory = $true)] [string] $SelfHostedRunnerName,
          [Parameter(Mandatory = $true)] [int] $TestJobTimeout,
          [Parameter(Mandatory = $true)] [string] $CheckpointPrefix)

    # Always return a real boolean, even when the job completes in time.
    $JobTimedOut = $false
    $PollIntervalSeconds = 2
    $TimeElapsed = 0

    # Loop to fetch and print job output in near real-time.
    while ($Job.State -eq 'Running') {
        Receive-Job -Job $Job | ForEach-Object { Write-Host $_ }

        Start-Sleep -Seconds $PollIntervalSeconds
        $TimeElapsed += $PollIntervalSeconds

        # Re-check the state: the job may have finished during the sleep above.
        if (($TimeElapsed -gt $TestJobTimeout) -and ($Job.State -eq 'Running')) {
            $VMList = $Config.VMMap.$SelfHostedRunnerName
            # Currently one VM runs per runner.
            $TestVMName = $VMList[0].Name
            Write-Host "The $CheckpointPrefix job on $TestVMName has timed out after $TestJobTimeout seconds" -ForegroundColor Yellow

            $Timestamp = Get-Date -Format 'yyyy-MM-dd_HH-mm-ss'
            $CheckpointName = "$CheckpointPrefix-$TestVMName-Checkpoint-$Timestamp"
            Write-Log "Taking snapshot $CheckpointName of $TestVMName"
            Checkpoint-VM -Name $TestVMName -SnapshotName $CheckpointName

            $JobTimedOut = $true
            break
        }
    }

    # Print any remaining output after the job completes (or after the timeout).
    Receive-Job -Job $Job | ForEach-Object { Write-Host $_ }

    return $JobTimedOut
}
64 changes: 22 additions & 42 deletions scripts/config_test_vm.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -302,80 +302,59 @@ function Stop-eBPFComponentsOnVM
Write-Log "eBPF components stopped on $VMName" -ForegroundColor Green
}

function ArchiveKernelModeDumpOnVM
function Compress-KernelModeDumpOnVM
{
param (
[Parameter(Mandatory = $True)] [System.Management.Automation.Runspaces.PSSession] $Session
)

Invoke-Command -Session $Session -ScriptBlock {
param([Parameter(Mandatory=$True)] [string] $WorkingDirectory)

Import-Module $env:SystemDrive\$WorkingDirectory\common.psm1 -ArgumentList ($LogFileName) -Force -WarningAction SilentlyContinue

$KernelModeDumpFileSourcePath = "$Env:WinDir"
$KernelModeDumpFileDestinationPath = "$Env:SystemDrive\KernelDumps"

# Create the compressed dump folder if doesn't exist.
if (!(Test-Path $KernelModeDumpFileDestinationPath)) {
Write-Output "Creating $KernelModeDumpFileDestinationPath directory."
Write-Log "Creating $KernelModeDumpFileDestinationPath directory."
New-Item -ItemType Directory -Path $KernelModeDumpFileDestinationPath | Out-Null

# Make sure it was created
if (!(Test-Path $KernelModeDumpFileDestinationPath)) {
$ErrorMessage = `
"*** ERROR *** Create compressed dump file directory failed: $KernelModeDumpFileDestinationPath`n"
Write-Output $ErrorMessage
Start-Sleep -seconds 3
Write-Log $ErrorMessage
Throw $ErrorMessage
}
}

if (Test-Path $KernelModeDumpFileSourcePath\*.dmp -PathType Leaf) {
Write-Output "Found kernel mode dump(s) in $($KernelModeDumpFileSourcePath):"
Write-Log "Found kernel mode dump(s) in $($KernelModeDumpFileSourcePath):"
$DumpFiles = get-childitem -Path $KernelModeDumpFileSourcePath\*.dmp
foreach ($DumpFile in $DumpFiles) {
Write-Output "`tName:$($DumpFile.Name), Size:$((($DumpFile.Length) / 1MB).ToString("F2")) MB"
Write-Log "`tName:$($DumpFile.Name), Size:$((($DumpFile.Length) / 1MB).ToString("F2")) MB"
}
Write-Output "`n"

Write-Output `
Write-Log `
"Compressing kernel dump files: $KernelModeDumpFileSourcePath -> $KernelModeDumpFileDestinationPath"

# Retry 3 times to ensure compression operation succeeds.
# To mitigate error message: "The process cannot access the file 'C:\Windows\MEMORY.DMP' because it is being used by another process."
$retryCount = 1
while ($retryCount -lt 4) {
$error.clear()
Compress-Archive `
-Path "$KernelModeDumpFileSourcePath\*.dmp" `
-DestinationPath "$KernelModeDumpFileDestinationPath\km_dumps.zip" `
-CompressionLevel Fastest `
-Force
if ($error[0] -ne $null) {
$ErrorMessage = "*** ERROR *** Failed to compress kernel mode dump files: $error. Retrying $retryCount"
Write-Output $ErrorMessage
Start-Sleep -seconds (5 * $retryCount)
$retryCount++
} else {
# Compression succeeded.
break;
}
}

Compress-File -SourcePath $KernelModeDumpFileSourcePath\*.dmp -DestinationPath $KernelModeDumpFileDestinationPath\km_dumps.zip
if (Test-Path $KernelModeDumpFileDestinationPath\km_dumps.zip -PathType Leaf) {
$CompressedDumpFile = get-childitem -Path $KernelModeDumpFileDestinationPath\km_dumps.zip
Write-Output "Found compressed kernel mode dump file in $($KernelModeDumpFileDestinationPath):"
Write-Output `
Write-Log "Found compressed kernel mode dump file in $($KernelModeDumpFileDestinationPath):"
Write-Log `
"`tName:$($CompressedDumpFile.Name), Size:$((($CompressedDumpFile.Length) / 1MB).ToString("F2")) MB"
} else {
$ErrorMessage = "*** ERROR *** kernel mode dump compressed file not found.`n`n"
Write-Output $ErrorMessage
Start-Sleep -seconds 3
Write-Log $ErrorMessage
throw $ErrorMessage
}
} else {
Write-Output "`n"
Write-Output "No kernel mode dump(s) in $($KernelModeDumpFileSourcePath)."
Write-Log "No kernel mode dump(s) in $($KernelModeDumpFileSourcePath)."
}
}
} -ArgumentList ("eBPF") -ErrorAction Ignore
}

#
Expand Down Expand Up @@ -405,7 +384,7 @@ function Import-ResultsFromVM

# Archive and copy kernel crash dumps, if any.
Write-Log "Processing kernel mode dump (if any) on VM $VMName"
ArchiveKernelModeDumpOnVM -Session $VMSession
Compress-KernelModeDumpOnVM -Session $VMSession

$LocalKernelArchiveLocation = ".\TestLogs\$VMName\KernelDumps"
Copy-Item `
Expand All @@ -418,11 +397,9 @@ function Import-ResultsFromVM

if (Test-Path $LocalKernelArchiveLocation\km_dumps.zip -PathType Leaf) {
$LocalFile = get-childitem -Path $LocalKernelArchiveLocation\km_dumps.zip
Write-Log "`n"
Write-Log "Local copy of kernel mode dump archive in $($LocalKernelArchiveLocation) for VM $($VMName):"
Write-Log "`tName:$($LocalFile.Name), Size:$((($LocalFile.Length) / 1MB).ToString("F2")) MB"
} else {
Write-Log "`n"
Write-Log "No local copy of kernel mode dump archive in $($LocalKernelArchiveLocation) for VM $VMName."
}

Expand Down Expand Up @@ -489,13 +466,16 @@ function Import-ResultsFromVM

$EtlFileSize = (Get-ChildItem $WorkingDirectory\$EtlFile).Length/1MB
Write-Log "ETL file Size: $EtlFileSize MB"

Write-Log "Compressing $WorkingDirectory\$EtlFile ..."
Compress-File -SourcePath "$WorkingDirectory\$EtlFile" -DestinationPath "$WorkingDirectory\$EtlFile.zip"
} -ArgumentList ("eBPF", $LogFileName, $EtlFile) -ErrorAction Ignore

# Copy ETL from Test VM.
Write-Log ("Copy $WorkingDirectory\$EtlFile on $VMName to $pwd\TestLogs\$VMName\Logs")
Write-Log ("Copy $VMSystemDrive\eBPF\$EtlFile.zip on $VMName to $pwd\TestLogs\$VMName\Logs")
Copy-Item `
-FromSession $VMSession `
-Path "$VMSystemDrive\eBPF\$EtlFile" `
-Path "$VMSystemDrive\eBPF\$EtlFile.zip" `
-Destination ".\TestLogs\$VMName\Logs" `
-Recurse `
-Force `
Expand Down
Loading

0 comments on commit a3edec5

Please sign in to comment.