Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run all scripts as job with timeout. #3971

Merged
merged 3 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 72 additions & 39 deletions scripts/cleanup_ebpf_cicd_tests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -6,54 +6,87 @@ param ([parameter(Mandatory=$false)][string] $Target = "TEST_VM",
[parameter(Mandatory=$false)][string] $LogFileName = "TestLog.log",
[parameter(Mandatory=$false)][string] $WorkingDirectory = $pwd.ToString(),
[parameter(Mandatory=$false)][string] $TestExecutionJsonFileName = "test_execution.json",
[parameter(Mandatory=$false)][string] $SelfHostedRunnerName)
[parameter(Mandatory=$false)][string] $SelfHostedRunnerName = [System.Net.Dns]::GetHostName(),
[Parameter(Mandatory = $false)][int] $TestJobTimeout = (30*60))

Push-Location $WorkingDirectory

$TestVMCredential = Get-StoredCredential -Target $Target -ErrorAction Stop

# Load other utility modules.
Import-Module .\common.psm1 -Force -ArgumentList ($LogFileName) -WarningAction SilentlyContinue
Import-Module .\config_test_vm.psm1 -Force -ArgumentList ($TestVMCredential.UserName, $TestVMCredential.Password, $WorkingDirectory, $LogFileName) -WarningAction SilentlyContinue
Import-Module .\install_ebpf.psm1 -ArgumentList ($WorkingDirectory, $LogFileName) -Force -WarningAction SilentlyContinue

$TestVMCredential = Get-StoredCredential -Target $Target -ErrorAction Stop

# Read the test execution json.
$TestExecutionConfig = Get-Content ("{0}\{1}" -f $PSScriptRoot, $TestExecutionJsonFileName) | ConvertFrom-Json
$VMList = $TestExecutionConfig.VMMap.$SelfHostedRunnerName

# Wait for all VMs to be in ready state, in case the test run caused any VM to crash.
Wait-AllVMsToInitialize `
-VMList $VMList `
-UserName $TestVMCredential.UserName `
-AdminPassword $TestVMCredential.Password

# Check if we're here after a crash (we are if c:\windows\memory.dmp exists on the VM). If so,
# we need to skip the stopping of the drivers as they may be in a wedged state as a result of the
# crash. We will be restoring the VM's 'baseline' snapshot next, so the step is redundant anyway.
foreach ($VM in $VMList) {
$VMName = $VM.Name
$DumpFound = Invoke-Command `
-VMName $VMName `
-Credential $TestVMCredential `
-ScriptBlock {
Test-Path -Path "c:\windows\memory.dmp" -PathType leaf
}
$Config = Get-Content ("{0}\{1}" -f $PSScriptRoot, $TestExecutionJsonFileName) | ConvertFrom-Json

if ($DumpFound -eq $True) {
Write-Log "Post-crash reboot detected on VM $VMName"
} else {
# Stop eBPF Components on the test VM. (Un-install is not necessary.)
# We *MUST* be able to stop all drivers cleanly after a test. Failure to do so indicates a fatal bug in
# one/some of the ebpf driver-set.
Stop-eBPFComponentsOnVM -VMName $VMname -ErrorAction Stop
$Job = Start-Job -ScriptBlock {
param ([Parameter(Mandatory = $True)] [PSCredential] $TestVMCredential,
[Parameter(Mandatory = $true)] [PSCustomObject] $Config,
[Parameter(Mandatory = $true)] [string] $SelfHostedRunnerName,
[parameter(Mandatory = $true)] [string] $LogFileName,
[parameter(Mandatory = $true)] [string] $WorkingDirectory = $pwd.ToString(),
[parameter(Mandatory = $true)] [bool] $KmTracing
)
Push-Location $WorkingDirectory

# Load other utility modules.
Import-Module .\common.psm1 -Force -ArgumentList ($LogFileName) -WarningAction SilentlyContinue
Import-Module .\config_test_vm.psm1 -Force -ArgumentList ($TestVMCredential.UserName, $TestVMCredential.Password, $WorkingDirectory, $LogFileName) -WarningAction SilentlyContinue

$VMList = $Config.VMMap.$SelfHostedRunnerName
# Wait for all VMs to be in ready state, in case the test run caused any VM to crash.
Wait-AllVMsToInitialize `
-VMList $VMList `
-UserName $TestVMCredential.UserName `
-AdminPassword $TestVMCredential.Password

# Check if we're here after a crash (we are if c:\windows\memory.dmp exists on the VM). If so,
# we need to skip the stopping of the drivers as they may be in a wedged state as a result of the
# crash. We will be restoring the VM's 'baseline' snapshot next, so the step is redundant anyway.
foreach ($VM in $VMList) {
$VMName = $VM.Name
$DumpFound = Invoke-Command `
-VMName $VMName `
-Credential $TestVMCredential `
-ScriptBlock {
Test-Path -Path "c:\windows\memory.dmp" -PathType leaf
}

if ($DumpFound -eq $True) {
Write-Log "Post-crash reboot detected on VM $VMName"
}
}
}

# Import logs from VMs.
Import-ResultsFromVM -VMList $VMList -KmTracing $KmTracing
# Import logs from VMs.
Import-ResultsFromVM -VMList $VMList -KmTracing $KmTracing

# Stop the VMs.
Stop-AllVMs -VMList $VMList

Pop-Location

} -ArgumentList (
$TestVMCredential,
$Config,
$SelfHostedRunnerName,
$LogFileName,
$WorkingDirectory,
$KmTracing)


# Stop the VMs.
Stop-AllVMs -VMList $VMList
Restore-AllVMs -VMList $VMList
# Wait for the job to complete
$JobTimedOut = `
Wait-TestJobToComplete -Job $Job `
-Config $Config `
-SelfHostedRunnerName $SelfHostedRunnerName `
-TestJobTimeout $TestJobTimeout `
-CheckpointPrefix "Cleanup"

# Clean up
Remove-Job -Job $Job -Force

Pop-Location

if ($JobTimedOut) {
exit 1
}

70 changes: 70 additions & 0 deletions scripts/common.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,73 @@ function New-Credential
$Credential = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList @($UserName, $AdminPassword)
return $Credential
}


# Compresses the file(s) matched by $SourcePath into the zip archive $DestinationPath.
#
# Parameters:
#   SourcePath      - Path (wildcards allowed) of the file(s) to compress.
#   DestinationPath - Path of the zip archive to create; an existing archive is overwritten.
#
# The operation is attempted up to 3 times with an increasing back-off, to mitigate the
# transient error "The process cannot access the file <filename> because it is being used
# by another process."
#
# This is best-effort: if every attempt fails, the failure is reported on the output stream
# and the function returns without throwing. Callers that require the archive should check
# that $DestinationPath exists afterwards.
function Compress-File
{
    param ([Parameter(Mandatory = $True)] [string] $SourcePath,
           [Parameter(Mandatory = $True)] [string] $DestinationPath
    )

    Write-Log "Compressing $SourcePath -> $DestinationPath"

    $maxAttempts = 3
    for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) {
        try {
            # -ErrorAction Stop turns any failure into a catchable exception, so there is no
            # need to clear and inspect the session-wide $error list.
            Compress-Archive `
                -Path $SourcePath `
                -DestinationPath $DestinationPath `
                -CompressionLevel Fastest `
                -Force `
                -ErrorAction Stop

            # Compression succeeded.
            return
        } catch {
            $ErrorMessage = "*** ERROR *** Failed to compress $SourcePath (attempt $attempt of $maxAttempts): $($_.Exception.Message)"
            Write-Output $ErrorMessage

            if ($attempt -lt $maxAttempts) {
                # Back off a little longer after each failure before retrying.
                Start-Sleep -Seconds (5 * $attempt)
            } else {
                Write-Output "*** ERROR *** Giving up compressing $SourcePath after $maxAttempts attempts."
            }
        }
    }
}

# Waits for a background job to finish, relaying its output to the host in near real-time.
#
# Parameters:
#   Job                  - The background job to monitor.
#   Config               - Test execution configuration; its VMMap property is indexed by
#                          $SelfHostedRunnerName to obtain the list of VMs for this runner.
#   SelfHostedRunnerName - Name of the runner whose VM list is looked up in $Config.VMMap.
#   TestJobTimeout       - Maximum time, in seconds, to let the job run.
#   CheckpointPrefix     - Prefix for the name of the VM checkpoint taken on timeout.
#
# Returns $true if the job was still running when the timeout expired (a checkpoint of the
# first VM of the runner is then taken for later investigation), $false otherwise.
# The job itself is neither stopped nor removed here; that is left to the caller.
function Wait-TestJobToComplete
{
    param([Parameter(Mandatory = $true)] [System.Management.Automation.Job] $Job,
          [Parameter(Mandatory = $true)] [PSCustomObject] $Config,
          [Parameter(Mandatory = $true)] [string] $SelfHostedRunnerName,
          [Parameter(Mandatory = $true)] [int] $TestJobTimeout,
          [Parameter(Mandatory = $true)] [string] $CheckpointPrefix)

    # Always return a real boolean, including when the job finishes in time.
    $JobTimedOut = $false

    # Measure wall-clock time, so that time spent fetching job output counts towards the timeout.
    $Stopwatch = [System.Diagnostics.Stopwatch]::StartNew()

    # Loop to fetch and print job output in near real-time
    while ($Job.State -eq 'Running') {
        Receive-Job -Job $Job | ForEach-Object { Write-Host $_ }

        Start-Sleep -Seconds 2

        # Re-check the state: the job may have completed while we were sleeping.
        if (($Stopwatch.Elapsed.TotalSeconds -gt $TestJobTimeout) -and ($Job.State -eq 'Running')) {
            $VMList = $Config.VMMap.$SelfHostedRunnerName
            # currently one VM runs per runner.
            $TestVMName = if ($VMList) { @($VMList)[0].Name } else { $null }

            Write-Host "$CheckpointPrefix job on $TestVMName has timed out after $TestJobTimeout seconds" -ForegroundColor Yellow

            if ($TestVMName) {
                $Timestamp = Get-Date -Format 'yyyy-MM-dd_HH-mm-ss'
                $CheckpointName = "$CheckpointPrefix-$TestVMName-Checkpoint-$Timestamp"
                Write-Log "Taking snapshot $CheckpointName of $TestVMName"
                Checkpoint-VM -Name $TestVMName -SnapshotName $CheckpointName
            } else {
                Write-Log "No VM found for runner $SelfHostedRunnerName; skipping snapshot"
            }

            $JobTimedOut = $true
            break
        }
    }

    # Print any remaining output after the job completes
    Receive-Job -Job $Job | ForEach-Object { Write-Host $_ }

    return $JobTimedOut
}
64 changes: 22 additions & 42 deletions scripts/config_test_vm.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -302,80 +302,59 @@ function Stop-eBPFComponentsOnVM
Write-Log "eBPF components stopped on $VMName" -ForegroundColor Green
}

function ArchiveKernelModeDumpOnVM
function Compress-KernelModeDumpOnVM
{
param (
[Parameter(Mandatory = $True)] [System.Management.Automation.Runspaces.PSSession] $Session
)

Invoke-Command -Session $Session -ScriptBlock {
param([Parameter(Mandatory=$True)] [string] $WorkingDirectory)

Import-Module $env:SystemDrive\$WorkingDirectory\common.psm1 -ArgumentList ($LogFileName) -Force -WarningAction SilentlyContinue

$KernelModeDumpFileSourcePath = "$Env:WinDir"
$KernelModeDumpFileDestinationPath = "$Env:SystemDrive\KernelDumps"

# Create the compressed dump folder if it doesn't exist.
if (!(Test-Path $KernelModeDumpFileDestinationPath)) {
Write-Output "Creating $KernelModeDumpFileDestinationPath directory."
Write-Log "Creating $KernelModeDumpFileDestinationPath directory."
New-Item -ItemType Directory -Path $KernelModeDumpFileDestinationPath | Out-Null

# Make sure it was created
if (!(Test-Path $KernelModeDumpFileDestinationPath)) {
$ErrorMessage = `
"*** ERROR *** Create compressed dump file directory failed: $KernelModeDumpFileDestinationPath`n"
Write-Output $ErrorMessage
Start-Sleep -seconds 3
Write-Log $ErrorMessage
Throw $ErrorMessage
}
}

if (Test-Path $KernelModeDumpFileSourcePath\*.dmp -PathType Leaf) {
Write-Output "Found kernel mode dump(s) in $($KernelModeDumpFileSourcePath):"
Write-Log "Found kernel mode dump(s) in $($KernelModeDumpFileSourcePath):"
$DumpFiles = get-childitem -Path $KernelModeDumpFileSourcePath\*.dmp
foreach ($DumpFile in $DumpFiles) {
Write-Output "`tName:$($DumpFile.Name), Size:$((($DumpFile.Length) / 1MB).ToString("F2")) MB"
Write-Log "`tName:$($DumpFile.Name), Size:$((($DumpFile.Length) / 1MB).ToString("F2")) MB"
}
Write-Output "`n"

Write-Output `
Write-Log `
"Compressing kernel dump files: $KernelModeDumpFileSourcePath -> $KernelModeDumpFileDestinationPath"

# Retry 3 times to ensure compression operation succeeds.
# To mitigate error message: "The process cannot access the file 'C:\Windows\MEMORY.DMP' because it is being used by another process."
$retryCount = 1
while ($retryCount -lt 4) {
$error.clear()
Compress-Archive `
-Path "$KernelModeDumpFileSourcePath\*.dmp" `
-DestinationPath "$KernelModeDumpFileDestinationPath\km_dumps.zip" `
-CompressionLevel Fastest `
-Force
if ($error[0] -ne $null) {
$ErrorMessage = "*** ERROR *** Failed to compress kernel mode dump files: $error. Retrying $retryCount"
Write-Output $ErrorMessage
Start-Sleep -seconds (5 * $retryCount)
$retryCount++
} else {
# Compression succeeded.
break;
}
}

Compress-File -SourcePath $KernelModeDumpFileSourcePath\*.dmp -DestinationPath $KernelModeDumpFileDestinationPath\km_dumps.zip
if (Test-Path $KernelModeDumpFileDestinationPath\km_dumps.zip -PathType Leaf) {
$CompressedDumpFile = get-childitem -Path $KernelModeDumpFileDestinationPath\km_dumps.zip
Write-Output "Found compressed kernel mode dump file in $($KernelModeDumpFileDestinationPath):"
Write-Output `
Write-Log "Found compressed kernel mode dump file in $($KernelModeDumpFileDestinationPath):"
Write-Log `
"`tName:$($CompressedDumpFile.Name), Size:$((($CompressedDumpFile.Length) / 1MB).ToString("F2")) MB"
} else {
$ErrorMessage = "*** ERROR *** kernel mode dump compressed file not found.`n`n"
Write-Output $ErrorMessage
Start-Sleep -seconds 3
Write-Log $ErrorMessage
throw $ErrorMessage
}
} else {
Write-Output "`n"
Write-Output "No kernel mode dump(s) in $($KernelModeDumpFileSourcePath)."
Write-Log "No kernel mode dump(s) in $($KernelModeDumpFileSourcePath)."
}
}
} -ArgumentList ("eBPF") -ErrorAction Ignore
}

#
Expand Down Expand Up @@ -405,7 +384,7 @@ function Import-ResultsFromVM

# Archive and copy kernel crash dumps, if any.
Write-Log "Processing kernel mode dump (if any) on VM $VMName"
ArchiveKernelModeDumpOnVM -Session $VMSession
Compress-KernelModeDumpOnVM -Session $VMSession

$LocalKernelArchiveLocation = ".\TestLogs\$VMName\KernelDumps"
Copy-Item `
Expand All @@ -418,11 +397,9 @@ function Import-ResultsFromVM

if (Test-Path $LocalKernelArchiveLocation\km_dumps.zip -PathType Leaf) {
$LocalFile = get-childitem -Path $LocalKernelArchiveLocation\km_dumps.zip
Write-Log "`n"
Write-Log "Local copy of kernel mode dump archive in $($LocalKernelArchiveLocation) for VM $($VMName):"
Write-Log "`tName:$($LocalFile.Name), Size:$((($LocalFile.Length) / 1MB).ToString("F2")) MB"
} else {
Write-Log "`n"
Write-Log "No local copy of kernel mode dump archive in $($LocalKernelArchiveLocation) for VM $VMName."
}

Expand Down Expand Up @@ -489,13 +466,16 @@ function Import-ResultsFromVM

$EtlFileSize = (Get-ChildItem $WorkingDirectory\$EtlFile).Length/1MB
Write-Log "ETL file Size: $EtlFileSize MB"

Write-Log "Compressing $WorkingDirectory\$EtlFile ..."
Compress-File -SourcePath "$WorkingDirectory\$EtlFile" -DestinationPath "$WorkingDirectory\$EtlFile.zip"
} -ArgumentList ("eBPF", $LogFileName, $EtlFile) -ErrorAction Ignore

# Copy ETL from Test VM.
Write-Log ("Copy $WorkingDirectory\$EtlFile on $VMName to $pwd\TestLogs\$VMName\Logs")
Write-Log ("Copy $VMSystemDrive\eBPF\$EtlFile.zip on $VMName to $pwd\TestLogs\$VMName\Logs")
Copy-Item `
-FromSession $VMSession `
-Path "$VMSystemDrive\eBPF\$EtlFile" `
-Path "$VMSystemDrive\eBPF\$EtlFile.zip" `
-Destination ".\TestLogs\$VMName\Logs" `
-Recurse `
-Force `
Expand Down
Loading
Loading