Skip to content

Commit

Permalink
EC2 Auto Discover SSM: add support for debugging custom SSM Docs (#41706
Browse files Browse the repository at this point in the history
)

This PR uses a new AWS API that lists the steps of the current
invocation.
After listing them, it will ask for the output of each one.

Previously, we were using a static list of steps: those defined in the
default SSM Document.

However, for custom documents with a different list of steps, that would
fail.

If the client does not have access to this new API, we will fall back to
the list of steps that exist in the default SSM Document.

If we ask for the status of one of those steps, and we receive a known
error indicating that the step does not exist, instead of failing we
will emit the overall invocation result (which doesn't include
stdout/stderr, but is better than nothing).
  • Loading branch information
marcoandredinis committed May 22, 2024
1 parent 02d74d9 commit 8d110e6
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 69 deletions.
2 changes: 2 additions & 0 deletions docs/pages/auto-discovery/servers/ec2-discovery.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ AWS
"ec2:DescribeInstances",
"ssm:DescribeInstanceInformation",
"ssm:GetCommandInvocation",
"ssm:ListCommandInvocations",
"ssm:SendCommand"
],
"Resource": [
Expand All @@ -186,6 +187,7 @@ AWS
"ec2:DescribeInstances",
"ssm:DescribeInstanceInformation",
"ssm:GetCommandInvocation",
"ssm:ListCommandInvocations",
"ssm:SendCommand"
],
"Resource": [
Expand Down
1 change: 1 addition & 0 deletions lib/cloud/aws/policy_statements.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ func StatementForEC2SSMAutoDiscover() *Statement {
"ec2:DescribeInstances",
"ssm:DescribeInstanceInformation",
"ssm:GetCommandInvocation",
"ssm:ListCommandInvocations",
"ssm:SendCommand",
},
Resources: allResources,
Expand Down
7 changes: 7 additions & 0 deletions lib/cloud/aws/ssm_documents.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,10 @@ mainSteps:
}

const EC2DiscoveryPolicyName = "TeleportEC2Discovery"

// EC2DiscoverySSMDocumentSteps is the list of step names defined in the default SSM Document for Teleport Discovery.
// Each entry is used as the step (plugin) name when querying per-step results after executing a command using SSM.
// It is the fallback list for when the steps of an invocation cannot be listed from AWS,
// so it must be kept in sync with the mainSteps of the default SSM Document.
var EC2DiscoverySSMDocumentSteps = []string{
"downloadContent",
"runShellScript",
}
2 changes: 2 additions & 0 deletions lib/configurators/aws/aws_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ func TestAWSIAMDocuments(t *testing.T) {
"ec2:DescribeInstances",
"ssm:DescribeInstanceInformation",
"ssm:GetCommandInvocation",
"ssm:ListCommandInvocations",
"ssm:SendCommand",
},
Resources: []string{"*"},
Expand All @@ -596,6 +597,7 @@ func TestAWSIAMDocuments(t *testing.T) {
"ec2:DescribeInstances",
"ssm:DescribeInstanceInformation",
"ssm:GetCommandInvocation",
"ssm:ListCommandInvocations",
"ssm:SendCommand",
},
Resources: []string{"*"},
Expand Down
1 change: 1 addition & 0 deletions lib/integrations/awsoidc/ec2_ssm_iam_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ func NewEC2SSMConfigureClient(ctx context.Context, region string) (EC2SSMConfigu
// Action: Run a command and get its output.
// - ssm:SendCommand
// - ssm:GetCommandInvocation
// - ssm:ListCommandInvocations
//
// Besides setting up the required IAM policies, this method also adds the SSM Document.
// This SSM Document downloads and runs the Teleport Installer Script, which installs teleport in the target EC2 instance.
Expand Down
94 changes: 73 additions & 21 deletions lib/srv/server/ssm_install.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,13 +284,6 @@ func skipAWSWaitErr(err error) error {
return trace.Wrap(err)
}

// This list of ssmSteps must match the ones used in awslib.EC2DiscoverySSMDocument
// These vars are used to obtain specific status of each step when checking a command output.
var (
ssmStepDownloadContent = "downloadContent"
ssmStepRunShellScript = "runShellScript"
)

func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, commandID, instanceID *string) error {
err := req.SSM.WaitUntilCommandExecutedWithContext(ctx, &ssm.GetCommandInvocationInput{
CommandId: commandID,
Expand All @@ -301,32 +294,91 @@ func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, com
return trace.Wrap(err)
}

// Check 1st step: download Content
downloadContentStep, err := si.getCommandStepStatusEvent(ctx, &ssmStepDownloadContent, req, commandID, instanceID)
if err != nil {
invocationSteps, err := si.getInvocationSteps(ctx, req, commandID, instanceID)
switch {
case trace.IsAccessDenied(err):
// getInvocationSteps uses `ssm:ListCommandInvocations` to gather all the executed steps.
// Using `ssm:ListCommandInvocations` is not always possible because previous Docs versions (pre-v16) did not ask for that permission.
// If the IAM role does not have access to that action, an Access Denied is returned here.
// The process continues but the user is warned that they should add that permission to get better diagnostics.
si.Logger.WarnContext(ctx,
"Add ssm:ListCommandInvocations action to IAM Role to improve diagnostics of EC2 Teleport installation failures",
"error", err)

invocationSteps = awslib.EC2DiscoverySSMDocumentSteps

case err != nil:
return trace.Wrap(err)
}

// Only check runShellScript step if downloadContent was a success.
if downloadContentStep.Metadata.Code != libevents.SSMRunSuccessCode {
return trace.Wrap(si.Emitter.EmitAuditEvent(ctx, downloadContentStep))
for i, step := range invocationSteps {
stepResultEvent, err := si.getCommandStepStatusEvent(ctx, step, req, commandID, instanceID)
if err != nil {
var invalidPluginNameErr *ssm.InvalidPluginName
if errors.As(err, &invalidPluginNameErr) {
// If using a custom SSM Document and the client does not have access to ssm:ListCommandInvocations
// the list of invocationSteps (ie plugin name) might be wrong.
// If that's the case, emit an event with the overall invocation result (ignoring specific steps' stdout and stderr).
invocationResultEvent, err := si.getCommandStepStatusEvent(ctx, "" /*no step*/, req, commandID, instanceID)
if err != nil {
return trace.Wrap(err)
}

return trace.Wrap(si.Emitter.EmitAuditEvent(ctx, invocationResultEvent))
}

return trace.Wrap(err)
}

// Emit an event for the first failed step or for the latest step.
lastStep := i+1 == len(invocationSteps)
if stepResultEvent.Metadata.Code != libevents.SSMRunSuccessCode || lastStep {
return trace.Wrap(si.Emitter.EmitAuditEvent(ctx, stepResultEvent))
}
}

// Check 2nd step: run shell script
runShellScriptStep, err := si.getCommandStepStatusEvent(ctx, &ssmStepRunShellScript, req, commandID, instanceID)
return nil
}

// getInvocationSteps returns the names of the steps (command plugins) that were executed
// for the given command on the given instance, as reported by ssm:ListCommandInvocations.
//
// The AWS error is passed through awslib.ConvertRequestFailureError before being returned,
// which lets the caller check for an access denied error when the IAM role lacks the
// ssm:ListCommandInvocations action.
// A BadParameter error is returned when AWS reports no invocation for the command/instance pair.
func (si *SSMInstaller) getInvocationSteps(ctx context.Context, req SSMRunRequest, commandID, instanceID *string) ([]string, error) {
// ssm:ListCommandInvocations is used to list the actual steps because users might be using a custom SSM Document.
listCommandInvocationResp, err := req.SSM.ListCommandInvocationsWithContext(ctx, &ssm.ListCommandInvocationsInput{
CommandId: commandID,
InstanceId: instanceID,
Details: aws.Bool(true),
})
if err != nil {
return trace.Wrap(err)
return nil, trace.Wrap(awslib.ConvertRequestFailureError(err))
}

return trace.Wrap(si.Emitter.EmitAuditEvent(ctx, runShellScriptStep))
// We only expect a single invocation because we are sending both the CommandID and the InstanceID.
// This call happens after WaitUntilCommandExecuted, so there's no reason for this to ever return 0 elements.
if len(listCommandInvocationResp.CommandInvocations) == 0 {
si.Logger.WarnContext(ctx,
"No command invocation was found.",
"command_id", aws.StringValue(commandID),
"instance_id", aws.StringValue(instanceID),
)
return nil, trace.BadParameter("no command invocation was found")
}
commandInvocation := listCommandInvocationResp.CommandInvocations[0]

// Collect the step names from the invocation's command plugins.
documentSteps := make([]string, 0, len(commandInvocation.CommandPlugins))
for _, step := range commandInvocation.CommandPlugins {
documentSteps = append(documentSteps, aws.StringValue(step.Name))
}
return documentSteps, nil
}

func (si *SSMInstaller) getCommandStepStatusEvent(ctx context.Context, step *string, req SSMRunRequest, commandID, instanceID *string) (*apievents.SSMRun, error) {
stepResult, err := req.SSM.GetCommandInvocationWithContext(ctx, &ssm.GetCommandInvocationInput{
func (si *SSMInstaller) getCommandStepStatusEvent(ctx context.Context, step string, req SSMRunRequest, commandID, instanceID *string) (*apievents.SSMRun, error) {
getCommandInvocationReq := &ssm.GetCommandInvocationInput{
CommandId: commandID,
InstanceId: instanceID,
PluginName: step,
})
}
if step != "" {
getCommandInvocationReq.PluginName = aws.String(step)
}
stepResult, err := req.SSM.GetCommandInvocationWithContext(ctx, getCommandInvocationReq)
if err != nil {
return nil, trace.Wrap(err)
}
Expand Down
Loading

0 comments on commit 8d110e6

Please sign in to comment.