-
Notifications
You must be signed in to change notification settings - Fork 478
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Unified Queue: fix failing tests and address TODOs #26067
Changes from 12 commits
4c93cbe
c33652d
ca63308
ab39c53
b460193
e2d3751
5332ee6
e58e30a
e33ca41
7472383
7ca3c6a
b294ac9
135e4c9
663bf55
c85816b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -276,11 +276,13 @@ func (ds *Datastore) ListReadyToExecuteScriptsForHost(ctx context.Context, hostI | |
func (ds *Datastore) listUpcomingHostScriptExecutions(ctx context.Context, hostID uint, onlyShowInternal, onlyReadyToExecute bool) ([]*fleet.HostScriptResult, error) { | ||
extraWhere := "" | ||
if onlyShowInternal { | ||
extraWhere = " AND ua.payload->'$.is_internal' = 1" | ||
// software_uninstalls are implicitly internal | ||
extraWhere = " AND COALESCE(ua.payload->'$.is_internal', 1) = 1" | ||
} | ||
if onlyReadyToExecute { | ||
extraWhere += " AND ua.activated_at IS NOT NULL" | ||
} | ||
// this selects software uninstalls too as they run as scripts | ||
listStmt := fmt.Sprintf(` | ||
SELECT | ||
id, | ||
|
@@ -299,21 +301,17 @@ func (ds *Datastore) listUpcomingHostScriptExecutions(ctx context.Context, hostI | |
IF(ua.activated_at IS NULL, 0, 1) AS topmost | ||
FROM | ||
upcoming_activities ua | ||
INNER JOIN script_upcoming_activities sua | ||
-- left join because software_uninstall has no script join | ||
LEFT JOIN script_upcoming_activities sua | ||
ON ua.id = sua.upcoming_activity_id | ||
WHERE | ||
ua.host_id = ? AND | ||
ua.activity_type = 'script' AND | ||
( | ||
ua.payload->'$.sync_request' = 0 OR | ||
ua.created_at >= DATE_SUB(NOW(), INTERVAL ? SECOND) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sync scripts are treated as any other script in the upcoming queue: #22866 (comment) |
||
) | ||
ua.activity_type IN ('script', 'software_uninstall') | ||
%s | ||
ORDER BY topmost DESC, priority DESC, created_at ASC) t`, extraWhere) | ||
|
||
var results []*fleet.HostScriptResult | ||
seconds := int(constants.MaxServerWaitTime.Seconds()) | ||
if err := sqlx.SelectContext(ctx, ds.reader(ctx), &results, listStmt, hostID, seconds); err != nil { | ||
if err := sqlx.SelectContext(ctx, ds.reader(ctx), &results, listStmt, hostID); err != nil { | ||
return nil, ctxerr.Wrap(ctx, err, "list pending host script executions") | ||
} | ||
return results, nil | ||
|
@@ -911,7 +909,7 @@ WHERE | |
INNER JOIN script_upcoming_activities sua | ||
ON upcoming_activities.id = sua.upcoming_activity_id | ||
WHERE | ||
upcoming_activities.activity_type = 'script' | ||
upcoming_activities.activity_type = 'script' | ||
AND (upcoming_activities.payload->'$.sync_request' = 0 OR upcoming_activities.created_at >= NOW() - INTERVAL ? SECOND) | ||
AND sua.script_id IN (SELECT id FROM scripts WHERE global_or_team_id = ?)` | ||
|
||
|
@@ -932,13 +930,13 @@ WHERE | |
exit_code IS NULL AND (sync_request = 0 OR created_at >= NOW() - INTERVAL ? SECOND) | ||
AND script_id IN (SELECT id FROM scripts WHERE global_or_team_id = ? AND name NOT IN (?))` | ||
|
||
const clearPendingExecutionsNotInListUA = `DELETE FROM upcoming_activities | ||
USING | ||
const clearPendingExecutionsNotInListUA = `DELETE FROM upcoming_activities | ||
USING | ||
upcoming_activities | ||
INNER JOIN script_upcoming_activities sua | ||
ON upcoming_activities.id = sua.upcoming_activity_id | ||
WHERE | ||
upcoming_activities.activity_type = 'script' | ||
upcoming_activities.activity_type = 'script' | ||
AND (upcoming_activities.payload->'$.sync_request' = 0 OR upcoming_activities.created_at >= NOW() - INTERVAL ? SECOND) | ||
AND sua.script_id IN (SELECT id FROM scripts WHERE global_or_team_id = ? AND name NOT IN (?))` | ||
|
||
|
@@ -957,13 +955,13 @@ ON DUPLICATE KEY UPDATE | |
exit_code IS NULL AND (sync_request = 0 OR created_at >= NOW() - INTERVAL ? SECOND) | ||
AND script_id = ? AND script_content_id != ?` | ||
|
||
const clearPendingExecutionsWithObsoleteScriptUA = `DELETE FROM upcoming_activities | ||
USING | ||
upcoming_activities | ||
const clearPendingExecutionsWithObsoleteScriptUA = `DELETE FROM upcoming_activities | ||
USING | ||
upcoming_activities | ||
INNER JOIN script_upcoming_activities sua | ||
ON upcoming_activities.id = sua.upcoming_activity_id | ||
WHERE | ||
upcoming_activities.activity_type = 'script' | ||
upcoming_activities.activity_type = 'script' | ||
AND (upcoming_activities.payload->'$.sync_request' = 0 OR upcoming_activities.created_at >= NOW() - INTERVAL ? SECOND) | ||
AND sua.script_id = ? AND sua.script_content_id != ?` | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1089,7 +1089,7 @@ WHERE | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
func (ds *Datastore) GetSummaryHostSoftwareInstalls(ctx context.Context, installerID uint) (*fleet.SoftwareInstallerStatusSummary, error) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var dest fleet.SoftwareInstallerStatusSummary | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// TODO(uniq): AFAICT we don't have uniqueness for host_id + title_id in upcoming or | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// TODO(Sarah): AFAICT we don't have uniqueness for host_id + title_id in upcoming or | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here and a couple more todos below @gillespi314 I changed the TODO to you since it's not related to the unified queue, let me know if that's ok with you (or if you want me to just remove it)? The load tests should (hopefully) tell us if we need to improve some queries. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one seems a bit more related to UniQ insofar we probably want the ordering of activities to be consistent with other queries. What if we change these to use the same group-wise max approach we're using elsewhere? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I see, yeah I think once the row is in When the filter is for non-pending, we do this: fleet/server/datastore/mysql/software_installers.go Lines 1274 to 1305 in c85816b
But I think it should not consider any of those rows if there's also a pending request in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I'll address both points in a subsequent PR (use the left join approach for the groupwise max, and fix the status filter). Let me know if I'm mistaken and it's current behavior is correct, though! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Doesn't the early return at line 1271 cover it? Or are you saying that when filtering on a terminal status (e.g., installed), if a host has matching record for a prior install the host should always be excluded from the results if there's an entry in upcoming activities? TBH I'm not very clear what is expected in that case. What you're suggesting makes sense if pending is always supposed to take precedence of prior activity. Does it depend on the specific use case (i.e. for "last install" we always give preference to upcoming, but maybe something else when filtering for a specific status)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah the latter, if I filter on That's how it worked before, it always considers the latest install attempt, regardless of status: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh wait, no I misread that... I think it does return the host if it has the requested status, it's just that we take the latest entry with that status. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah no, sorry again :D It's as I said before, it only considers the latest attempt, and then it returns the host if that attempt's status is the one requested by the filter. A bit hairy but yeah, that's the behavior to reproduce so I'll update that query accordingly. |
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// past activities. In the past the max(id) approach was "good enough" as a proxy for the most | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// recent activity since we didn't really need to worry about the order of activities. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Moving to a time-based approach would be more accurate but would require a more complex and | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
@@ -1207,7 +1207,7 @@ WHERE | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
status = fleet.SoftwareInstallFailed // TODO: When VPP supports uninstall this should become STATUS IN ('failed_install', 'failed_uninstall') | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// TODO(uniq): AFAICT we don't have uniqueness for host_id + title_id in upcoming or | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// TODO(Sarah): AFAICT we don't have uniqueness for host_id + title_id in upcoming or | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// past activities. In the past the max(id) approach was "good enough" as a proxy for the most | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// recent activity since we didn't really need to worry about the order of activities. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Moving to a time-based approach would be more accurate but would require a more complex and | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
@@ -1278,7 +1278,7 @@ WHERE | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
statusFilter = "hsi.status IN (:installFailed, :uninstallFailed)" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// TODO(uniq): AFAICT we don't have uniqueness for host_id + title_id in upcoming or | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// TODO(Sarah): AFAICT we don't have uniqueness for host_id + title_id in upcoming or | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// past activities. In the past the max(id) approach was "good enough" as a proxy for the most | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// recent activity since we didn't really need to worry about the order of activities. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Moving to a time-based approach would be more accurate but would require a more complex and | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
@@ -1308,7 +1308,7 @@ WHERE | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"status": status, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"installFailed": fleet.SoftwareInstallFailed, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"uninstallFailed": fleet.SoftwareUninstallFailed, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// TODO(uniq): prior code was joining based on installer id but based on how list options are parsed [1] it seems like this should be the title id | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// TODO(Sarah): prior code was joining based on installer id but based on how list options are parsed [1] it seems like this should be the title id | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// [1] https://github.com/fleetdm/fleet/blob/8aecae4d853829cb6e7f828099a4f0953643cf18/server/datastore/mysql/hosts.go#L1088-L1089 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"title_id": titleID, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
}) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gillespi314 I changed the TODO to you since it's not related to the unified queue, let me know if that's ok with you (or if you want me to just remove it)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's change it to a FIXME and I'll flag it for the g-software folks to take a closer look.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On second thought, is it really "to fix" or is your change the actual fix? I.e. is it just to raise awareness that this was changed, or does that current implementation need to be changed? I'd remove the comment altogether if there's not really anything to fix.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mna, you're right, it was more so to raise awareness of the change in case I might have missed something.
@jahzielv, do you mind giving this a quick sanity check? There's a similar note regarding VPP to check here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gillespi314 so sorry, I totally missed this notification! I can take a look