From 918773bc5388e631121cf5722f78c5ea3ae35321 Mon Sep 17 00:00:00 2001 From: Victor Lyuboslavsky Date: Tue, 25 Jun 2024 14:10:51 -0500 Subject: [PATCH] Fixing DB issue with vuln counts. (#19989) #18115 Fixing issue seen in load test: ``` level=error ts=2024-06-25T17:09:08.230514976Z cron=vulnerabilities schedule=vulnerabilities instanceID="5boTc/PamsSp8Jsh4kiEOpECmPu+bmOAJaVX4XV7ZOG4vgO4U6peHyxH8mFQhBXYJt+roRpwNuGmUoEI8n/otg==" err="running job" details="get critical vulnerabilities count: Error 1114 (HY000): The table '/rdsdbdata/tmp/#sql127_6b4b_ad107' is full" jobID=update_host_issues_vulnerabilities_counts ``` --- server/datastore/mysql/hosts.go | 36 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/server/datastore/mysql/hosts.go b/server/datastore/mysql/hosts.go index 9050d8928d..17c8462c4b 100644 --- a/server/datastore/mysql/hosts.go +++ b/server/datastore/mysql/hosts.go @@ -5357,28 +5357,50 @@ func (ds *Datastore) UpdateHostIssuesVulnerabilities(ctx context.Context) error return clearAllFn() } + var allHostIDs []uint + if err := sqlx.SelectContext(ctx, ds.reader(ctx), &allHostIDs, `SELECT id FROM hosts ORDER BY id`); err != nil { + return ctxerr.Wrap(ctx, err, "get all host IDs") + } + type issuesCount struct { HostID uint64 `db:"host_id"` Count uint64 `db:"count"` } - var criticalCounts []issuesCount - criticalVulnerabilitiesCountStmt := ` + // We must batch the query extracting the critical vulnerabilities count because the query is too complex for MySQL to handle in one go. + // We saw MySQL error 1114 (HY000), where the temporary table reached its max capacity. 
+ for i := 0; i < len(allHostIDs); i += hostIssuesInsertBatchSize { + start := i + end := i + hostIssuesInsertBatchSize + if end > len(allHostIDs) { + end = len(allHostIDs) + } + criticalVulnerabilitiesCountStmt := ` SELECT combined.host_id, COUNT(*) as count FROM (SELECT host_id, cve FROM host_software hs INNER JOIN software_cve sc ON sc.software_id = hs.software_id + WHERE host_id IN (?) UNION SELECT host_id, cve FROM host_operating_system hos - INNER JOIN operating_system_vulnerabilities osv ON osv.operating_system_id = hos.os_id) combined + INNER JOIN operating_system_vulnerabilities osv ON osv.operating_system_id = hos.os_id + WHERE host_id IN (?) + ) combined INNER JOIN cve_meta cm ON cm.cve = combined.cve WHERE cm.cvss_score > ? GROUP BY combined.host_id ORDER BY combined.host_id` - err := sqlx.SelectContext(ctx, ds.reader(ctx), &criticalCounts, criticalVulnerabilitiesCountStmt, criticalCVSSScoreCutoff) - if err != nil { - return ctxerr.Wrap(ctx, err, "get critical vulnerabilities count") + stmt, args, err := sqlx.In(criticalVulnerabilitiesCountStmt, allHostIDs[start:end], allHostIDs[start:end], criticalCVSSScoreCutoff) + if err != nil { + return ctxerr.Wrap(ctx, err, "building IN statement for getting critical vulnerabilities count") + } + var batchCriticalCounts []issuesCount + err = sqlx.SelectContext(ctx, ds.reader(ctx), &batchCriticalCounts, stmt, args...) + if err != nil { + return ctxerr.Wrap(ctx, err, "get critical vulnerabilities count") + } + criticalCounts = append(criticalCounts, batchCriticalCounts...) 
} // Update the host_issues table, including deleting items with no issues @@ -5417,7 +5439,7 @@ func (ds *Datastore) UpdateHostIssuesVulnerabilities(ctx context.Context) error for j := start; j < end; j++ { hostIDs = append(hostIDs, criticalCounts[j].HostID) } - stmt, args, err = sqlx.In( + stmt, args, err := sqlx.In( "UPDATE host_issues SET critical_vulnerabilities_count = 0, total_issues_count = failing_policies_count WHERE host_id NOT IN (?)", hostIDs, )