Optimize users detail query (#3754)

@Smjert reported instances of Windows Domain Controllers having massive
resource utilization and being killed by the watchdog when running this
query. In his test environment, this new query performs much better.
This commit is contained in:
Zach Wasserman 2022-01-18 16:39:32 -08:00 committed by GitHub
parent c975766545
commit a79d5fbfcc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 1 deletions

View file

@ -0,0 +1 @@
* Optimize users detail query to improve performance when running on systems with a large number of users (particularly Windows Domain Controllers).

View file

@ -499,7 +499,15 @@ FROM python_packages;
}
// usersQuery is the detail query that selects uid, username, type, groupname and shell for each
// row of `users`, left-joined to the group whose gid matches the user's gid. It filters out rows
// whose type is 'special', whose shell ends in /false, /nologin, /shutdown or /halt, whose
// username ends in `$` or starts with `_`, and the `sync` account using /bin/sync. It sets
// directIngestUsers as its DirectIngestFunc.
var usersQuery = DetailQuery{
// NOTE(review): this excerpt appears to be a diff with its +/- markers stripped; the single-line
// Query immediately below looks like the pre-change version that the CTE-based Query further
// down replaces — confirm against the repository before relying on it.
Query: `SELECT uid, username, type, groupname, shell FROM users u LEFT JOIN groups g ON g.gid=u.gid WHERE type <> 'special' AND shell NOT LIKE '%/false' AND shell NOT LIKE '%/nologin' AND shell NOT LIKE '%/shutdown' AND shell NOT LIKE '%/halt' AND username NOT LIKE '%$' AND username NOT LIKE '\_%' ESCAPE '\' AND NOT (username = 'sync' AND shell ='/bin/sync')`,
// Note we use the cached_groups CTE (`WITH` clause) here to suggest to SQLite that it generate
// the `groups` table only once. Without doing this, on some Windows systems (Domain Controllers)
// with many user accounts and groups, this query could be very expensive as the `groups` table
// was generated once for each user.
Query: `
WITH cached_groups AS (select * from groups)
SELECT uid, username, type, groupname, shell
FROM users LEFT JOIN cached_groups USING (gid)
WHERE type <> 'special' AND shell NOT LIKE '%/false' AND shell NOT LIKE '%/nologin' AND shell NOT LIKE '%/shutdown' AND shell NOT LIKE '%/halt' AND username NOT LIKE '%$' AND username NOT LIKE '\_%' ESCAPE '\' AND NOT (username = 'sync' AND shell ='/bin/sync')`,
DirectIngestFunc: directIngestUsers,
}