diff --git a/changes/issue-6116-filter-users-everywhere b/changes/issue-6116-filter-users-everywhere
new file mode 100644
index 0000000000..d10ab83f8b
--- /dev/null
+++ b/changes/issue-6116-filter-users-everywhere
@@ -0,0 +1 @@
+* Use an improved users query in every query we send osquery
diff --git a/server/service/osquery_utils/queries.go b/server/service/osquery_utils/queries.go
index a7c4d80e4d..3e2268065f 100644
--- a/server/service/osquery_utils/queries.go
+++ b/server/service/osquery_utils/queries.go
@@ -339,14 +339,34 @@ func discoveryTable(tableName string) string {
 	return fmt.Sprintf("SELECT 1 FROM osquery_registry WHERE active = true AND registry = 'table' AND name = '%s';", tableName)
 }
 
+// usersQueryStr is the osquery SQL that lists users along with their group name. It is the
+// query of usersQuery and is embedded as the cached_users CTE by withCachedUsers.
+//
+// The cached_groups CTE suggests to SQLite that it generate the `groups` table only once.
+//
+// NOTE(review): `directory <> ''` is inside the NOT (...) group, so it only narrows the `sync`
+// exclusion and does not drop users with an empty directory, unlike the previous Windows
+// software query (WHERE directory <> ''). Confirm that this is intended.
+const usersQueryStr = `WITH cached_groups AS (select * from groups)
+ SELECT uid, username, type, groupname, shell
+ FROM users LEFT JOIN cached_groups USING (gid)
+ WHERE type <> 'special' AND shell NOT LIKE '%/false' AND shell NOT LIKE '%/nologin' AND shell NOT LIKE '%/shutdown' AND shell NOT LIKE '%/halt' AND username NOT LIKE '%$' AND username NOT LIKE '\_%' ESCAPE '\' AND NOT (username = 'sync' AND shell ='/bin/sync' AND directory <> '')`
+
+// withCachedUsers returns query with usersQueryStr substituted for its %s verb.
+//
+// query is used as a fmt.Sprintf format string: it must contain exactly one %s, and any
+// literal percent sign in it (e.g. in a LIKE pattern) must be escaped as %%.
+func withCachedUsers(query string) string {
+	return fmt.Sprintf(query, usersQueryStr)
+}
+
 var softwareMacOS = DetailQuery{
 	// Note that we create the cached_users CTE (the WITH clause) in order to suggest to SQLite
 	// that it generates the users once instead of once for each UNIONed query. We use CROSS JOIN to
 	// ensure that the nested loops in the query generation are ordered correctly for the _extensions
 	// tables that need a uid parameter. CROSS JOIN ensures that SQLite does not reorder the loop
 	// nesting, which is important as described in https://youtu.be/hcn3HIcHAAo?t=77.
-	Query: `
-WITH cached_users AS (SELECT * FROM users)
+	Query: withCachedUsers(`WITH cached_users AS (%s)
 SELECT
   name AS name,
   bundle_short_version AS version,
@@ -409,7 +429,7 @@ SELECT
   'homebrew_packages' AS source,
   0 AS last_opened_at
 FROM homebrew_packages;
-`,
+`),
 	Platforms:        []string{"darwin"},
 	DirectIngestFunc: directIngestSoftware,
 }
@@ -423,8 +443,7 @@ var scheduledQueryStats = DetailQuery{
 }
 
 var softwareLinux = DetailQuery{
-	Query: `
-WITH cached_users AS (SELECT * FROM users)
+	Query: withCachedUsers(`WITH cached_users AS (%s)
 SELECT
   name AS name,
   version AS version,
@@ -504,14 +523,13 @@ SELECT
   '' AS vendor,
   '' AS arch
 FROM python_packages;
-`,
+`),
 	Platforms:        fleet.HostLinuxOSs,
 	DirectIngestFunc: directIngestSoftware,
 }
 
 var softwareWindows = DetailQuery{
-	Query: `
-WITH cached_users AS (SELECT * FROM users WHERE directory <> '')
+	Query: withCachedUsers(`WITH cached_users AS (%s)
 SELECT
   name AS name,
   version AS version,
@@ -567,7 +585,7 @@ SELECT
   'Package (Python)' AS type,
   'python_packages' AS source
 FROM python_packages;
-`,
+`),
 	Platforms:        []string{"windows"},
 	DirectIngestFunc: directIngestSoftware,
 }
@@ -577,11 +595,7 @@ var usersQuery = DetailQuery{
 	// the `groups` table only once. Without doing this, on some Windows systems (Domain Controllers)
 	// with many user accounts and groups, this query could be very expensive as the `groups` table
 	// was generated once for each user.
-	Query: `
-WITH cached_groups AS (select * from groups)
-SELECT uid, username, type, groupname, shell
-FROM users LEFT JOIN cached_groups USING (gid)
-WHERE type <> 'special' AND shell NOT LIKE '%/false' AND shell NOT LIKE '%/nologin' AND shell NOT LIKE '%/shutdown' AND shell NOT LIKE '%/halt' AND username NOT LIKE '%$' AND username NOT LIKE '\_%' ESCAPE '\' AND NOT (username = 'sync' AND shell ='/bin/sync')`,
+	Query:            usersQueryStr,
 	DirectIngestFunc: directIngestUsers,
 }
 