mirror of
https://github.com/fleetdm/fleet
synced 2026-04-21 21:47:20 +00:00
<!-- Add the related story/sub-task/bug number, like Resolves #123, or remove if NA --> **Related issue:** Resolves #34677 and #35932 Adding ~450K software to the loadtest, including scripts to add more software in the future. Software is held in a `software.sql` file, which is used to create a sqlite DB during osquery perf run/deployment. # Checklist for submitter ## Testing - [x] QA'd all new/changed functionality manually <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Added support for loading software data from an external SQLite database via a new `--software_db_path` command-line flag for more realistic simulation scenarios. * Added import and SQL generation tools to build and manage custom software libraries. * **Documentation** * Added comprehensive README with setup instructions, tool usage, and end-to-end workflow guidance for the software library. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
553 lines
17 KiB
Go
553 lines
17 KiB
Go
// Package softwaredb provides SQLite database loading for realistic software data used in osquery-perf load testing.
|
|
package softwaredb
|
|
|
|
import (
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"math/rand/v2"
|
|
"os"
|
|
"strings"
|
|
|
|
_ "github.com/mattn/go-sqlite3"
|
|
)
|
|
|
|
// Tunables for software loading and for the periodic mutation of a host's
// software selection.
const (
	// SoftwareMutationProb is the probability of mutating software after initial load.
	SoftwareMutationProb = 0.2
	// MaxSoftwareAdd is the maximum number of software items to add during mutation.
	MaxSoftwareAdd = 20
	// MaxSoftwareRemove is the maximum number of software items to remove during mutation.
	MaxSoftwareRemove = 20
	// MaxSoftwarePerPlatform is the maximum number of software items to load per platform.
	MaxSoftwarePerPlatform = 50000
)
|
|
|
|
// String interning pools to reduce memory usage by reusing common strings.
// Both pools are plain maps with no locking.
var (
	// sourcePool maps every known software source name to its canonical string.
	// It is only read after initialisation.
	sourcePool = map[string]string{
		"apps":              "apps",
		"homebrew_packages": "homebrew_packages",
		"firefox_addons":    "firefox_addons",
		"chrome_extensions": "chrome_extensions",
		"python_packages":   "python_packages",
		"vscode_extensions": "vscode_extensions",
		"safari_extensions": "safari_extensions",
		"programs":          "programs",
		"ie_extensions":     "ie_extensions",
		"deb_packages":      "deb_packages",
		"npm_packages":      "npm_packages",
		"rpm_packages":      "rpm_packages",
		"android_apps":      "android_apps",
		"ios_apps":          "ios_apps",
		"ipados_apps":       "ipados_apps",
		"jetbrains_plugins": "jetbrains_plugins",
	}
	// vendorPool starts empty and is populated during load by internString.
	vendorPool = make(map[string]string)
)
|
|
|
|
// internString returns an interned version of s from the vendor pool, reducing memory usage
|
|
func internString(s string) string {
|
|
if s == "" {
|
|
return ""
|
|
}
|
|
if interned, ok := vendorPool[s]; ok {
|
|
return interned
|
|
}
|
|
vendorPool[s] = s
|
|
return s
|
|
}
|
|
|
|
// internSource returns an interned source string
|
|
func internSource(s string) string {
|
|
if interned, ok := sourcePool[s]; ok {
|
|
return interned
|
|
}
|
|
return s
|
|
}
|
|
|
|
// ptrString returns a pointer to s, or nil if s is empty.
// The pointer refers to this function's own copy of s, so every call with a
// non-empty argument yields a distinct pointer.
func ptrString(s string) *string {
	if s == "" {
		return nil
	}
	return &s
}
|
|
|
|
// RandomSoftwareCount returns a random count between min and max for the given platform
|
|
func RandomSoftwareCount(platform string) int {
|
|
config, ok := platformCounts[platform]
|
|
if !ok {
|
|
return 0
|
|
}
|
|
return config.min + rand.IntN(config.max-config.min+1) // nolint:gosec,G404 // load testing, not security-sensitive
|
|
}
|
|
|
|
// DarwinSoftware represents macOS/iOS software.
// Optional columns are pointers; nil means the value is absent.
type DarwinSoftware struct {
	Name    string
	Version string
	// Source is one of: apps, homebrew_packages, firefox_addons,
	// chrome_extensions, python_packages, vscode_extensions,
	// safari_extensions (interned).
	Source string
	// BundleIdentifier is optional - used by apps.
	BundleIdentifier *string
	// Vendor is optional - used by apps, vscode_extensions (interned).
	Vendor *string
	// ExtensionID is optional - used by firefox_addons, chrome_extensions, vscode_extensions.
	ExtensionID *string
	// ExtensionFor is optional - used by firefox_addons, chrome_extensions, vscode_extensions.
	ExtensionFor *string
}
|
|
|
|
// WindowsSoftware represents Windows software.
// Optional columns are pointers; nil means the value is absent.
type WindowsSoftware struct {
	Name    string
	Version string
	// Source is one of: firefox_addons, chrome_extensions, programs,
	// vscode_extensions, ie_extensions, python_packages, deb_packages
	// (interned).
	Source string
	// Vendor is optional - used by programs, vscode_extensions (interned).
	Vendor *string
	// UpgradeCode is optional - used by programs.
	UpgradeCode *string
	// ExtensionID is optional - used by firefox_addons, chrome_extensions, vscode_extensions.
	ExtensionID *string
	// ExtensionFor is optional - used by firefox_addons, chrome_extensions, vscode_extensions.
	ExtensionFor *string
}
|
|
|
|
// UbuntuSoftware represents Ubuntu/Linux software.
// Optional columns are pointers; nil means the value is absent.
type UbuntuSoftware struct {
	Name    string
	Version string
	// Source is one of: firefox_addons, chrome_extensions, python_packages,
	// deb_packages, vscode_extensions, npm_packages, rpm_packages (interned).
	Source string
	// Vendor is optional - used by rpm_packages, vscode_extensions (interned).
	Vendor *string
	// Arch is optional - used by rpm_packages.
	Arch *string
	// Release is optional - used by rpm_packages.
	Release *string
	// ExtensionID is optional - used by firefox_addons, chrome_extensions, vscode_extensions.
	ExtensionID *string
	// ExtensionFor is optional - used by firefox_addons, chrome_extensions, vscode_extensions.
	ExtensionFor *string
}
|
|
|
|
// DB holds the loaded software data for each platform
|
|
type DB struct {
|
|
Darwin []DarwinSoftware
|
|
Windows []WindowsSoftware
|
|
Ubuntu []UbuntuSoftware
|
|
}
|
|
|
|
// DarwinToMaps converts Darwin software at given indices to osquery result format
|
|
func (db *DB) DarwinToMaps(indices []uint32) []map[string]string {
|
|
results := make([]map[string]string, 0, len(indices))
|
|
for _, idx := range indices {
|
|
s := db.Darwin[idx]
|
|
m := map[string]string{
|
|
"name": s.Name,
|
|
"source": s.Source,
|
|
"version": s.Version,
|
|
}
|
|
if s.BundleIdentifier != nil {
|
|
m["bundle_identifier"] = *s.BundleIdentifier
|
|
}
|
|
if s.Vendor != nil {
|
|
m["vendor"] = *s.Vendor
|
|
}
|
|
if s.ExtensionID != nil {
|
|
m["extension_id"] = *s.ExtensionID
|
|
}
|
|
if s.ExtensionFor != nil {
|
|
m["browser"] = *s.ExtensionFor
|
|
}
|
|
results = append(results, m)
|
|
}
|
|
return results
|
|
}
|
|
|
|
// WindowsToMaps converts Windows software at given indices to osquery result format
|
|
func (db *DB) WindowsToMaps(indices []uint32) []map[string]string {
|
|
results := make([]map[string]string, 0, len(indices))
|
|
for _, idx := range indices {
|
|
s := db.Windows[idx]
|
|
m := map[string]string{
|
|
"name": s.Name,
|
|
"source": s.Source,
|
|
"version": s.Version,
|
|
}
|
|
if s.Vendor != nil {
|
|
m["vendor"] = *s.Vendor
|
|
}
|
|
if s.UpgradeCode != nil {
|
|
m["upgrade_code"] = *s.UpgradeCode
|
|
}
|
|
if s.ExtensionID != nil {
|
|
m["extension_id"] = *s.ExtensionID
|
|
}
|
|
if s.ExtensionFor != nil {
|
|
m["browser"] = *s.ExtensionFor
|
|
}
|
|
results = append(results, m)
|
|
}
|
|
return results
|
|
}
|
|
|
|
// UbuntuToMaps converts Ubuntu software at given indices to osquery result format
|
|
func (db *DB) UbuntuToMaps(indices []uint32) []map[string]string {
|
|
results := make([]map[string]string, 0, len(indices))
|
|
for _, idx := range indices {
|
|
s := db.Ubuntu[idx]
|
|
m := map[string]string{
|
|
"name": s.Name,
|
|
"source": s.Source,
|
|
"version": s.Version,
|
|
}
|
|
if s.Vendor != nil {
|
|
m["vendor"] = *s.Vendor
|
|
}
|
|
if s.Arch != nil {
|
|
m["arch"] = *s.Arch
|
|
}
|
|
if s.Release != nil {
|
|
m["release"] = *s.Release
|
|
}
|
|
if s.ExtensionID != nil {
|
|
m["extension_id"] = *s.ExtensionID
|
|
}
|
|
if s.ExtensionFor != nil {
|
|
m["browser"] = *s.ExtensionFor
|
|
}
|
|
results = append(results, m)
|
|
}
|
|
return results
|
|
}
|
|
|
|
// MaybeMutateSoftware randomly mutates software indices (adds/removes items) 20% of the time.
|
|
// This simulates software being installed/uninstalled on a host over time.
|
|
// maxPoolSize is the total number of available software items in the database.
|
|
func MaybeMutateSoftware(indices []uint32, maxPoolSize int) []uint32 {
|
|
// Only mutate 20% of the time
|
|
if rand.Float64() >= SoftwareMutationProb { // nolint:gosec,G404 // load testing, not security-sensitive
|
|
return indices
|
|
}
|
|
|
|
// Copy indices to avoid mutating the original slice
|
|
result := make([]uint32, len(indices))
|
|
copy(result, indices)
|
|
|
|
// Randomly remove 0-20 items
|
|
numToRemove := rand.IntN(MaxSoftwareRemove + 1) // nolint:gosec,G404 // load testing, not security-sensitive
|
|
if numToRemove > len(result) {
|
|
numToRemove = len(result)
|
|
}
|
|
if numToRemove > 0 {
|
|
// Remove random items
|
|
rand.Shuffle(len(result), func(i, j int) { // nolint:gosec,G404 // load testing, not security-sensitive
|
|
result[i], result[j] = result[j], result[i]
|
|
})
|
|
result = result[:len(result)-numToRemove]
|
|
}
|
|
|
|
// Randomly add 0-20 items
|
|
numToAdd := rand.IntN(MaxSoftwareAdd + 1) // nolint:gosec,G404 // load testing, not security-sensitive
|
|
if numToAdd > 0 {
|
|
// Create a map of existing indices for quick lookup
|
|
existing := make(map[uint32]bool, len(result))
|
|
for _, idx := range result {
|
|
existing[idx] = true
|
|
}
|
|
|
|
// Add new random indices that don't already exist
|
|
added := 0
|
|
attempts := 0
|
|
maxAttempts := numToAdd * 10 // Avoid infinite loop
|
|
for added < numToAdd && attempts < maxAttempts {
|
|
newIdx := uint32(rand.IntN(maxPoolSize)) // nolint:gosec,G404 // load testing, not security-sensitive
|
|
if !existing[newIdx] {
|
|
result = append(result, newIdx)
|
|
existing[newIdx] = true
|
|
added++
|
|
}
|
|
attempts++
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// platformCounts lists, per platform, the software sources that belong to it
// and the inclusive range of software items a simulated host reports.
//
// Platform-specific counts based on production averages (±20%):
//   - Ubuntu: 2,460 ± 20% = 1,968 to 2,952
//   - Darwin: 453 ± 20% = 362 to 544
//   - Windows: 251 ± 20% = 201 to 301
var platformCounts = map[string]struct {
	sources []string
	min     int
	max     int
}{
	"darwin": {
		sources: []string{"apps", "homebrew_packages", "firefox_addons", "chrome_extensions", "python_packages", "vscode_extensions", "safari_extensions"},
		min:     362,
		max:     544,
	},
	"windows": {
		sources: []string{"firefox_addons", "chrome_extensions", "programs", "vscode_extensions", "ie_extensions", "python_packages", "deb_packages"},
		min:     201,
		max:     301,
	},
	"ubuntu": {
		sources: []string{"firefox_addons", "chrome_extensions", "python_packages", "deb_packages", "vscode_extensions", "npm_packages", "rpm_packages"},
		min:     1968,
		max:     2952,
	},
}
|
|
|
|
// LoadFromDatabase loads software from the SQLite database with platform-specific counts.
|
|
// If the database doesn't exist, it attempts to auto-generate it from software.sql.
|
|
func LoadFromDatabase(dbPath string) (*DB, error) {
|
|
// Check if database file exists
|
|
if _, err := os.Stat(dbPath); os.IsNotExist(err) {
|
|
// Database doesn't exist, try to generate it from software.sql
|
|
log.Printf("Database not found at %s, attempting to generate from software.sql...", dbPath)
|
|
|
|
// Look for software.sql in the same directory
|
|
sqlPath := strings.TrimSuffix(dbPath, ".db") + ".sql"
|
|
if _, err := os.Stat(sqlPath); os.IsNotExist(err) {
|
|
return nil, fmt.Errorf("software database not found: %s\nAlso could not find SQL file: %s\n\nPlease ensure software.sql exists, or create the database manually:\n cd cmd/osquery-perf/software-library\n sqlite3 software.db < software.sql", dbPath, sqlPath)
|
|
}
|
|
|
|
if err := generateDatabaseFromSQL(dbPath, sqlPath); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// Open database
|
|
db, err := sql.Open("sqlite3", dbPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opening software database: %w", err)
|
|
}
|
|
defer db.Close()
|
|
|
|
// Verify table exists
|
|
var count int
|
|
err = db.QueryRow("SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='software'").Scan(&count)
|
|
if err != nil || count == 0 {
|
|
return nil, errors.New("database exists but 'software' table not found\n\nPlease initialize the database:\n cd cmd/osquery-perf/software-library\n sqlite3 software.db < software.sql")
|
|
}
|
|
|
|
// Load ALL software for each platform (agents will select random subsets)
|
|
softwareDB := &DB{}
|
|
|
|
// Load Darwin software
|
|
darwinConfig := platformCounts["darwin"]
|
|
darwinSoftware, err := loadDarwinSoftware(db, darwinConfig.sources)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
softwareDB.Darwin = darwinSoftware
|
|
log.Printf("Loaded %d darwin software items from database", len(darwinSoftware))
|
|
|
|
// Load Windows software
|
|
windowsConfig := platformCounts["windows"]
|
|
windowsSoftware, err := loadWindowsSoftware(db, windowsConfig.sources)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
softwareDB.Windows = windowsSoftware
|
|
log.Printf("Loaded %d windows software items from database", len(windowsSoftware))
|
|
|
|
// Load Ubuntu software
|
|
ubuntuConfig := platformCounts["ubuntu"]
|
|
ubuntuSoftware, err := loadUbuntuSoftware(db, ubuntuConfig.sources)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
softwareDB.Ubuntu = ubuntuSoftware
|
|
log.Printf("Loaded %d ubuntu software items from database", len(ubuntuSoftware))
|
|
|
|
return softwareDB, nil
|
|
}
|
|
|
|
// generateDatabaseFromSQL creates a SQLite database at dbPath by executing the
// contents of the SQL file at sqlPath.
//
// The whole SQL file is read into memory and executed in a single Exec call.
// If execution fails, the database handle is closed and the partially written
// file at dbPath is removed before the error is returned. A failure to close
// the database after a successful run is reported as an error.
func generateDatabaseFromSQL(dbPath, sqlPath string) error {
	sqlContent, err := os.ReadFile(sqlPath)
	if err != nil {
		return fmt.Errorf("reading SQL file %s: %w", sqlPath, err)
	}

	log.Printf("Found %s (%d bytes), creating database...", sqlPath, len(sqlContent))

	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return fmt.Errorf("creating database: %w", err)
	}

	if _, err := db.Exec(string(sqlContent)); err != nil {
		// Release the file handle before deleting: removing a file that is
		// still open fails on some platforms. The Exec error is the one worth
		// reporting, so a secondary Close error is deliberately dropped.
		_ = db.Close()
		if rmErr := os.Remove(dbPath); rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
			log.Printf("could not remove partial database %s: %v", dbPath, rmErr)
		}
		return fmt.Errorf("executing SQL file: %w", err)
	}

	if err := db.Close(); err != nil {
		return fmt.Errorf("closing database %s: %w", dbPath, err)
	}

	log.Printf("✅ Successfully created database from %s", sqlPath)
	return nil
}
|
|
|
|
// loadDarwinSoftware loads all macOS/iOS software from the database for the given sources
|
|
func loadDarwinSoftware(db *sql.DB, sources []string) ([]DarwinSoftware, error) {
|
|
sourceList := "'" + strings.Join(sources, "', '") + "'"
|
|
// nolint:gosec // sources are hardcoded, not user input
|
|
query := fmt.Sprintf(`
|
|
SELECT name, version, source, bundle_identifier, vendor, extension_id, extension_for
|
|
FROM software
|
|
WHERE source IN (%s)
|
|
ORDER BY RANDOM()
|
|
LIMIT %d
|
|
`, sourceList, MaxSoftwarePerPlatform)
|
|
|
|
rows, err := db.Query(query)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("querying darwin software: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
software := make([]DarwinSoftware, 0, MaxSoftwarePerPlatform)
|
|
for rows.Next() {
|
|
var sw DarwinSoftware
|
|
var source string
|
|
var bundleID, vendor, extensionID, extensionFor sql.NullString
|
|
|
|
err := rows.Scan(&sw.Name, &sw.Version, &source, &bundleID, &vendor, &extensionID, &extensionFor)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("scanning darwin software row: %w", err)
|
|
}
|
|
|
|
// Use interned source string
|
|
sw.Source = internSource(source)
|
|
|
|
// Use pointers for optional fields
|
|
if bundleID.Valid {
|
|
sw.BundleIdentifier = ptrString(bundleID.String)
|
|
}
|
|
if vendor.Valid {
|
|
sw.Vendor = ptrString(internString(vendor.String))
|
|
}
|
|
if extensionID.Valid {
|
|
sw.ExtensionID = ptrString(extensionID.String)
|
|
}
|
|
if extensionFor.Valid {
|
|
sw.ExtensionFor = ptrString(extensionFor.String)
|
|
}
|
|
|
|
software = append(software, sw)
|
|
}
|
|
|
|
if err := rows.Err(); err != nil {
|
|
return nil, fmt.Errorf("iterating darwin software rows: %w", err)
|
|
}
|
|
|
|
return software, nil
|
|
}
|
|
|
|
// loadWindowsSoftware loads all Windows software from the database for the given sources
|
|
func loadWindowsSoftware(db *sql.DB, sources []string) ([]WindowsSoftware, error) {
|
|
sourceList := "'" + strings.Join(sources, "', '") + "'"
|
|
// nolint:gosec // sources are hardcoded, not user input
|
|
query := fmt.Sprintf(`
|
|
SELECT name, version, source, vendor, upgrade_code, extension_id, extension_for
|
|
FROM software
|
|
WHERE source IN (%s)
|
|
ORDER BY RANDOM()
|
|
LIMIT %d
|
|
`, sourceList, MaxSoftwarePerPlatform)
|
|
|
|
rows, err := db.Query(query)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("querying windows software: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
software := make([]WindowsSoftware, 0, MaxSoftwarePerPlatform)
|
|
for rows.Next() {
|
|
var sw WindowsSoftware
|
|
var source string
|
|
var vendor, upgradeCode, extensionID, extensionFor sql.NullString
|
|
|
|
err := rows.Scan(&sw.Name, &sw.Version, &source, &vendor, &upgradeCode, &extensionID, &extensionFor)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("scanning windows software row: %w", err)
|
|
}
|
|
|
|
// Use interned source string
|
|
sw.Source = internSource(source)
|
|
|
|
// Use pointers for optional fields
|
|
if vendor.Valid {
|
|
sw.Vendor = ptrString(internString(vendor.String))
|
|
}
|
|
if upgradeCode.Valid {
|
|
sw.UpgradeCode = ptrString(upgradeCode.String)
|
|
}
|
|
if extensionID.Valid {
|
|
sw.ExtensionID = ptrString(extensionID.String)
|
|
}
|
|
if extensionFor.Valid {
|
|
sw.ExtensionFor = ptrString(extensionFor.String)
|
|
}
|
|
|
|
software = append(software, sw)
|
|
}
|
|
|
|
if err := rows.Err(); err != nil {
|
|
return nil, fmt.Errorf("iterating windows software rows: %w", err)
|
|
}
|
|
|
|
return software, nil
|
|
}
|
|
|
|
// loadUbuntuSoftware loads all Ubuntu/Linux software from the database for the given sources
|
|
func loadUbuntuSoftware(db *sql.DB, sources []string) ([]UbuntuSoftware, error) {
|
|
sourceList := "'" + strings.Join(sources, "', '") + "'"
|
|
// nolint:gosec // sources are hardcoded, not user input
|
|
query := fmt.Sprintf(`
|
|
SELECT name, version, source, vendor, arch, release, extension_id, extension_for
|
|
FROM software
|
|
WHERE source IN (%s)
|
|
ORDER BY RANDOM()
|
|
LIMIT %d
|
|
`, sourceList, MaxSoftwarePerPlatform)
|
|
|
|
rows, err := db.Query(query)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("querying ubuntu software: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
software := make([]UbuntuSoftware, 0, MaxSoftwarePerPlatform)
|
|
for rows.Next() {
|
|
var sw UbuntuSoftware
|
|
var source string
|
|
var vendor, arch, release, extensionID, extensionFor sql.NullString
|
|
|
|
err := rows.Scan(&sw.Name, &sw.Version, &source, &vendor, &arch, &release, &extensionID, &extensionFor)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("scanning ubuntu software row: %w", err)
|
|
}
|
|
|
|
// Use interned source string
|
|
sw.Source = internSource(source)
|
|
|
|
// Use pointers for optional fields
|
|
if vendor.Valid {
|
|
sw.Vendor = ptrString(internString(vendor.String))
|
|
}
|
|
if arch.Valid {
|
|
sw.Arch = ptrString(arch.String)
|
|
}
|
|
if release.Valid {
|
|
sw.Release = ptrString(release.String)
|
|
}
|
|
if extensionID.Valid {
|
|
sw.ExtensionID = ptrString(extensionID.String)
|
|
}
|
|
if extensionFor.Valid {
|
|
sw.ExtensionFor = ptrString(extensionFor.String)
|
|
}
|
|
|
|
software = append(software, sw)
|
|
}
|
|
|
|
if err := rows.Err(); err != nil {
|
|
return nil, fmt.Errorf("iterating ubuntu software rows: %w", err)
|
|
}
|
|
|
|
return software, nil
|
|
}
|