fleet/cmd/osquery-perf/softwaredb/softwaredb.go
Victor Lyuboslavsky 6ab79dd5a7
Add more software to loadtest (#35756)
<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #34677 and #35932

Adds ~450K software entries to the load test, including scripts for adding
more software in the future.
The software is stored in a `software.sql` file, which is used to create a
SQLite DB during an osquery-perf run/deployment.

# Checklist for submitter

## Testing

- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **New Features**
* Added support for loading software data from an external SQLite
database via a new `--software_db_path` command-line flag for more
realistic simulation scenarios.
* Added import and SQL generation tools to build and manage custom
software libraries.

* **Documentation**
* Added comprehensive README with setup instructions, tool usage, and
end-to-end workflow guidance for the software library.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2025-11-21 10:42:19 -06:00

553 lines
17 KiB
Go

// Package softwaredb provides SQLite database loading for realistic software data used in osquery-perf load testing.
package softwaredb
import (
"database/sql"
"errors"
"fmt"
"log"
"math/rand/v2"
"os"
"strings"
_ "github.com/mattn/go-sqlite3"
)
const (
// SoftwareMutationProb is the probability (0.0-1.0) that a single call to
// MaybeMutateSoftware changes the given software indices at all.
SoftwareMutationProb = 0.2
// MaxSoftwareAdd is the inclusive upper bound on how many software items a
// single mutation may add.
MaxSoftwareAdd = 20
// MaxSoftwareRemove is the inclusive upper bound on how many software items a
// single mutation may remove.
MaxSoftwareRemove = 20
// MaxSoftwarePerPlatform caps the number of rows loaded from the database for
// each platform; it is used as the SQL LIMIT and as the initial slice capacity
// in the per-platform load functions.
MaxSoftwarePerPlatform = 50000
)
// String interning pools to reduce memory usage by reusing common strings.
//
// sourcePool is a fixed table of the known software source names; internSource
// looks values up here and falls back to the input when a source is unknown.
// vendorPool starts empty and is filled by internString as vendors are seen.
// NOTE(review): vendorPool is written without any locking in this file;
// presumably loading is single-threaded — confirm before calling concurrently.
var (
sourcePool = map[string]string{
"apps": "apps",
"homebrew_packages": "homebrew_packages",
"firefox_addons": "firefox_addons",
"chrome_extensions": "chrome_extensions",
"python_packages": "python_packages",
"vscode_extensions": "vscode_extensions",
"safari_extensions": "safari_extensions",
"programs": "programs",
"ie_extensions": "ie_extensions",
"deb_packages": "deb_packages",
"npm_packages": "npm_packages",
"rpm_packages": "rpm_packages",
"android_apps": "android_apps",
"ios_apps": "ios_apps",
"ipados_apps": "ipados_apps",
"jetbrains_plugins": "jetbrains_plugins",
}
vendorPool = make(map[string]string) // populated during load
)
// internString canonicalizes s through vendorPool so that repeated vendor
// names share a single stored string. The first occurrence of a value is
// recorded in the pool; later occurrences return the pooled copy. The empty
// string is returned as-is and never stored.
func internString(s string) string {
	if len(s) == 0 {
		return ""
	}
	canonical, seen := vendorPool[s]
	if !seen {
		// First sighting: this copy becomes the canonical one.
		vendorPool[s] = s
		canonical = s
	}
	return canonical
}
// internSource maps a software source name to its shared copy in sourcePool.
// Unknown sources are returned unchanged and are not added to the pool.
func internSource(s string) string {
	canonical, known := sourcePool[s]
	if !known {
		return s
	}
	return canonical
}
// ptrString converts s into an optional string: nil for the empty string,
// otherwise a pointer to a copy of s.
func ptrString(s string) *string {
	var p *string
	if s != "" {
		p = &s
	}
	return p
}
// RandomSoftwareCount picks a random software count for platform, uniformly
// from the inclusive [min, max] range configured in platformCounts.
// It returns 0 for a platform that has no entry in platformCounts.
func RandomSoftwareCount(platform string) int {
	bounds, known := platformCounts[platform]
	if !known {
		return 0
	}
	// +1 makes the upper bound inclusive, since IntN excludes its argument.
	span := bounds.max - bounds.min + 1
	return bounds.min + rand.IntN(span) // nolint:gosec,G404 // load testing, not security-sensitive
}
// DarwinSoftware represents macOS/iOS software.
// Optional columns are pointers so that a missing value is nil; DarwinToMaps
// leaves nil fields out of the rows it generates.
type DarwinSoftware struct {
Name string
Version string
Source string // apps, homebrew_packages, firefox_addons, chrome_extensions, python_packages, vscode_extensions, safari_extensions (interned)
BundleIdentifier *string // optional - used by apps
Vendor *string // optional - used by apps, vscode_extensions (interned)
ExtensionID *string // optional - used by firefox_addons, chrome_extensions, vscode_extensions
ExtensionFor *string // optional - used by firefox_addons, chrome_extensions, vscode_extensions
}
// WindowsSoftware represents Windows software.
// Optional columns are pointers so that a missing value is nil; WindowsToMaps
// leaves nil fields out of the rows it generates.
type WindowsSoftware struct {
Name string
Version string
Source string // firefox_addons, chrome_extensions, programs, vscode_extensions, ie_extensions, python_packages, deb_packages (interned)
Vendor *string // optional - used by programs, vscode_extensions (interned)
UpgradeCode *string // optional - used by programs
ExtensionID *string // optional - used by firefox_addons, chrome_extensions, vscode_extensions
ExtensionFor *string // optional - used by firefox_addons, chrome_extensions, vscode_extensions
}
// UbuntuSoftware represents Ubuntu/Linux software.
// Optional columns are pointers so that a missing value is nil; UbuntuToMaps
// leaves nil fields out of the rows it generates.
type UbuntuSoftware struct {
Name string
Version string
Source string // firefox_addons, chrome_extensions, python_packages, deb_packages, vscode_extensions, npm_packages, rpm_packages (interned)
Vendor *string // optional - used by rpm_packages, vscode_extensions (interned)
Arch *string // optional - used by rpm_packages
Release *string // optional - used by rpm_packages
ExtensionID *string // optional - used by firefox_addons, chrome_extensions, vscode_extensions
ExtensionFor *string // optional - used by firefox_addons, chrome_extensions, vscode_extensions
}
// DB holds the loaded software data for each platform.
// The index slices passed to DarwinToMaps, WindowsToMaps and UbuntuToMaps are
// positions into the corresponding slice below.
type DB struct {
Darwin []DarwinSoftware
Windows []WindowsSoftware
Ubuntu []UbuntuSoftware
}
// DarwinToMaps renders the Darwin entries selected by indices as osquery-style
// result rows, one map per index and in the same order as indices.
// Every row carries "name", "source" and "version"; "bundle_identifier",
// "vendor", "extension_id" and "browser" are included only when the entry has
// a value for them. Each index must be a valid position in db.Darwin.
func (db *DB) DarwinToMaps(indices []uint32) []map[string]string {
	rows := make([]map[string]string, 0, len(indices))
	for i := range indices {
		sw := &db.Darwin[indices[i]]
		row := map[string]string{
			"name":    sw.Name,
			"source":  sw.Source,
			"version": sw.Version,
		}
		// Optional columns are emitted only when present.
		optional := [...]struct {
			column string
			value  *string
		}{
			{"bundle_identifier", sw.BundleIdentifier},
			{"vendor", sw.Vendor},
			{"extension_id", sw.ExtensionID},
			{"browser", sw.ExtensionFor},
		}
		for _, opt := range optional {
			if opt.value != nil {
				row[opt.column] = *opt.value
			}
		}
		rows = append(rows, row)
	}
	return rows
}
// WindowsToMaps renders the Windows entries selected by indices as
// osquery-style result rows, one map per index and in the same order as
// indices. Every row carries "name", "source" and "version"; "vendor",
// "upgrade_code", "extension_id" and "browser" are included only when the
// entry has a value for them. Each index must be a valid position in
// db.Windows.
func (db *DB) WindowsToMaps(indices []uint32) []map[string]string {
	// addIfSet copies an optional column into row only when it has a value.
	addIfSet := func(row map[string]string, column string, value *string) {
		if value != nil {
			row[column] = *value
		}
	}

	rows := make([]map[string]string, 0, len(indices))
	for _, position := range indices {
		item := db.Windows[position]

		row := make(map[string]string)
		row["name"] = item.Name
		row["source"] = item.Source
		row["version"] = item.Version
		addIfSet(row, "vendor", item.Vendor)
		addIfSet(row, "upgrade_code", item.UpgradeCode)
		addIfSet(row, "extension_id", item.ExtensionID)
		addIfSet(row, "browser", item.ExtensionFor)

		rows = append(rows, row)
	}
	return rows
}
// UbuntuToMaps renders the Ubuntu entries selected by indices as osquery-style
// result rows, one map per index and in the same order as indices.
// Every row carries "name", "source" and "version"; "vendor", "arch",
// "release", "extension_id" and "browser" are included only when the entry has
// a value for them. Each index must be a valid position in db.Ubuntu.
func (db *DB) UbuntuToMaps(indices []uint32) []map[string]string {
	rows := make([]map[string]string, len(indices))
	for n, position := range indices {
		pkg := &db.Ubuntu[position]
		row := map[string]string{
			"name":    pkg.Name,
			"source":  pkg.Source,
			"version": pkg.Version,
		}
		if v := pkg.Vendor; v != nil {
			row["vendor"] = *v
		}
		if v := pkg.Arch; v != nil {
			row["arch"] = *v
		}
		if v := pkg.Release; v != nil {
			row["release"] = *v
		}
		if v := pkg.ExtensionID; v != nil {
			row["extension_id"] = *v
		}
		if v := pkg.ExtensionFor; v != nil {
			row["browser"] = *v
		}
		rows[n] = row
	}
	return rows
}
// MaybeMutateSoftware randomly mutates software indices (removes and adds
// items) with probability SoftwareMutationProb. This simulates software being
// installed/uninstalled on a host over time.
//
// indices are positions into a platform's software slice; maxPoolSize is the
// total number of available software items for that platform, so newly added
// indices are drawn from [0, maxPoolSize).
//
// When no mutation happens the input slice itself is returned. Otherwise a new
// slice is returned and the input is left untouched: up to MaxSoftwareRemove
// random entries are dropped (the surviving entries end up in shuffled order)
// and up to MaxSoftwareAdd indices not already present are appended. Fewer
// items than drawn may be added when the pool is (nearly) exhausted, and
// nothing is added when maxPoolSize is not positive.
func MaybeMutateSoftware(indices []uint32, maxPoolSize int) []uint32 {
	// Leave the host's software untouched most of the time.
	if rand.Float64() >= SoftwareMutationProb { // nolint:gosec,G404 // load testing, not security-sensitive
		return indices
	}

	// Work on a copy so the caller's slice is never modified.
	result := make([]uint32, len(indices))
	copy(result, indices)

	// Remove a random number of items, never more than are present.
	numToRemove := rand.IntN(MaxSoftwareRemove + 1) // nolint:gosec,G404 // load testing, not security-sensitive
	if numToRemove > len(result) {
		numToRemove = len(result)
	}
	if numToRemove > 0 {
		// Shuffling and truncating drops a uniformly random subset.
		rand.Shuffle(len(result), func(i, j int) { // nolint:gosec,G404 // load testing, not security-sensitive
			result[i], result[j] = result[j], result[i]
		})
		result = result[:len(result)-numToRemove]
	}

	// Add a random number of items.
	numToAdd := rand.IntN(MaxSoftwareAdd + 1) // nolint:gosec,G404 // load testing, not security-sensitive
	if numToAdd == 0 || maxPoolSize <= 0 {
		// rand.IntN panics for a non-positive bound, and an empty pool has
		// nothing to offer anyway.
		return result
	}

	// Track which indices are already present so no duplicates are added.
	existing := make(map[uint32]struct{}, len(result)+numToAdd)
	for _, idx := range result {
		existing[idx] = struct{}{}
	}

	// Bound the number of draws so a nearly exhausted pool cannot spin forever.
	maxAttempts := numToAdd * 10
	added := 0
	for attempts := 0; added < numToAdd && attempts < maxAttempts; attempts++ {
		newIdx := uint32(rand.IntN(maxPoolSize)) // nolint:gosec,G404 // load testing, not security-sensitive
		if _, dup := existing[newIdx]; dup {
			continue
		}
		result = append(result, newIdx)
		existing[newIdx] = struct{}{}
		added++
	}
	return result
}
// platformCounts holds, per platform, the software sources to load from the
// database and the inclusive [min, max] range that RandomSoftwareCount draws
// a per-host software count from.
//
// Platform-specific counts based on production averages (±20%):
// - Ubuntu: 2,460 ± 20% = 1,968 to 2,952
// - Darwin: 453 ± 20% = 362 to 544
// - Windows: 251 ± 20% = 201 to 301
//
// NOTE(review): "deb_packages" is listed under windows; presumably this models
// packages reported from WSL — confirm.
var platformCounts = map[string]struct {
sources []string
min int
max int
}{
"darwin": {
sources: []string{"apps", "homebrew_packages", "firefox_addons", "chrome_extensions", "python_packages", "vscode_extensions", "safari_extensions"},
min: 362,
max: 544,
},
"windows": {
sources: []string{"firefox_addons", "chrome_extensions", "programs", "vscode_extensions", "ie_extensions", "python_packages", "deb_packages"},
min: 201,
max: 301,
},
"ubuntu": {
sources: []string{"firefox_addons", "chrome_extensions", "python_packages", "deb_packages", "vscode_extensions", "npm_packages", "rpm_packages"},
min: 1968,
max: 2952,
},
}
// LoadFromDatabase opens the SQLite database at dbPath and loads the software
// catalog for the darwin, windows and ubuntu platforms.
//
// If dbPath does not exist, the database is first generated from a SQL file
// whose path is dbPath with a trailing ".db" replaced by ".sql" (for example
// software.db -> software.sql).
//
// Each platform's catalog is a random selection of at most
// MaxSoftwarePerPlatform rows from the sources configured in platformCounts;
// agents later pick random subsets of it.
//
// An error is returned when the database can neither be found nor generated,
// cannot be opened or queried, or does not contain a "software" table.
func LoadFromDatabase(dbPath string) (*DB, error) {
	if _, err := os.Stat(dbPath); err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			// Permission problems and the like: report them as they are
			// instead of failing later with a misleading message.
			return nil, fmt.Errorf("checking software database %s: %w", dbPath, err)
		}

		// Database doesn't exist, try to generate it from software.sql
		log.Printf("Database not found at %s, attempting to generate from software.sql...", dbPath)

		// Look for the SQL file next to the database, under the same base name.
		sqlPath := strings.TrimSuffix(dbPath, ".db") + ".sql"
		if _, err := os.Stat(sqlPath); errors.Is(err, os.ErrNotExist) {
			return nil, fmt.Errorf("software database not found: %s\nAlso could not find SQL file: %s\n\nPlease ensure software.sql exists, or create the database manually:\n cd cmd/osquery-perf/software-library\n sqlite3 software.db < software.sql", dbPath, sqlPath)
		}

		if err := generateDatabaseFromSQL(dbPath, sqlPath); err != nil {
			return nil, err
		}
	}

	// Open database
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return nil, fmt.Errorf("opening software database: %w", err)
	}
	defer db.Close()

	// Verify table exists. A failing query and a missing table are different
	// problems, so they are reported separately.
	var count int
	if err := db.QueryRow("SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='software'").Scan(&count); err != nil {
		return nil, fmt.Errorf("checking for 'software' table in %s: %w", dbPath, err)
	}
	if count == 0 {
		return nil, errors.New("database exists but 'software' table not found\n\nPlease initialize the database:\n cd cmd/osquery-perf/software-library\n sqlite3 software.db < software.sql")
	}

	// Load the catalog for each platform (agents will select random subsets).
	softwareDB := &DB{}

	// Load Darwin software
	darwinSoftware, err := loadDarwinSoftware(db, platformCounts["darwin"].sources)
	if err != nil {
		return nil, err
	}
	softwareDB.Darwin = darwinSoftware
	log.Printf("Loaded %d darwin software items from database", len(darwinSoftware))

	// Load Windows software
	windowsSoftware, err := loadWindowsSoftware(db, platformCounts["windows"].sources)
	if err != nil {
		return nil, err
	}
	softwareDB.Windows = windowsSoftware
	log.Printf("Loaded %d windows software items from database", len(windowsSoftware))

	// Load Ubuntu software
	ubuntuSoftware, err := loadUbuntuSoftware(db, platformCounts["ubuntu"].sources)
	if err != nil {
		return nil, err
	}
	softwareDB.Ubuntu = ubuntuSoftware
	log.Printf("Loaded %d ubuntu software items from database", len(ubuntuSoftware))

	return softwareDB, nil
}
// generateDatabaseFromSQL creates a SQLite database at dbPath by executing the
// entire contents of the SQL file at sqlPath against it.
//
// If executing the SQL or closing the database fails, the partially written
// database file is removed so that a later run does not mistake it for a
// complete one, and the failure is returned.
func generateDatabaseFromSQL(dbPath, sqlPath string) error {
	// Read SQL file
	sqlContent, err := os.ReadFile(sqlPath)
	if err != nil {
		return fmt.Errorf("reading SQL file %s: %w", sqlPath, err)
	}
	log.Printf("Found %s (%d bytes), creating database...", sqlPath, len(sqlContent))

	// Create database and execute SQL
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return fmt.Errorf("creating database: %w", err)
	}

	_, execErr := db.Exec(string(sqlContent))
	// Close before any cleanup: some platforms refuse to delete a file that is
	// still open, so the handle must be released before os.Remove.
	closeErr := db.Close()

	if execErr != nil || closeErr != nil {
		// Clean up partial database
		if rmErr := os.Remove(dbPath); rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
			log.Printf("could not remove partial database %s: %v", dbPath, rmErr)
		}
		if execErr != nil {
			return fmt.Errorf("executing SQL file: %w", execErr)
		}
		return fmt.Errorf("closing database: %w", closeErr)
	}

	log.Printf("✅ Successfully created database from %s", sqlPath)
	return nil
}
// loadDarwinSoftware reads macOS/iOS software rows from the "software" table,
// restricted to the given sources. Rows are returned in random order and
// capped at MaxSoftwarePerPlatform. Source and vendor strings are interned;
// nullable columns that are NULL or empty become nil pointers.
func loadDarwinSoftware(db *sql.DB, sources []string) ([]DarwinSoftware, error) {
	const queryTemplate = `
SELECT name, version, source, bundle_identifier, vendor, extension_id, extension_for
FROM software
WHERE source IN (%s)
ORDER BY RANDOM()
LIMIT %d
`
	quotedSources := "'" + strings.Join(sources, "', '") + "'"
	// nolint:gosec // sources are hardcoded, not user input
	query := fmt.Sprintf(queryTemplate, quotedSources, MaxSoftwarePerPlatform)

	rows, err := db.Query(query)
	if err != nil {
		return nil, fmt.Errorf("querying darwin software: %w", err)
	}
	defer rows.Close()

	// optional turns a nullable column into nil when it is NULL or empty.
	optional := func(col sql.NullString) *string {
		if !col.Valid {
			return nil
		}
		return ptrString(col.String)
	}

	items := make([]DarwinSoftware, 0, MaxSoftwarePerPlatform)
	for rows.Next() {
		var (
			name, version, source                       string
			bundleID, vendor, extensionID, extensionFor sql.NullString
		)
		if err := rows.Scan(&name, &version, &source, &bundleID, &vendor, &extensionID, &extensionFor); err != nil {
			return nil, fmt.Errorf("scanning darwin software row: %w", err)
		}

		item := DarwinSoftware{
			Name:             name,
			Version:          version,
			Source:           internSource(source),
			BundleIdentifier: optional(bundleID),
			ExtensionID:      optional(extensionID),
			ExtensionFor:     optional(extensionFor),
		}
		// Vendors repeat heavily, so they go through the intern pool.
		if vendor.Valid {
			item.Vendor = ptrString(internString(vendor.String))
		}
		items = append(items, item)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating darwin software rows: %w", err)
	}
	return items, nil
}
// loadWindowsSoftware reads Windows software rows from the "software" table,
// restricted to the given sources. Rows are returned in random order and
// capped at MaxSoftwarePerPlatform. Source and vendor strings are interned;
// nullable columns that are NULL or empty become nil pointers.
func loadWindowsSoftware(db *sql.DB, sources []string) ([]WindowsSoftware, error) {
	const queryTemplate = `
SELECT name, version, source, vendor, upgrade_code, extension_id, extension_for
FROM software
WHERE source IN (%s)
ORDER BY RANDOM()
LIMIT %d
`
	quotedSources := "'" + strings.Join(sources, "', '") + "'"
	// nolint:gosec // sources are hardcoded, not user input
	query := fmt.Sprintf(queryTemplate, quotedSources, MaxSoftwarePerPlatform)

	rows, err := db.Query(query)
	if err != nil {
		return nil, fmt.Errorf("querying windows software: %w", err)
	}
	defer rows.Close()

	// optional turns a nullable column into nil when it is NULL or empty.
	optional := func(col sql.NullString) *string {
		if !col.Valid {
			return nil
		}
		return ptrString(col.String)
	}

	items := make([]WindowsSoftware, 0, MaxSoftwarePerPlatform)
	for rows.Next() {
		var (
			name, version, source                          string
			vendor, upgradeCode, extensionID, extensionFor sql.NullString
		)
		if err := rows.Scan(&name, &version, &source, &vendor, &upgradeCode, &extensionID, &extensionFor); err != nil {
			return nil, fmt.Errorf("scanning windows software row: %w", err)
		}

		item := WindowsSoftware{
			Name:         name,
			Version:      version,
			Source:       internSource(source),
			UpgradeCode:  optional(upgradeCode),
			ExtensionID:  optional(extensionID),
			ExtensionFor: optional(extensionFor),
		}
		// Vendors repeat heavily, so they go through the intern pool.
		if vendor.Valid {
			item.Vendor = ptrString(internString(vendor.String))
		}
		items = append(items, item)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating windows software rows: %w", err)
	}
	return items, nil
}
// loadUbuntuSoftware reads Ubuntu/Linux software rows from the "software"
// table, restricted to the given sources. Rows are returned in random order
// and capped at MaxSoftwarePerPlatform. Source and vendor strings are
// interned; nullable columns that are NULL or empty become nil pointers.
func loadUbuntuSoftware(db *sql.DB, sources []string) ([]UbuntuSoftware, error) {
	const queryTemplate = `
SELECT name, version, source, vendor, arch, release, extension_id, extension_for
FROM software
WHERE source IN (%s)
ORDER BY RANDOM()
LIMIT %d
`
	quotedSources := "'" + strings.Join(sources, "', '") + "'"
	// nolint:gosec // sources are hardcoded, not user input
	query := fmt.Sprintf(queryTemplate, quotedSources, MaxSoftwarePerPlatform)

	rows, err := db.Query(query)
	if err != nil {
		return nil, fmt.Errorf("querying ubuntu software: %w", err)
	}
	defer rows.Close()

	// optional turns a nullable column into nil when it is NULL or empty.
	optional := func(col sql.NullString) *string {
		if !col.Valid {
			return nil
		}
		return ptrString(col.String)
	}

	items := make([]UbuntuSoftware, 0, MaxSoftwarePerPlatform)
	for rows.Next() {
		var (
			name, version, source                            string
			vendor, arch, release, extensionID, extensionFor sql.NullString
		)
		if err := rows.Scan(&name, &version, &source, &vendor, &arch, &release, &extensionID, &extensionFor); err != nil {
			return nil, fmt.Errorf("scanning ubuntu software row: %w", err)
		}

		item := UbuntuSoftware{
			Name:         name,
			Version:      version,
			Source:       internSource(source),
			Arch:         optional(arch),
			Release:      optional(release),
			ExtensionID:  optional(extensionID),
			ExtensionFor: optional(extensionFor),
		}
		// Vendors repeat heavily, so they go through the intern pool.
		if vendor.Valid {
			item.Vendor = ptrString(internString(vendor.String))
		}
		items = append(items, item)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating ubuntu software rows: %w", err)
	}
	return items, nil
}