diff --git a/changes/41644-improve-cpe-matching b/changes/41644-improve-cpe-matching new file mode 100644 index 0000000000..e5a9996139 --- /dev/null +++ b/changes/41644-improve-cpe-matching @@ -0,0 +1 @@ +* Added software source to ecosystem matching to help prevent non-deterministic CPE selection when multiple vendors exist for the same product. \ No newline at end of file diff --git a/server/vulnerabilities/nvd/cpe.go b/server/vulnerabilities/nvd/cpe.go index 6c2a8f33dd..135de66b62 100644 --- a/server/vulnerabilities/nvd/cpe.go +++ b/server/vulnerabilities/nvd/cpe.go @@ -120,8 +120,10 @@ type cpeSearchQuery struct { args []any } -const cpeSelectColumns = `SELECT c.rowid, c.product, c.vendor, c.deprecated FROM cpe_2 c` -const cpeOrderBy = ` ORDER BY c.vendor, c.product` +const ( + cpeSelectColumns = `SELECT c.rowid, c.product, c.vendor, c.target_sw, c.deprecated FROM cpe_2 c` + cpeOrderBy = ` ORDER BY c.vendor, c.product` +) // cpeSearchQueries returns individual search queries in priority order for finding CPE matches. // Query 1 (vendor+product) and 2 (product-only) are cheap index lookups. Query 3 (full-text search) @@ -192,6 +194,72 @@ func cpeVendorMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bo return matched } +// cpeTargetSWMatchesSoftware returns a score (0-3) indicating how well the CPE's vendor +// and target_sw fields match the expected ecosystem for the software's source. +func cpeTargetSWMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) int { + expectedTargetSW := targetSW(software) + + if expectedTargetSW != "*" { + // Best match: CPE's target_sw matches what we expect for this software source + // Example: + // software.source="npm_packages" (expectedTargetSW="node.js") + // item.TargetSW="node.js" + if item.TargetSW != "" && strings.EqualFold(item.TargetSW, expectedTargetSW) { + return 3 + } + + // Good match: CPE vendor contains the ecosystem name + // Example: + // software.source="python_packages" (expectedTargetSW="python") + // item.Vendor="python" + expectedLower := strings.ToLower(expectedTargetSW) + vendorLower := strings.ToLower(item.Vendor) + + // "node.js" -> "node" + ecosystemName := expectedLower + if strings.Contains(ecosystemName, ".") { + ecosystemName = strings.Split(ecosystemName, ".")[0] + } + + if strings.Contains(vendorLower, ecosystemName) { + return 2 + } + } + + if expectedTargetSW == "*" { + // Good match: CPE vendor contains the ecosystem name + vendorLower := strings.ToLower(item.Vendor) + switch software.Source { + case "deb_packages": + // Example: + // software.source="deb_packages" (expectedTargetSW="*") + // item.Vendor="debian" + if strings.Contains(vendorLower, "debian") { + return 2 + } + case "rpm_packages": + // Example: + // software.source="rpm_packages" (expectedTargetSW="*") + // item.Vendor="redhat" + if strings.Contains(vendorLower, "redhat") || strings.Contains(vendorLower, "fedora") { + return 2 + } + } + } + + // Partial match: CPE vendor matches software name with common _project suffix + // Example: + // software.name="duplicity", source="python_packages" + // item.Vendor="duplicity_project", item.Product="duplicity" + productLower := strings.ToLower(item.Product) + vendorLower := strings.ToLower(item.Vendor) + if vendorLower == productLower+"_project" { + return 1 + } + + return 0 +} + // cpeItemMatchesSoftware checks whether a CPE result's vendor/product terms all appear in the // software's name, vendor, and bundle identifier. func cpeItemMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bool { @@ -644,6 +712,8 @@ func CPEFromSoftware(ctx context.Context, logger *slog.Logger, db *sqlx.DB, soft // This avoids nondeterministic results when multiple CPE entries match // (e.g. "ge:line" vs "linecorp:line" for the "Line" app). var bestMatch *IndexedCPEItem + var bestTargetSWScore int + var bestVendorMatch bool var deprecatedMatches []IndexedCPEItem for i := range results { if !cpeItemMatchesSoftware(&results[i], software) { @@ -653,8 +723,19 @@ func CPEFromSoftware(ctx context.Context, logger *slog.Logger, db *sqlx.DB, soft deprecatedMatches = append(deprecatedMatches, results[i]) continue } - if bestMatch == nil || (!cpeVendorMatchesSoftware(bestMatch, software) && cpeVendorMatchesSoftware(&results[i], software)) { + + targetSWScore := cpeTargetSWMatchesSoftware(&results[i], software) + vendorMatch := cpeVendorMatchesSoftware(&results[i], software) + + // first valid match, OR + // better target_sw score (ecosystem match), OR + // Same target_sw score but better vendor match + if bestMatch == nil || + targetSWScore > bestTargetSWScore || + (targetSWScore == bestTargetSWScore && !bestVendorMatch && vendorMatch) { bestMatch = &results[i] + bestTargetSWScore = targetSWScore + bestVendorMatch = vendorMatch } } if bestMatch != nil { diff --git a/server/vulnerabilities/nvd/cpe_test.go b/server/vulnerabilities/nvd/cpe_test.go index 88194120a8..b53edcef18 100644 --- a/server/vulnerabilities/nvd/cpe_test.go +++ b/server/vulnerabilities/nvd/cpe_test.go @@ -100,6 +100,42 @@ func TestCPEFromSoftware(t *testing.T) { ) require.NoError(t, err, "software name %q should not cause FTS5 syntax error", name) } + + // Target_SW scoring: python_packages source should prefer python vendor over jenkins vendor + // when multiple CPE entries exist for the same product name. + cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{ + Name: "requests", Version: "2.31.0", Source: "python_packages", + }, nil, reCache) + require.NoError(t, err) + require.Equal(t, "cpe:2.3:a:python:requests:2.31.0:*:*:*:*:python:*:*", cpe, + "python_packages should prefer python:requests (vendor contains 'python')") + + // Target_SW scoring: npm_packages source should prefer openjsf vendor over checkpoint vendor + // when the CPE has target_sw=node.js matching the expected target_sw. + cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{ + Name: "express", Version: "4.18.0", Source: "npm_packages", + }, nil, reCache) + require.NoError(t, err) + require.Equal(t, "cpe:2.3:a:openjsf:express:4.18.0:*:*:*:*:node.js:*:*", cpe, + "npm_packages should prefer openjsf:express with target_sw=node.js") + + // Target_SW scoring with _project fallback pattern. + // For duplicity from python_packages, neither vendor relates to Python ecosystem, but + // duplicity_project:duplicity follows the NVD "_project" pattern for upstream CPEs. + cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{ + Name: "duplicity", Version: "0.8.0", Source: "python_packages", + }, nil, reCache) + require.NoError(t, err) + require.Equal(t, "cpe:2.3:a:duplicity_project:duplicity:0.8.0:*:*:*:*:python:*:*", cpe, + "should prefer duplicity_project (upstream) over debian (distro-specific) using _project pattern") + + // Target_SW scoring: deb_packages source should prefer debian vendor for duplicity + cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{ + Name: "duplicity", Version: "0.8.0", Source: "deb_packages", + }, nil, reCache) + require.NoError(t, err) + require.Equal(t, "cpe:2.3:a:debian:duplicity:0.8.0:*:*:*:*:*:*:*", cpe, + "deb_packages duplicity should prefer debian:duplicity (vendor contains 'debian')") } func TestCPETranslations(t *testing.T) { @@ -1112,7 +1148,16 @@ func TestCPEFromSoftwareIntegration(t *testing.T) { Version: "0.8.21", Vendor: "", BundleIdentifier: "", - }, cpe: "cpe:2.3:a:debian:duplicity:0.8.21:*:*:*:*:python:*:*", + }, cpe: "cpe:2.3:a:duplicity_project:duplicity:0.8.21:*:*:*:*:python:*:*", + }, + { + software: fleet.Software{ + Name: "duplicity", + Source: "deb_packages", + Version: "0.8.21", + Vendor: "", + BundleIdentifier: "", + }, cpe: "cpe:2.3:a:debian:duplicity:0.8.21:*:*:*:*:*:*:*", }, { software: fleet.Software{ @@ -1346,7 +1391,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) { Version: "2.25.1", Vendor: "", BundleIdentifier: "", - }, cpe: "cpe:2.3:a:jenkins:requests:2.25.1:*:*:*:*:python:*:*", + }, cpe: "cpe:2.3:a:python:requests:2.25.1:*:*:*:*:python:*:*", }, { software: fleet.Software{ diff --git a/server/vulnerabilities/nvd/indexed_cpe_item.go b/server/vulnerabilities/nvd/indexed_cpe_item.go index 6c9bfae475..7275f517b4 100644 --- a/server/vulnerabilities/nvd/indexed_cpe_item.go +++ b/server/vulnerabilities/nvd/indexed_cpe_item.go @@ -14,6 +14,7 @@ type IndexedCPEItem struct { Product string `json:"product" db:"product"` Vendor string `json:"vendor" db:"vendor"` SWEdition string `json:"sw_edition" db:"sw_edition"` + TargetSW string `json:"target_sw" db:"target_sw"` Deprecated bool `json:"deprecated" db:"deprecated"` Weight int `db:"weight"` } diff --git a/server/vulnerabilities/nvd/testing_utils.go b/server/vulnerabilities/nvd/testing_utils.go index 76cc1262d0..929401f108 100644 --- a/server/vulnerabilities/nvd/testing_utils.go +++ b/server/vulnerabilities/nvd/testing_utils.go @@ -68,5 +68,29 @@ const XmlCPETestDict = ` Good Corp Correct Result 1.0 + + Python Requests 2.31.0 + + + + Jenkins Requests 2.31.0 + + + + Duplicity Project Duplicity 0.8.0 + + + + Debian Duplicity 0.8.0 + + + + OpenJS Foundation Express 4.18.0 + + + + Check Point Express 4.18.0 + + `