Fixed nondeterministic CPE matching when multiple CPE candidates share the same product name (#41649)

<!-- Add the related story/sub-task/bug number, like Resolves #123, or
remove if NA -->
**Related issue:** Resolves #39899

This change resolves the determinism issue by ordering the results;
however, it does not necessarily fix the correctness issue. Another bug
has been opened for that: https://github.com/fleetdm/fleet/issues/41644

That's why you see changes in `cpe_test.go` that may seem incorrect in
some cases. In reality, the previously expected results were purely
coincidental (they depended on insert order).

# Checklist for submitter

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **Bug Fixes**
* Fixed nondeterministic CPE matching when multiple candidates share the
same product name. CPE selection is now deterministic and prioritizes
matches based on vendor alignment with the software being analyzed.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Victor Lyuboslavsky 2026-03-17 07:22:23 -05:00 committed by GitHub
parent c0fc3787fc
commit 3b43629a81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 141 additions and 15 deletions

View file

@ -0,0 +1 @@
* Fixed nondeterministic CPE matching when multiple CPE candidates share the same product name.

View file

@ -121,6 +121,7 @@ type cpeSearchQuery struct {
}
const (
	// cpeSelectColumns is the shared SELECT prefix of every CPE search query:
	// it reads rowid, product, vendor and deprecated from cpe_2 (aliased as c).
	cpeSelectColumns = `SELECT c.rowid, c.product, c.vendor, c.deprecated FROM cpe_2 c`

	// cpeOrderBy is the suffix appended to every CPE search query so that rows
	// come back sorted by vendor and then product, which keeps the order of
	// candidates stable between runs.
	cpeOrderBy = ` ORDER BY c.vendor, c.product`
)
// cpeSearchQueries returns individual search queries in priority order for finding CPE matches.
// Query 1 (vendor+product) and 2 (product-only) are cheap index lookups. Query 3 (full-text search)
@ -138,12 +139,12 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery {
productPlaceholders := strings.TrimSuffix(strings.Repeat("?,", len(products)), ",")
if len(vendors) > 0 {
vendorPlaceholders := strings.TrimSuffix(strings.Repeat("?,", len(vendors)), ",")
stm = cpeSelectColumns + " WHERE vendor IN (" + vendorPlaceholders + ") AND product IN (" + productPlaceholders + ")"
stm = cpeSelectColumns + " WHERE vendor IN (" + vendorPlaceholders + ") AND product IN (" + productPlaceholders + ")" + cpeOrderBy
for _, v := range vendors {
args = append(args, v)
}
} else {
stm = cpeSelectColumns + " WHERE product IN (" + productPlaceholders + ")"
stm = cpeSelectColumns + " WHERE product IN (" + productPlaceholders + ")" + cpeOrderBy
}
for _, p := range products {
args = append(args, p)
@ -153,7 +154,7 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery {
// 2 - Try to match product by sanitized name
queries = append(queries, cpeSearchQuery{
stm: cpeSelectColumns + " WHERE product = ?",
stm: cpeSelectColumns + " WHERE product = ?" + cpeOrderBy,
args: []any{sanitizeSoftwareName(software)},
})
@ -161,7 +162,7 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery {
sanitizedName := sanitizeMatch(software.Name)
if strings.TrimSpace(sanitizedName) != "" {
queries = append(queries, cpeSearchQuery{
stm: cpeSelectColumns + " JOIN cpe_search cs ON cs.rowid = c.rowid WHERE cs.title MATCH ?",
stm: cpeSelectColumns + " JOIN cpe_search cs ON cs.rowid = c.rowid WHERE cs.title MATCH ?" + cpeOrderBy,
args: []any{sanitizedName},
})
}
@ -170,7 +171,7 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery {
bundleParts := strings.Split(software.BundleIdentifier, ".")
if len(bundleParts) == 3 {
queries = append(queries, cpeSearchQuery{
stm: cpeSelectColumns + " WHERE vendor = ? AND product = ?",
stm: cpeSelectColumns + " WHERE vendor = ? AND product = ?" + cpeOrderBy,
args: []any{strings.ToLower(bundleParts[1]), strings.ToLower(bundleParts[2])},
})
}
@ -178,6 +179,19 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery {
return queries
}
// cpeVendorMatchesSoftware reports whether the CPE item's vendor appears as a
// whole word in the software's vendor field. Both sides are lowercased before
// comparing, so the match is case-insensitive. Used as a tiebreaker when
// multiple CPE candidates pass cpeItemMatchesSoftware.
//
// It returns false when either vendor is empty. An empty CPE vendor would
// otherwise produce the pattern `\b\b`, which matches at any word boundary and
// would therefore report a match for virtually every software vendor.
func cpeVendorMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bool {
	sVendor := strings.ToLower(software.Vendor)
	cpeVendor := strings.ToLower(item.Vendor)
	if sVendor == "" || cpeVendor == "" {
		return false
	}
	// QuoteMeta escapes every regexp metacharacter, so the vendor is matched
	// literally; the surrounding \b anchors require whole-word occurrences.
	pattern := `\b` + regexp.QuoteMeta(cpeVendor) + `\b`
	matched, err := regexp.MatchString(pattern, sVendor)
	if err != nil {
		// A quoted literal should always compile; treat a failure as "no match"
		// rather than letting it influence the tiebreak.
		return false
	}
	return matched
}
// cpeItemMatchesSoftware checks whether a CPE result's vendor/product terms all appear in the
// software's name, vendor, and bundle identifier.
func cpeItemMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bool {
@ -626,22 +640,35 @@ func CPEFromSoftware(ctx context.Context, logger *slog.Logger, db *sqlx.DB, soft
return "", fmt.Errorf("getting cpes for: %s: %w", software.Name, err)
}
// Collect all matching candidates for this query, then pick the best one.
// This avoids nondeterministic results when multiple CPE entries match
// (e.g. "ge:line" vs "linecorp:line" for the "Line" app).
var bestMatch *IndexedCPEItem
var deprecatedMatches []IndexedCPEItem
for i := range results {
if !cpeItemMatchesSoftware(&results[i], software) {
continue
}
if !results[i].Deprecated {
return results[i].FmtStr(software), nil
if results[i].Deprecated {
deprecatedMatches = append(deprecatedMatches, results[i])
continue
}
// Match is deprecated; try to resolve via deprecation chain
cpe, err := resolveDeprecatedCPE(db, results, software)
if bestMatch == nil || (!cpeVendorMatchesSoftware(bestMatch, software) && cpeVendorMatchesSoftware(&results[i], software)) {
bestMatch = &results[i]
}
}
if bestMatch != nil {
return bestMatch.FmtStr(software), nil
}
// All matches are deprecated; try to resolve via deprecation chain
if len(deprecatedMatches) > 0 {
cpe, err := resolveDeprecatedCPE(db, deprecatedMatches, software)
if err != nil {
return "", err
}
if cpe != "" {
return cpe, nil
}
continue // deprecation unresolved for this result, try next result
}
}
}

View file

@ -47,6 +47,31 @@ func TestCPEFromSoftware(t *testing.T) {
require.NoError(t, err)
require.Equal(t, "cpe:2.3:a:vendor2:product4:0.3:*:*:*:*:macos:*:*", cpe)
// When multiple CPE candidates share the same product name and no vendor info
// is available, ORDER BY ensures deterministic results across runs.
for range 5 {
cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{
Name: "Line", Version: "3.5.1", Source: "chrome_extensions",
}, nil, reCache)
require.NoError(t, err)
require.Equal(t, "cpe:2.3:a:ge:line:3.5.1:*:*:*:*:chrome:*:*", cpe, "should be deterministic across runs")
}
// When vendor info is present and matches a CPE vendor, prefer that match.
cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{
Name: "Line", Version: "4.3.1", Vendor: "linecorp inc", Source: "apps",
}, nil, reCache)
require.NoError(t, err)
require.Equal(t, "cpe:2.3:a:linecorp:line:4.3.1:*:*:*:*:macos:*:*", cpe)
// Deprecated CPE: when the only matching CPE is deprecated, follows the deprecation
// chain to find the non-deprecated replacement.
cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{
Name: "Widget", Version: "1.0", Vendor: "goodcorp inc", Source: "programs",
}, nil, reCache)
require.NoError(t, err)
require.Equal(t, "cpe:2.3:a:goodcorp:correct_result:1.0:*:*:*:*:windows:*:*", cpe)
// Does not error on Unicode Names
_, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{Name: "Девушка Фонарём", Version: "1.2.3", BundleIdentifier: "vendor", Source: "apps"}, nil, reCache)
require.NoError(t, err)
@ -910,7 +935,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) {
Version: "2.37.1",
Vendor: "The Git Development Community",
BundleIdentifier: "",
}, cpe: "cpe:2.3:a:git-scm:git:2.37.1:*:*:*:*:windows:*:*",
}, cpe: "cpe:2.3:a:git:git:2.37.1:*:*:*:*:windows:*:*",
},
{
software: fleet.Software{
@ -1258,7 +1283,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) {
Version: "3.12.4",
Vendor: "",
BundleIdentifier: "",
}, cpe: "cpe:2.3:a:google:protobuf:3.12.4:*:*:*:*:python:*:*",
}, cpe: "cpe:2.3:a:golang:protobuf:3.12.4:*:*:*:*:python:*:*",
},
{
software: fleet.Software{
@ -1285,7 +1310,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) {
Version: "2.3.0+ubuntu2.1",
Vendor: "",
BundleIdentifier: "",
}, cpe: "cpe:2.3:a:ubuntu:python-apt:2.3.0.ubuntu2.1:*:*:*:*:python:*:*",
}, cpe: "cpe:2.3:a:debian:python-apt:2.3.0.ubuntu2.1:*:*:*:*:python:*:*",
},
{
software: fleet.Software{
@ -1321,7 +1346,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) {
Version: "2.25.1",
Vendor: "",
BundleIdentifier: "",
}, cpe: "cpe:2.3:a:python:requests:2.25.1:*:*:*:*:python:*:*",
}, cpe: "cpe:2.3:a:jenkins:requests:2.25.1:*:*:*:*:python:*:*",
},
{
software: fleet.Software{
@ -1800,7 +1825,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) {
Version: "3.9.18_2",
Vendor: "",
},
cpe: `cpe:2.3:a:python:python:3.9.18_2:-:*:*:*:macos:*:*`,
cpe: `cpe:2.3:a:microsoft:python:3.9.18_2:*:*:*:*:macos:*:*`,
},
{
software: fleet.Software{
@ -2031,6 +2056,47 @@ func TestCPEFromSoftwareIntegration(t *testing.T) {
}
}
// TestCPEVendorMatchesSoftware is a table-driven test for
// cpeVendorMatchesSoftware. Besides the basic match / no-match / empty-vendor
// cases, it covers the whole-word requirement and literal handling of regexp
// metacharacters in the CPE vendor.
func TestCPEVendorMatchesSoftware(t *testing.T) {
	tests := []struct {
		name           string
		cpeVendor      string
		softwareVendor string
		want           bool
	}{
		{
			name:           "CPE vendor appears in software vendor",
			cpeVendor:      "linecorp",
			softwareVendor: "linecorp inc",
			want:           true,
		},
		{
			name:           "CPE vendor does not appear in software vendor",
			cpeVendor:      "ge",
			softwareVendor: "linecorp inc",
			want:           false,
		},
		{
			name:           "software vendor is empty",
			cpeVendor:      "linecorp",
			softwareVendor: "",
			want:           false,
		},
		{
			name:           "CPE vendor appears in software vendor case-insensitive",
			cpeVendor:      "python",
			softwareVendor: "Python Software Foundation",
			want:           true,
		},
		{
			// "ge" is a prefix of "general" but not a standalone word.
			name:           "CPE vendor is only a substring of a word in software vendor",
			cpeVendor:      "ge",
			softwareVendor: "general electric",
			want:           false,
		},
		{
			name:           "CPE vendor appears as a word in the middle of software vendor",
			cpeVendor:      "git",
			softwareVendor: "The Git Development Community",
			want:           true,
		},
		{
			// The "." must not act as a regexp wildcard.
			name:           "regexp metacharacters in CPE vendor are matched literally",
			cpeVendor:      "a.b",
			softwareVendor: "axb inc",
			want:           false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			item := &IndexedCPEItem{Vendor: tt.cpeVendor}
			sw := &fleet.Software{Vendor: tt.softwareVendor}
			assert.Equal(t, tt.want, cpeVendorMatchesSoftware(item, sw))
		})
	}
}
func TestContainsNonASCII(t *testing.T) {
testCases := []struct {
input string

View file

@ -36,5 +36,37 @@ const XmlCPETestDict = `
<title xml:lang="en-US">Vendor2 Product4 999 for MacOS</title>
<cpe-23:cpe23-item name="cpe:2.3:a:vendor2:product4:999:*:*:*:*:macos:*:*"/>
</cpe-item>
<cpe-item name="cpe:/a:ge:line:1.0">
<title xml:lang="en-US">GE Line 1.0</title>
<cpe-23:cpe23-item name="cpe:2.3:a:ge:line:1.0:*:*:*:*:*:*:*"/>
</cpe-item>
<cpe-item name="cpe:/a:linecorp:line:1.0">
<title xml:lang="en-US">LINE Corporation Line 1.0</title>
<cpe-23:cpe23-item name="cpe:2.3:a:linecorp:line:1.0:*:*:*:*:*:*:*"/>
</cpe-item>
<cpe-item name="cpe:/a:badvendor:widget:1.0" deprecated="true" deprecation_date="2021-06-10T15:28:05.490Z">
<title xml:lang="en-US">Bad Vendor Widget 1.0</title>
<cpe-23:cpe23-item name="cpe:2.3:a:badvendor:widget:1.0:*:*:*:*:*:*:*">
<cpe-23:deprecation date="2021-06-10T11:28:05.490-04:00">
<cpe-23:deprecated-by name="cpe:2.3:a:badvendor:wrong_result:1.0:*:*:*:*:*:*:*" type="NAME_CORRECTION"/>
</cpe-23:deprecation>
</cpe-23:cpe23-item>
</cpe-item>
<cpe-item name="cpe:/a:badvendor:wrong_result:1.0">
<title xml:lang="en-US">Bad Vendor Wrong Result 1.0</title>
<cpe-23:cpe23-item name="cpe:2.3:a:badvendor:wrong_result:1.0:*:*:*:*:*:*:*"/>
</cpe-item>
<cpe-item name="cpe:/a:goodcorp:widget:1.0" deprecated="true" deprecation_date="2021-06-10T15:28:05.490Z">
<title xml:lang="en-US">Good Corp Widget 1.0</title>
<cpe-23:cpe23-item name="cpe:2.3:a:goodcorp:widget:1.0:*:*:*:*:*:*:*">
<cpe-23:deprecation date="2021-06-10T11:28:05.490-04:00">
<cpe-23:deprecated-by name="cpe:2.3:a:goodcorp:correct_result:1.0:*:*:*:*:*:*:*" type="NAME_CORRECTION"/>
</cpe-23:deprecation>
</cpe-23:cpe23-item>
</cpe-item>
<cpe-item name="cpe:/a:goodcorp:correct_result:1.0">
<title xml:lang="en-US">Good Corp Correct Result 1.0</title>
<cpe-23:cpe23-item name="cpe:2.3:a:goodcorp:correct_result:1.0:*:*:*:*:*:*:*"/>
</cpe-item>
</cpe-list>
`