fleet/server/vulnerabilities/nvd/cpe.go
Konstantin Sykulev a599889152
Improved cpe deterministic matching (#42325)
**Related issue:** Resolves #41644

There are two cases that exist in the cpe database where this generic
logic could not be applied.

django from python_packages:
gofiber:django
djangoproject:django

npm from npm_packages:
microsoft:npm
npmjs:npm

These will require individual CVE overrides, which are outside the scope of
this task.

- [x] Changes file added for user-visible changes in `changes/`,
`orbit/changes/` or `ee/fleetd-chrome/changes`.
See [Changes
files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/guides/committing-changes.md#changes-files)
for more information.

- [x] Input data is properly validated, `SELECT *` is avoided, SQL
injection is prevented (using placeholders for values in statements), JS
inline code is prevented especially for url redirects, and untrusted
data interpolated into shell scripts/commands is validated against shell
metacharacters.

## Testing

- [x] Added/updated automated tests
- [x] QA'd all new/changed functionality manually

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **Bug Fixes**
* Enhanced CPE (Common Platform Enumeration) matching to reduce
non-deterministic vendor selection when multiple vendors exist for the
same software product. The algorithm now incorporates software ecosystem
information to ensure more accurate and consistent vulnerability
resolution across package types.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2026-03-24 17:48:02 -05:00

999 lines
32 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package nvd
import (
"context"
"database/sql"
"errors"
"fmt"
"log/slog"
"net/url"
"os"
"path/filepath"
"regexp"
"slices"
"strconv"
"strings"
"time"
"unicode"
"github.com/doug-martin/goqu/v9"
"github.com/fleetdm/fleet/v4/pkg/download"
"github.com/fleetdm/fleet/v4/pkg/fleethttp"
"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
"github.com/fleetdm/fleet/v4/server/fleet"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/oval"
"github.com/google/go-github/v37/github"
"github.com/jmoiron/sqlx"
)
// GitHub coordinates of the repository whose releases are listed when looking for
// NVD assets, and the file name the CPE database is stored under inside the
// vulnerability path.
const (
	owner         = "fleetdm"
	repo          = "nvd"
	cpeDBFilename = "cpe.sqlite"
)
// cpeDBRegex matches release asset names of the form cpe-<anything>.sqlite.gz.
var cpeDBRegex = regexp.MustCompile(`^cpe-.*\.sqlite\.gz$`)
// GetGithubNVDAsset requests a single page of up to 10 releases of the NVD
// repository and returns the first (release, asset) pair whose asset satisfies
// pred. Draft releases are ignored. An error is returned when the listing fails
// or when no asset matches.
func GetGithubNVDAsset(pred func(rel *github.ReleaseAsset) bool) (*github.RepositoryRelease, *github.ReleaseAsset, error) {
	client := github.NewClient(fleethttp.NewGithubClient())
	opts := &github.ListOptions{Page: 0, PerPage: 10}

	releases, _, err := client.Repositories.ListReleases(context.Background(), owner, repo, opts)
	if err != nil {
		return nil, nil, err
	}

	for _, rel := range releases {
		if rel.GetDraft() {
			// Draft releases are not considered.
			continue
		}
		for i := range rel.Assets {
			if candidate := rel.Assets[i]; pred(candidate) {
				return rel, candidate, nil
			}
		}
	}

	return nil, nil, errors.New("no nvd release found")
}
// DownloadCPEDBFromGithub downloads and extracts the CPE database into vulnPath
// (as cpe.sqlite).
//
// When cpeDBURL is non-empty the database is always fetched from that URL.
// Otherwise the function looks up the CPE database asset in the releases of
// github.com/fleetdm/nvd and skips the download when the local copy was modified
// today or is newer than the release that carries the asset.
func DownloadCPEDBFromGithub(vulnPath string, cpeDBURL string) error {
	path := filepath.Join(vulnPath, cpeDBFilename)

	if cpeDBURL == "" {
		stat, err := os.Stat(path)
		switch {
		case errors.Is(err, os.ErrNotExist):
			// No local copy yet: carry on and download one.
		case err != nil:
			return err
		case stat.ModTime().Truncate(24 * time.Hour).Equal(time.Now().Truncate(24 * time.Hour)):
			// Vulnerability assets are published once per day - if the local file has a
			// mod date of 'today', then we can assume it is already up to date so there's
			// nothing else to do.
			return nil
		}

		rel, asset, err := GetGithubNVDAsset(func(asset *github.ReleaseAsset) bool {
			return cpeDBRegex.MatchString(asset.GetName())
		})
		if err != nil {
			return err
		}
		if asset == nil {
			return errors.New("failed to find cpe database in nvd release")
		}

		// Only trust the comparison when the release actually carries a creation
		// time; CreatedAt is a pointer and must not be dereferenced blindly. When
		// it is missing we fall through and download.
		if stat != nil && rel != nil && rel.CreatedAt != nil && stat.ModTime().After(rel.CreatedAt.Time) {
			// file is newer than release, do nothing
			return nil
		}

		cpeDBURL = asset.GetBrowserDownloadURL()
	}

	u, err := url.Parse(cpeDBURL)
	if err != nil {
		return err
	}

	githubClient := fleethttp.NewGithubClient()
	if err := download.DownloadAndExtract(githubClient, u, path); err != nil {
		return fmt.Errorf("download and extract %s: %w", u.String(), err)
	}

	return nil
}
// cpeSearchQuery pairs one SQL statement with the positional arguments that are
// bound to its `?` placeholders when it is executed.
type cpeSearchQuery struct {
	stm  string // SQL statement text
	args []any  // placeholder values, in placeholder order
}
// SQL fragments shared by the statements built in cpeSearchQueries.
const (
	// cpeSelectColumns selects the candidate columns from cpe_2, aliased as c.
	cpeSelectColumns = `SELECT c.rowid, c.product, c.vendor, c.target_sw, c.deprecated FROM cpe_2 c`
	// cpeOrderBy orders candidates by vendor, then product.
	cpeOrderBy = ` ORDER BY c.vendor, c.product`
)
// cpeSearchQueries builds the CPE lookup statements for software, in the order
// they should be tried:
//
//  1. vendor+product variations (product variations only when no vendor
//     variations exist); emitted only when there is at least one product variation
//  2. exact match on the sanitized software name
//  3. full-text match on the title (emitted only when the sanitized name is not blank)
//  4. vendor/product taken from a three-part bundle identifier (tld.vendor.product)
//
// The first two are cheap index lookups while the full-text search is expensive;
// callers run the queries one by one and stop at the first match so the expensive
// search is skipped for most software.
func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery {
	// placeholders renders n comma-separated "?" markers. It is only called with n > 0.
	placeholders := func(n int) string {
		return "?" + strings.Repeat(",?", n-1)
	}

	var out []cpeSearchQuery

	// 1 - product and vendor variations
	vendors := vendorVariations(software)
	products := productVariations(software)
	if len(products) > 0 {
		where := " WHERE product IN (" + placeholders(len(products)) + ")"
		args := make([]any, 0, len(vendors)+len(products))
		if len(vendors) > 0 {
			where = " WHERE vendor IN (" + placeholders(len(vendors)) + ") AND product IN (" + placeholders(len(products)) + ")"
			for _, vendor := range vendors {
				args = append(args, vendor)
			}
		}
		for _, product := range products {
			args = append(args, product)
		}
		out = append(out, cpeSearchQuery{
			stm:  cpeSelectColumns + where + cpeOrderBy,
			args: args,
		})
	}

	// 2 - sanitized software name as the product
	byName := cpeSearchQuery{
		stm:  cpeSelectColumns + " WHERE product = ?" + cpeOrderBy,
		args: []any{sanitizeSoftwareName(software)},
	}
	out = append(out, byName)

	// 3 - full-text search, skipped when there is nothing to search for
	if title := sanitizeMatch(software.Name); strings.TrimSpace(title) != "" {
		out = append(out, cpeSearchQuery{
			stm:  cpeSelectColumns + " JOIN cpe_search cs ON cs.rowid = c.rowid WHERE cs.title MATCH ?" + cpeOrderBy,
			args: []any{title},
		})
	}

	// 4 - bundle identifier shaped like tld.vendor.product
	if parts := strings.Split(software.BundleIdentifier, "."); len(parts) == 3 {
		bundleVendor := strings.ToLower(parts[1])
		bundleProduct := strings.ToLower(parts[2])
		out = append(out, cpeSearchQuery{
			stm:  cpeSelectColumns + " WHERE vendor = ? AND product = ?" + cpeOrderBy,
			args: []any{bundleVendor, bundleProduct},
		})
	}

	return out
}
// cpeVendorMatchesSoftware reports whether the CPE item's vendor occurs as a whole
// word (delimited by regexp word boundaries) inside the lower-cased vendor of the
// software. Software without a vendor never matches. It serves as a tiebreaker
// between several candidates that already pass cpeItemMatchesSoftware.
func cpeVendorMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bool {
	if software.Vendor == "" {
		return false
	}

	wholeWord, err := regexp.Compile(`\b` + regexp.QuoteMeta(item.Vendor) + `\b`)
	if err != nil {
		// A pattern that cannot be compiled is treated as "no match".
		return false
	}
	return wholeWord.MatchString(strings.ToLower(software.Vendor))
}
// cpeTargetSWMatchesSoftware scores (0-3) how well the CPE's target_sw and vendor
// fit the ecosystem expected for the software's source:
//
//	3 - the CPE target_sw equals the expected target_sw (case-insensitive),
//	    e.g. source "npm_packages" expecting "node.js" and a CPE with target_sw "node.js"
//	2 - the CPE vendor contains the ecosystem name, e.g. expected "python" and vendor
//	    "python"; when no specific target_sw is expected ("*"), "debian" for
//	    deb_packages and "redhat"/"fedora" for rpm_packages
//	1 - the CPE vendor is "<product>_project", e.g. duplicity_project:duplicity
//	0 - none of the above
func cpeTargetSWMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) int {
	vendor := strings.ToLower(item.Vendor)

	if expected := targetSW(software); expected != "*" {
		if item.TargetSW != "" && strings.EqualFold(item.TargetSW, expected) {
			return 3
		}

		// Only the part before the first dot names the ecosystem: "node.js" -> "node".
		ecosystem, _, _ := strings.Cut(strings.ToLower(expected), ".")
		if strings.Contains(vendor, ecosystem) {
			return 2
		}
	} else {
		// No specific target_sw is expected; fall back to distribution vendors
		// implied by the package source.
		var distroVendors []string
		switch software.Source {
		case "deb_packages":
			distroVendors = []string{"debian"}
		case "rpm_packages":
			distroVendors = []string{"redhat", "fedora"}
		}
		for _, distro := range distroVendors {
			if strings.Contains(vendor, distro) {
				return 2
			}
		}
	}

	// Weakest signal: vendor follows the common "<product>_project" naming.
	if vendor == strings.ToLower(item.Product)+"_project" {
		return 1
	}
	return 0
}
// cpeItemMatchesSoftware reports whether every "_"-separated term of the CPE
// product occurs in the lower-cased software name, and every "_"-separated term of
// the CPE vendor occurs in each of the software's vendor and bundle identifier
// that is non-empty (lower-cased). Empty vendor/bundle identifier fields impose no
// constraint.
func cpeItemMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bool {
	name := strings.ToLower(software.Name)
	for productTerm := range strings.SplitSeq(item.Product, "_") {
		if !strings.Contains(name, productTerm) {
			return false
		}
	}

	// Collect the fields the vendor terms have to appear in.
	var haystacks []string
	if vendor := strings.ToLower(software.Vendor); vendor != "" {
		haystacks = append(haystacks, vendor)
	}
	if bundle := strings.ToLower(software.BundleIdentifier); bundle != "" {
		haystacks = append(haystacks, bundle)
	}

	for vendorTerm := range strings.SplitSeq(item.Vendor, "_") {
		for _, haystack := range haystacks {
			if !strings.Contains(haystack, vendorTerm) {
				return false
			}
		}
	}
	return true
}
// resolveDeprecatedCPE follows the deprecation chain of each given CPE item, in
// order, until it reaches a replacement that is not itself deprecated, and returns
// that replacement formatted for software.
//
// It returns "" (and no error) when no item leads to a non-deprecated replacement.
// A chain that loops back onto an entry already visited is abandoned instead of
// being followed forever. Database errors other than "no rows" are returned.
func resolveDeprecatedCPE(db *sqlx.DB, items []IndexedCPEItem, software *fleet.Software) (string, error) {
	const replacementQuery = `SELECT rowid, product, vendor, deprecated FROM cpe_2
		WHERE cpe23 IN (SELECT cpe23 FROM deprecated_by d WHERE d.cpe_id = ?)`

	for _, item := range items {
		current := item
		// IDs already seen on this chain, used to detect cycles.
		visited := map[int64]struct{}{int64(current.ID): {}}

		for {
			var replacement IndexedCPEItem
			err := db.Get(&replacement, replacementQuery, current.ID)
			if errors.Is(err, sql.ErrNoRows) {
				// Dead end: nothing replaces the current entry.
				break
			}
			if err != nil {
				return "", fmt.Errorf("getting deprecation: %w", err)
			}

			if !replacement.Deprecated {
				return replacement.FmtStr(software), nil
			}

			if _, seen := visited[int64(replacement.ID)]; seen {
				// Cyclic deprecation chain: give up on this item.
				break
			}
			visited[int64(replacement.ID)] = struct{}{}
			current = replacement
		}
	}

	return "", nil
}
// softwareTransformers provide logic for tweaking e.g. software versions to match what's in the NVD database. These
// changes are done here rather than in sanitizeSoftware to ensure that software versions visible in the UI are the
// raw version strings.
//
// Each entry pairs a `matches` predicate with a `mutate` function that edits the software in place. Order matters:
// mutateSoftware applies only the first entry whose predicate matches.
var (
	// macOSMSTeamsVersion captures the pieces of a macOS Microsoft Teams version that get rearranged by the
	// Teams transformer below.
	macOSMSTeamsVersion = regexp.MustCompile(`(\d).00.(\d)(\d+)`)
	// citrixName matches names containing "Citrix Workspace " followed by digits.
	citrixName = regexp.MustCompile(`Citrix Workspace [0-9]+`)
	// minioAltDate matches a version made of exactly 14 digits (parsed below as a yyyymmddhhmmss timestamp).
	minioAltDate = regexp.MustCompile(`^\d{14}$`)

	softwareTransformers = []struct {
		matches func(*fleet.Software) bool
		mutate  func(context.Context, *fleet.Software, *slog.Logger)
	}{
		{
			// JetBrains EAP version numbers aren't what are used in CPEs; this handles the translation for Mac versions.
			// See #22723 for background. Bundle identifier for EAPs also ends with "-EAP" but checking version makes it
			// a bit easier to add other platforms later. EAP version numbers are e.g. EAP GO-243.21565.42, and checking
			// here for the dash ensures that string splitting in the mutator always works without a bounds check.
			matches: func(s *fleet.Software) bool {
				return s.BundleIdentifier != "" && strings.HasPrefix(s.BundleIdentifier, "com.jetbrains.") &&
					strings.HasPrefix(s.Version, "EAP ") && strings.Contains(s.Version, "-")
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				// 243 -> 2024.3
				// NOTE(review): the [:2] / [2:] slicing below assumes the component after the dash has at
				// least two characters; a shorter one would panic (mutateSoftware recovers from panics).
				eapMajorVersion := strings.Split(strings.Split(s.Version, "-")[1], ".")[0]
				yearBasedMajorVersion, err := strconv.Atoi("20" + eapMajorVersion[:2])
				if err != nil {
					logger.DebugContext(ctx, "failed to parse JetBrains EAP major version", "version", s.Version, "err", err)
					return
				}
				yearBasedMinorVersion, err := strconv.Atoi(eapMajorVersion[2:])
				if err != nil {
					logger.DebugContext(ctx, "failed to parse JetBrains EAP minor version", "version", s.Version, "err", err)
					return
				}

				// EAPs are treated as having all fixes from the previous year-based release, but no fixes from the
				// year-based release they're an EAP of. The exception to this would be CVE-2024-37051, which was fixed
				// in a second/third EAP depending on product, but at this point all vulnerable EAPs force exit on
				// startup due to being expired, so that CVE can't be exploited.
				yearBasedMinorVersion -= 1
				if yearBasedMinorVersion <= 0 { // wrap e.g. 2024.1 to 2023.4 (not a real version, but has all 2023.3 fixes)
					yearBasedMajorVersion -= 1
					yearBasedMinorVersion = 4
				}

				// pass through minor and patch version for EAP to tell different EAP builds apart
				eapMinorAndPatchVersion := strings.Join(strings.Split(strings.Split(s.Version, "-")[1], ".")[1:], ".")
				s.Version = fmt.Sprintf("%d.%d.%s.%s", yearBasedMajorVersion, yearBasedMinorVersion, "99", eapMinorAndPatchVersion)
			},
		},
		{
			// Python 3 from the "programs" source: the third version component packs patch version, release
			// level and release serial together. It is unpacked into <patch>[a|b|rc<serial>] and the fourth
			// component is dropped, e.g. 3.14.102.0 -> 3.14.0a2.
			matches: func(s *fleet.Software) bool {
				return s.Source == "programs" && strings.HasPrefix(s.Name, "Python 3.")
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				versionComponents := strings.Split(s.Version, ".")
				// Python 3 versions on Windows should always look like 3.14.102.0; if they don't we
				// should bail out to avoid bad indexing panics.
				if len(versionComponents) < 4 {
					logger.DebugContext(ctx, "expected 4 version components", "gotCount", len(versionComponents))
					return
				}
				if len(versionComponents[2]) < 3 {
					logger.DebugContext(ctx, "got a patch version component with unexpected length", "gotPatchVersion", versionComponents[2])
					return
				}

				// Last three characters: two for the release level, one for the release serial; whatever
				// precedes them is the patch version.
				patchVersion := versionComponents[2][0 : len(versionComponents[2])-3]
				releaseLevel := versionComponents[2][len(versionComponents[2])-3 : len(versionComponents[2])-1]
				releaseSerial := versionComponents[2][len(versionComponents[2])-1 : len(versionComponents[2])]

				candidateSuffix := ""
				switch releaseLevel { // see https://github.com/python/cpython/issues/100829#issuecomment-1374656643
				case "10":
					candidateSuffix = "a" + releaseSerial
				case "11":
					candidateSuffix = "b" + releaseSerial
				case "12":
					candidateSuffix = "rc" + releaseSerial
				} // default
				// Any other release level gets no suffix.

				if patchVersion == "" { // dot-zero patch releases have a 3-digit patch + build number
					patchVersion = "0"
				}

				versionComponents[2] = patchVersion + candidateSuffix
				s.Version = strings.Join(versionComponents[0:3], ".")
			},
		},
		{
			// Cloudflare WARP from the "programs" source: prefix the version with "20" unless its first
			// component already has four characters.
			matches: func(s *fleet.Software) bool {
				return s.Name == "Cloudflare WARP" && s.Source == "programs"
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				// Perform some sanity check on the version before mutating it.
				parts := strings.Split(s.Version, ".")
				if len(parts) <= 1 {
					logger.DebugContext(ctx, "failed to parse software version", "name", s.Name, "version", s.Version)
					return
				}
				_, err := strconv.Atoi(parts[0])
				if err != nil {
					logger.DebugContext(ctx, "failed to parse software version", "name", s.Name, "version", s.Version, "err", err)
					return
				}
				// In case Cloudflare starts returning the full year.
				if len(parts[0]) == 4 {
					return
				}
				s.Version = "20" + s.Version // Cloudflare WARP was released on 2019.
			},
		},
		{
			// Microsoft Teams (and Teams classic) from the "apps" source: when the version matches
			// macOSMSTeamsVersion, rebuild it as <group1>.<group2>.00.<group3>.
			matches: func(s *fleet.Software) bool {
				return s.Source == "apps" && (s.Name == "Microsoft Teams.app" || s.Name == "Microsoft Teams classic.app")
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				if matches := macOSMSTeamsVersion.FindStringSubmatch(s.Version); len(matches) > 0 {
					s.Version = fmt.Sprintf("%s.%s.00.%s", matches[1], matches[2], matches[3])
				}
			},
		},
		{
			// Citrix Workspace: when the first version component has at most two characters, the first two
			// components are merged into one number (first*100 + second) and the rest is kept as-is.
			matches: func(s *fleet.Software) bool {
				return citrixName.Match([]byte(s.Name)) || s.Name == "Citrix Workspace.app"
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				parts := strings.Split(s.Version, ".")
				if len(parts) <= 1 {
					logger.DebugContext(ctx, "failed to parse software version", "name", s.Name, "version", s.Version)
					return
				}

				if len(parts[0]) > 2 {
					// then the versioning is correct, so no need to change
					return
				}

				part1, err := strconv.Atoi(parts[0])
				if err != nil {
					logger.DebugContext(ctx, "failed to parse software version", "name", s.Name, "version", s.Version, "err", err)
					return
				}
				part2, err := strconv.Atoi(parts[1])
				if err != nil {
					logger.DebugContext(ctx, "failed to parse software version", "name", s.Name, "version", s.Version, "err", err)
					return
				}
				newFirstPart := part1*100 + part2
				newFirstStr := strconv.Itoa(newFirstPart)
				newParts := []string{newFirstStr}
				newParts = append(newParts, parts[2:]...)
				s.Version = strings.Join(newParts, ".")
			},
		},
		{
			// Trim the "RELEASE." prefix from Minio versions.
			matches: func(s *fleet.Software) bool {
				return s.Name == "minio" && strings.Contains(s.Version, "RELEASE.")
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				// trim the "RELEASE." prefix from the version
				s.Version = strings.TrimPrefix(s.Version, "RELEASE.")
				// trim any unexpected trailing characters
				if idx := strings.Index(s.Version, "_"); idx != -1 {
					s.Version = s.Version[:idx]
				}
			},
		},
		{
			// Convert the timestamp to NVD's format for Minio versions.
			matches: func(s *fleet.Software) bool {
				return s.Name == "minio" && minioAltDate.MatchString(s.Version)
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				timestamp, err := time.Parse("20060102150405", s.Version)
				if err != nil {
					logger.DebugContext(ctx, "failed to parse software version", "name", s.Name, "version", s.Version, "err", err)
					return
				}
				s.Version = timestamp.Format("2006-01-02T15-04-05Z")
			},
		},
		{
			// Powershell preview versions 7.5* all have CVE-2025-21171
			// Non-preview 7.5* do not
			matches: func(s *fleet.Software) bool {
				return strings.Contains(strings.ToLower(s.Name), "powershell")
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				parts := strings.Split(s.Version, ".")
				if len(parts) < 3 {
					return
				}

				isSpecificVer := parts[0] == "7" && parts[1] == "5"
				var newVersion string
				switch {
				// 7.5 preview builds (dash in the third component, or "preview" in the name) collapse to "7.5".
				case isSpecificVer && (strings.Contains(parts[2], "-") ||
					strings.Contains(strings.ToLower(s.Name), "preview")):
					newVersion = fmt.Sprintf("%s.%s", parts[0], parts[1])
				// Other 7.5 builds keep their first three components.
				case isSpecificVer:
					newVersion = fmt.Sprintf("%s.%s.%s", parts[0], parts[1], parts[2])
				default:
					return
				}

				s.Name = "powershell"
				s.Version = newVersion
			},
		},
		{
			// MacVim uses dual versioning: MacVim release numbers (r178, r179, etc.) and bundled Vim versions (9.0.1897, 9.1.0, etc.)
			// NVD CVEs reference MacVim release numbers, but Fleet inventories the bundled Vim version from macOS metadata.
			// See https://github.com/macvim-dev/macvim/releases for version mappings.
			matches: func(s *fleet.Software) bool {
				return s.Name == "MacVim" && s.BundleIdentifier == "org.vim.MacVim" && s.Source == "apps"
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				vimToMacVimMap := map[string]string{
					// r182 series
					"9.1.2068": "182.1", // r182.1 (prerelease)
					"9.1.1887": "182",   // r182 (stable)
					// r181 series
					"9.1.1577": "181.2", // r181.2 (prerelease)
					"9.1.1251": "181.1", // r181.1 (prerelease)
					"9.1.1128": "181",   // r181 (stable)
					// r180 series
					"9.1.1050": "180.2", // r180.2 (prerelease)
					"9.1.1000": "180.1", // r180.1 (prerelease)
					"9.1.0727": "180",   // r180 (stable)
					// r179 series
					"9.1.0695": "179.1", // r179.1 (prerelease)
					"9.1.0":    "179",   // r179 (stable)
					// r178 series
					"9.0.1897": "178", // r178 (stable)
				}

				if macVimRelease, ok := vimToMacVimMap[s.Version]; ok {
					logger.DebugContext(ctx, "converting MacVim Vim version to release number",
						"original_version", s.Version, "macvim_release", macVimRelease)
					s.Version = macVimRelease
				} else {
					// For unknown versions, leave as-is to avoid false negatives
					logger.DebugContext(ctx, "unknown MacVim Vim version, unable to convert to release number",
						"version", s.Version)
				}
			},
		},
		{
			// Homebrew's "imp" (Integrative Modeling Platform) is incorrectly matched against
			// Horde IMP CPEs. Rename the Homebrew package to prevent incorrect CPE
			// matching with horde:imp.
			matches: func(s *fleet.Software) bool {
				return s.Name == "imp" && s.Source == "homebrew_packages"
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				s.Name = "integrative-modeling-platform"
			},
		},
		{
			// ninxsoft/Mist (macOS installer download tool) is incorrectly matched against
			// mist.io/Mist CPEs. Rename the app to prevent incorrect CPE matching with mist:mist.
			// See https://github.com/fleetdm/fleet/issues/37111
			matches: func(s *fleet.Software) bool {
				return s.BundleIdentifier == "com.ninxsoft.mist" && s.Source == "apps"
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				s.Name = "ninxsoft-mist"
			},
		},
		{
			// 7-Zip on Windows installed with MSI reports versions like "24.09.00.0" but NVD uses "24.09".
			// Strip trailing ".00.0" components to match NVD version format.
			// See https://github.com/fleetdm/fleet/issues/36335
			matches: func(s *fleet.Software) bool {
				return strings.HasPrefix(s.Name, "7-Zip") && s.Source == "programs"
			},
			mutate: func(ctx context.Context, s *fleet.Software, logger *slog.Logger) {
				parts := strings.Split(s.Version, ".")
				switch len(parts) {
				case 0, 1:
					logger.DebugContext(ctx, "unexpected 7-Zip version format", "source", "programs", "name", s.Name, "version", s.Version)
					return
				case 2:
					return // Already in the correct format
				default:
					// Keep only the first two components, whatever follows them.
					s.Version = parts[0] + "." + parts[1]
				}
			},
		},
	}
)
// mutateSoftware applies the first entry of softwareTransformers whose predicate
// matches software, editing software in place. At most one transformer runs. A
// panic raised by the transformer's mutate function is recovered and logged as a
// warning instead of propagating to the caller.
func mutateSoftware(ctx context.Context, software *fleet.Software, logger *slog.Logger) {
	selected := -1
	for i := range softwareTransformers {
		if softwareTransformers[i].matches(software) {
			selected = i
			break
		}
	}
	if selected < 0 {
		return
	}

	defer func() {
		if r := recover(); r != nil {
			logger.WarnContext(ctx, "panic during software mutation", "softwareName", software.Name, "softwareVersion", software.Version, "error", r)
		}
	}()
	softwareTransformers[selected].mutate(ctx, software, logger)
}
// CPEFromSoftware attempts to find a matching cpe entry for the given software in the NVD CPE dictionary. `db` contains data from the NVD CPE dictionary
// and is optimized for lookups, see `GenerateCPEDB`. `translations` are used to aid in cpe matching. When searching for cpes, we first check if it matches
// any translations, and then lookup in the cpe database based on the title, product and vendor.
//
// The software is passed through mutateSoftware first, so its name/version may be modified in place.
//
// It returns the formatted CPE string, or "" with a nil error when the software is skipped (non-ASCII name, or a
// translation marked as skip) or when nothing matches. Errors come from translating the software, from building the
// translated query, or from querying the database.
func CPEFromSoftware(ctx context.Context, logger *slog.Logger, db *sqlx.DB, software *fleet.Software, translations CPETranslations, reCache *regexpCache) (string, error) {
	if containsNonASCII(software.Name) {
		logger.DebugContext(ctx, "skipping software with non-ascii characters", "software", software.Name, "version", software.Version, "source", software.Source)
		return "", nil
	}

	mutateSoftware(ctx, software, logger) // tweak e.g. software versions prior to CPE matching if needed

	translation, match, err := translations.Translate(reCache, software)
	if err != nil {
		return "", fmt.Errorf("translate software: %w", err)
	}

	if match {
		if translation.Skip {
			logger.DebugContext(ctx, "CPE match skipped", "software", software.Name, "version", software.Version, "source", software.Source)
			return "", nil
		}

		// A translation matched: look up a single CPE row constrained by whatever
		// fields the translation specifies. Values within one field are OR-ed,
		// the fields themselves are AND-ed.
		ds := goqu.Dialect("sqlite").From(goqu.I("cpe_2").As("c")).
			Select(
				"c.rowid",
				"c.product",
				"c.vendor",
				"c.sw_edition",
				"c.deprecated",
				goqu.L("1 as weight"),
			).Limit(1)

		if len(translation.Product) > 0 {
			var exps []goqu.Expression
			for _, product := range translation.Product {
				exps = append(exps, goqu.I("c.product").Eq(product))
			}
			ds = ds.Where(goqu.Or(exps...))
		}
		if len(translation.Vendor) > 0 {
			var exps []goqu.Expression
			for _, vendor := range translation.Vendor {
				exps = append(exps, goqu.I("c.vendor").Eq(vendor))
			}
			ds = ds.Where(goqu.Or(exps...))
		}
		if len(translation.TargetSW) > 0 {
			var exps []goqu.Expression
			// Named tsw so the package-level targetSW function is not shadowed.
			for _, tsw := range translation.TargetSW {
				exps = append(exps, goqu.I("c.target_sw").Eq(tsw))
			}
			ds = ds.Where(goqu.Or(exps...))
		}
		if len(translation.SWEdition) > 0 {
			var exps []goqu.Expression
			for _, edition := range translation.SWEdition {
				exps = append(exps, goqu.I("c.sw_edition").Eq(edition))
			}
			ds = ds.Where(goqu.Or(exps...))
		}

		// Surface query-building failures instead of running a broken statement.
		stm, args, err := ds.ToSQL()
		if err != nil {
			return "", fmt.Errorf("building CPE query for: %s: %w", software.Name, err)
		}

		var result IndexedCPEItem
		err = db.Get(&result, stm, args...)
		if errors.Is(err, sql.ErrNoRows) {
			return "", nil
		}
		if err != nil {
			return "", fmt.Errorf("getting CPE for: %s: %w", software.Name, err)
		}
		if result.ID == 0 {
			return "", nil
		}

		if translation.Part != "" {
			result.Part = translation.Part
		}
		return result.FmtStr(software), nil
	}

	// No translation: run the search queries in priority order and stop at the
	// first one that yields a usable candidate.
	for _, q := range cpeSearchQueries(software) {
		var results []IndexedCPEItem
		err := db.Select(&results, q.stm, q.args...)
		if err != nil && !errors.Is(err, sql.ErrNoRows) {
			return "", fmt.Errorf("getting cpes for: %s: %w", software.Name, err)
		}

		// Collect all matching candidates for this query, then pick the best one.
		// This avoids nondeterministic results when multiple CPE entries match
		// (e.g. "ge:line" vs "linecorp:line" for the "Line" app).
		var (
			bestMatch         *IndexedCPEItem
			bestTargetSWScore int
			bestVendorMatch   bool
			deprecatedMatches []IndexedCPEItem
		)

		for i := range results {
			candidate := &results[i]
			if !cpeItemMatchesSoftware(candidate, software) {
				continue
			}
			if candidate.Deprecated {
				// Deprecated entries are only used as a fallback, see below.
				deprecatedMatches = append(deprecatedMatches, *candidate)
				continue
			}

			targetSWScore := cpeTargetSWMatchesSoftware(candidate, software)
			vendorMatch := cpeVendorMatchesSoftware(candidate, software)

			// Take the candidate when it is the first valid match, OR has a better
			// target_sw score (ecosystem match), OR has the same target_sw score but
			// a better vendor match. Ties keep the earlier row.
			if bestMatch == nil ||
				targetSWScore > bestTargetSWScore ||
				(targetSWScore == bestTargetSWScore && !bestVendorMatch && vendorMatch) {
				bestMatch = candidate
				bestTargetSWScore = targetSWScore
				bestVendorMatch = vendorMatch
			}
		}

		if bestMatch != nil {
			return bestMatch.FmtStr(software), nil
		}

		// All matches are deprecated; try to resolve via deprecation chain
		if len(deprecatedMatches) > 0 {
			cpe, err := resolveDeprecatedCPE(db, deprecatedMatches, software)
			if err != nil {
				return "", err
			}
			if cpe != "" {
				return cpe, nil
			}
		}
	}

	return "", nil
}
// consumeCPEBuffer persists a batch of software/CPE pairs: entries with a CPE are
// upserted first, then entries with an empty CPE are deleted. A mismatch between
// the number of rows reported by the datastore and the number requested is only
// logged at debug level. The first datastore error is returned as-is.
func consumeCPEBuffer(
	ctx context.Context,
	ds fleet.Datastore,
	logger *slog.Logger,
	batch []fleet.SoftwareCPE,
) error {
	var upserts, deletes []fleet.SoftwareCPE
	for _, entry := range batch {
		if entry.CPE == "" {
			// An empty CPE could be because of a new translation rule or because we
			// fixed a bug with the CPE detection process.
			deletes = append(deletes, entry)
		} else {
			upserts = append(upserts, entry)
		}
	}

	if len(upserts) > 0 {
		upserted, err := ds.UpsertSoftwareCPEs(ctx, upserts)
		if err != nil {
			return err
		}
		if int(upserted) != len(upserts) {
			logger.DebugContext(ctx, "CPE upsert count mismatch", "toUpsert", len(upserts), "upserted", upserted)
		}
	}

	if len(deletes) > 0 {
		deleted, err := ds.DeleteSoftwareCPEs(ctx, deletes)
		if err != nil {
			return err
		}
		if int(deleted) != len(deletes) {
			logger.DebugContext(ctx, "CPE delete count mismatch", "toDelete", len(deletes), "deleted", deleted)
		}
	}

	return nil
}
// LinuxImageRegex is a MySQL 5.7 compatible regexp for Ubuntu kernel package names. It
// is anchored at the start only and matches names beginning with
// linux-image-<digits>.<digits>.<digits>-<digits>-<alphanumerics>.
const LinuxImageRegex = `^linux-image-[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+-[[:digit:]]+-[[:alnum:]]+`
// knownUbuntuKernelVariants is a list of known kernel variants that are used in the Ubuntu kernel
// OVAL feeds. These are used to determine if a kernel package is a custom variant and should be
// matched against the NVD feed rather than the OVAL feed.
var knownUbuntuKernelVariants = []string{
	"allwinner",
	"aws",
	"aws-hwe",
	"azure",
	"azure-fde",
	"bluefield",
	"dell300x",
	"euclid",
	"gcp",
	"generic",
	"generic-64k",
	"generic-lpae",
	"gke",
	"gkeop",
	"intel",
	"intel-iotg",
	"ibm",
	"iot",
	"kvm",
	"laptop",
	"lowlatency",
	"lowlatency-64k",
	"nvidia",
	"nvidia-64k",
	"nvidia-lowlatency",
	"oem",
	"oem-osp1",
	"oracle",
	"oracle-64k",
	"powerpc-e500",
	"powerpc-e500mc",
	"powerpc-smp",
	"powerpc64-emb",
	"powerpc64-smp",
	"raspi",
	"raspi-nolpae",
	"raspi2",
	"snapdragon",
	"starfive",
	"xilinx-zynqmp",
}

// BuildLinuxExclusionRegex returns a regular expression that matches names ending
// in "-<variant>" for any entry of knownUbuntuKernelVariants, in the form
// "-(a|b|...)$".
func BuildLinuxExclusionRegex() string {
	alternation := strings.Join(knownUbuntuKernelVariants, "|")
	return fmt.Sprintf("-(%s)$", alternation)
}
// TranslateSoftwareToCPE computes and stores CPEs for software in two passes,
// both reading the CPE database located under vulnPath:
//
//  1. all software except the sources supported by the oval package and iOS/iPadOS apps;
//  2. deb_packages whose name matches LinuxImageRegex and does not end in one of
//     the known Ubuntu kernel variants (see BuildLinuxExclusionRegex).
//
// Each iterator is closed explicitly so a close error can be reported; the
// deferred Close calls cover the early-return paths.
func TranslateSoftwareToCPE(
	ctx context.Context,
	ds fleet.Datastore,
	vulnPath string,
	logger *slog.Logger,
) error {
	// Skip software from sources for which we will be using OVAL or goval-dictionary for vulnerability detection.
	// Also exclude iOS and iPadOS apps until we enable vulnerabilities support for them.
	// The shared package-level slice is cloned before appending so its backing
	// array can never be written to from here.
	excludedSources := append(slices.Clone(oval.SupportedSoftwareSources), "ios_apps", "ipados_apps")

	nonOvalIterator, err := ds.AllSoftwareIterator(
		ctx,
		fleet.SoftwareIterQueryOptions{
			ExcludedSources: excludedSources,
		},
	)
	if err != nil {
		return ctxerr.Wrap(ctx, err, "non-oval software iterator")
	}
	defer nonOvalIterator.Close()

	err = translateSoftwareToCPEWithIterator(ctx, ds, vulnPath, logger, nonOvalIterator)
	if err != nil {
		return ctxerr.Wrap(ctx, err, "translate non-oval software to CPE")
	}
	if err := nonOvalIterator.Close(); err != nil {
		return ctxerr.Wrap(ctx, err, "closing non-oval software iterator")
	}

	ubuntuKernelIterator, err := ds.AllSoftwareIterator(
		ctx,
		fleet.SoftwareIterQueryOptions{
			IncludedSources: []string{"deb_packages"},
			NameMatch:       LinuxImageRegex,
			NameExclude:     BuildLinuxExclusionRegex(),
		},
	)
	if err != nil {
		return ctxerr.Wrap(ctx, err, "ubuntu kernel iterator")
	}
	defer ubuntuKernelIterator.Close()

	err = translateSoftwareToCPEWithIterator(ctx, ds, vulnPath, logger, ubuntuKernelIterator)
	if err != nil {
		return ctxerr.Wrap(ctx, err, "translate ubuntu kernel to CPE")
	}
	if err := ubuntuKernelIterator.Close(); err != nil {
		return ctxerr.Wrap(ctx, err, "closing ubuntu kernel iterator")
	}

	return nil
}
// translateSoftwareToCPEWithIterator computes a CPE for every software item the
// iterator yields and writes the changed ones to the datastore in batches of 500.
//
// Software without a version gets an empty CPE (which removes a previously stored
// one); software whose CPE lookup fails is logged and skipped; software whose CPE
// equals its stored GenerateCPE value is left untouched. A failure to load the CPE
// translations is logged and processing continues. Errors opening the CPE
// database, reading from the iterator, or writing a batch abort the run.
func translateSoftwareToCPEWithIterator(
	ctx context.Context,
	ds fleet.Datastore,
	vulnPath string,
	logger *slog.Logger,
	iterator fleet.SoftwareIterator,
) error {
	const bufferMaxSize = 500

	db, err := sqliteDBReadOnly(ctx, filepath.Join(vulnPath, cpeDBFilename), logger)
	if err != nil {
		return ctxerr.Wrap(ctx, err, "opening the cpe db")
	}
	defer db.Close()

	cpeTranslations, err := loadCPETranslations(filepath.Join(vulnPath, cpeTranslationsFilename))
	if err != nil {
		logger.ErrorContext(ctx, "failed to load cpe translations", "err", err)
	}

	reCache := newRegexpCache()

	var pending []fleet.SoftwareCPE
	// flush writes the pending entries to the datastore and empties the buffer.
	flush := func() error {
		if err := consumeCPEBuffer(ctx, ds, logger, pending); err != nil {
			return ctxerr.Wrap(ctx, err, "inserting cpe")
		}
		pending = nil
		return nil
	}

	for iterator.Next() {
		software, err := iterator.Value()
		if err != nil {
			return ctxerr.Wrap(ctx, err, "getting value from iterator")
		}

		cpe := ""
		if software.Version != "" {
			cpe, err = CPEFromSoftware(ctx, logger, db, software, cpeTranslations, reCache)
			if err != nil {
				logger.ErrorContext(ctx, "error translating to CPE, skipping",
					"software", software.Name,
					"version", software.Version,
					"source", software.Source,
					"err", err,
				)
				continue
			}
		} else {
			// Skip the lookup for software without version to avoid false positives
			// in the CPE matching process. We still fall through with an empty CPE in
			// case the software had an invalid CPE generated by a previous version of
			// Fleet.
			logger.DebugContext(ctx, "skipping software without version",
				"software", software.Name,
				"source", software.Source,
			)
		}

		if cpe == software.GenerateCPE {
			// The generated CPE hasn't changed from what's already stored in the DB,
			// so there is nothing to write.
			continue
		}

		pending = append(pending, fleet.SoftwareCPE{SoftwareID: software.ID, CPE: cpe})
		if len(pending) == bufferMaxSize {
			if err := flush(); err != nil {
				return err
			}
		}
	}

	// Write whatever is left over from the last, partially filled batch.
	if err := flush(); err != nil {
		return err
	}

	if err := iterator.Err(); err != nil {
		return ctxerr.Wrap(ctx, err, "iterator contains error at the end of iteration")
	}

	return nil
}
// allowedNonASCII lists the non-ASCII runes that containsNonASCII tolerates. The
// runes are written as escapes so they cannot be lost or confused with
// look-alike characters.
var allowedNonASCII = []int32{
	'\u2013', // en dash
	'\u2014', // em dash
}

// containsNonASCII reports whether s contains any rune outside the ASCII range
// other than the ones listed in allowedNonASCII. The empty string contains none.
func containsNonASCII(s string) bool {
	for _, char := range s {
		if char > unicode.MaxASCII && !slices.Contains(allowedNonASCII, char) {
			return true
		}
	}
	return false
}