fleet/cmd/osv-processor/main.go
2026-04-21 13:39:22 -06:00

795 lines
22 KiB
Go

package main
import (
"bufio"
"compress/gzip"
"encoding/json"
"errors"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"time"
)
// OSVData is the subset of an OSV advisory document that this tool decodes
// from JSON. Timestamps are kept as the raw strings found in the file; no
// date parsing is performed on them here.
type OSVData struct {
	// SchemaVersion is the advisory's "schema_version" value.
	SchemaVersion string `json:"schema_version"`
	// ID is the advisory identifier. The CVE extractors inspect it for a
	// "CVE-" or "UBUNTU-CVE-" prefix.
	ID string `json:"id"`
	// Published is copied verbatim into every ProcessedVuln built from this advisory.
	Published string `json:"published"`
	// Modified is copied verbatim into every ProcessedVuln built from this advisory.
	Modified string `json:"modified"`
	// Details is decoded but not read by any code visible in this file.
	Details string `json:"details"`
	// Affected lists the package/ecosystem entries the advisory applies to.
	Affected []Affected `json:"affected"`
	// Upstream is the first place the CVE extractors look for "CVE-" prefixed IDs.
	Upstream []string `json:"upstream,omitempty"`
	// Related is consulted by extractCVEIDs when Upstream yields no CVE IDs.
	Related []string `json:"related,omitempty"`
}
// Affected is one element of an advisory's "affected" array: a package in a
// specific ecosystem together with the version information for it.
type Affected struct {
	// Package identifies the ecosystem and package name this entry is about.
	Package Package `json:"package"`
	// Ranges holds the introduced/fixed event lists; only ranges whose Type is
	// "ECOSYSTEM" are read by extractVersionRange.
	Ranges []Range `json:"ranges"`
	// Versions is passed through unchanged into ProcessedVuln.Versions.
	Versions []string `json:"versions,omitempty"`
	// EcosystemSpecific is decoded as free-form JSON; not read in the visible code.
	EcosystemSpecific map[string]any `json:"ecosystem_specific,omitempty"`
	// DatabaseSpecific is decoded as free-form JSON; not read in the visible code.
	DatabaseSpecific map[string]any `json:"database_specific,omitempty"`
}
// Package identifies the package an Affected entry refers to.
type Package struct {
	// Ecosystem is the colon-separated ecosystem string (for example
	// "Ubuntu:24.04:LTS" or "Red Hat:enterprise_linux:9::appstream") from which
	// the distribution version is extracted.
	Ecosystem string `json:"ecosystem"`
	// Name is the package name used as the key in the output artifacts
	// (possibly after transformVuln rewrites it).
	Name string `json:"name"`
	// Purl is the advisory's "purl" value; decoded but not read in the visible code.
	Purl string `json:"purl,omitempty"`
}
// Range is one entry of an Affected entry's "ranges" array.
type Range struct {
	// Type is the range kind; extractVersionRange only considers "ECOSYSTEM".
	Type string `json:"type"`
	// Events is the list of introduced/fixed markers for this range.
	Events []Event `json:"events"`
}
// Event is a single marker inside a Range. Either field may be empty; an
// empty field is simply skipped when the version range is extracted.
type Event struct {
	// Introduced is the "introduced" version string of the event, if any.
	Introduced string `json:"introduced,omitempty"`
	// Fixed is the "fixed" version string of the event, if any.
	Fixed string `json:"fixed,omitempty"`
}
// ProcessedVuln is the flattened per-package vulnerability record written to
// the output artifacts.
type ProcessedVuln struct {
	// CVE is the CVE identifier (or, on the Ubuntu path, the raw advisory ID
	// when no CVE identifier could be derived).
	CVE string `json:"cve"`
	// Published is copied from OSVData.Published.
	Published string `json:"published"`
	// Modified is copied from OSVData.Modified.
	Modified string `json:"modified"`
	// Introduced is the first non-empty "introduced" value found by extractVersionRange.
	Introduced string `json:"introduced,omitempty"`
	// Fixed is the first non-empty "fixed" value found by extractVersionRange.
	Fixed string `json:"fixed,omitempty"`
	// Versions is copied from Affected.Versions.
	Versions []string `json:"versions,omitempty"`
}
// Config carries the command-line settings and the run timestamp through the
// processing pipelines. It is populated once in main.
type Config struct {
	// Platform selects the pipeline: "ubuntu" or "rhel".
	Platform string
	// InputDir is the directory tree walked for ".json" OSV files.
	InputDir string
	// OutputDir is where the gzip-compressed artifacts are written.
	OutputDir string
	// Versions is the raw comma-separated include list (--versions).
	Versions string
	// ExcludeVersions is the raw comma-separated exclude list (--exclude-versions).
	ExcludeVersions string
	// ChangedFilesToday is the path of a file listing CVE files changed today;
	// empty disables today's delta.
	ChangedFilesToday string
	// ChangedFilesYesterday is the path of a file listing CVE files changed
	// yesterday; empty disables yesterday's delta.
	ChangedFilesYesterday string
	// DateStr is RunTime formatted as "2006-01-02"; used in output file names.
	DateStr string
	// YesterdayStr is the day before RunTime formatted as "2006-01-02".
	YesterdayStr string
	// GeneratedTimestamp is RunTime formatted as RFC 3339; stored in each artifact.
	GeneratedTimestamp string
	// RunTime is the UTC time captured when the program started.
	RunTime time.Time
}
// ArtifactData is the JSON document written for one Ubuntu version (both the
// full artifact and the delta artifacts use this shape).
type ArtifactData struct {
	// SchemaVersion is set to "1.0" when the artifact is created.
	SchemaVersion string `json:"schema_version"`
	// UbuntuVersion is the release string extracted from the ecosystem, e.g. "24.04".
	UbuntuVersion string `json:"ubuntu_version"`
	// Generated is the run's RFC 3339 timestamp, filled in just before writing.
	Generated string `json:"generated"`
	// TotalCVEs is the number of distinct CVE values across all packages.
	TotalCVEs int `json:"total_cves"`
	// TotalPackages is the number of keys in Vulnerabilities.
	TotalPackages int `json:"total_packages"`
	// Vulnerabilities maps a package name to the records collected for it.
	Vulnerabilities map[string][]ProcessedVuln `json:"vulnerabilities"`
}
// RHELArtifactData is the JSON document written for one RHEL major version.
// It mirrors ArtifactData except for the name of the version field.
type RHELArtifactData struct {
	// SchemaVersion is set to "1.0" when the artifact is created.
	SchemaVersion string `json:"schema_version"`
	// RHELVersion is the major version extracted from the ecosystem, e.g. "9".
	RHELVersion string `json:"rhel_version"`
	// Generated is the run's RFC 3339 timestamp, filled in just before writing.
	Generated string `json:"generated"`
	// TotalCVEs is the number of distinct CVE values across all packages.
	TotalCVEs int `json:"total_cves"`
	// TotalPackages is the number of keys in Vulnerabilities.
	TotalPackages int `json:"total_packages"`
	// Vulnerabilities maps a package name to the records collected for it.
	Vulnerabilities map[string][]ProcessedVuln `json:"vulnerabilities"`
}
// main parses the command-line flags into a Config, stamps it with the current
// UTC time, and hands it to the pipeline selected by --platform. A pipeline
// error or an unrecognised platform terminates the process through log.Fatalf.
func main() {
	var cfg Config
	flag.StringVar(&cfg.Platform, "platform", "ubuntu", "Platform to process: ubuntu or rhel")
	flag.StringVar(&cfg.InputDir, "input", "", "Input directory with OSV JSON files (default: /tmp/ubuntu-osv for ubuntu, /tmp/rhel-osv for rhel)")
	flag.StringVar(&cfg.OutputDir, "output", "./artifacts", "Output directory for artifacts")
	flag.StringVar(&cfg.Versions, "versions", "", "Comma-separated versions to process (inclusive)")
	flag.StringVar(&cfg.ExcludeVersions, "exclude-versions", "", "Comma-separated versions to exclude (ignored if --versions is set)")
	flag.StringVar(&cfg.ChangedFilesToday, "changed-files-today", "", "Path to file containing CVE files changed today (ubuntu only)")
	flag.StringVar(&cfg.ChangedFilesYesterday, "changed-files-yesterday", "", "Path to file containing CVE files changed yesterday (ubuntu only)")
	flag.Parse()

	// Without an explicit --input, fall back to the per-platform default.
	// Every platform other than "rhel" receives the Ubuntu path; an unknown
	// platform is still rejected by the dispatch below.
	if cfg.InputDir == "" {
		if cfg.Platform == "rhel" {
			cfg.InputDir = "/tmp/rhel-osv"
		} else {
			cfg.InputDir = "/tmp/ubuntu-osv"
		}
	}

	// One clock reading feeds every derived date so that file names and the
	// embedded timestamp always agree with each other.
	now := time.Now().UTC()
	cfg.RunTime = now
	cfg.DateStr = now.Format("2006-01-02")
	cfg.YesterdayStr = now.AddDate(0, 0, -1).Format("2006-01-02")
	cfg.GeneratedTimestamp = now.Format(time.RFC3339)

	var pipeline func(Config) error
	switch cfg.Platform {
	case "ubuntu":
		pipeline = run
	case "rhel":
		pipeline = runRHEL
	default:
		log.Fatalf("Unknown platform: %s (supported: ubuntu, rhel)", cfg.Platform)
	}
	if err := pipeline(cfg); err != nil {
		log.Fatalf("Error: %v", err)
	}
}
// run executes the Ubuntu pipeline.
//
// It walks cfg.InputDir for ".json" OSV files, converts every affected entry
// whose ecosystem yields an Ubuntu version (and passes the version filter)
// into ProcessedVuln records, and groups them per Ubuntu version. One full
// artifact is written per discovered version. When cfg.ChangedFilesToday or
// cfg.ChangedFilesYesterday is set, records coming from files named in those
// lists are additionally collected into per-version delta artifacts.
//
// Files that fail to parse are logged and counted as skipped; they do not
// abort the run. Any other error (directory creation, loading a changed-files
// list, walking the tree, writing an artifact) is returned wrapped.
func run(cfg Config) error {
	if err := os.MkdirAll(cfg.OutputDir, 0o755); err != nil {
		return fmt.Errorf("failed to create output directory: %w", err)
	}

	// Build version filter
	targetVersions, excludedVersions := buildVersionFilter(cfg.Versions, cfg.ExcludeVersions)
	switch {
	case targetVersions != nil:
		log.Printf("Processing OSV files from %s for versions: %s", cfg.InputDir, cfg.Versions)
	case excludedVersions != nil:
		log.Printf("Processing OSV files from %s (auto-detecting, excluding: %s)", cfg.InputDir, cfg.ExcludeVersions)
	default:
		log.Printf("Processing OSV files from %s (auto-detecting all versions)", cfg.InputDir)
	}

	// Load changed CVE files for delta generation
	var todayCVEFiles, yesterdayCVEFiles map[string]struct{}
	generateTodayDeltas := cfg.ChangedFilesToday != ""
	generateYesterdayDeltas := cfg.ChangedFilesYesterday != ""
	if generateTodayDeltas {
		log.Printf("Loading today's changed CVE files from %s", cfg.ChangedFilesToday)
		var err error
		todayCVEFiles, err = loadChangedFiles(cfg.ChangedFilesToday)
		if err != nil {
			return fmt.Errorf("failed to load today's changed files: %w", err)
		}
		log.Printf("Found %d CVE files changed today", len(todayCVEFiles))
	}
	if generateYesterdayDeltas {
		log.Printf("Loading yesterday's changed CVE files from %s", cfg.ChangedFilesYesterday)
		var err error
		yesterdayCVEFiles, err = loadChangedFiles(cfg.ChangedFilesYesterday)
		if err != nil {
			return fmt.Errorf("failed to load yesterday's changed files: %w", err)
		}
		log.Printf("Found %d CVE files changed yesterday", len(yesterdayCVEFiles))
	}

	artifacts := make(map[string]*ArtifactData)
	todayArtifacts := make(map[string]*ArtifactData)
	yesterdayArtifacts := make(map[string]*ArtifactData)
	filesProcessed := 0
	filesSkipped := 0

	err := filepath.Walk(cfg.InputDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() || !strings.HasSuffix(path, ".json") {
			return nil
		}
		osvData, err := parseOSVFile(path)
		if err != nil {
			// A single unreadable advisory must not abort the whole run.
			log.Printf("Failed to parse %s: %v", path, err)
			filesSkipped++
			return nil
		}

		// Delta membership depends only on the file, so decide it once.
		inToday := generateTodayDeltas && shouldIncludeInDelta(cfg.InputDir, path, todayCVEFiles)
		inYesterday := generateYesterdayDeltas && shouldIncludeInDelta(cfg.InputDir, path, yesterdayCVEFiles)

		// The CVE ID is a property of the advisory, not of an affected entry;
		// fall back to the raw advisory ID when no CVE ID can be derived.
		cveID := extractCVEID(osvData)
		if cveID == "" {
			cveID = osvData.ID
		}

		for _, affected := range osvData.Affected {
			ubuntuVer := extractUbuntuVersion(affected.Package.Ecosystem)
			if ubuntuVer == "" {
				continue
			}

			// Filter versions based on flags; with neither list set, every
			// detected version is processed.
			if targetVersions != nil {
				// Inclusive mode: only process if in target list
				if !targetVersions[ubuntuVer] {
					continue
				}
			} else if excludedVersions != nil {
				// Exclusive mode: skip if in excluded list
				if excludedVersions[ubuntuVer] {
					continue
				}
			}

			introduced, fixed := extractVersionRange(affected.Ranges)
			vuln := ProcessedVuln{
				CVE:        cveID,
				Published:  osvData.Published,
				Modified:   osvData.Modified,
				Introduced: introduced,
				Fixed:      fixed,
				Versions:   affected.Versions,
			}

			// Apply any transformations/filters to modify the package name or
			// cve. A nil package list means the entry is dropped entirely.
			packages, modifiedVuln := transformVuln(affected.Package.Name, cveID, &vuln)
			if packages == nil {
				continue
			}

			// Use modified vulnerability if provided, otherwise use original
			vulnToUse := &vuln
			if modifiedVuln != nil {
				vulnToUse = modifiedVuln
			}

			for _, pkg := range packages {
				addUbuntuVuln(artifacts, ubuntuVer, pkg, *vulnToUse)
				if inToday {
					addUbuntuVuln(todayArtifacts, ubuntuVer, pkg, *vulnToUse)
				}
				if inYesterday {
					addUbuntuVuln(yesterdayArtifacts, ubuntuVer, pkg, *vulnToUse)
				}
			}
		}

		filesProcessed++
		if filesProcessed%1000 == 0 {
			log.Printf("Processed %d files...", filesProcessed)
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("error walking directory: %w", err)
	}

	log.Printf("Processed %d files, skipped %d files", filesProcessed, filesSkipped)
	log.Printf("Discovered %d Ubuntu versions", len(artifacts))

	// Write full artifacts
	if err := writeUbuntuArtifacts(cfg, artifacts, cfg.DateStr, ""); err != nil {
		return err
	}

	// Write delta artifacts (if any were generated)
	if generateTodayDeltas && len(todayArtifacts) > 0 {
		log.Printf("\nWriting today's delta artifacts (%s)...", cfg.DateStr)
		if err := writeUbuntuArtifacts(cfg, todayArtifacts, cfg.DateStr, "today"); err != nil {
			return err
		}
	}
	if generateYesterdayDeltas && len(yesterdayArtifacts) > 0 {
		log.Printf("\nWriting yesterday's delta artifacts (%s)...", cfg.YesterdayStr)
		if err := writeUbuntuArtifacts(cfg, yesterdayArtifacts, cfg.YesterdayStr, "yesterday"); err != nil {
			return err
		}
	}
	return nil
}

// addUbuntuVuln appends vuln to the list kept for pkg in the artifact of
// ubuntuVer, creating that artifact on first use.
func addUbuntuVuln(artifacts map[string]*ArtifactData, ubuntuVer, pkg string, vuln ProcessedVuln) {
	artifact, ok := artifacts[ubuntuVer]
	if !ok {
		artifact = &ArtifactData{
			SchemaVersion:   "1.0",
			UbuntuVersion:   ubuntuVer,
			Vulnerabilities: make(map[string][]ProcessedVuln),
		}
		artifacts[ubuntuVer] = artifact
	}
	artifact.Vulnerabilities[pkg] = append(artifact.Vulnerabilities[pkg], vuln)
}

// writeUbuntuArtifacts finalises (timestamp and totals) and writes one file per
// Ubuntu version into cfg.OutputDir. deltaDay is "" for full artifacts, or
// "today"/"yesterday" for delta artifacts; it selects the "-delta-" file name
// and the wording of the error and log messages. dateStr is the date embedded
// in the file name.
func writeUbuntuArtifacts(cfg Config, artifacts map[string]*ArtifactData, dateStr, deltaDay string) error {
	for ver, artifact := range artifacts {
		artifact.Generated = cfg.GeneratedTimestamp
		artifact.TotalCVEs = countTotalCVEs(artifact)
		artifact.TotalPackages = len(artifact.Vulnerabilities)

		// File names carry the version without its dot, e.g. "2404".
		verSlug := strings.ReplaceAll(ver, ".", "")
		fileName := fmt.Sprintf("osv-ubuntu-%s-%s.json.gz", verSlug, dateStr)
		what, logTag := "artifact", ""
		if deltaDay != "" {
			fileName = fmt.Sprintf("osv-ubuntu-%s-delta-%s.json.gz", verSlug, dateStr)
			what = deltaDay + "'s delta"
			logTag = " (" + deltaDay + ")"
		}

		outputFile := filepath.Join(cfg.OutputDir, fileName)
		if err := writeArtifact(outputFile, artifact); err != nil {
			return fmt.Errorf("failed to write %s for Ubuntu %s: %w", what, ver, err)
		}
		log.Printf("Ubuntu %s%s: %d packages, %d CVEs -> %s",
			ver, logTag, artifact.TotalPackages, artifact.TotalCVEs, outputFile)
	}
	return nil
}
func buildVersionFilter(versions, excludeVersions string) (targetVersions, excludedVersions map[string]bool) {
if versions != "" {
// Inclusive mode: only process specified versions
targetVersions = make(map[string]bool)
for ver := range strings.SplitSeq(versions, ",") {
trimmed := strings.TrimSpace(ver)
if trimmed != "" {
targetVersions[trimmed] = true
}
}
// If no valid versions were parsed, fall back to auto-detect
if len(targetVersions) == 0 {
return nil, nil
}
return targetVersions, nil
}
if excludeVersions != "" {
// Exclusive mode: process all except specified versions
excludedVersions = make(map[string]bool)
for ver := range strings.SplitSeq(excludeVersions, ",") {
trimmed := strings.TrimSpace(ver)
if trimmed != "" {
excludedVersions[trimmed] = true
}
}
// If no valid versions were parsed, fall back to auto-detect
if len(excludedVersions) == 0 {
return nil, nil
}
return nil, excludedVersions
}
// Auto-detect all versions
return nil, nil
}
// shouldIncludeInDelta reports whether filePath is named in changedFiles.
//
// The path is made relative to inputDir, converted to forward slashes, and
// given an "osv/cve/" prefix unless it already has one; the result is the key
// looked up in changedFiles. A path that cannot be expressed relative to
// inputDir is never part of a delta.
func shouldIncludeInDelta(inputDir, filePath string, changedFiles map[string]struct{}) bool {
	const prefix = "osv/cve/"

	rel, err := filepath.Rel(inputDir, filePath)
	if err != nil {
		return false
	}

	key := filepath.ToSlash(rel)
	if !strings.HasPrefix(key, prefix) {
		key = prefix + key
	}

	_, listed := changedFiles[key]
	return listed
}
// parseOSVFile reads the file at path in full and decodes it as a JSON OSV
// advisory. Read and decode errors are returned unchanged, and the result is
// nil whenever an error is returned.
func parseOSVFile(path string) (*OSVData, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	advisory := new(OSVData)
	if err := json.Unmarshal(raw, advisory); err != nil {
		return nil, err
	}
	return advisory, nil
}
func extractUbuntuVersion(ecosystem string) string {
// Example: "Ubuntu:24.04:LTS" -> "24.04"
// Example: "Ubuntu:Pro:22.04:LTS" -> "22.04"
for part := range strings.SplitSeq(ecosystem, ":") {
// Look for version pattern like "24.04", "22.04", "20.04"
if len(part) == 5 && strings.Contains(part, ".") {
return part
}
}
return ""
}
// extractCVEID derives a single CVE identifier for an advisory.
//
// Sources are tried in this order:
//  1. the first Upstream entry starting with "CVE-";
//  2. the advisory ID itself when it starts with "CVE-";
//  3. the advisory ID with its "UBUNTU-" prefix removed when it starts with
//     "UBUNTU-CVE-".
//
// It returns "" when none of them applies.
func extractCVEID(osv *OSVData) string {
	for i := range osv.Upstream {
		if candidate := osv.Upstream[i]; strings.HasPrefix(candidate, "CVE-") {
			return candidate
		}
	}

	switch id := osv.ID; {
	case strings.HasPrefix(id, "CVE-"):
		return id
	case strings.HasPrefix(id, "UBUNTU-CVE-"):
		return strings.TrimPrefix(id, "UBUNTU-")
	default:
		return ""
	}
}
// extractVersionRange returns the first non-empty "introduced" value and the
// first non-empty "fixed" value found among the events of ranges whose Type is
// "ECOSYSTEM". Ranges of any other type are ignored. The two values are picked
// independently, so they may come from different events or ranges; either is
// "" when no such value exists.
func extractVersionRange(ranges []Range) (introduced string, fixed string) {
	for i := range ranges {
		if ranges[i].Type != "ECOSYSTEM" {
			continue
		}
		for _, ev := range ranges[i].Events {
			// Once a value has been captured it is never overwritten.
			if introduced == "" {
				introduced = ev.Introduced
			}
			if fixed == "" {
				fixed = ev.Fixed
			}
		}
	}
	return introduced, fixed
}
// countTotalCVEs returns the number of distinct CVE values across all packages
// of the artifact. A CVE that affects several packages, or appears several
// times for the same package, is counted once.
func countTotalCVEs(artifact *ArtifactData) int {
	unique := make(map[string]struct{})
	for _, records := range artifact.Vulnerabilities {
		for i := range records {
			unique[records[i].CVE] = struct{}{}
		}
	}
	return len(unique)
}
// writeArtifact creates (or truncates) the file at path and writes artifact to
// it as gzip-compressed JSON.
//
// The returned error is the first failure encountered among creating the
// file, encoding, closing the gzip stream and closing the file. The named
// result exists so the deferred close step can report a close failure when
// everything before it succeeded.
func writeArtifact(path string, artifact *ArtifactData) (err error) {
	out, err := os.Create(path)
	if err != nil {
		return err
	}
	gz := gzip.NewWriter(out)

	// Close the gzip stream before the file so the compressed data is complete
	// by the time the file is closed. An earlier error is never overwritten.
	defer func() {
		for _, closeFn := range []func() error{gz.Close, out.Close} {
			if cerr := closeFn(); cerr != nil && err == nil {
				err = cerr
			}
		}
	}()

	return json.NewEncoder(gz).Encode(artifact)
}
// loadChangedFiles reads the file at changedFilesPath and returns the set of
// its lines. Each line is trimmed of surrounding whitespace, blank lines are
// skipped, and duplicates collapse into one entry.
//
// It returns a wrapped error when the file cannot be opened or when scanning
// fails; the returned set is nil in that case.
func loadChangedFiles(changedFilesPath string) (map[string]struct{}, error) {
	f, err := os.Open(changedFilesPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open changed files list: %w", err)
	}
	defer f.Close()

	entries := map[string]struct{}{}
	lines := bufio.NewScanner(f)
	for lines.Scan() {
		if entry := strings.TrimSpace(lines.Text()); entry != "" {
			entries[entry] = struct{}{}
		}
	}
	// Scan returning false covers both EOF and failure; Err tells them apart.
	if scanErr := lines.Err(); scanErr != nil {
		return nil, fmt.Errorf("error reading changed files: %w", scanErr)
	}
	return entries, nil
}
// extractRHELVersion returns the major RHEL version named by an ecosystem
// string, or "" when the string is not a supported RHEL ecosystem.
//
// The string is split on ":". It must have at least three fields, the first
// being "Red Hat" and the second "enterprise_linux"; other Red Hat products
// such as rhel_e4s, rhel_eus and rhel_software_collections are rejected. The
// third field is the version, and everything from its first "." onwards is
// dropped so that minor releases collapse onto their major version.
//
// Fields after the version (repository suffixes such as appstream, baseos,
// crb, nfv, realtime and variant suffixes such as server, workstation, client,
// computenode, fastdatapath, hypervisor) are not looked at, so all of them map
// to the same major version. Deduplicating the CVE+package pairs that result
// from this collapse is left to runRHEL.
//
// Examples:
//
//	"Red Hat:enterprise_linux:9::appstream"    -> "9"
//	"Red Hat:enterprise_linux:8::baseos"       -> "8"
//	"Red Hat:enterprise_linux:7::server"       -> "7"
//	"Red Hat:enterprise_linux:7::workstation"  -> "7"
//	"Red Hat:enterprise_linux:10.0"            -> "10"
//	"Red Hat:enterprise_linux:10.1"            -> "10"
//	"Red Hat:rhel_e4s:8.8::appstream"          -> "" (not enterprise_linux)
func extractRHELVersion(ecosystem string) string {
	fields := strings.Split(ecosystem, ":")
	switch {
	case len(fields) < 3:
		return ""
	case fields[0] != "Red Hat", fields[1] != "enterprise_linux":
		return ""
	}

	// fields[2] is the version, possibly with a minor part: "9", "10.0", "10.1".
	major, _, _ := strings.Cut(fields[2], ".")
	return major
}
// vulnKey identifies a CVE+package pair. runRHEL keeps one set of these keys
// per RHEL major version so that the same pair, reached through several
// ecosystems that collapse to that version, is recorded only once.
type vulnKey struct {
	// pkg is the package name as it appears in the advisory, before any
	// rewriting by transformVuln.
	pkg string
	// cve is the CVE identifier.
	cve string
}
// runRHEL executes the RHEL pipeline.
//
// It walks cfg.InputDir for ".json" OSV files and, for every affected entry
// whose ecosystem maps to a RHEL major version that passes the version filter,
// records one ProcessedVuln per CVE ID of the advisory. A CVE+package pair is
// recorded at most once per major version; the first occurrence met during the
// walk wins. One artifact file is written per discovered major version.
//
// Files that fail to parse, or that carry no CVE ID at all, are counted as
// skipped and do not abort the run. Delta flags are rejected up front.
func runRHEL(cfg Config) error {
	// Delta generation is not supported for RHEL — the data source is a full GCS zip
	// download with no git-based change tracking. Fail fast if callers pass delta flags.
	deltaRequested := cfg.ChangedFilesToday != "" || cfg.ChangedFilesYesterday != ""
	if deltaRequested {
		return errors.New("--changed-files-today and --changed-files-yesterday are not supported with --platform rhel (no git-based change tracking for GCS data)")
	}

	if err := os.MkdirAll(cfg.OutputDir, 0o755); err != nil {
		return fmt.Errorf("failed to create output directory: %w", err)
	}

	wanted, unwanted := buildVersionFilter(cfg.Versions, cfg.ExcludeVersions)
	log.Printf("Processing RHEL OSV files from %s", cfg.InputDir)

	var (
		artifacts = make(map[string]*RHELArtifactData)
		// seen tracks CVE+package pairs per version for deduplication across ecosystems.
		seen           = make(map[string]map[vulnKey]struct{})
		filesProcessed int
		filesSkipped   int
	)

	visit := func(path string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case info.IsDir(), !strings.HasSuffix(path, ".json"):
			return nil
		}

		advisory, parseErr := parseOSVFile(path)
		if parseErr != nil {
			log.Printf("Failed to parse %s: %v", path, parseErr)
			filesSkipped++
			return nil
		}

		// Extract all CVE IDs from this advisory; without any there is
		// nothing to record.
		cveIDs := extractCVEIDs(advisory)
		if len(cveIDs) == 0 {
			filesSkipped++
			return nil
		}

		for _, entry := range advisory.Affected {
			pkgName := entry.Package.Name
			rhelVer := extractRHELVersion(entry.Package.Ecosystem)
			if rhelVer == "" {
				continue
			}

			// The include list, when present, takes precedence over the
			// exclude list.
			if wanted != nil {
				if !wanted[rhelVer] {
					continue
				}
			} else if unwanted[rhelVer] {
				continue
			}

			introduced, fixed := extractVersionRange(entry.Ranges)

			dedup, ok := seen[rhelVer]
			if !ok {
				dedup = make(map[vulnKey]struct{})
				seen[rhelVer] = dedup
			}

			for _, cveID := range cveIDs {
				// Deduplicate: same CVE+package can appear in baseos, appstream, crb
				key := vulnKey{pkg: pkgName, cve: cveID}
				if _, dup := dedup[key]; dup {
					continue
				}
				dedup[key] = struct{}{}

				vuln := ProcessedVuln{
					CVE:        cveID,
					Published:  advisory.Published,
					Modified:   advisory.Modified,
					Introduced: introduced,
					Fixed:      fixed,
					Versions:   entry.Versions,
				}

				// A nil package list from transformVuln drops the entry.
				packages, modifiedVuln := transformVuln(pkgName, cveID, &vuln)
				if packages == nil {
					continue
				}

				// Prefer the replacement record when transformVuln supplied one.
				record := vuln
				if modifiedVuln != nil {
					record = *modifiedVuln
				}

				for _, pkg := range packages {
					artifact, found := artifacts[rhelVer]
					if !found {
						artifact = &RHELArtifactData{
							SchemaVersion:   "1.0",
							RHELVersion:     rhelVer,
							Vulnerabilities: make(map[string][]ProcessedVuln),
						}
						artifacts[rhelVer] = artifact
					}
					artifact.Vulnerabilities[pkg] = append(artifact.Vulnerabilities[pkg], record)
				}
			}
		}

		filesProcessed++
		if filesProcessed%1000 == 0 {
			log.Printf("Processed %d files...", filesProcessed)
		}
		return nil
	}

	if err := filepath.Walk(cfg.InputDir, visit); err != nil {
		return fmt.Errorf("error walking directory: %w", err)
	}

	log.Printf("Processed %d files, skipped %d files", filesProcessed, filesSkipped)
	log.Printf("Discovered %d RHEL versions", len(artifacts))

	for ver, artifact := range artifacts {
		artifact.Generated = cfg.GeneratedTimestamp
		artifact.TotalCVEs = countTotalRHELCVEs(artifact)
		artifact.TotalPackages = len(artifact.Vulnerabilities)

		fileName := fmt.Sprintf("osv-rhel-%s-%s.json.gz", ver, cfg.DateStr)
		outputFile := filepath.Join(cfg.OutputDir, fileName)
		if err := writeRHELArtifact(outputFile, artifact); err != nil {
			return fmt.Errorf("failed to write artifact for RHEL %s: %w", ver, err)
		}
		log.Printf("RHEL %s: %d packages, %d CVEs -> %s",
			ver, artifact.TotalPackages, artifact.TotalCVEs, outputFile)
	}
	return nil
}
// extractCVEIDs returns every CVE ID attached to an OSV entry.
//
// Sources are tried in order and the first one that yields at least one ID
// starting with "CVE-" is used exclusively:
//  1. the Upstream list;
//  2. the Related list;
//  3. the advisory ID itself.
//
// The result is empty when none of the sources contains a CVE ID.
func extractCVEIDs(osv *OSVData) []string {
	// onlyCVEs keeps the entries that look like CVE identifiers, in order.
	onlyCVEs := func(ids []string) []string {
		var kept []string
		for _, id := range ids {
			if strings.HasPrefix(id, "CVE-") {
				kept = append(kept, id)
			}
		}
		return kept
	}

	if found := onlyCVEs(osv.Upstream); len(found) > 0 {
		return found
	}
	// Fallback: check Related field
	if found := onlyCVEs(osv.Related); len(found) > 0 {
		return found
	}
	// Fallback: check ID itself
	return onlyCVEs([]string{osv.ID})
}
// countTotalRHELCVEs returns the number of distinct CVE values across all
// packages of the RHEL artifact; a CVE shared by several packages counts once.
func countTotalRHELCVEs(artifact *RHELArtifactData) int {
	distinct := map[string]struct{}{}
	for pkg := range artifact.Vulnerabilities {
		for _, record := range artifact.Vulnerabilities[pkg] {
			distinct[record.CVE] = struct{}{}
		}
	}
	return len(distinct)
}
// writeRHELArtifact creates (or truncates) the file at path and writes the
// RHEL artifact to it as gzip-compressed JSON.
//
// The returned error is the first failure encountered among creating the
// file, encoding, closing the gzip stream and closing the file. The result is
// named so that the deferred close step can surface a close failure when
// everything before it succeeded.
func writeRHELArtifact(path string, artifact *RHELArtifactData) (err error) {
	out, err := os.Create(path)
	if err != nil {
		return err
	}
	gz := gzip.NewWriter(out)

	// The gzip stream is closed before the file so the compressed data is
	// complete by the time the file is closed. An earlier error is kept.
	defer func() {
		if cerr := gz.Close(); cerr != nil && err == nil {
			err = cerr
		}
		if cerr := out.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	return json.NewEncoder(gz).Encode(artifact)
}