fleet/orbit/pkg/platform/platform.go
Lucas Manuel Rodriguez b65739f035
Orbit to kill pre-existing osqueryd processes during startup (#16343)
This should fix #16006.

On Windows when a process is killed by the Task Manager, it is killed
without any signaling, thus the osqueryd processes are left orphaned.
Executing osqueryd (which we do to get host information) was failing
because the lingering processes had a lock on the database file. The
solution implemented in this PR is to kill any pre-existing osqueryd
processes before running osqueryd.

- [X] Changes file added for user-visible changes in `changes/` or
`orbit/changes/`.
- [X] Manual QA for all new/changed functionality
  - For Orbit and Fleet Desktop changes:
- [X] Manual QA must be performed in the three main OSs, macOS, Windows
and Linux.
- [X] Auto-update manual QA, from released version of component to new
version (see [tools/tuf/test](../tools/tuf/test/README.md)).

PS: I added a log of the stdout+stderr of osqueryd execution when such
command fails to execute. (This helped me find the root cause.)
```
2024-01-25T11:57:56-08:00 ERR getHostInfo via osquery output= stderr="E0125 11:57:56.744843
7860 shutdown.cpp:79] IO error: Failed to create lock file:
C:\\Program Files\\Orbit\\osquery.db/LOCK: The process cannot access the file because it is
being used by another process.\r\r\n"
```

PPS: I removed some unused exported methods in the `orbit/pkg/platform`
package.
2024-01-29 16:44:50 -03:00

103 lines
2.6 KiB
Go

package platform
import (
"errors"
"fmt"
"strings"
gopsutil_process "github.com/shirou/gopsutil/v3/process"
)
// ErrProcessNotFound is a sentinel error with the message "process not found".
// Compare against it with errors.Is.
var ErrProcessNotFound = errors.New("process not found")

// ErrComChannelNotFound is a sentinel error with the message "comm channel not found".
// Compare against it with errors.Is.
var ErrComChannelNotFound = errors.New("comm channel not found")
// UUIDSource is a string-based type used to label where a UUID value came from.
type UUIDSource string

// Known UUID source labels.
//
// NOTE(review): these are untyped string constants, so they can be assigned to
// either a plain string or a UUIDSource. Confirm with callers before giving
// them an explicit UUIDSource type.
const (
	// UUIDSourceInvalid presumably marks a UUID whose source could not be determined (confirm with callers).
	UUIDSourceInvalid = "UUID_Source_Invalid"
	// UUIDSourceWMI presumably marks a UUID obtained through WMI (confirm with callers).
	UUIDSourceWMI = "UUID_Source_WMI"
	// UUIDSourceHardware presumably marks a UUID read from the hardware (confirm with callers).
	UUIDSourceHardware = "UUID_Source_Hardware"
)
// killProcessByName looks up one process via GetProcessByName and kills it.
//
// It returns an error when name is empty, when the lookup fails (wrapped with
// "get process"), or when the kill fails (wrapped with the process PID).
func killProcessByName(name string) error {
	if len(name) == 0 {
		return errors.New("process name should not be empty")
	}

	target, lookupErr := GetProcessByName(name)
	if lookupErr != nil {
		return fmt.Errorf("get process: %w", lookupErr)
	}

	killErr := target.Kill()
	if killErr == nil {
		return nil
	}
	return fmt.Errorf("kill process %d: %w", target.Pid, killErr)
}
// getProcessesByName lists the running processes whose name starts with namePrefix.
//
// It returns an error when namePrefix is empty or when the process list cannot
// be retrieved. The returned slice is nil when nothing matches.
func getProcessesByName(namePrefix string) ([]*gopsutil_process.Process, error) {
	if len(namePrefix) == 0 {
		return nil, errors.New("process name prefix should not be empty")
	}

	running, err := gopsutil_process.Processes()
	if err != nil {
		return nil, err
	}

	var matches []*gopsutil_process.Process
	for i := range running {
		candidate := running[i]
		name, nameErr := candidate.Name()
		// Name lookups might fail for system processes; those entries are
		// skipped silently rather than reported.
		if nameErr != nil || !strings.HasPrefix(name, namePrefix) {
			continue
		}
		matches = append(matches, candidate)
	}
	return matches, nil
}
// Process is a minimal description of a process: its name and its identifier.
type Process struct {
	// Name holds the process name.
	Name string
	// PID holds the process identifier.
	PID int32
}
// KillAllProcessByName kills all the running processes with the given prefix in their name.
// It returns the processes that were killed. It returns `nil, nil` if there were no processes
// running with such name prefix.
//
// Every matching process is attempted, even when killing an earlier one fails, so that a
// single failure does not leave the remaining matches running. If any kill fails, the
// function returns `nil` together with an error describing the first failure.
//
// It also returns an error when namePrefix is empty or when the process list cannot be
// retrieved.
func KillAllProcessByName(namePrefix string) ([]Process, error) {
	if namePrefix == "" {
		return nil, errors.New("process name prefix should not be empty")
	}

	foundProcesses, err := getProcessesByName(namePrefix)
	if err != nil {
		return nil, fmt.Errorf("get processes by name: %w", err)
	}

	var (
		killedProcesses []Process
		firstErr        error
	)
	for _, foundProcess := range foundProcesses {
		// The name is read before the kill and is only informational. The error is
		// deliberately ignored: getProcessesByName already obtained the name once,
		// and a failure here just leaves Name empty in the result.
		processName, _ := foundProcess.Name()
		if err := foundProcess.Kill(); err != nil {
			// Remember the first failure but keep going, so the other matching
			// processes are still killed.
			if firstErr == nil {
				firstErr = fmt.Errorf("kill process %d: %w", foundProcess.Pid, err)
			}
			continue
		}
		killedProcesses = append(killedProcesses, Process{
			Name: processName,
			PID:  foundProcess.Pid,
		})
	}

	if firstErr != nil {
		return nil, firstErr
	}
	return killedProcesses, nil
}