fleet/orbit/pkg/osquery/osquery.go
Lucas Manuel Rodriguez b65739f035
Orbit to kill pre-existing osqueryd processes during startup (#16343)
This should fix #16006.

On Windows when a process is killed by the Task Manager, it is killed
without any signaling, thus the osqueryd processes are left orphaned.
Executing osqueryd (which we do to get host information) was failing
because the lingering processes had a lock on the database file. The
solution implemented in this PR is to kill any pre-existing osqueryd
processes before running osqueryd.

- [X] Changes file added for user-visible changes in `changes/` or
`orbit/changes/`.
- [X] Manual QA for all new/changed functionality
  - For Orbit and Fleet Desktop changes:
- [X] Manual QA must be performed in the three main OSs, macOS, Windows
and Linux.
- [X] Auto-update manual QA, from released version of component to new
version (see [tools/tuf/test](../tools/tuf/test/README.md)).

PS: I added a log of the stdout+stderr of osqueryd execution when such
command fails to execute. (This helped me find the root cause.)
```
2024-01-25T11:57:56-08:00 ERR getHostInfo via osquery output= stderr="E0125 11:57:56.744843
7860 shutdown.cpp:79] IO error: Failed to create lock file:
C:\\Program Files\\Orbit\\osquery.db/LOCK: The process cannot access the file because it is
being used by another process.\r\r\n"
```

PPS: I removed some unused exported methods in the `orbit/pkg/platform`
package.
2024-01-29 16:44:50 -03:00

229 lines
5.5 KiB
Go

// Package osquery implements a runtime for osqueryd.
package osquery
import (
"context"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"sync"
"time"
"github.com/fleetdm/fleet/v4/orbit/pkg/constant"
"github.com/fleetdm/fleet/v4/orbit/pkg/process"
"github.com/fleetdm/fleet/v4/pkg/secure"
"github.com/rs/zerolog/log"
)
// Runner is a specialized runner for osquery. It is designed with Execute and
// Interrupt functions to be compatible with oklog/run.
type Runner struct {
// proc wraps cmd (built in NewRunner via process.NewWithCmd); Execute starts
// it and waits on it when the runner is not in single-query mode.
proc *process.Process
// cmd is the osqueryd command line; options append flags and environment to
// it and may replace its stdin/stdout/stderr.
cmd *exec.Cmd
// dataPath is the osquery data directory set by the data-path options; on
// non-Windows platforms the extension socket lives inside it.
dataPath string
// cancelMu guards cancel, which is written by Execute and read by Interrupt.
cancelMu sync.Mutex
// cancel cancels the context Execute waits on; nil until Execute sets it.
cancel func()
// singleQuery makes Execute run cmd to completion with cmd.Run instead of
// going through proc.
singleQuery bool
}
// Option is a functional option that configures a Runner. NewRunner applies
// the options in order and aborts on the first one that returns an error.
type Option func(*Runner) error
// NewRunner creates a new osquery runner given the provided functional options.
//
// It returns an error when nothing exists at path, when the file cannot be
// inspected, or when any option fails. The command is created with stdout and
// stderr wired to those of the current process before the options run, so
// options may override them.
func NewRunner(path string, options ...Option) (*Runner, error) {
	if _, statErr := os.Stat(path); errors.Is(statErr, os.ErrNotExist) {
		return nil, fmt.Errorf("osqueryd doesn't exist at path %q", path)
	} else if statErr != nil {
		return nil, fmt.Errorf("failed to check for osqueryd file: %w", statErr)
	}

	osqueryd := exec.Command(path)
	osqueryd.Stdout, osqueryd.Stderr = os.Stdout, os.Stderr

	runner := &Runner{
		cmd:  osqueryd,
		proc: process.NewWithCmd(osqueryd),
	}
	for i := range options {
		if err := options[i](runner); err != nil {
			return nil, fmt.Errorf("apply option: %w", err)
		}
	}

	// Attempt to cleanup any extension socket leftover from previous runs.
	// In some cases it's not cleaned up properly by osquery before exit.
	// This is best-effort: a missing socket is expected and any other failure
	// is only logged.
	switch err := os.Remove(runner.ExtensionSocketPath()); {
	case err == nil, errors.Is(err, os.ErrNotExist):
		// Nothing to report.
	default:
		log.Error().Err(err).Msg("clean-up extension socket")
	}

	return runner, nil
}
// WithFlags adds additional flags to the osqueryd invocation. The flags are
// appended, in order, after whatever arguments the command already has.
func WithFlags(flags []string) Option {
	appendFlags := func(runner *Runner) error {
		current := runner.cmd.Args
		runner.cmd.Args = append(current, flags...)
		return nil
	}
	return appendFlags
}
// WithEnv appends environment variables to the osqueryd command's Env.
// Inputs should be in the form "KEY=VAL".
//
// NOTE(review): NewRunner creates the command with a nil Env, which os/exec
// documents as "use the current process's environment". Once this option
// makes Env non-nil, osqueryd presumably receives only the variables supplied
// through WithEnv rather than the inherited ones; confirm that is intended.
func WithEnv(env []string) Option {
	appendEnv := func(runner *Runner) error {
		current := runner.cmd.Env
		runner.cmd.Env = append(current, env...)
		return nil
	}
	return appendEnv
}
// SingleQuery configures the osqueryd invocation to run a SQL statement and
// exit. It only flips the runner's single-query flag, which makes Execute run
// the command to completion; the statement itself must be supplied through
// other options.
func SingleQuery() Option {
	enable := func(runner *Runner) error {
		runner.singleQuery = true
		return nil
	}
	return enable
}
// WithShell adds the -S flag to run an osqueryi shell.
func WithShell() func(*Runner) error {
return func(r *Runner) error {
r.cmd.Args = append(r.cmd.Args, "-S")
r.cmd.Stdin = os.Stdin
return nil
}
}
// WithDataPath sets the osquery data directory to path, creating it if
// needed, and points osqueryd's pidfile, database and extension socket at
// locations derived from it.
//
// The runner's data path is recorded before the directory is created, so
// ExtensionSocketPath already reflects path when the flags are built.
func WithDataPath(path string) Option {
	return func(runner *Runner) error {
		runner.dataPath = path

		err := secure.MkdirAll(path, constant.DefaultDirMode)
		if err != nil {
			return fmt.Errorf("initialize osquery data path: %w", err)
		}

		var (
			pidfile  = filepath.Join(path, constant.OsqueryPidfile)
			database = filepath.Join(path, "osquery.db")
			socket   = runner.ExtensionSocketPath()
		)
		runner.cmd.Args = append(runner.cmd.Args,
			"--pidfile="+pidfile,
			"--database_path="+database,
			"--extensions_socket="+socket,
		)
		return nil
	}
}
// WithDataPathAndExtensionPathPostfix behaves like WithDataPath but appends
// extensionPathPostfix to the extension socket path passed to osqueryd.
//
// NOTE(review): NewRunner's stale-socket cleanup and ExtensionSocketPath both
// use the path without the postfix, so they presumably do not refer to the
// socket configured here; confirm with callers.
func WithDataPathAndExtensionPathPostfix(path string, extensionPathPostfix string) Option {
	return func(runner *Runner) error {
		runner.dataPath = path

		if mkErr := secure.MkdirAll(path, constant.DefaultDirMode); mkErr != nil {
			return fmt.Errorf("initialize osquery data path: %w", mkErr)
		}

		socket := runner.ExtensionSocketPath() + extensionPathPostfix
		dataFlags := []string{
			"--pidfile=" + filepath.Join(path, constant.OsqueryPidfile),
			"--database_path=" + filepath.Join(path, "osquery.db"),
			"--extensions_socket=" + socket,
		}
		runner.cmd.Args = append(runner.cmd.Args, dataFlags...)
		return nil
	}
}
// WithStderr sets the runner's cmd's stderr to the given writer, replacing
// the os.Stderr default installed by NewRunner.
func WithStderr(w io.Writer) Option {
	redirect := func(runner *Runner) error {
		runner.cmd.Stderr = w
		return nil
	}
	return redirect
}
// WithStdout sets the runner's cmd's stdout to the given writer, replacing
// the os.Stdout default installed by NewRunner.
func WithStdout(w io.Writer) Option {
	redirect := func(runner *Runner) error {
		runner.cmd.Stdout = w
		return nil
	}
	return redirect
}
// WithLogPath creates the directory at path if needed and passes it to
// osqueryd as --logger_path.
func WithLogPath(path string) Option {
	return func(runner *Runner) error {
		mkErr := secure.MkdirAll(path, constant.DefaultDirMode)
		if mkErr != nil {
			return fmt.Errorf("initialize osquery log path: %w", mkErr)
		}

		loggerFlag := "--logger_path=" + path
		runner.cmd.Args = append(runner.cmd.Args, loggerFlag)
		return nil
	}
}
// Execute begins running osqueryd and returns when the process exits. The
// process may not be restarted after exit. Instead create a new one with
// NewRunner.
//
// In single-query mode the command is simply run to completion. Otherwise the
// process is started through the process wrapper and Execute blocks in
// WaitOrKill; the context passed to it is cancelled by Interrupt.
func (r *Runner) Execute() error {
	log.Info().Str("cmd", r.cmd.String()).Msg("start osqueryd")

	if r.singleQuery {
		// When running in "SQL STATEMENT" mode, start osqueryd
		// and wait for it to exit.
		runErr := r.cmd.Run()
		if runErr != nil {
			return fmt.Errorf("start osqueryd shell: %w", runErr)
		}
		return nil
	}

	// Timeout handed to WaitOrKill; presumably the grace period before the
	// process is killed once the context is cancelled (confirm in
	// process.WaitOrKill).
	const killTimeout = 10 * time.Second

	ctx, stop := context.WithCancel(context.Background())
	defer stop()
	// Publish the cancel function so Interrupt can end the wait below.
	r.setCancel(stop)

	startErr := r.proc.Start()
	if startErr != nil {
		return fmt.Errorf("start osqueryd: %w", startErr)
	}

	waitErr := r.proc.WaitOrKill(ctx, killTimeout)
	if waitErr != nil {
		return fmt.Errorf("osqueryd exited with error: %w", waitErr)
	}
	return nil
}
// Interrupt interrupts the running osquery process by cancelling the context
// that Execute is waiting on. The given error is only logged. If Execute has
// not yet installed a cancel function (for example in single-query mode),
// Interrupt does nothing beyond logging.
func (r *Runner) Interrupt(err error) {
	log.Error().Err(err).Msg("interrupt osquery")

	stop := r.getCancel()
	if stop == nil {
		return
	}
	stop()
}
// ExtensionSocketPath returns the path of the osquery extension socket. On
// Windows it is a fixed named pipe; on every other platform it is the
// "orbit-osquery.em" file inside the runner's data path.
func (r *Runner) ExtensionSocketPath() string {
	if runtime.GOOS != "windows" {
		return filepath.Join(r.dataPath, "orbit-osquery.em")
	}
	return `\\.\pipe\orbit-osquery-extension`
}
// setCancel stores c as the runner's cancel function while holding cancelMu.
func (r *Runner) setCancel(c func()) {
	r.cancelMu.Lock()
	r.cancel = c
	r.cancelMu.Unlock()
}
// getCancel returns the runner's current cancel function, read while holding
// cancelMu. The result is nil if no cancel function has been set.
func (r *Runner) getCancel() func() {
	r.cancelMu.Lock()
	fn := r.cancel
	r.cancelMu.Unlock()
	return fn
}