fleet/server/pubsub/inmem_query_results.go
Zachary Wasserman 39ebd81dc5 Close orphaned distributed query campaign after failed publish attempt (#707)
A distributed query campaign can be "orphaned" (left in the QueryRunning state)
if the Kolide server restarts while it is running, or other weirdness occurs.
When this happens, no subscribers are waiting to read results written by
osqueryd agents, but the agents continue to receive the query. Previously, this
would cause us to error on ingestion.

The new behavior will instead set the campaign to completed when it detects
that it is orphaned. This should prevent sending queries for which there is no
subscriber.

- New NoSubscriber error interface in pubsub
- Detect NoSubscriber errors and close campaigns
- Tests on pubsub and service methods

Fixes #695
2016-12-27 10:35:19 -05:00

63 lines
1.5 KiB
Go

package pubsub
import (
"strconv"
"sync"
"golang.org/x/net/context"
"github.com/kolide/kolide-ose/server/kolide"
)
// inmemQueryResults keeps, in process memory, one result channel per
// distributed query campaign ID.
type inmemQueryResults struct {
// resultChannels maps a campaign ID to the channel on which results for
// that campaign are delivered. Entries are created by getChannel and
// removed by the goroutine started in ReadChannel.
resultChannels map[uint]chan interface{}
// channelMutex is held by getChannel and by the ReadChannel teardown
// goroutine while they modify resultChannels.
channelMutex sync.Mutex
}
// Compile-time check that *inmemQueryResults satisfies kolide.QueryResultStore.
var _ kolide.QueryResultStore = &inmemQueryResults{}
// NewInmemQueryResults builds an in-memory implementation of the
// QueryResultStore interface. The returned store starts with an empty,
// ready-to-use registry of result channels.
func NewInmemQueryResults() *inmemQueryResults {
	store := new(inmemQueryResults)
	store.resultChannels = make(map[uint]chan interface{})
	return store
}
// getChannel returns the result channel registered under id. When no channel
// is registered yet, a new unbuffered one is created, stored under id and
// returned. The lookup and the insert both happen while channelMutex is held.
func (im *inmemQueryResults) getChannel(id uint) chan interface{} {
	im.channelMutex.Lock()
	defer im.channelMutex.Unlock()

	if existing, found := im.resultChannels[id]; found {
		return existing
	}

	created := make(chan interface{})
	im.resultChannels[id] = created
	return created
}
// WriteResult hands result to whoever is currently reading the channel of the
// campaign identified by result.DistributedQueryCampaignID.
//
// The send never blocks. A noSubscriberError carrying the campaign ID is
// returned when no channel is registered for the campaign, or when a channel
// is registered but no receiver is ready to take the value right now. On
// successful delivery the return value is nil.
func (im *inmemQueryResults) WriteResult(result kolide.DistributedQueryResult) error {
	id := result.DistributedQueryCampaignID

	// The registry is mutated by getChannel and by the ReadChannel teardown
	// goroutine, so it must not be read without the mutex: unsynchronized
	// concurrent map access is a data race. The send below is non-blocking
	// (select with default), so keeping the mutex held across it cannot
	// stall other callers behind a slow reader.
	im.channelMutex.Lock()
	defer im.channelMutex.Unlock()

	channel, ok := im.resultChannels[id]
	if !ok {
		return noSubscriberError{campaignIDString(id)}
	}

	select {
	case channel <- result:
		return nil
	default:
		// A channel exists but nobody is receiving on it.
		return noSubscriberError{campaignIDString(id)}
	}
}

// campaignIDString renders a campaign ID in base 10. Formatting as unsigned
// avoids the negative value int(id) would produce for IDs above math.MaxInt.
func campaignIDString(id uint) string {
	return strconv.FormatUint(uint64(id), 10)
}
// ReadChannel returns the receive side of the result channel for the campaign
// query.ID, creating and registering the channel if none exists yet. Calls
// made for the same campaign while its channel is still registered receive
// the same channel.
//
// A goroutine is started that waits for ctx to be done and then removes the
// channel from the registry and closes it. The returned error is always nil.
func (im *inmemQueryResults) ReadChannel(ctx context.Context, query kolide.DistributedQueryCampaign) (<-chan interface{}, error) {
	id := query.ID
	channel := im.getChannel(id)

	go func() {
		<-ctx.Done()

		im.channelMutex.Lock()
		defer im.channelMutex.Unlock()

		// Tear down only if this exact channel is still the registered one.
		// When ReadChannel was called more than once for the same campaign,
		// every call shares one channel; the first teardown unregisters and
		// closes it, and later ones must not close it again (closing a
		// closed channel panics) nor delete a channel registered afterwards.
		registered, ok := im.resultChannels[id]
		if !ok || registered != channel {
			return
		}

		// Unregister before closing, all under the mutex, so the registry
		// never exposes a channel that has already been closed.
		delete(im.resultChannels, id)
		close(channel)
	}()

	return channel, nil
}