fleet/orbit/pkg/dataflatten/flatten_test.go
2023-11-01 20:11:35 -06:00

625 lines
18 KiB
Go

// based on github.com/kolide/launcher/pkg/osquery/tables
package dataflatten
import (
"bytes"
"encoding/json"
"os"
"path/filepath"
"sort"
"sync"
"testing"
"github.com/stretchr/testify/require"
)
type flattenTestCase struct {
in string
out []Row
options []FlattenOpts
comment string
err bool
}
// TestFlatten_Complex2 flattens testdata/complex2.json twice: once with no
// options and once with IncludeNulls, where the additional null entries are
// expected as rows with an empty Value.
func TestFlatten_Complex2(t *testing.T) {
	t.Parallel()

	// Read and unmarshal once; every subtest flattens the same decoded value.
	dataRaw, err := os.ReadFile(filepath.Join("testdata", "complex2.json"))
	require.NoError(t, err, "reading file")
	var dataIn interface{}
	require.NoError(t, json.Unmarshal(dataRaw, &dataIn), "unmarshalling json")

	var tests = []flattenTestCase{
		{
			out: []Row{
				{Path: []string{"addons", "0", "bool1"}, Value: "true"},
				{Path: []string{"addons", "0", "nest2", "0", "string2"}, Value: "foo"},
				{Path: []string{"addons", "0", "nest3", "string6"}, Value: "null"},
				{Path: []string{"addons", "0", "nest3", "string7"}, Value: "A Very Long Sentence"},
				{Path: []string{"addons", "0", "nest3", "string8"}, Value: "short"},
				{Path: []string{"addons", "0", "string1"}, Value: "hello"},
			},
			// Named so the subtest is not reported under the auto-generated
			// name "#00".
			comment: "default options",
		},
		{
			out: []Row{
				{Path: []string{"addons", "0", "bool1"}, Value: "true"},
				{Path: []string{"addons", "0", "nest2", "0", "null3"}, Value: ""},
				{Path: []string{"addons", "0", "nest2", "0", "null4"}, Value: ""},
				{Path: []string{"addons", "0", "nest2", "0", "string2"}, Value: "foo"},
				{Path: []string{"addons", "0", "nest3", "string3"}, Value: ""},
				{Path: []string{"addons", "0", "nest3", "string4"}, Value: ""},
				{Path: []string{"addons", "0", "nest3", "string5"}, Value: ""},
				{Path: []string{"addons", "0", "nest3", "string6"}, Value: "null"},
				{Path: []string{"addons", "0", "nest3", "string7"}, Value: "A Very Long Sentence"},
				{Path: []string{"addons", "0", "nest3", "string8"}, Value: "short"},
				{Path: []string{"addons", "0", "null1"}, Value: ""},
				{Path: []string{"addons", "0", "null2"}, Value: ""},
				{Path: []string{"addons", "0", "string1"}, Value: "hello"},
			},
			options: []FlattenOpts{IncludeNulls()},
			comment: "includes null",
		},
	}

	for _, tt := range tests {
		tt := tt
		t.Run(tt.comment, func(t *testing.T) {
			t.Parallel()

			actual, err := Flatten(dataIn, tt.options...)
			testFlattenCase(t, tt, actual, err)
		})
	}
}
// TestFlatten_NestingBug flattens testdata/nested.json with no options and
// with IncludeNulls, checking the rows produced for the strings nested under
// "nest1".
// NOTE(review): presumably a regression test for a past nesting bug; the bug
// itself is not described in this file.
func TestFlatten_NestingBug(t *testing.T) {
	t.Parallel()

	// Read and unmarshal once; every subtest flattens the same decoded value.
	dataRaw, err := os.ReadFile(filepath.Join("testdata", "nested.json"))
	require.NoError(t, err, "reading file")
	var dataIn interface{}
	require.NoError(t, json.Unmarshal(dataRaw, &dataIn), "unmarshalling json")

	var tests = []flattenTestCase{
		{
			out: []Row{
				{Path: []string{"addons", "0", "name"}, Value: "Nested Strings"},
				{Path: []string{"addons", "0", "nest1", "string3"}, Value: "string3"},
				{Path: []string{"addons", "0", "nest1", "string4"}, Value: "string4"},
				{Path: []string{"addons", "0", "nest1", "string5"}, Value: "string5"},
				{Path: []string{"addons", "0", "nest1", "string6"}, Value: "string6"},
			},
			// Named so the subtest is not reported under the auto-generated
			// name "#00".
			comment: "default options",
		},
		{
			out: []Row{
				{Path: []string{"addons", "0", "name"}, Value: "Nested Strings"},
				{Path: []string{"addons", "0", "nest1", "string1"}, Value: ""},
				{Path: []string{"addons", "0", "nest1", "string2"}, Value: ""},
				{Path: []string{"addons", "0", "nest1", "string3"}, Value: "string3"},
				{Path: []string{"addons", "0", "nest1", "string4"}, Value: "string4"},
				{Path: []string{"addons", "0", "nest1", "string5"}, Value: "string5"},
				{Path: []string{"addons", "0", "nest1", "string6"}, Value: "string6"},
			},
			options: []FlattenOpts{IncludeNulls()},
			comment: "includes null",
		},
	}

	for _, tt := range tests {
		tt := tt
		t.Run(tt.comment, func(t *testing.T) {
			t.Parallel()

			actual, err := Flatten(dataIn, tt.options...)
			testFlattenCase(t, tt, actual, err)
		})
	}
}
// TestFlatten_Jsonl_Complex runs the same query scenarios as
// TestFlatten_Complex, but against testdata/animals.jsonl parsed through
// Jsonl. Every expected path therefore carries a leading element ("0", "1",
// ...) and every query starts with a "*" wildcard for that element.
func TestFlatten_Jsonl_Complex(t *testing.T) {
	t.Parallel()

	// Read the fixture once here; each subtest parses it again through Jsonl
	// from its own reader.
	dataRaw, err := os.ReadFile(filepath.Join("testdata", "animals.jsonl"))
	require.NoError(t, err, "reading file")

	// We do a bunch of tests to select this user. So we'll pull
	// this out here and make the testcases more DRY.
	//
	// The rows are listed in path-sorted order (the same order used in
	// TestFlatten_Complex) so that the sort in testFlattenCase leaves this
	// slice, which several parallel subtests share, unchanged.
	testdataUser0 := []Row{
		{Path: []string{"2", "users", "0", "favorites", "0"}, Value: "ants"},
		{Path: []string{"2", "users", "0", "id"}, Value: "1"},
		{Path: []string{"2", "users", "0", "name"}, Value: "Alex Aardvark"},
		{Path: []string{"2", "users", "0", "uuid"}, Value: "abc123"},
	}

	var tests = []flattenTestCase{
		{
			out: []Row{
				{Path: []string{"0", "metadata", "testing"}, Value: "true"},
				{Path: []string{"0", "metadata", "version"}, Value: "1.0.1"},
				{Path: []string{"1", "system"}, Value: "users demo"},
				{Path: []string{"2", "users", "0", "favorites", "0"}, Value: "ants"},
				{Path: []string{"2", "users", "0", "id"}, Value: "1"},
				{Path: []string{"2", "users", "0", "name"}, Value: "Alex Aardvark"},
				{Path: []string{"2", "users", "0", "uuid"}, Value: "abc123"},
				{Path: []string{"2", "users", "1", "favorites", "0"}, Value: "mice"},
				{Path: []string{"2", "users", "1", "favorites", "1"}, Value: "birds"},
				{Path: []string{"2", "users", "1", "id"}, Value: "2"},
				{Path: []string{"2", "users", "1", "name"}, Value: "Bailey Bobcat"},
				{Path: []string{"2", "users", "1", "uuid"}, Value: "def456"},
				{Path: []string{"2", "users", "2", "favorites", "0"}, Value: "seeds"},
				{Path: []string{"2", "users", "2", "id"}, Value: "3"},
				{Path: []string{"2", "users", "2", "name"}, Value: "Cam Chipmunk"},
				{Path: []string{"2", "users", "2", "uuid"}, Value: "ghi789"},
				{Path: []string{"3", "0"}, Value: "array-item-A"},
				{Path: []string{"3", "1"}, Value: "array-item-B"},
				{Path: []string{"3", "2"}, Value: "array-item-C"},
			},
			comment: "all together",
		},
		{
			comment: "query metadata",
			options: []FlattenOpts{WithQuery([]string{"*", "metadata"})},
			out: []Row{
				{Path: []string{"0", "metadata", "testing"}, Value: "true"},
				{Path: []string{"0", "metadata", "version"}, Value: "1.0.1"},
			},
		},
		{
			comment: "array by #",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "0"})},
			out:     testdataUser0,
		},
		{
			comment: "array by id value",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "id=>1"})},
			out:     testdataUser0,
		},
		{
			comment: "array by uuid",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "uuid=>abc123"})},
			out:     testdataUser0,
		},
		{
			comment: "array by name with suffix wildcard",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "name=>Al*"})},
			out:     testdataUser0,
		},
		{
			comment: "array by name with prefix wildcard",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "name=>*Aardvark"})},
			out:     testdataUser0,
		},
		{
			comment: "array by name with suffix and prefix",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "name=>*Aardv*"})},
			out:     testdataUser0,
		},
		{
			comment: "who likes ants, array re-written",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "#name", "favorites", "ants"})},
			out: []Row{
				{Path: []string{"2", "users", "Alex Aardvark", "favorites", "0"}, Value: "ants"},
			},
		},
		{
			comment: "rewritten and filtered",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "#name=>Al*", "id"})},
			out: []Row{
				{Path: []string{"2", "users", "Alex Aardvark", "id"}, Value: "1"},
			},
		},
		{
			comment: "bad key name",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "#nokey"})},
			out:     []Row{},
		},
		{
			comment: "rewrite array to map",
			options: []FlattenOpts{WithQuery([]string{"*", "users", "#name", "id"})},
			out: []Row{
				{Path: []string{"2", "users", "Alex Aardvark", "id"}, Value: "1"},
				{Path: []string{"2", "users", "Bailey Bobcat", "id"}, Value: "2"},
				{Path: []string{"2", "users", "Cam Chipmunk", "id"}, Value: "3"},
			},
		},
	}

	for _, tt := range tests {
		tt := tt
		t.Run(tt.comment, func(t *testing.T) {
			t.Parallel()

			actual, err := Jsonl(bytes.NewReader(dataRaw), tt.options...)
			testFlattenCase(t, tt, actual, err)
		})
	}
}
// TestFlatten_Complex flattens testdata/animals.json under a range of
// queries: plain keys, array indices, "key=>value" matches with wildcards,
// and "#key" rewrites that put an element's key value into the path in place
// of its array index.
func TestFlatten_Complex(t *testing.T) {
	t.Parallel()

	// Do the unmarshaling here, so we don't keep doing it again and again
	dataRaw, err := os.ReadFile(filepath.Join("testdata", "animals.json"))
	require.NoError(t, err, "reading file")
	var dataIn interface{}
	require.NoError(t, json.Unmarshal(dataRaw, &dataIn), "unmarshalling json")

	// We do a bunch of tests to select this user. So we'll pull
	// this out here and make the testcases more DRY
	testdataUser0 := []Row{
		{Path: []string{"users", "0", "favorites", "0"}, Value: "ants"},
		{Path: []string{"users", "0", "id"}, Value: "1"},
		{Path: []string{"users", "0", "name"}, Value: "Alex Aardvark"},
		{Path: []string{"users", "0", "uuid"}, Value: "abc123"},
	}

	var tests = []flattenTestCase{
		{
			out: []Row{
				{Path: []string{"metadata", "testing"}, Value: "true"},
				{Path: []string{"metadata", "version"}, Value: "1.0.1"},
				{Path: []string{"system"}, Value: "users demo"},
				{Path: []string{"users", "0", "favorites", "0"}, Value: "ants"},
				{Path: []string{"users", "0", "id"}, Value: "1"},
				{Path: []string{"users", "0", "name"}, Value: "Alex Aardvark"},
				{Path: []string{"users", "0", "uuid"}, Value: "abc123"},
				{Path: []string{"users", "1", "favorites", "0"}, Value: "mice"},
				{Path: []string{"users", "1", "favorites", "1"}, Value: "birds"},
				{Path: []string{"users", "1", "id"}, Value: "2"},
				{Path: []string{"users", "1", "name"}, Value: "Bailey Bobcat"},
				{Path: []string{"users", "1", "uuid"}, Value: "def456"},
				{Path: []string{"users", "2", "favorites", "0"}, Value: "seeds"},
				{Path: []string{"users", "2", "id"}, Value: "3"},
				{Path: []string{"users", "2", "name"}, Value: "Cam Chipmunk"},
				{Path: []string{"users", "2", "uuid"}, Value: "ghi789"},
			},
			comment: "all together",
		},
		{
			// Named to match the equivalent case in TestFlatten_Jsonl_Complex,
			// so the subtest is not reported under the auto-generated name
			// "#00".
			comment: "query metadata",
			options: []FlattenOpts{WithQuery([]string{"metadata"})},
			out: []Row{
				{Path: []string{"metadata", "testing"}, Value: "true"},
				{Path: []string{"metadata", "version"}, Value: "1.0.1"},
			},
		},
		{
			comment: "array by #",
			options: []FlattenOpts{WithQuery([]string{"users", "0"})},
			out:     testdataUser0,
		},
		{
			comment: "array by id value",
			options: []FlattenOpts{WithQuery([]string{"users", "id=>1"})},
			out:     testdataUser0,
		},
		{
			comment: "array by uuid",
			options: []FlattenOpts{WithQuery([]string{"users", "uuid=>abc123"})},
			out:     testdataUser0,
		},
		{
			comment: "array by name with suffix wildcard",
			options: []FlattenOpts{WithQuery([]string{"users", "name=>Al*"})},
			out:     testdataUser0,
		},
		{
			comment: "array by name with prefix wildcard",
			options: []FlattenOpts{WithQuery([]string{"users", "name=>*Aardvark"})},
			out:     testdataUser0,
		},
		{
			comment: "array by name with suffix and prefix",
			options: []FlattenOpts{WithQuery([]string{"users", "name=>*Aardv*"})},
			out:     testdataUser0,
		},
		{
			comment: "who likes ants, array re-written",
			options: []FlattenOpts{WithQuery([]string{"users", "#name", "favorites", "ants"})},
			out: []Row{
				{Path: []string{"users", "Alex Aardvark", "favorites", "0"}, Value: "ants"},
			},
		},
		{
			comment: "rewritten and filtered",
			options: []FlattenOpts{WithQuery([]string{"users", "#name=>Al*", "id"})},
			out: []Row{
				{Path: []string{"users", "Alex Aardvark", "id"}, Value: "1"},
			},
		},
		{
			comment: "bad key name",
			options: []FlattenOpts{WithQuery([]string{"users", "#nokey"})},
			out:     []Row{},
		},
		{
			comment: "rewrite array to map",
			options: []FlattenOpts{WithQuery([]string{"users", "#name", "id"})},
			out: []Row{
				{Path: []string{"users", "Alex Aardvark", "id"}, Value: "1"},
				{Path: []string{"users", "Bailey Bobcat", "id"}, Value: "2"},
				{Path: []string{"users", "Cam Chipmunk", "id"}, Value: "3"},
			},
		},
	}

	for _, tt := range tests {
		tt := tt
		t.Run(tt.comment, func(t *testing.T) {
			t.Parallel()

			actual, err := Flatten(dataIn, tt.options...)
			testFlattenCase(t, tt, actual, err)
		})
	}
}
// TestFlatten_ArrayMaps parses a document holding an array of objects through
// Json, once with no query (array indices appear in the paths) and once with
// a "#id" query (each element's id value appears in the paths instead).
func TestFlatten_ArrayMaps(t *testing.T) {
	t.Parallel()

	// Both cases parse this same document.
	const doc = `{"data": [{"v":1,"id":"a"},{"v":2,"id":"b"},{"v":3,"id":"c"}]}`

	cases := []flattenTestCase{
		{
			comment: "nested array as array",
			in:      doc,
			out: []Row{
				{Path: []string{"data", "0", "id"}, Value: "a"},
				{Path: []string{"data", "0", "v"}, Value: "1"},
				{Path: []string{"data", "1", "id"}, Value: "b"},
				{Path: []string{"data", "1", "v"}, Value: "2"},
				{Path: []string{"data", "2", "id"}, Value: "c"},
				{Path: []string{"data", "2", "v"}, Value: "3"},
			},
		},
		{
			comment: "nested array as map",
			in:      doc,
			options: []FlattenOpts{WithQuery([]string{"data", "#id"})},
			out: []Row{
				{Path: []string{"data", "a", "id"}, Value: "a"},
				{Path: []string{"data", "a", "v"}, Value: "1"},
				{Path: []string{"data", "b", "id"}, Value: "b"},
				{Path: []string{"data", "b", "v"}, Value: "2"},
				{Path: []string{"data", "c", "id"}, Value: "c"},
				{Path: []string{"data", "c", "v"}, Value: "3"},
			},
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.comment, func(t *testing.T) {
			t.Parallel()

			rows, flattenErr := Json([]byte(tc.in), tc.options...)
			testFlattenCase(t, tc, rows, flattenErr)
		})
	}
}
// TestFlatten feeds small inline JSON documents to Json and checks the
// flattened rows: invalid input, null handling with and without IncludeNulls,
// mixed scalar types, and nested containers.
func TestFlatten(t *testing.T) {
	t.Parallel()

	var tests = []flattenTestCase{
		{
			in:  "a",
			err: true,
			// Named so the subtest is not reported under the auto-generated
			// name "#00".
			comment: "invalid json",
		},
		{
			in: `["a", null]`,
			out: []Row{
				{Path: []string{"0"}, Value: "a"},
			},
			comment: "skip null",
		},
		{
			in: `["a", "b", null]`,
			out: []Row{
				{Path: []string{"0"}, Value: "a"},
				{Path: []string{"1"}, Value: "b"},
				{Path: []string{"2"}, Value: ""},
			},
			options: []FlattenOpts{IncludeNulls()},
			comment: "includes null",
		},
		{
			in: `["1"]`,
			out: []Row{
				{Path: []string{"0"}, Value: "1"},
			},
			// Named for the same reason as the first case.
			comment: "single string element",
		},
		{
			in: `["a", true, false, "1", 2, 3.3]`,
			out: []Row{
				{Path: []string{"0"}, Value: "a"},
				{Path: []string{"1"}, Value: "true"},
				{Path: []string{"2"}, Value: "false"},
				{Path: []string{"3"}, Value: "1"},
				{Path: []string{"4"}, Value: "2"},
				{Path: []string{"5"}, Value: "3.3"},
			},
			comment: "mixed types",
		},
		{
			in: `{"a": 1, "b": "2.2", "c": [1,2,3]}`,
			out: []Row{
				{Path: []string{"a"}, Value: "1"},
				{Path: []string{"b"}, Value: "2.2"},
				{Path: []string{"c", "0"}, Value: "1"},
				{Path: []string{"c", "1"}, Value: "2"},
				{Path: []string{"c", "2"}, Value: "3"},
			},
			comment: "nested types",
		},
	}

	for _, tt := range tests {
		tt := tt
		t.Run(tt.comment, func(t *testing.T) {
			t.Parallel()

			actual, err := Json([]byte(tt.in), tt.options...)
			testFlattenCase(t, tt, actual, err)
		})
	}
}
// TestFlattenJsonlErrors checks that Jsonl returns an error for inputs that
// are not JSON, including text that merely contains a JSON fragment.
func TestFlattenJsonlErrors(t *testing.T) {
	t.Parallel()

	var tests = []flattenTestCase{
		{
			// Named so the subtest is not reported under the auto-generated
			// name "#00".
			comment: "invalid json",
			in:      "a",
			err:     true,
		},
		{
			// this test case was left over from attempting to parse json that
			// is contained within a file that is not strictly jsonl
			// it should error, maybe look at this again in the future?
			comment: "valid json inline text",
			in:      `valid json is hidden["a"]in me`,
			err:     true,
		},
		{
			// this test case was left over from attempting to parse json that
			// is contained within a file that is not strictly jsonl
			// it should error, maybe look at this again in the future?
			comment: "valid json sandwich",
			in: `
there is some json under me
["a"]
there is some json above me
`,
			err: true,
		},
	}

	for _, tt := range tests {
		tt := tt
		t.Run(tt.comment, func(t *testing.T) {
			t.Parallel()

			actual, err := Jsonl(bytes.NewBuffer([]byte(tt.in)), tt.options...)
			testFlattenCase(t, tt, actual, err)
		})
	}
}
// testFlattenCaseMutex is held for the whole body of testFlattenCase, so the
// checks made by parallel subtests run one at a time.
// It was added due to data races when running locally, which don't seem to
// appear in CI; maybe remove it if it slows down CI too much.
var testFlattenCaseMutex sync.Mutex
// testFlattenCase checks the outcome of a single flatten call against the
// expectations in tt. Normally this would be in a for loop, instead it's
// abstracted here to make it simpler to split up a giant array of test cases.
//
// When tt.err is set, only the presence of an error is asserted and actual is
// ignored. Otherwise actualErr must be nil, and actual must equal tt.out once
// both have been sorted by path. actual is sorted in place; tt.out is left
// untouched.
func testFlattenCase(t *testing.T, tt flattenTestCase, actual []Row, actualErr error) {
	t.Helper()

	testFlattenCaseMutex.Lock()
	defer testFlattenCaseMutex.Unlock()

	if tt.err {
		require.Error(t, actualErr, "test %s %s", tt.in, tt.comment)
		return
	}
	require.NoError(t, actualErr, "test %s %s", tt.in, tt.comment)

	// Several test cases reuse one fixture slice as their expected output and
	// run in parallel, so sort a private copy rather than reordering the
	// shared backing array. A nil tt.out stays nil (and an empty one stays
	// empty and non-nil) so the comparison below sees the same value as it
	// would without the copy.
	expected := tt.out
	if tt.out != nil {
		expected = make([]Row, len(tt.out))
		copy(expected, tt.out)
	}

	// Despite being an array, data is returned
	// unordered. This greatly complicates our testing. We
	// can either sort it, or use an unordered comparison
	// operator. The `require.ElementsMatch` produces much
	// harder to read diffs, so instead we'll sort things.
	sort.SliceStable(expected, func(i, j int) bool { return expected[i].StringPath("/") < expected[j].StringPath("/") })
	sort.SliceStable(actual, func(i, j int) bool { return actual[i].StringPath("/") < actual[j].StringPath("/") })

	require.EqualValues(t, expected, actual, "test %s %s", tt.in, tt.comment)
}
// TestFlattenSliceOfMaps passes Go values of type []map[string]interface{}
// straight to Flatten (no JSON parsing involved) and checks the rows returned,
// plus one input that is expected to make Flatten fail.
func TestFlattenSliceOfMaps(t *testing.T) {
	t.Parallel()

	type sliceCase struct {
		name    string
		in      interface{}
		opts    []FlattenOpts
		out     []Row
		wantErr bool
	}

	cases := []sliceCase{
		{
			name: "single",
			in: []map[string]interface{}{
				{"id": "a", "v": 1},
			},
			opts: []FlattenOpts{},
			out: []Row{
				{Path: []string{"0", "id"}, Value: "a"},
				{Path: []string{"0", "v"}, Value: "1"},
			},
		},
		{
			name: "multiple",
			in: []map[string]interface{}{
				{"id": "a", "v": 1},
				{"id": "b", "v": 2},
				{"id": "c", "v": 3},
			},
			opts: []FlattenOpts{},
			out: []Row{
				{Path: []string{"0", "id"}, Value: "a"},
				{Path: []string{"0", "v"}, Value: "1"},
				{Path: []string{"1", "id"}, Value: "b"},
				{Path: []string{"1", "v"}, Value: "2"},
				{Path: []string{"2", "id"}, Value: "c"},
				{Path: []string{"2", "v"}, Value: "3"},
			},
		},
		{
			name: "error",
			in: []map[string]interface{}{
				{"id": []string{"this should cause an error"}},
			},
			opts:    []FlattenOpts{},
			wantErr: true,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			rows, flattenErr := Flatten(tc.in, tc.opts...)
			if !tc.wantErr {
				require.NoError(t, flattenErr)
			} else {
				require.Error(t, flattenErr)
			}

			// Rows are compared without regard to order. This also runs for
			// the error case, where no rows are expected.
			require.ElementsMatch(t, tc.out, rows)
		})
	}
}