diff --git a/core/backend/audio_transform.go b/core/backend/audio_transform.go index 3dbc8c833..399e5c803 100644 --- a/core/backend/audio_transform.go +++ b/core/backend/audio_transform.go @@ -78,7 +78,7 @@ func ModelAudioTransform( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } @@ -104,7 +104,7 @@ func ModelAudioTransform( data["sample_rate"] = res.SampleRate data["samples"] = res.Samples data["reference_provided"] = res.ReferenceProvided - if snippet := trace.AudioSnippet(dst); snippet != nil { + if snippet := trace.AudioSnippet(dst, appConfig.TracingMaxBodyBytes); snippet != nil { maps.Copy(data, snippet) } } diff --git a/core/backend/detection.go b/core/backend/detection.go index 13a923e9f..1c92fefdf 100644 --- a/core/backend/detection.go +++ b/core/backend/detection.go @@ -35,7 +35,7 @@ func Detection( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/backend/embeddings.go b/core/backend/embeddings.go index 382f8f358..f7944827d 100644 --- a/core/backend/embeddings.go +++ b/core/backend/embeddings.go @@ -67,7 +67,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, modelConf } if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) traceData := map[string]any{ "input_text": trace.TruncateString(s, 1000), diff --git a/core/backend/face_analyze.go b/core/backend/face_analyze.go index 24d70ac40..bcd447243 100644 --- a/core/backend/face_analyze.go +++ b/core/backend/face_analyze.go @@ -32,7 +32,7 @@ func FaceAnalyze( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/backend/face_verify.go b/core/backend/face_verify.go index 15b7dcdaf..f99ed12d5 100644 --- a/core/backend/face_verify.go +++ b/core/backend/face_verify.go @@ -32,7 +32,7 @@ func FaceVerify( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/backend/image.go b/core/backend/image.go index 44ca010c2..cd3b8ce6b 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -41,7 +41,7 @@ func ImageGeneration(height, width, step, seed int, positive_prompt, negative_pr } if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) traceData := map[string]any{ "positive_prompt": positive_prompt, diff --git a/core/backend/llm.go b/core/backend/llm.go index 0c291ff0a..572d943e0 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -305,7 +305,7 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima } if o.EnableTracing { - trace.InitBackendTracingIfEnabled(o.TracingMaxItems) + trace.InitBackendTracingIfEnabled(o.TracingMaxItems, o.TracingMaxBodyBytes) traceData := map[string]any{ "chat_template": c.TemplateConfig.Chat, @@ -316,9 +316,13 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima "audios_count": len(audios), } + // Cap the captured fields up front: agent-pool LLM calls embed the + // full augmented chat history in messages and the full reply in + // response, so without a per-field cap a single trace can dwarf the + // rest of the buffer. The cap matches the API-trace body cap. if len(messages) > 0 { if msgJSON, err := json.Marshal(messages); err == nil { - traceData["messages"] = string(msgJSON) + traceData["messages"] = trace.TruncateToBytes(string(msgJSON), o.TracingMaxBodyBytes) } } if reasoningJSON, err := json.Marshal(c.ReasoningConfig); err == nil { @@ -337,7 +341,7 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima resp, err := originalFn() duration := time.Since(startTime) - traceData["response"] = resp.Response + traceData["response"] = trace.TruncateToBytes(resp.Response, o.TracingMaxBodyBytes) traceData["token_usage"] = map[string]any{ "prompt": resp.Usage.Prompt, "completion": resp.Usage.Completion, @@ -359,10 +363,10 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima toolCallCount += len(d.ToolCalls) } if len(contentParts) > 0 { - chatDeltasInfo["content"] = strings.Join(contentParts, "") + chatDeltasInfo["content"] = trace.TruncateToBytes(strings.Join(contentParts, ""), o.TracingMaxBodyBytes) } if len(reasoningParts) > 0 { - chatDeltasInfo["reasoning_content"] = strings.Join(reasoningParts, "") + chatDeltasInfo["reasoning_content"] = trace.TruncateToBytes(strings.Join(reasoningParts, ""), o.TracingMaxBodyBytes) } if toolCallCount > 0 { chatDeltasInfo["tool_call_count"] = toolCallCount diff --git a/core/backend/options.go b/core/backend/options.go index 73985d8fe..a7d332344 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -21,7 +21,7 @@ func recordModelLoadFailure(appConfig *config.ApplicationConfig, modelName, back if !appConfig.EnableTracing { return } - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) trace.RecordBackendTrace(trace.BackendTrace{ Timestamp: time.Now(), Type: trace.BackendTraceModelLoad, diff --git a/core/backend/rerank.go b/core/backend/rerank.go index 9672a1ca8..a90c2aad1 100644 --- a/core/backend/rerank.go +++ b/core/backend/rerank.go @@ -25,7 +25,7 @@ func Rerank(ctx context.Context, request *proto.RerankRequest, loader *model.Mod var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/backend/soundgeneration.go b/core/backend/soundgeneration.go index dccc4df74..5a4ff8882 100644 --- a/core/backend/soundgeneration.go +++ b/core/backend/soundgeneration.go @@ -98,7 +98,7 @@ func SoundGeneration( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/backend/tokenize.go b/core/backend/tokenize.go index 761329973..96618d89c 100644 --- a/core/backend/tokenize.go +++ b/core/backend/tokenize.go @@ -27,7 +27,7 @@ func ModelTokenize(s string, loader *model.ModelLoader, modelConfig config.Model var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/backend/transcript.go b/core/backend/transcript.go index be651516b..e9b5f5360 100644 --- a/core/backend/transcript.go +++ b/core/backend/transcript.go @@ -76,10 +76,10 @@ func ModelTranscriptionWithOptions(ctx context.Context, req TranscriptionRequest var startTime time.Time var audioSnippet map[string]any if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() // Capture audio before the backend call — the backend may delete the file. - audioSnippet = trace.AudioSnippet(req.Audio) + audioSnippet = trace.AudioSnippet(req.Audio, appConfig.TracingMaxBodyBytes) } r, err := transcriptionModel.AudioTranscription(ctx, req.toProto(uint32(*modelConfig.Threads))) diff --git a/core/backend/tts.go b/core/backend/tts.go index 9af9d0d44..62c394714 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -67,7 +67,7 @@ func ModelTTS( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } @@ -93,7 +93,7 @@ func ModelTTS( "language": language, } if err == nil && res.Success { - if snippet := trace.AudioSnippet(filePath); snippet != nil { + if snippet := trace.AudioSnippet(filePath, appConfig.TracingMaxBodyBytes); snippet != nil { maps.Copy(data, snippet) } } @@ -161,7 +161,7 @@ func ModelTTSStream( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } @@ -260,7 +260,7 @@ func ModelTTSStream( "streaming": true, } if resultErr == nil && len(snippetPCM) > 0 { - if snippet := trace.AudioSnippetFromPCM(snippetPCM, int(sampleRate), totalPCMBytes); snippet != nil { + if snippet := trace.AudioSnippetFromPCM(snippetPCM, int(sampleRate), totalPCMBytes, appConfig.TracingMaxBodyBytes); snippet != nil { maps.Copy(data, snippet) } } diff --git a/core/backend/video.go b/core/backend/video.go index 65677f055..e016d1a22 100644 --- a/core/backend/video.go +++ b/core/backend/video.go @@ -42,7 +42,7 @@ func VideoGeneration(height, width int32, prompt, negativePrompt, startImage, en } if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) traceData := map[string]any{ "prompt": prompt, diff --git a/core/backend/voice_analyze.go b/core/backend/voice_analyze.go index 022692921..84e5547d5 100644 --- a/core/backend/voice_analyze.go +++ b/core/backend/voice_analyze.go @@ -31,7 +31,7 @@ func VoiceAnalyze( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/backend/voice_embed.go b/core/backend/voice_embed.go index 6cdc9b6a2..89df76a06 100644 --- a/core/backend/voice_embed.go +++ b/core/backend/voice_embed.go @@ -34,7 +34,7 @@ func VoiceEmbed( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/backend/voice_verify.go b/core/backend/voice_verify.go index bd4c04808..c5af1196b 100644 --- a/core/backend/voice_verify.go +++ b/core/backend/voice_verify.go @@ -32,7 +32,7 @@ func VoiceVerify( var startTime time.Time if appConfig.EnableTracing { - trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems) + trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems, appConfig.TracingMaxBodyBytes) startTime = time.Now() } diff --git a/core/http/react-ui/e2e/traces.spec.js b/core/http/react-ui/e2e/traces.spec.js index 4a247b5b3..9382593e9 100644 --- a/core/http/react-ui/e2e/traces.spec.js +++ b/core/http/react-ui/e2e/traces.spec.js @@ -52,11 +52,22 @@ test.describe('Traces Settings', () => { await page.locator('button', { hasText: 'Tracing is' }).click() await expect(page.locator('text=Enable Tracing')).toBeVisible() - const maxItemsInput = page.locator('input[type="number"]') + // The Tracing panel has two numeric inputs (Max Items and Max Body Bytes). + // Disambiguate by placeholder so adding a third field later doesn't break this. + const maxItemsInput = page.getByPlaceholder('100') await maxItemsInput.fill('500') await expect(maxItemsInput).toHaveValue('500') }) + test('set max body bytes value', async ({ page }) => { + await page.locator('button', { hasText: 'Tracing is' }).click() + await expect(page.locator('text=Enable Tracing')).toBeVisible() + + const maxBodyBytesInput = page.getByPlaceholder('65536') + await maxBodyBytesInput.fill('16384') + await expect(maxBodyBytesInput).toHaveValue('16384') + }) + test('save shows toast', async ({ page }) => { // Expand settings await page.locator('button', { hasText: 'Tracing is' }).click() diff --git a/core/http/react-ui/src/pages/Settings.jsx b/core/http/react-ui/src/pages/Settings.jsx index 3174eed58..1e7b1a6db 100644 --- a/core/http/react-ui/src/pages/Settings.jsx +++ b/core/http/react-ui/src/pages/Settings.jsx @@ -435,6 +435,9 @@ export default function Settings() { update('tracing_max_items', parseInt(e.target.value) || 0)} placeholder="100" disabled={!settings.enable_tracing} /> + + update('tracing_max_body_bytes', parseInt(e.target.value) || 0)} placeholder="65536" disabled={!settings.enable_tracing} /> + update('enable_backend_logging', v)} /> diff --git a/core/http/react-ui/src/pages/Traces.jsx b/core/http/react-ui/src/pages/Traces.jsx index 5bceb53e4..b42c13a63 100644 --- a/core/http/react-ui/src/pages/Traces.jsx +++ b/core/http/react-ui/src/pages/Traces.jsx @@ -470,6 +470,17 @@ export default function Traces() { disabled={!settings.enable_tracing} /> + + setSettings(prev => ({ ...prev, tracing_max_body_bytes: parseInt(e.target.value) || 0 }))} + placeholder="65536" + disabled={!settings.enable_tracing} + /> + 0 { + t.Data = capDataStrings(t.Data, backendMaxBodyBytes) + } select { case backendLogChan <- &t: default: @@ -78,6 +94,35 @@ func RecordBackendTrace(t BackendTrace) { } } +// capDataStrings walks a trace Data map and replaces any string value (at any +// depth) that exceeds maxBytes with a fixed-size marker that names the +// original byte count. The replacement is intentionally short and not valid +// base64/JSON: the goal is to flag "this was dropped" cheaply, not to keep a +// partial value that the UI might try to render. Non-string scalars and +// non-map containers pass through untouched so structural fields like +// total_deltas or audio_sample_rate remain useful. +func capDataStrings(data map[string]any, maxBytes int) map[string]any { + out := make(map[string]any, len(data)) + for k, v := range data { + out[k] = capValue(v, maxBytes) + } + return out +} + +func capValue(v any, maxBytes int) any { + switch val := v.(type) { + case string: + if len(val) > maxBytes { + return fmt.Sprintf("", len(val)) + } + return val + case map[string]any: + return capDataStrings(val, maxBytes) + default: + return v + } +} + func GetBackendTraces() []BackendTrace { backendMu.Lock() if backendTraceBuffer == nil { @@ -136,3 +181,24 @@ func TruncateString(s string, maxLen int) string { } return s[:maxLen] + "..." } + +// TruncateToBytes caps a string at exactly maxBytes, preserving the leading +// content and appending a marker so the UI knows the value was clipped. +// Unlike TruncateString it guarantees output <= maxBytes, which matters for +// fields that feed back into the trace pipeline: capDataStrings in +// RecordBackendTrace re-checks size and would otherwise replace a producer's +// head-preserving truncation with the bare marker, losing the prefix. +// +// maxBytes <= 0 disables the cap, matching backendMaxBodyBytes semantics. +func TruncateToBytes(s string, maxBytes int) string { + if maxBytes <= 0 || len(s) <= maxBytes { + return s + } + suffix := fmt.Sprintf("...[truncated, %d bytes]", len(s)) + if len(suffix) >= maxBytes { + // Pathologically small caps can't fit the marker; fall back to a + // hard cut so the contract (output <= maxBytes) still holds. + return s[:maxBytes] + } + return s[:maxBytes-len(suffix)] + suffix +} diff --git a/core/trace/backend_trace_cap_test.go b/core/trace/backend_trace_cap_test.go new file mode 100644 index 000000000..b850bd1ae --- /dev/null +++ b/core/trace/backend_trace_cap_test.go @@ -0,0 +1,160 @@ +package trace_test + +import ( + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/trace" +) + +// The /api/backend-traces endpoint ships up to TracingMaxItems entries to the +// admin Traces UI on every 5s auto-refresh. Without a cap on the per-trace +// Data field, a chatty agent-pool workload (LLM traces carry the full +// `messages` array, TTS traces carry ~1.3 MiB of audio_wav_base64) makes the +// response tens of MiB. The UI then stays in "loading" forever because the +// download + parse runs longer than the refresh interval: the same symptom +// the API-trace fix (commit 61bf34ea) addressed on the other side. +// +// These specs pin the generic safety net (Option A) so any future producer +// that stuffs a large string into Data is automatically bounded. + +const ( + smallCap = 1024 + smallCapStep = 16 +) + +var _ = Describe("RecordBackendTrace Data capping", func() { + BeforeEach(func() { + // Init is sync.Once so the first test wins; subsequent tests just + // clear the buffer. The cap value below has to match the first call. + trace.InitBackendTracingIfEnabled(64, smallCap) + trace.ClearBackendTraces() + }) + + It("replaces oversized top-level string values with a truncation marker", func() { + oversized := strings.Repeat("x", smallCap*4) + + trace.RecordBackendTrace(trace.BackendTrace{ + Timestamp: time.Now(), + Type: trace.BackendTraceLLM, + ModelName: "m", + Data: map[string]any{ + "messages": oversized, + "small": "fits", + }, + }) + + Eventually(trace.GetBackendTraces).Should(HaveLen(1)) + got := trace.GetBackendTraces()[0] + + Expect(got.Data["small"]).To(Equal("fits"), "fields under the cap must pass through untouched") + + // The marker is the contract the UI reads to show truncation; the + // concrete shape can evolve but it must be a short fixed-size string + // that encodes the original byte count so users know what was dropped. + msg, ok := got.Data["messages"].(string) + Expect(ok).To(BeTrue(), "string fields stay strings after capping") + Expect(len(msg)).To(BeNumerically("<", smallCap), "capped value must fit under the configured cap") + Expect(msg).To(ContainSubstring("truncated")) + Expect(msg).To(ContainSubstring("4096"), "marker should reference the original byte count for diagnostics") + }) + + It("recurses into nested maps so deeply nested oversized strings are also bounded", func() { + oversized := strings.Repeat("y", smallCap*2) + + trace.RecordBackendTrace(trace.BackendTrace{ + Timestamp: time.Now(), + Type: trace.BackendTraceLLM, + ModelName: "m", + Data: map[string]any{ + "chat_deltas": map[string]any{ + "content": oversized, + "total_deltas": 5, + "tool_call_count": 0, + }, + }, + }) + + Eventually(trace.GetBackendTraces).Should(HaveLen(1)) + got := trace.GetBackendTraces()[0] + + deltas, ok := got.Data["chat_deltas"].(map[string]any) + Expect(ok).To(BeTrue(), "nested map structure must be preserved") + Expect(deltas["total_deltas"]).To(Equal(5), "non-string siblings must pass through untouched") + + content, ok := deltas["content"].(string) + Expect(ok).To(BeTrue()) + Expect(len(content)).To(BeNumerically("<", smallCap), "nested oversized string must still be capped") + Expect(content).To(ContainSubstring("truncated")) + }) + + It("leaves values within the cap untouched", func() { + smallVal := strings.Repeat("z", smallCap-smallCapStep) + + trace.RecordBackendTrace(trace.BackendTrace{ + Timestamp: time.Now(), + Type: trace.BackendTraceEmbedding, + ModelName: "m", + Data: map[string]any{ + "input_text": smallVal, + }, + }) + + Eventually(trace.GetBackendTraces).Should(HaveLen(1)) + got := trace.GetBackendTraces()[0] + + Expect(got.Data["input_text"]).To(Equal(smallVal)) + }) + + It("does not re-truncate values that producers already capped with TruncateToBytes", func() { + // Producers (LLM messages/response, etc.) prefer head-preserving + // truncation so users can still read the start of the conversation. + // TruncateToBytes guarantees output <= cap, so the generic safety + // net below must leave it alone, otherwise the kept prefix gets + // thrown away and replaced with the marker. + preTruncated := trace.TruncateToBytes(strings.Repeat("a", smallCap*4), smallCap) + Expect(len(preTruncated)).To(BeNumerically("<=", smallCap)) + + trace.RecordBackendTrace(trace.BackendTrace{ + Timestamp: time.Now(), + Type: trace.BackendTraceLLM, + ModelName: "m", + Data: map[string]any{ + "messages": preTruncated, + }, + }) + + Eventually(trace.GetBackendTraces).Should(HaveLen(1)) + got := trace.GetBackendTraces()[0] + Expect(got.Data["messages"]).To(Equal(preTruncated)) + }) +}) + +var _ = Describe("TruncateToBytes", func() { + It("returns the input unchanged when it fits", func() { + Expect(trace.TruncateToBytes("hello", 1024)).To(Equal("hello")) + }) + + It("treats maxBytes <= 0 as unlimited", func() { + Expect(trace.TruncateToBytes("hello", 0)).To(Equal("hello")) + Expect(trace.TruncateToBytes("hello", -1)).To(Equal("hello")) + }) + + It("caps oversized input to at most maxBytes and preserves the head", func() { + in := strings.Repeat("a", 5000) + out := trace.TruncateToBytes(in, 100) + Expect(len(out)).To(BeNumerically("<=", 100), "output must never exceed the cap so the generic Record-time safety net doesn't fire") + Expect(out).To(HavePrefix("a"), "should keep the leading content readable") + Expect(out).To(ContainSubstring("truncated"), "should mark the value as truncated for the UI") + }) + + It("falls back to plain truncation when the cap is smaller than the suffix", func() { + in := strings.Repeat("a", 100) + out := trace.TruncateToBytes(in, 4) + Expect(len(out)).To(Equal(4)) + Expect(out).To(Equal("aaaa")) + }) +}) diff --git a/core/trace/trace_suite_test.go b/core/trace/trace_suite_test.go new file mode 100644 index 000000000..917fdc332 --- /dev/null +++ b/core/trace/trace_suite_test.go @@ -0,0 +1,13 @@ +package trace_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestTrace(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Trace test suite") +}