fix(anthropic): do not emit empty tokens and fix SSE tool calls (#9258)

This fixes Claude Code compatibility

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2026-04-07 00:38:21 +02:00 committed by GitHub
parent 33b124c6f1
commit 0f9d516a6c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 296 additions and 4 deletions

View file

@@ -3,6 +3,8 @@ package anthropic
import (
"encoding/json"
"fmt"
"sync"
"time"
"github.com/google/uuid"
"github.com/labstack/echo/v4"
@@ -366,7 +368,33 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
// Collect tool calls for MCP execution
var collectedToolCalls []functions.FuncCallResults
// SSE keepalive: send comment pings every 3s until the first token arrives.
// This prevents clients (e.g. Claude Code) from timing out while the model loads or processes the prompt.
firstTokenReceived := make(chan struct{})
keepaliveDone := make(chan struct{})
go func() {
defer close(keepaliveDone)
ticker := time.NewTicker(3 * time.Second)
defer ticker.Stop()
for {
select {
case <-firstTokenReceived:
return
case <-c.Request().Context().Done():
return
case <-ticker.C:
fmt.Fprintf(c.Response().Writer, "event: ping\ndata: {\"type\": \"ping\"}\n\n")
c.Response().Flush()
}
}
}()
firstTokenOnce := sync.Once{}
tokenCallback := func(token string, usage backend.TokenUsage) bool {
firstTokenOnce.Do(func() {
close(firstTokenReceived)
<-keepaliveDone // wait for keepalive goroutine to exit before writing
})
accumulatedContent += token
if shouldUseFn {
@@ -414,7 +442,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
}
}
if !inToolCall {
if !inToolCall && token != "" {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(0),
@@ -433,6 +461,11 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
openAIReq.Metadata = input.Metadata
_, tokenUsage, chatDeltas, err := openaiEndpoint.ComputeChoices(openAIReq, predInput, cfg, cl, appConfig, ml, func(s string, c *[]schema.Choice) {}, tokenCallback)
// Stop the keepalive goroutine now that inference is done
firstTokenOnce.Do(func() { close(firstTokenReceived) })
<-keepaliveDone
if err != nil {
xlog.Error("Anthropic stream model inference failed", "error", err)
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
@@ -445,9 +478,68 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
return nil
}
// Also check chat deltas for tool calls
if deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas); len(deltaToolCalls) > 0 && len(collectedToolCalls) == 0 {
collectedToolCalls = deltaToolCalls
// Check chat deltas from C++ autoparser — when active, the raw
// message is cleared and content/tool calls arrive via ChatDeltas.
if len(chatDeltas) > 0 {
deltaContent := functions.ContentFromChatDeltas(chatDeltas)
deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas)
// Emit text content from ChatDeltas only when the tokenCallback
// didn't already stream it (autoparser clears raw text, so
// accumulatedContent will be empty in that case).
if deltaContent != "" && !inToolCall && accumulatedContent == "" {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(0),
Delta: &schema.AnthropicStreamDelta{
Type: "text_delta",
Text: deltaContent,
},
})
}
// Emit tool_use blocks from ChatDeltas
if len(deltaToolCalls) > 0 && len(collectedToolCalls) == 0 {
collectedToolCalls = deltaToolCalls
if !inToolCall && currentBlockIndex == 0 {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(currentBlockIndex),
})
currentBlockIndex++
inToolCall = true
}
for i, tc := range deltaToolCalls {
toolCallID := tc.ID
if toolCallID == "" {
toolCallID = fmt.Sprintf("toolu_%s_%d", id, i)
}
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_start",
Index: intPtr(currentBlockIndex),
ContentBlock: &schema.AnthropicContentBlock{
Type: "tool_use",
ID: toolCallID,
Name: tc.Name,
},
})
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(currentBlockIndex),
Delta: &schema.AnthropicStreamDelta{
Type: "input_json_delta",
PartialJSON: tc.Arguments,
},
})
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(currentBlockIndex),
})
currentBlockIndex++
toolCallsEmitted++
}
}
}
// MCP streaming tool execution: if we collected MCP tool calls, execute and loop

View file

@@ -166,6 +166,67 @@ This section provides step-by-step instructions for configuring specific softwar
After saving the configuration file, restart OpenCode for the changes to take effect.
### Claude Code
[Claude Code](https://docs.anthropic.com/en/docs/claude-code) is Anthropic's official CLI tool for coding with Claude. LocalAI implements the Anthropic Messages API (`/v1/messages`), so Claude Code can be pointed directly at a LocalAI instance.
#### Prerequisites
- LocalAI must be running and accessible (either locally or on a network)
- You need to know your LocalAI server's IP address/hostname and port (default is `8080`)
- An API key configured in your LocalAI instance
#### Running Claude Code with LocalAI
Set the `ANTHROPIC_BASE_URL` and `ANTHROPIC_API_KEY` environment variables to point Claude Code at your LocalAI server:
```bash
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
ANTHROPIC_API_KEY=your-localai-api-key \
claude --model your-model-name
```
For example, if you have a Gemma model loaded:
```bash
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
ANTHROPIC_API_KEY=your-localai-api-key \
claude --model gemma-4-12B-it-GGUF
```
You can also run a single prompt non-interactively:
```bash
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
ANTHROPIC_API_KEY=your-localai-api-key \
claude -p "list the files in /tmp" --model your-model-name
```
#### Configuration
To avoid setting environment variables every time, you can add them to your shell profile (e.g., `~/.bashrc` or `~/.zshrc`):
```bash
export ANTHROPIC_BASE_URL=http://127.0.0.1:8080
export ANTHROPIC_API_KEY=your-localai-api-key
```
#### Verify available models
Check which models are available in your LocalAI instance:
```bash
curl http://127.0.0.1:8080/v1/models
```
Use one of the listed model IDs as the `--model` argument.
#### Notes
- Models with tool calling support (e.g., Gemma 4, Qwen 3) work best, as Claude Code relies heavily on tool use for file operations and code editing.
- Larger models generally produce better results for complex coding tasks.
- The Anthropic Messages API endpoint supports both streaming and non-streaming modes.
### Charm Crush
You can ask [Charm Crush](https://charm.land/crush) to generate your config by giving it this documentation's URL and your LocalAI instance URL. The configuration will look something like the following and goes in `~/.config/crush/crush.json`:

View file

@@ -383,5 +383,144 @@ var _ = Describe("Anthropic API E2E test", func() {
Expect(string(message.StopReason)).To(Equal("tool_use"))
})
})
// E2E coverage for the ChatDeltas path: when the C++ autoparser is active,
// the raw token stream is cleared and content/tool calls arrive as structured
// deltas, which the Anthropic endpoint must translate into SSE events.
Context("ChatDeltas (C++ autoparser)", func() {
// Tool calls delivered via ChatDeltas must surface as tool_use
// content_block_start events and accumulate into the final message.
It("streams tool calls via ChatDeltas", func() {
stream := client.Messages.NewStreaming(context.TODO(), anthropic.MessageNewParams{
Model: "mock-model-autoparser",
MaxTokens: 1024,
Messages: []anthropic.MessageParam{
// Prompt prefix triggers the mock backend's autoparser tool-call path.
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_TOOL_CALL What's the weather like in San Francisco?")),
},
Tools: []anthropic.ToolUnionParam{
anthropic.ToolUnionParam{
OfTool: &anthropic.ToolParam{
Name: "get_weather",
Description: anthropic.Opt("Get the current weather in a given location"),
InputSchema: anthropic.ToolInputSchemaParam{
Type: constant.ValueOf[constant.Object](),
Properties: map[string]any{
"location": map[string]any{
"type": "string",
"description": "The city and state",
},
},
Required: []string{"location"},
},
},
},
},
})
message := anthropic.Message{}
hasToolUseStart := false
// Drain the stream, accumulating every event into `message` and
// watching for a tool_use content_block_start along the way.
for stream.Next() {
event := stream.Current()
err := message.Accumulate(event)
Expect(err).ToNot(HaveOccurred())
if e, ok := event.AsAny().(anthropic.ContentBlockStartEvent); ok {
if e.ContentBlock.Type == "tool_use" {
hasToolUseStart = true
}
}
}
Expect(stream.Err()).ToNot(HaveOccurred())
Expect(hasToolUseStart).To(BeTrue(), "Should have tool_use content_block_start event from ChatDeltas")
Expect(string(message.StopReason)).To(Equal("tool_use"))
// Verify tool call is present in accumulated message
foundToolUse := false
for _, block := range message.Content {
if block.Type == "tool_use" {
foundToolUse = true
// Tool-use blocks must carry a non-empty ID (synthesized
// server-side when the backend omits one).
Expect(block.ID).ToNot(BeEmpty())
}
}
Expect(foundToolUse).To(BeTrue(), "Accumulated message should contain tool_use block from ChatDeltas")
})
// Text content arriving via ChatDeltas must be emitted exactly once,
// guarding against double emission from both the token callback and
// the delta path.
It("streams content via ChatDeltas without duplication", func() {
stream := client.Messages.NewStreaming(context.TODO(), anthropic.MessageNewParams{
Model: "mock-model-autoparser",
MaxTokens: 1024,
Messages: []anthropic.MessageParam{
// Prompt prefix triggers the mock backend's autoparser text-content path.
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_CONTENT Tell me about LocalAI")),
},
})
message := anthropic.Message{}
var textDeltas []string
for stream.Next() {
event := stream.Current()
err := message.Accumulate(event)
Expect(err).ToNot(HaveOccurred())
if e, ok := event.AsAny().(anthropic.ContentBlockDeltaEvent); ok {
if e.Delta.Type == "text_delta" && e.Delta.Text != "" {
textDeltas = append(textDeltas, e.Delta.Text)
}
}
}
Expect(stream.Err()).ToNot(HaveOccurred())
Expect(message.Content).ToNot(BeEmpty())
Expect(string(message.StopReason)).To(Equal("end_turn"))
// Content should appear exactly once (no duplication)
fullText := ""
for _, block := range message.Content {
if block.Type == "text" {
fullText += block.Text
}
}
Expect(fullText).To(ContainSubstring("LocalAI"))
// Check that the content is not duplicated by counting occurrences
// NOTE(review): the 200-char bound assumes the mock reply is short
// enough that a duplicate would exceed it — confirm against the mock.
Expect(len(fullText)).To(BeNumerically("<", 200), "Content should not be duplicated")
})
// Same ChatDeltas tool-call scenario, exercised through the
// non-streaming /v1/messages request path.
It("handles tool calls via ChatDeltas in non-streaming mode", func() {
message, err := client.Messages.New(context.TODO(), anthropic.MessageNewParams{
Model: "mock-model-autoparser",
MaxTokens: 1024,
Messages: []anthropic.MessageParam{
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_TOOL_CALL What's the weather like in San Francisco?")),
},
Tools: []anthropic.ToolUnionParam{
anthropic.ToolUnionParam{
OfTool: &anthropic.ToolParam{
Name: "get_weather",
Description: anthropic.Opt("Get the current weather"),
InputSchema: anthropic.ToolInputSchemaParam{
Type: constant.ValueOf[constant.Object](),
Properties: map[string]any{
"location": map[string]any{
"type": "string",
},
},
Required: []string{"location"},
},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
Expect(message.Content).ToNot(BeEmpty())
Expect(string(message.StopReason)).To(Equal("tool_use"))
foundToolUse := false
for _, block := range message.Content {
if block.Type == "tool_use" {
foundToolUse = true
Expect(block.ID).ToNot(BeEmpty())
}
}
Expect(foundToolUse).To(BeTrue(), "Should have tool_use block from ChatDeltas")
})
})
})
})