mirror of
https://github.com/mudler/LocalAI
synced 2026-04-21 13:27:21 +00:00
fix(anthropic): do not emit empty tokens and fix SSE tool calls (#9258)
This fixes Claude Code compatibility. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
33b124c6f1
commit
0f9d516a6c
3 changed files with 296 additions and 4 deletions
|
|
@ -3,6 +3,8 @@ package anthropic
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/labstack/echo/v4"
|
||||
|
|
@ -366,7 +368,33 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
|
|||
// Collect tool calls for MCP execution
|
||||
var collectedToolCalls []functions.FuncCallResults
|
||||
|
||||
// SSE keepalive: send comment pings every 3s until the first token arrives.
|
||||
// This prevents clients (e.g. Claude Code) from timing out while the model loads or processes the prompt.
|
||||
firstTokenReceived := make(chan struct{})
|
||||
keepaliveDone := make(chan struct{})
|
||||
go func() {
|
||||
defer close(keepaliveDone)
|
||||
ticker := time.NewTicker(3 * time.Second)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-firstTokenReceived:
|
||||
return
|
||||
case <-c.Request().Context().Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
fmt.Fprintf(c.Response().Writer, "event: ping\ndata: {\"type\": \"ping\"}\n\n")
|
||||
c.Response().Flush()
|
||||
}
|
||||
}
|
||||
}()
|
||||
firstTokenOnce := sync.Once{}
|
||||
|
||||
tokenCallback := func(token string, usage backend.TokenUsage) bool {
|
||||
firstTokenOnce.Do(func() {
|
||||
close(firstTokenReceived)
|
||||
<-keepaliveDone // wait for keepalive goroutine to exit before writing
|
||||
})
|
||||
accumulatedContent += token
|
||||
|
||||
if shouldUseFn {
|
||||
|
|
@ -414,7 +442,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
|
|||
}
|
||||
}
|
||||
|
||||
if !inToolCall {
|
||||
if !inToolCall && token != "" {
|
||||
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
|
||||
Type: "content_block_delta",
|
||||
Index: intPtr(0),
|
||||
|
|
@ -433,6 +461,11 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
|
|||
openAIReq.Metadata = input.Metadata
|
||||
|
||||
_, tokenUsage, chatDeltas, err := openaiEndpoint.ComputeChoices(openAIReq, predInput, cfg, cl, appConfig, ml, func(s string, c *[]schema.Choice) {}, tokenCallback)
|
||||
|
||||
// Stop the keepalive goroutine now that inference is done
|
||||
firstTokenOnce.Do(func() { close(firstTokenReceived) })
|
||||
<-keepaliveDone
|
||||
|
||||
if err != nil {
|
||||
xlog.Error("Anthropic stream model inference failed", "error", err)
|
||||
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
|
||||
|
|
@ -445,9 +478,68 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
|
|||
return nil
|
||||
}
|
||||
|
||||
// Also check chat deltas for tool calls
|
||||
if deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas); len(deltaToolCalls) > 0 && len(collectedToolCalls) == 0 {
|
||||
collectedToolCalls = deltaToolCalls
|
||||
// Check chat deltas from C++ autoparser — when active, the raw
|
||||
// message is cleared and content/tool calls arrive via ChatDeltas.
|
||||
if len(chatDeltas) > 0 {
|
||||
deltaContent := functions.ContentFromChatDeltas(chatDeltas)
|
||||
deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas)
|
||||
|
||||
// Emit text content from ChatDeltas only when the tokenCallback
|
||||
// didn't already stream it (autoparser clears raw text, so
|
||||
// accumulatedContent will be empty in that case).
|
||||
if deltaContent != "" && !inToolCall && accumulatedContent == "" {
|
||||
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
|
||||
Type: "content_block_delta",
|
||||
Index: intPtr(0),
|
||||
Delta: &schema.AnthropicStreamDelta{
|
||||
Type: "text_delta",
|
||||
Text: deltaContent,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// Emit tool_use blocks from ChatDeltas
|
||||
if len(deltaToolCalls) > 0 && len(collectedToolCalls) == 0 {
|
||||
collectedToolCalls = deltaToolCalls
|
||||
|
||||
if !inToolCall && currentBlockIndex == 0 {
|
||||
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
|
||||
Type: "content_block_stop",
|
||||
Index: intPtr(currentBlockIndex),
|
||||
})
|
||||
currentBlockIndex++
|
||||
inToolCall = true
|
||||
}
|
||||
for i, tc := range deltaToolCalls {
|
||||
toolCallID := tc.ID
|
||||
if toolCallID == "" {
|
||||
toolCallID = fmt.Sprintf("toolu_%s_%d", id, i)
|
||||
}
|
||||
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
|
||||
Type: "content_block_start",
|
||||
Index: intPtr(currentBlockIndex),
|
||||
ContentBlock: &schema.AnthropicContentBlock{
|
||||
Type: "tool_use",
|
||||
ID: toolCallID,
|
||||
Name: tc.Name,
|
||||
},
|
||||
})
|
||||
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
|
||||
Type: "content_block_delta",
|
||||
Index: intPtr(currentBlockIndex),
|
||||
Delta: &schema.AnthropicStreamDelta{
|
||||
Type: "input_json_delta",
|
||||
PartialJSON: tc.Arguments,
|
||||
},
|
||||
})
|
||||
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
|
||||
Type: "content_block_stop",
|
||||
Index: intPtr(currentBlockIndex),
|
||||
})
|
||||
currentBlockIndex++
|
||||
toolCallsEmitted++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MCP streaming tool execution: if we collected MCP tool calls, execute and loop
|
||||
|
|
|
|||
|
|
@ -166,6 +166,67 @@ This section provides step-by-step instructions for configuring specific softwar
|
|||
After saving the configuration file, restart OpenCode for the changes to take effect.
|
||||
|
||||
|
||||
### Claude Code
|
||||
|
||||
[Claude Code](https://docs.anthropic.com/en/docs/claude-code) is Anthropic's official CLI tool for coding with Claude. LocalAI implements the Anthropic Messages API (`/v1/messages`), so Claude Code can be pointed directly at a LocalAI instance.
|
||||
|
||||
#### Prerequisites
|
||||
|
||||
- LocalAI must be running and accessible (either locally or on a network)
|
||||
- You need to know your LocalAI server's IP address/hostname and port (default is `8080`)
|
||||
- An API key configured in your LocalAI instance
|
||||
|
||||
#### Running Claude Code with LocalAI
|
||||
|
||||
Set the `ANTHROPIC_BASE_URL` and `ANTHROPIC_API_KEY` environment variables to point Claude Code at your LocalAI server:
|
||||
|
||||
```bash
|
||||
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
|
||||
ANTHROPIC_API_KEY=your-localai-api-key \
|
||||
claude --model your-model-name
|
||||
```
|
||||
|
||||
For example, if you have a Gemma model loaded:
|
||||
|
||||
```bash
|
||||
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
|
||||
ANTHROPIC_API_KEY=your-localai-api-key \
|
||||
claude --model gemma-4-12B-it-GGUF
|
||||
```
|
||||
|
||||
You can also run a single prompt non-interactively:
|
||||
|
||||
```bash
|
||||
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
|
||||
ANTHROPIC_API_KEY=your-localai-api-key \
|
||||
claude -p "list the files in /tmp" --model your-model-name
|
||||
```
|
||||
|
||||
#### Configuration
|
||||
|
||||
To avoid setting environment variables every time, you can add them to your shell profile (e.g., `~/.bashrc` or `~/.zshrc`):
|
||||
|
||||
```bash
|
||||
export ANTHROPIC_BASE_URL=http://127.0.0.1:8080
|
||||
export ANTHROPIC_API_KEY=your-localai-api-key
|
||||
```
|
||||
|
||||
#### Verify available models
|
||||
|
||||
Check which models are available in your LocalAI instance:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8080/v1/models
|
||||
```
|
||||
|
||||
Use one of the listed model IDs as the `--model` argument.
|
||||
|
||||
#### Notes
|
||||
|
||||
- Models with tool calling support (e.g., Gemma 4, Qwen 3) work best, as Claude Code relies heavily on tool use for file operations and code editing.
|
||||
- Larger models generally produce better results for complex coding tasks.
|
||||
- The Anthropic Messages API endpoint supports both streaming and non-streaming modes.
|
||||
|
||||
### Charm Crush
|
||||
|
||||
You can ask [Charm Crush](https://charm.land/crush) to generate your config by giving it this documentation's URL and your LocalAI instance URL. The configuration will look something like the following and goes in `~/.config/crush/crush.json`:
|
||||
|
|
|
|||
|
|
@ -383,5 +383,144 @@ var _ = Describe("Anthropic API E2E test", func() {
|
|||
Expect(string(message.StopReason)).To(Equal("tool_use"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("ChatDeltas (C++ autoparser)", func() {
|
||||
It("streams tool calls via ChatDeltas", func() {
|
||||
stream := client.Messages.NewStreaming(context.TODO(), anthropic.MessageNewParams{
|
||||
Model: "mock-model-autoparser",
|
||||
MaxTokens: 1024,
|
||||
Messages: []anthropic.MessageParam{
|
||||
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_TOOL_CALL What's the weather like in San Francisco?")),
|
||||
},
|
||||
Tools: []anthropic.ToolUnionParam{
|
||||
anthropic.ToolUnionParam{
|
||||
OfTool: &anthropic.ToolParam{
|
||||
Name: "get_weather",
|
||||
Description: anthropic.Opt("Get the current weather in a given location"),
|
||||
InputSchema: anthropic.ToolInputSchemaParam{
|
||||
Type: constant.ValueOf[constant.Object](),
|
||||
Properties: map[string]any{
|
||||
"location": map[string]any{
|
||||
"type": "string",
|
||||
"description": "The city and state",
|
||||
},
|
||||
},
|
||||
Required: []string{"location"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
message := anthropic.Message{}
|
||||
hasToolUseStart := false
|
||||
|
||||
for stream.Next() {
|
||||
event := stream.Current()
|
||||
err := message.Accumulate(event)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
if e, ok := event.AsAny().(anthropic.ContentBlockStartEvent); ok {
|
||||
if e.ContentBlock.Type == "tool_use" {
|
||||
hasToolUseStart = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Expect(stream.Err()).ToNot(HaveOccurred())
|
||||
Expect(hasToolUseStart).To(BeTrue(), "Should have tool_use content_block_start event from ChatDeltas")
|
||||
Expect(string(message.StopReason)).To(Equal("tool_use"))
|
||||
|
||||
// Verify tool call is present in accumulated message
|
||||
foundToolUse := false
|
||||
for _, block := range message.Content {
|
||||
if block.Type == "tool_use" {
|
||||
foundToolUse = true
|
||||
Expect(block.ID).ToNot(BeEmpty())
|
||||
}
|
||||
}
|
||||
Expect(foundToolUse).To(BeTrue(), "Accumulated message should contain tool_use block from ChatDeltas")
|
||||
})
|
||||
|
||||
It("streams content via ChatDeltas without duplication", func() {
|
||||
stream := client.Messages.NewStreaming(context.TODO(), anthropic.MessageNewParams{
|
||||
Model: "mock-model-autoparser",
|
||||
MaxTokens: 1024,
|
||||
Messages: []anthropic.MessageParam{
|
||||
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_CONTENT Tell me about LocalAI")),
|
||||
},
|
||||
})
|
||||
|
||||
message := anthropic.Message{}
|
||||
var textDeltas []string
|
||||
|
||||
for stream.Next() {
|
||||
event := stream.Current()
|
||||
err := message.Accumulate(event)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
if e, ok := event.AsAny().(anthropic.ContentBlockDeltaEvent); ok {
|
||||
if e.Delta.Type == "text_delta" && e.Delta.Text != "" {
|
||||
textDeltas = append(textDeltas, e.Delta.Text)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Expect(stream.Err()).ToNot(HaveOccurred())
|
||||
Expect(message.Content).ToNot(BeEmpty())
|
||||
Expect(string(message.StopReason)).To(Equal("end_turn"))
|
||||
|
||||
// Content should appear exactly once (no duplication)
|
||||
fullText := ""
|
||||
for _, block := range message.Content {
|
||||
if block.Type == "text" {
|
||||
fullText += block.Text
|
||||
}
|
||||
}
|
||||
Expect(fullText).To(ContainSubstring("LocalAI"))
|
||||
// Check that the content is not duplicated by counting occurrences
|
||||
Expect(len(fullText)).To(BeNumerically("<", 200), "Content should not be duplicated")
|
||||
})
|
||||
|
||||
It("handles tool calls via ChatDeltas in non-streaming mode", func() {
|
||||
message, err := client.Messages.New(context.TODO(), anthropic.MessageNewParams{
|
||||
Model: "mock-model-autoparser",
|
||||
MaxTokens: 1024,
|
||||
Messages: []anthropic.MessageParam{
|
||||
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_TOOL_CALL What's the weather like in San Francisco?")),
|
||||
},
|
||||
Tools: []anthropic.ToolUnionParam{
|
||||
anthropic.ToolUnionParam{
|
||||
OfTool: &anthropic.ToolParam{
|
||||
Name: "get_weather",
|
||||
Description: anthropic.Opt("Get the current weather"),
|
||||
InputSchema: anthropic.ToolInputSchemaParam{
|
||||
Type: constant.ValueOf[constant.Object](),
|
||||
Properties: map[string]any{
|
||||
"location": map[string]any{
|
||||
"type": "string",
|
||||
},
|
||||
},
|
||||
Required: []string{"location"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(message.Content).ToNot(BeEmpty())
|
||||
Expect(string(message.StopReason)).To(Equal("tool_use"))
|
||||
|
||||
foundToolUse := false
|
||||
for _, block := range message.Content {
|
||||
if block.Type == "tool_use" {
|
||||
foundToolUse = true
|
||||
Expect(block.ID).ToNot(BeEmpty())
|
||||
}
|
||||
}
|
||||
Expect(foundToolUse).To(BeTrue(), "Should have tool_use block from ChatDeltas")
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
|
|||
Loading…
Reference in a new issue