mirror of
https://github.com/mudler/LocalAI
synced 2026-05-24 09:28:23 +00:00
Merge c703a03247 into 7980629bc5
This commit is contained in:
commit
6dc01afda5
6 changed files with 191 additions and 4 deletions
|
|
@ -4,6 +4,7 @@ import dataclasses
|
|||
import difflib
|
||||
from concurrent import futures
|
||||
import argparse
|
||||
import json
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
|
@ -25,6 +26,21 @@ from grpc_auth import get_auth_interceptors
|
|||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
||||
from vllm.sampling_params import SamplingParams
|
||||
|
||||
# vLLM renamed GuidedDecodingParams to StructuredOutputsParams in newer versions.
|
||||
# The corresponding SamplingParams field also changed from guided_decoding to structured_outputs.
|
||||
try:
|
||||
from vllm.sampling_params import StructuredOutputsParams
|
||||
_structured_output_cls = StructuredOutputsParams
|
||||
_structured_output_field = "structured_outputs"
|
||||
except ImportError:
|
||||
try:
|
||||
from vllm.sampling_params import GuidedDecodingParams
|
||||
_structured_output_cls = GuidedDecodingParams
|
||||
_structured_output_field = "guided_decoding"
|
||||
except ImportError:
|
||||
_structured_output_cls = None
|
||||
_structured_output_field = None
|
||||
from vllm.utils import random_uuid
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.multimodal.utils import fetch_image
|
||||
|
|
|
|||
|
|
@ -316,6 +316,12 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
|
|||
metadata["enable_thinking"] = "true"
|
||||
}
|
||||
}
|
||||
if c.ResponseFormat != "" {
|
||||
metadata["response_format"] = c.ResponseFormat
|
||||
}
|
||||
for k, v := range c.RequestMetadata {
|
||||
metadata[k] = v
|
||||
}
|
||||
pbOpts.Metadata = metadata
|
||||
|
||||
// Logprobs and TopLogprobs are set by the caller if provided
|
||||
|
|
|
|||
|
|
@ -221,7 +221,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||
switch d.Type {
|
||||
case "json_object":
|
||||
input.Grammar = functions.JSONBNF
|
||||
config.ResponseFormat = "json_object"
|
||||
case "json_schema":
|
||||
config.ResponseFormat = "json_schema"
|
||||
d := schema.JsonSchemaRequest{}
|
||||
dat, err := json.Marshal(config.ResponseFormatMap)
|
||||
if err != nil {
|
||||
|
|
@ -231,6 +233,16 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Pass raw JSON schema via metadata for backends that support native structured output
|
||||
schemaBytes, err := json.Marshal(d.JsonSchema.Schema)
|
||||
if err == nil {
|
||||
if config.RequestMetadata == nil {
|
||||
config.RequestMetadata = map[string]string{}
|
||||
}
|
||||
config.RequestMetadata["json_schema"] = string(schemaBytes)
|
||||
}
|
||||
|
||||
fs := &functions.JSONFunctionStructure{
|
||||
AnyOf: []functions.Item{d.JsonSchema.Schema},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -92,8 +92,34 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
|
|||
d := schema.ChatCompletionResponseFormat{}
|
||||
dat, _ := json.Marshal(config.ResponseFormatMap)
|
||||
_ = json.Unmarshal(dat, &d)
|
||||
if d.Type == "json_object" {
|
||||
switch d.Type {
|
||||
case "json_object":
|
||||
input.Grammar = functions.JSONBNF
|
||||
config.ResponseFormat = "json_object"
|
||||
case "json_schema":
|
||||
config.ResponseFormat = "json_schema"
|
||||
jsr := schema.JsonSchemaRequest{}
|
||||
dat, err := json.Marshal(config.ResponseFormatMap)
|
||||
if err == nil {
|
||||
if err := json.Unmarshal(dat, &jsr); err == nil {
|
||||
schemaBytes, err := json.Marshal(jsr.JsonSchema.Schema)
|
||||
if err == nil {
|
||||
if config.RequestMetadata == nil {
|
||||
config.RequestMetadata = map[string]string{}
|
||||
}
|
||||
config.RequestMetadata["json_schema"] = string(schemaBytes)
|
||||
}
|
||||
fs := &functions.JSONFunctionStructure{
|
||||
AnyOf: []functions.Item{jsr.JsonSchema.Schema},
|
||||
}
|
||||
g, err := fs.Grammar(config.FunctionsConfig.GrammarOptions()...)
|
||||
if err == nil {
|
||||
input.Grammar = g
|
||||
} else {
|
||||
xlog.Error("Failed generating grammar", "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -173,9 +173,42 @@ func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval
|
|||
Functions: funcs,
|
||||
}
|
||||
|
||||
// Handle text_format -> response_format conversion
|
||||
// Handle text_format -> response_format conversion and structured output
|
||||
if input.TextFormat != nil {
|
||||
openAIReq.ResponseFormat = convertTextFormatToResponseFormat(input.TextFormat)
|
||||
responseFormat := convertTextFormatToResponseFormat(input.TextFormat)
|
||||
openAIReq.ResponseFormat = responseFormat
|
||||
|
||||
// Generate grammar and pass schema for structured output (like OpenAI chat/completion)
|
||||
if rfMap, ok := responseFormat.(map[string]interface{}); ok {
|
||||
if rfType, _ := rfMap["type"].(string); rfType == "json_object" {
|
||||
cfg.Grammar = functions.JSONBNF
|
||||
cfg.ResponseFormat = "json_object"
|
||||
} else if rfType == "json_schema" {
|
||||
cfg.ResponseFormat = "json_schema"
|
||||
d := schema.JsonSchemaRequest{}
|
||||
dat, err := json.Marshal(rfMap)
|
||||
if err == nil {
|
||||
if err := json.Unmarshal(dat, &d); err == nil {
|
||||
schemaBytes, err := json.Marshal(d.JsonSchema.Schema)
|
||||
if err == nil {
|
||||
if cfg.RequestMetadata == nil {
|
||||
cfg.RequestMetadata = map[string]string{}
|
||||
}
|
||||
cfg.RequestMetadata["json_schema"] = string(schemaBytes)
|
||||
}
|
||||
fs := &functions.JSONFunctionStructure{
|
||||
AnyOf: []functions.Item{d.JsonSchema.Schema},
|
||||
}
|
||||
g, err := fs.Grammar(cfg.FunctionsConfig.GrammarOptions()...)
|
||||
if err == nil {
|
||||
cfg.Grammar = g
|
||||
} else {
|
||||
xlog.Error("Open Responses - Failed generating grammar for json_schema", "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generate grammar for function calling (similar to OpenAI chat endpoint)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,11 @@ url = "/features/constrained_grammars/"
|
|||
The `chat` endpoint supports the `grammar` parameter, which allows users to specify a grammar in Backus-Naur Form (BNF). This feature enables the Large Language Model (LLM) to generate outputs adhering to a user-defined schema, such as `JSON`, `YAML`, or any other format that can be defined using BNF. For more details about BNF, see [Backus-Naur Form on Wikipedia](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form).
|
||||
|
||||
{{% notice note %}}
|
||||
**Compatibility Notice:** This feature is only supported by models that use the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend. For a complete list of compatible models, refer to the [Model Compatibility]({{%relref "reference/compatibility-table" %}}) page. For technical details, see the related pull requests: [PR #1773](https://github.com/ggerganov/llama.cpp/pull/1773) and [PR #1887](https://github.com/ggerganov/llama.cpp/pull/1887).
|
||||
**Compatibility Notice:** Grammar and structured output support is available for the following backends:
|
||||
- **llama.cpp** — supports the `grammar` parameter (GBNF syntax) and `response_format` with `json_schema`/`json_object`
|
||||
- **vLLM** — supports the `grammar` parameter (via xgrammar) and `response_format` with `json_schema` (native JSON schema enforcement) or `json_object`
|
||||
|
||||
For a complete list of compatible models, refer to the [Model Compatibility]({{%relref "reference/compatibility-table" %}}) page.
|
||||
{{% /notice %}}
|
||||
|
||||
## Setup
|
||||
|
|
@ -66,6 +70,96 @@ For more complex grammars, you can define multi-line BNF rules. The grammar pars
|
|||
- Character classes (`[a-z]`)
|
||||
- String literals (`"text"`)
|
||||
|
||||
## vLLM Backend
|
||||
|
||||
The vLLM backend supports structured output via three methods:
|
||||
|
||||
### JSON Schema (recommended)
|
||||
|
||||
Use the OpenAI-compatible `response_format` parameter with `json_schema` to enforce a specific JSON structure:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "my-vllm-model",
|
||||
"messages": [{"role": "user", "content": "Generate a person object"}],
|
||||
"response_format": {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "person",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"age": {"type": "integer"}
|
||||
},
|
||||
"required": ["name", "age"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
### JSON Object
|
||||
|
||||
Force the model to output valid JSON (without a specific schema):
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "my-vllm-model",
|
||||
"messages": [{"role": "user", "content": "Generate a person as JSON"}],
|
||||
"response_format": {"type": "json_object"}
|
||||
}'
|
||||
```
|
||||
|
||||
### Grammar
|
||||
|
||||
The `grammar` parameter also works with vLLM via xgrammar:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "my-vllm-model",
|
||||
"messages": [{"role": "user", "content": "Do you like apples?"}],
|
||||
"grammar": "root ::= (\"yes\" | \"no\")"
|
||||
}'
|
||||
```
|
||||
|
||||
## Open Responses API
|
||||
|
||||
The Open Responses API (`/v1/responses`) also supports structured output via the `text_format` parameter:
|
||||
|
||||
### JSON Schema
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses -H "Content-Type: application/json" -d '{
|
||||
"model": "my-model",
|
||||
"input": "Generate a person object",
|
||||
"text_format": {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "person",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"age": {"type": "integer"}
|
||||
},
|
||||
"required": ["name", "age"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
### JSON Object
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses -H "Content-Type: application/json" -d '{
|
||||
"model": "my-model",
|
||||
"input": "Generate a person as JSON",
|
||||
"text_format": {"type": "json_object"}
|
||||
}'
|
||||
```
|
||||
|
||||
## Related Features
|
||||
|
||||
- [OpenAI Functions]({{%relref "features/openai-functions" %}}) - Function calling with structured outputs
|
||||
|
|
|
|||
Loading…
Reference in a new issue