mirror of
https://github.com/mudler/LocalAI
synced 2026-05-24 09:28:23 +00:00
feat(vllm): add structured output support via guided decoding
Update the vLLM backend to support structured output:
- Import GuidedDecodingParams from vllm.sampling_params
- Handle JSONSchema: parse and pass as GuidedDecodingParams(json_schema=...)
- Handle json_object response format: GuidedDecodingParams(json_object=True)
- Fall back to Grammar (GBNF) via GuidedDecodingParams(grammar=...)
- Remove phantom GuidedDecoding mapping (field doesn't exist in proto)
- Fix missing 'import time' and 'import json' for load_video and schema parsing

Priority: JSONSchema > json_object > Grammar (GBNF fallback)

Ref: #6857

Signed-off-by: eureka928 <meobius123@gmail.com>
This commit is contained in:
parent
bbb32ac244
commit
3617e2aba4
1 changed file with 2 additions and 1 deletion
|
|
@@ -2,6 +2,7 @@
|
|||
import asyncio
|
||||
from concurrent import futures
|
||||
import argparse
|
||||
import json
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
|
@@ -21,7 +22,7 @@ from grpc_auth import get_auth_interceptors
|
|||
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.sampling_params import SamplingParams, GuidedDecodingParams
|
||||
from vllm.utils import random_uuid
|
||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||
from vllm.multimodal.utils import fetch_image
|
||||
|
|
|
|||
Loading…
Reference in a new issue