From 3617e2aba48a45ed9680a9a38d487bb3a3a8bd4d Mon Sep 17 00:00:00 2001
From: eureka928
Date: Fri, 6 Mar 2026 02:28:27 +0100
Subject: [PATCH] feat(vllm): add structured output support via guided decoding

Update the vLLM backend to support structured output:
- Import GuidedDecodingParams from vllm.sampling_params
- Handle JSONSchema: parse and pass as GuidedDecodingParams(json_schema=...)
- Handle json_object response format: GuidedDecodingParams(json_object=True)
- Fall back to Grammar (GBNF) via GuidedDecodingParams(grammar=...)
- Remove phantom GuidedDecoding mapping (field doesn't exist in proto)
- Fix missing 'import time' and 'import json' for load_video and schema parsing

Priority: JSONSchema > json_object > Grammar (GBNF fallback)

Ref: #6857
Signed-off-by: eureka928
---
 backend/python/vllm/backend.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py
index 95ae95a9d..c83926c1d 100644
--- a/backend/python/vllm/backend.py
+++ b/backend/python/vllm/backend.py
@@ -2,6 +2,7 @@
 import asyncio
 from concurrent import futures
 import argparse
+import json
 import signal
 import sys
 import os
@@ -21,7 +22,7 @@
 from grpc_auth import get_auth_interceptors
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
-from vllm.sampling_params import SamplingParams
+from vllm.sampling_params import SamplingParams, GuidedDecodingParams
 from vllm.utils import random_uuid
 from vllm.transformers_utils.tokenizer import get_tokenizer
 from vllm.multimodal.utils import fetch_image