mirror of
https://github.com/justLV/onju-v2
synced 2026-04-21 07:37:34 +00:00
Only persist assistant history after successful TTS delivery
Move the conversational backend's _finalize() call out of stream()'s finally block and expose it as a public commit(text) method instead. The previous behavior persisted whatever was yielded even when the stream errored, the user interrupted, or a TTS send failed — so saved history diverged from what the user actually heard, and the next turn replayed phantom context to the LLM. main.py now calls backend.commit(response_text) only after a turn successfully completes and produced content. Agentic backend gets a no-op commit() since history lives on the remote service.
This commit is contained in:
parent
0bc3ae209f
commit
44c7be03b8
4 changed files with 21 additions and 11 deletions
|
|
@ -68,6 +68,9 @@ class AgenticBackend:
|
|||
if delta:
|
||||
yield delta
|
||||
|
||||
def commit(self, text: str) -> None:
    """No-op: the remote agentic service is the owner of conversation
    history, so there is nothing to persist on this side."""
    pass
|
||||
|
||||
def reset(self) -> None:
    """No-op: resetting the session would need an API call to the remote
    service, if it supports one at all."""
    pass
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,11 @@ class ConversationBackend(Protocol):
|
|||
"""Send a user message, yield assistant text deltas as they arrive."""
|
||||
...
|
||||
|
||||
def commit(self, text: str) -> None:
    """Persist *text* as the assistant response after successful delivery.

    Invoke only once the turn actually played out; backends whose history
    is managed remotely may implement this as a no-op.
    """
    ...
|
||||
|
||||
def reset(self) -> None:
    """Discard accumulated conversation history / begin a new session."""
    ...
|
||||
|
|
|
|||
|
|
@ -75,17 +75,17 @@ class ConversationalBackend:
|
|||
kwargs["stream"] = True
|
||||
stream = await self.client.chat.completions.create(**kwargs)
|
||||
|
||||
parts: list[str] = []
|
||||
try:
|
||||
async for chunk in stream:
|
||||
if not chunk.choices:
|
||||
continue
|
||||
delta = chunk.choices[0].delta.content or ""
|
||||
if delta:
|
||||
parts.append(delta)
|
||||
yield delta
|
||||
finally:
|
||||
self._finalize("".join(parts))
|
||||
async for chunk in stream:
|
||||
if not chunk.choices:
|
||||
continue
|
||||
delta = chunk.choices[0].delta.content or ""
|
||||
if delta:
|
||||
yield delta
|
||||
|
||||
def commit(self, text: str) -> None:
    """Record *text* in the stored history as the assistant's reply.

    The caller passes the joined text that was actually delivered to TTS,
    so the persisted history matches what the user really heard.
    """
    self._finalize(text)
|
||||
|
||||
def reset(self) -> None:
    """Drop all turns, keeping only the configured system prompt."""
    system_message = {"role": "system", "content": self.cfg["system_prompt"]}
    self.messages = [system_message]
|
||||
|
|
|
|||
|
|
@ -389,6 +389,8 @@ async def process_utterances(config: dict, manager: DeviceManager, utterance_que
|
|||
volume=0, fade=0)
|
||||
|
||||
response_text = " ".join(full_response)
|
||||
if response_text:
|
||||
device.conversation.commit(response_text)
|
||||
device.last_response = response_text
|
||||
elapsed = time.monotonic() - turn_t0
|
||||
ttfs = f"{first_sentence_at - turn_t0:.2f}s" if first_sentence_at else "—"
|
||||
|
|
|
|||
Loading…
Reference in a new issue