Ai_Assistant/server/_archive/main_chat.py

from faster_whisper import WhisperModel
from process.asr_func.asr_auto_record import record_on_speech, transcribe_audio
from process.asr_func.asr_transcribe_groq import record_on_speech, transcribe_audio_groq
from process.llm_funcs.llm_scr import llm_response, llm_response_with_memory
from process.tts_func.sovits_ping import sovits_gen, play_audio, get_wav_duration
from process.tts_func.tts_preprocess import clean_llm_output
from process.vrm_func.vrm_ping import vrm_talk, vrm_animate
from process.vrm_func.vrm_states_ping import set_vrm_state

from pathlib import Path
import os
import time
### transcribe audio 
import uuid
import soundfile as sf
import asyncio
import threading
import random
import os
import time
import uuid
import json
import shutil
import yaml
from pathlib import Path
from openai import OpenAI
from contextlib import suppress
from queue import Queue
from threading import Event, Thread
from dotenv import load_dotenv
from pathlib import Path

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
def get_wav_duration(path):
    with sf.SoundFile(path) as f:
        return len(f) / f.samplerate


#!/usr/bin/env python3
"""
Streaming LLM -> chunked TTS -> queued playback script for Riko.

What it does:
- Records user speech (uses your record_on_speech)
- Transcribes (transcribe_audio)
- Streams LLM text (OpenAI Responses streaming)
- For each chunk: generate TTS via sovits_gen, copy to public audio dir, enqueue for playback
- Playback loop calls vrm_talk and vrm_animate and waits for the audio's duration to avoid overlap
- At the end of the stream, the full assistant text is appended to the JSON history file

Fill in or import the helper functions you already have in your project:
- record_on_speech(output_file, samplerate, channels, silence_threshold, silence_duration, device)
- transcribe_audio(whisper_model, aud_path)
- clean_asr_output(text)
- sovits_gen(in_text, emotion, output_wav_pth) -> returns path to generated wav (must write to output_wav_pth)
- vrm_talk(public_audio_path, expression, llm_output, duration)
- vrm_animate(cmd, path)
- clean_llm_output(text)
- get_emotion(text, emotion_model, tokenizer)
- map_emotion_to_expression(emotion)
- get_wav_duration(path)

Make sure char_config.yaml contains history_file and model keys (see your example config).
"""

import os
import time
import uuid
import json
import shutil
import yaml
from pathlib import Path
from openai import OpenAI
from contextlib import suppress
from queue import Queue
from threading import Thread

# ---------------------------
# Load config + OpenAI client
# ---------------------------
CONFIG_PATH = os.path.expanduser('character_config.yaml')
if not os.path.exists(CONFIG_PATH):
    raise FileNotFoundError(f"Config not found at {CONFIG_PATH}")

with open(CONFIG_PATH, 'r') as f:
    char_config = yaml.safe_load(f)

HISTORY_FILE = char_config['history_file']
MODEL = char_config.get('model', 'gpt-4.1-mini')
# MODEL = 'gpt-4.1'
CASE_SYSTEM_PROMPT = "You are a helpful assistant." # override for testing.

SYSTEM_PROMPT = [
    {
        "role": "system",
        "content": [
            {"type": "input_text", "text": char_config['presets']['default']['system_prompt']}
            #{"type": "input_text", "text": CASE_SYSTEM_PROMPT}
        ]
    }
]

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise EnvironmentError('Please set OPENAI_API_KEY in your environment')

client = OpenAI(api_key=OPENAI_API_KEY)

# ---------------------------
# History utilities
# ---------------------------

def load_history():
    if os.path.exists(HISTORY_FILE):
        with open(HISTORY_FILE, 'r') as f:
            return json.load(f)
    return SYSTEM_PROMPT.copy()


def save_history(history):
    with open(HISTORY_FILE, 'w') as f:
        json.dump(history, f, indent=2)

# ---------------------------
# Streaming helper
# ---------------------------

def stream_text_chunks(messages, min_len=30, max_len=120):
    """Synchronous generator that yields text chunks as the model streams.

    Uses the OpenAI Responses streaming client like your example.
    """
    buffer = ""

    with client.responses.stream(
        model=MODEL,
        input=messages,
        temperature=1,
        top_p=1,
        max_output_tokens=2048,
    ) as stream:

        for event in stream:
            # event.type values: "response.output_text.delta", "response.output_text.done", ...
            if event.type == "response.output_text.delta":
                buffer += event.delta

                # Rule 1: yield if buffer ends with punctuation AND is long enough
                if buffer.endswith(('.', '?', '!', '…')) and len(buffer) >= min_len:
                    yield buffer.strip()
                    buffer = ""

                # Rule 2: force yield if buffer gets too long
                elif len(buffer) >= max_len:
                    yield buffer.strip()
                    buffer = ""

            elif event.type == "response.output_text.done":
                # Final flush
                if buffer.strip():
                    yield buffer.strip()
                # After final flush we'll exit the context; final response object is available

        final_response = stream.get_final_response()
        # Optionally return final_response (can't return from generator).

# ---------------------------
# Playback worker (single-threaded sequential playback)
# ---------------------------

class PlaybackWorker:
    def __init__(self):
        # queue items are tuples: (public_audio_path (Path), expression (str), assistant_text (str), duration (float))
        self.q = Queue()
        self.thread = Thread(target=self._run, daemon=True)
        self._running = False
        self.queue_finished_event = Event()  # NEW: Event to signal queue is empty
        self.queue_finished_event.set()  # Start as finished (no items)
        # flag to indicate whether the avatar is currently in the "talking" animation state
        self._talking = False

    def start(self):
        if not self._running:
            self._running = True
            self.thread.start()

    def enqueue(self, public_audio_path: Path, expression: str, assistant_text: str, duration: float):
        self.queue_finished_event.clear()  # NEW: Mark queue as not finished
        self.q.put((public_audio_path, expression, assistant_text, duration))

    def wait_until_finished(self, timeout=None):
        """
        Wait until the playback queue is completely empty and processed.

        Args:
            timeout: Maximum time to wait in seconds (None = wait forever)

        Returns:
            True if queue finished, False if timeout occurred
        """
        return self.queue_finished_event.wait(timeout)
    
    def _run(self):
        while True:
            item = self.q.get()
            if item is None:
                break
            public_audio_path, expression, assistant_text, duration = item

            # Start the talking animation once when the first chunk of a sequence begins.
            # Subsequent chunks won't retrigger the animation (avoids jump/cut).
            try:
                if not self._talking:
                    thinking_anim = Path("animations/mixamo") / "Talking.fbx"
                    vrm_animate("start_mixamo", str(thinking_anim))
                    set_vrm_state("talking")
                    self._talking = True
            except Exception as e:
                print("vrm_animate (start talking) failed:", e)

            # Call vrm_talk for every chunk so the client receives the audio cue + metadata
            try:
                vrm_talk(str(public_audio_path), expression, assistant_text, int(duration))
            except Exception as e:
                print("vrm_talk failed:", e)

            # wait for the audio's duration so we don't overlap
            try:
                time.sleep(duration)
            except Exception:
                # defensive
                time.sleep(max(0.2, duration))

            # If the queue is empty after finishing this chunk, return to idle and clear talking flag.
            # This ensures a smooth transition back to idle at the end of the final chunk.
            try:
                if self.q.empty():
                    self.queue_finished_event.set()
                    idle_path = Path("animations/mixamo") / "Idle.fbx"
                    #vrm_animate("start_mixamo", str(idle_path))
                    self._talking = False
            except Exception as e:
                print("vrm_animate (idle) failed:", e)

    def stop(self):
        self.q.put(None)
        self.thread.join()

# ---------------------------
# Utilities
# ---------------------------

def ensure_dirs():
    Path('client/audio').mkdir(parents=True, exist_ok=True)
    Path('audio').mkdir(parents=True, exist_ok=True)


def copy_to_public(client_path: Path, public_path: Path):
    # Copy the client audio file to the public folder that vrm_talk expects
    shutil.copy2(client_path, public_path)

# Fallback duration reader (if you don't have one)

def fallback_get_wav_duration(p: Path):
    try:
        import wave, contextlib
        with contextlib.closing(wave.open(str(p),'r')) as wf:
            frames = wf.getnframes()
            rate = wf.getframerate()
            return frames / float(rate)
    except Exception:
        return 3.0

# ---------------------------
# Main orchestration
# ---------------------------

def main_loop():
    ensure_dirs()

    playback = PlaybackWorker()
    playback.start()

    # Load any models or tokenizers you have for emotion detection here
    # whisper_model, emotion_model, tokenizer = load_your_models()

    while True:

        try:

            print("\n⏳ Waiting for playback queue to finish...")
            playback.wait_until_finished()
            print("✅ Queue finished, ready for input")
            # 1) Idle animation + state 
            # try:
            idle_anim = Path("animations/mixamo") / "Idle.fbx"
            vrm_animate("start_mixamo", str(idle_anim))
            set_vrm_state("idle")
            # except Exception:
            #     pass
            
            # cont = input(">>> ENTER TO CONTINUE") # UNCOMMENT IF YOU WANT PUSH TO TALK

            conversation_recording = Path("audio") / "conversation.wav"
            conversation_recording.parent.mkdir(parents=True, exist_ok=True)
            conversation_recording = str(conversation_recording)
            record_on_speech(
                output_file=conversation_recording,
                samplerate=44100,
                channels=1,
                silence_threshold=0.02,  # Adjust based on your microphone sensitivity
                silence_duration=2,     # Stop after 3 seconds of silence
                device=None             # Use default device, or specify by ID or name
            )
            # record while listening sorry I don't think this works I'll have to work on it later. 
            # set_vrm_state("listening")


            # 3) Thinking animation
            try:
                thinking_anim = Path("animations/mixamo") / "Thinking.fbx"
                vrm_animate("start_mixamo", str(thinking_anim))
                set_vrm_state("thinking")
            except Exception:
                pass

            # 4) Transcribe
            user_spoken_text = transcribe_audio_groq(aud_path=conversation_recording)

            # 5) Build messages history
            messages = load_history()
            messages.append({
                "role": "user",
                "content": [
                    {"type": "input_text", "text": user_spoken_text}
                ]
            })

            # 6) Stream model and generate TTS per chunk
            print("[llm] streaming response...")
            # thinking_anim = Path("animations/mixamo") / "Talking.fbx"
            # vrm_animate("start_mixamo", str(thinking_anim))
            # set_vrm_state("talking")
            full_assistant_text = ""

            for chunk in stream_text_chunks(messages):
                print("[chunk]", chunk)

                # accumulate final text
                full_assistant_text += (chunk + " ")

                # prepare TTS text and emotion
                tts_read_text = clean_llm_output(chunk)
                # emotion = get_emotion(chunk, None, None)  # plug your emotion model/tokenizer
                # expression = map_emotion_to_expression(emotion)
                # temp implementation
                emotion = "relaxed"   # or "smug" etc.
                expression = "relaxed" 

                # create unique filename for this chunk
                uid = uuid.uuid4().hex
                filename = f"output_{uid}.wav"
                client_out = Path('client') / 'audio' / filename
                public_out = Path('audio') / filename
                client_out.parent.mkdir(parents=True, exist_ok=True)

                # generate TTS (blocking). Expected to write client_out
                try:
                    sovits_gen(tts_read_text, output_wav_pth=str(client_out))
                except TypeError:
                    # fallback if your function signature is sovits_gen(text, emotion, output_path)
                    sovits_gen(tts_read_text, str(client_out))

                # copy to public path expected by VRM bridge
                copy_to_public(client_out, public_out)

                # determine duration
                try:
                    duration = get_wav_duration(public_out)
                except Exception:
                    duration = fallback_get_wav_duration(public_out)

                # enqueue for sequential playback
                playback.enqueue(public_out, expression, chunk, duration)

            # 7) After streaming ends, append the full assistant message to history and save
            final_text = full_assistant_text.strip()
            print("[llm final]", final_text)

            # append assistant to messages and save history
            messages.append({
                "role": "assistant",
                "content": [
                    {"type": "output_text", "text": final_text}
                ]
            })
            save_history(messages)

            # Optionally wait a short moment for queued audio to finish playing before next loop
            # NOTE: playback worker will ensure queued audio plays sequentially; here we don't block.
            time.sleep(0.1)

        except KeyboardInterrupt:
            print("Interrupted by user, stopping.")
            playback.stop()
            break
        except Exception as e:
            print("Error in main loop:", e)
            time.sleep(1)


if __name__ == '__main__':
    main_loop()
Initial release 0.5 2026-05-24 13:31:30 +02:00			`from faster_whisper import WhisperModel`
			`from process.asr_func.asr_auto_record import record_on_speech, transcribe_audio`
			`from process.asr_func.asr_transcribe_groq import record_on_speech, transcribe_audio_groq`
			`from process.llm_funcs.llm_scr import llm_response, llm_response_with_memory`
			`from process.tts_func.sovits_ping import sovits_gen, play_audio, get_wav_duration`
			`from process.tts_func.tts_preprocess import clean_llm_output`
			`from process.vrm_func.vrm_ping import vrm_talk, vrm_animate`
			`from process.vrm_func.vrm_states_ping import set_vrm_state`

			`from pathlib import Path`
			`import os`
			`import time`
			`### transcribe audio`
			`import uuid`
			`import soundfile as sf`
			`import asyncio`
			`import threading`
			`import random`
			`import os`
			`import time`
			`import uuid`
			`import json`
			`import shutil`
			`import yaml`
			`from pathlib import Path`
			`from openai import OpenAI`
			`from contextlib import suppress`
			`from queue import Queue`
			`from threading import Event, Thread`
			`from dotenv import load_dotenv`
			`from pathlib import Path`

			`load_dotenv()`
			`openai_api_key = os.getenv("OPENAI_API_KEY")`
			`def get_wav_duration(path):`
			`with sf.SoundFile(path) as f:`
			`return len(f) / f.samplerate`


			`#!/usr/bin/env python3`
			`"""`
			`Streaming LLM -> chunked TTS -> queued playback script for Riko.`

			`What it does:`
			`- Records user speech (uses your record_on_speech)`
			`- Transcribes (transcribe_audio)`
			`- Streams LLM text (OpenAI Responses streaming)`
			`- For each chunk: generate TTS via sovits_gen, copy to public audio dir, enqueue for playback`
			`- Playback loop calls vrm_talk and vrm_animate and waits for the audio's duration to avoid overlap`
			`- At the end of the stream, the full assistant text is appended to the JSON history file`

			`Fill in or import the helper functions you already have in your project:`
			`- record_on_speech(output_file, samplerate, channels, silence_threshold, silence_duration, device)`
			`- transcribe_audio(whisper_model, aud_path)`
			`- clean_asr_output(text)`
			`- sovits_gen(in_text, emotion, output_wav_pth) -> returns path to generated wav (must write to output_wav_pth)`
			`- vrm_talk(public_audio_path, expression, llm_output, duration)`
			`- vrm_animate(cmd, path)`
			`- clean_llm_output(text)`
			`- get_emotion(text, emotion_model, tokenizer)`
			`- map_emotion_to_expression(emotion)`
			`- get_wav_duration(path)`

			`Make sure char_config.yaml contains history_file and model keys (see your example config).`
			`"""`

			`import os`
			`import time`
			`import uuid`
			`import json`
			`import shutil`
			`import yaml`
			`from pathlib import Path`
			`from openai import OpenAI`
			`from contextlib import suppress`
			`from queue import Queue`
			`from threading import Thread`

			`# ---------------------------`
			`# Load config + OpenAI client`
			`# ---------------------------`
			`CONFIG_PATH = os.path.expanduser('character_config.yaml')`
			`if not os.path.exists(CONFIG_PATH):`
			`raise FileNotFoundError(f"Config not found at {CONFIG_PATH}")`

			`with open(CONFIG_PATH, 'r') as f:`
			`char_config = yaml.safe_load(f)`

			`HISTORY_FILE = char_config['history_file']`
			`MODEL = char_config.get('model', 'gpt-4.1-mini')`
			`# MODEL = 'gpt-4.1'`
			`CASE_SYSTEM_PROMPT = "You are a helpful assistant." # override for testing.`

			`SYSTEM_PROMPT = [`
			`{`
			`"role": "system",`
			`"content": [`
			`{"type": "input_text", "text": char_config['presets']['default']['system_prompt']}`
			`#{"type": "input_text", "text": CASE_SYSTEM_PROMPT}`
			`]`
			`}`
			`]`

			`OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')`
			`if not OPENAI_API_KEY:`
			`raise EnvironmentError('Please set OPENAI_API_KEY in your environment')`

			`client = OpenAI(api_key=OPENAI_API_KEY)`

			`# ---------------------------`
			`# History utilities`
			`# ---------------------------`

			`def load_history():`
			`if os.path.exists(HISTORY_FILE):`
			`with open(HISTORY_FILE, 'r') as f:`
			`return json.load(f)`
			`return SYSTEM_PROMPT.copy()`


			`def save_history(history):`
			`with open(HISTORY_FILE, 'w') as f:`
			`json.dump(history, f, indent=2)`

			`# ---------------------------`
			`# Streaming helper`
			`# ---------------------------`

			`def stream_text_chunks(messages, min_len=30, max_len=120):`
			`"""Synchronous generator that yields text chunks as the model streams.`

			`Uses the OpenAI Responses streaming client like your example.`
			`"""`
			`buffer = ""`

			`with client.responses.stream(`
			`model=MODEL,`
			`input=messages,`
			`temperature=1,`
			`top_p=1,`
			`max_output_tokens=2048,`
			`) as stream:`

			`for event in stream:`
			`# event.type values: "response.output_text.delta", "response.output_text.done", ...`
			`if event.type == "response.output_text.delta":`
			`buffer += event.delta`

			`# Rule 1: yield if buffer ends with punctuation AND is long enough`
			`if buffer.endswith(('.', '?', '!', '…')) and len(buffer) >= min_len:`
			`yield buffer.strip()`
			`buffer = ""`

			`# Rule 2: force yield if buffer gets too long`
			`elif len(buffer) >= max_len:`
			`yield buffer.strip()`
			`buffer = ""`

			`elif event.type == "response.output_text.done":`
			`# Final flush`
			`if buffer.strip():`
			`yield buffer.strip()`
			`# After final flush we'll exit the context; final response object is available`

			`final_response = stream.get_final_response()`
			`# Optionally return final_response (can't return from generator).`

			`# ---------------------------`
			`# Playback worker (single-threaded sequential playback)`
			`# ---------------------------`

			`class PlaybackWorker:`
			`def __init__(self):`
			`# queue items are tuples: (public_audio_path (Path), expression (str), assistant_text (str), duration (float))`
			`self.q = Queue()`
			`self.thread = Thread(target=self._run, daemon=True)`
			`self._running = False`
			`self.queue_finished_event = Event() # NEW: Event to signal queue is empty`
			`self.queue_finished_event.set() # Start as finished (no items)`
			`# flag to indicate whether the avatar is currently in the "talking" animation state`
			`self._talking = False`

			`def start(self):`
			`if not self._running:`
			`self._running = True`
			`self.thread.start()`

			`def enqueue(self, public_audio_path: Path, expression: str, assistant_text: str, duration: float):`
			`self.queue_finished_event.clear() # NEW: Mark queue as not finished`
			`self.q.put((public_audio_path, expression, assistant_text, duration))`

			`def wait_until_finished(self, timeout=None):`
			`"""`
			`Wait until the playback queue is completely empty and processed.`

			`Args:`
			`timeout: Maximum time to wait in seconds (None = wait forever)`

			`Returns:`
			`True if queue finished, False if timeout occurred`
			`"""`
			`return self.queue_finished_event.wait(timeout)`

			`def _run(self):`
			`while True:`
			`item = self.q.get()`
			`if item is None:`
			`break`
			`public_audio_path, expression, assistant_text, duration = item`

			`# Start the talking animation once when the first chunk of a sequence begins.`
			`# Subsequent chunks won't retrigger the animation (avoids jump/cut).`
			`try:`
			`if not self._talking:`
			`thinking_anim = Path("animations/mixamo") / "Talking.fbx"`
			`vrm_animate("start_mixamo", str(thinking_anim))`
			`set_vrm_state("talking")`
			`self._talking = True`
			`except Exception as e:`
			`print("vrm_animate (start talking) failed:", e)`

			`# Call vrm_talk for every chunk so the client receives the audio cue + metadata`
			`try:`
			`vrm_talk(str(public_audio_path), expression, assistant_text, int(duration))`
			`except Exception as e:`
			`print("vrm_talk failed:", e)`

			`# wait for the audio's duration so we don't overlap`
			`try:`
			`time.sleep(duration)`
			`except Exception:`
			`# defensive`
			`time.sleep(max(0.2, duration))`

			`# If the queue is empty after finishing this chunk, return to idle and clear talking flag.`
			`# This ensures a smooth transition back to idle at the end of the final chunk.`
			`try:`
			`if self.q.empty():`
			`self.queue_finished_event.set()`
			`idle_path = Path("animations/mixamo") / "Idle.fbx"`
			`#vrm_animate("start_mixamo", str(idle_path))`
			`self._talking = False`
			`except Exception as e:`
			`print("vrm_animate (idle) failed:", e)`

			`def stop(self):`
			`self.q.put(None)`
			`self.thread.join()`

			`# ---------------------------`
			`# Utilities`
			`# ---------------------------`

			`def ensure_dirs():`
			`Path('client/audio').mkdir(parents=True, exist_ok=True)`
			`Path('audio').mkdir(parents=True, exist_ok=True)`


			`def copy_to_public(client_path: Path, public_path: Path):`
			`# Copy the client audio file to the public folder that vrm_talk expects`
			`shutil.copy2(client_path, public_path)`

			`# Fallback duration reader (if you don't have one)`

			`def fallback_get_wav_duration(p: Path):`
			`try:`
			`import wave, contextlib`
			`with contextlib.closing(wave.open(str(p),'r')) as wf:`
			`frames = wf.getnframes()`
			`rate = wf.getframerate()`
			`return frames / float(rate)`
			`except Exception:`
			`return 3.0`

			`# ---------------------------`
			`# Main orchestration`
			`# ---------------------------`

			`def main_loop():`
			`ensure_dirs()`

			`playback = PlaybackWorker()`
			`playback.start()`

			`# Load any models or tokenizers you have for emotion detection here`
			`# whisper_model, emotion_model, tokenizer = load_your_models()`

			`while True:`

			`try:`

			`print("\n⏳ Waiting for playback queue to finish...")`
			`playback.wait_until_finished()`
			`print("✅ Queue finished, ready for input")`
			`# 1) Idle animation + state`
			`# try:`
			`idle_anim = Path("animations/mixamo") / "Idle.fbx"`
			`vrm_animate("start_mixamo", str(idle_anim))`
			`set_vrm_state("idle")`
			`# except Exception:`
			`# pass`

			`# cont = input(">>> ENTER TO CONTINUE") # UNCOMMENT IF YOU WANT PUSH TO TALK`

			`conversation_recording = Path("audio") / "conversation.wav"`
			`conversation_recording.parent.mkdir(parents=True, exist_ok=True)`
			`conversation_recording = str(conversation_recording)`
			`record_on_speech(`
			`output_file=conversation_recording,`
			`samplerate=44100,`
			`channels=1,`
			`silence_threshold=0.02, # Adjust based on your microphone sensitivity`
			`silence_duration=2, # Stop after 3 seconds of silence`
			`device=None # Use default device, or specify by ID or name`
			`)`
			`# record while listening sorry I don't think this works I'll have to work on it later.`
			`# set_vrm_state("listening")`


			`# 3) Thinking animation`
			`try:`
			`thinking_anim = Path("animations/mixamo") / "Thinking.fbx"`
			`vrm_animate("start_mixamo", str(thinking_anim))`
			`set_vrm_state("thinking")`
			`except Exception:`
			`pass`

			`# 4) Transcribe`
			`user_spoken_text = transcribe_audio_groq(aud_path=conversation_recording)`

			`# 5) Build messages history`
			`messages = load_history()`
			`messages.append({`
			`"role": "user",`
			`"content": [`
			`{"type": "input_text", "text": user_spoken_text}`
			`]`
			`})`

			`# 6) Stream model and generate TTS per chunk`
			`print("[llm] streaming response...")`
			`# thinking_anim = Path("animations/mixamo") / "Talking.fbx"`
			`# vrm_animate("start_mixamo", str(thinking_anim))`
			`# set_vrm_state("talking")`
			`full_assistant_text = ""`

			`for chunk in stream_text_chunks(messages):`
			`print("[chunk]", chunk)`

			`# accumulate final text`
			`full_assistant_text += (chunk + " ")`

			`# prepare TTS text and emotion`
			`tts_read_text = clean_llm_output(chunk)`
			`# emotion = get_emotion(chunk, None, None) # plug your emotion model/tokenizer`
			`# expression = map_emotion_to_expression(emotion)`
			`# temp implementation`
			`emotion = "relaxed" # or "smug" etc.`
			`expression = "relaxed"`

			`# create unique filename for this chunk`
			`uid = uuid.uuid4().hex`
			`filename = f"output_{uid}.wav"`
			`client_out = Path('client') / 'audio' / filename`
			`public_out = Path('audio') / filename`
			`client_out.parent.mkdir(parents=True, exist_ok=True)`

			`# generate TTS (blocking). Expected to write client_out`
			`try:`
			`sovits_gen(tts_read_text, output_wav_pth=str(client_out))`
			`except TypeError:`
			`# fallback if your function signature is sovits_gen(text, emotion, output_path)`
			`sovits_gen(tts_read_text, str(client_out))`

			`# copy to public path expected by VRM bridge`
			`copy_to_public(client_out, public_out)`

			`# determine duration`
			`try:`
			`duration = get_wav_duration(public_out)`
			`except Exception:`
			`duration = fallback_get_wav_duration(public_out)`

			`# enqueue for sequential playback`
			`playback.enqueue(public_out, expression, chunk, duration)`

			`# 7) After streaming ends, append the full assistant message to history and save`
			`final_text = full_assistant_text.strip()`
			`print("[llm final]", final_text)`

			`# append assistant to messages and save history`
			`messages.append({`
			`"role": "assistant",`
			`"content": [`
			`{"type": "output_text", "text": final_text}`
			`]`
			`})`
			`save_history(messages)`

			`# Optionally wait a short moment for queued audio to finish playing before next loop`
			`# NOTE: playback worker will ensure queued audio plays sequentially; here we don't block.`
			`time.sleep(0.1)`

			`except KeyboardInterrupt:`
			`print("Interrupted by user, stopping.")`
			`playback.stop()`
			`break`
			`except Exception as e:`
			`print("Error in main loop:", e)`
			`time.sleep(1)`


			`if __name__ == '__main__':`
			`main_loop()`