Ai_Assistant/server/process/tts_func/tts_preprocess.py

19 lines
564 B
Python
Raw Permalink Normal View History

2026-05-24 13:31:30 +02:00
import re
def clean_llm_output(text: str) -> str:
    """Normalize LLM-generated text before it is passed to text-to-speech.

    The steps are applied in this order:

    1. ASCII hyphens are replaced with spaces.
    2. Parenthesized spans, including the parentheses, are removed. Nested
       parentheses are not balanced: removal stops at the first ``)``.
    3. Typographic single quotes (U+2018, U+2019) become ASCII ``'``.
    4. Whitespace runs are collapsed to one space and the ends are stripped.
    5. The result is lowercased.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned, lowercased text.
    """
    # 1. Replace hyphens with space.
    text = text.replace('-', ' ')

    # 2. Remove text in parentheses (including the parentheses).
    # Parentheticals that sit directly before punctuation are removed
    # together with their leading whitespace, so "word (aside)," becomes
    # "word," instead of "word ,".
    text = re.sub(r'(?:\s*\([^)]*\))+(?=[,.;:!?])', '', text)
    text = re.sub(r'\([^)]*\)', '', text)

    # 3. Replace typographic single quotes with the regular apostrophe (').
    # Both the left (U+2018) and right (U+2019) forms are handled.
    text = text.replace('\u2018', "'").replace('\u2019', "'")

    # 4. Normalize whitespace: collapse runs into one space and strip.
    text = re.sub(r'\s+', ' ', text).strip()

    # 5. Lowercase all letters.
    # TODO: maybe make this a bit more advanced.
    return text.lower()