19 lines
564 B
Python
19 lines
564 B
Python
|
|
import re
|
|||
|
|
|
|||
|
|
def clean_llm_output(text: str) -> str:
    """Normalize raw LLM output into a lowercase, single-spaced string.

    The steps run in this order:

    1. Every ASCII hyphen (``-``) is replaced with a space.
    2. Parenthesized text is removed together with its parentheses.
       Nested groups such as ``"(outer (inner) text)"`` are removed in
       full. Unmatched parentheses are left in place.
    3. The right single quotation mark (U+2019) is replaced with a plain
       ASCII apostrophe.
    4. Runs of whitespace collapse to one space, and leading/trailing
       whitespace is stripped.
    5. The result is lowercased.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; empty if nothing remains after cleaning.
    """
    # 1. Replace hyphens with a space.
    text = text.replace('-', ' ')

    # 2. Remove parenthesized text. The pattern only matches groups with no
    #    parentheses inside them, so removing innermost groups repeatedly
    #    peels nested parentheses from the inside out instead of stopping
    #    at the first ')' and leaving a dangling "...)" fragment.
    innermost_group = r'\([^()]*\)'
    text, removed = re.subn(innermost_group, '', text)
    while removed:
        text, removed = re.subn(innermost_group, '', text)

    # 3. Replace the fancy apostrophe (U+2019) with a regular apostrophe.
    text = text.replace('\u2019', "'")

    # 4. Normalize whitespace: collapse runs into one space and strip.
    #    Done after step 2 so gaps left by removed groups collapse too.
    text = re.sub(r'\s+', ' ', text).strip()

    # 5. Lowercase all letters.
    return text.lower()
|