# utils.py - Utility Functions
|
|
|
|
import io
|
|
import re
|
|
import base64
|
|
|
|
|
|
def convert_to_mp3(audio_base64, source_format='wav'):
    """
    Convert base64-encoded audio to base64-encoded MP3.

    The conversion is best-effort: whenever it cannot be performed the
    input is handed back unchanged and a warning is printed, so callers
    always receive something they can forward.

    Args:
        audio_base64: Base64 text of the source audio.
        source_format: Format name passed to pydub's
            ``AudioSegment.from_file`` (default ``'wav'``).

    Returns:
        The MP3 data (exported at 192k bitrate) as a base64 string on
        success; otherwise ``audio_base64`` exactly as it was passed in.
    """
    # Nothing to convert: skip the decoder round-trip and its warning.
    if not audio_base64:
        return audio_base64

    # pydub is imported lazily so this module still loads without it.
    # The import has its own guard so that only a genuinely missing
    # dependency is reported as "not installed".
    try:
        from pydub import AudioSegment
    except ImportError:
        print("⚠️ pydub not installed, returning original format")
        return audio_base64

    try:
        audio_bytes = base64.b64decode(audio_base64)
        audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format=source_format)

        mp3_buffer = io.BytesIO()
        audio.export(mp3_buffer, format='mp3', bitrate='192k')

        # getvalue() returns the whole buffer regardless of stream position.
        return base64.b64encode(mp3_buffer.getvalue()).decode('utf-8')
    except Exception as e:
        # Deliberate catch-all: any decode/transcode failure falls back
        # to the original payload instead of propagating.
        print(f"⚠️ MP3 conversion failed: {e}, returning original")
        return audio_base64
|
|
|
|
|
|
def sanitize_filename(name):
    """
    Sanitize a string for use as a filename.

    Keeps only alphanumeric characters, underscores, hyphens and spaces;
    every other character is dropped.

    Args:
        name: Proposed filename text (may be empty or ``None``).

    Returns:
        The filtered string, or ``'unnamed'`` when the input is empty or
        nothing usable remains after filtering (e.g. ``"///"`` or a name
        made only of spaces).
    """
    if not name:
        return 'unnamed'

    cleaned = "".join(c for c in name if c.isalnum() or c in ('_', '-', ' '))

    # A name consisting solely of rejected characters (or only blanks)
    # would otherwise yield an empty/invisible filename.
    if not cleaned.strip():
        return 'unnamed'
    return cleaned
|
|
|
|
|
|
def strip_markdown(text):
    """
    Strip markdown formatting from text to get plain text for TTS.

    Passes run in a fixed order: images and links first, then line-level
    markers (blockquotes, headings, horizontal rules, list bullets), and
    only then inline emphasis. Line-level markers must go before emphasis
    because ``*`` and ``_`` double as bullet / rule characters and would
    otherwise be consumed as unbalanced emphasis delimiters.

    Args:
        text: Markdown source (may be empty or ``None``).

    Returns:
        The text with markdown syntax removed, runs of three or more
        newlines collapsed to two, and surrounding whitespace stripped.
        Returns ``""`` for empty input.
    """
    if not text:
        return ""

    # Remove image references completely (before links: same syntax plus '!')
    text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text)

    # Remove links but keep text. Done before emphasis so underscores or
    # asterisks inside URLs cannot pair up with emphasis markers elsewhere.
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)

    # Line-anchored patterns use [ \t] rather than \s so they never
    # swallow newlines (which would merge paragraphs).

    # Remove blockquote markers (first, so a quoted heading/list is exposed)
    text = re.sub(r'^>[ \t]*', '', text, flags=re.MULTILINE)

    # Remove heading markers
    text = re.sub(r'^#{1,6}[ \t]*', '', text, flags=re.MULTILINE)

    # Remove horizontal rules
    text = re.sub(r'^[ \t]*(-{3,}|\*{3,}|_{3,})[ \t]*$', '', text, flags=re.MULTILINE)

    # Remove list markers
    text = re.sub(r'^[ \t]*[-*+][ \t]+', '', text, flags=re.MULTILINE)
    text = re.sub(r'^[ \t]*\d+\.[ \t]+', '', text, flags=re.MULTILINE)

    # Remove bold/italic. Underscore emphasis must sit on word boundaries
    # so identifiers such as snake_case_name keep their underscores.
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'\*(.+?)\*', r'\1', text)
    text = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', text)
    text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)

    # Remove strikethrough
    text = re.sub(r'~~(.+?)~~', r'\1', text)

    # Remove inline code markers but keep the code text
    text = re.sub(r'`([^`]+)`', r'\1', text)

    # Clean up whitespace left behind by removed lines
    text = re.sub(r'\n{3,}', '\n\n', text)

    return text.strip()
|