first commit
This commit is contained in:
83
utils.py
Normal file
83
utils.py
Normal file
@@ -0,0 +1,83 @@
|
||||
# utils.py - Utility Functions
|
||||
|
||||
import io
|
||||
import re
|
||||
import base64
|
||||
|
||||
|
||||
def convert_to_mp3(audio_base64, source_format='wav'):
    """
    Re-encode base64 audio as a base64 MP3 string, best effort.

    The input is base64-decoded, loaded through pydub using
    ``source_format`` as the container hint, exported as MP3 at a
    192k bitrate, and base64-encoded again as a UTF-8 string.

    If pydub cannot be imported, or any step of the conversion raises,
    a warning is printed and ``audio_base64`` is returned unchanged.
    """
    try:
        # Imported here so a missing pydub is caught by the ImportError
        # handler below instead of failing at module import time.
        from pydub import AudioSegment

        source_stream = io.BytesIO(base64.b64decode(audio_base64))
        segment = AudioSegment.from_file(source_stream, format=source_format)

        encoded_stream = io.BytesIO()
        segment.export(encoded_stream, format='mp3', bitrate='192k')

        # getvalue() yields the whole buffer regardless of stream position.
        return base64.b64encode(encoded_stream.getvalue()).decode('utf-8')
    except ImportError:
        print("⚠️ pydub not installed, returning original format")
        return audio_base64
    except Exception as e:
        print(f"⚠️ MP3 conversion failed: {e}, returning original")
        return audio_base64
|
||||
|
||||
|
||||
def sanitize_filename(name):
    """
    Sanitize a string for use as a filename.

    Keeps only alphanumeric characters (as judged by ``str.isalnum``),
    underscores, hyphens and spaces; every other character is dropped.

    Returns ``'unnamed'`` when ``name`` is empty/None, or when nothing
    usable is left after filtering (for example ``"???"`` or a string of
    only spaces), so the result is never an empty or blank filename.
    """
    if not name:
        return 'unnamed'

    cleaned = "".join(c for c in name if c.isalnum() or c in ('_', '-', ' '))

    # Input made solely of rejected characters would otherwise produce an
    # empty (or all-space) name; fall back the same way as for empty input.
    if not cleaned.strip():
        return 'unnamed'
    return cleaned
|
||||
|
||||
|
||||
def strip_markdown(text):
    """
    Strip markdown formatting from text to get plain text for TTS.

    Line-level markers (code fences, blockquotes, headings, horizontal
    rules, list markers) are removed first, then inline markup (bold,
    italic, strikethrough, inline code, links). Images are dropped
    entirely; link text is kept. Runs of three or more newlines are
    collapsed to a single blank line and the result is stripped.

    Returns an empty string for empty/None input.
    """
    if not text:
        return ""

    # Remove image references completely (must precede the link rule,
    # which would otherwise keep the alt text and leave a stray '!').
    text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text)

    # --- Line-level markers -------------------------------------------
    # These run BEFORE the emphasis rules: otherwise '***' / '___' rules
    # are partially eaten as emphasis, and a '* ' bullet pairs up with a
    # later '*' on the same line. Only horizontal whitespace ([ \t]) is
    # used so a rule can never swallow neighbouring blank lines.

    # Remove fenced-code delimiter lines (``` plus optional language tag),
    # keeping the code body itself.
    text = re.sub(r'^[ \t]*`{3,}[^`\n]*$', '', text, flags=re.MULTILINE)

    # Remove blockquote markers, including nested ones ('> > text').
    text = re.sub(r'^[ \t]*(?:>[ \t]*)+', '', text, flags=re.MULTILINE)

    # Remove heading markers (after blockquotes so '> # Title' is handled).
    text = re.sub(r'^#{1,6}[ \t]*', '', text, flags=re.MULTILINE)

    # Remove horizontal rules.
    text = re.sub(
        r'^[ \t]*(?:-{3,}|\*{3,}|_{3,})[ \t]*$', '', text, flags=re.MULTILINE
    )

    # Remove list markers (bulleted, then numbered).
    text = re.sub(r'^[ \t]*[-*+][ \t]+', '', text, flags=re.MULTILINE)
    text = re.sub(r'^[ \t]*\d+\.[ \t]+', '', text, flags=re.MULTILINE)

    # --- Inline markup ------------------------------------------------
    # Remove bold/italic. Underscore forms must not be glued to a word
    # character on the outside, so identifiers like my_var_name survive.
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'\*(.+?)\*', r'\1', text)
    text = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', text)
    text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)

    # Remove strikethrough
    text = re.sub(r'~~(.+?)~~', r'\1', text)

    # Remove inline code
    text = re.sub(r'`([^`]+)`', r'\1', text)

    # Remove links but keep text
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)

    # Clean up whitespace left behind by removed lines
    text = re.sub(r'\n{3,}', '\n\n', text)

    return text.strip()
|
||||
Reference in New Issue
Block a user