Files
audiobook-maker-pro-v4/utils.py
Ashim Kumar 8e02b9ad09 first commit
2026-02-20 13:53:36 +06:00

84 lines
2.3 KiB
Python

# utils.py - Utility Functions
import io
import re
import base64
def convert_to_mp3(audio_base64, source_format='wav'):
    """
    Re-encode base64 audio as a 192 kbps MP3, returned as base64 text.

    The conversion is best-effort: when pydub cannot be imported, or when
    any step of the decoding/encoding fails, a warning is printed and the
    input is handed back untouched.

    Args:
        audio_base64: Base64-encoded audio data.
        source_format: Format name passed to pydub when reading the input.

    Returns:
        Base64 string of the MP3 data on success, otherwise the original
        ``audio_base64`` value.
    """
    try:
        # Imported here so that a missing pydub only disables conversion
        # instead of breaking the import of this module.
        from pydub import AudioSegment

        source_stream = io.BytesIO(base64.b64decode(audio_base64))
        segment = AudioSegment.from_file(source_stream, format=source_format)

        encoded_stream = io.BytesIO()
        segment.export(encoded_stream, format='mp3', bitrate='192k')
        return base64.b64encode(encoded_stream.getvalue()).decode('utf-8')
    except ImportError:
        print("⚠️ pydub not installed, returning original format")
    except Exception as e:
        print(f"⚠️ MP3 conversion failed: {e}, returning original")

    # Reached only after one of the failure paths above.
    return audio_base64
def sanitize_filename(name):
    """
    Sanitize a string for use as a filename.

    Keeps only alphanumeric characters (as decided by ``str.isalnum``),
    underscores, hyphens and spaces; every other character is dropped.

    Args:
        name: Candidate filename; may be empty or None.

    Returns:
        The filtered string, or 'unnamed' when ``name`` is falsy or when
        nothing but whitespace survives the filtering.
    """
    if not name:
        return 'unnamed'
    cleaned = "".join(c for c in name if c.isalnum() or c in ('_', '-', ' '))
    # A name made only of rejected characters (e.g. "???") would otherwise
    # come back empty or blank, which is not a usable filename.
    return cleaned if cleaned.strip() else 'unnamed'
def strip_markdown(text):
    """
    Strip markdown formatting from text to get plain text for TTS.

    Block-level markers (blockquotes, headings, horizontal rules, list
    bullets/numbers) are removed first, then inline markup (emphasis,
    strikethrough, inline code, links). Images are dropped entirely; links
    keep their visible text.

    Args:
        text: Markdown source; may be empty or None.

    Returns:
        The plain text with runs of three or more newlines collapsed to a
        single blank line and surrounding whitespace stripped. Returns ""
        for falsy input.
    """
    if not text:
        return ""

    # Remove image references completely (before links, which share syntax)
    text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text)

    # --- Block-level markers -------------------------------------------
    # These run before the inline rules so that a "*" bullet or a "***"
    # rule is never mistaken for an emphasis delimiter. Only horizontal
    # whitespace ([ \t]) is consumed: \s would also match newlines and
    # swallow the blank line that separates paragraphs.

    # Remove blockquote markers (including nested ">>")
    text = re.sub(r'^(?:>[ \t]*)+', '', text, flags=re.MULTILINE)
    # Remove heading markers
    text = re.sub(r'^#{1,6}[ \t]*', '', text, flags=re.MULTILINE)
    # Remove horizontal rules
    text = re.sub(
        r'^[ \t]{0,3}(?:-{3,}|\*{3,}|_{3,})[ \t]*$',
        '',
        text,
        flags=re.MULTILINE,
    )
    # Remove list markers
    text = re.sub(r'^[ \t]*[-*+][ \t]+', '', text, flags=re.MULTILINE)
    text = re.sub(r'^[ \t]*\d+\.[ \t]+', '', text, flags=re.MULTILINE)

    # --- Inline markup ---------------------------------------------------
    # Remove bold/italic written with asterisks
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'\*(.+?)\*', r'\1', text)
    # Remove bold/italic written with underscores. The lookarounds keep
    # intraword underscores (snake_case identifiers, URLs) intact.
    text = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', text)
    text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)
    # Remove strikethrough
    text = re.sub(r'~~(.+?)~~', r'\1', text)
    # Remove inline code
    text = re.sub(r'`([^`]+)`', r'\1', text)
    # Remove links but keep text
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)

    # Clean up whitespace
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()