first commit

2026-02-20 13:53:36 +06:00
commit 8e02b9ad09
35 changed files with 11059 additions and 0 deletions
--- a/utils.py
+++ b/utils.py
@@ -0,0 +1,83 @@
+# utils.py - Utility Functions
+
+import io
+import re
+import base64
+
+
+def convert_to_mp3(audio_base64, source_format='wav'):
+    """
+    Re-encode base64 audio as a 192k-bitrate MP3 and return it as base64 text.
+
+    The payload is base64-decoded, loaded through pydub as ``source_format``,
+    exported to MP3 in memory and base64-encoded again.
+
+    This is best-effort: if pydub cannot be imported, or any step raises,
+    a warning is printed and ``audio_base64`` is returned unchanged, so the
+    result is not guaranteed to be MP3 data.
+    """
+    try:
+        # Imported inside the try so a missing pydub hits the fallback below.
+        from pydub import AudioSegment
+
+        raw_audio = io.BytesIO(base64.b64decode(audio_base64))
+        segment = AudioSegment.from_file(raw_audio, format=source_format)
+
+        encoded = io.BytesIO()
+        segment.export(encoded, format='mp3', bitrate='192k')
+
+        # getvalue() yields the whole buffer regardless of cursor position.
+        return base64.b64encode(encoded.getvalue()).decode('utf-8')
+    except ImportError:
+        print("⚠️ pydub not installed, returning original format")
+    except Exception as e:
+        print(f"⚠️ MP3 conversion failed: {e}, returning original")
+
+    # Both failure paths fall through to here and hand back the input.
+    return audio_base64
+
+
+def sanitize_filename(name):
+    """
+    Sanitize a string for use as a filename.
+
+    Keeps alphanumeric characters (as judged by ``str.isalnum``),
+    underscores, hyphens and spaces; every other character is dropped.
+
+    Args:
+        name: The proposed name. May be empty or ``None``.
+
+    Returns:
+        The filtered name, or ``'unnamed'`` when ``name`` is empty or when
+        nothing but whitespace survives the filtering.
+    """
+    if not name:
+        return 'unnamed'
+    cleaned = "".join(c for c in name if c.isalnum() or c in ('_', '-', ' '))
+    # A name made only of rejected characters (e.g. "???") would otherwise
+    # come back empty or blank, which is not a usable filename.
+    return cleaned if cleaned.strip() else 'unnamed'
+
+
+def strip_markdown(text):
+    """
+    Strip markdown formatting from text to get plain text for TTS.
+
+    Block-level markers (code fences, blockquotes, horizontal rules,
+    headings, list bullets) are removed before inline emphasis, so that a
+    leading ``* `` bullet or a ``***`` rule is never mistaken for an italic
+    delimiter. Line-anchored patterns only consume spaces and tabs, never
+    newlines, so paragraph breaks next to those markers are preserved.
+
+    Args:
+        text: Markdown source. May be empty or ``None``.
+
+    Returns:
+        The text with markup removed, runs of three or more newlines
+        collapsed to two, and surrounding whitespace stripped. Returns
+        ``""`` for empty input.
+    """
+    if not text:
+        return ""
+
+    # Remove image references completely
+    text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text)
+
+    # Remove links but keep text. Done before emphasis so underscores or
+    # asterisks inside a URL cannot pair up with ones in the prose.
+    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
+
+    # Remove code-fence lines (``` or ~~~ plus optional info string); the
+    # code between the fences is kept.
+    text = re.sub(r'^[ \t]*(?:`{3,}[^`\n]*|~{3,}[^~\n]*)$', '', text,
+                  flags=re.MULTILINE)
+
+    # Remove blockquote markers, including nested ones such as ">> text"
+    text = re.sub(r'^(?:>[ \t]*)+', '', text, flags=re.MULTILINE)
+
+    # Remove horizontal rules ("---", "***", "___", also spaced "* * *").
+    # Must run before emphasis, which would otherwise shrink "***" to "*".
+    text = re.sub(r'^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$', '', text,
+                  flags=re.MULTILINE)
+
+    # Remove headings markers
+    text = re.sub(r'^#{1,6}[ \t]*', '', text, flags=re.MULTILINE)
+
+    # Remove list markers. Must run before emphasis so a "* " bullet is not
+    # paired with a later "*" on the same line.
+    text = re.sub(r'^[ \t]*[-*+][ \t]+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^[ \t]*\d+\.[ \t]+', '', text, flags=re.MULTILINE)
+
+    # Remove bold/italic. The underscore forms require non-word characters
+    # outside the delimiters so identifiers like my_var_name stay intact.
+    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
+    text = re.sub(r'\*(.+?)\*', r'\1', text)
+    text = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', text)
+    text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)
+
+    # Remove strikethrough
+    text = re.sub(r'~~(.+?)~~', r'\1', text)
+
+    # Remove inline code
+    text = re.sub(r'`([^`]+)`', r'\1', text)
+
+    # Clean up whitespace
+    text = re.sub(r'\n{3,}', '\n\n', text)
+
+    return text.strip()