v4.3: file-based media storage + manual VACUUM maintenance

2026-06-12 13:24:00 +06:00
parent 965470853e
commit cc57204aff
10 changed files with 789 additions and 164 deletions
--- a/routes/generation_routes.py
+++ b/routes/generation_routes.py
@@ -1,4 +1,4 @@
-# routes/generation_routes.py - Combined Endpoint with Correct Task Polling
+# routes/generation_routes.py - Combined Endpoint with Correct Task Polling (v4.3)

 import json
 import time
@@ -40,8 +40,6 @@ def poll_beam_task(task_id):
    print(f"   URL: {task_url}")

    start_time = time.time()
-
-    # প্রথম কয়েকটা attempt এ 404 আসতে পারে — task register হতে delay
    initial_delay = True

    while True:
@@ -51,17 +49,14 @@ def poll_beam_task(task_id):
            print(f"❌ Polling timeout after {POLL_MAX_WAIT}s")
            return None, f'Task timed out after {int(POLL_MAX_WAIT)} seconds'

-        # প্রথম ২ সেকেন্ড wait করি task register হতে
        if initial_delay and elapsed < 2:
            time.sleep(2)
            initial_delay = False
            continue

        try:
-            # ★ Bearer token দিয়ে try
            resp = requests.get(task_url, headers=get_beam_auth_headers(), timeout=30)

-            # Bearer fail হলে Basic try করি
            if resp.status_code in (401, 403):
                print(f"   Bearer auth failed, trying Basic...")
                basic_headers = {
@@ -73,20 +68,14 @@ def poll_beam_task(task_id):
            print(f"   [{int(elapsed)}s] HTTP {resp.status_code} | Body: {len(resp.text)} chars")

            if resp.status_code == 404:
-                # Task এখনও register হয়নি — wait
                if elapsed < 30:
                    print(f"   Task not found yet, waiting...")
                    time.sleep(POLL_INTERVAL)
                    continue
                else:
-                    # ৩০ সেকেন্ড পরেও 404 — সমস্যা
                    print(f"❌ Task not found after {int(elapsed)}s")
-
-                    # ★ Debug: response body দেখি
                    print(f"   404 body: {resp.text[:300]}")

-                    # ★ Alternative: Beam API base URL ভিন্ন হতে পারে
-                    # কিছু Beam setup এ URL format ভিন্ন
                    alt_urls = [
                        f"https://api.beam.cloud/v2/task/{task_id}/status/",
                        f"https://api.beam.cloud/v2/task/{task_id}",
@@ -121,25 +110,14 @@ def poll_beam_task(task_id):
                if status in ('COMPLETE', 'COMPLETED', 'SUCCESS'):
                    print(f"✅ Task complete!")

-                    # ★ Result বের করা — Beam বিভিন্ন জায়গায় result রাখে
-                    # 1. 'output' key
-                    # 2. 'result' key
-                    # 3. 'outputs' list (file-based)
-                    # 4. response body তেই (endpoint mode)
-
                    actual_result = None

-                    # Check 'output' (endpoint mode — function return value)
                    if data.get('output') and isinstance(data['output'], dict):
                        actual_result = data['output']
                        print(f"   Result found in 'output' key")
-
-                    # Check 'result'
                    elif data.get('result') and isinstance(data['result'], dict):
                        actual_result = data['result']
                        print(f"   Result found in 'result' key")
-
-                    # Check if top-level has audio_base64 (unlikely but possible)
                    elif data.get('audio_base64'):
                        actual_result = data
                        print(f"   Result found in top-level data")
@@ -149,16 +127,12 @@ def poll_beam_task(task_id):
                    elif actual_result and actual_result.get('success'):
                        return actual_result, None

-                    # ★ Outputs (file-based) — need to download
                    outputs = data.get('outputs', [])
                    if outputs:
                        print(f"   Task has {len(outputs)} output files")
-                        # For our use case, result should be in 'output' not files
-                        # But log it for debug
                        for out in outputs:
                            print(f"   Output: {out.get('name', '?')} → {out.get('url', '?')}")

-                    # No usable result found
                    print(f"   ⚠️ Task complete but no audio in response")
                    print(f"   Response keys: {list(data.keys())}")
                    print(f"   Full response (first 500): {json.dumps(data, default=str)[:500]}")
@@ -177,7 +151,7 @@ def poll_beam_task(task_id):
                    return None, f'Task {status.lower()} on Beam. Container may not have started in time.'

                elif status in ('PENDING', 'RUNNING', 'RETRY'):
-                    pass  # Keep polling
+                    pass

                else:
                    print(f"   Unknown status: {status}")
@@ -221,16 +195,12 @@ def call_beam_and_get_result(text, voice='af_heart', speed=1.0):

    task_id = response.headers.get('X-Task-Id', '')

-    # ========================================
-    # CASE 1: Task ID + empty/no body → Async → Poll
-    # ========================================
+    # CASE 1: Task ID + empty body → Async → Poll
    if task_id and (not response.text or not response.text.strip() or response.headers.get('Content-Length') == '0'):
        print(f"📋 Async mode — Task ID: {task_id}")
        return poll_beam_task(task_id)

-    # ========================================
    # CASE 2: Task ID + body
-    # ========================================
    if task_id and response.text and response.text.strip():
        print(f"📋 Task ID: {task_id} + body ({len(response.text)} chars)")
        try:
@@ -238,20 +208,15 @@ def call_beam_and_get_result(text, voice='af_heart', speed=1.0):
            if result.get('success') and result.get('audio_base64'):
                print(f"✅ Direct sync result")
                return _extract(result), None
-            # Body isn't the final result — poll
            return poll_beam_task(task_id)
        except Exception:
            return poll_beam_task(task_id)

-    # ========================================
    # CASE 3: No task_id + empty body → Error
-    # ========================================
    if not response.text or not response.text.strip():
        return None, 'Empty response from Beam with no task ID'

-    # ========================================
    # CASE 4: Synchronous response
-    # ========================================
    if response.status_code != 200:
        try:
            err = response.json().get('error', response.text[:200])
@@ -326,15 +291,26 @@ def generate_audio():
        if source_format != 'mp3':
            audio_base64 = convert_to_mp3(audio_base64, source_format)

+        # If block_id is given, save the audio straight to a file (v4.3)
        if block_id:
+            from media_storage import save_audio
            db = get_db()
            cursor = db.cursor()
            cursor.execute('''
-                UPDATE markdown_blocks 
-                SET audio_data = ?, audio_format = 'mp3', transcription = ?
-                WHERE id = ?
-            ''', (audio_base64, json.dumps(transcription), block_id))
-            db.commit()
+                SELECT c.project_id FROM markdown_blocks mb
+                JOIN chapters c ON mb.chapter_id = c.id
+                WHERE mb.id = ?
+            ''', (block_id,))
+            row = cursor.fetchone()
+            if row:
+                project_id = row['project_id']
+                rel_path = save_audio(project_id, block_id, audio_base64, 'mp3')
+                cursor.execute('''
+                    UPDATE markdown_blocks 
+                    SET audio_path = ?, audio_data = '', audio_format = 'mp3', transcription = ?
+                    WHERE id = ?
+                ''', (rel_path, json.dumps(transcription), block_id))
+                db.commit()

        print(f"✅ DONE: audio={len(audio_base64)} bytes, words={len(transcription)}")
        print(f"{'='*60}")
@@ -377,7 +353,7 @@ def generate_chapter_audio():
    cursor = db.cursor()

    cursor.execute('''
-        SELECT id, content, tts_text, block_type FROM markdown_blocks 
+        SELECT id, content, tts_text, block_type, chapter_id FROM markdown_blocks 
        WHERE chapter_id = ? ORDER BY block_order
    ''', (chapter_id,))
    blocks = cursor.fetchall()
@@ -385,6 +361,11 @@ def generate_chapter_audio():
    if not blocks:
        return jsonify({'error': 'No blocks found'}), 404

+    # Look up project_id (needed to save files)
+    cursor.execute('SELECT project_id FROM chapters WHERE id = ?', (chapter_id,))
+    ch_row = cursor.fetchone()
+    project_id = ch_row['project_id'] if ch_row else None
+
    results = []
    success_count = 0
    error_count = 0
@@ -394,6 +375,8 @@ def generate_chapter_audio():
    print(f"📖 CHAPTER: {total} blocks, voice={voice}")
    print(f"{'='*60}")

+    from media_storage import save_audio
+
    for idx, block in enumerate(blocks):
        block_id = block['id']
        block_type = block['block_type'] if 'block_type' in block.keys() else 'paragraph'
@@ -437,11 +420,14 @@ def generate_chapter_audio():
            if source_format != 'mp3':
                audio_base64 = convert_to_mp3(audio_base64, source_format)

+            # v4.3: save to file
+            rel_path = save_audio(project_id, block_id, audio_base64, 'mp3') if project_id else None
+
            cursor.execute('''
                UPDATE markdown_blocks 
-                SET audio_data = ?, audio_format = 'mp3', transcription = ?
+                SET audio_path = ?, audio_data = '', audio_format = 'mp3', transcription = ?
                WHERE id = ?
-            ''', (audio_base64, json.dumps(transcription), block_id))
+            ''', (rel_path, json.dumps(transcription), block_id))

            results.append({
                'block_id': block_id,
--- a/routes/project_routes.py
+++ b/routes/project_routes.py
@@ -1,12 +1,18 @@
-# routes/project_routes.py - Project Management Routes (v4.2)
+# routes/project_routes.py - Project Management Routes (v4.3)

 import re
+import os
 import json
 import base64
-from flask import Blueprint, request, jsonify
+from flask import Blueprint, request, jsonify, send_file

-from db import get_db, vacuum_db
+from db import get_db, vacuum_db, get_db_stats
 from auth import login_required
+from media_storage import (
+    save_audio, save_image, save_thumbnail,
+    read_file_base64, get_safe_abs_path,
+    delete_project_media, get_storage_usage_bytes
+)

 project_bp = Blueprint('project', __name__)

@@ -48,6 +54,7 @@ def list_projects():
    cursor.execute('''
        SELECT p.id, p.name, p.created_at, p.updated_at,
               p.is_published, p.published_at, p.thumbnail_data, p.thumbnail_format,
+               p.thumbnail_path,
               p.description, p.author, p.category, p.view_count,
               (SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count,
               (SELECT COUNT(*) FROM markdown_blocks mb 
@@ -55,13 +62,20 @@ def list_projects():
                WHERE c.project_id = p.id) as block_count,
               (SELECT COUNT(*) FROM markdown_blocks mb 
                JOIN chapters c ON mb.chapter_id = c.id 
-                WHERE c.project_id = p.id AND mb.audio_data IS NOT NULL AND mb.audio_data != '') as audio_count
+                WHERE c.project_id = p.id 
+                AND ((mb.audio_data IS NOT NULL AND mb.audio_data != '')
+                     OR (mb.audio_path IS NOT NULL AND mb.audio_path != ''))) as audio_count
        FROM projects p
        ORDER BY p.updated_at DESC
    ''')
    
    projects = []
    for row in cursor.fetchall():
+        # thumbnail: read from file when a path is set, otherwise use the legacy base64
+        thumb_data = row['thumbnail_data']
+        if row['thumbnail_path']:
+            thumb_data = read_file_base64(row['thumbnail_path'])
+        
        projects.append({
            'id': row['id'],
            'name': row['name'],
@@ -72,7 +86,7 @@ def list_projects():
            'audio_count': row['audio_count'],
            'is_published': bool(row['is_published']),
            'published_at': row['published_at'],
-            'thumbnail_data': row['thumbnail_data'],
+            'thumbnail_data': thumb_data,
            'thumbnail_format': row['thumbnail_format'] or 'png',
            'description': row['description'] or '',
            'author': row['author'] or '',
@@ -115,9 +129,8 @@ def create_project():
@login_required
 def get_project(project_id):
    """
-    Get project metadata WITHOUT audio_data.
-    Audio is loaded lazily via /api/projects/<id>/audio/<block_id>.
-    This keeps the response small (<1 MB) and avoids proxy truncation issues.
+    Get project metadata WITHOUT audio_data (lazy-loaded separately).
+    Images served as base64 from files (editor compatibility).
    """
    db = get_db()
    cursor = db.cursor()
@@ -137,8 +150,9 @@ def get_project(project_id):
    for chapter in chapters:
        cursor.execute('''
            SELECT id, block_order, block_type, content, tts_text,
-                   audio_format, transcription,
-                   (audio_data IS NOT NULL AND audio_data != '') as has_audio
+                   audio_format, audio_path, transcription,
+                   ((audio_data IS NOT NULL AND audio_data != '') 
+                    OR (audio_path IS NOT NULL AND audio_path != '')) as has_audio
            FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
        ''', (chapter['id'],))
        blocks = cursor.fetchall()
@@ -146,7 +160,8 @@ def get_project(project_id):
        blocks_data = []
        for block in blocks:
            cursor.execute('''
-                SELECT * FROM block_images WHERE block_id = ? ORDER BY id
+                SELECT id, image_data, image_format, alt_text, position, image_path
+                FROM block_images WHERE block_id = ? ORDER BY id
            ''', (block['id'],))
            images = cursor.fetchall()
            
@@ -158,23 +173,33 @@ def get_project(project_id):
                except (json.JSONDecodeError, TypeError):
                    transcription = []
            
+            images_data = []
+            for img in images:
+                # read from file when a path is set, otherwise use the legacy base64
+                img_data = ''
+                if img['image_path']:
+                    img_data = read_file_base64(img['image_path'])
+                elif img['image_data']:
+                    img_data = clean_str(img['image_data'])
+                images_data.append({
+                    'id': img['id'],
+                    'data': img_data,
+                    'format': clean_str(img['image_format']) or 'png',
+                    'alt_text': clean_str(img['alt_text']),
+                    'position': clean_str(img['position']) or 'before'
+                })
+            
            blocks_data.append({
                'id': block['id'],
                'block_order': block['block_order'],
                'block_type': clean_str(block['block_type']),
                'content': clean_str(block['content']),
                'tts_text': clean_str(block['tts_text']),
-                'audio_data': '',  # Empty here; loaded lazily by frontend
+                'audio_data': '',
                'audio_format': clean_str(block['audio_format']) or 'mp3',
                'has_audio': bool(block['has_audio']),
                'transcription': transcription,
-                'images': [{
-                    'id': img['id'],
-                    'data': clean_str(img['image_data']),
-                    'format': clean_str(img['image_format']) or 'png',
-                    'alt_text': clean_str(img['alt_text']),
-                    'position': clean_str(img['position']) or 'before'
-                } for img in images]
+                'images': images_data
            })
        
        chapters_data.append({
@@ -197,15 +222,12 @@ def get_project(project_id):
@project_bp.route('/api/projects/<int:project_id>/audio/<int:block_id>', methods=['GET'])
@login_required
 def get_block_audio(project_id, block_id):
-    """
-    Return audio_data (base64) for a single block.
-    Used by the frontend to lazy-load audio after metadata is loaded.
-    """
+    """Stream audio for a single block (v4.3: from file, with base64 fallback)."""
    db = get_db()
    cursor = db.cursor()
    
    cursor.execute('''
-        SELECT mb.audio_data, mb.audio_format
+        SELECT mb.audio_data, mb.audio_path, mb.audio_format
        FROM markdown_blocks mb
        JOIN chapters c ON mb.chapter_id = c.id
        WHERE mb.id = ? AND c.project_id = ?
@@ -215,13 +237,21 @@ def get_block_audio(project_id, block_id):
    if not row:
        return jsonify({'error': 'Block not found'}), 404
    
-    if not row['audio_data']:
-        return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})
+    # New: stream directly from the file (with Range request support)
+    if row['audio_path']:
+        abs_path = get_safe_abs_path(row['audio_path'])
+        if abs_path and os.path.exists(abs_path):
+            return send_file(abs_path, mimetype=f"audio/{row['audio_format'] or 'mp3'}",
+                             conditional=True)
    
-    return jsonify({
-        'audio_data': clean_str(row['audio_data']),
-        'audio_format': clean_str(row['audio_format']) or 'mp3'
-    })
+    # Legacy: base64 JSON
+    if row['audio_data']:
+        return jsonify({
+            'audio_data': clean_str(row['audio_data']),
+            'audio_format': clean_str(row['audio_format']) or 'mp3'
+        })
+    
+    return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})


@project_bp.route('/api/projects/<int:project_id>', methods=['PUT'])
@@ -256,7 +286,7 @@ def update_project(project_id):
@project_bp.route('/api/projects/<int:project_id>', methods=['DELETE'])
@login_required
 def delete_project(project_id):
-    """Delete a project and all its data."""
+    """Delete a project, all DB data, AND its media folder (v4.3, no auto-vacuum)."""
    db = get_db()
    cursor = db.cursor()
    
@@ -282,7 +312,11 @@ def delete_project(project_id):
    cursor.execute('DELETE FROM projects WHERE id = ?', (project_id,))
    
    db.commit()
-    vacuum_db()
+    
+    # v4.3: delete all of the project's media files
+    delete_project_media(project_id)
+    
+    # NOTE: vacuum no longer runs automatically — the user triggers it manually from settings
    
    return jsonify({'success': True})

@@ -290,7 +324,7 @@ def delete_project(project_id):
@project_bp.route('/api/projects/<int:project_id>/save', methods=['POST'])
@login_required
 def save_project_content(project_id):
-    """Save all chapters and blocks for a project."""
+    """Save all chapters and blocks. Audio/images stored as FILES (v4.3)."""
    data = request.json
    chapters = data.get('chapters', [])
    
@@ -301,6 +335,7 @@ def save_project_content(project_id):
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404
    
+    # Delete the old DB records (the files will be written anew)
    cursor.execute('''
        DELETE FROM block_images WHERE block_id IN (
            SELECT mb.id FROM markdown_blocks mb
@@ -332,35 +367,58 @@ def save_project_content(project_id):
        
        for block in chapter.get('blocks', []):
            transcription = clean_transcription(block.get('transcription', []))
+            audio_format = clean_str(block.get('audio_format', 'mp3')) or 'mp3'
            
            cursor.execute('''
                INSERT INTO markdown_blocks 
-                (chapter_id, block_order, block_type, content, tts_text, audio_data, audio_format, transcription)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                (chapter_id, block_order, block_type, content, tts_text, 
+                 audio_data, audio_path, audio_format, transcription)
+                VALUES (?, ?, ?, ?, ?, '', NULL, ?, ?)
            ''', (
                chapter_id,
                block['block_order'],
                clean_str(block.get('block_type', 'paragraph')),
                clean_str(block.get('content', '')),
                clean_str(block.get('tts_text', '')),
-                clean_str(block.get('audio_data', '')),
-                clean_str(block.get('audio_format', 'mp3')),
+                audio_format,
                json.dumps(transcription)
            ))
            
            block_id = cursor.lastrowid
            
+            # Save the audio to a file
+            audio_b64 = block.get('audio_data', '')
+            if audio_b64:
+                rel_path = save_audio(project_id, block_id, audio_b64, audio_format)
+                if rel_path:
+                    cursor.execute(
+                        'UPDATE markdown_blocks SET audio_path = ? WHERE id = ?',
+                        (rel_path, block_id)
+                    )
+            
+            # Save the images to files
            for img in block.get('images', []):
+                img_format = clean_str(img.get('format', 'png')) or 'png'
                cursor.execute('''
-                    INSERT INTO block_images (block_id, image_data, image_format, alt_text, position)
-                    VALUES (?, ?, ?, ?, ?)
+                    INSERT INTO block_images 
+                    (block_id, image_data, image_path, image_format, alt_text, position)
+                    VALUES (?, '', NULL, ?, ?, ?)
                ''', (
                    block_id,
-                    clean_str(img.get('data', '')),
-                    clean_str(img.get('format', 'png')),
+                    img_format,
                    clean_str(img.get('alt_text', '')),
                    clean_str(img.get('position', 'before'))
                ))
+                image_id = cursor.lastrowid
+                
+                img_b64 = img.get('data', '')
+                if img_b64:
+                    img_rel = save_image(project_id, image_id, img_b64, img_format)
+                    if img_rel:
+                        cursor.execute(
+                            'UPDATE block_images SET image_path = ? WHERE id = ?',
+                            (img_rel, image_id)
+                        )
    
    cursor.execute('''
        UPDATE projects SET updated_at = CURRENT_TIMESTAMP WHERE id = ?
@@ -392,7 +450,9 @@ def publish_project(project_id):
    cursor.execute('''
        SELECT COUNT(*) as cnt FROM markdown_blocks mb
        JOIN chapters c ON mb.chapter_id = c.id
-        WHERE c.project_id = ? AND mb.audio_data IS NOT NULL AND mb.audio_data != ''
+        WHERE c.project_id = ? 
+        AND ((mb.audio_data IS NOT NULL AND mb.audio_data != '')
+             OR (mb.audio_path IS NOT NULL AND mb.audio_path != ''))
    ''', (project_id,))
    audio_count = cursor.fetchone()['cnt']
    
@@ -440,7 +500,7 @@ def unpublish_project(project_id):
@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['POST'])
@login_required
 def upload_thumbnail(project_id):
-    """Upload a thumbnail image."""
+    """Upload a thumbnail image (v4.3: stored as file)."""
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    
@@ -460,19 +520,21 @@ def upload_thumbnail(project_id):
    if fmt == 'jpg':
        fmt = 'jpeg'
    
-    b64 = base64.b64encode(img_bytes).decode('utf-8')
-    
    db = get_db()
    cursor = db.cursor()
    cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404
    
+    rel_path = save_thumbnail(project_id, img_bytes, fmt)
+    
    cursor.execute('''
-        UPDATE projects SET thumbnail_data = ?, thumbnail_format = ? WHERE id = ?
-    ''', (b64, fmt, project_id))
+        UPDATE projects SET thumbnail_path = ?, thumbnail_data = NULL, thumbnail_format = ? 
+        WHERE id = ?
+    ''', (rel_path, fmt, project_id))
    db.commit()
    
+    b64 = read_file_base64(rel_path)
    return jsonify({
        'success': True,
        'thumbnail_data': b64,
@@ -483,9 +545,51 @@ def upload_thumbnail(project_id):
@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['DELETE'])
@login_required
 def delete_thumbnail(project_id):
-    """Remove project thumbnail."""
+    """Remove project thumbnail: clear the DB columns, commit, then delete the file."""
    db = get_db()
    cursor = db.cursor()
-    cursor.execute('UPDATE projects SET thumbnail_data = NULL WHERE id = ?', (project_id,))
+    cursor.execute('SELECT thumbnail_path FROM projects WHERE id = ?', (project_id,))
+    row = cursor.fetchone()
+    cursor.execute('UPDATE projects SET thumbnail_data = NULL, thumbnail_path = NULL WHERE id = ?',
+                   (project_id,))
    db.commit()
+    if row and row['thumbnail_path']:  # after commit: a failed commit must not leave a dangling path
+        from media_storage import delete_file
+        delete_file(row['thumbnail_path'])
    return jsonify({'success': True})
+
+
+# ============================================
+# v4.3: Database Maintenance (VACUUM + stats)
+# ============================================
+
+@project_bp.route('/api/maintenance/db-stats', methods=['GET'])
+@login_required
+def db_stats():
+    """Return get_db_stats() output plus media storage size in bytes and MB, as JSON."""
+    stats = get_db_stats()
+    media_bytes = get_storage_usage_bytes()
+    stats['media_size_bytes'] = media_bytes
+    stats['media_size_mb'] = round(media_bytes / (1024 * 1024), 2)
+    return jsonify(stats)
+
+
+@project_bp.route('/api/maintenance/vacuum', methods=['POST'])
+@login_required
+def run_vacuum():
+    """Run VACUUM on demand; return DB stats before/after and MB reclaimed (HTTP 500 on failure)."""
+    before = get_db_stats()
+    try:
+        vacuum_db()
+    except Exception as e:
+        return jsonify({'error': f'VACUUM failed: {str(e)}'}), 500
+    after = get_db_stats()
+    
+    reclaimed_mb = round(before['file_size_mb'] - after['file_size_mb'], 2)
+    return jsonify({
+        'success': True,
+        'message': f'VACUUM complete. Reclaimed {reclaimed_mb} MB.',
+        'before': before,
+        'after': after,
+        'reclaimed_mb': reclaimed_mb
+    })
--- a/routes/public_routes.py
+++ b/routes/public_routes.py
@@ -1,10 +1,12 @@
-# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks
+# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks (v4.3)

 import re
+import os
 import json
-from flask import Blueprint, jsonify, send_from_directory, abort
+from flask import Blueprint, jsonify, send_from_directory, send_file, abort

 from db import get_db
+from media_storage import get_safe_abs_path, read_file_base64

 public_bp = Blueprint('public', __name__)

@@ -53,7 +55,6 @@ def public_reader(project_id):
    if not project or not project['is_published']:
        abort(404)
    
-    # Increment view count
    cursor.execute('UPDATE projects SET view_count = view_count + 1 WHERE id = ?', (project_id,))
    db.commit()
    
@@ -68,7 +69,7 @@ def list_published_books():
    
    cursor.execute('''
        SELECT p.id, p.name, p.description, p.author, p.category,
-               p.thumbnail_data, p.thumbnail_format, p.published_at,
+               p.thumbnail_data, p.thumbnail_format, p.thumbnail_path, p.published_at,
               p.view_count, p.created_at,
               (SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count
        FROM projects p
@@ -78,13 +79,17 @@ def list_published_books():
    
    books = []
    for row in cursor.fetchall():
+        thumb_data = row['thumbnail_data']
+        if row['thumbnail_path']:
+            thumb_data = read_file_base64(row['thumbnail_path'])
+        
        books.append({
            'id': row['id'],
            'name': row['name'],
            'description': row['description'] or '',
            'author': row['author'] or '',
            'category': row['category'] or '',
-            'thumbnail_data': row['thumbnail_data'],
+            'thumbnail_data': thumb_data,
            'thumbnail_format': row['thumbnail_format'] or 'png',
            'published_at': row['published_at'],
            'view_count': row['view_count'] or 0,
@@ -96,11 +101,7 @@ def list_published_books():

@public_bp.route('/api/public/books/<int:project_id>', methods=['GET'])
 def get_published_book(project_id):
-    """
-    Get book metadata WITHOUT audio_data.
-    Audio is loaded lazily via /api/public/books/<id>/audio/<block_id>.
-    This keeps the response small (<1 MB) and avoids proxy truncation issues.
-    """
+    """Get book metadata WITHOUT audio_data (lazy-loaded separately)."""
    db = get_db()
    cursor = db.cursor()
    
@@ -120,8 +121,9 @@ def get_published_book(project_id):
    chapters_data = []
    for chapter in chapters:
        cursor.execute('''
-            SELECT id, block_order, block_type, content, audio_format, transcription,
-                   (audio_data IS NOT NULL AND audio_data != '') as has_audio
+            SELECT id, block_order, block_type, content, audio_format, audio_path, transcription,
+                   ((audio_data IS NOT NULL AND audio_data != '')
+                    OR (audio_path IS NOT NULL AND audio_path != '')) as has_audio
            FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
        ''', (chapter['id'],))
        blocks = cursor.fetchall()
@@ -129,7 +131,8 @@ def get_published_book(project_id):
        blocks_data = []
        for block in blocks:
            cursor.execute('''
-                SELECT * FROM block_images WHERE block_id = ? ORDER BY id
+                SELECT image_data, image_format, alt_text, position, image_path
+                FROM block_images WHERE block_id = ? ORDER BY id
            ''', (block['id'],))
            images = cursor.fetchall()
            
@@ -141,21 +144,30 @@ def get_published_book(project_id):
                except (json.JSONDecodeError, TypeError):
                    transcription = []
            
+            images_data = []
+            for img in images:
+                img_data = ''
+                if img['image_path']:
+                    img_data = read_file_base64(img['image_path'])
+                elif img['image_data']:
+                    img_data = clean_str(img['image_data'])
+                images_data.append({
+                    'data': img_data,
+                    'format': clean_str(img['image_format']) or 'png',
+                    'alt_text': clean_str(img['alt_text']),
+                    'position': clean_str(img['position']) or 'before'
+                })
+            
            blocks_data.append({
                'id': block['id'],
                'block_order': block['block_order'],
                'block_type': clean_str(block['block_type']),
                'content': clean_str(block['content']),
-                'audio_data': '',  # Empty here; loaded lazily by frontend
+                'audio_data': '',
                'audio_format': clean_str(block['audio_format']) or 'mp3',
                'has_audio': bool(block['has_audio']),
                'transcription': transcription,
-                'images': [{
-                    'data': clean_str(img['image_data']),
-                    'format': clean_str(img['image_format']) or 'png',
-                    'alt_text': clean_str(img['alt_text']),
-                    'position': clean_str(img['position']) or 'before'
-                } for img in images]
+                'images': images_data
            })
        
        chapters_data.append({
@@ -165,12 +177,16 @@ def get_published_book(project_id):
            'blocks': blocks_data
        })
    
+    thumb_data = project['thumbnail_data']
+    if project['thumbnail_path']:
+        thumb_data = read_file_base64(project['thumbnail_path'])
+    
    return jsonify({
        'id': project['id'],
        'name': clean_str(project['name']),
        'description': clean_str(project['description']) if project['description'] else '',
        'author': clean_str(project['author']) if project['author'] else '',
-        'thumbnail_data': project['thumbnail_data'],
+        'thumbnail_data': thumb_data,
        'thumbnail_format': project['thumbnail_format'] or 'png',
        'chapters': chapters_data
    })
@@ -178,21 +194,17 @@ def get_published_book(project_id):

@public_bp.route('/api/public/books/<int:project_id>/audio/<int:block_id>', methods=['GET'])
 def get_public_block_audio(project_id, block_id):
-    """
-    Return audio_data (base64) for a single block in a published book.
-    No auth required since the book is published publicly.
-    """
+    """Stream audio file for a published book block (v4.3)."""
    db = get_db()
    cursor = db.cursor()
    
-    # Verify project is published
    cursor.execute('SELECT is_published FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project or not project['is_published']:
        return jsonify({'error': 'Book not found or not published'}), 404
    
    cursor.execute('''
-        SELECT mb.audio_data, mb.audio_format
+        SELECT mb.audio_data, mb.audio_path, mb.audio_format
        FROM markdown_blocks mb
        JOIN chapters c ON mb.chapter_id = c.id
        WHERE mb.id = ? AND c.project_id = ?
@@ -202,10 +214,16 @@ def get_public_block_audio(project_id, block_id):
    if not row:
        return jsonify({'error': 'Block not found'}), 404
    
-    if not row['audio_data']:
-        return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})
+    if row['audio_path']:
+        abs_path = get_safe_abs_path(row['audio_path'])
+        if abs_path and os.path.exists(abs_path):
+            return send_file(abs_path, mimetype=f"audio/{row['audio_format'] or 'mp3'}",
+                             conditional=True)
    
-    return jsonify({
-        'audio_data': clean_str(row['audio_data']),
-        'audio_format': clean_str(row['audio_format']) or 'mp3'
-    })
+    if row['audio_data']:
+        return jsonify({
+            'audio_data': clean_str(row['audio_data']),
+            'audio_format': clean_str(row['audio_format']) or 'mp3'
+        })
+    
+    return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})