v4.3: file-based media storage + manual VACUUM maintenance

2026-06-12 13:24:00 +06:00
parent 965470853e
commit cc57204aff
10 changed files with 789 additions and 164 deletions
--- a/routes/public_routes.py
+++ b/routes/public_routes.py
@@ -1,10 +1,12 @@
-# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks
+# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks (v4.3)

 import re
+import os
 import json
-from flask import Blueprint, jsonify, send_from_directory, abort
+from flask import Blueprint, jsonify, send_from_directory, send_file, abort

 from db import get_db
+from media_storage import get_safe_abs_path, read_file_base64

 public_bp = Blueprint('public', __name__)

@@ -53,7 +55,6 @@ def public_reader(project_id):
    if not project or not project['is_published']:
        abort(404)
    
-    # Increment view count
    cursor.execute('UPDATE projects SET view_count = view_count + 1 WHERE id = ?', (project_id,))
    db.commit()
    
@@ -68,7 +69,7 @@ def list_published_books():
    
    cursor.execute('''
        SELECT p.id, p.name, p.description, p.author, p.category,
-               p.thumbnail_data, p.thumbnail_format, p.published_at,
+               p.thumbnail_data, p.thumbnail_format, p.thumbnail_path, p.published_at,
               p.view_count, p.created_at,
               (SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count
        FROM projects p
@@ -78,13 +79,17 @@ def list_published_books():
    
    books = []
    for row in cursor.fetchall():
+        thumb_data = row['thumbnail_data']
+        if row['thumbnail_path']:
+            thumb_data = read_file_base64(row['thumbnail_path'])
+        
        books.append({
            'id': row['id'],
            'name': row['name'],
            'description': row['description'] or '',
            'author': row['author'] or '',
            'category': row['category'] or '',
-            'thumbnail_data': row['thumbnail_data'],
+            'thumbnail_data': thumb_data,
            'thumbnail_format': row['thumbnail_format'] or 'png',
            'published_at': row['published_at'],
            'view_count': row['view_count'] or 0,
@@ -96,11 +101,7 @@ def list_published_books():

@public_bp.route('/api/public/books/<int:project_id>', methods=['GET'])
 def get_published_book(project_id):
-    """
-    Get book metadata WITHOUT audio_data.
-    Audio is loaded lazily via /api/public/books/<id>/audio/<block_id>.
-    This keeps the response small (<1 MB) and avoids proxy truncation issues.
-    """
+    """Get book metadata WITHOUT audio_data (lazy-loaded separately)."""
    db = get_db()
    cursor = db.cursor()
    
@@ -120,8 +121,9 @@ def get_published_book(project_id):
    chapters_data = []
    for chapter in chapters:
        cursor.execute('''
-            SELECT id, block_order, block_type, content, audio_format, transcription,
-                   (audio_data IS NOT NULL AND audio_data != '') as has_audio
+            SELECT id, block_order, block_type, content, audio_format, audio_path, transcription,
+                   ((audio_data IS NOT NULL AND audio_data != '')
+                    OR (audio_path IS NOT NULL AND audio_path != '')) as has_audio
            FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
        ''', (chapter['id'],))
        blocks = cursor.fetchall()
@@ -129,7 +131,8 @@ def get_published_book(project_id):
        blocks_data = []
        for block in blocks:
            cursor.execute('''
-                SELECT * FROM block_images WHERE block_id = ? ORDER BY id
+                SELECT image_data, image_format, alt_text, position, image_path
+                FROM block_images WHERE block_id = ? ORDER BY id
            ''', (block['id'],))
            images = cursor.fetchall()
            
@@ -141,21 +144,30 @@ def get_published_book(project_id):
                except (json.JSONDecodeError, TypeError):
                    transcription = []
            
+            images_data = []
+            for img in images:
+                img_data = ''
+                if img['image_path']:
+                    img_data = read_file_base64(img['image_path'])
+                elif img['image_data']:
+                    img_data = clean_str(img['image_data'])
+                images_data.append({
+                    'data': img_data,
+                    'format': clean_str(img['image_format']) or 'png',
+                    'alt_text': clean_str(img['alt_text']),
+                    'position': clean_str(img['position']) or 'before'
+                })
+            
            blocks_data.append({
                'id': block['id'],
                'block_order': block['block_order'],
                'block_type': clean_str(block['block_type']),
                'content': clean_str(block['content']),
-                'audio_data': '',  # Empty here; loaded lazily by frontend
+                'audio_data': '',
                'audio_format': clean_str(block['audio_format']) or 'mp3',
                'has_audio': bool(block['has_audio']),
                'transcription': transcription,
-                'images': [{
-                    'data': clean_str(img['image_data']),
-                    'format': clean_str(img['image_format']) or 'png',
-                    'alt_text': clean_str(img['alt_text']),
-                    'position': clean_str(img['position']) or 'before'
-                } for img in images]
+                'images': images_data
            })
        
        chapters_data.append({
@@ -165,12 +177,16 @@ def get_published_book(project_id):
            'blocks': blocks_data
        })
    
+    thumb_data = project['thumbnail_data']
+    if project['thumbnail_path']:
+        thumb_data = read_file_base64(project['thumbnail_path'])
+    
    return jsonify({
        'id': project['id'],
        'name': clean_str(project['name']),
        'description': clean_str(project['description']) if project['description'] else '',
        'author': clean_str(project['author']) if project['author'] else '',
-        'thumbnail_data': project['thumbnail_data'],
+        'thumbnail_data': thumb_data,
        'thumbnail_format': project['thumbnail_format'] or 'png',
        'chapters': chapters_data
    })
@@ -178,21 +194,17 @@ def get_published_book(project_id):

@public_bp.route('/api/public/books/<int:project_id>/audio/<int:block_id>', methods=['GET'])
 def get_public_block_audio(project_id, block_id):
-    """
-    Return audio_data (base64) for a single block in a published book.
-    No auth required since the book is published publicly.
-    """
+    """Stream a published block's audio file; fall back to inline audio_data as JSON (v4.3)."""
    db = get_db()
    cursor = db.cursor()
    
-    # Verify project is published
    cursor.execute('SELECT is_published FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project or not project['is_published']:
        return jsonify({'error': 'Book not found or not published'}), 404
    
    cursor.execute('''
-        SELECT mb.audio_data, mb.audio_format
+        SELECT mb.audio_data, mb.audio_path, mb.audio_format
        FROM markdown_blocks mb
        JOIN chapters c ON mb.chapter_id = c.id
        WHERE mb.id = ? AND c.project_id = ?
@@ -202,10 +214,16 @@ def get_public_block_audio(project_id, block_id):
    if not row:
        return jsonify({'error': 'Block not found'}), 404
    
-    if not row['audio_data']:
-        return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})
+    if row['audio_path']:
+        abs_path = get_safe_abs_path(row['audio_path'])
+        if abs_path and os.path.exists(abs_path):
+            return send_file(abs_path, mimetype=f"audio/{row['audio_format'] or 'mp3'}",
+                             conditional=True)
    
-    return jsonify({
-        'audio_data': clean_str(row['audio_data']),
-        'audio_format': clean_str(row['audio_format']) or 'mp3'
-    })
+    if row['audio_data']:
+        return jsonify({
+            'audio_data': clean_str(row['audio_data']),
+            'audio_format': clean_str(row['audio_format']) or 'mp3'
+        })
+    
+    return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})