v4.3: file-based media storage + manual VACUUM maintenance

This commit is contained in:
Ashim Kumar
2026-06-12 13:24:00 +06:00
parent 965470853e
commit cc57204aff
10 changed files with 789 additions and 164 deletions

View File

@@ -1,10 +1,12 @@
# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks
# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks (v4.3)
import re
import os
import json
from flask import Blueprint, jsonify, send_from_directory, abort
from flask import Blueprint, jsonify, send_from_directory, send_file, abort
from db import get_db
from media_storage import get_safe_abs_path, read_file_base64
public_bp = Blueprint('public', __name__)
@@ -53,7 +55,6 @@ def public_reader(project_id):
if not project or not project['is_published']:
abort(404)
# Increment view count
cursor.execute('UPDATE projects SET view_count = view_count + 1 WHERE id = ?', (project_id,))
db.commit()
@@ -68,7 +69,7 @@ def list_published_books():
cursor.execute('''
SELECT p.id, p.name, p.description, p.author, p.category,
p.thumbnail_data, p.thumbnail_format, p.published_at,
p.thumbnail_data, p.thumbnail_format, p.thumbnail_path, p.published_at,
p.view_count, p.created_at,
(SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count
FROM projects p
@@ -78,13 +79,17 @@ def list_published_books():
books = []
for row in cursor.fetchall():
thumb_data = row['thumbnail_data']
if row['thumbnail_path']:
thumb_data = read_file_base64(row['thumbnail_path'])
books.append({
'id': row['id'],
'name': row['name'],
'description': row['description'] or '',
'author': row['author'] or '',
'category': row['category'] or '',
'thumbnail_data': row['thumbnail_data'],
'thumbnail_data': thumb_data,
'thumbnail_format': row['thumbnail_format'] or 'png',
'published_at': row['published_at'],
'view_count': row['view_count'] or 0,
@@ -96,11 +101,7 @@ def list_published_books():
@public_bp.route('/api/public/books/<int:project_id>', methods=['GET'])
def get_published_book(project_id):
"""
Get book metadata WITHOUT audio_data.
Audio is loaded lazily via /api/public/books/<id>/audio/<block_id>.
This keeps the response small (<1 MB) and avoids proxy truncation issues.
"""
"""Get book metadata WITHOUT audio_data (lazy-loaded separately)."""
db = get_db()
cursor = db.cursor()
@@ -120,8 +121,9 @@ def get_published_book(project_id):
chapters_data = []
for chapter in chapters:
cursor.execute('''
SELECT id, block_order, block_type, content, audio_format, transcription,
(audio_data IS NOT NULL AND audio_data != '') as has_audio
SELECT id, block_order, block_type, content, audio_format, audio_path, transcription,
((audio_data IS NOT NULL AND audio_data != '')
OR (audio_path IS NOT NULL AND audio_path != '')) as has_audio
FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
''', (chapter['id'],))
blocks = cursor.fetchall()
@@ -129,7 +131,8 @@ def get_published_book(project_id):
blocks_data = []
for block in blocks:
cursor.execute('''
SELECT * FROM block_images WHERE block_id = ? ORDER BY id
SELECT image_data, image_format, alt_text, position, image_path
FROM block_images WHERE block_id = ? ORDER BY id
''', (block['id'],))
images = cursor.fetchall()
@@ -141,21 +144,30 @@ def get_published_book(project_id):
except (json.JSONDecodeError, TypeError):
transcription = []
images_data = []
for img in images:
img_data = ''
if img['image_path']:
img_data = read_file_base64(img['image_path'])
elif img['image_data']:
img_data = clean_str(img['image_data'])
images_data.append({
'data': img_data,
'format': clean_str(img['image_format']) or 'png',
'alt_text': clean_str(img['alt_text']),
'position': clean_str(img['position']) or 'before'
})
blocks_data.append({
'id': block['id'],
'block_order': block['block_order'],
'block_type': clean_str(block['block_type']),
'content': clean_str(block['content']),
'audio_data': '', # Empty here; loaded lazily by frontend
'audio_data': '',
'audio_format': clean_str(block['audio_format']) or 'mp3',
'has_audio': bool(block['has_audio']),
'transcription': transcription,
'images': [{
'data': clean_str(img['image_data']),
'format': clean_str(img['image_format']) or 'png',
'alt_text': clean_str(img['alt_text']),
'position': clean_str(img['position']) or 'before'
} for img in images]
'images': images_data
})
chapters_data.append({
@@ -165,12 +177,16 @@ def get_published_book(project_id):
'blocks': blocks_data
})
thumb_data = project['thumbnail_data']
if project['thumbnail_path']:
thumb_data = read_file_base64(project['thumbnail_path'])
return jsonify({
'id': project['id'],
'name': clean_str(project['name']),
'description': clean_str(project['description']) if project['description'] else '',
'author': clean_str(project['author']) if project['author'] else '',
'thumbnail_data': project['thumbnail_data'],
'thumbnail_data': thumb_data,
'thumbnail_format': project['thumbnail_format'] or 'png',
'chapters': chapters_data
})
@@ -178,21 +194,17 @@ def get_published_book(project_id):
@public_bp.route('/api/public/books/<int:project_id>/audio/<int:block_id>', methods=['GET'])
def get_public_block_audio(project_id, block_id):
"""
Return audio_data (base64) for a single block in a published book.
No auth required since the book is published publicly.
"""
"""Stream audio file for a published book block (v4.3)."""
db = get_db()
cursor = db.cursor()
# Verify project is published
cursor.execute('SELECT is_published FROM projects WHERE id = ?', (project_id,))
project = cursor.fetchone()
if not project or not project['is_published']:
return jsonify({'error': 'Book not found or not published'}), 404
cursor.execute('''
SELECT mb.audio_data, mb.audio_format
SELECT mb.audio_data, mb.audio_path, mb.audio_format
FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE mb.id = ? AND c.project_id = ?
@@ -202,10 +214,16 @@ def get_public_block_audio(project_id, block_id):
if not row:
return jsonify({'error': 'Block not found'}), 404
if not row['audio_data']:
return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})
if row['audio_path']:
abs_path = get_safe_abs_path(row['audio_path'])
if abs_path and os.path.exists(abs_path):
return send_file(abs_path, mimetype=f"audio/{row['audio_format'] or 'mp3'}",
conditional=True)
return jsonify({
'audio_data': clean_str(row['audio_data']),
'audio_format': clean_str(row['audio_format']) or 'mp3'
})
if row['audio_data']:
return jsonify({
'audio_data': clean_str(row['audio_data']),
'audio_format': clean_str(row['audio_format']) or 'mp3'
})
return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})