v4.3: file-based media storage + manual VACUUM maintenance

This commit is contained in:
Ashim Kumar
2026-06-12 13:24:00 +06:00
parent 965470853e
commit cc57204aff
10 changed files with 789 additions and 164 deletions

View File

@@ -1,4 +1,4 @@
# routes/generation_routes.py - Combined Endpoint with Correct Task Polling
# routes/generation_routes.py - Combined Endpoint with Correct Task Polling (v4.3)
import json
import time
@@ -40,8 +40,6 @@ def poll_beam_task(task_id):
print(f" URL: {task_url}")
start_time = time.time()
# প্রথম কয়েকটা attempt এ 404 আসতে পারে — task register হতে delay
initial_delay = True
while True:
@@ -51,17 +49,14 @@ def poll_beam_task(task_id):
print(f"❌ Polling timeout after {POLL_MAX_WAIT}s")
return None, f'Task timed out after {int(POLL_MAX_WAIT)} seconds'
# প্রথম ২ সেকেন্ড wait করি task register হতে
if initial_delay and elapsed < 2:
time.sleep(2)
initial_delay = False
continue
try:
# ★ Bearer token দিয়ে try
resp = requests.get(task_url, headers=get_beam_auth_headers(), timeout=30)
# Bearer fail হলে Basic try করি
if resp.status_code in (401, 403):
print(f" Bearer auth failed, trying Basic...")
basic_headers = {
@@ -73,20 +68,14 @@ def poll_beam_task(task_id):
print(f" [{int(elapsed)}s] HTTP {resp.status_code} | Body: {len(resp.text)} chars")
if resp.status_code == 404:
# Task এখনও register হয়নি — wait
if elapsed < 30:
print(f" Task not found yet, waiting...")
time.sleep(POLL_INTERVAL)
continue
else:
# ৩০ সেকেন্ড পরেও 404 — সমস্যা
print(f"❌ Task not found after {int(elapsed)}s")
# ★ Debug: response body দেখি
print(f" 404 body: {resp.text[:300]}")
# ★ Alternative: Beam API base URL ভিন্ন হতে পারে
# কিছু Beam setup এ URL format ভিন্ন
alt_urls = [
f"https://api.beam.cloud/v2/task/{task_id}/status/",
f"https://api.beam.cloud/v2/task/{task_id}",
@@ -121,25 +110,14 @@ def poll_beam_task(task_id):
if status in ('COMPLETE', 'COMPLETED', 'SUCCESS'):
print(f"✅ Task complete!")
# ★ Result বের করা — Beam বিভিন্ন জায়গায় result রাখে
# 1. 'output' key
# 2. 'result' key
# 3. 'outputs' list (file-based)
# 4. response body তেই (endpoint mode)
actual_result = None
# Check 'output' (endpoint mode — function return value)
if data.get('output') and isinstance(data['output'], dict):
actual_result = data['output']
print(f" Result found in 'output' key")
# Check 'result'
elif data.get('result') and isinstance(data['result'], dict):
actual_result = data['result']
print(f" Result found in 'result' key")
# Check if top-level has audio_base64 (unlikely but possible)
elif data.get('audio_base64'):
actual_result = data
print(f" Result found in top-level data")
@@ -149,16 +127,12 @@ def poll_beam_task(task_id):
elif actual_result and actual_result.get('success'):
return actual_result, None
# ★ Outputs (file-based) — need to download
outputs = data.get('outputs', [])
if outputs:
print(f" Task has {len(outputs)} output files")
# For our use case, result should be in 'output' not files
# But log it for debug
for out in outputs:
print(f" Output: {out.get('name', '?')}{out.get('url', '?')}")
# No usable result found
print(f" ⚠️ Task complete but no audio in response")
print(f" Response keys: {list(data.keys())}")
print(f" Full response (first 500): {json.dumps(data, default=str)[:500]}")
@@ -177,7 +151,7 @@ def poll_beam_task(task_id):
return None, f'Task {status.lower()} on Beam. Container may not have started in time.'
elif status in ('PENDING', 'RUNNING', 'RETRY'):
pass # Keep polling
pass
else:
print(f" Unknown status: {status}")
@@ -221,16 +195,12 @@ def call_beam_and_get_result(text, voice='af_heart', speed=1.0):
task_id = response.headers.get('X-Task-Id', '')
# ========================================
# CASE 1: Task ID + empty/no body → Async → Poll
# ========================================
# CASE 1: Task ID + empty body → Async → Poll
if task_id and (not response.text or not response.text.strip() or response.headers.get('Content-Length') == '0'):
print(f"📋 Async mode — Task ID: {task_id}")
return poll_beam_task(task_id)
# ========================================
# CASE 2: Task ID + body
# ========================================
if task_id and response.text and response.text.strip():
print(f"📋 Task ID: {task_id} + body ({len(response.text)} chars)")
try:
@@ -238,20 +208,15 @@ def call_beam_and_get_result(text, voice='af_heart', speed=1.0):
if result.get('success') and result.get('audio_base64'):
print(f"✅ Direct sync result")
return _extract(result), None
# Body isn't the final result — poll
return poll_beam_task(task_id)
except Exception:
return poll_beam_task(task_id)
# ========================================
# CASE 3: No task_id + empty body → Error
# ========================================
if not response.text or not response.text.strip():
return None, 'Empty response from Beam with no task ID'
# ========================================
# CASE 4: Synchronous response
# ========================================
if response.status_code != 200:
try:
err = response.json().get('error', response.text[:200])
@@ -326,15 +291,26 @@ def generate_audio():
if source_format != 'mp3':
audio_base64 = convert_to_mp3(audio_base64, source_format)
# block_id থাকলে সরাসরি ফাইলে সেভ করি (v4.3)
if block_id:
from media_storage import save_audio
db = get_db()
cursor = db.cursor()
cursor.execute('''
UPDATE markdown_blocks
SET audio_data = ?, audio_format = 'mp3', transcription = ?
WHERE id = ?
''', (audio_base64, json.dumps(transcription), block_id))
db.commit()
SELECT c.project_id FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE mb.id = ?
''', (block_id,))
row = cursor.fetchone()
if row:
project_id = row['project_id']
rel_path = save_audio(project_id, block_id, audio_base64, 'mp3')
cursor.execute('''
UPDATE markdown_blocks
SET audio_path = ?, audio_data = '', audio_format = 'mp3', transcription = ?
WHERE id = ?
''', (rel_path, json.dumps(transcription), block_id))
db.commit()
print(f"✅ DONE: audio={len(audio_base64)} bytes, words={len(transcription)}")
print(f"{'='*60}")
@@ -377,7 +353,7 @@ def generate_chapter_audio():
cursor = db.cursor()
cursor.execute('''
SELECT id, content, tts_text, block_type FROM markdown_blocks
SELECT id, content, tts_text, block_type, chapter_id FROM markdown_blocks
WHERE chapter_id = ? ORDER BY block_order
''', (chapter_id,))
blocks = cursor.fetchall()
@@ -385,6 +361,11 @@ def generate_chapter_audio():
if not blocks:
return jsonify({'error': 'No blocks found'}), 404
# project_id বের করি (ফাইল সেভের জন্য)
cursor.execute('SELECT project_id FROM chapters WHERE id = ?', (chapter_id,))
ch_row = cursor.fetchone()
project_id = ch_row['project_id'] if ch_row else None
results = []
success_count = 0
error_count = 0
@@ -394,6 +375,8 @@ def generate_chapter_audio():
print(f"📖 CHAPTER: {total} blocks, voice={voice}")
print(f"{'='*60}")
from media_storage import save_audio
for idx, block in enumerate(blocks):
block_id = block['id']
block_type = block['block_type'] if 'block_type' in block.keys() else 'paragraph'
@@ -437,11 +420,14 @@ def generate_chapter_audio():
if source_format != 'mp3':
audio_base64 = convert_to_mp3(audio_base64, source_format)
# v4.3: ফাইলে সেভ
rel_path = save_audio(project_id, block_id, audio_base64, 'mp3') if project_id else None
cursor.execute('''
UPDATE markdown_blocks
SET audio_data = ?, audio_format = 'mp3', transcription = ?
SET audio_path = ?, audio_data = '', audio_format = 'mp3', transcription = ?
WHERE id = ?
''', (audio_base64, json.dumps(transcription), block_id))
''', (rel_path, json.dumps(transcription), block_id))
results.append({
'block_id': block_id,

View File

@@ -1,12 +1,18 @@
# routes/project_routes.py - Project Management Routes (v4.2)
# routes/project_routes.py - Project Management Routes (v4.3)
import re
import os
import json
import base64
from flask import Blueprint, request, jsonify
from flask import Blueprint, request, jsonify, send_file
from db import get_db, vacuum_db
from db import get_db, vacuum_db, get_db_stats
from auth import login_required
from media_storage import (
save_audio, save_image, save_thumbnail,
read_file_base64, get_safe_abs_path,
delete_project_media, get_storage_usage_bytes
)
project_bp = Blueprint('project', __name__)
@@ -48,6 +54,7 @@ def list_projects():
cursor.execute('''
SELECT p.id, p.name, p.created_at, p.updated_at,
p.is_published, p.published_at, p.thumbnail_data, p.thumbnail_format,
p.thumbnail_path,
p.description, p.author, p.category, p.view_count,
(SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count,
(SELECT COUNT(*) FROM markdown_blocks mb
@@ -55,13 +62,20 @@ def list_projects():
WHERE c.project_id = p.id) as block_count,
(SELECT COUNT(*) FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE c.project_id = p.id AND mb.audio_data IS NOT NULL AND mb.audio_data != '') as audio_count
WHERE c.project_id = p.id
AND ((mb.audio_data IS NOT NULL AND mb.audio_data != '')
OR (mb.audio_path IS NOT NULL AND mb.audio_path != ''))) as audio_count
FROM projects p
ORDER BY p.updated_at DESC
''')
projects = []
for row in cursor.fetchall():
# thumbnail: path থাকলে ফাইল থেকে, নইলে পুরোনো base64
thumb_data = row['thumbnail_data']
if row['thumbnail_path']:
thumb_data = read_file_base64(row['thumbnail_path'])
projects.append({
'id': row['id'],
'name': row['name'],
@@ -72,7 +86,7 @@ def list_projects():
'audio_count': row['audio_count'],
'is_published': bool(row['is_published']),
'published_at': row['published_at'],
'thumbnail_data': row['thumbnail_data'],
'thumbnail_data': thumb_data,
'thumbnail_format': row['thumbnail_format'] or 'png',
'description': row['description'] or '',
'author': row['author'] or '',
@@ -115,9 +129,8 @@ def create_project():
@login_required
def get_project(project_id):
"""
Get project metadata WITHOUT audio_data.
Audio is loaded lazily via /api/projects/<id>/audio/<block_id>.
This keeps the response small (<1 MB) and avoids proxy truncation issues.
Get project metadata WITHOUT audio_data (lazy-loaded separately).
Images served as base64 from files (editor compatibility).
"""
db = get_db()
cursor = db.cursor()
@@ -137,8 +150,9 @@ def get_project(project_id):
for chapter in chapters:
cursor.execute('''
SELECT id, block_order, block_type, content, tts_text,
audio_format, transcription,
(audio_data IS NOT NULL AND audio_data != '') as has_audio
audio_format, audio_path, transcription,
((audio_data IS NOT NULL AND audio_data != '')
OR (audio_path IS NOT NULL AND audio_path != '')) as has_audio
FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
''', (chapter['id'],))
blocks = cursor.fetchall()
@@ -146,7 +160,8 @@ def get_project(project_id):
blocks_data = []
for block in blocks:
cursor.execute('''
SELECT * FROM block_images WHERE block_id = ? ORDER BY id
SELECT id, image_data, image_format, alt_text, position, image_path
FROM block_images WHERE block_id = ? ORDER BY id
''', (block['id'],))
images = cursor.fetchall()
@@ -158,23 +173,33 @@ def get_project(project_id):
except (json.JSONDecodeError, TypeError):
transcription = []
images_data = []
for img in images:
# path থাকলে ফাইল থেকে, নইলে পুরোনো base64
img_data = ''
if img['image_path']:
img_data = read_file_base64(img['image_path'])
elif img['image_data']:
img_data = clean_str(img['image_data'])
images_data.append({
'id': img['id'],
'data': img_data,
'format': clean_str(img['image_format']) or 'png',
'alt_text': clean_str(img['alt_text']),
'position': clean_str(img['position']) or 'before'
})
blocks_data.append({
'id': block['id'],
'block_order': block['block_order'],
'block_type': clean_str(block['block_type']),
'content': clean_str(block['content']),
'tts_text': clean_str(block['tts_text']),
'audio_data': '', # Empty here; loaded lazily by frontend
'audio_data': '',
'audio_format': clean_str(block['audio_format']) or 'mp3',
'has_audio': bool(block['has_audio']),
'transcription': transcription,
'images': [{
'id': img['id'],
'data': clean_str(img['image_data']),
'format': clean_str(img['image_format']) or 'png',
'alt_text': clean_str(img['alt_text']),
'position': clean_str(img['position']) or 'before'
} for img in images]
'images': images_data
})
chapters_data.append({
@@ -197,15 +222,12 @@ def get_project(project_id):
@project_bp.route('/api/projects/<int:project_id>/audio/<int:block_id>', methods=['GET'])
@login_required
def get_block_audio(project_id, block_id):
"""
Return audio_data (base64) for a single block.
Used by the frontend to lazy-load audio after metadata is loaded.
"""
"""Stream audio for a single block (v4.3: from file, with base64 fallback)."""
db = get_db()
cursor = db.cursor()
cursor.execute('''
SELECT mb.audio_data, mb.audio_format
SELECT mb.audio_data, mb.audio_path, mb.audio_format
FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE mb.id = ? AND c.project_id = ?
@@ -215,13 +237,21 @@ def get_block_audio(project_id, block_id):
if not row:
return jsonify({'error': 'Block not found'}), 404
if not row['audio_data']:
return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})
# নতুন: ফাইল থেকে সরাসরি stream (Range request সাপোর্ট সহ)
if row['audio_path']:
abs_path = get_safe_abs_path(row['audio_path'])
if abs_path and os.path.exists(abs_path):
return send_file(abs_path, mimetype=f"audio/{row['audio_format'] or 'mp3'}",
conditional=True)
return jsonify({
'audio_data': clean_str(row['audio_data']),
'audio_format': clean_str(row['audio_format']) or 'mp3'
})
# পুরোনো: base64 JSON
if row['audio_data']:
return jsonify({
'audio_data': clean_str(row['audio_data']),
'audio_format': clean_str(row['audio_format']) or 'mp3'
})
return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})
@project_bp.route('/api/projects/<int:project_id>', methods=['PUT'])
@@ -256,7 +286,7 @@ def update_project(project_id):
@project_bp.route('/api/projects/<int:project_id>', methods=['DELETE'])
@login_required
def delete_project(project_id):
"""Delete a project and all its data."""
"""Delete a project, all DB data, AND its media folder (v4.3, no auto-vacuum)."""
db = get_db()
cursor = db.cursor()
@@ -282,7 +312,11 @@ def delete_project(project_id):
cursor.execute('DELETE FROM projects WHERE id = ?', (project_id,))
db.commit()
vacuum_db()
# v4.3: প্রজেক্টের সব মিডিয়া ফাইল মুছি
delete_project_media(project_id)
# NOTE: vacuum আর অটোমেটিক চলে না — ইউজার সেটিংস থেকে ম্যানুয়ালি করবে
return jsonify({'success': True})
@@ -290,7 +324,7 @@ def delete_project(project_id):
@project_bp.route('/api/projects/<int:project_id>/save', methods=['POST'])
@login_required
def save_project_content(project_id):
"""Save all chapters and blocks for a project."""
"""Save all chapters and blocks. Audio/images stored as FILES (v4.3)."""
data = request.json
chapters = data.get('chapters', [])
@@ -301,6 +335,7 @@ def save_project_content(project_id):
if not cursor.fetchone():
return jsonify({'error': 'Project not found'}), 404
# পুরোনো DB রেকর্ড মুছি (ফাইলগুলো নতুন করে লেখা হবে)
cursor.execute('''
DELETE FROM block_images WHERE block_id IN (
SELECT mb.id FROM markdown_blocks mb
@@ -332,35 +367,58 @@ def save_project_content(project_id):
for block in chapter.get('blocks', []):
transcription = clean_transcription(block.get('transcription', []))
audio_format = clean_str(block.get('audio_format', 'mp3')) or 'mp3'
cursor.execute('''
INSERT INTO markdown_blocks
(chapter_id, block_order, block_type, content, tts_text, audio_data, audio_format, transcription)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
(chapter_id, block_order, block_type, content, tts_text,
audio_data, audio_path, audio_format, transcription)
VALUES (?, ?, ?, ?, ?, '', NULL, ?, ?)
''', (
chapter_id,
block['block_order'],
clean_str(block.get('block_type', 'paragraph')),
clean_str(block.get('content', '')),
clean_str(block.get('tts_text', '')),
clean_str(block.get('audio_data', '')),
clean_str(block.get('audio_format', 'mp3')),
audio_format,
json.dumps(transcription)
))
block_id = cursor.lastrowid
# অডিও ফাইলে সেভ করি
audio_b64 = block.get('audio_data', '')
if audio_b64:
rel_path = save_audio(project_id, block_id, audio_b64, audio_format)
if rel_path:
cursor.execute(
'UPDATE markdown_blocks SET audio_path = ? WHERE id = ?',
(rel_path, block_id)
)
# ইমেজগুলো ফাইলে সেভ করি
for img in block.get('images', []):
img_format = clean_str(img.get('format', 'png')) or 'png'
cursor.execute('''
INSERT INTO block_images (block_id, image_data, image_format, alt_text, position)
VALUES (?, ?, ?, ?, ?)
INSERT INTO block_images
(block_id, image_data, image_path, image_format, alt_text, position)
VALUES (?, '', NULL, ?, ?, ?)
''', (
block_id,
clean_str(img.get('data', '')),
clean_str(img.get('format', 'png')),
img_format,
clean_str(img.get('alt_text', '')),
clean_str(img.get('position', 'before'))
))
image_id = cursor.lastrowid
img_b64 = img.get('data', '')
if img_b64:
img_rel = save_image(project_id, image_id, img_b64, img_format)
if img_rel:
cursor.execute(
'UPDATE block_images SET image_path = ? WHERE id = ?',
(img_rel, image_id)
)
cursor.execute('''
UPDATE projects SET updated_at = CURRENT_TIMESTAMP WHERE id = ?
@@ -392,7 +450,9 @@ def publish_project(project_id):
cursor.execute('''
SELECT COUNT(*) as cnt FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE c.project_id = ? AND mb.audio_data IS NOT NULL AND mb.audio_data != ''
WHERE c.project_id = ?
AND ((mb.audio_data IS NOT NULL AND mb.audio_data != '')
OR (mb.audio_path IS NOT NULL AND mb.audio_path != ''))
''', (project_id,))
audio_count = cursor.fetchone()['cnt']
@@ -440,7 +500,7 @@ def unpublish_project(project_id):
@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['POST'])
@login_required
def upload_thumbnail(project_id):
"""Upload a thumbnail image."""
"""Upload a thumbnail image (v4.3: stored as file)."""
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
@@ -460,19 +520,21 @@ def upload_thumbnail(project_id):
if fmt == 'jpg':
fmt = 'jpeg'
b64 = base64.b64encode(img_bytes).decode('utf-8')
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
if not cursor.fetchone():
return jsonify({'error': 'Project not found'}), 404
rel_path = save_thumbnail(project_id, img_bytes, fmt)
cursor.execute('''
UPDATE projects SET thumbnail_data = ?, thumbnail_format = ? WHERE id = ?
''', (b64, fmt, project_id))
UPDATE projects SET thumbnail_path = ?, thumbnail_data = NULL, thumbnail_format = ?
WHERE id = ?
''', (rel_path, fmt, project_id))
db.commit()
b64 = read_file_base64(rel_path)
return jsonify({
'success': True,
'thumbnail_data': b64,
@@ -483,9 +545,51 @@ def upload_thumbnail(project_id):
@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['DELETE'])
@login_required
def delete_thumbnail(project_id):
"""Remove project thumbnail."""
"""Remove project thumbnail (DB + file)."""
db = get_db()
cursor = db.cursor()
cursor.execute('UPDATE projects SET thumbnail_data = NULL WHERE id = ?', (project_id,))
cursor.execute('SELECT thumbnail_path FROM projects WHERE id = ?', (project_id,))
row = cursor.fetchone()
if row and row['thumbnail_path']:
from media_storage import delete_file
delete_file(row['thumbnail_path'])
cursor.execute('UPDATE projects SET thumbnail_data = NULL, thumbnail_path = NULL WHERE id = ?',
(project_id,))
db.commit()
return jsonify({'success': True})
# ============================================
# v4.3: Database Maintenance (VACUUM + stats)
# ============================================
@project_bp.route('/api/maintenance/db-stats', methods=['GET'])
@login_required
def db_stats():
    """Return database statistics together with media storage usage.

    The response is whatever ``get_db_stats()`` reports, extended with two
    extra keys: ``media_size_bytes`` (raw value from
    ``get_storage_usage_bytes()``) and ``media_size_mb`` (the same value
    converted to mebibytes, rounded to two decimals).
    """
    bytes_per_mb = 1024 * 1024

    payload = get_db_stats()
    media_total = get_storage_usage_bytes()

    # Augment the DB stats in place; the mapping is serialized as-is.
    payload['media_size_bytes'] = media_total
    payload['media_size_mb'] = round(media_total / bytes_per_mb, 2)

    return jsonify(payload)
@project_bp.route('/api/maintenance/vacuum', methods=['POST'])
@login_required
def run_vacuum():
    """Run a manual database VACUUM and report how much space it freed.

    Stats are captured via ``get_db_stats()`` before and after calling
    ``vacuum_db()``. If the vacuum raises, a JSON error with HTTP 500 is
    returned. Otherwise the response carries both snapshots and the
    difference of their ``file_size_mb`` values, rounded to two decimals.
    """
    stats_before = get_db_stats()

    try:
        vacuum_db()
    except Exception as exc:
        # Surface the failure to the caller instead of a generic 500 page.
        return jsonify({'error': f'VACUUM failed: {exc!s}'}), 500

    stats_after = get_db_stats()
    size_delta = stats_before['file_size_mb'] - stats_after['file_size_mb']
    reclaimed_mb = round(size_delta, 2)

    response_body = {
        'success': True,
        'message': f'VACUUM complete. Reclaimed {reclaimed_mb} MB.',
        'before': stats_before,
        'after': stats_after,
        'reclaimed_mb': reclaimed_mb
    }
    return jsonify(response_body)

View File

@@ -1,10 +1,12 @@
# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks
# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks (v4.3)
import re
import os
import json
from flask import Blueprint, jsonify, send_from_directory, abort
from flask import Blueprint, jsonify, send_from_directory, send_file, abort
from db import get_db
from media_storage import get_safe_abs_path, read_file_base64
public_bp = Blueprint('public', __name__)
@@ -53,7 +55,6 @@ def public_reader(project_id):
if not project or not project['is_published']:
abort(404)
# Increment view count
cursor.execute('UPDATE projects SET view_count = view_count + 1 WHERE id = ?', (project_id,))
db.commit()
@@ -68,7 +69,7 @@ def list_published_books():
cursor.execute('''
SELECT p.id, p.name, p.description, p.author, p.category,
p.thumbnail_data, p.thumbnail_format, p.published_at,
p.thumbnail_data, p.thumbnail_format, p.thumbnail_path, p.published_at,
p.view_count, p.created_at,
(SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count
FROM projects p
@@ -78,13 +79,17 @@ def list_published_books():
books = []
for row in cursor.fetchall():
thumb_data = row['thumbnail_data']
if row['thumbnail_path']:
thumb_data = read_file_base64(row['thumbnail_path'])
books.append({
'id': row['id'],
'name': row['name'],
'description': row['description'] or '',
'author': row['author'] or '',
'category': row['category'] or '',
'thumbnail_data': row['thumbnail_data'],
'thumbnail_data': thumb_data,
'thumbnail_format': row['thumbnail_format'] or 'png',
'published_at': row['published_at'],
'view_count': row['view_count'] or 0,
@@ -96,11 +101,7 @@ def list_published_books():
@public_bp.route('/api/public/books/<int:project_id>', methods=['GET'])
def get_published_book(project_id):
"""
Get book metadata WITHOUT audio_data.
Audio is loaded lazily via /api/public/books/<id>/audio/<block_id>.
This keeps the response small (<1 MB) and avoids proxy truncation issues.
"""
"""Get book metadata WITHOUT audio_data (lazy-loaded separately)."""
db = get_db()
cursor = db.cursor()
@@ -120,8 +121,9 @@ def get_published_book(project_id):
chapters_data = []
for chapter in chapters:
cursor.execute('''
SELECT id, block_order, block_type, content, audio_format, transcription,
(audio_data IS NOT NULL AND audio_data != '') as has_audio
SELECT id, block_order, block_type, content, audio_format, audio_path, transcription,
((audio_data IS NOT NULL AND audio_data != '')
OR (audio_path IS NOT NULL AND audio_path != '')) as has_audio
FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
''', (chapter['id'],))
blocks = cursor.fetchall()
@@ -129,7 +131,8 @@ def get_published_book(project_id):
blocks_data = []
for block in blocks:
cursor.execute('''
SELECT * FROM block_images WHERE block_id = ? ORDER BY id
SELECT image_data, image_format, alt_text, position, image_path
FROM block_images WHERE block_id = ? ORDER BY id
''', (block['id'],))
images = cursor.fetchall()
@@ -141,21 +144,30 @@ def get_published_book(project_id):
except (json.JSONDecodeError, TypeError):
transcription = []
images_data = []
for img in images:
img_data = ''
if img['image_path']:
img_data = read_file_base64(img['image_path'])
elif img['image_data']:
img_data = clean_str(img['image_data'])
images_data.append({
'data': img_data,
'format': clean_str(img['image_format']) or 'png',
'alt_text': clean_str(img['alt_text']),
'position': clean_str(img['position']) or 'before'
})
blocks_data.append({
'id': block['id'],
'block_order': block['block_order'],
'block_type': clean_str(block['block_type']),
'content': clean_str(block['content']),
'audio_data': '', # Empty here; loaded lazily by frontend
'audio_data': '',
'audio_format': clean_str(block['audio_format']) or 'mp3',
'has_audio': bool(block['has_audio']),
'transcription': transcription,
'images': [{
'data': clean_str(img['image_data']),
'format': clean_str(img['image_format']) or 'png',
'alt_text': clean_str(img['alt_text']),
'position': clean_str(img['position']) or 'before'
} for img in images]
'images': images_data
})
chapters_data.append({
@@ -165,12 +177,16 @@ def get_published_book(project_id):
'blocks': blocks_data
})
thumb_data = project['thumbnail_data']
if project['thumbnail_path']:
thumb_data = read_file_base64(project['thumbnail_path'])
return jsonify({
'id': project['id'],
'name': clean_str(project['name']),
'description': clean_str(project['description']) if project['description'] else '',
'author': clean_str(project['author']) if project['author'] else '',
'thumbnail_data': project['thumbnail_data'],
'thumbnail_data': thumb_data,
'thumbnail_format': project['thumbnail_format'] or 'png',
'chapters': chapters_data
})
@@ -178,21 +194,17 @@ def get_published_book(project_id):
@public_bp.route('/api/public/books/<int:project_id>/audio/<int:block_id>', methods=['GET'])
def get_public_block_audio(project_id, block_id):
"""
Return audio_data (base64) for a single block in a published book.
No auth required since the book is published publicly.
"""
"""Stream audio file for a published book block (v4.3)."""
db = get_db()
cursor = db.cursor()
# Verify project is published
cursor.execute('SELECT is_published FROM projects WHERE id = ?', (project_id,))
project = cursor.fetchone()
if not project or not project['is_published']:
return jsonify({'error': 'Book not found or not published'}), 404
cursor.execute('''
SELECT mb.audio_data, mb.audio_format
SELECT mb.audio_data, mb.audio_path, mb.audio_format
FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE mb.id = ? AND c.project_id = ?
@@ -202,10 +214,16 @@ def get_public_block_audio(project_id, block_id):
if not row:
return jsonify({'error': 'Block not found'}), 404
if not row['audio_data']:
return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})
if row['audio_path']:
abs_path = get_safe_abs_path(row['audio_path'])
if abs_path and os.path.exists(abs_path):
return send_file(abs_path, mimetype=f"audio/{row['audio_format'] or 'mp3'}",
conditional=True)
return jsonify({
'audio_data': clean_str(row['audio_data']),
'audio_format': clean_str(row['audio_format']) or 'mp3'
})
if row['audio_data']:
return jsonify({
'audio_data': clean_str(row['audio_data']),
'audio_format': clean_str(row['audio_format']) or 'mp3'
})
return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'})