diff --git a/routes/public_routes.py b/routes/public_routes.py index b625991..481973c 100644 --- a/routes/public_routes.py +++ b/routes/public_routes.py @@ -1,5 +1,6 @@ # routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks +import re import json from flask import Blueprint, jsonify, send_from_directory, abort @@ -8,6 +9,33 @@ from db import get_db public_bp = Blueprint('public', __name__) +# ============================================ +# Helpers +# ============================================ + +_CONTROL_CHAR_RE = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]') + + +def clean_str(s): + if s is None: + return '' + if not isinstance(s, str): + s = str(s) + return _CONTROL_CHAR_RE.sub('', s) + + +def clean_transcription(transcription): + if isinstance(transcription, list): + for t in transcription: + if isinstance(t, dict) and 'word' in t: + t['word'] = clean_str(t.get('word', '')) + return transcription + + +# ============================================ +# Routes +# ============================================ + @public_bp.route('/home') def public_home(): """Public homepage - Bookcase view of published audiobooks.""" @@ -68,7 +96,11 @@ def list_published_books(): @public_bp.route('/api/public/books/', methods=['GET']) def get_published_book(project_id): - """Get full published book content for the reader.""" + """ + Get book metadata WITHOUT audio_data. + Audio is loaded lazily via /api/public/books//audio/. + This keeps the response small (<1 MB) and avoids proxy truncation issues. + """ db = get_db() cursor = db.cursor() @@ -88,7 +120,9 @@ def get_published_book(project_id): chapters_data = [] for chapter in chapters: cursor.execute(''' - SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order + SELECT id, block_order, block_type, content, audio_format, transcription, + (audio_data IS NOT NULL AND audio_data != '') as has_audio + FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order ''', (chapter['id'],)) blocks = cursor.fetchall() @@ -99,35 +133,79 @@ def get_published_book(project_id): ''', (block['id'],)) images = cursor.fetchall() + transcription = [] + if block['transcription']: + try: + transcription = json.loads(block['transcription']) + transcription = clean_transcription(transcription) + except (json.JSONDecodeError, TypeError): + transcription = [] + blocks_data.append({ 'id': block['id'], 'block_order': block['block_order'], - 'block_type': block['block_type'], - 'content': block['content'], - 'audio_data': block['audio_data'], - 'audio_format': block['audio_format'], - 'transcription': json.loads(block['transcription']) if block['transcription'] else [], + 'block_type': clean_str(block['block_type']), + 'content': clean_str(block['content']), + 'audio_data': '', # Empty here; loaded lazily by frontend + 'audio_format': clean_str(block['audio_format']) or 'mp3', + 'has_audio': bool(block['has_audio']), + 'transcription': transcription, 'images': [{ - 'data': img['image_data'], - 'format': img['image_format'], - 'alt_text': img['alt_text'], - 'position': img['position'] + 'data': clean_str(img['image_data']), + 'format': clean_str(img['image_format']) or 'png', + 'alt_text': clean_str(img['alt_text']), + 'position': clean_str(img['position']) or 'before' } for img in images] }) chapters_data.append({ 'id': chapter['id'], 'chapter_number': chapter['chapter_number'], - 'title': chapter['title'], + 'title': clean_str(chapter['title']), 'blocks': blocks_data }) return jsonify({ 'id': project['id'], - 'name': project['name'], - 'description': project['description'] or '', - 'author': project['author'] or '', + 'name': clean_str(project['name']), + 'description': clean_str(project['description']) if project['description'] else '', + 'author': clean_str(project['author']) if project['author'] else '', 'thumbnail_data': project['thumbnail_data'], 'thumbnail_format': project['thumbnail_format'] or 'png', 'chapters': chapters_data }) + + +@public_bp.route('/api/public/books//audio/', methods=['GET']) +def get_public_block_audio(project_id, block_id): + """ + Return audio_data (base64) for a single block in a published book. + No auth required since the book is published publicly. + """ + db = get_db() + cursor = db.cursor() + + # Verify project is published + cursor.execute('SELECT is_published FROM projects WHERE id = ?', (project_id,)) + project = cursor.fetchone() + if not project or not project['is_published']: + return jsonify({'error': 'Book not found or not published'}), 404 + + cursor.execute(''' + SELECT mb.audio_data, mb.audio_format + FROM markdown_blocks mb + JOIN chapters c ON mb.chapter_id = c.id + WHERE mb.id = ? AND c.project_id = ? + ''', (block_id, project_id)) + row = cursor.fetchone() + + if not row: + return jsonify({'error': 'Block not found'}), 404 + + if not row['audio_data']: + return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'}) + + return jsonify({ + 'audio_data': clean_str(row['audio_data']), + 'audio_format': clean_str(row['audio_format']) or 'mp3' + }) diff --git a/static/js/interactive-reader.js b/static/js/interactive-reader.js index 2ef7f48..5416706 100644 --- a/static/js/interactive-reader.js +++ b/static/js/interactive-reader.js @@ -1,18 +1,12 @@ /** - * Interactive Reader Module — Smart Preload Architecture (v3) + * Interactive Reader Module — Lazy Audio Loading (v4) * - * Loading Strategy: - * - Text and timestamps come from in-memory `editorBlocks` (already loaded). - * - Audio base64 → Blob URL conversion is DEFERRED until needed. - * - When block N plays, preload blob URLs for N+1, N+2 (background). - * - At 70% mark of N's audio, ensure N+1 is ready (safety net). - * - Memory cap: keep at most MAX_AUDIO_LOADED blob URLs alive; - * revoke distant past audio to free browser memory. - * - * Scroll Strategy: - * - Manual navigation (button / outline / word click): scroll block to top. - * - Auto-advance (audio ended → next block): NO block scroll — let the - * word highlighter smoothly carry the user. Prevents jarring jumps. + * Strategy: + * - Text + transcription are already loaded (from editorBlocks in memory). + * - Audio is fetched on-demand from /api/projects//audio/ + * when the user wants to play that block. + * - Smart preload: at 70% of current block, fetch next block's audio. + * - Memory cap: keep at most MAX_AUDIO_LOADED blob URLs alive. */ // ============================================ @@ -72,7 +66,8 @@ function renderInteractiveReader() { isFirstBlockOfChapter = false; - if (!isImageBlock && blockData && blockData.audio_data) { + // has_audio comes from server; audio_data may not yet be loaded + if (!isImageBlock && blockData && (blockData.audio_data || blockData.has_audio)) { hasAudio = true; } currentIndex++; @@ -102,7 +97,6 @@ function renderInteractiveReader() { let html = '
'; - // Cleanup any previous instances (revoke blob URLs) cleanupAllReaderInstances(); readerInstances = []; @@ -112,7 +106,8 @@ function renderInteractiveReader() { const blockData = block._editorData; const isImageBlock = block._isImage; - const hasBlockAudio = !isImageBlock && blockData && blockData.audio_data; + // has_audio is the SOURCE OF TRUTH for whether this block has audio on server + const hasBlockAudio = !isImageBlock && blockData && (blockData.audio_data || blockData.has_audio); const blockId = blockData ? blockData.id : `reader_${Date.now()}_${Math.random().toString(36).substr(2, 5)}`; html += `
`; @@ -150,7 +145,7 @@ function renderInteractiveReader() { wordMap: [], sentenceData: [], audio: null, - audioUrl: null, // blob URL ref for cleanup + audioUrl: null, audioReady: false, audioLoadingPromise: null, midPreloadTriggered: false, @@ -166,7 +161,6 @@ function renderInteractiveReader() { html += '
'; container.innerHTML = html; - // Render words and run sync for every instance (text is cheap and already in memory) for (const inst of readerInstances) { if (inst.isImage || !inst.content) continue; const contentEl = document.getElementById(`reader-content-${inst.index}`); @@ -453,25 +447,52 @@ function setReaderButtonLoading(isLoading) { } // ============================================ -// Audio Lazy Loading + Memory Management +// Audio Lazy Loading // ============================================ +/** + * Fetch audio for an instance. If already loaded into editorBlocks + * by background loader, use that. Otherwise fetch from API directly. + */ +async function fetchAudioForInstance(inst) { + // Path 1: audio_data already in editorBlocks (loaded in background) + if (inst.blockData && inst.blockData.audio_data) { + return { + audio_data: inst.blockData.audio_data, + audio_format: inst.blockData.audio_format || 'mp3' + }; + } + + // Path 2: fetch from API + if (!inst.blockData || !inst.blockData.db_id || !currentProject || !currentProject.id) { + throw new Error('Cannot fetch audio: missing block info'); + } + + const resp = await fetch(`/api/projects/${currentProject.id}/audio/${inst.blockData.db_id}`); + const data = await resp.json(); + + if (data.error || !data.audio_data) { + throw new Error(data.error || 'No audio data'); + } + + // Cache into editorBlocks for future use + inst.blockData.audio_data = data.audio_data; + inst.blockData.audio_format = data.audio_format; + + return data; +} + function ensureReaderAudioLoaded(inst) { if (inst.audioReady && inst.audio) return Promise.resolve(inst); if (inst.audioLoadingPromise) return inst.audioLoadingPromise; - inst.audioLoadingPromise = new Promise((resolve, reject) => { - const blockData = inst.blockData; - if (!blockData || !blockData.audio_data) { - inst.audioLoadingPromise = null; - return reject(new Error('No audio data')); - } - - try { - const audioBlob = base64ToBlob(blockData.audio_data, `audio/${blockData.audio_format || 'mp3'}`); - const audioUrl = URL.createObjectURL(audioBlob); - const audio = new Audio(audioUrl); + inst.audioLoadingPromise = (async () => { + const audioInfo = await fetchAudioForInstance(inst); + const audioBlob = base64ToBlob(audioInfo.audio_data, `audio/${audioInfo.audio_format || 'mp3'}`); + const audioUrl = URL.createObjectURL(audioBlob); + const audio = new Audio(audioUrl); + return new Promise((resolve, reject) => { const onCanPlay = () => { audio.removeEventListener('error', onError); inst.audio = audio; @@ -488,14 +509,12 @@ function ensureReaderAudioLoaded(inst) { }; audio.addEventListener('canplay', onCanPlay, { once: true }); audio.addEventListener('error', onError, { once: true }); - - // Audio.load is implicit; setting src starts loading metadata audio.preload = 'auto'; audio.load(); - } catch (err) { - inst.audioLoadingPromise = null; - reject(err); - } + }); + })().catch(err => { + inst.audioLoadingPromise = null; + throw err; }); return inst.audioLoadingPromise; @@ -524,7 +543,6 @@ function wireReaderAudioEvents(inst) { currentReaderIndex = -1; } }); - // Mid-play safety net: ensure next is ready by 70% of current audio.addEventListener('timeupdate', () => { if (inst.midPreloadTriggered) return; if (!audio.duration || isNaN(audio.duration)) return; @@ -651,7 +669,6 @@ async function playReaderInstanceByIndex(index, opts = {}) { const inst = readerInstances[index]; if (!inst.hasAudio) { - // Skip non-audio blocks playReaderInstanceByIndex(findNextAudioIndex(index), opts); return; } @@ -680,7 +697,6 @@ async function playReaderInstanceByIndex(index, opts = {}) { await inst.audio.play(); updateReaderButton('playing'); - // Block-level scroll ONLY for manual navigation if (!isAutoAdvance) { const blockEl = document.querySelector(`.reader-block[data-reader-index="${index}"]`); if (blockEl) { @@ -749,7 +765,6 @@ function startReaderHighlightLoop(inst) { activeSpan.classList.add('current-word'); const rect = activeSpan.getBoundingClientRect(); - // Relaxed threshold for smoother scroll if (rect.top < window.innerHeight * 0.2 || rect.bottom > window.innerHeight * 0.8) { activeSpan.scrollIntoView({ behavior: 'smooth', block: 'center' }); } @@ -801,7 +816,6 @@ function updateReaderButton(state) { const playIcon = document.getElementById('reader-btn-play'); const pauseIcon = document.getElementById('reader-btn-pause'); - // If loading, the spinner overrides icons if (btn.classList.contains('loading')) return; if (readerStarted) { diff --git a/templates/public_reader.html b/templates/public_reader.html index 6da5f77..a6544fb 100644 --- a/templates/public_reader.html +++ b/templates/public_reader.html @@ -99,17 +99,6 @@ .story-text-container p { margin-bottom: 1.2em; } .story-text-container img { max-width: 100%; height: auto; border-radius: 8px; margin: 16px auto; display: block; } - .block-loading-spinner { - display: inline-flex; align-items: center; gap: 8px; - color: #6b7280; font-size: 0.9rem; font-family: "Inter", sans-serif; - padding: 8px 0; - } - .block-loading-spinner::before { - content: ''; width: 16px; height: 16px; - border: 2px solid #e2e8f0; border-top-color: #5753c9; - border-radius: 50%; animation: spin 0.8s linear infinite; - } - .word { transition: all 0.15s ease; border-radius: 3px; cursor: pointer; } .word:hover { background-color: #f1f5f9; } .current-sentence-bg { @@ -121,7 +110,6 @@ .story-image-block { text-align: center; margin: 24px 0; } .story-image-block img { max-width: 100%; height: auto; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); } - /* Floating Player Button — Fixed RIGHT side */ #floating-player-btn { position: fixed; top: 5rem; @@ -186,7 +174,6 @@ border-radius: 50%; animation: spin 0.8s linear infinite; } - /* Block highlight on outline click */ .story-block.highlight-section, .story-image-block.highlight-section { animation: highlightPulse 2s ease-out; @@ -265,19 +252,11 @@