Lazy audio loading for interactive and public readers

This commit is contained in:
Ashim Kumar
2026-05-23 17:48:03 +06:00
parent e0e3b65c75
commit 965470853e
3 changed files with 181 additions and 141 deletions

View File

@@ -1,5 +1,6 @@
# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks
import re
import json
from flask import Blueprint, jsonify, send_from_directory, abort
@@ -8,6 +9,33 @@ from db import get_db
public_bp = Blueprint('public', __name__)
# ============================================
# Helpers
# ============================================
# Matches ASCII control characters (\x00-\x1F) and DEL (\x7F), except
# tab (\x09), line feed (\x0A) and carriage return (\x0D), which are kept.
_CONTROL_CHAR_RE = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')
def clean_str(s):
    """Return *s* as text with control characters removed.

    ``None`` becomes an empty string, and any non-string value is converted
    with ``str()`` before every character matched by ``_CONTROL_CHAR_RE``
    is stripped out.
    """
    if s is None:
        return ''
    text = s if isinstance(s, str) else str(s)
    return _CONTROL_CHAR_RE.sub('', text)
def clean_transcription(transcription):
    """Sanitize the ``word`` field of every entry in a transcription list.

    The list is modified in place and also returned. Entries that are not
    dicts, or that have no ``word`` key, are left untouched. A value that is
    not a list is returned unchanged.
    """
    if not isinstance(transcription, list):
        return transcription
    for entry in transcription:
        if isinstance(entry, dict) and 'word' in entry:
            entry['word'] = clean_str(entry['word'])
    return transcription
# ============================================
# Routes
# ============================================
@public_bp.route('/home')
def public_home():
"""Public homepage - Bookcase view of published audiobooks."""
@@ -68,7 +96,11 @@ def list_published_books():
@public_bp.route('/api/public/books/<int:project_id>', methods=['GET'])
def get_published_book(project_id):
"""Get full published book content for the reader."""
"""
Get book metadata WITHOUT audio_data.
Audio is loaded lazily via /api/public/books/<id>/audio/<block_id>.
This keeps the response small (<1 MB) and avoids proxy truncation issues.
"""
db = get_db()
cursor = db.cursor()
@@ -88,7 +120,9 @@ def get_published_book(project_id):
chapters_data = []
for chapter in chapters:
cursor.execute('''
SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
SELECT id, block_order, block_type, content, audio_format, transcription,
(audio_data IS NOT NULL AND audio_data != '') as has_audio
FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
''', (chapter['id'],))
blocks = cursor.fetchall()
@@ -99,35 +133,79 @@ def get_published_book(project_id):
''', (block['id'],))
images = cursor.fetchall()
transcription = []
if block['transcription']:
try:
transcription = json.loads(block['transcription'])
transcription = clean_transcription(transcription)
except (json.JSONDecodeError, TypeError):
transcription = []
blocks_data.append({
'id': block['id'],
'block_order': block['block_order'],
'block_type': block['block_type'],
'content': block['content'],
'audio_data': block['audio_data'],
'audio_format': block['audio_format'],
'transcription': json.loads(block['transcription']) if block['transcription'] else [],
'block_type': clean_str(block['block_type']),
'content': clean_str(block['content']),
'audio_data': '', # Empty here; loaded lazily by frontend
'audio_format': clean_str(block['audio_format']) or 'mp3',
'has_audio': bool(block['has_audio']),
'transcription': transcription,
'images': [{
'data': img['image_data'],
'format': img['image_format'],
'alt_text': img['alt_text'],
'position': img['position']
'data': clean_str(img['image_data']),
'format': clean_str(img['image_format']) or 'png',
'alt_text': clean_str(img['alt_text']),
'position': clean_str(img['position']) or 'before'
} for img in images]
})
chapters_data.append({
'id': chapter['id'],
'chapter_number': chapter['chapter_number'],
'title': chapter['title'],
'title': clean_str(chapter['title']),
'blocks': blocks_data
})
return jsonify({
'id': project['id'],
'name': project['name'],
'description': project['description'] or '',
'author': project['author'] or '',
'name': clean_str(project['name']),
'description': clean_str(project['description']) if project['description'] else '',
'author': clean_str(project['author']) if project['author'] else '',
'thumbnail_data': project['thumbnail_data'],
'thumbnail_format': project['thumbnail_format'] or 'png',
'chapters': chapters_data
})
@public_bp.route('/api/public/books/<int:project_id>/audio/<int:block_id>', methods=['GET'])
def get_public_block_audio(project_id, block_id):
    """
    Return audio_data (base64) for a single block in a published book.

    No auth required since the book is published publicly.

    Args:
        project_id: ID of the project (book), taken from the URL.
        block_id: ID of the markdown block, taken from the URL.

    Returns:
        A JSON response with ``audio_data`` and ``audio_format``.
        ``audio_data`` is an empty string when the block has no audio, and
        ``audio_format`` falls back to ``'mp3'``. A JSON error with status
        404 is returned when the project is missing or unpublished, or when
        the block does not belong to the project.
    """
    db = get_db()
    cursor = db.cursor()

    # Verify project is published
    cursor.execute('SELECT is_published FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project or not project['is_published']:
        return jsonify({'error': 'Book not found or not published'}), 404

    # The join through chapters restricts the lookup to blocks that belong
    # to the requested project.
    cursor.execute('''
        SELECT mb.audio_data, mb.audio_format
        FROM markdown_blocks mb
        JOIN chapters c ON mb.chapter_id = c.id
        WHERE mb.id = ? AND c.project_id = ?
    ''', (block_id, project_id))
    row = cursor.fetchone()
    if not row:
        return jsonify({'error': 'Block not found'}), 404

    # A single return path keeps audio_format sanitized whether or not the
    # block has audio; missing/empty audio is reported as an empty string.
    audio_data = clean_str(row['audio_data']) if row['audio_data'] else ''
    return jsonify({
        'audio_data': audio_data,
        'audio_format': clean_str(row['audio_format']) or 'mp3'
    })

View File

@@ -1,18 +1,12 @@
/**
* Interactive Reader Module — Smart Preload Architecture (v3)
* Interactive Reader Module — Lazy Audio Loading (v4)
*
* Loading Strategy:
* - Text and timestamps come from in-memory `editorBlocks` (already loaded).
* - Audio base64 → Blob URL conversion is DEFERRED until needed.
* - When block N plays, preload blob URLs for N+1, N+2 (background).
* - At 70% mark of N's audio, ensure N+1 is ready (safety net).
* - Memory cap: keep at most MAX_AUDIO_LOADED blob URLs alive;
* revoke distant past audio to free browser memory.
*
* Scroll Strategy:
* - Manual navigation (button / outline / word click): scroll block to top.
* - Auto-advance (audio ended → next block): NO block scroll — let the
* word highlighter smoothly carry the user. Prevents jarring jumps.
* Strategy:
* - Text + transcription are already loaded (from editorBlocks in memory).
* - Audio is fetched on-demand from /api/projects/<id>/audio/<block_id>
* when the user wants to play that block.
* - Smart preload: at 70% of current block, fetch next block's audio.
* - Memory cap: keep at most MAX_AUDIO_LOADED blob URLs alive.
*/
// ============================================
@@ -72,7 +66,8 @@ function renderInteractiveReader() {
isFirstBlockOfChapter = false;
if (!isImageBlock && blockData && blockData.audio_data) {
// has_audio comes from server; audio_data may not yet be loaded
if (!isImageBlock && blockData && (blockData.audio_data || blockData.has_audio)) {
hasAudio = true;
}
currentIndex++;
@@ -102,7 +97,6 @@ function renderInteractiveReader() {
let html = '<div class="reader-flow">';
// Cleanup any previous instances (revoke blob URLs)
cleanupAllReaderInstances();
readerInstances = [];
@@ -112,7 +106,8 @@ function renderInteractiveReader() {
const blockData = block._editorData;
const isImageBlock = block._isImage;
const hasBlockAudio = !isImageBlock && blockData && blockData.audio_data;
// has_audio is the SOURCE OF TRUTH for whether this block has audio on server
const hasBlockAudio = !isImageBlock && blockData && (blockData.audio_data || blockData.has_audio);
const blockId = blockData ? blockData.id : `reader_${Date.now()}_${Math.random().toString(36).substr(2, 5)}`;
html += `<div class="reader-block" data-block-id="${blockId}" data-reader-index="${globalBlockIndex}" data-has-audio="${!!hasBlockAudio}">`;
@@ -150,7 +145,7 @@ function renderInteractiveReader() {
wordMap: [],
sentenceData: [],
audio: null,
audioUrl: null, // blob URL ref for cleanup
audioUrl: null,
audioReady: false,
audioLoadingPromise: null,
midPreloadTriggered: false,
@@ -166,7 +161,6 @@ function renderInteractiveReader() {
html += '</div>';
container.innerHTML = html;
// Render words and run sync for every instance (text is cheap and already in memory)
for (const inst of readerInstances) {
if (inst.isImage || !inst.content) continue;
const contentEl = document.getElementById(`reader-content-${inst.index}`);
@@ -453,25 +447,52 @@ function setReaderButtonLoading(isLoading) {
}
// ============================================
// Audio Lazy Loading + Memory Management
// Audio Lazy Loading
// ============================================
/**
 * Resolve the base64 audio payload for a reader instance.
 *
 * If audio_data is already present on the instance's block data, it is
 * returned directly. Otherwise it is fetched from
 * /api/projects/<project id>/audio/<block db_id> and cached on the block data.
 *
 * NOTE(review): the renderer builds DOM ids from blockData.id while this
 * function reads blockData.db_id — confirm both fields are populated.
 *
 * @param {Object} inst - reader instance; inst.blockData identifies the block.
 * @returns {Promise<Object>} object carrying audio_data and audio_format.
 * @throws {Error} when block/project info is missing, the request fails,
 *   or the response carries no audio.
 */
async function fetchAudioForInstance(inst) {
    const blockData = inst.blockData;

    // Path 1: audio_data already in editorBlocks (loaded in background)
    if (blockData && blockData.audio_data) {
        return {
            audio_data: blockData.audio_data,
            audio_format: blockData.audio_format || 'mp3'
        };
    }

    // Path 2: fetch from API
    if (!blockData || !blockData.db_id || !currentProject || !currentProject.id) {
        throw new Error('Cannot fetch audio: missing block info');
    }
    const resp = await fetch(`/api/projects/${currentProject.id}/audio/${blockData.db_id}`);

    if (!resp.ok) {
        // Prefer the server's JSON error message; fall back to the status
        // code when the body is not JSON (e.g. an HTML error page).
        let message = `Failed to fetch audio (HTTP ${resp.status})`;
        try {
            const errBody = await resp.json();
            if (errBody && errBody.error) message = errBody.error;
        } catch (parseErr) { /* non-JSON error body; keep the status message */ }
        throw new Error(message);
    }

    const data = await resp.json();
    if (!data || data.error || !data.audio_data) {
        throw new Error((data && data.error) || 'No audio data');
    }

    // Cache into editorBlocks for future use
    blockData.audio_data = data.audio_data;
    blockData.audio_format = data.audio_format || 'mp3';
    return data;
}
function ensureReaderAudioLoaded(inst) {
if (inst.audioReady && inst.audio) return Promise.resolve(inst);
if (inst.audioLoadingPromise) return inst.audioLoadingPromise;
inst.audioLoadingPromise = new Promise((resolve, reject) => {
const blockData = inst.blockData;
if (!blockData || !blockData.audio_data) {
inst.audioLoadingPromise = null;
return reject(new Error('No audio data'));
}
try {
const audioBlob = base64ToBlob(blockData.audio_data, `audio/${blockData.audio_format || 'mp3'}`);
const audioUrl = URL.createObjectURL(audioBlob);
const audio = new Audio(audioUrl);
inst.audioLoadingPromise = (async () => {
const audioInfo = await fetchAudioForInstance(inst);
const audioBlob = base64ToBlob(audioInfo.audio_data, `audio/${audioInfo.audio_format || 'mp3'}`);
const audioUrl = URL.createObjectURL(audioBlob);
const audio = new Audio(audioUrl);
return new Promise((resolve, reject) => {
const onCanPlay = () => {
audio.removeEventListener('error', onError);
inst.audio = audio;
@@ -488,14 +509,12 @@ function ensureReaderAudioLoaded(inst) {
};
audio.addEventListener('canplay', onCanPlay, { once: true });
audio.addEventListener('error', onError, { once: true });
// Audio.load is implicit; setting src starts loading metadata
audio.preload = 'auto';
audio.load();
} catch (err) {
inst.audioLoadingPromise = null;
reject(err);
}
});
})().catch(err => {
inst.audioLoadingPromise = null;
throw err;
});
return inst.audioLoadingPromise;
@@ -524,7 +543,6 @@ function wireReaderAudioEvents(inst) {
currentReaderIndex = -1;
}
});
// Mid-play safety net: ensure next is ready by 70% of current
audio.addEventListener('timeupdate', () => {
if (inst.midPreloadTriggered) return;
if (!audio.duration || isNaN(audio.duration)) return;
@@ -651,7 +669,6 @@ async function playReaderInstanceByIndex(index, opts = {}) {
const inst = readerInstances[index];
if (!inst.hasAudio) {
// Skip non-audio blocks
playReaderInstanceByIndex(findNextAudioIndex(index), opts);
return;
}
@@ -680,7 +697,6 @@ async function playReaderInstanceByIndex(index, opts = {}) {
await inst.audio.play();
updateReaderButton('playing');
// Block-level scroll ONLY for manual navigation
if (!isAutoAdvance) {
const blockEl = document.querySelector(`.reader-block[data-reader-index="${index}"]`);
if (blockEl) {
@@ -749,7 +765,6 @@ function startReaderHighlightLoop(inst) {
activeSpan.classList.add('current-word');
const rect = activeSpan.getBoundingClientRect();
// Relaxed threshold for smoother scroll
if (rect.top < window.innerHeight * 0.2 || rect.bottom > window.innerHeight * 0.8) {
activeSpan.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
@@ -801,7 +816,6 @@ function updateReaderButton(state) {
const playIcon = document.getElementById('reader-btn-play');
const pauseIcon = document.getElementById('reader-btn-pause');
// If loading, the spinner overrides icons
if (btn.classList.contains('loading')) return;
if (readerStarted) {

View File

@@ -99,17 +99,6 @@
.story-text-container p { margin-bottom: 1.2em; }
.story-text-container img { max-width: 100%; height: auto; border-radius: 8px; margin: 16px auto; display: block; }
.block-loading-spinner {
display: inline-flex; align-items: center; gap: 8px;
color: #6b7280; font-size: 0.9rem; font-family: "Inter", sans-serif;
padding: 8px 0;
}
.block-loading-spinner::before {
content: ''; width: 16px; height: 16px;
border: 2px solid #e2e8f0; border-top-color: #5753c9;
border-radius: 50%; animation: spin 0.8s linear infinite;
}
.word { transition: all 0.15s ease; border-radius: 3px; cursor: pointer; }
.word:hover { background-color: #f1f5f9; }
.current-sentence-bg {
@@ -121,7 +110,6 @@
.story-image-block { text-align: center; margin: 24px 0; }
.story-image-block img { max-width: 100%; height: auto; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }
/* Floating Player Button — Fixed RIGHT side */
#floating-player-btn {
position: fixed;
top: 5rem;
@@ -186,7 +174,6 @@
border-radius: 50%; animation: spin 0.8s linear infinite;
}
/* Block highlight on outline click */
.story-block.highlight-section,
.story-image-block.highlight-section {
animation: highlightPulse 2s ease-out;
@@ -265,19 +252,11 @@
<script>
/**
* Public Reader — Smart Preload Architecture (v3)
* Public Reader — Lazy Audio Loading
*
* Loading Strategy:
* 1. TEXT + TIMESTAMPS: loaded eagerly from /api/public/books/<id> in single batch.
* 2. AUDIO: base64 → Blob URL conversion is DEFERRED until needed.
* 3. Smart preload: when block N plays, preload blob URLs for N+1, N+2.
* At 70% mark of N, ensure N+1 is ready (safety net).
* 4. Memory cap: keep at most MAX_AUDIO_LOADED blob URLs alive;
* revoke distant past audio to free browser memory.
*
* Scroll Strategy:
* - Manual navigation (Start / outline click / word click): scroll to block.
* - Auto-advance (audio ended → next): NO block scroll — word highlighter carries user.
* Audio is fetched per-block from /api/public/books/<id>/audio/<block_id>
* when the user wants to play it. This avoids loading 15-20 MB
* of base64 audio up front (which gets truncated by reverse proxies).
*/
const pathParts = window.location.pathname.split('/');
@@ -297,7 +276,7 @@
let hasStarted = false;
let navObserver = null;
// Tunables (matches reader_templates/index.html)
// Tunables
const PRELOAD_AHEAD = 2;
const MID_PRELOAD_THRESHOLD = 0.7;
const MAX_AUDIO_LOADED = 5;
@@ -306,9 +285,6 @@
floatingPlayerBtn.addEventListener("click", handleFloatingBtnClick);
mainContainer.addEventListener("click", handleTextClick);
// ===================================================
// UI Helpers
// ===================================================
/** Put `msg` into the toast text element and add the "visible" class to the toast. */
function showToast(msg) {
    toastText.textContent = msg;
    toastEl.classList.add("visible");
}
/** Hide the toast by removing the "visible" class from the toast element. */
function hideToast() {
    toastEl.classList.remove("visible");
}
/**
 * Set or clear the "loading" class on the floating player button.
 * @param {boolean} b - passed as the force argument of classList.toggle.
 */
function setButtonLoading(b) {
    floatingPlayerBtn.classList.toggle("loading", b);
}
@@ -329,10 +305,8 @@
}
}
// ===================================================
// INITIAL LOAD
// ===================================================
try {
// Step 1: Load metadata only (no audio_data)
const resp = await fetch(`/api/public/books/${BOOK_ID}`);
if (!resp.ok) throw new Error('Failed to load book');
const book = await resp.json();
@@ -341,7 +315,6 @@
document.getElementById("book-subtitle").textContent = book.author ? `by ${book.author}` : 'An interactive audiobook';
document.title = book.name + ' - Audiobook Reader';
// Build flat list of blocks across chapters
let globalIdx = 0;
for (const chapter of book.chapters) {
let firstInChapter = true;
@@ -369,7 +342,8 @@
continue;
}
if (!block.audio_data) continue;
// Skip blocks that have no audio at all
if (!block.has_audio) continue;
const blockId = `story-block-${globalIdx}`;
@@ -379,7 +353,6 @@
mainContainer.insertAdjacentHTML("beforeend", `
<div id="${blockId}" class="story-block mt-4" data-instance-index="${storyInstances.length}" data-chapter="${chapter.chapter_number}">
<article class="story-text-container"></article>
<audio class="audio-player" preload="none" style="display:none;"></audio>
</div>
`);
@@ -388,8 +361,9 @@
blockEl: document.getElementById(blockId),
block: block,
chapter: chapter,
dbBlockId: block.id, // Database block ID for lazy fetch
audio: null,
audioUrl: null, // blob URL ref for cleanup
audioUrl: null,
audioReady: false,
audioLoadingPromise: null,
midPreloadTriggered: false,
@@ -405,7 +379,7 @@
}
}
// Render text + sync for each instance (cheap, in-memory)
// Render text + sync for each instance
for (const inst of storyInstances) {
renderMarkdownInto(inst);
smartSync(inst);
@@ -420,9 +394,6 @@
mainContainer.innerHTML = `<p class="text-center text-danger">Error loading book: ${e.message}</p>`;
}
// ===================================================
// Outline / Navigation
// ===================================================
function addOutlineEntry(title, chapter, targetId) {
const li = document.createElement("li");
li.textContent = title;
@@ -465,9 +436,6 @@
document.querySelectorAll('.story-block, .story-image-block').forEach(b => navObserver.observe(b));
}
// ===================================================
// Render & Sync
// ===================================================
function renderMarkdownInto(inst) {
const container = inst.blockEl.querySelector(".story-text-container");
container.innerHTML = "";
@@ -531,23 +499,27 @@
}
// ===================================================
// AUDIO LAZY LOADING + MEMORY MANAGEMENT
// AUDIO LAZY LOADING (per-block fetch)
// ===================================================
/**
 * Request one block's audio payload from the public per-block audio endpoint.
 *
 * Despite its name, this resolves to the parsed JSON body (audio_data and
 * audio_format), not a Blob; the caller performs the base64 → Blob conversion.
 *
 * @param {Object} inst - story instance; inst.dbBlockId selects the block.
 * @returns {Promise<Object>} the parsed JSON response.
 * @throws {Error} when the HTTP status is not OK, the body reports an error,
 *   or the body has no audio_data.
 */
async function fetchAudioBlob(inst) {
    const endpoint = `/api/public/books/${BOOK_ID}/audio/${inst.dbBlockId}`;
    const response = await fetch(endpoint);
    if (!response.ok) {
        throw new Error('Failed to fetch audio');
    }

    const payload = await response.json();
    if (payload.error || !payload.audio_data) {
        throw new Error(payload.error || 'No audio data');
    }
    return payload;
}
function ensureAudioLoaded(inst) {
if (inst.audioReady && inst.audio) return Promise.resolve(inst);
if (inst.audioLoadingPromise) return inst.audioLoadingPromise;
inst.audioLoadingPromise = new Promise((resolve, reject) => {
if (!inst.block.audio_data) {
inst.audioLoadingPromise = null;
return reject(new Error('No audio data'));
}
try {
const audio = inst.blockEl.querySelector('.audio-player');
const blob = base64ToBlob(inst.block.audio_data, `audio/${inst.block.audio_format || 'mp3'}`);
const url = URL.createObjectURL(blob);
inst.audioLoadingPromise = (async () => {
const audioData = await fetchAudioBlob(inst);
const blob = base64ToBlob(audioData.audio_data, `audio/${audioData.audio_format || 'mp3'}`);
const url = URL.createObjectURL(blob);
const audio = new Audio(url);
return new Promise((resolve, reject) => {
const onCanPlay = () => {
audio.removeEventListener('error', onError);
inst.audio = audio;
@@ -562,18 +534,16 @@
inst.audioLoadingPromise = null;
reject(new Error('Audio failed to load'));
};
audio.addEventListener('canplay', onCanPlay, { once: true });
audio.addEventListener('error', onError, { once: true });
audio.preload = 'auto';
audio.src = url;
audio.load();
} catch (err) {
inst.audioLoadingPromise = null;
reject(err);
}
});
})().catch(err => {
inst.audioLoadingPromise = null;
throw err;
});
return inst.audioLoadingPromise;
}
@@ -600,7 +570,6 @@
updateFloatingButton('paused');
}
});
// Mid-play safety net
audio.addEventListener('timeupdate', () => {
if (inst.midPreloadTriggered) return;
if (!audio.duration || isNaN(audio.duration)) return;
@@ -614,9 +583,6 @@
});
}
/**
* Preload N audio blocks ahead (fire-and-forget).
*/
function preloadAhead(fromIndex) {
for (let i = 1; i <= PRELOAD_AHEAD; i++) {
const idx = fromIndex + i;
@@ -625,11 +591,6 @@
}
}
/**
* Memory management: keep only sliding window of audio elements loaded.
* Window = [currentIndex - KEEP_BEHIND, currentIndex + PRELOAD_AHEAD]
* Bounded to MAX_AUDIO_LOADED total.
*/
function pruneLoadedAudio(currentIndex) {
const loaded = storyInstances.filter(i => i.audioReady && i.audio && i.audioUrl);
if (loaded.length <= MAX_AUDIO_LOADED) return;
@@ -659,23 +620,17 @@
/**
 * Release the audio resources held by a story instance and reset its
 * loading state so the audio can be loaded again later.
 *
 * Does nothing when the instance has no audio element.
 *
 * @param {Object} inst - story instance whose audio, audioUrl, audioReady,
 *   audioLoadingPromise and midPreloadTriggered fields are reset.
 */
function releaseAudio(inst) {
    if (!inst.audio) return;

    try {
        inst.audio.pause();
        // Clearing src and calling load() resets the media element so it
        // stops referencing the blob URL before that URL is revoked.
        inst.audio.removeAttribute('src');
        inst.audio.load();
    } catch (e) { /* best-effort teardown; ignore */ }

    if (inst.audioUrl) {
        try { URL.revokeObjectURL(inst.audioUrl); } catch (e) { /* ignore */ }
        inst.audioUrl = null;
    }

    inst.audio = null;
    inst.audioReady = false;
    inst.audioLoadingPromise = null;
    inst.midPreloadTriggered = false;
}
// ===================================================
// PLAYBACK
// ===================================================
async function playInstance(idx, ts = 0, opts = {}) {
if (idx < 0 || idx >= storyInstances.length) return;
const inst = storyInstances[idx];
@@ -707,7 +662,6 @@
await inst.audio.play();
updateFloatingButton('playing');
// Block-level scroll ONLY for manual navigation
if (!isAutoAdvance) {
const rect = inst.blockEl.getBoundingClientRect();
if (rect.top < 0 || rect.top > window.innerHeight * 0.6) {
@@ -767,9 +721,6 @@
playInstance(idx, inst.block.transcription[aiIdx].start);
}
// ===================================================
// Highlighting
// ===================================================
function startHighlightLoop(inst) {
cancelAnimationFrame(inst.animFrameId);
inst.animFrameId = requestAnimationFrame(() => highlightLoop(inst));
@@ -812,9 +763,6 @@
inst.lastSentenceSpans = [];
}
// ===================================================
// Utility
// ===================================================
function base64ToBlob(b64, mime) {
const bin = atob(b64);
const arr = new Uint8Array(bin.length);