diff --git a/routes/project_routes.py b/routes/project_routes.py index 01a5ee8..d32f4ab 100644 --- a/routes/project_routes.py +++ b/routes/project_routes.py @@ -3,7 +3,7 @@ import re import json import base64 -from flask import Blueprint, request, jsonify, Response, stream_with_context +from flask import Blueprint, request, jsonify from db import get_db, vacuum_db from auth import login_required @@ -15,12 +15,10 @@ project_bp = Blueprint('project', __name__) # Helpers # ============================================ -# C0/C1 control characters except \t \n \r — these corrupt JSON streams. _CONTROL_CHAR_RE = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]') def clean_str(s): - """Strip raw control characters from a string. Returns '' for None.""" if s is None: return '' if not isinstance(s, str): @@ -29,7 +27,6 @@ def clean_str(s): def clean_transcription(transcription): - """Sanitize 'word' fields inside a transcription list.""" if isinstance(transcription, list): for t in transcription: if isinstance(t, dict) and 'word' in t: @@ -118,11 +115,9 @@ def create_project(): @login_required def get_project(project_id): """ - Get a project with all its chapters and blocks. - - Streamed response: large projects (with many audio blocks) can produce - 10-50 MB of JSON. We stream it in chunks and sanitize every string field - to prevent control characters from breaking JSON parsing on the client. + Get project metadata WITHOUT audio_data. + Audio is loaded lazily via /api/projects//audio/. + This keeps the response small (<1 MB) and avoids proxy truncation issues. """ db = get_db() cursor = db.cursor() @@ -141,7 +136,10 @@ def get_project(project_id): chapters_data = [] for chapter in chapters: cursor.execute(''' - SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order + SELECT id, block_order, block_type, content, tts_text, + audio_format, transcription, + (audio_data IS NOT NULL AND audio_data != '') as has_audio + FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order ''', (chapter['id'],)) blocks = cursor.fetchall() @@ -152,7 +150,6 @@ def get_project(project_id): ''', (block['id'],)) images = cursor.fetchall() - # Safely parse transcription (might be NULL, empty, or malformed) transcription = [] if block['transcription']: try: @@ -167,8 +164,9 @@ def get_project(project_id): 'block_type': clean_str(block['block_type']), 'content': clean_str(block['content']), 'tts_text': clean_str(block['tts_text']), - 'audio_data': clean_str(block['audio_data']), + 'audio_data': '', # Empty here; loaded lazily by frontend 'audio_format': clean_str(block['audio_format']) or 'mp3', + 'has_audio': bool(block['has_audio']), 'transcription': transcription, 'images': [{ 'id': img['id'], @@ -187,32 +185,43 @@ def get_project(project_id): 'blocks': blocks_data }) - response_data = { + return jsonify({ 'id': project['id'], 'name': clean_str(project['name']), 'created_at': clean_str(project['created_at']), 'updated_at': clean_str(project['updated_at']), 'chapters': chapters_data - } + }) + + +@project_bp.route('/api/projects//audio/', methods=['GET']) +@login_required +def get_block_audio(project_id, block_id): + """ + Return audio_data (base64) for a single block. + Used by the frontend to lazy-load audio after metadata is loaded. + """ + db = get_db() + cursor = db.cursor() - # Stream JSON in chunks. ensure_ascii=True forces all non-ASCII chars - # to be escaped (\uXXXX) — slightly larger payload but guarantees the - # stream is pure ASCII, so no proxy can mis-handle multi-byte chars - # at chunk boundaries. - def generate(): - json_str = json.dumps(response_data, ensure_ascii=True) - chunk_size = 64 * 1024 # 64 KB per chunk - for i in range(0, len(json_str), chunk_size): - yield json_str[i:i + chunk_size] + cursor.execute(''' + SELECT mb.audio_data, mb.audio_format + FROM markdown_blocks mb + JOIN chapters c ON mb.chapter_id = c.id + WHERE mb.id = ? AND c.project_id = ? + ''', (block_id, project_id)) + row = cursor.fetchone() - return Response( - stream_with_context(generate()), - mimetype='application/json; charset=utf-8', - headers={ - 'Cache-Control': 'no-cache', - 'X-Accel-Buffering': 'no' # Tell Nginx/Traefik: don't buffer this response - } - ) + if not row: + return jsonify({'error': 'Block not found'}), 404 + + if not row['audio_data']: + return jsonify({'audio_data': '', 'audio_format': row['audio_format'] or 'mp3'}) + + return jsonify({ + 'audio_data': clean_str(row['audio_data']), + 'audio_format': clean_str(row['audio_format']) or 'mp3' + }) @project_bp.route('/api/projects/', methods=['PUT']) @@ -281,13 +290,7 @@ def delete_project(project_id): @project_bp.route('/api/projects//save', methods=['POST']) @login_required def save_project_content(project_id): - """ - Save all chapters and blocks for a project. - - Every string field is sanitized before insertion so that invalid - control characters never enter the database. This protects future - reads from the JSON corruption bug we saw on /api/projects/ GET. - """ + """Save all chapters and blocks for a project.""" data = request.json chapters = data.get('chapters', []) @@ -328,7 +331,6 @@ def save_project_content(project_id): chapter_id = cursor.lastrowid for block in chapter.get('blocks', []): - # Clean transcription word fields before storing transcription = clean_transcription(block.get('transcription', [])) cursor.execute(''' @@ -376,7 +378,7 @@ def save_project_content(project_id): @project_bp.route('/api/projects//publish', methods=['POST']) @login_required def publish_project(project_id): - """Publish a project to make it visible on public homepage.""" + """Publish a project to public homepage.""" data = request.json or {} db = get_db() @@ -387,7 +389,6 @@ def publish_project(project_id): if not project: return jsonify({'error': 'Project not found'}), 404 - # Verify project has at least one chapter with audio cursor.execute(''' SELECT COUNT(*) as cnt FROM markdown_blocks mb JOIN chapters c ON mb.chapter_id = c.id @@ -422,7 +423,7 @@ def publish_project(project_id): @project_bp.route('/api/projects//unpublish', methods=['POST']) @login_required def unpublish_project(project_id): - """Unpublish a project (but keep author/description/category for easy republish).""" + """Unpublish a project.""" db = get_db() cursor = db.cursor() @@ -439,7 +440,7 @@ def unpublish_project(project_id): @project_bp.route('/api/projects//thumbnail', methods=['POST']) @login_required def upload_thumbnail(project_id): - """Upload a thumbnail image for the project.""" + """Upload a thumbnail image.""" if 'file' not in request.files: return jsonify({'error': 'No file provided'}), 400 @@ -488,76 +489,3 @@ def delete_thumbnail(project_id): cursor.execute('UPDATE projects SET thumbnail_data = NULL WHERE id = ?', (project_id,)) db.commit() return jsonify({'success': True}) - -# ============================================ -# DEBUG: Identify corrupt data -# ============================================ - -@project_bp.route('/api/projects//debug', methods=['GET']) -@login_required -def debug_project(project_id): - """ - Scan a project for control characters and report which fields are dirty. - Visit: /api/projects//debug after logging in. - """ - db = get_db() - cursor = db.cursor() - - cursor.execute('SELECT id, name FROM projects WHERE id = ?', (project_id,)) - project = cursor.fetchone() - if not project: - return jsonify({'error': 'Project not found'}), 404 - - def find_bad_chars(s): - """Return list of (position, char_code) for any control char found.""" - if not s or not isinstance(s, str): - return [] - bad = [] - for i, ch in enumerate(s): - code = ord(ch) - # Allow \t (9), \n (10), \r (13). Anything else <32 or 127 is bad. - if (code < 32 and code not in (9, 10, 13)) or code == 127: - bad.append({'pos': i, 'code': code, 'hex': f'0x{code:02x}'}) - if len(bad) >= 5: # cap at 5 per field - break - return bad - - report = { - 'project_id': project['id'], - 'project_name': project['name'], - 'issues': [] - } - - cursor.execute('SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number', (project_id,)) - chapters = cursor.fetchall() - - for chapter in chapters: - ch_num = chapter['chapter_number'] - - for field in ('title', 'voice'): - bad = find_bad_chars(chapter[field]) - if bad: - report['issues'].append({ - 'where': f'chapter {ch_num} -> {field}', - 'bad_chars': bad, - 'sample': repr((chapter[field] or '')[:80]) - }) - - cursor.execute('SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order', (chapter['id'],)) - blocks = cursor.fetchall() - - for block in blocks: - b_order = block['block_order'] - for field in ('block_type', 'content', 'tts_text', 'audio_data', 'audio_format', 'transcription'): - bad = find_bad_chars(block[field]) - if bad: - val = block[field] or '' - report['issues'].append({ - 'where': f'chapter {ch_num}, block {b_order} -> {field}', - 'field_length': len(val), - 'bad_chars': bad, - 'sample_around_first_bad': repr(val[max(0, bad[0]['pos']-20):bad[0]['pos']+20]) - }) - - report['total_issues'] = len(report['issues']) - return jsonify(report) diff --git a/static/js/app.js b/static/js/app.js index 8e31288..6cbce75 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -1,7 +1,6 @@ /** * Audiobook Maker Pro v4.2 - Main Application - * UPDATED: Publishing support, thumbnails, sections terminology - * FIXED: Edit/rename in new archive UI, republish populates existing data + * UPDATED: Lazy audio loading to avoid large response truncation */ // ============================================ @@ -20,7 +19,7 @@ let ttsEditModal = null; let publishModal = null; let publishingProjectId = null; let currentWorkflowStage = 'upload'; -let allArchiveProjects = []; // Cache for republish dialog +let allArchiveProjects = []; // ============================================ // Initialization @@ -398,7 +397,7 @@ function updateWorkflowProgress(stage) { } // ============================================ -// Helper: Switch to Editor Tab +// Helpers // ============================================ function switchToEditorTab() { @@ -409,10 +408,6 @@ function switchToEditorTab() { } } -// ============================================ -// Helper: Start from scratch -// ============================================ - function startFromScratch() { document.getElementById('uploadSection').style.display = 'none'; document.getElementById('editorSection').style.display = 'block'; @@ -432,10 +427,6 @@ function startFromScratch() { } } -// ============================================ -// Loading Overlay -// ============================================ - function showLoader(text = 'Processing...', subtext = 'Please wait') { const overlay = document.getElementById('loadingOverlay'); if(overlay) { @@ -591,7 +582,6 @@ async function openProjectArchive() { const response = await fetch('/api/projects'); const data = await response.json(); - // Cache for republish dialog allArchiveProjects = data.projects || []; const container = document.getElementById('projectList'); @@ -616,7 +606,6 @@ async function openProjectArchive() { : ''; const canPublish = project.audio_count > 0; - const safeNameAttr = escapeHtml(project.name).replace(/'/g, "'"); return `
@@ -694,7 +683,7 @@ async function openProjectArchive() { } // ============================================ -// Rename Functionality (FIXED for new UI) +// Rename // ============================================ function startEditProjectName(projectId) { @@ -755,18 +744,15 @@ async function saveProjectName(projectId) { return; } - // Update the text in the cached element const textEl = document.getElementById(`project-name-text-${projectId}`); if (textEl) textEl.textContent = newName; - // Update cached project list const cached = allArchiveProjects.find(p => p.id === projectId); if (cached) cached.name = newName; cancelEditProjectName(projectId); showNotification('Project renamed successfully', 'success'); - // Update header if currently loaded project was renamed if (currentProject.id === projectId) { currentProject.name = newName; const nameInput = document.getElementById('projectName'); @@ -820,13 +806,12 @@ async function uploadThumbnail(projectId, inputEl) { } // ============================================ -// Publishing Functions (FIXED to populate existing data) +// Publishing // ============================================ function openPublishDialog(projectId) { publishingProjectId = projectId; - // Find project in cache to populate existing data const project = allArchiveProjects.find(p => p.id === projectId); if (!publishModal) { @@ -871,7 +856,6 @@ function openPublishDialog(projectId) { publishModal = new bootstrap.Modal(document.getElementById('publishModal')); } - // Populate with existing data (FIX: was always empty before) document.getElementById('pub-name').value = project ? project.name : ''; document.getElementById('pub-author').value = project ? (project.author || '') : ''; document.getElementById('pub-description').value = project ? (project.description || '') : ''; @@ -926,14 +910,15 @@ async function unpublishProject(projectId) { } // ============================================ -// Project Load / Delete +// Project Load / Delete - LAZY AUDIO LOADING // ============================================ async function loadProject(projectId) { - showLoader('Loading project...'); + showLoader('Loading project...', 'Fetching metadata'); if(archiveModal) archiveModal.hide(); try { + // Step 1: Load lightweight metadata (no audio_data) const response = await fetch(`/api/projects/${projectId}`); const data = await response.json(); @@ -955,27 +940,93 @@ async function loadProject(projectId) { renderProjectInEditor(data); } - let hasAudio = false; + // Step 2: Find blocks that need audio fetched + const audioBlocks = []; for (const ch of data.chapters) { for (const bl of ch.blocks) { - if (bl.audio_data && bl.block_type !== 'image') { - hasAudio = true; - break; + if (bl.has_audio && bl.block_type !== 'image') { + audioBlocks.push(bl.id); } } - if (hasAudio) break; } - updateWorkflowProgress(hasAudio ? 'audio-ready' : 'edit'); + + updateWorkflowProgress(audioBlocks.length > 0 ? 'audio-ready' : 'edit'); hideLoader(); - showNotification('Project loaded successfully!', 'success'); + + if (audioBlocks.length === 0) { + showNotification('Project loaded successfully!', 'success'); + return; + } + + // Step 3: Lazy-load audio in background, parallel batches + showNotification(`Project loaded. Fetching ${audioBlocks.length} audio blocks in background...`, 'info'); + loadAudioBlocksInBackground(projectId, audioBlocks); } catch (error) { hideLoader(); + console.error('Load project error:', error); alert('Failed to load project: ' + error.message); } } + +async function loadAudioBlocksInBackground(projectId, blockIds) { + const BATCH_SIZE = 5; + let loaded = 0; + let failed = 0; + + async function fetchOne(blockId) { + try { + const resp = await fetch(`/api/projects/${projectId}/audio/${blockId}`); + const data = await resp.json(); + + if (data.error || !data.audio_data) { + failed++; + return; + } + + // Update editorBlocks state by db_id + const blockData = editorBlocks.find(b => b.db_id === blockId); + if (blockData) { + blockData.audio_data = data.audio_data; + blockData.audio_format = data.audio_format; + + // Update DOM indicator (green dot) + const blockEl = document.getElementById(blockData.id); + if (blockEl) { + const indicator = blockEl.querySelector('.audio-indicator'); + if (indicator) { + indicator.classList.remove('no-audio'); + indicator.classList.add('has-audio'); + indicator.title = 'Audio loaded'; + } + } + } + + loaded++; + } catch (e) { + failed++; + console.warn(`Failed to load audio for block ${blockId}:`, e); + } + } + + for (let i = 0; i < blockIds.length; i += BATCH_SIZE) { + const batch = blockIds.slice(i, i + BATCH_SIZE); + await Promise.all(batch.map(fetchOne)); + } + + const msg = failed > 0 + ? `Loaded ${loaded}/${blockIds.length} audio blocks (${failed} failed)` + : `All ${loaded} audio blocks loaded ✓`; + showNotification(msg, failed > 0 ? 'warning' : 'success'); + + if (typeof updatePanelUI === 'function') { + updatePanelUI(); + } +} + + async function deleteProject(projectId) { if (!confirm('Are you sure you want to delete this project? This action cannot be undone.')) return; diff --git a/static/js/markdown-editor.js b/static/js/markdown-editor.js index c50a26b..e7954cc 100644 --- a/static/js/markdown-editor.js +++ b/static/js/markdown-editor.js @@ -1,10 +1,6 @@ /** * Markdown Editor Module - * UPDATED: Data-driven section markers (stored in editorBlocks array) - * REMOVED: duplicate renderDocumentBlocks (now only in pdf-handler.js) - * FIXED: repairAllNewBlockLines no longer removes lines after section-dividers - * FIXED: removeSection is data-driven, no orphan dividers - * ADDED: addSectionAtLine function for Custom Section Marker button + * UPDATED: Lazy audio loading support — tracks db_id for each block */ // ============================================ @@ -15,7 +11,6 @@ let editorBlocks = []; let activeBlockId = null; let isToolbarClick = false; -// Panel state let panelState = { startingBlockId: null, blockCount: 10, @@ -31,7 +26,6 @@ function initMarkdownEditor() { const editor = document.getElementById('markdownEditor'); editor.addEventListener('click', function(e) { - // Pick mode: clicking a block sets it as starting block if (panelState.pickMode) { const blockEl = e.target.closest('.md-block'); if (blockEl && !blockEl.classList.contains('editing')) { @@ -66,7 +60,7 @@ function initMarkdownEditor() { }); restorePanelSettings(); - console.log('📝 Markdown editor initialized (data-driven sections)'); + console.log('📝 Markdown editor initialized (lazy audio loading)'); } // ============================================ @@ -81,7 +75,6 @@ function updatePanelUI() { const textBlocks = getTextBlocks(); const totalBlocks = textBlocks.length; - // Update total blocks stat with NULL checks const totalEl = document.getElementById('ampTotalBlocks'); if (totalEl) totalEl.textContent = totalBlocks; @@ -96,7 +89,6 @@ function updatePanelUI() { const remainEl = document.getElementById('ampRemainingBlocks'); if (remainEl) remainEl.textContent = (totalBlocks - genCount); - // Validate starting block if (!panelState.startingBlockId || !document.getElementById(panelState.startingBlockId)) { if (textBlocks.length > 0) { panelState.startingBlockId = textBlocks[0].id; @@ -302,7 +294,7 @@ function advanceStartingBlockAfterGeneration(generatedCount) { } // ============================================ -// DATA-DRIVEN Section Marker System +// Section Markers // ============================================ function makeSectionStart(blockId, title = null) { @@ -399,7 +391,7 @@ function renderDocumentOutline() { } // ============================================ -// Block Merge & Split Logic +// Block Merge & Split // ============================================ function mergeBlockUp(blockId) { @@ -575,6 +567,8 @@ function addBlock(type = 'paragraph', content = '', afterElement = null, images editorBlocks.push({ id: blockId, + db_id: null, // Database ID — set when loaded from server + has_audio: false, // Server-reported audio presence type: type, content: content, images: images, @@ -1112,6 +1106,8 @@ function renderProjectInEditor(projectData) { blockData.audio_format = block.audio_format; blockData.transcription = block.transcription; blockData.tts_text = block.tts_text; + blockData.db_id = block.id; // Track DB ID for lazy audio loading + blockData.has_audio = !!block.has_audio; // Server-reported audio presence } const blockEl = document.getElementById(blockId); @@ -1119,13 +1115,14 @@ function renderProjectInEditor(projectData) { blockEl.dataset.ttsText = block.tts_text; } - if (block.audio_data && block.block_type !== 'image') { + // Show audio indicator based on has_audio flag (audio will be lazy-loaded) + if (block.has_audio && block.block_type !== 'image') { if (blockEl) { const indicator = blockEl.querySelector('.audio-indicator'); if (indicator) { indicator.classList.remove('no-audio'); indicator.classList.add('has-audio'); - indicator.title = 'Audio generated'; + indicator.title = 'Audio loading...'; } } } @@ -1143,7 +1140,7 @@ function renderProjectInEditor(projectData) { let foundStart = false; for (const tb of textBlocks) { const data = editorBlocks.find(b => b.id === tb.id); - if (!data || !data.audio_data) { + if (!data || !data.has_audio) { panelState.startingBlockId = tb.id; foundStart = true; break; @@ -1153,12 +1150,11 @@ function renderProjectInEditor(projectData) { panelState.startingBlockId = textBlocks[textBlocks.length - 1].id; } - // যুক্ত করা হলো: প্রজেক্ট লোড হলেও প্যানেলের ব্লক কাউন্ট যেন মোট ব্লকের সমান থাকে if (textBlocks.length > 0) { - panelState.blockCount = textBlocks.length; + panelState.blockCount = textBlocks.length; } updatePanelUI(); renderDocumentOutline(); checkEmptyEditor(); -} \ No newline at end of file +}