# routes/project_routes.py - Project Management Routes (v4.2)

import re
import json
import base64

from flask import Blueprint, request, jsonify, Response, stream_with_context

from db import get_db, vacuum_db
from auth import login_required

project_bp = Blueprint('project', __name__)

# ============================================
# Helpers
# ============================================

# C0 control characters (except \t \n \r) plus DEL. They are stripped from
# every string field so they never reach stored data or API responses.
_CONTROL_CHAR_RE = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')

# Upload limits for project thumbnails.
_MAX_THUMBNAIL_BYTES = 5 * 1024 * 1024
_THUMBNAIL_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp', '.gif')

# Size of each piece yielded by the streamed project response.
_STREAM_CHUNK_SIZE = 64 * 1024

# The debug report lists at most this many bad characters per field.
_MAX_BAD_CHARS_PER_FIELD = 5


def clean_str(s):
    """Strip raw control characters from a string.

    Returns '' for None. Non-string values are converted with str() first.
    """
    if s is None:
        return ''
    if not isinstance(s, str):
        s = str(s)
    return _CONTROL_CHAR_RE.sub('', s)


def clean_transcription(transcription):
    """Sanitize 'word' fields inside a transcription list.

    The list is cleaned in place and returned. Anything that is not a list
    is returned unchanged.
    """
    if isinstance(transcription, list):
        for entry in transcription:
            if isinstance(entry, dict) and 'word' in entry:
                entry['word'] = clean_str(entry.get('word', ''))
    return transcription


def _json_body():
    """Return the request's JSON body as a dict ({} if missing or invalid).

    silent=True keeps a missing or malformed body from aborting the request,
    so each handler can answer with its own validation error.
    """
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


def _text_field(data, key):
    """Return data[key] stripped of surrounding whitespace, or '' if unusable."""
    value = data.get(key)
    return value.strip() if isinstance(value, str) else ''


def _load_transcription(raw):
    """Parse a stored transcription; NULL, empty or malformed values give []."""
    if not raw:
        return []
    try:
        parsed = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return []
    if parsed is None:
        # A stored JSON null would otherwise reach the client as None.
        return []
    return clean_transcription(parsed)


def _delete_project_content(cursor, project_id):
    """Delete all images, blocks and chapters of a project (no commit)."""
    # Children first: images -> blocks -> chapters.
    cursor.execute('''
        DELETE FROM block_images
        WHERE block_id IN (
            SELECT mb.id FROM markdown_blocks mb
            JOIN chapters c ON mb.chapter_id = c.id
            WHERE c.project_id = ?
        )
    ''', (project_id,))
    cursor.execute('''
        DELETE FROM markdown_blocks
        WHERE chapter_id IN (
            SELECT id FROM chapters WHERE project_id = ?
        )
    ''', (project_id,))
    cursor.execute('DELETE FROM chapters WHERE project_id = ?', (project_id,))


def _validate_chapters(chapters):
    """Return an error message for a malformed save payload, else None."""
    if not isinstance(chapters, list):
        return 'chapters must be a list'
    for chapter in chapters:
        if not isinstance(chapter, dict) or 'chapter_number' not in chapter:
            return 'Each chapter must be an object with a chapter_number'
        blocks = chapter.get('blocks') or []
        if not isinstance(blocks, list):
            return 'Chapter blocks must be a list'
        for block in blocks:
            if not isinstance(block, dict) or 'block_order' not in block:
                return 'Each block must be an object with a block_order'
            images = block.get('images') or []
            if not isinstance(images, list) or not all(
                isinstance(img, dict) for img in images
            ):
                return 'Block images must be a list of objects'
    return None


def _find_bad_chars(s):
    """Return up to _MAX_BAD_CHARS_PER_FIELD control characters found in s.

    Each entry is {'pos', 'code', 'hex'}. Non-string or empty input gives [].
    Uses the same pattern as clean_str, so \\t, \\n and \\r are allowed.
    """
    if not s or not isinstance(s, str):
        return []
    bad = []
    for match in _CONTROL_CHAR_RE.finditer(s):
        code = ord(match.group())
        bad.append({'pos': match.start(), 'code': code, 'hex': f'0x{code:02x}'})
        if len(bad) >= _MAX_BAD_CHARS_PER_FIELD:
            break
    return bad


# ============================================
# Routes
# ============================================
# NOTE(review): the per-project rules had lost their URL variable (they read
# '/api/projects/' and '/api/projects//save'), which cannot bind project_id.
# They are restored as <int:project_id>; confirm the converter against the
# client before deploying.

@project_bp.route('/api/projects', methods=['GET'])
@login_required
def list_projects():
    """List all projects with publishing info."""
    db = get_db()
    cursor = db.cursor()

    cursor.execute('''
        SELECT p.id, p.name, p.created_at, p.updated_at,
               p.is_published, p.published_at,
               p.thumbnail_data, p.thumbnail_format,
               p.description, p.author, p.category, p.view_count,
               (SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count,
               (SELECT COUNT(*) FROM markdown_blocks mb
                JOIN chapters c ON mb.chapter_id = c.id
                WHERE c.project_id = p.id) as block_count,
               (SELECT COUNT(*) FROM markdown_blocks mb
                JOIN chapters c ON mb.chapter_id = c.id
                WHERE c.project_id = p.id
                AND mb.audio_data IS NOT NULL AND mb.audio_data != '') as audio_count
        FROM projects p
        ORDER BY p.updated_at DESC
    ''')

    projects = [
        {
            'id': row['id'],
            'name': row['name'],
            'created_at': row['created_at'],
            'updated_at': row['updated_at'],
            'chapter_count': row['chapter_count'],
            'block_count': row['block_count'],
            'audio_count': row['audio_count'],
            'is_published': bool(row['is_published']),
            'published_at': row['published_at'],
            'thumbnail_data': row['thumbnail_data'],
            'thumbnail_format': row['thumbnail_format'] or 'png',
            'description': row['description'] or '',
            'author': row['author'] or '',
            'category': row['category'] or '',
            'view_count': row['view_count'] or 0,
        }
        for row in cursor.fetchall()
    ]

    return jsonify({'projects': projects})


@project_bp.route('/api/projects', methods=['POST'])
@login_required
def create_project():
    """Create a new project.

    Returns 400 if the name is missing or already taken, 500 on any other
    database error.
    """
    name = _text_field(_json_body(), 'name')
    if not name:
        return jsonify({'error': 'Project name is required'}), 400

    db = get_db()
    cursor = db.cursor()

    try:
        cursor.execute('INSERT INTO projects (name) VALUES (?)', (name,))
        db.commit()
    except Exception as e:
        # The driver's exception class is not visible from this module, so
        # the duplicate-name case is recognised by its message.
        db.rollback()
        if 'UNIQUE constraint' in str(e):
            return jsonify({'error': 'Project with this name already exists'}), 400
        return jsonify({'error': str(e)}), 500

    return jsonify({
        'success': True,
        'project_id': cursor.lastrowid,
        'name': name,
    })


@project_bp.route('/api/projects/<int:project_id>', methods=['GET'])
@login_required
def get_project(project_id):
    """
    Get a project with all its chapters and blocks.

    Streamed response: large projects (with many audio blocks) can produce
    10-50 MB of JSON. We stream it in chunks and sanitize every string field
    to prevent control characters from breaking JSON parsing on the client.
    """
    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT * FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project:
        return jsonify({'error': 'Project not found'}), 404

    cursor.execute('''
        SELECT * FROM chapters
        WHERE project_id = ?
        ORDER BY chapter_number
    ''', (project_id,))
    chapters = cursor.fetchall()

    chapters_data = []
    for chapter in chapters:
        cursor.execute('''
            SELECT * FROM markdown_blocks
            WHERE chapter_id = ?
            ORDER BY block_order
        ''', (chapter['id'],))
        blocks = cursor.fetchall()

        blocks_data = []
        for block in blocks:
            cursor.execute('''
                SELECT * FROM block_images
                WHERE block_id = ?
                ORDER BY id
            ''', (block['id'],))
            images = cursor.fetchall()

            blocks_data.append({
                'id': block['id'],
                'block_order': block['block_order'],
                'block_type': clean_str(block['block_type']),
                'content': clean_str(block['content']),
                'tts_text': clean_str(block['tts_text']),
                'audio_data': clean_str(block['audio_data']),
                'audio_format': clean_str(block['audio_format']) or 'mp3',
                # Stored value might be NULL, empty, or malformed.
                'transcription': _load_transcription(block['transcription']),
                'images': [
                    {
                        'id': img['id'],
                        'data': clean_str(img['image_data']),
                        'format': clean_str(img['image_format']) or 'png',
                        'alt_text': clean_str(img['alt_text']),
                        'position': clean_str(img['position']) or 'before',
                    }
                    for img in images
                ],
            })

        chapters_data.append({
            'id': chapter['id'],
            'chapter_number': chapter['chapter_number'],
            'title': clean_str(chapter['title']),
            'voice': clean_str(chapter['voice']),
            'blocks': blocks_data,
        })

    response_data = {
        'id': project['id'],
        'name': clean_str(project['name']),
        'created_at': clean_str(project['created_at']),
        'updated_at': clean_str(project['updated_at']),
        'chapters': chapters_data,
    }

    # ensure_ascii=True forces all non-ASCII chars to be escaped (\uXXXX) —
    # slightly larger payload but guarantees the stream is pure ASCII, so no
    # proxy can mis-handle multi-byte chars at chunk boundaries.
    # Serializing before the Response is created means a failure here is
    # raised before any headers are sent, not halfway through the body.
    json_str = json.dumps(response_data, ensure_ascii=True)

    def generate():
        for start in range(0, len(json_str), _STREAM_CHUNK_SIZE):
            yield json_str[start:start + _STREAM_CHUNK_SIZE]

    return Response(
        stream_with_context(generate()),
        content_type='application/json; charset=utf-8',
        headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no',  # Tell Nginx/Traefik: don't buffer this response
        },
    )


@project_bp.route('/api/projects/<int:project_id>', methods=['PUT'])
@login_required
def update_project(project_id):
    """Update project name.

    Returns 400 if the name is missing or already taken, 404 if the project
    does not exist, 500 on any other database error.
    """
    name = _text_field(_json_body(), 'name')
    if not name:
        return jsonify({'error': 'Project name is required'}), 400

    db = get_db()
    cursor = db.cursor()

    try:
        cursor.execute('''
            UPDATE projects
            SET name = ?, updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
        ''', (name, project_id))
        db.commit()
    except Exception as e:
        # See create_project: the duplicate-name case is matched by message.
        db.rollback()
        if 'UNIQUE constraint' in str(e):
            return jsonify({'error': 'A project with this name already exists'}), 400
        return jsonify({'error': str(e)}), 500

    if cursor.rowcount == 0:
        return jsonify({'error': 'Project not found'}), 404
    return jsonify({'success': True})


@project_bp.route('/api/projects/<int:project_id>', methods=['DELETE'])
@login_required
def delete_project(project_id):
    """Delete a project and all its data."""
    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404

    try:
        _delete_project_content(cursor, project_id)
        cursor.execute('DELETE FROM projects WHERE id = ?', (project_id,))
        db.commit()
    except Exception:
        # Never leave a half-deleted project pending on the connection.
        db.rollback()
        raise

    vacuum_db()

    return jsonify({'success': True})


@project_bp.route('/api/projects/<int:project_id>/save', methods=['POST'])
@login_required
def save_project_content(project_id):
    """
    Save all chapters and blocks for a project.

    The project's existing chapters, blocks and images are replaced by the
    ones in the request body. Every string field is sanitized before
    insertion so that invalid control characters never enter the database.
    This protects future reads from the JSON corruption bug we saw on
    /api/projects/<id> GET.

    Returns 400 for a malformed payload and 404 for an unknown project.
    """
    data = _json_body()
    chapters = data.get('chapters')
    if chapters is None:
        chapters = []

    # Validate before touching the database: the old content is deleted
    # first, so a bad payload must not be discovered halfway through.
    error = _validate_chapters(chapters)
    if error:
        return jsonify({'error': error}), 400

    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404

    try:
        _delete_project_content(cursor, project_id)

        for chapter in chapters:
            cursor.execute('''
                INSERT INTO chapters (project_id, chapter_number, title, voice)
                VALUES (?, ?, ?, ?)
            ''', (
                project_id,
                chapter['chapter_number'],
                clean_str(chapter.get('title', 'Section')),
                clean_str(chapter.get('voice', 'af_heart')),
            ))
            chapter_id = cursor.lastrowid

            for block in chapter.get('blocks') or []:
                # Clean transcription word fields before storing.
                transcription = block.get('transcription')
                if transcription is None:
                    transcription = []
                transcription = clean_transcription(transcription)

                cursor.execute('''
                    INSERT INTO markdown_blocks
                    (chapter_id, block_order, block_type, content, tts_text,
                     audio_data, audio_format, transcription)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    chapter_id,
                    block['block_order'],
                    clean_str(block.get('block_type', 'paragraph')),
                    clean_str(block.get('content', '')),
                    clean_str(block.get('tts_text', '')),
                    clean_str(block.get('audio_data', '')),
                    clean_str(block.get('audio_format', 'mp3')),
                    json.dumps(transcription),
                ))
                block_id = cursor.lastrowid

                for img in block.get('images') or []:
                    cursor.execute('''
                        INSERT INTO block_images
                        (block_id, image_data, image_format, alt_text, position)
                        VALUES (?, ?, ?, ?, ?)
                    ''', (
                        block_id,
                        clean_str(img.get('data', '')),
                        clean_str(img.get('format', 'png')),
                        clean_str(img.get('alt_text', '')),
                        clean_str(img.get('position', 'before')),
                    ))

        cursor.execute('''
            UPDATE projects SET updated_at = CURRENT_TIMESTAMP WHERE id = ?
        ''', (project_id,))

        db.commit()
    except Exception:
        # Roll back so a failed save cannot leave the project emptied.
        db.rollback()
        raise

    return jsonify({'success': True, 'message': 'Project saved successfully'})


# ============================================
# v4.2: Publishing Endpoints
# ============================================

@project_bp.route('/api/projects/<int:project_id>/publish', methods=['POST'])
@login_required
def publish_project(project_id):
    """Publish a project to make it visible on public homepage.

    Returns 404 for an unknown project and 400 if no block has audio yet.
    """
    data = _json_body()

    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id, name FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project:
        return jsonify({'error': 'Project not found'}), 404

    # Verify project has at least one block with audio.
    cursor.execute('''
        SELECT COUNT(*) as cnt FROM markdown_blocks mb
        JOIN chapters c ON mb.chapter_id = c.id
        WHERE c.project_id = ?
        AND mb.audio_data IS NOT NULL AND mb.audio_data != ''
    ''', (project_id,))
    audio_count = cursor.fetchone()['cnt']

    if audio_count == 0:
        return jsonify({'error': 'Cannot publish: no audio generated yet'}), 400

    description = _text_field(data, 'description')
    author = _text_field(data, 'author')
    category = _text_field(data, 'category')

    cursor.execute('''
        UPDATE projects
        SET is_published = 1,
            published_at = CURRENT_TIMESTAMP,
            description = ?,
            author = ?,
            category = ?
        WHERE id = ?
    ''', (description, author, category, project_id))

    db.commit()

    return jsonify({
        'success': True,
        'message': f'"{project["name"]}" published successfully!',
    })


@project_bp.route('/api/projects/<int:project_id>/unpublish', methods=['POST'])
@login_required
def unpublish_project(project_id):
    """Unpublish a project (but keep author/description/category for easy republish)."""
    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404

    cursor.execute('UPDATE projects SET is_published = 0 WHERE id = ?', (project_id,))
    db.commit()

    return jsonify({'success': True, 'message': 'Project unpublished'})


@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['POST'])
@login_required
def upload_thumbnail(project_id):
    """Upload a thumbnail image for the project.

    Expects a multipart 'file' field holding a PNG/JPG/WEBP/GIF of at most
    5 MB. The image is stored base64-encoded together with its format.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    img_file = request.files['file']
    if not img_file or not img_file.filename:
        return jsonify({'error': 'Invalid file'}), 400

    filename = img_file.filename.lower()
    if not filename.endswith(_THUMBNAIL_EXTENSIONS):
        return jsonify({'error': 'File must be an image (PNG/JPG/WEBP/GIF)'}), 400

    # Read one byte past the limit: enough to detect an oversized upload
    # without pulling an arbitrarily large file into memory.
    img_bytes = img_file.read(_MAX_THUMBNAIL_BYTES + 1)
    if not img_bytes:
        return jsonify({'error': 'Invalid file'}), 400
    if len(img_bytes) > _MAX_THUMBNAIL_BYTES:
        return jsonify({'error': 'Image too large (max 5MB)'}), 400

    fmt = filename.rsplit('.', 1)[-1]
    if fmt == 'jpg':
        fmt = 'jpeg'

    b64 = base64.b64encode(img_bytes).decode('utf-8')

    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404

    cursor.execute('''
        UPDATE projects
        SET thumbnail_data = ?, thumbnail_format = ?
        WHERE id = ?
    ''', (b64, fmt, project_id))
    db.commit()

    return jsonify({
        'success': True,
        'thumbnail_data': b64,
        'thumbnail_format': fmt,
    })


@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['DELETE'])
@login_required
def delete_thumbnail(project_id):
    """Remove project thumbnail. Returns 404 if the project does not exist."""
    db = get_db()
    cursor = db.cursor()

    cursor.execute('UPDATE projects SET thumbnail_data = NULL WHERE id = ?', (project_id,))
    db.commit()

    # Match the sibling routes: an unknown project is an error, not a success.
    if cursor.rowcount == 0:
        return jsonify({'error': 'Project not found'}), 404
    return jsonify({'success': True})


# ============================================
# DEBUG: Identify corrupt data
# ============================================

@project_bp.route('/api/projects/<int:project_id>/debug', methods=['GET'])
@login_required
def debug_project(project_id):
    """
    Scan a project for control characters and report which fields are dirty.

    Chapter fields (title, voice) and block fields (block_type, content,
    tts_text, audio_data, audio_format, transcription) are checked.
    Visit: /api/projects/<id>/debug after logging in.
    """
    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id, name FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project:
        return jsonify({'error': 'Project not found'}), 404

    report = {
        'project_id': project['id'],
        'project_name': project['name'],
        'issues': [],
    }

    cursor.execute(
        'SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number',
        (project_id,),
    )
    chapters = cursor.fetchall()

    for chapter in chapters:
        ch_num = chapter['chapter_number']
        for field in ('title', 'voice'):
            bad = _find_bad_chars(chapter[field])
            if bad:
                report['issues'].append({
                    'where': f'chapter {ch_num} -> {field}',
                    'bad_chars': bad,
                    'sample': repr((chapter[field] or '')[:80]),
                })

        cursor.execute(
            'SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order',
            (chapter['id'],),
        )
        blocks = cursor.fetchall()

        for block in blocks:
            b_order = block['block_order']
            for field in ('block_type', 'content', 'tts_text',
                          'audio_data', 'audio_format', 'transcription'):
                bad = _find_bad_chars(block[field])
                if bad:
                    val = block[field] or ''
                    first_pos = bad[0]['pos']
                    report['issues'].append({
                        'where': f'chapter {ch_num}, block {b_order} -> {field}',
                        'field_length': len(val),
                        'bad_chars': bad,
                        # 20 characters of context on each side of the first hit.
                        'sample_around_first_bad': repr(
                            val[max(0, first_pos - 20):first_pos + 20]
                        ),
                    })

    report['total_issues'] = len(report['issues'])
    return jsonify(report)