564 lines
19 KiB
Python
564 lines
19 KiB
Python
# routes/project_routes.py - Project Management Routes (v4.2)
|
|
|
|
import re
|
|
import json
|
|
import base64
|
|
from flask import Blueprint, request, jsonify, Response, stream_with_context
|
|
|
|
from db import get_db, vacuum_db
|
|
from auth import login_required
|
|
|
|
# Flask blueprint created under the name 'project'; the route decorators
# visible in this file all attach their handlers to it.
project_bp = Blueprint('project', __name__)
|
|
|
|
|
|
# ============================================
|
|
# Helpers
|
|
# ============================================
|
|
|
|
# C0 control characters plus DEL (0x7F), except \t \n \r — these corrupt JSON streams.
_CONTROL_CHAR_RE = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')


def clean_str(s):
    """Return *s* as text with raw control characters removed.

    ``None`` becomes ``''``; any other non-string value is converted with
    ``str()`` first. Tab, newline and carriage return are preserved.
    """
    if s is None:
        return ''
    text = s if isinstance(s, str) else str(s)
    return _CONTROL_CHAR_RE.sub('', text)
|
|
|
|
|
|
def clean_transcription(transcription):
    """Sanitize the 'word' value of each dict entry in a transcription list.

    The list is modified in place and then returned. A value that is not a
    list is returned untouched, and entries that are not dicts or have no
    'word' key are skipped.
    """
    if not isinstance(transcription, list):
        return transcription
    for entry in transcription:
        if isinstance(entry, dict) and 'word' in entry:
            entry['word'] = clean_str(entry['word'])
    return transcription
|
|
|
|
|
|
# ============================================
|
|
# Routes
|
|
# ============================================
|
|
|
|
@project_bp.route('/api/projects', methods=['GET'])
@login_required
def list_projects():
    """Return all projects, most recently updated first.

    Each entry carries the project's basic and publishing columns plus three
    computed counts: chapters, blocks, and blocks that have non-empty audio.
    The response is ``{'projects': [...]}``.
    """
    query = '''
        SELECT p.id, p.name, p.created_at, p.updated_at,
               p.is_published, p.published_at, p.thumbnail_data, p.thumbnail_format,
               p.description, p.author, p.category, p.view_count,
               (SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count,
               (SELECT COUNT(*) FROM markdown_blocks mb
                JOIN chapters c ON mb.chapter_id = c.id
                WHERE c.project_id = p.id) as block_count,
               (SELECT COUNT(*) FROM markdown_blocks mb
                JOIN chapters c ON mb.chapter_id = c.id
                WHERE c.project_id = p.id AND mb.audio_data IS NOT NULL AND mb.audio_data != '') as audio_count
        FROM projects p
        ORDER BY p.updated_at DESC
    '''

    def summarize(row):
        """Map one result row to its JSON-ready dict, filling in NULL defaults."""
        return {
            'id': row['id'],
            'name': row['name'],
            'created_at': row['created_at'],
            'updated_at': row['updated_at'],
            'chapter_count': row['chapter_count'],
            'block_count': row['block_count'],
            'audio_count': row['audio_count'],
            'is_published': bool(row['is_published']),
            'published_at': row['published_at'],
            'thumbnail_data': row['thumbnail_data'],
            'thumbnail_format': row['thumbnail_format'] or 'png',
            'description': row['description'] or '',
            'author': row['author'] or '',
            'category': row['category'] or '',
            'view_count': row['view_count'] or 0
        }

    cur = get_db().cursor()
    cur.execute(query)
    return jsonify({'projects': [summarize(row) for row in cur.fetchall()]})
|
|
|
|
|
|
@project_bp.route('/api/projects', methods=['POST'])
@login_required
def create_project():
    """Create a new project from a JSON body of the form ``{"name": "..."}``.

    Responses:
        200 -- ``{'success': True, 'project_id': <new id>, 'name': <name>}``
        400 -- the name is missing, blank or not a string, or the insert hit
               a UNIQUE constraint
        500 -- any other database error (its message is returned in 'error')
    """
    # silent=True returns None instead of raising for a missing or malformed
    # JSON body, so those requests get the same 400 as a blank name rather
    # than crashing on ``None.get``.
    data = request.get_json(silent=True)
    raw_name = data.get('name') if isinstance(data, dict) else None
    name = raw_name.strip() if isinstance(raw_name, str) else ''

    if not name:
        return jsonify({'error': 'Project name is required'}), 400

    db = get_db()
    cursor = db.cursor()

    try:
        cursor.execute('INSERT INTO projects (name) VALUES (?)', (name,))
        db.commit()
    except Exception as e:
        # Drop the failed statement so the connection is not left holding a
        # half-finished transaction.
        db.rollback()
        if 'UNIQUE constraint' in str(e):
            return jsonify({'error': 'Project with this name already exists'}), 400
        return jsonify({'error': str(e)}), 500

    return jsonify({
        'success': True,
        'project_id': cursor.lastrowid,
        'name': name
    })
|
|
|
|
|
|
def _parse_transcription(raw):
    """Decode a stored transcription JSON string; return [] if empty or malformed."""
    if not raw:
        return []
    try:
        return clean_transcription(json.loads(raw))
    except (json.JSONDecodeError, TypeError):
        return []


def _serialize_image(img):
    """Build the JSON-ready dict for one block_images row."""
    return {
        'id': img['id'],
        'data': clean_str(img['image_data']),
        'format': clean_str(img['image_format']) or 'png',
        'alt_text': clean_str(img['alt_text']),
        'position': clean_str(img['position']) or 'before'
    }


def _serialize_block(block, images):
    """Build the JSON-ready dict for one markdown_blocks row and its image rows."""
    return {
        'id': block['id'],
        'block_order': block['block_order'],
        'block_type': clean_str(block['block_type']),
        'content': clean_str(block['content']),
        'tts_text': clean_str(block['tts_text']),
        'audio_data': clean_str(block['audio_data']),
        'audio_format': clean_str(block['audio_format']) or 'mp3',
        'transcription': _parse_transcription(block['transcription']),
        'images': [_serialize_image(img) for img in images]
    }


@project_bp.route('/api/projects/<int:project_id>', methods=['GET'])
@login_required
def get_project(project_id):
    """
    Get a project with all its chapters, blocks and block images.

    Chapters are ordered by chapter_number, blocks by block_order and images
    by id. Every string field is passed through clean_str() so control
    characters cannot break JSON parsing on the client.

    The complete JSON document is serialized in one go (on first iteration of
    the response body) and then emitted in 64 KB slices; large projects with
    many audio blocks can produce 10-50 MB of JSON.

    Returns 404 with ``{'error': 'Project not found'}`` for an unknown id.
    """
    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT * FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()

    if not project:
        return jsonify({'error': 'Project not found'}), 404

    cursor.execute('''
        SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number
    ''', (project_id,))

    chapters_data = []
    # fetchall() materializes each result list before the cursor is reused
    # for the nested queries below.
    for chapter in cursor.fetchall():
        cursor.execute('''
            SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
        ''', (chapter['id'],))

        blocks_data = []
        for block in cursor.fetchall():
            cursor.execute('''
                SELECT * FROM block_images WHERE block_id = ? ORDER BY id
            ''', (block['id'],))
            blocks_data.append(_serialize_block(block, cursor.fetchall()))

        chapters_data.append({
            'id': chapter['id'],
            'chapter_number': chapter['chapter_number'],
            'title': clean_str(chapter['title']),
            'voice': clean_str(chapter['voice']),
            'blocks': blocks_data
        })

    response_data = {
        'id': project['id'],
        'name': clean_str(project['name']),
        'created_at': clean_str(project['created_at']),
        'updated_at': clean_str(project['updated_at']),
        'chapters': chapters_data
    }

    # ensure_ascii=True escapes every non-ASCII char as \uXXXX — a slightly
    # larger payload, but the stream is pure ASCII, so one character is one
    # byte and no multi-byte sequence can be split at a chunk boundary.
    def generate():
        json_str = json.dumps(response_data, ensure_ascii=True)
        chunk_size = 64 * 1024  # 64 KB per chunk
        for i in range(0, len(json_str), chunk_size):
            yield json_str[i:i + chunk_size]

    return Response(
        stream_with_context(generate()),
        mimetype='application/json; charset=utf-8',
        headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no'  # Tell Nginx/Traefik: don't buffer this response
        }
    )
|
|
|
|
|
|
@project_bp.route('/api/projects/<int:project_id>', methods=['PUT'])
@login_required
def update_project(project_id):
    """Rename a project and bump its updated_at timestamp.

    Expects a JSON body of the form ``{"name": "..."}``.

    Responses:
        200 -- ``{'success': True}``
        400 -- the name is missing, blank or not a string, or the new name
               hit a UNIQUE constraint
        404 -- no project has this id
        500 -- any other database error (its message is returned in 'error')
    """
    # silent=True returns None instead of raising for a missing or malformed
    # JSON body, so those requests get the same 400 as a blank name rather
    # than crashing on ``None.get``.
    data = request.get_json(silent=True)
    raw_name = data.get('name') if isinstance(data, dict) else None
    name = raw_name.strip() if isinstance(raw_name, str) else ''

    if not name:
        return jsonify({'error': 'Project name is required'}), 400

    db = get_db()
    cursor = db.cursor()

    try:
        cursor.execute('''
            UPDATE projects SET name = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?
        ''', (name, project_id))
        db.commit()
    except Exception as e:
        # Drop the failed statement so the connection is not left holding a
        # half-finished transaction.
        db.rollback()
        if 'UNIQUE constraint' in str(e):
            return jsonify({'error': 'A project with this name already exists'}), 400
        return jsonify({'error': str(e)}), 500

    # No row matched the id, so there was nothing to rename.
    if cursor.rowcount == 0:
        return jsonify({'error': 'Project not found'}), 404

    return jsonify({'success': True})
|
|
|
|
|
|
@project_bp.route('/api/projects/<int:project_id>', methods=['DELETE'])
@login_required
def delete_project(project_id):
    """Remove a project together with its chapters, blocks and block images.

    Rows are deleted child-first (images, blocks, chapters, then the project
    itself) and committed once, after which vacuum_db() is called. Responds
    404 when no project has the given id.
    """
    connection = get_db()
    cur = connection.cursor()
    params = (project_id,)

    cur.execute('SELECT id FROM projects WHERE id = ?', params)
    existing = cur.fetchone()
    if not existing:
        return jsonify({'error': 'Project not found'}), 404

    # Ordered so that every statement still finds the parent rows it joins on.
    cascade = (
        '''
        DELETE FROM block_images WHERE block_id IN (
            SELECT mb.id FROM markdown_blocks mb
            JOIN chapters c ON mb.chapter_id = c.id
            WHERE c.project_id = ?
        )
        ''',
        '''
        DELETE FROM markdown_blocks WHERE chapter_id IN (
            SELECT id FROM chapters WHERE project_id = ?
        )
        ''',
        'DELETE FROM chapters WHERE project_id = ?',
        'DELETE FROM projects WHERE id = ?',
    )
    for statement in cascade:
        cur.execute(statement, params)

    connection.commit()
    vacuum_db()

    return jsonify({'success': True})
|
|
|
|
|
|
@project_bp.route('/api/projects/<int:project_id>/save', methods=['POST'])
@login_required
def save_project_content(project_id):
    """
    Replace all chapters, blocks and images of a project with the posted ones.

    Expects a JSON object with an optional 'chapters' list. Existing chapters,
    blocks and block images of the project are deleted and the posted content
    is inserted in their place, all within a single commit.

    Every string field is sanitized with clean_str() before insertion so that
    invalid control characters never enter the database.

    Responses:
        200 -- ``{'success': True, 'message': 'Project saved successfully'}``
        400 -- the body is not a JSON object, 'chapters' is not a list, or a
               chapter/block is malformed (e.g. missing 'chapter_number' or
               'block_order'); nothing is changed in that case
        404 -- no project has this id
    """
    # Deliberately the strict accessor: a body that fails to parse must be
    # rejected before any existing content is deleted below.
    data = request.json
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    chapters = data.get('chapters', [])
    if not isinstance(chapters, list):
        return jsonify({'error': 'chapters must be a list'}), 400

    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404

    try:
        cursor.execute('''
            DELETE FROM block_images WHERE block_id IN (
                SELECT mb.id FROM markdown_blocks mb
                JOIN chapters c ON mb.chapter_id = c.id
                WHERE c.project_id = ?
            )
        ''', (project_id,))

        cursor.execute('''
            DELETE FROM markdown_blocks WHERE chapter_id IN (
                SELECT id FROM chapters WHERE project_id = ?
            )
        ''', (project_id,))

        cursor.execute('DELETE FROM chapters WHERE project_id = ?', (project_id,))

        for chapter in chapters:
            cursor.execute('''
                INSERT INTO chapters (project_id, chapter_number, title, voice)
                VALUES (?, ?, ?, ?)
            ''', (
                project_id,
                chapter['chapter_number'],
                clean_str(chapter.get('title', 'Section')),
                clean_str(chapter.get('voice', 'af_heart'))
            ))

            chapter_id = cursor.lastrowid

            for block in chapter.get('blocks', []):
                # Clean transcription word fields before storing
                transcription = clean_transcription(block.get('transcription', []))

                cursor.execute('''
                    INSERT INTO markdown_blocks
                    (chapter_id, block_order, block_type, content, tts_text, audio_data, audio_format, transcription)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    chapter_id,
                    block['block_order'],
                    clean_str(block.get('block_type', 'paragraph')),
                    clean_str(block.get('content', '')),
                    clean_str(block.get('tts_text', '')),
                    clean_str(block.get('audio_data', '')),
                    clean_str(block.get('audio_format', 'mp3')),
                    json.dumps(transcription)
                ))

                block_id = cursor.lastrowid

                for img in block.get('images', []):
                    cursor.execute('''
                        INSERT INTO block_images (block_id, image_data, image_format, alt_text, position)
                        VALUES (?, ?, ?, ?, ?)
                    ''', (
                        block_id,
                        clean_str(img.get('data', '')),
                        clean_str(img.get('format', 'png')),
                        clean_str(img.get('alt_text', '')),
                        clean_str(img.get('position', 'before'))
                    ))

        cursor.execute('''
            UPDATE projects SET updated_at = CURRENT_TIMESTAMP WHERE id = ?
        ''', (project_id,))

        db.commit()
    except (KeyError, TypeError, AttributeError):
        # A chapter/block/image had the wrong shape. Undo the deletes and any
        # partial inserts so the previously saved content stays intact.
        db.rollback()
        return jsonify({'error': 'Malformed chapter or block data'}), 400
    except Exception:
        # Unexpected failure (e.g. a database error): undo the partial
        # rewrite, then let the error propagate as before.
        db.rollback()
        raise

    return jsonify({'success': True, 'message': 'Project saved successfully'})
|
|
|
|
|
|
# ============================================
|
|
# v4.2: Publishing Endpoints
|
|
# ============================================
|
|
|
|
@project_bp.route('/api/projects/<int:project_id>/publish', methods=['POST'])
@login_required
def publish_project(project_id):
    """Publish a project to make it visible on public homepage.

    The JSON body is optional and may carry 'description', 'author' and
    'category'; each is sanitized with clean_str(), stripped, and stored
    (missing values are stored as ''). The project must already have at
    least one block with non-empty audio.

    Responses:
        200 -- ``{'success': True, 'message': ...}``
        400 -- the body is JSON but not an object, or no audio exists yet
        404 -- no project has this id
    """
    # Only parse a body that declares itself as JSON: a body-less POST then
    # publishes with empty metadata instead of being rejected, while a
    # malformed JSON body is still refused by get_json().
    data = (request.get_json() if request.is_json else None) or {}
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id, name FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project:
        return jsonify({'error': 'Project not found'}), 404

    # Verify project has at least one block with audio
    cursor.execute('''
        SELECT COUNT(*) as cnt FROM markdown_blocks mb
        JOIN chapters c ON mb.chapter_id = c.id
        WHERE c.project_id = ? AND mb.audio_data IS NOT NULL AND mb.audio_data != ''
    ''', (project_id,))
    audio_count = cursor.fetchone()['cnt']

    if audio_count == 0:
        return jsonify({'error': 'Cannot publish: no audio generated yet'}), 400

    # clean_str() maps None to '' and coerces non-strings, so odd payload
    # values cannot crash on .strip(), and control characters are removed
    # here just as they are for every other stored string in this file.
    description = clean_str(data.get('description')).strip()
    author = clean_str(data.get('author')).strip()
    category = clean_str(data.get('category')).strip()

    cursor.execute('''
        UPDATE projects
        SET is_published = 1,
            published_at = CURRENT_TIMESTAMP,
            description = ?,
            author = ?,
            category = ?
        WHERE id = ?
    ''', (description, author, category, project_id))
    db.commit()

    return jsonify({
        'success': True,
        'message': f'"{project["name"]}" published successfully!'
    })
|
|
|
|
|
|
@project_bp.route('/api/projects/<int:project_id>/unpublish', methods=['POST'])
@login_required
def unpublish_project(project_id):
    """Clear a project's is_published flag.

    Only the flag is touched; author, description and category stay in place
    so the project can be republished easily. Responds 404 when no project
    has the given id.
    """
    connection = get_db()
    cur = connection.cursor()
    params = (project_id,)

    cur.execute('SELECT id FROM projects WHERE id = ?', params)
    found = cur.fetchone()
    if not found:
        return jsonify({'error': 'Project not found'}), 404

    cur.execute('UPDATE projects SET is_published = 0 WHERE id = ?', params)
    connection.commit()

    return jsonify({'success': True, 'message': 'Project unpublished'})
|
|
|
|
|
|
# File extensions accepted for thumbnails (compared against the lowercased name).
_THUMBNAIL_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp', '.gif')

# Largest accepted thumbnail, in bytes (5 MB).
_THUMBNAIL_MAX_BYTES = 5 * 1024 * 1024


@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['POST'])
@login_required
def upload_thumbnail(project_id):
    """Upload a thumbnail image for the project.

    Expects a multipart upload in the 'file' field. The file type is judged
    by its filename extension only. The image is stored base64-encoded in
    projects.thumbnail_data, with the extension ('jpg' normalized to 'jpeg')
    in projects.thumbnail_format.

    Responses:
        200 -- ``{'success': True, 'thumbnail_data': <base64>, 'thumbnail_format': <fmt>}``
        400 -- no file, empty filename, unsupported extension, or over 5 MB
        404 -- no project has this id
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    img_file = request.files['file']
    if not img_file or not img_file.filename:
        return jsonify({'error': 'Invalid file'}), 400

    filename = img_file.filename.lower()
    if not filename.endswith(_THUMBNAIL_EXTENSIONS):
        return jsonify({'error': 'File must be an image (PNG/JPG/WEBP/GIF)'}), 400

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large file into memory.
    img_bytes = img_file.read(_THUMBNAIL_MAX_BYTES + 1)
    if len(img_bytes) > _THUMBNAIL_MAX_BYTES:
        return jsonify({'error': 'Image too large (max 5MB)'}), 400

    fmt = filename.rsplit('.', 1)[-1]
    if fmt == 'jpg':
        fmt = 'jpeg'

    b64 = base64.b64encode(img_bytes).decode('utf-8')

    db = get_db()
    cursor = db.cursor()
    cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404

    cursor.execute('''
        UPDATE projects SET thumbnail_data = ?, thumbnail_format = ? WHERE id = ?
    ''', (b64, fmt, project_id))
    db.commit()

    return jsonify({
        'success': True,
        'thumbnail_data': b64,
        'thumbnail_format': fmt
    })
|
|
|
|
|
|
@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['DELETE'])
@login_required
def delete_thumbnail(project_id):
    """Remove project thumbnail.

    Sets projects.thumbnail_data to NULL; thumbnail_format is left as is.

    Responses:
        200 -- ``{'success': True}`` (also when the project had no thumbnail)
        404 -- no project has this id
    """
    db = get_db()
    cursor = db.cursor()

    # Same existence check as the other project routes, so an unknown id is
    # reported as 404 instead of a misleading success.
    cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
    if not cursor.fetchone():
        return jsonify({'error': 'Project not found'}), 404

    cursor.execute('UPDATE projects SET thumbnail_data = NULL WHERE id = ?', (project_id,))
    db.commit()
    return jsonify({'success': True})
|
|
|
|
# ============================================
|
|
# DEBUG: Identify corrupt data
|
|
# ============================================
|
|
|
|
def _find_bad_chars(s, limit=5):
    """Return up to *limit* control-character hits in *s*.

    Each hit is ``{'pos': index, 'code': ordinal, 'hex': '0x..'}``. Matching
    uses _CONTROL_CHAR_RE, i.e. exactly the characters clean_str() removes
    (\\t, \\n and \\r are allowed). Empty or non-string values yield [].
    """
    if not s or not isinstance(s, str):
        return []
    bad = []
    for match in _CONTROL_CHAR_RE.finditer(s):
        code = ord(match.group())
        bad.append({'pos': match.start(), 'code': code, 'hex': f'0x{code:02x}'})
        if len(bad) >= limit:  # cap the hits reported per field
            break
    return bad


@project_bp.route('/api/projects/<int:project_id>/debug', methods=['GET'])
@login_required
def debug_project(project_id):
    """
    Scan a project for control characters and report which fields are dirty.
    Visit: /api/projects/<id>/debug after logging in.

    Checked fields: chapter 'title' and 'voice', and block 'block_type',
    'content', 'tts_text', 'audio_data', 'audio_format' and 'transcription'.
    At most 5 offending characters are listed per field.

    Returns a JSON report with 'project_id', 'project_name', 'issues' and
    'total_issues', or 404 when no project has the given id.
    """
    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id, name FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project:
        return jsonify({'error': 'Project not found'}), 404

    report = {
        'project_id': project['id'],
        'project_name': project['name'],
        'issues': []
    }

    cursor.execute('SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number', (project_id,))
    chapters = cursor.fetchall()

    for chapter in chapters:
        ch_num = chapter['chapter_number']

        for field in ('title', 'voice'):
            bad = _find_bad_chars(chapter[field])
            if bad:
                report['issues'].append({
                    'where': f'chapter {ch_num} -> {field}',
                    'bad_chars': bad,
                    'sample': repr((chapter[field] or '')[:80])
                })

        cursor.execute('SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order', (chapter['id'],))
        blocks = cursor.fetchall()

        for block in blocks:
            b_order = block['block_order']
            for field in ('block_type', 'content', 'tts_text', 'audio_data', 'audio_format', 'transcription'):
                bad = _find_bad_chars(block[field])
                if bad:
                    val = block[field] or ''
                    first_pos = bad[0]['pos']
                    report['issues'].append({
                        'where': f'chapter {ch_num}, block {b_order} -> {field}',
                        'field_length': len(val),
                        'bad_chars': bad,
                        # 20 characters of context on either side of the first hit
                        'sample_around_first_bad': repr(val[max(0, first_pos - 20):first_pos + 20])
                    })

    report['total_issues'] = len(report['issues'])
    return jsonify(report)
|