# ============================================
# DEBUG: Identify corrupt data
# ============================================

# Control codes that are legitimate in text: \t (9), \n (10), \r (13).
_ALLOWED_CONTROL_CODES = frozenset((9, 10, 13))

# Stop scanning a field after this many hits so one badly corrupted
# field cannot bloat the report.
_MAX_BAD_CHARS_PER_FIELD = 5


def _find_bad_chars(s):
    """Return the first few control characters found in *s*.

    A character is "bad" when its code point is below 32 (other than
    tab, newline and carriage return) or is exactly 127 (DEL).

    Args:
        s: The value to scan. Anything that is falsy or not a ``str``
            (``None``, ``bytes``, numbers, ...) is skipped.

    Returns:
        A list of at most ``_MAX_BAD_CHARS_PER_FIELD`` dicts, each with
        ``pos`` (index into *s*), ``code`` (code point) and ``hex``
        (code point formatted like ``0x0b``). Empty when nothing is found
        or when *s* is not a non-empty string.
    """
    if not s or not isinstance(s, str):
        return []
    bad = []
    for i, ch in enumerate(s):
        code = ord(ch)
        if (code < 32 and code not in _ALLOWED_CONTROL_CODES) or code == 127:
            bad.append({'pos': i, 'code': code, 'hex': f'0x{code:02x}'})
            if len(bad) >= _MAX_BAD_CHARS_PER_FIELD:
                break
    return bad


# The URL must carry the ``project_id`` variable that the view function
# takes as its argument; without it the view cannot be called.
# NOTE(review): the ``int`` converter is assumed -- confirm it matches the
# converter used by the other project routes in this module.
# NOTE(review): only ``@login_required`` guards this endpoint; no
# per-project ownership check is visible here -- confirm that is intended.
@project_bp.route('/api/projects/<int:project_id>/debug', methods=['GET'])
@login_required
def debug_project(project_id):
    """
    Scan a project for control characters and report which fields are dirty.
    Visit: /api/projects/<project_id>/debug after logging in.

    Checks the ``title`` and ``voice`` fields of every chapter of the
    project, and the text fields of every markdown block of each chapter.

    Args:
        project_id: Value matched against ``projects.id``.

    Returns:
        A JSON response with ``project_id``, ``project_name``, a list of
        ``issues`` (one entry per dirty field) and ``total_issues``; or a
        JSON error body with HTTP status 404 when the project does not exist.
    """
    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id, name FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project:
        return jsonify({'error': 'Project not found'}), 404

    report = {
        'project_id': project['id'],
        'project_name': project['name'],
        'issues': [],
    }

    cursor.execute(
        'SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number',
        (project_id,),
    )
    # fetchall() materialises the rows, so reusing the cursor for the
    # per-chapter block queries below does not disturb this iteration.
    chapters = cursor.fetchall()

    for chapter in chapters:
        ch_num = chapter['chapter_number']

        for field in ('title', 'voice'):
            bad = _find_bad_chars(chapter[field])
            if bad:
                report['issues'].append({
                    'where': f'chapter {ch_num} -> {field}',
                    'bad_chars': bad,
                    'sample': repr((chapter[field] or '')[:80]),
                })

        cursor.execute(
            'SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order',
            (chapter['id'],),
        )
        blocks = cursor.fetchall()

        for block in blocks:
            b_order = block['block_order']
            # Non-string values in any of these columns are ignored by
            # _find_bad_chars, so only text content is reported.
            for field in ('block_type', 'content', 'tts_text', 'audio_data',
                          'audio_format', 'transcription'):
                bad = _find_bad_chars(block[field])
                if bad:
                    val = block[field] or ''
                    first_pos = bad[0]['pos']
                    report['issues'].append({
                        'where': f'chapter {ch_num}, block {b_order} -> {field}',
                        'field_length': len(val),
                        'bad_chars': bad,
                        # 20 characters of context on each side of the first hit.
                        'sample_around_first_bad': repr(
                            val[max(0, first_pos - 20):first_pos + 20]
                        ),
                    })

    report['total_issues'] = len(report['issues'])
    return jsonify(report)