Add debug endpoint to find control chars in project data
This commit is contained in:
@@ -488,3 +488,76 @@ def delete_thumbnail(project_id):
|
||||
cursor.execute('UPDATE projects SET thumbnail_data = NULL WHERE id = ?', (project_id,))
|
||||
db.commit()
|
||||
return jsonify({'success': True})
|
||||
|
||||
# ============================================
|
||||
# DEBUG: Identify corrupt data
|
||||
# ============================================
|
||||
|
||||
@project_bp.route('/api/projects/<int:project_id>/debug', methods=['GET'])
@login_required
def debug_project(project_id):
    """
    Report which text fields of a project contain control characters.

    Looks up the project row, then walks every chapter of the project
    (ordered by ``chapter_number``) and every markdown block of each
    chapter (ordered by ``block_order``), checking a fixed set of fields
    on each row.

    Returns a JSON document with ``project_id``, ``project_name``, an
    ``issues`` list (one entry per dirty field) and ``total_issues``.
    Responds with a JSON error and status 404 when no project row matches.
    """

    def find_bad_chars(s):
        """Collect up to five control characters found in ``s``.

        Each hit is a dict holding the character index (``pos``), its code
        point (``code``) and the code point as two-digit hex (``hex``).
        Anything that is not a non-empty ``str`` yields an empty list.
        """
        if not isinstance(s, str) or not s:
            return []

        hits = []
        for index, char in enumerate(s):
            code_point = ord(char)
            # Tab (9), line feed (10) and carriage return (13) are allowed.
            if code_point in (9, 10, 13):
                continue
            # Of what is left, only codes below 32 and DEL (127) are flagged.
            if code_point >= 32 and code_point != 127:
                continue
            hits.append({'pos': index, 'code': code_point, 'hex': f'0x{code_point:02x}'})
            if len(hits) >= 5:  # stop after five hits per field
                break
        return hits

    db = get_db()
    cursor = db.cursor()

    cursor.execute('SELECT id, name FROM projects WHERE id = ?', (project_id,))
    project = cursor.fetchone()
    if not project:
        return jsonify({'error': 'Project not found'}), 404

    issues = []

    cursor.execute('SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number', (project_id,))
    # fetchall() materializes the chapter rows, so the same cursor can be
    # reused for the per-chapter block query inside the loop.
    for chapter in cursor.fetchall():
        chapter_number = chapter['chapter_number']

        # Chapter-level fields: report the first 80 characters as a sample.
        for field in ('title', 'voice'):
            value = chapter[field]
            hits = find_bad_chars(value)
            if not hits:
                continue
            issues.append({
                'where': f'chapter {chapter_number} -> {field}',
                'bad_chars': hits,
                'sample': repr((value or '')[:80])
            })

        cursor.execute('SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order', (chapter['id'],))
        for block in cursor.fetchall():
            block_position = block['block_order']

            # Block-level fields: report a window of up to 20 characters on
            # either side of the first offending character.
            for field in ('block_type', 'content', 'tts_text', 'audio_data', 'audio_format', 'transcription'):
                value = block[field]
                hits = find_bad_chars(value)
                if not hits:
                    continue
                text = value or ''
                first_pos = hits[0]['pos']
                window_start = max(0, first_pos - 20)
                issues.append({
                    'where': f'chapter {chapter_number}, block {block_position} -> {field}',
                    'field_length': len(text),
                    'bad_chars': hits,
                    'sample_around_first_bad': repr(text[window_start:first_pos + 20])
                })

    report = {
        'project_id': project['id'],
        'project_name': project['name'],
        'issues': issues,
        'total_issues': len(issues)
    }
    return jsonify(report)
|
||||
|
||||
Reference in New Issue
Block a user