Fix: stream large project responses to avoid proxy truncation

This commit is contained in:
Ashim Kumar
2026-05-23 06:32:06 +06:00
parent 0617a374dd
commit d5cbadbdc4

View File

@@ -2,7 +2,7 @@
import json
import base64
from flask import Blueprint, request, jsonify
from flask import Blueprint, request, jsonify, Response, stream_with_context
from db import get_db, vacuum_db
from auth import login_required
@@ -86,7 +86,13 @@ def create_project():
@project_bp.route('/api/projects/<int:project_id>', methods=['GET'])
@login_required
def get_project(project_id):
"""Get a project with all its chapters and blocks."""
"""
Get a project with all its chapters and blocks.
Streamed response: large projects (with many audio blocks) can produce
10-50 MB of JSON. We stream it in chunks so that the reverse proxy
(Traefik in Coolify) doesn't buffer the entire payload and truncate it.
"""
db = get_db()
cursor = db.cursor()
@@ -115,6 +121,14 @@ def get_project(project_id):
''', (block['id'],))
images = cursor.fetchall()
# Safely parse transcription (might be NULL, empty, or malformed)
transcription = []
if block['transcription']:
try:
transcription = json.loads(block['transcription'])
except (json.JSONDecodeError, TypeError):
transcription = []
blocks_data.append({
'id': block['id'],
'block_order': block['block_order'],
@@ -123,7 +137,7 @@ def get_project(project_id):
'tts_text': block['tts_text'],
'audio_data': block['audio_data'],
'audio_format': block['audio_format'],
'transcription': json.loads(block['transcription']) if block['transcription'] else [],
'transcription': transcription,
'images': [{
'id': img['id'],
'data': img['image_data'],
@@ -141,13 +155,30 @@ def get_project(project_id):
'blocks': blocks_data
})
return jsonify({
response_data = {
'id': project['id'],
'name': project['name'],
'created_at': project['created_at'],
'updated_at': project['updated_at'],
'chapters': chapters_data
})
}
# Stream the JSON in chunks. ensure_ascii=False keeps Unicode (e.g. Bangla)
# compact and avoids the JSON ballooning to 2-3x its size.
def generate():
    """Yield the serialized project as UTF-8 byte chunks of at most 64 KiB.

    Reads ``response_data`` from the enclosing scope and serializes it with
    ``ensure_ascii=False`` so non-ASCII text is written as-is instead of
    being expanded into ASCII escape sequences.

    Yields:
        bytes: consecutive slices of the encoded JSON document; joined in
        order they form the complete response body.
    """
    # Encode once up front. Slicing the *str* would size chunks in characters
    # (up to 4 bytes each in UTF-8), so a "64 KB" chunk could be far larger on
    # the wire, and each str chunk would still need encoding before sending.
    payload = json.dumps(response_data, ensure_ascii=False).encode('utf-8')
    chunk_size = 64 * 1024  # 64 KiB per chunk, measured in bytes
    for start in range(0, len(payload), chunk_size):
        yield payload[start:start + chunk_size]
# Hand the chunk generator to Response so the body is sent as a stream.
return Response(
stream_with_context(generate()),
# NOTE(review): a full Content-Type value (including charset) is passed via
# `mimetype`; Werkzeug documents `content_type` for full values - confirm the
# emitted header is as intended.
mimetype='application/json; charset=utf-8',
headers={
'Cache-Control': 'no-cache',
# X-Accel-Buffering is an Nginx response-buffering hint.
# NOTE(review): the earlier comment also named Traefik - confirm Traefik honors it.
'X-Accel-Buffering': 'no'
}
)
@project_bp.route('/api/projects/<int:project_id>', methods=['PUT'])