From d5cbadbdc4dcfcc8ebf0a95853cf27a1fea3fe54 Mon Sep 17 00:00:00 2001
From: Ashim Kumar <ak@Ashims-MacBook-Pro-2.local>
Date: Sat, 23 May 2026 06:32:06 +0600
Subject: [PATCH] Fix: stream large project responses to avoid proxy truncation

---
 routes/project_routes.py | 41 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/routes/project_routes.py b/routes/project_routes.py
index 8f0212b..7fc669d 100644
--- a/routes/project_routes.py
+++ b/routes/project_routes.py
@@ -2,7 +2,7 @@
 
 import json
 import base64
-from flask import Blueprint, request, jsonify
+from flask import Blueprint, request, jsonify, Response, stream_with_context
 
 from db import get_db, vacuum_db
 from auth import login_required
@@ -86,7 +86,13 @@ def create_project():
 @project_bp.route('/api/projects/<int:project_id>', methods=['GET'])
 @login_required
 def get_project(project_id):
-    """Get a project with all its chapters and blocks."""
+    """
+    Get a project with all its chapters and blocks.
+    
+    Streamed response: large projects (with many audio blocks) can produce
+    10-50 MB of JSON. The payload is serialized in full, then sent in chunks
+    so the reverse proxy (Traefik in Coolify) doesn't buffer and truncate it.
+    """
     db = get_db()
     cursor = db.cursor()
     
@@ -115,6 +121,14 @@ def get_project(project_id):
             ''', (block['id'],))
             images = cursor.fetchall()
             
+            # Safely parse transcription (might be NULL, empty, or malformed)
+            transcription = []
+            if block['transcription']:
+                try:
+                    transcription = json.loads(block['transcription'])
+                except (json.JSONDecodeError, TypeError):
+                    transcription = []
+            
             blocks_data.append({
                 'id': block['id'],
                 'block_order': block['block_order'],
@@ -123,7 +137,7 @@ def get_project(project_id):
                 'tts_text': block['tts_text'],
                 'audio_data': block['audio_data'],
                 'audio_format': block['audio_format'],
-                'transcription': json.loads(block['transcription']) if block['transcription'] else [],
+                'transcription': transcription,
                 'images': [{
                     'id': img['id'],
                     'data': img['image_data'],
@@ -141,13 +155,30 @@ def get_project(project_id):
             'blocks': blocks_data
         })
     
-    return jsonify({
+    response_data = {
         'id': project['id'],
         'name': project['name'],
         'created_at': project['created_at'],
         'updated_at': project['updated_at'],
         'chapters': chapters_data
-    })
+    }
+    
+    # Stream the JSON in chunks. ensure_ascii=False keeps Unicode (e.g. Bangla)
+    # compact and avoids the JSON ballooning to 2-3x its size.
+    def generate():
+        json_str = json.dumps(response_data, ensure_ascii=False)
+        chunk_size = 64 * 1024  # 64Ki characters per chunk (str slice; UTF-8 byte size may be larger)
+        for i in range(0, len(json_str), chunk_size):
+            yield json_str[i:i + chunk_size]
+    
+    return Response(
+        stream_with_context(generate()),
+        mimetype='application/json; charset=utf-8',
+        headers={
+            'Cache-Control': 'no-cache',
+            'X-Accel-Buffering': 'no'  # Nginx hint to disable proxy buffering; other proxies (e.g. Traefik) may ignore it
+        }
+    )
 
 
 @project_bp.route('/api/projects/<int:project_id>', methods=['PUT'])