# routes/docx_routes.py - DOCX/DOC Upload and Processing Routes
import json
import traceback

from flask import Blueprint, request, jsonify

from db import get_db
from docx_processor import process_docx_to_markdown
from ai_processor import process_document_smartly
from auth import login_required

docx_bp = Blueprint('docx', __name__)

# Extensions accepted by the upload endpoint; matched against the
# lower-cased filename so the check is case-insensitive.
_ALLOWED_EXTENSIONS = ('.docx', '.doc')


@docx_bp.route('/api/upload-docx', methods=['POST'])
@login_required
def upload_docx():
    """Upload and process a DOCX or DOC file.

    Reads the uploaded file from the ``file`` field of the request, converts
    it with ``process_docx_to_markdown`` and passes the resulting
    ``markdown_blocks`` and ``metadata`` to ``process_document_smartly``.

    Returns:
        On success, a JSON response with the keys ``success``, ``filename``,
        ``metadata`` and ``blocks``.
        On a missing/empty upload or an unsupported extension, a JSON
        ``{'error': ...}`` body with status 400.
        On any exception raised during processing, a JSON ``{'error': ...}``
        body carrying ``str(e)`` with status 500.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    doc_file = request.files['file']
    if not doc_file or not doc_file.filename:
        return jsonify({'error': 'Invalid file'}), 400

    # str.endswith accepts a tuple, so one call covers every allowed suffix.
    if not doc_file.filename.lower().endswith(_ALLOWED_EXTENSIONS):
        return jsonify({'error': 'File must be a .docx or .doc file'}), 400

    try:
        print(f"📄 Processing Word document: {doc_file.filename}")
        file_bytes = doc_file.read()
        print(f" 📏 File size: {len(file_bytes)} bytes")

        result = process_docx_to_markdown(file_bytes, doc_file.filename)

        # --- AI Powered Smart Reconstruction & Section Tagging ---
        smart_blocks = process_document_smartly(
            result.get('markdown_blocks', []),
            result.get('metadata', {}),
        )

        # Every block whose 'type' is not 'image' is counted as text.
        block_count = len(smart_blocks)
        image_count = sum(1 for b in smart_blocks if b.get('type') == 'image')
        text_count = block_count - image_count
        print(f"✅ Word document processed & reconstructed: {block_count} blocks ({text_count} text, {image_count} images)")

        return jsonify({
            'success': True,
            'filename': doc_file.filename,
            'metadata': result.get('metadata', {}),
            'blocks': smart_blocks
        })
    except Exception as e:
        # Top-level request boundary: dump the traceback for the operator and
        # turn the failure into a JSON 500 instead of an unhandled error.
        # NOTE(review): the raw exception text is sent to the client; consider
        # a generic message if internal details should not be exposed.
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500