# routes/docx_routes.py - DOCX/DOC Upload and Processing Routes

import json
import traceback

from flask import Blueprint, request, jsonify

from db import get_db
from docx_processor import process_docx_to_markdown
from auth import login_required

docx_bp = Blueprint('docx', __name__)

# Extensions accepted by the upload endpoint (compared against the lowercased name).
_ALLOWED_EXTENSIONS = ('.docx', '.doc')


def _log_block_summary(blocks):
    """Print a diagnostic summary of processed blocks, one extra line per image block."""
    image_blocks = [
        (index, block)
        for index, block in enumerate(blocks)
        if block.get('type') == 'image'
    ]
    block_count = len(blocks)
    image_count = len(image_blocks)
    text_count = block_count - image_count
    print(f"✅ Word document processed: {block_count} blocks ({text_count} text, {image_count} images)")

    for index, block in image_blocks:
        # `or ''` guards against a block whose 'data' key is present but None;
        # a diagnostic line must never be the reason a request fails.
        data_len = len(block.get('data') or '')
        fmt = block.get('format', '?')
        print(f" 📷 Block {index}: image ({fmt}), data length: {data_len}")


@docx_bp.route('/api/upload-docx', methods=['POST'])
@login_required
def upload_docx():
    """Upload and process a DOCX or DOC file.

    Expects a multipart form upload with the document under the ``file`` field.

    Returns:
        On success, a JSON object with ``success``, ``filename``, ``metadata``
        and ``blocks`` (the processor's ``markdown_blocks``).
        A 400 JSON error when the file is missing, unnamed, empty, or does not
        end in ``.docx``/``.doc``.
        A 500 JSON error carrying the exception message when processing fails.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    doc_file = request.files['file']
    if not doc_file or not doc_file.filename:
        return jsonify({'error': 'Invalid file'}), 400

    # Case-insensitive extension check; the original filename is kept for output.
    if not doc_file.filename.lower().endswith(_ALLOWED_EXTENSIONS):
        return jsonify({'error': 'File must be a .docx or .doc file'}), 400

    try:
        print(f"📄 Processing Word document: {doc_file.filename}")
        file_bytes = doc_file.read()
        print(f" 📏 File size: {len(file_bytes)} bytes")

        # A zero-byte upload cannot be a valid Word document; report it as a
        # client error instead of letting the processor fail with a 500.
        if not file_bytes:
            return jsonify({'error': 'Uploaded file is empty'}), 400

        result = process_docx_to_markdown(file_bytes, doc_file.filename)
        blocks = result.get('markdown_blocks', [])
        _log_block_summary(blocks)

        return jsonify({
            'success': True,
            'filename': doc_file.filename,
            'metadata': result.get('metadata', {}),
            'blocks': blocks,
        })
    except Exception as e:
        # Top-level request boundary: log the full traceback server-side.
        traceback.print_exc()
        # NOTE(review): str(e) is returned to the client and may expose internal
        # details; kept because existing clients may read this message.
        return jsonify({'error': str(e)}), 500