75 lines
2.9 KiB
Python
75 lines
2.9 KiB
Python
# routes/docx_routes.py - DOCX/DOC Upload and Processing Routes
|
|
|
|
import json
|
|
from flask import Blueprint, request, jsonify
|
|
|
|
from db import get_db
|
|
from docx_processor import process_docx_to_markdown
|
|
from ai_processor import process_document_smartly
|
|
from thumbnail_generator import generate_docx_thumbnail
|
|
from media_storage import save_pending_thumbnail
|
|
from auth import login_required
|
|
|
|
# Flask blueprint named 'docx'; the /api/upload-docx route in this module is attached to it.
docx_bp = Blueprint('docx', __name__)
|
|
|
|
|
|
@docx_bp.route('/api/upload-docx', methods=['POST'])
@login_required
def upload_docx():
    """Handle a Word-document upload and return its processed blocks as JSON.

    Expects a multipart form field named ``file`` whose filename ends in
    ``.docx`` or ``.doc`` (compared case-insensitively).

    Responses:
        400 -- the ``file`` field is absent, the upload has no filename, or
               the extension is not accepted.
        500 -- any exception raised while processing; the traceback is
               printed and the exception text is returned under ``error``.
        otherwise -- a JSON object with ``success``, ``filename``,
               ``metadata``, ``blocks``, ``pending_thumbnail`` and
               ``pending_thumbnail_format``.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    upload = request.files['file']
    if not (upload and upload.filename):
        return jsonify({'error': 'Invalid file'}), 400

    # Extension check is done on a lower-cased copy; the original filename
    # is what gets logged, passed to the processor and echoed back.
    lowered_name = upload.filename.lower()
    if not lowered_name.endswith(('.docx', '.doc')):
        return jsonify({'error': 'File must be a .docx or .doc file'}), 400

    try:
        print(f"📄 Processing Word document: {upload.filename}")

        raw = upload.read()
        print(f" 📏 File size: {len(raw)} bytes")

        converted = process_docx_to_markdown(raw, upload.filename)

        # AI-powered smart reconstruction and section tagging.
        blocks = process_document_smartly(
            converted.get('markdown_blocks', []),
            converted.get('metadata', {}),
        )

        total = len(blocks)
        images = sum(1 for block in blocks if block.get('type') == 'image')
        texts = total - images

        print(f"✅ Word document processed & reconstructed: {total} blocks ({texts} text, {images} images)")

        # v4.4: automatic thumbnail from the DOCX (embedded thumbnail / first image).
        # Best-effort step: any failure is printed and the upload still succeeds
        # with both thumbnail fields left as None.
        thumb_token = thumb_format = None
        try:
            image_bytes, image_format = generate_docx_thumbnail(raw, blocks)
            if image_bytes:
                saved_token = save_pending_thumbnail(image_bytes, image_format)
                if saved_token:
                    thumb_token, thumb_format = saved_token, image_format
                    print(f" 🖼️ Auto-thumbnail generated: {saved_token} ({len(image_bytes)} bytes)")
        except Exception as thumb_err:
            print(f" ⚠️ Thumbnail step skipped: {thumb_err}")

        payload = {
            'success': True,
            'filename': upload.filename,
            'metadata': converted.get('metadata', {}),
            'blocks': blocks,
            'pending_thumbnail': thumb_token,
            'pending_thumbnail_format': thumb_format,
        }
        return jsonify(payload)

    except Exception as exc:
        # Top-level boundary for the route: dump the traceback to the server
        # output and report the exception text to the caller.
        import traceback

        traceback.print_exc()
        return jsonify({'error': str(exc)}), 500