86 lines
3.0 KiB
Python
86 lines
3.0 KiB
Python
# routes/pdf_routes.py - PDF Upload and Processing Routes
|
|
|
|
import json
|
|
from flask import Blueprint, request, jsonify
|
|
|
|
from db import get_db
|
|
from pdf_processor import process_pdf_to_markdown
|
|
from ai_processor import process_document_smartly
|
|
from thumbnail_generator import generate_pdf_thumbnail
|
|
from media_storage import save_pending_thumbnail
|
|
from auth import login_required
|
|
|
|
# Blueprint named 'pdf'; the upload route in this module is registered on it.
pdf_bp = Blueprint('pdf', __name__)
|
|
|
|
|
|
@pdf_bp.route('/api/upload-pdf', methods=['POST'])
@login_required
def upload_pdf():
    """Upload a PDF, convert it to processed blocks, and record the document.

    Reads the uploaded file from the ``file`` field of ``request.files``.
    The file name must end in ``.pdf`` (case-insensitive) and the payload
    must be non-empty. The bytes are passed to ``process_pdf_to_markdown``;
    its ``markdown_blocks`` and ``metadata`` are passed on to
    ``process_document_smartly``. A thumbnail is then attempted on a
    best-effort basis: any failure in that step is printed and ignored.
    Finally a row is inserted into ``pdf_documents``.

    Returns:
        A Flask JSON response, optionally paired with a status code:

        * success (default status) -- ``success``, ``document_id``,
          ``filename``, ``page_count``, ``metadata``, ``blocks``,
          ``pending_thumbnail`` and ``pending_thumbnail_format`` (the last
          two are ``None`` when no thumbnail token was produced);
        * 400 -- ``{'error': ...}`` when the file is missing, unnamed,
          not a ``.pdf``, or empty;
        * 500 -- ``{'error': str(exc)}`` when processing or the database
          write raises.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    pdf_file = request.files['file']

    if not pdf_file or not pdf_file.filename:
        return jsonify({'error': 'Invalid file'}), 400

    if not pdf_file.filename.lower().endswith('.pdf'):
        return jsonify({'error': 'File must be a PDF'}), 400

    try:
        print(f"📄 Processing PDF: {pdf_file.filename}")

        pdf_bytes = pdf_file.read()

        # A zero-byte upload is a client error; reject it here instead of
        # letting the parser fail and surface as a misleading HTTP 500.
        if not pdf_bytes:
            return jsonify({'error': 'Uploaded file is empty'}), 400

        result = process_pdf_to_markdown(pdf_bytes)

        # --- AI Powered Smart Reconstruction & Section Tagging ---
        smart_blocks = process_document_smartly(result['markdown_blocks'], result['metadata'])

        # --- v4.4: auto-generate a thumbnail from the first page ---
        # Best-effort: a thumbnail failure must never fail the upload, so
        # this step has its own handler and both fields default to None.
        pending_thumbnail = None
        pending_thumbnail_format = None
        try:
            thumb_bytes, thumb_fmt = generate_pdf_thumbnail(pdf_bytes)
            if thumb_bytes:
                token = save_pending_thumbnail(thumb_bytes, thumb_fmt)
                if token:
                    pending_thumbnail = token
                    pending_thumbnail_format = thumb_fmt
                    print(f" 🖼️ Auto-thumbnail generated: {token} ({len(thumb_bytes)} bytes)")
        except Exception as te:
            print(f" ⚠️ Thumbnail step skipped: {te}")

        # Save PDF document record
        db = get_db()
        cursor = db.cursor()

        # Parameterized insert: the client-supplied filename is bound as a
        # value and never interpolated into the SQL text.
        cursor.execute('''
            INSERT INTO pdf_documents (filename, page_count, metadata)
            VALUES (?, ?, ?)
        ''', (
            pdf_file.filename,
            result["page_count"],
            json.dumps(result["metadata"])
        ))
        db.commit()

        doc_id = cursor.lastrowid

        print(f"✅ PDF processed & reconstructed: {result['page_count']} pages, {len(smart_blocks)} blocks")

        return jsonify({
            'success': True,
            'document_id': doc_id,
            'filename': pdf_file.filename,
            'page_count': result['page_count'],
            'metadata': result['metadata'],
            'blocks': smart_blocks,
            'pending_thumbnail': pending_thumbnail,
            'pending_thumbnail_format': pending_thumbnail_format
        })

    except Exception as e:
        import traceback
        traceback.print_exc()
        # NOTE(review): str(e) exposes internal error text to the client.
        # Kept as-is because callers may display this message; consider
        # replacing it with a generic message once clients are confirmed.
        return jsonify({'error': str(e)}), 500