Files
audiobook-maker-pro-v4.2/routes/pdf_routes.py

86 lines
3.0 KiB
Python

# routes/pdf_routes.py - PDF Upload and Processing Routes
import json
from flask import Blueprint, request, jsonify
from db import get_db
from pdf_processor import process_pdf_to_markdown
from ai_processor import process_document_smartly
from thumbnail_generator import generate_pdf_thumbnail
from media_storage import save_pending_thumbnail
from auth import login_required
# Blueprint under which this module's PDF upload/processing route is registered.
pdf_bp = Blueprint('pdf', __name__)
def _create_pending_thumbnail(pdf_bytes):
    """Best-effort: generate a thumbnail for the PDF and store it as pending.

    Calls ``generate_pdf_thumbnail`` and, when it yields image bytes, passes
    them to ``save_pending_thumbnail``.  Any exception raised along the way is
    printed and swallowed so that a thumbnail failure never fails the upload.

    Args:
        pdf_bytes: Raw bytes of the uploaded PDF.

    Returns:
        ``(token, thumb_format)`` when a thumbnail was generated and a token
        was returned by the storage helper; ``(None, None)`` otherwise.
    """
    try:
        thumb_bytes, thumb_fmt = generate_pdf_thumbnail(pdf_bytes)
        if not thumb_bytes:
            return None, None
        token = save_pending_thumbnail(thumb_bytes, thumb_fmt)
        if not token:
            return None, None
        print(f" 🖼️ Auto-thumbnail generated: {token} ({len(thumb_bytes)} bytes)")
        return token, thumb_fmt
    except Exception as te:
        # Deliberate best-effort: the upload proceeds without a thumbnail.
        print(f" ⚠️ Thumbnail step skipped: {te}")
        return None, None


@pdf_bp.route('/api/upload-pdf', methods=['POST'])
@login_required
def upload_pdf():
    """Upload and process a PDF file.

    Expects a multipart POST with the PDF under the ``file`` form field.
    The raw bytes are converted with ``process_pdf_to_markdown``, the
    resulting blocks are passed through ``process_document_smartly``, a
    thumbnail is attempted (best-effort), and a row is inserted into
    ``pdf_documents``.

    Returns:
        On success, a JSON object with ``success``, ``document_id``,
        ``filename``, ``page_count``, ``metadata``, ``blocks``,
        ``pending_thumbnail`` and ``pending_thumbnail_format``.
        On a bad request (missing field, missing filename, non-``.pdf``
        name, or empty payload) a JSON ``{'error': ...}`` with status 400.
        On any exception during processing, ``{'error': str(e)}`` with
        status 500.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    pdf_file = request.files['file']
    if not pdf_file or not pdf_file.filename:
        return jsonify({'error': 'Invalid file'}), 400
    # Only the file name is checked here; the content itself is not sniffed.
    if not pdf_file.filename.lower().endswith('.pdf'):
        return jsonify({'error': 'File must be a PDF'}), 400

    try:
        print(f"📄 Processing PDF: {pdf_file.filename}")
        pdf_bytes = pdf_file.read()
        # A zero-byte upload is a client error; reject it before it reaches
        # the PDF/AI pipeline instead of letting it surface as a 500.
        if not pdf_bytes:
            return jsonify({'error': 'Uploaded file is empty'}), 400

        result = process_pdf_to_markdown(pdf_bytes)
        page_count = result['page_count']
        metadata = result['metadata']

        # --- AI Powered Smart Reconstruction & Section Tagging ---
        smart_blocks = process_document_smartly(result['markdown_blocks'], metadata)

        # --- v4.4: auto-generate a thumbnail from the first page ---
        pending_thumbnail, pending_thumbnail_format = _create_pending_thumbnail(pdf_bytes)

        # Save PDF document record
        db = get_db()
        cursor = db.cursor()
        cursor.execute('''
            INSERT INTO pdf_documents (filename, page_count, metadata)
            VALUES (?, ?, ?)
        ''', (
            pdf_file.filename,
            page_count,
            json.dumps(metadata),
        ))
        db.commit()
        # lastrowid is used as the id of the document row inserted above.
        doc_id = cursor.lastrowid

        print(f"✅ PDF processed & reconstructed: {page_count} pages, {len(smart_blocks)} blocks")
        return jsonify({
            'success': True,
            'document_id': doc_id,
            'filename': pdf_file.filename,
            'page_count': page_count,
            'metadata': metadata,
            'blocks': smart_blocks,
            'pending_thumbnail': pending_thumbnail,
            'pending_thumbnail_format': pending_thumbnail_format,
        })
    except Exception as e:
        import traceback
        traceback.print_exc()
        # NOTE(review): str(e) exposes internal error text to the client;
        # kept as-is because callers may rely on the message.
        return jsonify({'error': str(e)}), 500