# routes/pdf_routes.py - PDF Upload and Processing Routes

import json
|
|
from flask import Blueprint, request, jsonify
|
|
|
|
from db import get_db
|
|
from pdf_processor import process_pdf_to_markdown
|
|
from auth import login_required
|
|
|
|
# Blueprint named 'pdf' that the PDF upload/processing routes in this module attach to.
pdf_bp = Blueprint('pdf', __name__)
|
|
|
|
|
|
@pdf_bp.route('/api/upload-pdf', methods=['POST'])
@login_required
def upload_pdf():
    """Upload a PDF, convert it to markdown blocks, and record it in the database.

    The uploaded file is read from ``request.files['file']``. Its bytes are
    passed to ``process_pdf_to_markdown`` and a row holding the filename,
    page count and JSON-encoded metadata is inserted into ``pdf_documents``.

    Returns:
        On success, a JSON object with the keys ``success``, ``document_id``,
        ``filename``, ``page_count``, ``metadata`` and ``blocks``.
        A JSON ``{'error': ...}`` body with status 400 when the ``file`` field
        is missing, has no filename, does not end in ``.pdf``, or is empty.
        A JSON ``{'error': ...}`` body with status 500 when processing or
        saving the document raises.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    pdf_file = request.files['file']

    if not pdf_file or not pdf_file.filename:
        return jsonify({'error': 'Invalid file'}), 400

    if not pdf_file.filename.lower().endswith('.pdf'):
        return jsonify({'error': 'File must be a PDF'}), 400

    # Tracked outside the try so the error handler knows whether a
    # connection was obtained and therefore needs rolling back.
    db = None
    try:
        print(f"📄 Processing PDF: {pdf_file.filename}")

        pdf_bytes = pdf_file.read()
        if not pdf_bytes:
            # A zero-byte upload is a client error; reject it here instead of
            # letting the processor fail and surface as a 500.
            return jsonify({'error': 'Uploaded file is empty'}), 400

        result = process_pdf_to_markdown(pdf_bytes)

        # Save PDF document record
        db = get_db()
        cursor = db.cursor()
        cursor.execute(
            '''
            INSERT INTO pdf_documents (filename, page_count, metadata)
            VALUES (?, ?, ?)
            ''',
            (
                pdf_file.filename,
                result["page_count"],
                json.dumps(result["metadata"]),
            ),
        )
        db.commit()

        doc_id = cursor.lastrowid

        print(f"✅ PDF processed: {result['page_count']} pages, {len(result['markdown_blocks'])} blocks")

        return jsonify({
            'success': True,
            'document_id': doc_id,
            'filename': pdf_file.filename,
            'page_count': result['page_count'],
            'metadata': result['metadata'],
            'blocks': result['markdown_blocks'],
        })

    except Exception as e:
        import traceback
        traceback.print_exc()

        if db is not None:
            # Discard a half-finished INSERT so the connection is not left
            # inside an open transaction. A failing rollback must not mask
            # the JSON error response below, so it is only reported.
            try:
                db.rollback()
            except Exception:
                traceback.print_exc()

        # NOTE(review): str(e) sends the raw exception text to the client;
        # switch to a generic message if internal details must not be exposed.
        return jsonify({'error': str(e)}), 500
|