Audiobook Maker Pro v4.2 — production ready

This commit is contained in:
Ashim Kumar
2026-05-22 18:28:47 +06:00
commit 0617a374dd
41 changed files with 15262 additions and 0 deletions

35
.dockerignore Normal file
View File

@@ -0,0 +1,35 @@
__pycache__/
*.pyc
*.pyo
*.pyd
# Local virtualenv
.venv/
venv/
env/
# Local DB (dev DB যেন container-এ না যায়)
*.db
*.db-journal
audiobook_maker.db
# Git / IDE
.git/
.gitignore
.vscode/
.idea/
*.swp
# OS junk
.DS_Store
Thumbs.db
# Env files (Coolify UI থেকে set হবে)
.env
.env.local
# Beam app আলাদাভাবে deploy হবে
beam_app/
# Docs
*.md

15
.env.example Normal file
View File

@@ -0,0 +1,15 @@
# .env.example - এই ভ্যালুগুলো Coolify-র Environment Variables UI-তে দিতে হবে
# --- দরকারি ---
DATABASE=/opt/apps/Audiobook-Maker-Pro-v4.2/audiobook_maker.db
# Long random string বানান এই কমান্ড দিয়ে:
# python -c "import secrets; print(secrets.token_hex(32))"
SECRET_KEY=replace-with-a-long-random-string
# --- Beam Cloud (TTS backend) ---
BEAM_COMBINED_URL=https://your-beam-endpoint.beam.cloud
BEAM_API_TOKEN=your-beam-token
# --- ঐচ্ছিক ---
OPENAI_API_KEY=

26
.gitignore vendored Normal file
View File

@@ -0,0 +1,26 @@
# Python
__pycache__/
*.pyc
*.pyo
# Virtualenvs
.venv/
venv/
env/
# Local SQLite databases
*.db
*.db-journal
# Environment files
.env
.env.local
# IDE
.vscode/
.idea/
*.swp
# OS
.DS_Store
Thumbs.db

48
Dockerfile Normal file
View File

@@ -0,0 +1,48 @@
# Dockerfile - Audiobook Maker Pro v4.2 (CPU-only)
FROM python:3.11-slim
# Python যেন logs সাথে সাথে দেখায়
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# System packages:
# ffmpeg + libsndfile1 → pydub-এর জন্য দরকার (audio convert)
# curl → healthcheck-এর জন্য
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
libsndfile1 \
curl \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Python প্যাকেজ আগে install করি (Docker cache ভালো কাজ করবে)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# অ্যাপ্লিকেশন কোড copy করি
COPY . .
# ডাটাবেস ফোল্ডার তৈরি করি (Coolify এখানে volume mount করবে)
RUN mkdir -p /opt/apps/Audiobook-Maker-Pro-v4.2
# Gunicorn যেই port-এ চলবে
EXPOSE 5010
# Healthcheck — Coolify জানবে container ready কিনা
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
CMD curl -fsS http://localhost:5010/login || exit 1
# Production server: gunicorn
# - 2 workers → 8 GB VPS-এর জন্য ঠিক আছে। বেশি ট্রাফিক হলে 3-4 করতে পারেন।
# - 120s timeout → Beam API call অনেক সময় ধীর হয়, তাই বেশি সময় দিচ্ছি।
CMD ["gunicorn", \
"--bind", "0.0.0.0:5010", \
"--workers", "2", \
"--timeout", "120", \
"--access-logfile", "-", \
"--error-logfile", "-", \
"app:app"]

321
ai_processor.py Normal file
View File

@@ -0,0 +1,321 @@
# ai_processor.py - অ্যাডভান্সড রুল-বেসড ইঞ্জিন (v2.1)
# আপনার দেয়া প্রোডাকশন-গ্রেড অ্যানালাইসিস এবং অপটিমাইজেশনের ওপর ভিত্তি করে আপডেট করা হয়েছে
# ১০০% এআই-মুক্ত, অত্যন্ত দ্রুত এবং নির্ভুল
import re
# =====================================================================
# হেল্পার ফাংশন
# =====================================================================
def _clean_markdown(text):
"""মার্কডাউন সিম্বলগুলো রিমুভ করে পরিষ্কার টেক্সট দেয়।"""
if not text:
return ""
text = re.sub(r'[*_]+', '', text)
text = re.sub(r'^#+\s*|^>\s*|^\-\s*', '', text, flags=re.MULTILINE)
return text.strip()
def _get_body_font_size(blocks):
"""
ডকুমেন্টের মূল বডি টেক্সটের ফন্ট সাইজ বের করে (Frequency/Word Count ভিত্তিক)।
মিডিয়ানের বদলে সবচেয়ে বেশি ব্যবহৃত ফন্ট সাইজটিকে বডি হিসেবে ধরে।
"""
size_char_counts = {}
for block in blocks:
if block.get('type') not in ['image', 'table'] and block.get('font_size'):
size = round(block['font_size'], 0)
word_count = len(_clean_markdown(block.get('content', '')).split())
# বড় ব্লকগুলোকে বেশি ওয়েট (Weight) দেওয়া হচ্ছে
size_char_counts[size] = size_char_counts.get(size, 0) + word_count
if not size_char_counts:
return 12.0
# যে ফন্ট সাইজে সবচেয়ে বেশি শব্দ আছে সেটিই বডি ফন্ট
return max(size_char_counts.keys(), key=lambda s: size_char_counts[s])
# =====================================================================
# স্টেপ ১: অ্যাডভান্সড মার্জিং (While Loop & Soft Signals)
# =====================================================================
def _should_merge(current, nxt):
"""দুটি ব্লক মার্জ করা উচিত কিনা তা যাচাই করে।"""
# রুল: হেডিং ব্লকগুলোকে নন-হেডিং ব্লকের সাথে জোড়া লাগতে দেওয়া হবে না
c_type = current.get('type', 'paragraph')
n_type = nxt.get('type', 'paragraph')
heading_types = {'heading1', 'heading2', 'heading3'}
if (c_type in heading_types and n_type not in heading_types) or \
(n_type in heading_types and c_type not in heading_types):
return False
if c_type in ['image', 'table'] or n_type in ['image', 'table']:
return False
c_text = current.get('content', '').strip()
n_text = nxt.get('content', '').strip()
c_clean = _clean_markdown(c_text)
n_clean = _clean_markdown(n_text)
if not c_clean or not n_clean:
return False
# রুল: স্ট্যান্ডঅ্যালোন হেডারগুলো প্রটেক্ট করা
standalone = ["INTRODUCTION", "FOREWORD", "PREFACE", "CONCLUSION",
"CONTENTS", "TABLE OF CONTENTS", "GLOSSARY", "APPENDIX"]
if c_clean.upper() in standalone or n_clean.upper() in standalone:
return False
# টাইপোগ্রাফি চেক
c_size = current.get('font_size', 12)
n_size = nxt.get('font_size', 12)
if abs(c_size - n_size) > 1.5:
return False
same_formatting = current.get('is_bold') == nxt.get('is_bold')
if not same_formatting:
return False
word_count_current = len(c_clean.split())
word_count_next = len(n_clean.split())
# রিলাক্সড টাইপ চেক: ব্লক ছোট হলে টাইপ মিসম্যাচ ইগনোর করা হবে
same_type = c_type == n_type
if not same_type:
if word_count_current < 10 and word_count_next < 10:
same_type = True
if not same_type:
return False
# সফট সিগন্যালস (Soft Signals)
ends_with_punct = bool(re.search(r'[.!?;:]\s*["\u0027\u2018\u2019\u201C\u201D]?$', c_clean))
starts_with_lower = n_clean[0].islower()
# "it", "is", "was", "are", "were" বাদ দেওয়া হয়েছে কারণ এগুলো বাক্যের শেষে থাকতে পারে
prep_regex = r'\b(the|a|an|of|in|to|for|and|or|but|with|from|by|at|on)\s*$'
# যেকোনো একটি স্ট্রং সিগন্যাল পেলেই মার্জ করবে
strong_merge = (
(not ends_with_punct and starts_with_lower) or
(c_clean.isupper() and n_clean.isupper() and len(c_clean) < 80 and len(n_clean) < 80) or
(c_clean and c_clean[-1] in ',;-') or
bool(re.search(prep_regex, c_clean, re.IGNORECASE))
)
return strong_merge
def _advanced_merge(blocks):
"""While লুপ ব্যবহার করে একাধিক (৩ বা ততোধিক) ভাঙা ফ্র্যাগমেন্ট জোড়া লাগায়।"""
merged_blocks = []
i = 0
while i < len(blocks):
current = dict(blocks[i])
# যতক্ষণ পর্যন্ত পরের ব্লকটি মার্জ করার যোগ্য, লুপ চলতে থাকবে
while i < len(blocks) - 1:
nxt = blocks[i + 1]
if _should_merge(current, nxt):
c_text = current.get('content', '').strip()
n_text = nxt.get('content', '').strip()
c_clean = _clean_markdown(c_text)
n_clean = _clean_markdown(n_text)
prefix = ""
if c_text.startswith('### '): prefix = "### "
elif c_text.startswith('## '): prefix = "## "
elif c_text.startswith('# '): prefix = "# "
current['content'] = f"{prefix}{c_clean} {n_clean}".strip()
print(f" 🔗 ফ্র্যাগমেন্ট মার্জ করা হয়েছে: \"{c_clean[-20:]} {n_clean[:20]}\"")
i += 1
else:
break
merged_blocks.append(current)
i += 1
return merged_blocks
# =====================================================================
# স্টেপ ২: ক্লাস্টার ভিত্তিক TOC ডিটেকশন (অপটিমাইজড)
# =====================================================================
def _detect_toc_region(blocks):
"""পরপর অনেকগুলো ছোট চ্যাপ্টার-লাইক এন্ট্রি দেখে TOC ক্লাস্টার বের করে। (While loop দিয়ে জাম্প করে)"""
toc_indices = set()
i = 0
while i < len(blocks) - 2:
streak = 0
temp_indices = []
for j in range(i, min(i + 30, len(blocks))):
clean = _clean_markdown(blocks[j].get('content', '')).strip()
word_count = len(clean.split())
if word_count > 20:
break # বড় প্যারাগ্রাফ পেলে ক্লাস্টার ভেঙে যাবে
is_chapter_like = bool(re.match(
r'^(chapter|part|section|appendix|introduction|conclusion|glossary|index|preface|foreword|contents)',
clean, re.IGNORECASE
))
is_numbered = bool(re.match(r'^\d+[\.\)]\s', clean))
if is_chapter_like or is_numbered or bool(re.search(r'(\.{3,}|…)\s*\d+$', clean)):
streak += 1
temp_indices.append(j)
elif word_count < 5:
temp_indices.append(j) # পেজ নাম্বার বা ছোট গ্যাপ হতে পারে, স্কিপ করে স্ট্রিক বজায় রাখবে
continue
else:
break
# ৩ বা তার বেশি এন্ট্রি পেলে সেটি একটি TOC রিজিয়ন
if streak >= 3:
toc_indices.update(temp_indices)
i = temp_indices[-1] + 1 # বারবার একই ইনডেক্স চেক না করে জাম্প করবে (লুপ এফিশিয়েন্সি)
else:
i += 1
return toc_indices
# =====================================================================
# স্টেপ ৩: সেকশন স্কোরিং এবং ফিল্টারিং
# =====================================================================
def _apply_section_scoring(blocks):
"""টেক্সটের ঘনত্ব, ফন্ট সাইজ এবং ক্লাস্টার ব্যবহার করে সেকশন চিহ্নিত করে।"""
body_size = _get_body_font_size(blocks)
section_counter = 1
toc_indices = _detect_toc_region(blocks)
for i, block in enumerate(blocks):
if block.get('type') in ['image', 'table']:
block['is_section_start'] = False
continue
if i == 0:
block['is_section_start'] = True
text = block.get('content', '').strip()
clean_text = _clean_markdown(text)
title = clean_text[:40].strip() + ("..." if len(clean_text) > 40 else "")
if not title:
title = "Section 1"
block['section_name'] = title
continue
text = block.get('content', '').strip()
clean_text = _clean_markdown(text)
word_count = len(clean_text.split())
if word_count == 0:
block['is_section_start'] = False
continue
# --- উন্নত পেজ নাম্বার ফিল্টারিং ---
# "316", "- 316 -", "Page 316" ফরম্যাটগুলো ধরবে
is_page_number = bool(re.match(r'^[-—–\s]*\d{1,4}[-—–\s]*$', clean_text.strip())) or \
bool(re.match(r'^page\s+\d{1,4}$', clean_text.strip(), re.IGNORECASE))
if is_page_number:
block['is_section_start'] = False
block['is_page_number'] = True
continue
# TOC এর ভেতরের এলিমেন্টগুলো সেকশন হবে না
if i in toc_indices:
block['is_section_start'] = False
continue
score = 0
# ফ্যাক্টর A: হেডিং টাইপ (PDF-এর heading3 কেও বুস্ট দেওয়া হলো)
if block.get('type') in ['heading1', 'heading2']: score += 5
elif block.get('type') == 'heading3': score += 3
# ফ্যাক্টর B: ফন্ট সাইজ (বডি টেক্সটের সাথে তুলনা)
f_size = block.get('font_size', body_size)
if f_size >= body_size + 4: score += 6
elif f_size >= body_size + 2: score += 3
# ফ্যাক্টর C: ফন্ট ওয়েট
if block.get('is_bold'): score += 3
# ফ্যাক্টর D: কেস (Case)
if clean_text.isupper() and 3 < len(clean_text) < 80: score += 3
# ফ্যাক্টর E: কি-ওয়ার্ডস
lower_text = clean_text.lower()
if re.match(r'^(chapter|part|section|appendix|introduction|preface|prologue|epilogue|foreword|glossary|index)', lower_text):
score += 5
# TABLE OF CONTENTS স্পেশাল রুল লজিক ফিক্স
if "table of contents" in lower_text or "contents" == lower_text:
if not toc_indices and i < 50:
# যদি কোনো TOC ক্লাস্টার না পাওয়া যায়, কিন্তু প্রথম দিকে থাকে
score += 5
elif len(toc_indices) > 0 and i <= min(toc_indices):
# যদি ক্লাস্টার থাকে এবং এটি তার আগে থাকে
score += 5
# --- পেনাল্টি (নেগেটিভ স্কোরিং - ফ্লেক্সিবল রুল) ---
if word_count > 20:
score -= 10
elif word_count > 12:
# ফন্ট বড় না হলে এবং বোল্ড না হলে তবেই পেনাল্টি
if not block.get('is_bold') and f_size <= body_size + 2:
score -= 5
if re.search(r'[.!?]\s*["\u0027\u2018\u2019\u201C\u201D]?$', clean_text):
score -= 3
# চূড়ান্ত সিদ্ধান্ত
if score >= 6:
block['is_section_start'] = True
title = clean_text[:60].strip()
if title.isupper() and len(title) > 10:
title = title.title()
block['section_name'] = title
section_counter += 1
print(f" 📌 সেকশন চিহ্নিত করা হয়েছে: [{score} pts] {title}")
else:
block['is_section_start'] = False
block['section_name'] = ""
# পেজ নাম্বার ব্লকগুলো মূল ডেটা থেকে পুরোপুরি বাদ দিয়ে দেওয়া হচ্ছে (TTS যেন না পড়ে)
filtered_blocks = [b for b in blocks if not b.get('is_page_number')]
return filtered_blocks
# =====================================================================
# মেইন এক্সপোর্ট ফাংশন
# =====================================================================
def process_document_smartly(blocks, metadata):
"""
মেইন এন্ট্রি পয়েন্ট। রুল-বেসড ইঞ্জিনের মাধ্যমে পুরো ডকুমেন্ট প্রসেস করা হয়।
"""
print("\n" + "=" * 60, flush=True)
print("🚀 অ্যাডভান্সড রুল-বেসড ইঞ্জিন (v2.1) শুরু হচ্ছে...", flush=True)
print(f"📄 মোট {len(blocks)} টি ব্লক বিশ্লেষণ করা হচ্ছে।", flush=True)
if not blocks:
return blocks
merged_blocks = _advanced_merge(blocks)
print(f"✂️ মার্জ করার পর মোট ব্লক সংখ্যা: {len(merged_blocks)}", flush=True)
final_blocks = _apply_section_scoring(merged_blocks)
section_count = sum(1 for b in final_blocks if b.get('is_section_start'))
print(f"📑 ডকুমেন্টে মোট {section_count} টি সেকশন পাওয়া গেছে।", flush=True)
print("=" * 60 + "\n", flush=True)
return final_blocks

38
app.py Normal file
View File

@@ -0,0 +1,38 @@
# app.py - Main Flask Application Entry Point
import os
from flask import Flask
from config import SECRET_KEY, STATIC_FOLDER, STATIC_URL_PATH, BEAM_COMBINED_URL, BEAM_API_TOKEN, DATABASE
from db import init_app as init_db
from routes import register_blueprints
def create_app():
app = Flask(__name__, static_folder=STATIC_FOLDER, static_url_path=STATIC_URL_PATH)
app.secret_key = SECRET_KEY
app.config['SESSION_COOKIE_HTTPONLY'] = True
app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'
app.config['PERMANENT_SESSION_LIFETIME'] = 86400
init_db(app)
register_blueprints(app)
return app
app = create_app()
if __name__ == '__main__':
print("=" * 60)
print("🎧 Audiobook Maker Pro v4.2 Starting (dev mode)...")
print("=" * 60)
print(f"📍 Beam Combined URL: {BEAM_COMBINED_URL or '❌ NOT SET!'}")
print(f"📍 Beam Token: {'✅ Configured' if BEAM_API_TOKEN else '❌ NOT CONFIGURED!'}")
print(f"📍 Database: {DATABASE}")
print(f"📍 Default Admin: admin / admin123")
print("=" * 60)
app.run(debug=True, host='0.0.0.0', port=5010)

74
auth.py Normal file
View File

@@ -0,0 +1,74 @@
# auth.py - Authentication and User Management
import functools
from flask import session, redirect, url_for, request, jsonify
from db import get_db_connection
def init_users_table():
"""Create users table and default admin user."""
with get_db_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
username TEXT NOT NULL UNIQUE,
password TEXT NOT NULL,
role TEXT NOT NULL DEFAULT 'user',
is_active INTEGER NOT NULL DEFAULT 1,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_login TIMESTAMP
)
''')
# Create default admin if no users exist
cursor.execute('SELECT COUNT(*) as count FROM users')
if cursor.fetchone()['count'] == 0:
cursor.execute('''
INSERT INTO users (username, password, role, is_active)
VALUES (?, ?, ?, ?)
''', ('admin', 'admin123', 'admin', 1))
print("✅ Default admin user created (username: admin, password: admin123)")
conn.commit()
def login_required(f):
"""Decorator to require login for routes."""
@functools.wraps(f)
def decorated_function(*args, **kwargs):
if 'user_id' not in session:
# Check if it's an API request
if request.path.startswith('/api/'):
return jsonify({'error': 'Authentication required'}), 401
return redirect(url_for('auth.login_page'))
return f(*args, **kwargs)
return decorated_function
def admin_required(f):
"""Decorator to require admin role for routes."""
@functools.wraps(f)
def decorated_function(*args, **kwargs):
if 'user_id' not in session:
if request.path.startswith('/api/'):
return jsonify({'error': 'Authentication required'}), 401
return redirect(url_for('auth.login_page'))
if session.get('user_role') != 'admin':
if request.path.startswith('/api/'):
return jsonify({'error': 'Admin access required'}), 403
return redirect(url_for('main.index'))
return f(*args, **kwargs)
return decorated_function
def get_current_user():
"""Get current logged-in user info from session."""
if 'user_id' not in session:
return None
return {
'id': session.get('user_id'),
'username': session.get('username'),
'role': session.get('user_role')
}

27
beam_app/.beamignore Normal file
View File

@@ -0,0 +1,27 @@
# Generated by Beam SDK
.beamignore
.git
.idea
.python-version
.vscode
.venv
venv
__pycache__
.DS_Store
.config
drive/MyDrive
.coverage
.pytest_cache
.ipynb
.ruff_cache
.dockerignore
.ipynb_checkpoints
.env.local
.envrc
**/__pycache__/
**/.pytest_cache/
**/node_modules/
**/.venv/
*.pyc
.next/
.circleci

331
beam_app/app.py Normal file
View File

@@ -0,0 +1,331 @@
# beam_app/app.py - Combined TTS + Timestamp Endpoint (Fixed)
import os
import re
import base64
from io import BytesIO
from dataclasses import dataclass
from beam import endpoint, Image
# ====================================================
# Container Image
# ====================================================
gpu_image = (
Image(python_version="python3.11")
.add_python_packages([
"torch>=2.4.0",
"torchaudio>=2.4.0",
"transformers",
"scipy",
"soundfile>=0.12.0",
"kokoro>=0.1.0",
"huggingface_hub",
])
.add_commands([
"apt-get update && apt-get install -y libsndfile1 ffmpeg",
])
)
# ====================================================
# Voice IDs
# ====================================================
VOICE_IDS = [
"af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica",
"af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
"am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam",
"am_michael", "am_onyx", "am_puck", "am_santa",
"bf_alice", "bf_emma", "bf_isabella", "bf_lily",
"bm_daniel", "bm_fable", "bm_george", "bm_lewis",
"ef_dora", "em_alex", "em_santa",
"ff_siwis",
"hf_alpha", "hf_beta", "hm_omega", "hm_psi",
"if_sara", "im_nicola",
"jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo",
"pf_dora", "pm_alex", "pm_santa",
"zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
"zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
]
@dataclass
class TokenSpan:
start: int
end: int
token: int
# ====================================================
# on_start — Model Loading
# ====================================================
def load_all_models():
"""Container start হলে একবার চলবে।"""
import torch
import torchaudio
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch_device = torch.device(device)
print(f"{'='*50}")
print(f"🚀 CONTAINER STARTING — Device: {device}")
print(f"{'='*50}")
# --- 1. Kokoro TTS ---
print(f"📂 [1/2] Loading Kokoro TTS...")
try:
from kokoro import KPipeline
pipeline = KPipeline(lang_code='a', device=device)
print(f"✅ [1/2] Kokoro TTS loaded")
except Exception as e:
print(f"❌ [1/2] Kokoro TTS FAILED: {e}")
import traceback
traceback.print_exc()
# Return partial — TTS ছাড়া কাজ হবে না
raise e
# --- 2. wav2vec2 Aligner ---
print(f"📂 [2/2] Loading wav2vec2 Aligner...")
try:
bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
aligner_model = bundle.get_model().to(torch_device)
labels = bundle.get_labels()
dictionary = {c: i for i, c in enumerate(labels)}
print(f"✅ [2/2] wav2vec2 Aligner loaded")
except Exception as e:
print(f"⚠️ [2/2] wav2vec2 FAILED: {e} — will skip alignment")
import traceback
traceback.print_exc()
bundle = None
aligner_model = None
labels = None
dictionary = None
print(f"{'='*50}")
print(f"🎉 CONTAINER READY")
print(f"{'='*50}")
return {
"pipeline": pipeline,
"device": device,
"torch_device": torch_device,
"aligner_model": aligner_model,
"bundle": bundle,
"labels": labels,
"dictionary": dictionary,
}
# ====================================================
# Combined Endpoint
# ====================================================
@endpoint(
name="tts-combined",
image=gpu_image,
on_start=load_all_models,
gpu="RTX4090",
cpu=2,
memory="16Gi",
keep_warm_seconds=180,
)
def generate_audio_with_timestamps(context, **inputs):
"""TTS + Forced Alignment একই GPU তে।"""
import torch
import torchaudio
import soundfile as sf
print(f"")
print(f"📥 REQUEST RECEIVED")
print(f" Keys: {list(inputs.keys())}")
print(f" text length: {len(inputs.get('text', ''))}")
# ---- Get models from on_start ----
ctx = context.on_start_value
if ctx is None:
print(f"❌ on_start_value is None!")
return {"error": "Models not loaded", "success": False}
pipeline = ctx["pipeline"]
aligner_model = ctx.get("aligner_model")
bundle = ctx.get("bundle")
dictionary = ctx.get("dictionary")
torch_device = ctx["torch_device"]
# ---- Parse inputs ----
text = inputs.get("text", "")
voice = inputs.get("voice", "af_heart")
speed = inputs.get("speed", 1.0)
skip_alignment = inputs.get("skip_alignment", False)
if not text or len(str(text).strip()) < 2:
return {"error": "Text is required (min 2 chars)", "success": False}
if voice not in VOICE_IDS:
return {"error": f"Invalid voice '{voice}'", "success": False}
try:
speed = max(0.5, min(2.0, float(speed)))
except (TypeError, ValueError):
speed = 1.0
# =================================================
# STEP 1: TTS Generation
# =================================================
try:
print(f"🔊 TTS: voice={voice}, speed={speed}, text={len(text)} chars")
print(f" Preview: {text[:80]}...")
generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')
all_audio = []
for gs, ps, audio in generator:
all_audio.append(audio)
if not all_audio:
print(f"❌ No audio chunks generated")
return {"error": "No audio generated", "success": False}
full_audio = torch.cat(all_audio, dim=0)
sample_rate = 24000
# Encode to base64
audio_buffer = BytesIO()
sf.write(audio_buffer, full_audio.cpu().numpy(), sample_rate, format='WAV')
audio_buffer.seek(0)
audio_bytes = audio_buffer.read()
audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
print(f"✅ TTS done: {len(audio_base64)} bytes base64, {len(audio_bytes)} bytes raw")
except Exception as e:
print(f"❌ TTS FAILED: {e}")
import traceback
traceback.print_exc()
return {"error": f"TTS failed: {str(e)}", "success": False}
# =================================================
# STEP 2: Forced Alignment
# =================================================
timestamps = []
if not skip_alignment and aligner_model is not None and bundle is not None and dictionary is not None:
try:
print(f"⏳ Aligning {len(text)} chars...")
# Audio tensor — GPU memory তে আছে, re-decode দরকার নেই
waveform = full_audio.unsqueeze(0).float().to(torch_device)
# Resample 24000 → 16000
if sample_rate != bundle.sample_rate:
resampler = torchaudio.transforms.Resample(
orig_freq=sample_rate, new_freq=bundle.sample_rate
).to(torch_device)
waveform = resampler(waveform)
# Text → tokens
original_words = text.split()
clean_words = []
valid_indices = []
for idx, word in enumerate(original_words):
clean = re.sub(r'[^a-zA-Z0-9]', '', word).upper()
if clean:
clean_words.append(clean)
valid_indices.append(idx)
transcript = " ".join(clean_words)
if transcript:
token_indices = []
for char in transcript:
if char == ' ':
token_indices.append(dictionary.get('|', 0))
else:
token_indices.append(dictionary.get(char, dictionary.get('|', 0)))
targets = torch.tensor(
token_indices, dtype=torch.int32, device=torch_device
).unsqueeze(0)
with torch.inference_mode():
emissions, _ = aligner_model(waveform)
emissions = torch.log_softmax(emissions, dim=-1)
input_lengths = torch.tensor([emissions.size(1)], device=torch_device)
target_lengths = torch.tensor([targets.size(1)], device=torch_device)
path, _ = torchaudio.functional.forced_align(
emissions, targets, input_lengths, target_lengths, blank=0
)
path = path[0].tolist()
# Parse segments
segments = []
if path:
current_label = path[0]
start_frame = 0
for t, label in enumerate(path):
if label != current_label:
if current_label != 0:
segments.append(
TokenSpan(start=start_frame, end=t, token=current_label)
)
current_label = label
start_frame = t
if current_label != 0:
segments.append(
TokenSpan(start=start_frame, end=len(path), token=current_label)
)
if path and len(path) > 0:
ratio = waveform.size(1) / len(path) / bundle.sample_rate
def get_sec(frame):
return round(frame * ratio, 2)
segment_idx = 0
for i, word_str in enumerate(transcript.split()):
word_len = len(word_str)
if segment_idx + word_len > len(segments):
break
t_start = segments[segment_idx].start
t_end = segments[segment_idx + word_len - 1].end
timestamps.append({
"word": original_words[valid_indices[i]],
"start": get_sec(t_start),
"end": get_sec(t_end),
})
segment_idx += word_len
if (segment_idx < len(segments)
and segments[segment_idx].token == dictionary.get('|', 0)):
segment_idx += 1
print(f"✅ Aligned {len(timestamps)} words")
except Exception as e:
print(f"⚠️ Alignment failed (audio still valid): {e}")
import traceback
traceback.print_exc()
timestamps = []
else:
if skip_alignment:
print(f"⏭️ Alignment skipped (skip_alignment=True)")
else:
print(f"⚠️ Alignment skipped (model not loaded)")
# =================================================
# Return Result
# =================================================
result = {
"success": True,
"audio_base64": audio_base64,
"audio_format": "wav",
"sample_rate": sample_rate,
"voice": voice,
"speed": speed,
"text_length": len(text),
"timestamps": timestamps,
"word_count": len(timestamps),
}
print(f"📤 RESPONSE: success=True, audio={len(audio_base64)} bytes, words={len(timestamps)}")
print(f"")
return result

View File

@@ -0,0 +1,7 @@
flask==3.1.2
torch==2.1.2
torchaudio==2.1.2
scipy==1.11.3
soundfile>=0.12.0
python-dotenv==1.0.0
kokoro>=0.1.0

59
config.py Normal file
View File

@@ -0,0 +1,59 @@
# config.py - Application Configuration
import os
import uuid
from dotenv import load_dotenv
load_dotenv()
# --- PATHS ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
STATIC_FOLDER = 'static'
STATIC_URL_PATH = '/static'
# --- DATABASE ---
# Production-এ env থেকে আসবে: /opt/apps/Audiobook-Maker-Pro-v4.2/audiobook_maker.db
# Dev-এ env না থাকলে app.py-র পাশে রাখবে
DATABASE = os.getenv('DATABASE', os.path.join(BASE_DIR, 'audiobook_maker.db'))
# --- FLASK SECRET KEY ---
SECRET_KEY = os.getenv('SECRET_KEY', 'audiobook-maker-pro-' + str(uuid.uuid4()))
# --- BEAM CLOUD API CONFIGURATION ---
BEAM_COMBINED_URL = os.getenv('BEAM_COMBINED_URL', '')
BEAM_API_TOKEN = os.getenv('BEAM_API_TOKEN', '')
# --- OPENAI API (optional) ---
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
# --- API HEADERS ---
def get_beam_headers():
return {'Authorization': f'Bearer {BEAM_API_TOKEN}'}
def get_beam_headers_json():
return {
'Authorization': f'Bearer {BEAM_API_TOKEN}',
'Content-Type': 'application/json'
}
# --- VOICE OPTIONS ---
VOICES = [
{'id': 'af_alloy', 'name': 'Alloy (US Female)'},
{'id': 'af_aoede', 'name': 'Aoede (US Female)'},
{'id': 'af_bella', 'name': 'Bella (US Female)'},
{'id': 'af_heart', 'name': 'Heart (US Female)'},
{'id': 'af_jessica', 'name': 'Jessica (US Female)'},
{'id': 'af_nicole', 'name': 'Nicole (US Female)'},
{'id': 'af_nova', 'name': 'Nova (US Female)'},
{'id': 'af_river', 'name': 'River (US Female)'},
{'id': 'af_sarah', 'name': 'Sarah (US Female)'},
{'id': 'af_sky', 'name': 'Sky (US Female)'},
{'id': 'am_adam', 'name': 'Adam (US Male)'},
{'id': 'am_echo', 'name': 'Echo (US Male)'},
{'id': 'am_eric', 'name': 'Eric (US Male)'},
{'id': 'am_michael', 'name': 'Michael (US Male)'},
{'id': 'bf_emma', 'name': 'Emma (UK Female)'},
{'id': 'bf_isabella', 'name': 'Isabella (UK Female)'},
{'id': 'bm_daniel', 'name': 'Daniel (UK Male)'},
{'id': 'bm_george', 'name': 'George (UK Male)'},
]

151
db.py Normal file
View File

@@ -0,0 +1,151 @@
# db.py - Database Configuration and Operations (v4.2)
import os
import sqlite3
from flask import g
from contextlib import contextmanager
from config import DATABASE
def get_db():
if 'db' not in g:
g.db = sqlite3.connect(DATABASE)
g.db.row_factory = sqlite3.Row
return g.db
def close_db(error=None):
db = g.pop('db', None)
if db is not None:
db.close()
@contextmanager
def get_db_connection():
conn = sqlite3.connect(DATABASE)
conn.row_factory = sqlite3.Row
try:
yield conn
finally:
conn.close()
def init_db():
"""Initialize database tables. Auto-creates parent directory."""
# ফোল্ডার না থাকলে নিজে থেকেই তৈরি করবে (Coolify volume mount-এর জন্য জরুরি)
db_dir = os.path.dirname(os.path.abspath(DATABASE))
if db_dir and not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True)
print(f"📂 Created data directory: {db_dir}")
with get_db_connection() as conn:
cursor = conn.cursor()
# Projects table
cursor.execute('''
CREATE TABLE IF NOT EXISTS projects (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# Chapters table
cursor.execute('''
CREATE TABLE IF NOT EXISTS chapters (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project_id INTEGER NOT NULL,
chapter_number INTEGER NOT NULL,
title TEXT DEFAULT 'Section',
voice TEXT DEFAULT 'af_heart',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
UNIQUE(project_id, chapter_number)
)
''')
try:
cursor.execute('ALTER TABLE chapters ADD COLUMN title TEXT DEFAULT "Section"')
except sqlite3.OperationalError:
pass
# Markdown blocks table
cursor.execute('''
CREATE TABLE IF NOT EXISTS markdown_blocks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
chapter_id INTEGER NOT NULL,
block_order INTEGER NOT NULL,
block_type TEXT NOT NULL DEFAULT 'paragraph',
content TEXT NOT NULL,
tts_text TEXT,
audio_data TEXT,
audio_format TEXT DEFAULT 'mp3',
transcription TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (chapter_id) REFERENCES chapters(id) ON DELETE CASCADE
)
''')
# Images table
cursor.execute('''
CREATE TABLE IF NOT EXISTS block_images (
id INTEGER PRIMARY KEY AUTOINCREMENT,
block_id INTEGER NOT NULL,
image_data TEXT NOT NULL,
image_format TEXT DEFAULT 'png',
alt_text TEXT,
position TEXT DEFAULT 'before',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (block_id) REFERENCES markdown_blocks(id) ON DELETE CASCADE
)
''')
# PDF Documents table
cursor.execute('''
CREATE TABLE IF NOT EXISTS pdf_documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
project_id INTEGER,
filename TEXT NOT NULL,
page_count INTEGER DEFAULT 0,
metadata TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE SET NULL
)
''')
# v4.2 publishing migrations
migrations = [
('projects', 'is_published', 'INTEGER DEFAULT 0'),
('projects', 'published_at', 'TIMESTAMP'),
('projects', 'thumbnail_data', 'TEXT'),
('projects', 'thumbnail_format', 'TEXT DEFAULT "png"'),
('projects', 'description', 'TEXT DEFAULT ""'),
('projects', 'author', 'TEXT DEFAULT ""'),
('projects', 'category', 'TEXT DEFAULT ""'),
('projects', 'view_count', 'INTEGER DEFAULT 0'),
]
for table, column, definition in migrations:
try:
cursor.execute(f'ALTER TABLE {table} ADD COLUMN {column} {definition}')
print(f" ✅ Added {table}.{column}")
except sqlite3.OperationalError:
pass
conn.commit()
print(f"✅ Database initialized at {DATABASE} (v4.2)")
def vacuum_db():
with get_db_connection() as conn:
conn.execute('VACUUM')
def init_app(app):
app.teardown_appcontext(close_db)
init_db()
from auth import init_users_table
init_users_table()

256
doc.py Normal file
View File

@@ -0,0 +1,256 @@
import os
from pathlib import Path
import mimetypes
import markdown
# INPUT: Set your Application folder path here
APPLICATION_FOLDER = f'../Audiobook Maker v4.2' # Replace with your actual folder path
# Add these new variables
EXCLUDE_FOLDERS = {
'node_modules',
# 'node_modules',
# 'venv',
# 'env',
# '__pycache__',
# 'dist',
# 'build',
# '.pytest_cache'
}
EXCLUDE_FILES = {
'doc.py'
# '.DS_Store',
# 'Thumbs.db',
# 'package-lock.json'
}
# File extensions to exclude
EXCLUDE_EXTENSIONS = {
# '.pyc',
# '.pyo',
# '.log',
# '.tmp'
}
def is_text_file(file_path):
"""Check if a file is likely a text/code file based on extension and mime type."""
# Common code file extensions
code_extensions = {
'.py', '.js', '.html', '.css', '.java', '.cpp', '.c', '.h', '.hpp',
'.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.ts', '.jsx',
'.tsx', '.vue', '.scss', '.sass', '.less', '.sql', '.json', '.xml',
'.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.sh', '.bat',
'.ps1', '.r', '.R', '.m', '.scala', '.clj', '.hs', '.elm', '.dart',
'.lua', '.pl', '.pm', '.tcl', '.awk', '.sed', '.dockerfile', '.md',
'.txt', '.log', '.gitignore', '.env', '.properties'
}
file_ext = Path(file_path).suffix.lower()
# Check by extension first
if file_ext in code_extensions:
return True
# Check by mime type for files without extension
if not file_ext:
mime_type, _ = mimetypes.guess_type(file_path)
if mime_type and mime_type.startswith('text/'):
return True
return False
def generate_tree_structure(root_path, prefix="", is_last=True, max_depth=None, current_depth=0):
"""Generate a tree-like directory structure."""
if max_depth is not None and current_depth > max_depth:
return ""
root = Path(root_path)
tree_str = ""
if current_depth == 0:
tree_str += f"{root.name}/\n"
try:
# Get all items and sort them (directories first, then files)
items = list(root.iterdir())
dirs = [item for item in items if item.is_dir() and not item.name.startswith('.') and item.name not in EXCLUDE_FOLDERS]
files = [item for item in items if item.is_file() and not item.name.startswith('.') and item.name not in EXCLUDE_FILES and item.suffix not in EXCLUDE_EXTENSIONS]
all_items = sorted(dirs) + sorted(files)
for i, item in enumerate(all_items):
is_last_item = i == len(all_items) - 1
if item.is_dir():
tree_str += f"{prefix}{'└── ' if is_last_item else '├── '}{item.name}/\n"
extension = " " if is_last_item else ""
tree_str += generate_tree_structure(
item,
prefix + extension,
is_last_item,
max_depth,
current_depth + 1
)
else:
tree_str += f"{prefix}{'└── ' if is_last_item else '├── '}{item.name}\n"
except PermissionError:
tree_str += f"{prefix}[Permission Denied]\n"
return tree_str
def generate_bash_command(root_folder):
"""Generate a bash command to recreate the directory and file structure."""
root_path = Path(root_folder)
dirs_to_create = []
files_to_create = []
for root, dirs, files in os.walk(root_folder, topdown=True):
# Skip hidden directories
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in EXCLUDE_FOLDERS]
for name in dirs:
dir_path = Path(root) / name
relative_dir = dir_path.relative_to(root_path)
dirs_to_create.append(f'"{relative_dir}"')
# Skip hidden files
files[:] = [f for f in files if not f.startswith('.') and f not in EXCLUDE_FILES and Path(f).suffix not in EXCLUDE_EXTENSIONS]
for name in files:
file_path = Path(root) / name
relative_file = file_path.relative_to(root_path)
files_to_create.append(f'"{relative_file}"')
command_parts = []
if dirs_to_create:
command_parts.append(f"mkdir -p {' '.join(dirs_to_create)}")
if files_to_create:
command_parts.append(f"touch {' '.join(files_to_create)}")
if not command_parts:
return "# No directories or files to create."
return " && ".join(command_parts)
def read_file_content(file_path):
"""Safely read file content with encoding detection."""
encodings = ['utf-8', 'utf-16', 'latin-1', 'cp1252']
for encoding in encodings:
try:
with open(file_path, 'r', encoding=encoding) as file:
return file.read()
except (UnicodeDecodeError, UnicodeError):
continue
except Exception as e:
return f"Error reading file: {str(e)}"
return "Unable to decode file content"
def get_language_from_extension(file_path):
"""Get the appropriate language identifier for markdown code blocks."""
ext = Path(file_path).suffix.lower()
language_map = {
'.py': 'python', '.js': 'javascript', '.ts': 'typescript', '.jsx': 'jsx',
'.tsx': 'tsx', '.html': 'html', '.css': 'css', '.scss': 'scss',
'.sass': 'sass', '.java': 'java', '.cpp': 'cpp', '.c': 'c', '.h': 'c',
'.hpp': 'cpp', '.cs': 'csharp', '.php': 'php', '.rb': 'ruby', '.go': 'go',
'.rs': 'rust', '.swift': 'swift', '.kt': 'kotlin', '.sql': 'sql',
'.json': 'json', '.xml': 'xml', '.yaml': 'yaml', '.yml': 'yaml',
'.toml': 'toml', '.sh': 'bash', '.bat': 'batch', '.ps1': 'powershell',
'.dockerfile': 'dockerfile', '.md': 'markdown', '.r': 'r', '.R': 'r',
'.scala': 'scala', '.clj': 'clojure', '.hs': 'haskell', '.lua': 'lua',
'.pl': 'perl', '.tcl': 'tcl',
}
return language_map.get(ext, 'text')
def generate_documentation(root_folder):
"""Generate complete markdown and HTML documentation for the project."""
root_path = Path(root_folder)
if not root_path.exists() or not root_path.is_dir():
print(f"Error: The folder '{root_folder}' does not exist or is not a directory.")
return
# Start building markdown content
markdown_content = [f"# {root_path.name} - Project Documentation\n"]
# Add project structure
markdown_content.append("## 📂 Project Structure\n")
markdown_content.append("```")
markdown_content.append(generate_tree_structure(root_path))
markdown_content.append("```\n")
# # Add bash command to recreate structure
# markdown_content.append("## ⚙️ Bash Command to Recreate Structure\n")
# markdown_content.append("```bash")
# markdown_content.append(generate_bash_command(root_path))
# markdown_content.append("```\n")
# Add files content
markdown_content.append("## 📄 Files Content\n")
for root, dirs, files in os.walk(root_folder):
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in EXCLUDE_FOLDERS]
for file in sorted(files):
if file.startswith('.') or file in EXCLUDE_FILES or Path(file).suffix in EXCLUDE_EXTENSIONS : continue
file_path = Path(root) / file
if is_text_file(file_path):
relative_path = file_path.relative_to(root_path)
markdown_content.append(f"### 📜 `{relative_path}`\n")
content = read_file_content(file_path)
language = get_language_from_extension(file_path)
markdown_content.append(f"```{language}\n{content}\n```\n")
final_markdown = '\n'.join(markdown_content)
# Write to markdown file
output_md_file = f"../{root_path.name}_documentation.md"
try:
with open(output_md_file, 'w', encoding='utf-8') as f:
f.write(final_markdown)
print(f"✅ Markdown documentation generated: {Path(output_md_file).resolve()}")
except Exception as e:
print(f"❌ Error writing markdown file: {str(e)}")
# Generate and write HTML file
html_template = """
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif; line-height: 1.6; padding: 2em; max-width: 1024px; margin: 0 auto; color: #333; }}
h1, h2, h3 {{ border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }}
code {{ font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; background-color: #f6f8fa; padding: 0.2em 0.4em; margin: 0; font-size: 85%; border-radius: 6px; }}
pre {{ background-color: #f6f8fa; padding: 16px; overflow: auto; border-radius: 6px; }}
pre code {{ padding: 0; margin: 0; font-size: 100%; background-color: transparent; border: none; }}
</style></head><body>{content}</body></html>
"""
html_content = markdown.markdown(final_markdown, extensions=['fenced_code', 'tables'])
final_html = html_template.format(title=f"{root_path.name} Documentation", content=html_content)
output_html_file = f"../{root_path.name}_documentation.html"
try:
with open(output_html_file, 'w', encoding='utf-8') as f:
f.write(final_html)
print(f"✅ HTML documentation generated: {Path(output_html_file).resolve()}")
except Exception as e:
print(f"❌ Error writing HTML file: {str(e)}")
# Main execution
if __name__ == "__main__":
if not APPLICATION_FOLDER or APPLICATION_FOLDER == "/path/to/your/Application":
print("⚠️ Please set the APPLICATION_FOLDER variable to your actual folder path.")
else:
print(f"🚀 Generating documentation for: {APPLICATION_FOLDER}")
generate_documentation(APPLICATION_FOLDER)

993
docx_processor.py Normal file
View File

@@ -0,0 +1,993 @@
# docx_processor.py - DOCX/DOC Processing and Content Extraction
# UPDATED: _make_block() now includes is_bold and font_size metadata for AI analysis
import io
import re
import base64
import email
import email.policy
import quopri
from html.parser import HTMLParser
from urllib.parse import unquote, urlparse
# ================================================================
# FORMAT DETECTION
# ================================================================
def detect_doc_format(file_bytes):
if not file_bytes or len(file_bytes) < 4:
return 'unknown'
if file_bytes[:4] == b'PK\x03\x04':
return 'docx'
if file_bytes[:8] == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
return 'ole2'
if file_bytes[:4] == b'\xd0\xcf\x11\xe0':
return 'ole2'
header = file_bytes[:64]
for bom in [b'\xef\xbb\xbf', b'\xff\xfe', b'\xfe\xff']:
if header.startswith(bom):
header = header[len(bom):]
break
if header.lstrip().startswith(b'{\\rtf'):
return 'rtf'
header_str = header.decode('ascii', errors='ignore').strip()
if header_str.upper().startswith('MIME-VERSION'):
return 'mhtml'
sample_512 = file_bytes[:512].decode('ascii', errors='ignore').lower()
if 'mime-version' in sample_512 and 'content-type' in sample_512 and 'boundary' in sample_512:
return 'mhtml'
sample = file_bytes[:4096]
sample_str_lower = ''
for enc in ['utf-8', 'utf-16-le', 'utf-16-be', 'cp1252', 'latin-1']:
try:
sample_str_lower = sample.decode(enc, errors='ignore').lower().strip()
if sample_str_lower:
break
except Exception:
continue
if sample_str_lower:
html_markers = [
'<html', '<!doctype html', '<head', '<meta ',
'xmlns:w="urn:schemas-microsoft-com',
'xmlns:o="urn:schemas-microsoft-com',
'<o:documentproperties>', 'mso-',
]
for marker in html_markers:
if marker in sample_str_lower:
return 'html'
return 'unknown'
# ================================================================
# IMAGE NORMALIZATION HELPERS
# ================================================================
def _normalize_image_key(raw_location):
if not raw_location:
return ''
loc = raw_location.strip()
if loc.lower().startswith('cid:'):
loc = loc[4:]
for _ in range(3):
decoded = unquote(loc)
if decoded == loc:
break
loc = decoded
try:
parsed = urlparse(loc)
path = parsed.path if parsed.path else loc
except Exception:
path = loc
filename = path.replace('\\', '/').rsplit('/', 1)[-1].strip()
return filename.lower()
# ================================================================
# MHTML PROCESSOR
# ================================================================
class MHTMLProcessor:
def __init__(self, file_bytes):
self._file_bytes = file_bytes
self._embedded_images = {}
self._ordered_images = []
def process(self):
html_content = self._extract_html_from_mhtml()
if not html_content:
html_content = self._fallback_extract()
if not html_content:
return {
'metadata': {'title': '', 'author': '', 'subject': ''},
'blocks': [{'type': 'paragraph', 'content': '⚠️ Could not extract content from MHTML file.'}],
}
print(f" 📷 MHTML: {len(self._ordered_images)} image parts")
processor = HTMLDocProcessor(
html_content,
embedded_images=self._embedded_images,
ordered_images=self._ordered_images
)
result = processor.process()
img_blocks = sum(1 for b in result.get('blocks', []) if b.get('type') == 'image')
print(f"📄 MHTML processed: {len(result.get('blocks', []))} blocks ({img_blocks} images)")
return result
def _store_image(self, payload_bytes, content_type, content_location, content_id):
fmt = content_type.split('/')[-1].lower()
if fmt in ('x-wmf', 'x-emf', 'wmf', 'emf'):
return
if fmt == 'jpg':
fmt = 'jpeg'
b64_data = base64.b64encode(payload_bytes).decode('ascii')
self._ordered_images.append((b64_data, fmt, content_location or content_id or ''))
if content_location:
self._embedded_images[content_location] = (b64_data, fmt)
norm = _normalize_image_key(content_location)
if norm:
self._embedded_images[norm] = (b64_data, fmt)
if content_id:
self._embedded_images[f'cid:{content_id}'] = (b64_data, fmt)
self._embedded_images[content_id] = (b64_data, fmt)
def _extract_html_from_mhtml(self):
try:
msg = email.message_from_bytes(self._file_bytes, policy=email.policy.default)
html_body = None
if msg.is_multipart():
for part in msg.walk():
ct = part.get_content_type()
cl = part.get('Content-Location', '').strip()
cid = part.get('Content-ID', '').strip('<> ')
if ct == 'text/html':
payload = part.get_payload(decode=True)
if payload:
cs = part.get_content_charset() or 'utf-8'
try: html_body = payload.decode(cs, errors='ignore')
except: html_body = payload.decode('utf-8', errors='ignore')
elif ct and ct.startswith('image/'):
payload = part.get_payload(decode=True)
if payload and len(payload) > 100:
self._store_image(payload, ct, cl, cid)
else:
ct = msg.get_content_type()
if ct in ('text/html', 'multipart/related'):
payload = msg.get_payload(decode=True)
if payload:
cs = msg.get_content_charset() or 'utf-8'
try: html_body = payload.decode(cs, errors='ignore')
except: html_body = payload.decode('utf-8', errors='ignore')
return html_body
except Exception as e:
print(f" ⚠️ MIME parsing failed: {e}")
return None
def _fallback_extract(self):
try:
text = self._file_bytes.decode('ascii', errors='ignore')
bm = re.search(r'boundary="?([^\s";\r\n]+)"?', text, re.IGNORECASE)
if not bm: return None
boundary = bm.group(1)
parts = text.split(f'--{boundary}')
html_body = None
for part in parts:
he = part.find('\r\n\r\n')
if he == -1: he = part.find('\n\n')
if he == -1: continue
hs = part[:he]; body = part[he:].strip()
ctm = re.search(r'Content-Type:\s*([^\s;]+)', hs, re.IGNORECASE)
ct = ctm.group(1).lower() if ctm else ''
is_qp = bool(re.search(r'Content-Transfer-Encoding:\s*quoted-printable', hs, re.IGNORECASE))
is_b64 = bool(re.search(r'Content-Transfer-Encoding:\s*base64', hs, re.IGNORECASE))
clm = re.search(r'Content-Location:\s*(.+?)[\r\n]', hs, re.IGNORECASE)
cl = clm.group(1).strip() if clm else ''
cidm = re.search(r'Content-ID:\s*<?([^>\s\r\n]+)>?', hs, re.IGNORECASE)
cid = cidm.group(1).strip() if cidm else ''
if ct == 'text/html':
if is_qp: body = quopri.decodestring(body.encode('ascii', errors='ignore')).decode('utf-8', errors='ignore')
elif is_b64:
try: body = base64.b64decode(body).decode('utf-8', errors='ignore')
except: pass
if '<html' in body.lower() or '<body' in body.lower(): html_body = body
elif ct.startswith('image/') and is_b64 and body:
clean_b64 = re.sub(r'\s+', '', body)
try:
pb = base64.b64decode(clean_b64)
if len(pb) > 100: self._store_image(pb, ct, cl, cid)
except: pass
return html_body
except Exception as e:
print(f" ⚠️ Fallback MHTML failed: {e}")
return None
# ================================================================
# HTML COMMENT CLEANUP
# ================================================================
def _clean_html_comments(html_text):
html_text = re.sub(r'<!--\[if\s+!vml\]-->(.*?)<!--\[endif\]-->', r'\1', html_text, flags=re.DOTALL|re.IGNORECASE)
html_text = re.sub(r'<!--\[if\s+!mso\]-->(.*?)<!--\[endif\]-->', r'\1', html_text, flags=re.DOTALL|re.IGNORECASE)
html_text = re.sub(r'<!--\[if\s[^\]]*\]>.*?<!\[endif\]-->', '', html_text, flags=re.DOTALL|re.IGNORECASE)
html_text = re.sub(r'<!--.*?-->', '', html_text, flags=re.DOTALL)
return html_text
# ================================================================
# HTML DOC PROCESSOR
# ================================================================
class HTMLDocProcessor:
def __init__(self, file_bytes, embedded_images=None, ordered_images=None):
if isinstance(file_bytes, str):
self._html_text = file_bytes
self._file_bytes = file_bytes.encode('utf-8', errors='ignore')
else:
self._file_bytes = file_bytes
self._html_text = self._decode_html()
self._embedded_images = embedded_images or {}
self._ordered_images = ordered_images or []
self._used_image_indices = set()
def _decode_html(self):
if self._file_bytes[:3] == b'\xef\xbb\xbf': return self._file_bytes[3:].decode('utf-8', errors='ignore')
if self._file_bytes[:2] == b'\xff\xfe': return self._file_bytes[2:].decode('utf-16-le', errors='ignore')
if self._file_bytes[:2] == b'\xfe\xff': return self._file_bytes[2:].decode('utf-16-be', errors='ignore')
sample = self._file_bytes[:4096]
try:
st = sample.decode('ascii', errors='ignore')
cm = re.search(r'charset[="\s]+([a-zA-Z0-9\-]+)', st, re.IGNORECASE)
if cm:
try: return self._file_bytes.decode(cm.group(1).strip().strip('"\''), errors='ignore')
except: pass
except: pass
for enc in ['utf-8', 'cp1252', 'latin-1']:
try: return self._file_bytes.decode(enc, errors='ignore')
except: continue
return self._file_bytes.decode('latin-1', errors='replace')
def process(self):
metadata = {'title': '', 'author': '', 'subject': ''}
tm = re.search(r'<title[^>]*>(.*?)</title>', self._html_text, re.IGNORECASE|re.DOTALL)
if tm: metadata['title'] = self._strip_tags(tm.group(1)).strip()
blocks = self._extract_all_blocks()
blocks = [b for b in blocks if b.get('content', '').strip() or b.get('data')]
if not blocks: blocks = self._simple_extract()
img_count = sum(1 for b in blocks if b.get('type') == 'image')
print(f"📄 HTML-DOC processed: {len(blocks)} blocks ({img_count} images)")
return {'metadata': metadata, 'blocks': blocks}
def _strip_tags(self, html_str):
import html as hm
return hm.unescape(re.sub(r'<[^>]+>', '', html_str))
def _resolve_image_src(self, src):
import html as hm
if not src: return None, None
src = hm.unescape(src).strip()
if src.startswith('data:image'):
dm = re.match(r'data:image/([^;]+);base64,(.+)', src, re.DOTALL)
if dm: return dm.group(2).strip(), dm.group(1)
if src in self._embedded_images:
self._mark_used(self._embedded_images[src][0]); return self._embedded_images[src]
ns = _normalize_image_key(src)
if ns and ns in self._embedded_images:
self._mark_used(self._embedded_images[ns][0]); return self._embedded_images[ns]
if ns and '.' in ns:
nne = ns.rsplit('.', 1)[0]
if nne and nne in self._embedded_images:
self._mark_used(self._embedded_images[nne][0]); return self._embedded_images[nne]
if ns:
for loc, (data, fmt) in self._embedded_images.items():
ln = _normalize_image_key(loc)
if ln and ns and ln == ns: self._mark_used(data); return data, fmt
return self._get_next_unused()
def _mark_used(self, data_prefix):
p = data_prefix[:60]
for i, (b, f, l) in enumerate(self._ordered_images):
if i not in self._used_image_indices and b[:60] == p:
self._used_image_indices.add(i); return
def _get_next_unused(self):
for i, (b, f, l) in enumerate(self._ordered_images):
if i not in self._used_image_indices:
self._used_image_indices.add(i); return b, f
return None, None
def _extract_all_blocks(self):
import html as hm
blocks = []
cleaned = re.sub(r'<script[^>]*>.*?</script>', '', self._html_text, flags=re.DOTALL|re.IGNORECASE)
cleaned = re.sub(r'<style[^>]*>.*?</style>', '', cleaned, flags=re.DOTALL|re.IGNORECASE)
vml_srcs = []
for vm in re.finditer(r'<!--\[if\s[^\]]*vml[^\]]*\]>(.*?)<!\[endif\]-->', cleaned, re.DOTALL|re.IGNORECASE):
for im in re.finditer(r'<v:imagedata\b[^>]*?\bsrc\s*=\s*["\']([^"\']+)["\']', vm.group(1), re.IGNORECASE|re.DOTALL):
vml_srcs.append((hm.unescape(im.group(1)), vm.start()))
cleaned = _clean_html_comments(cleaned)
cleaned = re.sub(r'</?[ovw]:[^>]+>', '', cleaned, flags=re.IGNORECASE)
bm = re.search(r'<body[^>]*>(.*)</body>', cleaned, re.IGNORECASE|re.DOTALL)
if bm: cleaned = bm.group(1)
img_entries = []
for m in re.finditer(r'<img\b([^>]*?)/?\s*>', cleaned, re.IGNORECASE|re.DOTALL):
sm = re.search(r'\bsrc\s*=\s*["\']([^"\']+)["\']', m.group(1), re.IGNORECASE)
if not sm: sm = re.search(r'\bsrc\s*=\s*(\S+)', m.group(1), re.IGNORECASE)
if sm: img_entries.append((hm.unescape(sm.group(1)), m.start()))
if not img_entries and vml_srcs: img_entries = vml_srcs
self._used_image_indices = set()
for src, pos in img_entries:
d, f = self._resolve_image_src(src)
if d: blocks.append({'type':'image','content':f"![Image](embedded-image.{f})",'data':d,'format':f,'_pos':pos})
for m in re.finditer(r'<(h[1-6])\b[^>]*>(.*?)</\1\s*>', cleaned, re.IGNORECASE|re.DOTALL):
t = re.sub(r'\s+', ' ', self._strip_tags(m.group(2))).strip()
if t:
tag = m.group(1).lower()
p = {'h1':'# ','h2':'## '}.get(tag,'### ')
bt = {'h1':'heading1','h2':'heading2'}.get(tag,'heading3')
blocks.append({'type':bt,'content':f"{p}{t}",'_pos':m.start()})
for m in re.finditer(r'<table\b[^>]*>(.*?)</table\s*>', cleaned, re.IGNORECASE|re.DOTALL):
md = self._parse_table(m.group(1))
if md: blocks.append({'type':'table','content':md,'_pos':m.start()})
for m in re.finditer(r'<p\b([^>]*)>(.*?)</p\s*>', cleaned, re.IGNORECASE|re.DOTALL):
inner = m.group(2); attrs = m.group(1)
it = self._strip_tags(inner).strip()
hw = not it or all(c in ' \t\n\r\xa0' for c in it)
if hw: continue
t = re.sub(r'[ \t]+', ' ', re.sub(r'\n\s*\n', '\n', it)).strip()
if not t: continue
bt = 'paragraph'
cm = re.search(r'class\s*=\s*["\']?([^"\'>\s]+)', attrs, re.IGNORECASE)
cn = cm.group(1) if cm else ''
if 'MsoListParagraph' in cn:
t = re.sub(r'^[·•●○◦‣⁃]\s*', '', re.sub(r'^\d+[.)]\s*', '', t)); bt = 'list_item'
elif 'MsoTitle' in cn: bt = 'heading1'
elif 'MsoSubtitle' in cn: bt = 'heading2'
elif 'MsoQuote' in cn or 'MsoIntenseQuote' in cn: bt = 'quote'
pm = {'heading1':'# ','heading2':'## ','list_item':'- ','quote':'> '}
blocks.append({'type':bt,'content':f"{pm.get(bt,'')}{t}",'_pos':m.start()})
for m in re.finditer(r'<li\b[^>]*>(.*?)</li\s*>', cleaned, re.IGNORECASE|re.DOTALL):
t = re.sub(r'\s+', ' ', self._strip_tags(m.group(1))).strip()
if t: blocks.append({'type':'list_item','content':f"- {t}",'_pos':m.start()})
for m in re.finditer(r'<blockquote\b[^>]*>(.*?)</blockquote\s*>', cleaned, re.IGNORECASE|re.DOTALL):
t = re.sub(r'\s+', ' ', self._strip_tags(m.group(1))).strip()
if t: blocks.append({'type':'quote','content':f"> {t}",'_pos':m.start()})
for m in re.finditer(r'<div\b([^>]*)>(.*?)</div\s*>', cleaned, re.IGNORECASE|re.DOTALL):
if re.search(r'<(?:p|h[1-6]|table|div|ul|ol)\b', m.group(2), re.IGNORECASE): continue
t = re.sub(r'[ \t]+', ' ', self._strip_tags(m.group(2))).strip()
if t and len(t) > 1 and not all(c in ' \t\n\r\xa0' for c in t):
if not any(t in b.get('content','') for b in blocks):
blocks.append({'type':'paragraph','content':t,'_pos':m.start()})
blocks.sort(key=lambda b: b.get('_pos', 0))
seen = set(); deduped = []
for b in blocks:
b.pop('_pos', None)
if b.get('type') == 'image':
k = b.get('data','')[:60]
if k and k in seen: continue
if k: seen.add(k)
deduped.append(b)
else:
c = b.get('content','').strip()
if c and c not in seen: seen.add(c); deduped.append(b)
return deduped
def _parse_table(self, html):
rows = []
for rm in re.finditer(r'<tr\b[^>]*>(.*?)</tr\s*>', html, re.IGNORECASE|re.DOTALL):
cells = []
for cm in re.finditer(r'<t[dh]\b[^>]*>(.*?)</t[dh]\s*>', rm.group(1), re.IGNORECASE|re.DOTALL):
cells.append(re.sub(r'\s+', ' ', self._strip_tags(cm.group(1))).strip().replace('|','\\|'))
if cells: rows.append(cells)
if not rows: return ''
if all(len(r)==1 for r in rows) and len(rows)<=2: return ''
lines = []
for i, r in enumerate(rows):
lines.append('| '+' | '.join(r)+' |')
if i == 0: lines.append('| '+' | '.join(['---']*len(r))+' |')
return '\n'.join(lines)
def _simple_extract(self):
import html as hm
blocks = []; t = self._html_text
t = re.sub(r'<script[^>]*>.*?</script>', '', t, flags=re.DOTALL|re.IGNORECASE)
t = re.sub(r'<style[^>]*>.*?</style>', '', t, flags=re.DOTALL|re.IGNORECASE)
t = _clean_html_comments(t)
bm = re.search(r'<body[^>]*>(.*)</body>', t, re.IGNORECASE|re.DOTALL)
if bm: t = bm.group(1)
for tag, repl in [('br', '\n'), ('p', '\n\n'), ('div', '\n\n'), ('li', '\n'), ('tr', '\n'), ('table', '\n\n')]:
t = re.sub(rf'</?{tag}[^>]*>', repl, t, flags=re.IGNORECASE)
t = hm.unescape(re.sub(r'<[^>]+>', '', t))
for p in re.split(r'\n{2,}', t):
p = re.sub(r'[ \t]+', ' ', p).strip()
if p and len(p) > 1: blocks.append({'type':'paragraph','content':p})
return blocks
# ================================================================
# RTF DOC PROCESSOR
# ================================================================
class RTFDocProcessor:
def __init__(self, file_bytes): self._file_bytes = file_bytes
def process(self):
blocks = []; metadata = {'title':'','author':'','subject':''}
rtf = self._decode_rtf(); metadata.update(self._extract_meta(rtf))
pt = self._rtf_to_text(rtf)
if pt:
for p in re.split(r'\n{2,}', pt):
p = p.strip()
if not p: continue
if len(p) < 80 and p.isupper(): blocks.append({'type':'heading2','content':f"## {p}"})
else: blocks.append({'type':'paragraph','content':p})
print(f"📄 RTF-DOC processed: {len(blocks)} blocks")
return {'metadata': metadata, 'blocks': blocks}
def _decode_rtf(self):
d = self._file_bytes
for b in [b'\xef\xbb\xbf',b'\xff\xfe',b'\xfe\xff']:
if d.startswith(b): d = d[len(b):]; break
try: return d.decode('ascii', errors='ignore')
except: return d.decode('latin-1', errors='replace')
def _extract_meta(self, rtf):
m = {}
for f in ['title','author','subject']:
r = re.search(r'\\'+f+r'\s+([^}]+)', rtf)
if r: m[f] = r.group(1).strip()
return m
def _rtf_to_text(self, rtf):
try:
from striprtf.striprtf import rtf_to_text
return rtf_to_text(rtf, errors='ignore')
except ImportError: pass
except Exception: pass
t = rtf
for g in ['fonttbl','colortbl','stylesheet','info','header','footer']:
t = re.sub(r'\{\\'+re.escape(g)+r'[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', '', t, flags=re.DOTALL)
t = re.sub(r'\\par\b\s*','\n',t); t = re.sub(r'\\pard\b\s*','',t)
t = re.sub(r'\\line\b\s*','\n',t); t = re.sub(r'\\tab\b\s*','\t',t)
def hr(m):
try: return bytes([int(m.group(1),16)]).decode('cp1252',errors='ignore')
except: return ''
t = re.sub(r"\\\'([0-9a-fA-F]{2})", hr, t)
def ur(m):
try:
c = int(m.group(1))
if c < 0: c += 65536
return chr(c)
except: return ''
t = re.sub(r'\\u(-?\d+)\??', ur, t)
t = re.sub(r'\\[a-zA-Z]+\d*\s?','',t); t = re.sub(r'[{}]','',t)
return re.sub(r'\n{3,}','\n\n',re.sub(r' +',' ',t)).strip()
# ================================================================
# DOCX PROCESSOR (using python-docx)
# ================================================================
DOCX_NSMAP = {
'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
}
W_NS = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
R_NS = '{http://schemas.openxmlformats.org/officeDocument/2006/relationships}'
class DOCXProcessor:
"""Process DOCX files with typography metadata for AI analysis."""
HEADING_PATTERNS = {
'Title':'title','Subtitle':'subtitle',
'Heading 1':'heading1','Heading 2':'heading2',
'Heading 3':'heading3','Heading 4':'heading3',
'Heading 5':'heading3','Heading 6':'heading3',
'Heading 7':'heading3','Heading 8':'heading3','Heading 9':'heading3',
}
QUOTE_STYLES = {'Quote','Intense Quote','Block Text'}
LIST_BULLET_STYLES = {'List Bullet','List Bullet 2','List Bullet 3'}
LIST_NUMBER_STYLES = {'List Number','List Number 2','List Number 3','List Continue'}
# Approximate font sizes for heading types (for AI metadata)
HEADING_FONT_SIZES = {
'title': 28.0, 'subtitle': 22.0,
'heading1': 24.0, 'heading2': 20.0, 'heading3': 16.0,
'paragraph': 12.0, 'list_item': 12.0, 'numbered_list': 12.0,
'quote': 12.0,
}
def __init__(self, docx_bytes):
import docx as docx_module
self.doc = docx_module.Document(io.BytesIO(docx_bytes))
self._image_cache = {}
self._extract_all_images()
def _extract_all_images(self):
try:
for rel_id, rel in self.doc.part.rels.items():
if "image" in rel.reltype:
try:
ip = rel.target_part
ib = ip.blob; ct = ip.content_type or ''
fmt = 'png'
if 'jpeg' in ct or 'jpg' in ct: fmt = 'jpeg'
elif 'gif' in ct: fmt = 'gif'
elif 'bmp' in ct: fmt = 'bmp'
elif 'tiff' in ct: fmt = 'tiff'
elif 'webp' in ct: fmt = 'webp'
else:
pn = str(ip.partname) if hasattr(ip,'partname') else ''
if '.jpg' in pn or '.jpeg' in pn: fmt = 'jpeg'
elif '.gif' in pn: fmt = 'gif'
self._image_cache[rel_id] = (base64.b64encode(ib).decode('utf-8'), fmt)
except Exception as e: print(f" ⚠️ Image {rel_id}: {e}")
except Exception as e: print(f" ⚠️ Rels error: {e}")
def _get_paragraph_images(self, paragraph):
images = []
try:
for drawing in paragraph._element.findall('.//w:drawing', DOCX_NSMAP):
for blip in drawing.findall('.//a:blip', DOCX_NSMAP):
eid = blip.get(f'{R_NS}embed')
if eid and eid in self._image_cache:
d, f = self._image_cache[eid]
images.append({'data': d, 'format': f})
except Exception as e: print(f" ⚠️ Para images: {e}")
return images
def _get_paragraph_segments(self, paragraph):
segments = [[]]
for child in paragraph._element:
tag = child.tag
if tag == f'{W_NS}r':
self._process_run_element(child, segments)
elif tag == f'{W_NS}hyperlink':
for run_elem in child.findall(f'{W_NS}r'):
self._process_run_element(run_elem, segments)
elif tag == f'{W_NS}smartTag':
for run_elem in child.findall(f'{W_NS}r'):
self._process_run_element(run_elem, segments)
elif tag == f'{W_NS}sdt':
for run_elem in child.iter(f'{W_NS}r'):
self._process_run_element(run_elem, segments)
return segments
def _process_run_element(self, run_elem, segments):
for elem in run_elem:
if elem.tag == f'{W_NS}br':
segments.append([])
elif elem.tag == f'{W_NS}t':
if elem.text:
is_bold, is_italic = self._get_run_formatting(run_elem)
segments[-1].append((elem.text, is_bold, is_italic))
def _get_run_formatting(self, run_elem):
is_bold = False
is_italic = False
rpr = run_elem.find(f'{W_NS}rPr')
if rpr is not None:
b = rpr.find(f'{W_NS}b')
if b is not None:
v = b.get(f'{W_NS}val')
is_bold = v is None or v not in ('0', 'false')
i = rpr.find(f'{W_NS}i')
if i is not None:
v = i.get(f'{W_NS}val')
is_italic = v is None or v not in ('0', 'false')
return is_bold, is_italic
def _segments_to_text(self, segment):
parts = []
for text, is_bold, is_italic in segment:
if is_bold and is_italic: parts.append(f"***{text}***")
elif is_bold: parts.append(f"**{text}**")
elif is_italic: parts.append(f"*{text}*")
else: parts.append(text)
return ''.join(parts)
def _segment_plain_text(self, segment):
return ''.join(text for text, _, _ in segment)
def _get_full_paragraph_plain_text(self, paragraph):
texts = []
for t_elem in paragraph._element.iter(f'{W_NS}t'):
if t_elem.text:
texts.append(t_elem.text)
return ''.join(texts).strip()
def _is_paragraph_bold(self, paragraph):
"""Check if the majority of text in a paragraph is bold."""
bold_chars = 0
total_chars = 0
for run in paragraph.runs:
text = run.text or ''
char_count = len(text.strip())
if char_count == 0:
continue
total_chars += char_count
if run.bold:
bold_chars += char_count
if total_chars == 0:
return False
return bold_chars / total_chars > 0.5
def _get_list_type_from_numpr(self, paragraph):
"""Deep inspection of Word XML to determine list type."""
try:
pPr = paragraph._element.pPr
if pPr is None: return None
numPr = pPr.numPr
if numPr is None: return None
numId_elem = numPr.numId
if numId_elem is None: return None
numId = numId_elem.val
ilvl_val = 0
ilvl_elem = numPr.ilvl
if ilvl_elem is not None:
ilvl_val = ilvl_elem.val
num_part = self.doc.part.numbering_part
if not num_part: return 'list_item'
num = num_part.element.num_having_numId(numId)
if num is None: return 'list_item'
abstractNumId = num.abstractNumId.val
abstractNum = num_part.element.abstractNum_having_abstractNumId(abstractNumId)
if abstractNum is None: return 'list_item'
for lvl in abstractNum.findall('.//w:lvl', DOCX_NSMAP):
if int(lvl.get(f'{W_NS}ilvl', 0)) == ilvl_val:
numFmt = lvl.find('.//w:numFmt', DOCX_NSMAP)
if numFmt is not None:
val = numFmt.get(f'{W_NS}val')
if val != 'bullet':
return 'numbered_list'
else:
return 'list_item'
except Exception:
pass
return None
def _classify_paragraph(self, paragraph):
xml_list_type = self._get_list_type_from_numpr(paragraph)
if xml_list_type:
return xml_list_type
sn = paragraph.style.name if paragraph.style else ''
for p, bt in self.HEADING_PATTERNS.items():
if sn == p or sn.startswith(p): return bt
if sn in self.QUOTE_STYLES: return 'quote'
if sn in self.LIST_BULLET_STYLES: return 'list_item'
if sn in self.LIST_NUMBER_STYLES: return 'numbered_list'
if sn == 'List Paragraph': return 'list_item'
if 'toc' in sn.lower(): return 'list_item'
return 'paragraph'
def _table_to_markdown(self, table):
rd = []
for r in table.rows:
rd.append([c.text.replace('|','\\|').replace('\n',' ').strip() for c in r.cells])
if not rd: return ""
lines = []
for i, r in enumerate(rd):
lines.append('| '+' | '.join(r)+' |')
if i == 0: lines.append('| '+' | '.join(['---']*len(r))+' |')
return '\n'.join(lines)
def _make_block(self, block_type, text, is_bold=False):
"""Create a block dict with typography metadata for AI analysis."""
tm = {
'title':('heading1','# '),'subtitle':('heading2','## '),
'heading1':('heading1','# '),'heading2':('heading2','## '),
'heading3':('heading3','### '),'quote':('quote','> '),
'list_item':('list_item','- '),'numbered_list':('numbered_list','1. '),
}
if block_type in tm:
bt, pf = tm[block_type]
block = {'type': bt, 'content': f"{pf}{text}"}
else:
bt = 'paragraph'
block = {'type': bt, 'content': text}
# Add typography metadata for AI
block['is_bold'] = is_bold or block_type in ('title', 'subtitle', 'heading1', 'heading2', 'heading3')
block['font_size'] = self.HEADING_FONT_SIZES.get(block_type, 12.0)
return block
def _process_element(self, element, blocks):
from docx.table import Table as DocxTable
from docx.text.paragraph import Paragraph as DocxParagraph
if isinstance(element, DocxParagraph):
plain_text = self._get_full_paragraph_plain_text(element)
if not plain_text:
for img in self._get_paragraph_images(element):
blocks.append({
'type': 'image',
'content': f"![Document Image](embedded-image.{img['format']})",
'data': img['data'], 'format': img['format'],
})
return
for img in self._get_paragraph_images(element):
blocks.append({
'type': 'image',
'content': f"![Document Image](embedded-image.{img['format']})",
'data': img['data'], 'format': img['format'],
})
block_type = self._classify_paragraph(element)
is_bold = self._is_paragraph_bold(element)
segments = self._get_paragraph_segments(element)
non_empty_segments = [s for s in segments if self._segment_plain_text(s).strip()]
if len(non_empty_segments) <= 1:
text = self._segments_to_text(non_empty_segments[0]) if non_empty_segments else ''
if text.strip():
blocks.append(self._make_block(block_type, text, is_bold))
else:
for idx, seg in enumerate(non_empty_segments):
seg_text = self._segments_to_text(seg)
seg_plain = self._segment_plain_text(seg).strip()
if not seg_plain:
continue
if idx == 0:
blocks.append(self._make_block(block_type, seg_text, is_bold))
else:
seg_is_bold = all(b for _, b, _ in seg if _)
is_short = len(seg_plain) < 100
if seg_is_bold and is_short and not seg_plain.endswith(('.', ':', ',')):
blocks.append(self._make_block('heading3', seg_text, True))
else:
blocks.append(self._make_block('paragraph', seg_text, seg_is_bold))
elif isinstance(element, DocxTable):
md = self._table_to_markdown(element)
if md.strip():
blocks.append({'type': 'table', 'content': md})
def process(self):
blocks = []; metadata = {'title':'','author':'','subject':''}
try:
cp = self.doc.core_properties
metadata['title'] = cp.title or ''
metadata['author'] = cp.author or ''
metadata['subject'] = cp.subject or ''
except: pass
try:
for element in self.doc.iter_inner_content():
self._process_element(element, blocks)
except AttributeError:
print(" ⚠️ iter_inner_content() not available, using fallback")
for p in self.doc.paragraphs: self._process_element(p, blocks)
for t in self.doc.tables: self._process_element(t, blocks)
img_count = sum(1 for b in blocks if b.get('type') == 'image')
print(f"📄 DOCX processed: {len(blocks)} blocks ({img_count} images)")
return {'metadata': metadata, 'blocks': blocks}
# ================================================================
# OLE2 DOC PROCESSOR
# ================================================================
class DOCProcessor:
def __init__(self, doc_bytes): self._doc_bytes = doc_bytes
def process(self):
blocks = []; metadata = {'title':'','author':'','subject':''}; imgs = []
try:
import olefile
ole = olefile.OleFileIO(io.BytesIO(self._doc_bytes))
try:
m = ole.get_metadata()
for f in ['title','author','subject']:
v = getattr(m,f,None)
if v: metadata[f] = v.decode('utf-8',errors='ignore') if isinstance(v,bytes) else str(v)
except: pass
imgs = self._extract_ole_images(ole)
if ole.exists('WordDocument'):
t = self._extract_text(ole)
if t:
for p in re.split(r'\r\n|\r|\n', t):
p = p.strip()
if p: blocks.append({'type':'paragraph','content':p})
ole.close()
except ImportError:
blocks = self._basic_extract(); imgs = self._scan_images(self._doc_bytes)
except Exception as e:
print(f" ⚠️ OLE failed: {e}")
blocks = self._basic_extract(); imgs = self._scan_images(self._doc_bytes)
if not blocks: blocks = self._basic_extract()
if imgs and blocks:
iv = max(1, len(blocks)//(len(imgs)+1)); r = []; ii = 0
for i, b in enumerate(blocks):
if ii < len(imgs) and i > 0 and i % iv == 0: r.append(imgs[ii]); ii += 1
r.append(b)
while ii < len(imgs): r.append(imgs[ii]); ii += 1
blocks = r
elif imgs: blocks = imgs + blocks
print(f"📄 DOC (OLE2): {len(blocks)} blocks ({len(imgs)} images)")
return {'metadata': metadata, 'blocks': blocks}
def _extract_ole_images(self, ole):
imgs = []
try:
for sp in ole.listdir():
try:
d = ole.openstream(sp).read()
if len(d) < 100: continue
if d[:3] == b'\xff\xd8\xff':
imgs.append({'type':'image','content':'![Image](embedded-image.jpeg)','data':base64.b64encode(d).decode(),'format':'jpeg'}); continue
if d[:8] == b'\x89PNG\r\n\x1a\n':
imgs.append({'type':'image','content':'![Image](embedded-image.png)','data':base64.b64encode(d).decode(),'format':'png'}); continue
if len(d) > 2048: imgs.extend(self._scan_images(d))
except: continue
except: pass
seen = set(); return [i for i in imgs if (k:=i.get('data','')[:80]) and k not in seen and not seen.add(k)]
def _scan_images(self, data):
imgs = []; pos = 0
while pos < len(data)-3:
i = data.find(b'\xff\xd8\xff', pos)
if i == -1: break
e = data.find(b'\xff\xd9', i+3)
if e == -1: break
e += 2
if 2048 < e-i < 50*1024*1024:
imgs.append({'type':'image','content':'![Image](embedded-image.jpeg)','data':base64.b64encode(data[i:e]).decode(),'format':'jpeg'})
pos = e
pos = 0
while pos < len(data)-8:
i = data.find(b'\x89PNG\r\n\x1a\n', pos)
if i == -1: break
e = data.find(b'IEND\xaeB`\x82', i+8)
if e == -1: break
e += 8
if 1024 < e-i < 50*1024*1024:
imgs.append({'type':'image','content':'![Image](embedded-image.png)','data':base64.b64encode(data[i:e]).decode(),'format':'png'})
pos = e
return imgs
def _extract_text(self, ole):
t = ''
try:
if ole.exists('WordDocument'):
s = ole.openstream('WordDocument').read()
d = s.decode('utf-16-le',errors='ignore')
c = ''.join(ch for ch in d if ch in '\r\n\t' or ch.isprintable())
if len(c) > 20: return c.strip()
except: pass
for sp in ole.listdir():
try:
d = ole.openstream(sp).read().decode('utf-16-le',errors='ignore')
c = ''.join(ch for ch in d if ch.isprintable() or ch in '\r\n\t')
if len(c) > len(t): t = c
except: pass
return t
def _basic_extract(self):
blocks = []
try:
d = self._doc_bytes.decode('utf-16-le',errors='ignore')
c = ''.join(ch for ch in d if ch.isprintable() or ch in '\r\n\t')
for p in c.split('\r'):
p = p.strip()
if len(p) > 3: blocks.append({'type':'paragraph','content':p})
except: pass
return blocks
# ================================================================
# LIST GROUPING POST-PROCESSOR
# ================================================================
def _group_lists(blocks):
"""Groups consecutive list items into a single markdown block."""
final_blocks = []
list_buffer = []
list_type = None
def flush():
if not list_buffer: return
lines = []
for i, text in enumerate(list_buffer):
clean_text = re.sub(r'^\s*[-*+]\s+', '', text)
clean_text = re.sub(r'^\s*\d+\.\s+', '', clean_text)
if list_type == 'numbered_list':
lines.append(f"{i+1}. {clean_text}")
else:
lines.append(f"- {clean_text}")
final_blocks.append({
'type': 'paragraph',
'content': '\n'.join(lines)
})
list_buffer.clear()
for b in blocks:
if b.get('type') in ['image', 'table']:
flush()
list_type = None
final_blocks.append(b)
continue
content = b.get('content', '')
bt = b.get('type', '')
is_bullet = bt == 'list_item' or content.startswith('- ') or content.startswith('* ')
is_number = bt == 'numbered_list' or re.match(r'^\s*\d+\.\s+', content)
if bt.startswith('heading'):
is_bullet = False
is_number = False
if is_bullet or is_number:
current_type = 'numbered_list' if is_number else 'bullet_list'
if list_type and list_type != current_type:
flush()
list_type = current_type
list_buffer.append(content)
else:
flush()
list_type = None
final_blocks.append(b)
flush()
return final_blocks
# ================================================================
# MAIN ENTRY POINT
# ================================================================
def process_docx_to_markdown(file_bytes, filename=''):
fmt = detect_doc_format(file_bytes)
print(f" 🔍 File: {filename} | Format: {fmt} | Size: {len(file_bytes)}")
pmap = {'docx':DOCXProcessor,'mhtml':MHTMLProcessor,'html':HTMLDocProcessor,'rtf':RTFDocProcessor,'ole2':DOCProcessor}
result = None
if fmt in pmap:
try:
r = pmap[fmt](file_bytes).process()
if r.get('blocks'):
result = r
except Exception as e:
print(f" ⚠️ {fmt} failed: {e}")
if not result:
for fn, PC in [('DOCX',DOCXProcessor),('MHTML',MHTMLProcessor),('HTML',HTMLDocProcessor),('RTF',RTFDocProcessor),('OLE2',DOCProcessor)]:
try:
r = PC(file_bytes).process()
if r.get('blocks'):
result = r
print(f" ✅ Parsed as {fn}")
break
except: continue
if result and result.get('blocks'):
blocks = _group_lists(result['blocks'])
ic = sum(1 for b in blocks if b.get('type')=='image')
print(f"{len(blocks)} blocks ({ic} images) after grouping")
return {'metadata': result['metadata'], 'markdown_blocks': blocks}
return {'metadata':{'title':'','author':'','subject':''},'markdown_blocks':[{'type':'paragraph','content':'⚠️ Could not extract content. Try saving as .docx.'}]}

643
pdf_processor.py Normal file
View File

@@ -0,0 +1,643 @@
# pdf_processor.py - PDF Processing and Content Extraction
# UPDATED: Blocks now include font_size and is_bold metadata for AI analysis
import base64
import re
import fitz # PyMuPDF
# ================================================================
# LIST GROUPING HELPER
# ================================================================
def _group_lists(blocks):
"""Groups consecutive list items into a cohesive Markdown list block."""
final_blocks = []
list_buffer = []
list_type = None
def flush():
if not list_buffer: return
lines = []
for i, text in enumerate(list_buffer):
clean_text = re.sub(r'^\s*[-*+]\s+', '', text)
clean_text = re.sub(r'^\s*\d+\.\s+', '', clean_text)
if list_type == 'numbered_list':
lines.append(f"{i+1}. {clean_text}")
else:
lines.append(f"- {clean_text}")
final_blocks.append({
'type': 'paragraph',
'content': '\n'.join(lines)
})
list_buffer.clear()
for b in blocks:
if b.get('type') in ['image', 'table']:
flush()
list_type = None
final_blocks.append(b)
continue
content = b.get('content', '')
bt = b.get('type', '')
is_bullet = bt == 'list_item' or content.startswith('- ') or content.startswith('* ')
is_number = bt == 'numbered_list' or re.match(r'^\s*\d+\.\s+', content)
if bt.startswith('heading'):
is_bullet = False
is_number = False
if is_bullet or is_number:
current_type = 'numbered_list' if is_number else 'bullet_list'
if list_type and list_type != current_type:
flush()
list_type = current_type
list_buffer.append(content)
else:
flush()
list_type = None
final_blocks.append(b)
flush()
return final_blocks
class PDFProcessor:
"""Process PDF files and extract structured content."""
TITLE_SIZE_THRESHOLD = 24
SUBTITLE_SIZE_THRESHOLD = 18
HEADING_SIZE_THRESHOLD = 14
TITLE_RATIO = 1.8
SUBTITLE_RATIO = 1.4
HEADING_RATIO = 1.2
LIST_PATTERNS = [
r'^\s*[\u2022\u2023\u25E6\u2043\u2219•●○◦‣·∙]\s*',
r'^\s*[-–—]\s+',
r'^\s*\d+[.)]\s+',
r'^\s*[a-zA-Z][.)]\s+',
r'^\s*[ivxIVX]+[.)]\s+',
]
BULLET_CHARS = set('•●○◦‣⁃·∙\u2022\u2023\u25E6\u2043\u2219-–—')
INLINE_BULLET_SPLIT = re.compile(
r'\s*[\u2022\u2023\u25E6\u2043\u2219•●○◦‣·∙]\s+'
)
QUOTE_PATTERNS = [
r'^[\"\'\u201C\u201D\u2018\u2019].+[\"\'\u201C\u201D\u2018\u2019]$',
]
TOC_LEADER_PATTERN = re.compile(r'[.…·]{3,}\s*\.?\s*\d+\s*$')
def __init__(self, pdf_bytes):
self.doc = fitz.open(stream=pdf_bytes, filetype="pdf")
self.elements = []
self.font_sizes = []
self.median_size = 12
self.body_size = 12
def close(self):
if self.doc:
self.doc.close()
def _analyze_font_distribution(self):
font_size_counts = {}
for page in self.doc:
blocks = page.get_text("dict", flags=11)["blocks"]
for block in blocks:
if block.get("type") == 0:
for line in block.get("lines", []):
for span in line.get("spans", []):
size = round(span.get("size", 12), 1)
text = span.get("text", "").strip()
if text:
self.font_sizes.append(size)
font_size_counts[size] = font_size_counts.get(size, 0) + len(text)
if self.font_sizes:
self.font_sizes.sort()
n = len(self.font_sizes)
self.median_size = self.font_sizes[n // 2]
if font_size_counts:
self.body_size = max(font_size_counts.keys(), key=lambda x: font_size_counts[x])
else:
self.body_size = self.median_size
def _is_likely_heading(self, text, font_size, flags):
text_stripped = text.strip()
if not text_stripped:
return False, None
is_bold = bool(flags & 2 ** 4)
is_all_caps = text_stripped.isupper() and len(text_stripped) > 3
size_ratio = font_size / self.body_size if self.body_size > 0 else 1
if size_ratio >= self.TITLE_RATIO or font_size >= self.TITLE_SIZE_THRESHOLD:
if len(text_stripped) < 200:
return True, "title"
if size_ratio >= self.SUBTITLE_RATIO or font_size >= self.SUBTITLE_SIZE_THRESHOLD:
if len(text_stripped) < 150:
return True, "subtitle"
if size_ratio >= self.HEADING_RATIO and is_bold:
if len(text_stripped) < 100:
return True, "heading"
if is_all_caps and is_bold and len(text_stripped) < 80:
return True, "heading"
if is_bold and len(text_stripped) < 60:
return True, "heading"
return False, None
def _classify_element(self, text, font_size, flags, is_italic=False, bbox=None):
text_stripped = text.strip()
if not text_stripped:
return None
is_bold = bool(flags & 2 ** 4)
is_heading, heading_type = self._is_likely_heading(text_stripped, font_size, flags)
if is_heading:
return heading_type
for pattern in self.LIST_PATTERNS:
if re.match(pattern, text_stripped):
if re.match(r'^\s*\d+[.)]\s+', text_stripped) or \
re.match(r'^\s*[a-zA-Z][.)]\s+', text_stripped) or \
re.match(r'^\s*[ivxIVX]+[.)]\s+', text_stripped):
return "numbered_list"
return "list_item"
if is_italic and len(text_stripped) > 50:
return "quote"
for pattern in self.QUOTE_PATTERNS:
if re.match(pattern, text_stripped):
return "quote"
return "paragraph"
def _extract_images(self, page, page_num):
images = []
image_list = page.get_images(full=True)
for img_index, img in enumerate(image_list):
try:
xref = img[0]
base_image = self.doc.extract_image(xref)
if base_image:
image_bytes = base_image["image"]
image_ext = base_image["ext"]
img_rects = page.get_image_rects(img)
bbox = None
if img_rects:
rect = img_rects[0]
bbox = [rect.x0, rect.y0, rect.x1, rect.y1]
images.append({
"type": "image",
"data": base64.b64encode(image_bytes).decode('utf-8'),
"format": image_ext,
"bbox": bbox,
"width": base_image.get("width", 0),
"height": base_image.get("height", 0),
})
except Exception: pass
return images
def _extract_tables(self, page, page_num):
tables = []
try:
table_finder = page.find_tables()
for table_index, table in enumerate(table_finder):
try:
table_data = table.extract()
bbox = list(table.bbox)
markdown_table = self._table_to_markdown(table_data)
tables.append({
"type": "table",
"data": table_data,
"markdown": markdown_table,
"bbox": bbox,
})
except Exception: pass
except Exception: pass
return tables
def _table_to_markdown(self, table_data):
if not table_data: return ""
lines = []
for row_idx, row in enumerate(table_data):
cells = [str(cell).replace('|', '\\|').replace('\n', ' ') if cell else '' for cell in row]
lines.append('| ' + ' | '.join(cells) + ' |')
if row_idx == 0:
lines.append('| ' + ' | '.join(['---'] * len(cells)) + ' |')
return '\n'.join(lines)
def _get_reading_order(self, elements, page_width):
if not elements: return elements
mid_x = page_width / 2
left_col, right_col, full_width = [], [], []
for elem in elements:
bbox = elem.get("bbox")
if not bbox:
full_width.append(elem)
continue
x0, y0, x1, y1 = bbox
width = x1 - x0
if width > page_width * 0.6:
full_width.append(elem)
elif x1 < mid_x:
left_col.append(elem)
elif x0 > mid_x:
right_col.append(elem)
else:
full_width.append(elem)
sort_by_y = lambda e: (e.get("bbox") or [0, 0, 0, 0])[1]
left_col.sort(key=sort_by_y)
right_col.sort(key=sort_by_y)
full_width.sort(key=sort_by_y)
all_elements = [(e, "full") for e in full_width]
all_elements += [(e, "left") for e in left_col]
all_elements += [(e, "right") for e in right_col]
all_elements.sort(key=lambda x: (x[0].get("bbox") or [0, 0, 0, 0])[1])
result = [e[0] for e in all_elements]
for idx, elem in enumerate(result):
elem["reading_order"] = idx
return result
def _bboxes_overlap(self, bbox1, bbox2, threshold=0.5):
if not bbox1 or not bbox2: return False
x1_min, y1_min, x1_max, y1_max = bbox1
x2_min, y2_min, x2_max, y2_max = bbox2
x_overlap = max(0, min(x1_max, x2_max) - max(x1_min, x2_min))
y_overlap = max(0, min(y1_max, y2_max) - max(y1_min, y2_min))
intersection = x_overlap * y_overlap
area1 = (x1_max - x1_min) * (y1_max - y1_min)
if area1 == 0: return False
return intersection / area1 > threshold
def _extract_line_info(self, line):
text = ""
total_chars = 0
weighted_size = 0.0
combined_flags = 0
for span in line.get("spans", []):
span_text = span.get("text", "")
span_size = span.get("size", 12)
span_flags = span.get("flags", 0)
if span_text.strip():
char_count = len(span_text)
text += span_text
weighted_size = ((weighted_size * total_chars + span_size * char_count) /
(total_chars + char_count)) if (total_chars + char_count) > 0 else span_size
total_chars += char_count
combined_flags |= span_flags
stripped = text.strip()
return {
"text": text,
"stripped": stripped,
"bbox": list(line.get("bbox", [0, 0, 0, 0])),
"font_size": round(weighted_size, 1),
"flags": combined_flags,
"is_bold": bool(combined_flags & (2 ** 4)),
"is_italic": bool(combined_flags & (2 ** 1)),
"char_count": total_chars,
"is_bullet": len(stripped) <= 2 and bool(stripped) and all(c in self.BULLET_CHARS for c in stripped),
"is_single_line_entry": False,
}
def _is_single_line_entry(self, info, page_width):
text = info["stripped"]
if not text: return False
if self.TOC_LEADER_PATTERN.search(text): return True
if re.search(r'\d+\s*$', text) and '' in text: return True
return False
def _should_break_between(self, prev_info, curr_info, median_gap, avg_line_height, page_width):
if prev_info["is_bullet"]: return False
prev_bbox = prev_info["bbox"]
curr_bbox = curr_info["bbox"]
gap = curr_bbox[1] - prev_bbox[3]
size_diff = abs(curr_info["font_size"] - prev_info["font_size"])
if size_diff > 1.5: return True
if prev_info["is_bold"] != curr_info["is_bold"]: return True
if median_gap > 0:
gap_ratio = gap / median_gap if median_gap > 0 else 1
if gap_ratio >= 2.0: return True
if gap_ratio >= 1.5:
if prev_info["stripped"] and prev_info["stripped"][-1] in '.!?:"\u201D\u2019':
return True
if gap > avg_line_height * 1.0: return True
x_diff = abs(curr_bbox[0] - prev_bbox[0])
if x_diff > 25: return True
if prev_info.get("is_single_line_entry"): return True
if prev_info["is_bold"] and curr_info["is_bold"]:
prev_line_width = prev_bbox[2] - prev_bbox[0]
if page_width > 0 and prev_line_width < page_width * 0.75:
return True
return False
def _merge_bullet_lines(self, line_infos):
if not line_infos: return line_infos
merged = []
i = 0
while i < len(line_infos):
info = line_infos[i]
if info["is_bullet"] and i + 1 < len(line_infos):
next_info = line_infos[i + 1]
bullet_char = info["stripped"]
merged.append({
"text": bullet_char + " " + next_info["text"],
"stripped": bullet_char + " " + next_info["stripped"],
"bbox": [
min(info["bbox"][0], next_info["bbox"][0]),
min(info["bbox"][1], next_info["bbox"][1]),
max(info["bbox"][2], next_info["bbox"][2]),
max(info["bbox"][3], next_info["bbox"][3]),
],
"font_size": next_info["font_size"],
"flags": next_info["flags"],
"is_bold": next_info["is_bold"],
"is_italic": next_info["is_italic"],
"char_count": info["char_count"] + next_info["char_count"],
"is_bullet": False,
"is_single_line_entry": False,
})
i += 2
else:
merged.append(info)
i += 1
return merged
def _split_block_into_paragraphs(self, block, page_width):
lines = block.get("lines", [])
if not lines: return []
line_infos = []
for line in lines:
info = self._extract_line_info(line)
if info["stripped"]: line_infos.append(info)
if not line_infos: return []
line_infos = self._merge_bullet_lines(line_infos)
for info in line_infos:
info["is_single_line_entry"] = self._is_single_line_entry(info, page_width)
if len(line_infos) == 1: return [line_infos]
gaps = []
line_heights = []
for i in range(len(line_infos)):
h = line_infos[i]["bbox"][3] - line_infos[i]["bbox"][1]
line_heights.append(h)
if i > 0:
gap = line_infos[i]["bbox"][1] - line_infos[i - 1]["bbox"][3]
gaps.append(gap)
avg_line_height = sum(line_heights) / len(line_heights) if line_heights else 12
median_gap = sorted(gaps)[len(gaps) // 2] if gaps else avg_line_height * 0.3
paragraphs = []
current_group = [line_infos[0]]
for i in range(1, len(line_infos)):
if self._should_break_between(line_infos[i - 1], line_infos[i], median_gap, avg_line_height, page_width):
paragraphs.append(current_group)
current_group = [line_infos[i]]
else:
current_group.append(line_infos[i])
if current_group: paragraphs.append(current_group)
return paragraphs
def _group_to_element(self, line_group):
text = " ".join(info["stripped"] for info in line_group if info["stripped"])
if not text.strip(): return None
total_chars = sum(info["char_count"] for info in line_group)
font_size = sum(info["font_size"] * info["char_count"] for info in line_group) / total_chars if total_chars > 0 else self.body_size
flags = 0
for info in line_group: flags |= info["flags"]
x0 = min(info["bbox"][0] for info in line_group)
y0 = min(info["bbox"][1] for info in line_group)
x1 = max(info["bbox"][2] for info in line_group)
y1 = max(info["bbox"][3] for info in line_group)
elem_type = self._classify_element(text, font_size, flags, bool(flags & (2 ** 1)), [x0, y0, x1, y1])
if elem_type:
return {
"type": elem_type,
"text": text.strip(),
"bbox": [x0, y0, x1, y1],
"font_size": round(font_size, 1),
"flags": flags,
}
return None
def _should_merge_elements(self, prev_elem, curr_elem):
if prev_elem["type"] != "paragraph" or curr_elem["type"] != "paragraph": return False
if abs(prev_elem["font_size"] - curr_elem["font_size"]) > 1.5: return False
prev_bold = bool(prev_elem.get("flags", 0) & (2 ** 4))
curr_bold = bool(curr_elem.get("flags", 0) & (2 ** 4))
if prev_bold != curr_bold: return False
prev_text = prev_elem["text"].strip()
curr_text = curr_elem["text"].strip()
if not prev_text or not curr_text: return False
if self.TOC_LEADER_PATTERN.search(prev_text): return False
last_char = prev_text[-1]
if last_char in '.!?':
if curr_text and curr_text[0].islower(): return True
return False
if last_char in '"\u201D\u2019':
if len(prev_text) >= 2 and prev_text[-2] in '.!?':
if curr_text and curr_text[0].islower(): return True
return False
return True
def _merge_continuation_paragraphs(self, elements):
if len(elements) <= 1: return elements
merged = [elements[0]]
for i in range(1, len(elements)):
prev = merged[-1]
curr = elements[i]
if self._should_merge_elements(prev, curr):
prev_bbox = prev["bbox"]
curr_bbox = curr["bbox"]
merged[-1] = {
"type": "paragraph",
"text": prev["text"].rstrip() + " " + curr["text"].lstrip(),
"bbox": [
min(prev_bbox[0], curr_bbox[0]),
min(prev_bbox[1], curr_bbox[1]),
max(prev_bbox[2], curr_bbox[2]),
max(prev_bbox[3], curr_bbox[3]),
],
"font_size": prev["font_size"],
"flags": prev.get("flags", 0),
}
else:
merged.append(curr)
return merged
def _split_combined_list_items(self, elements):
result = []
for elem in elements:
if elem["type"] != "list_item":
result.append(elem)
continue
text = elem["text"].strip()
cleaned = text
for pattern in self.LIST_PATTERNS:
cleaned = re.sub(pattern, '', cleaned, count=1).strip()
parts = self.INLINE_BULLET_SPLIT.split(cleaned)
parts = [p.strip() for p in parts if p.strip()]
if len(parts) <= 1:
result.append(elem)
else:
bbox = elem["bbox"]
item_height = (bbox[3] - bbox[1]) / len(parts) if len(parts) > 0 else 0
for idx, part in enumerate(parts):
result.append({
"type": "list_item",
"text": part.strip(),
"bbox": [bbox[0], bbox[1] + idx * item_height, bbox[2], bbox[1] + (idx + 1) * item_height],
"font_size": elem["font_size"],
"flags": elem.get("flags", 0),
})
return result
def process(self):
self._analyze_font_distribution()
all_pages = []
for page_num, page in enumerate(self.doc):
page_elements = []
page_rect = page.rect
dict_blocks = page.get_text("dict", flags=11)["blocks"]
tables = self._extract_tables(page, page_num)
table_bboxes = [t["bbox"] for t in tables if t.get("bbox")]
images = self._extract_images(page, page_num)
for block in dict_blocks:
if block.get("type") != 0: continue
block_bbox = block.get("bbox", [0, 0, 0, 0])
skip = False
for t_bbox in table_bboxes:
if self._bboxes_overlap(block_bbox, t_bbox): skip = True; break
if skip: continue
for group in self._split_block_into_paragraphs(block, page_rect.width):
element = self._group_to_element(group)
if element: page_elements.append(element)
page_elements = [e for e in page_elements if e["text"].strip()]
page_elements = self._merge_continuation_paragraphs(page_elements)
page_elements = self._split_combined_list_items(page_elements)
page_elements.extend(tables)
page_elements.extend(images)
page_elements = self._get_reading_order(page_elements, page_rect.width)
all_pages.append({
"page_number": page_num,
"width": page_rect.width,
"height": page_rect.height,
"elements": page_elements
})
return {
"page_count": len(self.doc),
"metadata": {
"title": self.doc.metadata.get("title", ""),
"author": self.doc.metadata.get("author", ""),
"subject": self.doc.metadata.get("subject", ""),
},
"pages": all_pages
}
def to_markdown(self, processed_data):
"""Convert processed data to markdown blocks WITH typography metadata."""
blocks = []
for page in processed_data.get("pages", []):
for elem in page.get("elements", []):
elem_type = elem.get("type")
# Base block data
block = None
if elem_type == "title":
block = {"type": "heading1", "content": f"# {elem.get('text', '')}"}
elif elem_type == "subtitle":
block = {"type": "heading2", "content": f"## {elem.get('text', '')}"}
elif elem_type == "heading":
block = {"type": "heading3", "content": f"### {elem.get('text', '')}"}
elif elem_type == "paragraph":
block = {"type": "paragraph", "content": elem.get('text', '')}
elif elem_type == "list_item":
text = elem.get('text', '')
for pattern in self.LIST_PATTERNS: text = re.sub(pattern, '', text, count=1)
block = {"type": "list_item", "content": f"- {text.strip()}"}
elif elem_type == "numbered_list":
text = elem.get('text', '')
for pattern in self.LIST_PATTERNS: text = re.sub(pattern, '', text, count=1)
block = {"type": "numbered_list", "content": f"1. {text.strip()}"}
elif elem_type == "quote":
block = {"type": "quote", "content": f"> {elem.get('text', '')}"}
elif elem_type == "table":
block = {"type": "table", "content": elem.get('markdown', '')}
elif elem_type == "image":
if elem.get("data"):
block = {
"type": "image",
"content": f"![PDF Image](embedded-image.{elem.get('format', 'png')})",
"data": elem.get("data"), "format": elem.get("format", "png")
}
if block:
# ADD typography metadata for AI analysis
if elem.get("font_size"):
block["font_size"] = elem["font_size"]
if elem.get("flags") is not None:
block["is_bold"] = bool(elem["flags"] & (2 ** 4))
blocks.append(block)
# Apply the list grouping logic
return _group_lists(blocks)
def process_pdf_to_markdown(pdf_bytes):
processor = PDFProcessor(pdf_bytes)
try:
processed_data = processor.process()
markdown_blocks = processor.to_markdown(processed_data)
return {
"page_count": processed_data["page_count"],
"metadata": processed_data["metadata"],
"markdown_blocks": markdown_blocks
}
finally:
processor.close()

751
reader_templates/Reader.html Executable file
View File

@@ -0,0 +1,751 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Interactive Audiobook Reader</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" />
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Lora:ital,wght@0,400..700;1,400..700&family=Poppins:wght@500;700&display=swap" rel="stylesheet" />
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script>
const INJECTED_MANIFEST = /*{{MANIFEST_DATA}}*/ null;
</script>
<style>
@keyframes fadeIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } }
html { scroll-behavior: smooth; }
body {
background-image: linear-gradient(to top, #f3e7e9 0%, #e3eeff 99%, #e3eeff 100%);
color: #1f2937; font-family: "Lora", serif;
}
.story-title { font-family: "Poppins", sans-serif; font-weight: 700; font-size: 2.5rem; color: #111827; }
.story-subtitle { font-family: "Poppins", sans-serif; color: #4b5563; font-weight: 500; font-size: 1.1rem; }
.main-content-card {
background-color: rgba(255,255,255,0.9); backdrop-filter: blur(12px);
border-radius: 1rem; padding: 3rem 4rem; box-shadow: 0 10px 35px rgba(0,0,0,0.08);
border: 1px solid rgba(255,255,255,0.2); max-width: 1400px; margin: 0 auto;
animation: fadeIn 0.5s ease-in-out;
}
#load-folder-btn {
background-image: linear-gradient(45deg, #3d4e81 0%, #5753c9 50%, #6e78da 100%);
background-size: 200% auto; border: none; transition: all 0.4s ease-in-out !important;
box-shadow: 0 4px 15px rgba(0,0,0,0.2);
}
#load-folder-btn:hover { background-position: right center; transform: translateY(-3px); box-shadow: 0 8px 20px rgba(0,0,0,0.25); }
/* Layout with Sidebar Outline */
.reader-layout {
display: flex;
gap: 32px;
align-items: flex-start;
text-align: left;
}
.document-outline {
width: 280px;
flex-shrink: 0;
position: sticky;
top: 40px;
background: white;
border-radius: 12px;
padding: 20px;
box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05), 0 2px 4px -1px rgba(0,0,0,0.03);
border: 1px solid #e2e8f0;
max-height: calc(100vh - 80px);
overflow-y: auto;
opacity: 0;
transform: translateX(-20px);
transition: opacity 0.4s ease-out, transform 0.4s ease-out;
}
.document-outline.visible {
opacity: 1;
transform: translateX(0);
}
.outline-title {
font-family: "Poppins", sans-serif;
font-weight: 700;
font-size: 0.95rem;
color: #64748b;
text-transform: uppercase;
margin-bottom: 16px;
border-bottom: 1px solid #e2e8f0;
padding-bottom: 12px;
display: flex;
align-items: center;
gap: 8px;
letter-spacing: 0.5px;
}
.outline-list {
list-style: none;
padding: 0;
margin: 0;
}
.outline-list li {
padding: 10px 14px;
font-size: 0.9rem;
color: #334155;
cursor: pointer;
border-radius: 8px;
transition: all 0.2s;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
font-family: "Inter", sans-serif;
margin-bottom: 4px;
}
.outline-list li:hover {
background: #f8fafc;
color: #4f46e5;
}
.outline-list li.active {
background: #e0e7ff;
color: #3730a3;
font-weight: 600;
}
.reader-content-area {
flex: 1;
min-width: 0;
}
.story-text-container { font-size: 36px; line-height: 2.1; color: #1f2937; cursor: pointer; }
.story-text-container h1, .story-text-container h2, .story-text-container h3 {
font-family: "Poppins", sans-serif; color: #111827; line-height: 1.8; margin-top: 1.5em; margin-bottom: 0.8em;
}
.story-text-container h1 { font-size: 2.2em; }
.story-text-container h2 { font-size: 1.8em; }
.story-text-container h3 { font-size: 1.5em; }
.story-text-container p { margin-bottom: 1.2em; }
.story-text-container img { max-width: 100%; height: auto; border-radius: 8px; margin: 16px auto; display: block; }
.word { transition: all 0.15s ease; border-radius: 3px; }
.word:hover { background-color: #f1f5f9; }
.current-sentence-bg {
-webkit-box-decoration-break: clone; box-decoration-break: clone;
background-color: #e0e7ff; padding: 0.1em 0.25em; margin: 0 -0.2em; border-radius: 8px;
}
.current-word { color: #3d4e81; text-decoration: underline; text-decoration-thickness: 3px; text-underline-offset: 3px; font-weight: 700; }
/* Image blocks in reader */
.story-image-block { text-align: center; margin: 24px 0; }
.story-image-block img { max-width: 100%; height: auto; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }
/* Floating Button - TOP RIGHT adjacent to content */
#floating-player-btn {
position: fixed; top: 2rem; height: 60px; min-width: 60px; padding: 0 24px;
border-radius: 30px; background-image: linear-gradient(45deg, #3d4e81 0%, #5753c9 50%, #6e78da 100%);
background-size: 200% auto; border: none; color: white; box-shadow: 0 8px 25px rgba(0,0,0,0.3);
display: none; align-items: center; justify-content: center; cursor: pointer; z-index: 1050;
transition: transform 0.2s ease-out, opacity 0.3s ease-out, width 0.3s ease, padding 0.3s ease, border-radius 0.3s ease;
opacity: 0; transform: scale(0.8);
}
#floating-player-btn.visible { display: flex; opacity: 1; transform: scale(1); }
#floating-player-btn:hover { transform: scale(1.05); }
#floating-player-btn:active { transform: scale(0.95); }
#floating-player-btn svg { width: 28px; height: 28px; }
#fp-start-text { font-weight: 600; margin-right: 10px; font-family: "Poppins", sans-serif; font-size: 1.1rem; }
#floating-player-btn.active-mode { width: 60px; padding: 0; border-radius: 50%; }
@media (max-width: 992px) {
.reader-layout { flex-direction: column; }
.document-outline { width: 100%; position: relative; top: 0; max-height: 300px; margin-bottom: 24px; }
}
@media (max-width: 768px) {
.main-content-card { padding: 1.5rem; }
.story-title { font-size: 2rem; }
.story-text-container { font-size: 24px; line-height: 1.9; }
#floating-player-btn { top: 1rem; right: 1rem !important; height: 50px; min-width: 50px; }
#floating-player-btn.active-mode { width: 50px; }
}
.highlight-section {
animation: highlightPulse 2s ease-out;
}
@keyframes highlightPulse {
0% { background-color: rgba(79, 70, 229, 0.15); border-left: 4px solid #4f46e5; border-radius: 8px; }
100% { background-color: transparent; border-left: 4px solid transparent; border-radius: 8px; }
}
</style>
</head>
<body>
<button id="floating-player-btn">
<span id="fp-start-text">Start</span>
<svg id="fp-pause-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" style="display:none;"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>
<svg id="fp-play-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" style="display:none;"><path d="M8 5v14l11-7z"/></svg>
</button>
<div class="container-fluid my-5 px-md-5">
<main id="main-content" class="main-content-card">
<header class="text-center mb-5" id="main-header">
<h1 class="story-title">Interactive Reader</h1>
<p class="story-subtitle">Select your assets folder to begin.</p>
</header>
<div id="resume-alert" class="alert alert-info d-flex justify-content-between align-items-center" style="display:none;">
<span>Welcome back! Resume from where you left off?</span>
<button id="resume-btn" class="btn btn-primary btn-sm">Resume Playback</button>
</div>
<div class="reader-layout">
<aside class="document-outline" id="story-nav" style="display: none;">
<h6 class="outline-title">
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M4.5 11.5A.5.5 0 0 1 5 11h10a.5.5 0 0 1 0 1H5a.5.5 0 0 1-.5-.5zm-2-4A.5.5 0 0 1 3 7h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5zm-2-4A.5.5 0 0 1 1 3h10a.5.5 0 0 1 0 1H1a.5.5 0 0 1-.5-.5z"/>
</svg>
Table of Sections
</h6>
<ul class="outline-list" id="story-nav-list"></ul>
</aside>
<div class="reader-content-area">
<div class="text-center" id="loader-section">
<p>Please select the folder containing your files (e.g., the 'book' folder).</p>
<input type="file" id="folder-input" webkitdirectory directory multiple style="display:none;" />
<button id="load-folder-btn" class="btn btn-dark btn-lg">Select a Folder</button>
<div id="info-alert" class="alert mt-4" style="display:none;"></div>
</div>
<div id="stories-main-container" class="d-none"></div>
</div>
</div>
</main>
</div>
<script>
document.addEventListener("DOMContentLoaded", () => {
const mainContainer = document.getElementById("stories-main-container");
const mainContentCard = document.getElementById("main-content");
const folderInput = document.getElementById("folder-input");
const loadFolderBtn = document.getElementById("load-folder-btn");
const floatingPlayerBtn = document.getElementById("floating-player-btn");
const fpStartText = document.getElementById("fp-start-text");
const fpPauseIcon = document.getElementById("fp-pause-icon");
const fpPlayIcon = document.getElementById("fp-play-icon");
const storyNav = document.getElementById("story-nav");
const storyNavList = document.getElementById("story-nav-list");
const resumeAlert = document.getElementById("resume-alert");
const resumeBtn = document.getElementById("resume-btn");
let storyInstances = [];
let currentlyPlayingInstance = null;
let currentlyPlayingIndex = -1;
let hasStarted = false;
let navObserver;
let currentBookId = null;
const PROGRESS_KEY = "interactiveReaderProgress";
// Apply metadata if dynamically injected
if (typeof INJECTED_MANIFEST !== 'undefined' && INJECTED_MANIFEST) {
document.getElementById("main-header").querySelector(".story-title").textContent = INJECTED_MANIFEST.title || "Interactive Reader";
}
loadFolderBtn.addEventListener("click", () => folderInput.click());
folderInput.addEventListener("change", handleFolderSelection);
mainContainer.addEventListener("click", handleTextClick);
floatingPlayerBtn.addEventListener("click", handleFloatingBtnClick);
window.addEventListener("beforeunload", saveCurrentProgress);
window.addEventListener("resize", positionUI);
window.addEventListener("scroll", positionUI);
function positionUI() {
const rect = mainContentCard.getBoundingClientRect();
const btnRight = window.innerWidth - rect.right - 8;
floatingPlayerBtn.style.right = Math.max(btnRight, 8) + "px";
floatingPlayerBtn.style.left = "auto";
}
function saveCurrentProgress() {
if (!currentlyPlayingInstance || !currentBookId) return;
const idx = storyInstances.indexOf(currentlyPlayingInstance);
saveProgress(currentBookId, idx, currentlyPlayingInstance.getAudioElement().currentTime);
}
function saveProgress(bookId, idx, timestamp) {
localStorage.setItem(PROGRESS_KEY, JSON.stringify({ bookId, instanceIndex: idx, timestamp, lastUpdate: Date.now() }));
}
function loadProgress(bookId) {
const saved = localStorage.getItem(PROGRESS_KEY);
if (!saved) return;
const p = JSON.parse(saved);
if (p.bookId !== bookId) return;
const target = storyInstances[p.instanceIndex];
if (!target) return;
resumeAlert.style.display = "flex";
resumeBtn.onclick = () => {
resumeAlert.style.display = "none";
hasStarted = true;
currentlyPlayingInstance = target;
currentlyPlayingIndex = p.instanceIndex;
target.playAt(p.timestamp);
updateFloatingButton("playing");
};
}
function updateFloatingButton(state) {
if (hasStarted) {
fpStartText.style.display = "none";
floatingPlayerBtn.classList.add("active-mode");
}
if (state === "playing") { fpPauseIcon.style.display = "block"; fpPlayIcon.style.display = "none"; }
else { fpPauseIcon.style.display = "none"; fpPlayIcon.style.display = "block"; }
}
function handleFloatingBtnClick() {
if (!hasStarted) {
hasStarted = true;
if (storyInstances.length > 0) {
currentlyPlayingInstance = storyInstances[0];
currentlyPlayingIndex = 0;
currentlyPlayingInstance.playAt(0);
updateFloatingButton("playing");
}
return;
}
if (currentlyPlayingInstance) {
const audio = currentlyPlayingInstance.getAudioElement();
if (audio.paused) {
currentlyPlayingInstance.play();
updateFloatingButton("playing");
} else {
currentlyPlayingInstance.pause();
updateFloatingButton("paused");
}
} else {
if (storyInstances.length > 0) {
currentlyPlayingInstance = storyInstances[0];
currentlyPlayingIndex = 0;
currentlyPlayingInstance.playAt(0);
updateFloatingButton("playing");
}
}
}
function playNextInstance() {
const next = currentlyPlayingIndex + 1;
if (next < storyInstances.length) {
if (currentlyPlayingInstance) currentlyPlayingInstance.stopAndReset();
currentlyPlayingInstance = storyInstances[next];
currentlyPlayingIndex = next;
currentlyPlayingInstance.playAt(0);
} else {
updateFloatingButton("paused");
currentlyPlayingInstance = null;
currentlyPlayingIndex = -1;
}
}
function handleTextClick(event) {
const wordSpan = event.target.closest(".word");
if (!wordSpan) return;
const storyBlock = event.target.closest(".story-block");
if (!storyBlock) return;
const idx = parseInt(storyBlock.dataset.instanceIndex, 10);
const target = storyInstances[idx];
if (!target) return;
const timestamp = target.getTimeForSpan(wordSpan);
if (timestamp !== null) {
hasStarted = true;
if (currentlyPlayingInstance && currentlyPlayingInstance !== target) {
currentlyPlayingInstance.stopAndReset();
}
currentlyPlayingInstance = target;
currentlyPlayingIndex = idx;
currentlyPlayingInstance.playAt(timestamp);
updateFloatingButton("playing");
}
}
async function handleFolderSelection(event) {
const files = event.target.files;
if (files.length === 0) return;
let loadedManifest = typeof INJECTED_MANIFEST !== 'undefined' && INJECTED_MANIFEST ? INJECTED_MANIFEST : null;
let manifestFile = null;
const allItemsMap = new Map();
for (const file of files) {
const name = file.name;
if (name.toLowerCase() === 'manifest.json') {
manifestFile = file;
continue;
}
const ext = name.split(".").pop().toLowerCase();
const prefixMatch = name.match(/^([\d]+\.[\d]+)_/);
if (!prefixMatch) continue;
const sortKey = prefixMatch[1];
if (!allItemsMap.has(sortKey)) {
allItemsMap.set(sortKey, { audioFile: null, textFile: null, jsonFile: null, imageFiles: [] });
}
const entry = allItemsMap.get(sortKey);
if (["wav", "mp3"].includes(ext)) entry.audioFile = file;
else if (ext === "txt") entry.textFile = file;
else if (ext === "json") entry.jsonFile = file;
else if (["jpg", "jpeg", "png", "gif", "webp"].includes(ext)) entry.imageFiles.push(file);
}
if (manifestFile && !loadedManifest) {
try {
const manifestText = await new Promise((resolve, reject) => {
const r = new FileReader();
r.onload = () => resolve(r.result);
r.onerror = reject;
r.readAsText(manifestFile);
});
loadedManifest = JSON.parse(manifestText);
document.getElementById("main-header").querySelector(".story-title").textContent = loadedManifest.title || "Interactive Reader";
} catch(e) {
console.warn("Could not parse local manifest.json", e);
}
}
const chapterTitles = {};
if (loadedManifest && loadedManifest.assets) {
loadedManifest.assets.forEach(asset => {
if (asset.sortKey) {
const ch = asset.sortKey.split('.')[0];
if (!chapterTitles[ch] && asset.sectionName) {
chapterTitles[ch] = asset.sectionName;
}
}
});
}
const sortedEntries = Array.from(allItemsMap.entries())
.sort(([a], [b]) => {
const [aMaj, aMin] = a.split(".").map(Number);
const [bMaj, bMin] = b.split(".").map(Number);
return aMaj !== bMaj ? aMaj - bMaj : aMin - bMin;
});
const orderedItems = [];
for (const [sortKey, entry] of sortedEntries) {
if (entry.audioFile && entry.textFile && entry.jsonFile) {
orderedItems.push({
type: "audio",
sortKey,
audioFile: entry.audioFile,
textFile: entry.textFile,
jsonFile: entry.jsonFile,
imageFiles: entry.imageFiles
});
} else if (entry.imageFiles.length > 0) {
orderedItems.push({
type: "image",
sortKey,
imageFiles: entry.imageFiles
});
}
}
const audioItems = orderedItems.filter(i => i.type === "audio");
if (audioItems.length === 0) {
document.getElementById("info-alert").textContent = "No valid story parts found. Ensure files have matching .txt, .json, and audio files.";
document.getElementById("info-alert").style.display = "block";
return;
}
currentBookId = audioItems.map(i => i.sortKey).join("|");
document.getElementById("loader-section").style.display = "none";
document.getElementById("main-header").querySelector(".story-subtitle").textContent = "An interactive reading experience.";
mainContainer.classList.remove("d-none");
mainContainer.innerHTML = "";
storyNavList.innerHTML = "";
resumeAlert.style.display = "none";
let audioInstanceIndex = 0;
let lastChapter = null;
orderedItems.forEach((item, globalIdx) => {
const chapter = item.sortKey.split(".")[0];
if (item.type === "audio") {
const currentBlockId = `story-block-${audioInstanceIndex}`;
if (chapter !== lastChapter) {
const title = chapterTitles[chapter] || `Section ${chapter}`;
const li = document.createElement("li");
li.textContent = title;
li.title = title;
li.dataset.chapter = chapter;
li.onclick = () => {
const target = document.getElementById(currentBlockId);
if (target) {
const offset = target.getBoundingClientRect().top + window.pageYOffset - 120;
window.scrollTo({top: offset, behavior: 'smooth'});
target.classList.add('highlight-section');
setTimeout(() => target.classList.remove('highlight-section'), 2000);
}
};
storyNavList.appendChild(li);
lastChapter = chapter;
}
const html = `<div id="${currentBlockId}" class="story-block mt-4" data-instance-index="${audioInstanceIndex}" data-sort-key="${item.sortKey}">
<div class="image-container text-center mb-4"></div>
<div class="loading-indicator text-center p-5"><div class="spinner-border"></div></div>
<audio class="audio-player" style="display:none;"></audio>
<article class="story-text-container" style="display:none;"></article>
</div>`;
mainContainer.insertAdjacentHTML("beforeend", html);
audioInstanceIndex++;
} else if (item.type === "image") {
const currentImageBlockId = `story-image-block-${item.sortKey.replace('.', '-')}`;
if (chapter !== lastChapter) {
const title = chapterTitles[chapter] || `Section ${chapter}`;
const li = document.createElement("li");
li.textContent = title;
li.title = title;
li.dataset.chapter = chapter;
li.onclick = () => {
const target = document.getElementById(currentImageBlockId);
if (target) {
const offset = target.getBoundingClientRect().top + window.pageYOffset - 120;
window.scrollTo({top: offset, behavior: 'smooth'});
target.classList.add('highlight-section');
setTimeout(() => target.classList.remove('highlight-section'), 2000);
}
};
storyNavList.appendChild(li);
lastChapter = chapter;
}
const imgDiv = document.createElement("div");
imgDiv.id = currentImageBlockId;
imgDiv.className = "story-image-block mt-4";
imgDiv.dataset.sortKey = item.sortKey;
for (const imgFile of item.imageFiles) {
const img = document.createElement("img");
img.src = URL.createObjectURL(imgFile);
img.className = "img-fluid rounded shadow-sm";
img.style.maxHeight = "70vh";
imgDiv.appendChild(img);
}
mainContainer.appendChild(imgDiv);
}
});
storyNav.style.display = 'block';
storyNav.classList.add("visible");
floatingPlayerBtn.classList.add("visible");
storyInstances = [];
let instIdx = 0;
for (const item of orderedItems) {
if (item.type !== "audio") continue;
const block = document.getElementById(`story-block-${instIdx}`);
if (item.imageFiles && item.imageFiles.length > 0) {
const imgContainer = block.querySelector(".image-container");
for (const imgFile of item.imageFiles) {
const img = document.createElement("img");
img.src = URL.createObjectURL(imgFile);
img.className = "img-fluid rounded shadow-sm";
img.style.maxHeight = "60vh";
imgContainer.appendChild(img);
}
}
storyInstances.push(createStoryPlayer(block, item, instIdx));
instIdx++;
}
Promise.all(storyInstances.map(i => i.isReady())).then(() => {
loadProgress(currentBookId);
positionUI();
});
setupNavObserver();
setTimeout(positionUI, 100);
}
function setupNavObserver() {
if (navObserver) navObserver.disconnect();
navObserver = new IntersectionObserver((entries) => {
entries.forEach(entry => {
if (entry.isIntersecting) {
const sk = entry.target.dataset.sortKey;
if (sk) {
const ch = sk.split(".")[0];
storyNavList.querySelectorAll("li").forEach(l => l.classList.remove("active"));
const al = storyNavList.querySelector(`li[data-chapter='${ch}']`);
if (al) al.classList.add("active");
}
}
});
}, { threshold: 0.4 });
document.querySelectorAll(".story-block, .story-image-block").forEach(b => navObserver.observe(b));
}
function createStoryPlayer(storyBlock, item, instanceIndex) {
const audioPlayer = storyBlock.querySelector(".audio-player");
const storyContainer = storyBlock.querySelector(".story-text-container");
let wordTimestamps = [];
let sentenceData = [];
let allWordSpans = [];
let wordMap = [];
let animationFrameId = null;
let lastHighlightedWordSpan = null;
let lastHighlightedSentenceSpans = [];
const readyPromise = new Promise(async (resolve) => {
const readFile = (f) => new Promise((res, rej) => {
const r = new FileReader();
r.onload = () => res(r.result);
r.onerror = rej;
r.readAsText(f);
});
const [text, json] = await Promise.all([readFile(item.textFile), readFile(item.jsonFile)]);
wordTimestamps = JSON.parse(json);
audioPlayer.src = URL.createObjectURL(item.audioFile);
renderMarkdown(text);
smartSync();
storyBlock.querySelector(".loading-indicator").style.display = "none";
storyContainer.style.display = "block";
audioPlayer.addEventListener("play", () => { startLoop(); updateFloatingButton("playing"); });
audioPlayer.addEventListener("pause", () => { stopLoop(); updateFloatingButton("paused"); saveCurrentProgress(); });
audioPlayer.addEventListener("ended", () => { stopLoop(); clearAllHighlights(); playNextInstance(); });
resolve();
});
function renderMarkdown(text) {
storyContainer.innerHTML = "";
allWordSpans = [];
const div = document.createElement("div");
div.innerHTML = marked.parse(text, { breaks: true, gfm: true });
function processNode(node) {
if (node.nodeType === Node.TEXT_NODE) {
const words = node.textContent.split(/(\s+)/);
const fragment = document.createDocumentFragment();
words.forEach(part => {
if (part.trim().length > 0) {
const span = document.createElement("span");
span.className = "word";
span.textContent = part;
allWordSpans.push(span);
fragment.appendChild(span);
} else {
fragment.appendChild(document.createTextNode(part));
}
});
node.parentNode.replaceChild(fragment, node);
} else if (node.nodeType === Node.ELEMENT_NODE) {
Array.from(node.childNodes).forEach(processNode);
}
}
processNode(div);
while (div.firstChild) storyContainer.appendChild(div.firstChild);
}
function smartSync() {
wordMap = new Array(allWordSpans.length).fill(undefined);
let aiIdx = 0;
allWordSpans.forEach((span, i) => {
const textWord = span.textContent.toLowerCase().replace(/[^\w]/g, "");
for (let off = 0; off < 5; off++) {
if (aiIdx + off >= wordTimestamps.length) break;
const aiWord = wordTimestamps[aiIdx + off].word.toLowerCase().replace(/[^\w]/g, "");
if (textWord === aiWord) { wordMap[i] = aiIdx + off; aiIdx += off + 1; return; }
}
});
sentenceData = [];
let buffer = [], startIdx = 0;
allWordSpans.forEach((span, i) => {
buffer.push(span);
if (/[.!?]["'\u201D\u2019]?$/.test(span.textContent.trim())) {
let startT = 0, endT = 0;
for (let k = startIdx; k <= i; k++) if (wordMap[k] !== undefined) { startT = wordTimestamps[wordMap[k]].start; break; }
for (let k = i; k >= startIdx; k--) if (wordMap[k] !== undefined) { endT = wordTimestamps[wordMap[k]].end; break; }
if (endT > startT) sentenceData.push({ spans: [...buffer], startTime: startT, endTime: endT });
buffer = []; startIdx = i + 1;
}
});
if (buffer.length > 0) {
let startT = 0, endT = 0;
for (let k = startIdx; k < allWordSpans.length; k++) if (wordMap[k] !== undefined) { startT = wordTimestamps[wordMap[k]].start; break; }
for (let k = allWordSpans.length - 1; k >= startIdx; k--) if (wordMap[k] !== undefined) { endT = wordTimestamps[wordMap[k]].end; break; }
if (endT > startT) sentenceData.push({ spans: [...buffer], startTime: startT, endTime: endT });
}
}
function highlightLoop() {
if (audioPlayer.paused) return;
const t = audioPlayer.currentTime;
const aiIdx = wordTimestamps.findIndex(w => t >= w.start && t < w.end);
if (aiIdx !== -1) {
const tIdx = wordMap.findIndex(i => i === aiIdx);
if (tIdx !== -1) {
const sp = allWordSpans[tIdx];
if (sp !== lastHighlightedWordSpan) {
if (lastHighlightedWordSpan) lastHighlightedWordSpan.classList.remove("current-word");
sp.classList.add("current-word");
const rect = sp.getBoundingClientRect();
if (rect.top < window.innerHeight * 0.3 || rect.bottom > window.innerHeight * 0.7) sp.scrollIntoView({ behavior: "smooth", block: "center" });
lastHighlightedWordSpan = sp;
}
}
}
const sent = sentenceData.find(s => t >= s.startTime && t <= s.endTime);
if (sent && sent.spans !== lastHighlightedSentenceSpans) {
lastHighlightedSentenceSpans.forEach(s => s.classList.remove("current-sentence-bg"));
sent.spans.forEach(s => s.classList.add("current-sentence-bg"));
lastHighlightedSentenceSpans = sent.spans;
}
animationFrameId = requestAnimationFrame(highlightLoop);
}
function clearAllHighlights() {
if (lastHighlightedWordSpan) lastHighlightedWordSpan.classList.remove("current-word");
lastHighlightedSentenceSpans.forEach(s => s.classList.remove("current-sentence-bg"));
lastHighlightedWordSpan = null; lastHighlightedSentenceSpans = [];
}
function startLoop() { cancelAnimationFrame(animationFrameId); animationFrameId = requestAnimationFrame(highlightLoop); }
function stopLoop() { cancelAnimationFrame(animationFrameId); }
return {
play: () => audioPlayer.play(),
pause: () => audioPlayer.pause(),
playAt: (time) => { audioPlayer.currentTime = time; audioPlayer.play(); },
stopAndReset: () => { audioPlayer.pause(); audioPlayer.currentTime = 0; clearAllHighlights(); },
getAudioElement: () => audioPlayer,
getTimeForSpan: (span) => {
const idx = allWordSpans.indexOf(span);
const aiIdx = wordMap[idx];
return aiIdx !== undefined ? wordTimestamps[aiIdx].start : null;
},
isReady: () => readyPromise
};
}
});
</script>
</body>
</html>

855
reader_templates/index.html Executable file
View File

@@ -0,0 +1,855 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Interactive Audiobook Reader</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" />
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Lora:ital,wght@0,400..700;1,400..700&family=Poppins:wght@500;700&display=swap" rel="stylesheet" />
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script>
const INJECTED_MANIFEST = /*{{MANIFEST_DATA}}*/ null;
</script>
<style>
@keyframes fadeIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } }
@keyframes spin { to { transform: rotate(360deg); } }
@keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.5; } }
html { scroll-behavior: smooth; }
body {
background-image: linear-gradient(to top, #f3e7e9 0%, #e3eeff 99%, #e3eeff 100%);
color: #1f2937; font-family: "Lora", serif;
}
.story-title { font-family: "Poppins", sans-serif; font-weight: 700; font-size: 2.5rem; color: #111827; }
.story-subtitle { font-family: "Poppins", sans-serif; color: #4b5563; font-weight: 500; font-size: 1.1rem; }
.main-content-card {
background-color: rgba(255,255,255,0.9); backdrop-filter: blur(12px);
border-radius: 1rem; padding: 3rem 4rem; box-shadow: 0 10px 35px rgba(0,0,0,0.08);
border: 1px solid rgba(255,255,255,0.2); max-width: 1400px; margin: 0 auto;
animation: fadeIn 0.5s ease-in-out;
}
.reader-layout { display: flex; gap: 32px; align-items: flex-start; text-align: left; }
.document-outline {
width: 280px; flex-shrink: 0; position: sticky; top: 40px;
background: white; border-radius: 12px; padding: 20px;
box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05);
border: 1px solid #e2e8f0; max-height: calc(100vh - 80px); overflow-y: auto;
opacity: 0; transform: translateX(-20px);
transition: opacity 0.4s ease-out, transform 0.4s ease-out;
}
.document-outline.visible { opacity: 1; transform: translateX(0); }
.outline-title {
font-family: "Poppins", sans-serif; font-weight: 700; font-size: 0.95rem;
color: #64748b; text-transform: uppercase; margin-bottom: 16px;
border-bottom: 1px solid #e2e8f0; padding-bottom: 12px;
display: flex; align-items: center; gap: 8px; letter-spacing: 0.5px;
}
.outline-list { list-style: none; padding: 0; margin: 0; }
.outline-list li {
padding: 10px 14px; font-size: 0.9rem; color: #334155; cursor: pointer;
border-radius: 8px; transition: all 0.2s; white-space: nowrap;
overflow: hidden; text-overflow: ellipsis; font-family: "Inter", sans-serif;
margin-bottom: 4px;
}
.outline-list li:hover { background: #f8fafc; color: #4f46e5; }
.outline-list li.active { background: #e0e7ff; color: #3730a3; font-weight: 600; }
.reader-content-area { flex: 1; min-width: 0; }
.story-text-container { font-size: 36px; line-height: 2.1; color: #1f2937; cursor: pointer; }
.story-text-container h1, .story-text-container h2, .story-text-container h3 {
font-family: "Poppins", sans-serif; color: #111827; line-height: 1.8;
margin-top: 1.5em; margin-bottom: 0.8em;
}
.story-text-container h1 { font-size: 2.2em; }
.story-text-container h2 { font-size: 1.8em; }
.story-text-container h3 { font-size: 1.5em; }
.story-text-container p { margin-bottom: 1.2em; }
.story-text-container img { max-width: 100%; height: auto; border-radius: 8px; margin: 16px auto; display: block; }
.word { transition: all 0.15s ease; border-radius: 3px; }
.word:hover { background-color: #f1f5f9; }
.current-sentence-bg {
-webkit-box-decoration-break: clone; box-decoration-break: clone;
background-color: #e0e7ff; padding: 0.1em 0.25em; margin: 0 -0.2em; border-radius: 8px;
}
.current-word { color: #3d4e81; text-decoration: underline; text-decoration-thickness: 3px; text-underline-offset: 3px; font-weight: 700; }
.story-image-block { text-align: center; margin: 24px 0; }
.story-image-block img { max-width: 100%; height: auto; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }
.block-loading-spinner {
display: inline-flex; align-items: center; gap: 8px;
color: #6b7280; font-size: 0.9rem; font-family: "Inter", sans-serif;
padding: 8px 0;
}
.block-loading-spinner::before {
content: ''; width: 16px; height: 16px;
border: 2px solid #e2e8f0; border-top-color: #5753c9;
border-radius: 50%; animation: spin 0.8s linear infinite;
}
#floating-player-btn {
position: fixed; top: 2rem; height: 60px; min-width: 60px; padding: 0 24px;
border-radius: 30px; background-image: linear-gradient(45deg, #3d4e81 0%, #5753c9 50%, #6e78da 100%);
background-size: 200% auto; border: none; color: white; box-shadow: 0 8px 25px rgba(0,0,0,0.3);
display: none; align-items: center; justify-content: center; cursor: pointer; z-index: 1050;
transition: transform 0.2s, opacity 0.3s, width 0.3s, padding 0.3s, border-radius 0.3s;
opacity: 0; transform: scale(0.8);
}
#floating-player-btn.visible { display: flex; opacity: 1; transform: scale(1); }
#floating-player-btn:hover { transform: scale(1.05); }
#floating-player-btn:active { transform: scale(0.95); }
#floating-player-btn svg { width: 28px; height: 28px; }
#fp-start-text { font-weight: 600; margin-right: 10px; font-family: "Poppins", sans-serif; font-size: 1.1rem; }
#floating-player-btn.active-mode { width: 60px; padding: 0; border-radius: 50%; }
#floating-player-btn.loading { background: linear-gradient(45deg, #6b7280, #9ca3af); cursor: wait; }
#fp-loading-spinner {
width: 24px; height: 24px;
border: 3px solid rgba(255,255,255,0.3); border-top-color: white;
border-radius: 50%; animation: spin 0.8s linear infinite;
display: none;
}
#floating-player-btn.loading #fp-loading-spinner { display: block; }
#floating-player-btn.loading #fp-play-icon,
#floating-player-btn.loading #fp-pause-icon,
#floating-player-btn.loading #fp-start-text { display: none !important; }
.reader-toast {
position: fixed; bottom: 2rem; left: 50%; transform: translateX(-50%);
background: rgba(30, 41, 59, 0.95); color: white;
padding: 12px 24px; border-radius: 24px;
font-family: "Inter", sans-serif; font-size: 0.9rem;
box-shadow: 0 8px 24px rgba(0,0,0,0.2);
z-index: 1100; opacity: 0; transition: opacity 0.3s;
pointer-events: none; display: flex; align-items: center; gap: 8px;
}
.reader-toast.visible { opacity: 1; }
.reader-toast .spinner {
width: 14px; height: 14px;
border: 2px solid rgba(255,255,255,0.3); border-top-color: white;
border-radius: 50%; animation: spin 0.8s linear infinite;
}
@media (max-width: 992px) {
.reader-layout { flex-direction: column; }
.document-outline { width: 100%; position: relative; top: 0; max-height: 300px; margin-bottom: 24px; }
}
@media (max-width: 768px) {
.main-content-card { padding: 1.5rem; }
.story-text-container { font-size: 24px; line-height: 1.9; }
#floating-player-btn { top: 1rem; right: 1rem !important; height: 50px; min-width: 50px; }
#floating-player-btn.active-mode { width: 50px; }
}
.highlight-section { animation: highlightPulse 2s ease-out; }
@keyframes highlightPulse {
0% { background-color: rgba(79, 70, 229, 0.15); border-left: 4px solid #4f46e5; border-radius: 8px; }
100% { background-color: transparent; border-left: 4px solid transparent; border-radius: 8px; }
}
</style>
</head>
<body>
<button id="floating-player-btn">
<span id="fp-start-text">Start</span>
<svg id="fp-pause-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" style="display:none;"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>
<svg id="fp-play-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" style="display:none;"><path d="M8 5v14l11-7z"/></svg>
<div id="fp-loading-spinner"></div>
</button>
<div id="reader-toast" class="reader-toast">
<div class="spinner"></div>
<span id="reader-toast-text">Loading…</span>
</div>
<div class="container-fluid my-5 px-md-5">
<main id="main-content" class="main-content-card">
<header class="text-center mb-5" id="main-header">
<h1 class="story-title">Interactive Reader</h1>
<p class="story-subtitle">Loading book...</p>
</header>
<div id="resume-alert" class="alert alert-info d-flex justify-content-between align-items-center" style="display:none;">
<span>Welcome back! Resume from where you left off?</span>
<button id="resume-btn" class="btn btn-primary btn-sm">Resume Playback</button>
</div>
<div class="reader-layout">
<aside class="document-outline" id="story-nav" style="display: none;">
<h6 class="outline-title">
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M4.5 11.5A.5.5 0 0 1 5 11h10a.5.5 0 0 1 0 1H5a.5.5 0 0 1-.5-.5zm-2-4A.5.5 0 0 1 3 7h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5zm-2-4A.5.5 0 0 1 1 3h10a.5.5 0 0 1 0 1H1a.5.5 0 0 1-.5-.5z"/>
</svg>
Table of Sections
</h6>
<ul class="outline-list" id="story-nav-list"></ul>
</aside>
<div class="reader-content-area">
<div id="stories-main-container"></div>
</div>
</div>
</main>
</div>
<script>
/**
* Interactive Reader — Smart Preload Architecture (v3)
*
* Loading Strategy:
* 1. TEXT + JSON: eager parallel batch (small files, fast).
* 2. AUDIO: preload="none" by default → no auto-download of 200+ MP3s.
* 3. Smart preload: when block N starts playing, preload N+1, N+2.
* At 70% through N's audio, ensure N+1 is ready (safety net).
* 4. Memory management: keep at most MAX_AUDIO_LOADED audio sources in
* memory. When user moves forward, release distant past audio.
*
* Scroll Strategy:
* - Manual navigation (Start / outline / word click): scroll block to top.
* - Auto-advance (audio ended → next block): NO block scroll, let the
* word highlighter smoothly carry the user. Prevents jarring jumps.
*/
document.addEventListener("DOMContentLoaded", async () => {
const mainContainer = document.getElementById("stories-main-container");
const mainContentCard = document.getElementById("main-content");
const floatingPlayerBtn = document.getElementById("floating-player-btn");
const fpStartText = document.getElementById("fp-start-text");
const fpPauseIcon = document.getElementById("fp-pause-icon");
const fpPlayIcon = document.getElementById("fp-play-icon");
const storyNav = document.getElementById("story-nav");
const storyNavList = document.getElementById("story-nav-list");
const resumeAlert = document.getElementById("resume-alert");
const resumeBtn = document.getElementById("resume-btn");
const toastEl = document.getElementById("reader-toast");
const toastText = document.getElementById("reader-toast-text");
// --- State ---
let storyInstances = [];
let currentlyPlayingInstance = null;
let currentlyPlayingIndex = -1;
let hasStarted = false;
let navObserver = null;
let currentBookId = null;
// --- Tunables ---
const PROGRESS_KEY = "interactiveReaderProgress";
const TEXT_BATCH_SIZE = 8; // parallel text/json fetches
const PRELOAD_AHEAD = 2; // preload N blocks ahead of current
const MID_PRELOAD_THRESHOLD = 0.7; // at 70% of current audio, ensure next is ready
const MAX_AUDIO_LOADED = 5; // keep at most this many audio sources in memory
const KEEP_BEHIND = 1; // keep this many past blocks loaded (for back-seek)
// --- Event bindings ---
floatingPlayerBtn.addEventListener("click", handleFloatingBtnClick);
mainContainer.addEventListener("click", handleTextClick);
window.addEventListener("beforeunload", saveCurrentProgress);
window.addEventListener("resize", positionUI);
window.addEventListener("scroll", positionUI);
// ===================================================
// UI helpers
// ===================================================
function positionUI() {
const rect = mainContentCard.getBoundingClientRect();
const btnRight = window.innerWidth - rect.right - 8;
floatingPlayerBtn.style.right = Math.max(btnRight, 8) + "px";
floatingPlayerBtn.style.left = "auto";
}
function showToast(msg) { toastText.textContent = msg; toastEl.classList.add("visible"); }
function hideToast() { toastEl.classList.remove("visible"); }
function setButtonLoading(b) { floatingPlayerBtn.classList.toggle("loading", b); }
function updateFloatingButton(state) {
if (hasStarted) { fpStartText.style.display = "none"; floatingPlayerBtn.classList.add("active-mode"); }
if (state === "playing") { fpPauseIcon.style.display = "block"; fpPlayIcon.style.display = "none"; }
else { fpPauseIcon.style.display = "none"; fpPlayIcon.style.display = "block"; }
}
// ===================================================
// Progress persistence
// ===================================================
function saveCurrentProgress() {
if (!currentlyPlayingInstance || !currentBookId || !currentlyPlayingInstance.audio) return;
try {
localStorage.setItem(PROGRESS_KEY, JSON.stringify({
bookId: currentBookId,
instanceIndex: currentlyPlayingIndex,
timestamp: currentlyPlayingInstance.audio.currentTime,
lastUpdate: Date.now()
}));
} catch (e) { /* quota errors ignored */ }
}
function loadProgress(bookId) {
const saved = localStorage.getItem(PROGRESS_KEY);
if (!saved) return;
let p; try { p = JSON.parse(saved); } catch (e) { return; }
if (p.bookId !== bookId) return;
if (p.instanceIndex < 0 || p.instanceIndex >= storyInstances.length) return;
resumeAlert.style.display = "flex";
resumeBtn.onclick = () => {
resumeAlert.style.display = "none";
hasStarted = true;
playInstanceAt(p.instanceIndex, p.timestamp);
};
}
// ===================================================
// INITIAL RENDER
// ===================================================
try {
let manifest = (typeof INJECTED_MANIFEST !== 'undefined' && INJECTED_MANIFEST) ? INJECTED_MANIFEST : null;
if (!manifest) {
const resp = await fetch("manifest.json");
manifest = await resp.json();
}
document.getElementById("main-header").querySelector(".story-title").textContent = manifest.title || "Interactive Reader";
document.getElementById("main-header").querySelector(".story-subtitle").textContent = "An interactive reading experience.";
const audioAssets = (manifest.assets || []).filter(a => a.textFile && a.audioFile && a.jsonFile);
if (audioAssets.length === 0) {
mainContainer.innerHTML = '<p class="text-center text-muted">No playable content found.</p>';
return;
}
const chapterTitles = {};
if (manifest.assets) {
manifest.assets.forEach(asset => {
if (asset.sortKey) {
const ch = asset.sortKey.split('.')[0];
if (!chapterTitles[ch] && asset.sectionName) chapterTitles[ch] = asset.sectionName;
}
});
}
const allSortKeys = new Set();
const audioPrefixSet = new Set();
for (const asset of audioAssets) {
const m = asset.prefix.match(/^([\d]+\.[\d]+)_/);
if (m) { allSortKeys.add(m[1]); audioPrefixSet.add(m[1]); }
}
const imageMap = new Map();
if (Array.isArray(manifest.images)) {
manifest.images.forEach(img => {
allSortKeys.add(img.sortKey);
if (!imageMap.has(img.sortKey)) imageMap.set(img.sortKey, []);
imageMap.get(img.sortKey).push(img.file);
});
}
const sortedKeys = Array.from(allSortKeys).sort((a, b) => {
const [aMa, aMi] = a.split(".").map(Number);
const [bMa, bMi] = b.split(".").map(Number);
return aMa !== bMa ? aMa - bMa : aMi - bMi;
});
currentBookId = audioAssets.map(a => a.prefix).join("|");
let audioInstanceIndex = 0;
let lastChapter = null;
for (const sortKey of sortedKeys) {
const chapter = sortKey.split(".")[0];
const isAudio = audioPrefixSet.has(sortKey);
const hasImages = imageMap.has(sortKey);
if (isAudio) {
const asset = audioAssets.find(a => a.prefix.startsWith(sortKey + "_"));
if (!asset) continue;
const blockId = `story-block-${audioInstanceIndex}`;
if (chapter !== lastChapter) {
addOutlineEntry(chapterTitles[chapter] || `Section ${chapter}`, chapter, blockId);
lastChapter = chapter;
}
mainContainer.insertAdjacentHTML("beforeend", `
<div id="${blockId}" class="story-block mt-4"
data-instance-index="${audioInstanceIndex}"
data-sort-key="${sortKey}">
<div class="image-container text-center mb-4"></div>
<article class="story-text-container">
<div class="block-loading-spinner">Loading content…</div>
</article>
<audio class="audio-player" preload="none" style="display:none;"></audio>
</div>
`);
if (hasImages) {
const imgContainer = document.getElementById(blockId).querySelector(".image-container");
for (const imgUrl of imageMap.get(sortKey)) {
const img = document.createElement("img");
img.src = imgUrl; img.loading = "lazy";
img.className = "img-fluid rounded shadow-sm";
img.style.maxHeight = "60vh";
imgContainer.appendChild(img);
}
}
storyInstances.push({
index: audioInstanceIndex,
asset: asset,
blockEl: null,
audio: null,
wordTimestamps: [],
wordMap: [],
sentenceData: [],
allWordSpans: [],
animationFrameId: null,
lastHighlightedWordSpan: null,
lastHighlightedSentenceSpans: [],
textReady: false,
audioReady: false,
audioLoadingPromise: null,
midPreloadTriggered: false // prevents repeated trigger of mid-play preload
});
audioInstanceIndex++;
} else if (hasImages) {
const imgBlockId = `story-image-block-${sortKey.replace('.', '-')}`;
if (chapter !== lastChapter) {
addOutlineEntry(chapterTitles[chapter] || `Section ${chapter}`, chapter, imgBlockId);
lastChapter = chapter;
}
const imgDiv = document.createElement("div");
imgDiv.id = imgBlockId;
imgDiv.className = "story-image-block mt-4";
imgDiv.dataset.sortKey = sortKey;
for (const imgUrl of imageMap.get(sortKey)) {
const img = document.createElement("img");
img.src = imgUrl; img.loading = "lazy";
img.className = "img-fluid rounded shadow-sm";
img.style.maxHeight = "70vh";
imgDiv.appendChild(img);
}
mainContainer.appendChild(imgDiv);
}
}
storyInstances.forEach(inst => { inst.blockEl = document.getElementById(`story-block-${inst.index}`); });
storyNav.style.display = 'block';
storyNav.classList.add("visible");
floatingPlayerBtn.classList.add("visible");
setupNavObserver();
setTimeout(positionUI, 100);
loadAllTextContent().then(() => loadProgress(currentBookId));
} catch (e) {
console.error(e);
mainContainer.innerHTML = `<p class="text-center text-danger">Error loading: ${e.message}</p>`;
}
async function loadAllTextContent() {
for (let i = 0; i < storyInstances.length; i += TEXT_BATCH_SIZE) {
const batch = storyInstances.slice(i, i + TEXT_BATCH_SIZE);
await Promise.all(batch.map(loadTextForInstance));
}
}
async function loadTextForInstance(inst) {
if (inst.textReady) return;
try {
const [textResp, jsonResp] = await Promise.all([
fetch(inst.asset.textFile),
fetch(inst.asset.jsonFile)
]);
if (!textResp.ok || !jsonResp.ok) throw new Error('fetch failed');
const [text, timestamps] = await Promise.all([textResp.text(), jsonResp.json()]);
inst.wordTimestamps = timestamps;
renderMarkdownInto(inst, text);
smartSync(inst);
inst.textReady = true;
} catch (err) {
console.warn(`Text load failed for block ${inst.index}:`, err);
const container = inst.blockEl.querySelector(".story-text-container");
if (container) container.innerHTML = '<p class="text-muted small">Content failed to load.</p>';
}
}
// ===================================================
// AUDIO LAZY LOADING + MEMORY MANAGEMENT
// ===================================================
function ensureAudioLoaded(inst) {
if (inst.audioReady && inst.audio && inst.audio.src) return Promise.resolve(inst);
if (inst.audioLoadingPromise) return inst.audioLoadingPromise;
inst.audioLoadingPromise = new Promise((resolve, reject) => {
const audio = inst.blockEl.querySelector(".audio-player");
if (!audio) return reject(new Error('No audio element'));
const onCanPlay = () => {
audio.removeEventListener("error", onError);
inst.audio = audio;
inst.audioReady = true;
wireAudioEvents(inst);
resolve(inst);
};
const onError = () => {
audio.removeEventListener("canplay", onCanPlay);
inst.audioLoadingPromise = null;
reject(new Error('Audio failed to load'));
};
audio.addEventListener("canplay", onCanPlay, { once: true });
audio.addEventListener("error", onError, { once: true });
audio.preload = "auto";
audio.src = inst.asset.audioFile;
audio.load();
});
return inst.audioLoadingPromise;
}
function wireAudioEvents(inst) {
const audio = inst.audio;
audio.addEventListener("play", () => {
startHighlightLoop(inst);
updateFloatingButton("playing");
});
audio.addEventListener("pause", () => {
stopHighlightLoop(inst);
updateFloatingButton("paused");
saveCurrentProgress();
});
audio.addEventListener("ended", () => {
stopHighlightLoop(inst);
clearHighlights(inst);
playNextInstance();
});
// Safety-net mid-play preload: when current passes 70%, ensure next is ready.
audio.addEventListener("timeupdate", () => {
if (inst.midPreloadTriggered) return;
if (!audio.duration || isNaN(audio.duration)) return;
if ((audio.currentTime / audio.duration) >= MID_PRELOAD_THRESHOLD) {
inst.midPreloadTriggered = true;
const nextIdx = inst.index + 1;
if (nextIdx < storyInstances.length) {
ensureAudioLoaded(storyInstances[nextIdx]).catch(() => {});
}
}
});
}
/**
* Preload the next N audio blocks (fire-and-forget).
*/
function preloadAhead(fromIndex) {
for (let i = 1; i <= PRELOAD_AHEAD; i++) {
const idx = fromIndex + i;
if (idx >= storyInstances.length) break;
ensureAudioLoaded(storyInstances[idx]).catch(() => {});
}
}
/**
* Memory management: keep only a sliding window of audio elements loaded.
* Releases audio resources for blocks too far from the current one.
*
* Window = [currentIndex - KEEP_BEHIND, currentIndex + PRELOAD_AHEAD]
* Bounded to MAX_AUDIO_LOADED total.
*/
function pruneLoadedAudio(currentIndex) {
// Find all currently-loaded instances
const loaded = storyInstances.filter(i => i.audioReady && i.audio && i.audio.src);
if (loaded.length <= MAX_AUDIO_LOADED) return;
const keepLow = currentIndex - KEEP_BEHIND;
const keepHigh = currentIndex + PRELOAD_AHEAD;
// Sort candidates for eviction: outside window first, then by distance from current
const candidates = loaded
.filter(inst => inst !== currentlyPlayingInstance)
.map(inst => ({
inst,
inWindow: inst.index >= keepLow && inst.index <= keepHigh,
distance: Math.abs(inst.index - currentIndex)
}))
.sort((a, b) => {
// Out-of-window first
if (a.inWindow !== b.inWindow) return a.inWindow ? 1 : -1;
// Then by farthest distance
return b.distance - a.distance;
});
let toEvict = loaded.length - MAX_AUDIO_LOADED;
for (const c of candidates) {
if (toEvict <= 0) break;
releaseAudio(c.inst);
toEvict--;
}
}
function releaseAudio(inst) {
if (!inst.audio) return;
try {
inst.audio.pause();
inst.audio.removeAttribute('src');
inst.audio.load(); // reset to empty state, free decoder
} catch (e) { /* ignore */ }
inst.audioReady = false;
inst.audioLoadingPromise = null;
inst.midPreloadTriggered = false;
// NOTE: we intentionally keep inst.audio reference; events are still bound,
// but a fresh ensureAudioLoaded() call will re-set the src and work.
// To be fully clean, we'd null it out — but element is needed for re-load.
}
// ===================================================
// PLAYBACK
// ===================================================
async function playInstanceAt(index, timestamp = 0, opts = {}) {
if (index < 0 || index >= storyInstances.length) return;
const inst = storyInstances[index];
const isAutoAdvance = opts.autoAdvance === true;
if (currentlyPlayingInstance && currentlyPlayingInstance !== inst) {
stopAndResetInstance(currentlyPlayingInstance);
}
currentlyPlayingInstance = inst;
currentlyPlayingIndex = index;
hasStarted = true;
inst.midPreloadTriggered = false; // reset for this play session
// For auto-advance, audio is usually preloaded → no spinner flicker.
// Only show spinner if audio truly isn't ready yet.
const needsLoad = !inst.audioReady;
if (needsLoad) {
setButtonLoading(true);
showToast("Loading audio…");
}
try {
if (!inst.textReady) await loadTextForInstance(inst);
await ensureAudioLoaded(inst);
if (needsLoad) {
hideToast();
setButtonLoading(false);
}
inst.audio.currentTime = timestamp;
await inst.audio.play();
updateFloatingButton("playing");
// Block-level scroll ONLY for manual navigation, not auto-advance.
if (!isAutoAdvance) {
const rect = inst.blockEl.getBoundingClientRect();
if (rect.top < 0 || rect.top > window.innerHeight * 0.6) {
inst.blockEl.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
}
preloadAhead(index);
pruneLoadedAudio(index);
} catch (err) {
console.error(err);
setButtonLoading(false);
showToast("Failed to load audio. Tap again to retry.");
setTimeout(hideToast, 3000);
updateFloatingButton("paused");
}
}
function playNextInstance() {
const next = currentlyPlayingIndex + 1;
if (next < storyInstances.length) {
playInstanceAt(next, 0, { autoAdvance: true });
} else {
updateFloatingButton("paused");
currentlyPlayingInstance = null;
currentlyPlayingIndex = -1;
}
}
function stopAndResetInstance(inst) {
if (inst.audio) { inst.audio.pause(); inst.audio.currentTime = 0; }
stopHighlightLoop(inst);
clearHighlights(inst);
}
function handleFloatingBtnClick() {
if (!hasStarted) { playInstanceAt(0, 0); return; }
if (currentlyPlayingInstance && currentlyPlayingInstance.audio) {
const audio = currentlyPlayingInstance.audio;
if (audio.paused) { audio.play().catch(console.error); updateFloatingButton("playing"); }
else { audio.pause(); updateFloatingButton("paused"); }
} else {
playInstanceAt(0, 0);
}
}
async function handleTextClick(event) {
const wordSpan = event.target.closest(".word");
if (!wordSpan) return;
const storyBlock = event.target.closest(".story-block");
if (!storyBlock) return;
const idx = parseInt(storyBlock.dataset.instanceIndex, 10);
if (isNaN(idx)) return;
const inst = storyInstances[idx];
if (!inst.textReady) {
showToast("Loading…");
await loadTextForInstance(inst);
hideToast();
}
const timestamp = getTimeForSpan(inst, wordSpan);
if (timestamp === null) return;
playInstanceAt(idx, timestamp);
}
// ===================================================
// Outline / Nav
// ===================================================
function addOutlineEntry(title, chapter, targetBlockId) {
const li = document.createElement("li");
li.textContent = title; li.title = title;
li.dataset.chapter = chapter;
li.onclick = () => {
const target = document.getElementById(targetBlockId);
if (target) {
const offset = target.getBoundingClientRect().top + window.pageYOffset - 120;
window.scrollTo({ top: offset, behavior: 'smooth' });
target.classList.add('highlight-section');
setTimeout(() => target.classList.remove('highlight-section'), 2000);
}
};
storyNavList.appendChild(li);
}
function setupNavObserver() {
if (navObserver) navObserver.disconnect();
navObserver = new IntersectionObserver((entries) => {
entries.forEach(entry => {
if (entry.isIntersecting) {
const sk = entry.target.dataset.sortKey;
if (sk) {
const ch = sk.split(".")[0];
storyNavList.querySelectorAll("li").forEach(l => l.classList.remove("active"));
const al = storyNavList.querySelector(`li[data-chapter='${ch}']`);
if (al) al.classList.add("active");
}
}
});
}, { threshold: 0.4 });
document.querySelectorAll(".story-block, .story-image-block").forEach(b => navObserver.observe(b));
}
// ===================================================
// Render & Sync
// ===================================================
function renderMarkdownInto(inst, text) {
const container = inst.blockEl.querySelector(".story-text-container");
container.innerHTML = "";
inst.allWordSpans = [];
const div = document.createElement("div");
div.innerHTML = marked.parse(text, { breaks: true, gfm: true });
(function processNode(node) {
if (node.nodeType === Node.TEXT_NODE) {
const words = node.textContent.split(/(\s+)/);
const fragment = document.createDocumentFragment();
words.forEach(part => {
if (part.trim().length > 0) {
const span = document.createElement("span");
span.className = "word";
span.textContent = part;
inst.allWordSpans.push(span);
fragment.appendChild(span);
} else fragment.appendChild(document.createTextNode(part));
});
node.parentNode.replaceChild(fragment, node);
} else if (node.nodeType === Node.ELEMENT_NODE) {
Array.from(node.childNodes).forEach(processNode);
}
})(div);
while (div.firstChild) container.appendChild(div.firstChild);
}
function smartSync(inst) {
inst.wordMap = new Array(inst.allWordSpans.length).fill(undefined);
let ai = 0;
inst.allWordSpans.forEach((span, i) => {
const tw = span.textContent.toLowerCase().replace(/[^\w]/g, "");
for (let o = 0; o < 5; o++) {
if (ai + o >= inst.wordTimestamps.length) break;
const aw = inst.wordTimestamps[ai + o].word.toLowerCase().replace(/[^\w]/g, "");
if (tw === aw) { inst.wordMap[i] = ai + o; ai += o + 1; return; }
}
});
inst.sentenceData = [];
let buf = [], si = 0;
inst.allWordSpans.forEach((s, i) => {
buf.push(s);
if (/[.!?]["'\u201D\u2019]?$/.test(s.textContent.trim())) {
let sT = 0, eT = 0;
for (let k = si; k <= i; k++) if (inst.wordMap[k] !== undefined) { sT = inst.wordTimestamps[inst.wordMap[k]].start; break; }
for (let k = i; k >= si; k--) if (inst.wordMap[k] !== undefined) { eT = inst.wordTimestamps[inst.wordMap[k]].end; break; }
if (eT > sT) inst.sentenceData.push({ spans: [...buf], startTime: sT, endTime: eT });
buf = []; si = i + 1;
}
});
if (buf.length > 0) {
let sT = 0, eT = 0;
for (let k = si; k < inst.allWordSpans.length; k++) if (inst.wordMap[k] !== undefined) { sT = inst.wordTimestamps[inst.wordMap[k]].start; break; }
for (let k = inst.allWordSpans.length - 1; k >= si; k--) if (inst.wordMap[k] !== undefined) { eT = inst.wordTimestamps[inst.wordMap[k]].end; break; }
if (eT > sT) inst.sentenceData.push({ spans: [...buf], startTime: sT, endTime: eT });
}
}
function getTimeForSpan(inst, wordSpan) {
const idx = inst.allWordSpans.indexOf(wordSpan);
if (idx < 0) return null;
const ai = inst.wordMap[idx];
return ai !== undefined ? inst.wordTimestamps[ai].start : null;
}
function startHighlightLoop(inst) {
cancelAnimationFrame(inst.animationFrameId);
inst.animationFrameId = requestAnimationFrame(() => hlLoop(inst));
}
function stopHighlightLoop(inst) { cancelAnimationFrame(inst.animationFrameId); }
function hlLoop(inst) {
if (!inst.audio || inst.audio.paused) return;
const t = inst.audio.currentTime;
const ai = inst.wordTimestamps.findIndex(w => t >= w.start && t < w.end);
if (ai !== -1) {
const ti = inst.wordMap.findIndex(i => i === ai);
if (ti !== -1) {
const sp = inst.allWordSpans[ti];
if (sp !== inst.lastHighlightedWordSpan) {
if (inst.lastHighlightedWordSpan) inst.lastHighlightedWordSpan.classList.remove("current-word");
sp.classList.add("current-word");
const r = sp.getBoundingClientRect();
// Slightly relaxed threshold (20% / 80%) for smoother scroll
if (r.top < window.innerHeight * 0.2 || r.bottom > window.innerHeight * 0.8) {
sp.scrollIntoView({ behavior: "smooth", block: "center" });
}
inst.lastHighlightedWordSpan = sp;
}
}
}
const sent = inst.sentenceData.find(s => t >= s.startTime && t <= s.endTime);
if (sent && sent.spans !== inst.lastHighlightedSentenceSpans) {
inst.lastHighlightedSentenceSpans.forEach(s => s.classList.remove("current-sentence-bg"));
sent.spans.forEach(s => s.classList.add("current-sentence-bg"));
inst.lastHighlightedSentenceSpans = sent.spans;
}
inst.animationFrameId = requestAnimationFrame(() => hlLoop(inst));
}
function clearHighlights(inst) {
if (inst.lastHighlightedWordSpan) inst.lastHighlightedWordSpan.classList.remove("current-word");
inst.lastHighlightedSentenceSpans.forEach(s => s.classList.remove("current-sentence-bg"));
inst.lastHighlightedWordSpan = null;
inst.lastHighlightedSentenceSpans = [];
}
});
</script>
</body>
</html>

20
requirements.txt Normal file
View File

@@ -0,0 +1,20 @@
# requirements.txt - Audiobook Maker Pro v4.2 (CPU-only, production)
# --- Web framework ---
Flask==3.1.2
gunicorn==23.0.0
# --- Environment / config ---
python-dotenv==1.0.1
# --- HTTP client (Beam API calls) ---
requests==2.32.3
# --- Audio processing ---
pydub==0.25.1
# --- Document processing ---
PyMuPDF==1.24.10
python-docx==1.1.2
olefile==0.47
striprtf==0.0.29

27
routes/__init__.py Normal file
View File

@@ -0,0 +1,27 @@
# routes/__init__.py - Blueprint Registration (v4.2)
from flask import Flask
def register_blueprints(app: Flask):
from routes.auth_routes import auth_bp
from routes.admin_routes import admin_bp
from routes.main_routes import main_bp
from routes.pdf_routes import pdf_bp
from routes.docx_routes import docx_bp
from routes.project_routes import project_bp
from routes.generation_routes import generation_bp
from routes.export_routes import export_bp
from routes.public_routes import public_bp # NEW
app.register_blueprint(auth_bp)
app.register_blueprint(admin_bp)
app.register_blueprint(main_bp)
app.register_blueprint(pdf_bp)
app.register_blueprint(docx_bp)
app.register_blueprint(project_bp)
app.register_blueprint(generation_bp)
app.register_blueprint(export_bp)
app.register_blueprint(public_bp) # NEW
print("✅ All blueprints registered (v4.2)")

175
routes/admin_routes.py Normal file
View File

@@ -0,0 +1,175 @@
# routes/admin_routes.py - Admin Dashboard Routes
from flask import Blueprint, request, jsonify, session, send_from_directory
from db import get_db
from auth import admin_required
admin_bp = Blueprint('admin', __name__)
@admin_bp.route('/admin')
@admin_required
def admin_page():
"""Serve admin dashboard page."""
return send_from_directory('templates', 'admin.html')
@admin_bp.route('/api/admin/users', methods=['GET'])
@admin_required
def list_users():
"""List all users."""
db = get_db()
cursor = db.cursor()
cursor.execute('''
SELECT id, username, role, is_active, created_at, last_login
FROM users ORDER BY created_at DESC
''')
users = []
for row in cursor.fetchall():
users.append({
'id': row['id'],
'username': row['username'],
'role': row['role'],
'is_active': bool(row['is_active']),
'created_at': row['created_at'],
'last_login': row['last_login']
})
return jsonify({'users': users})
@admin_bp.route('/api/admin/users', methods=['POST'])
@admin_required
def create_user():
"""Create a new user."""
data = request.json
username = data.get('username', '').strip()
password = data.get('password', '')
role = data.get('role', 'user')
if not username or not password:
return jsonify({'error': 'Username and password are required'}), 400
if len(username) < 3:
return jsonify({'error': 'Username must be at least 3 characters'}), 400
if len(password) < 4:
return jsonify({'error': 'Password must be at least 4 characters'}), 400
if role not in ('user', 'admin'):
return jsonify({'error': 'Role must be "user" or "admin"'}), 400
db = get_db()
cursor = db.cursor()
try:
cursor.execute('''
INSERT INTO users (username, password, role, is_active)
VALUES (?, ?, ?, 1)
''', (username, password, role))
db.commit()
print(f"✅ New user created: {username} (role: {role})")
return jsonify({
'success': True,
'user_id': cursor.lastrowid,
'message': f'User "{username}" created successfully'
})
except Exception as e:
if 'UNIQUE constraint' in str(e):
return jsonify({'error': f'Username "{username}" already exists'}), 400
return jsonify({'error': str(e)}), 500
@admin_bp.route('/api/admin/users/<int:user_id>', methods=['PUT'])
@admin_required
def update_user(user_id):
"""Update a user."""
data = request.json
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id, username FROM users WHERE id = ?', (user_id,))
user = cursor.fetchone()
if not user:
return jsonify({'error': 'User not found'}), 404
# Build update query dynamically
updates = []
params = []
if 'username' in data:
username = data['username'].strip()
if len(username) < 3:
return jsonify({'error': 'Username must be at least 3 characters'}), 400
updates.append('username = ?')
params.append(username)
if 'password' in data and data['password']:
password = data['password']
if len(password) < 4:
return jsonify({'error': 'Password must be at least 4 characters'}), 400
updates.append('password = ?')
params.append(password)
if 'role' in data:
role = data['role']
if role not in ('user', 'admin'):
return jsonify({'error': 'Role must be "user" or "admin"'}), 400
# Prevent demoting self
if user_id == session.get('user_id') and role != 'admin':
return jsonify({'error': 'Cannot change your own role'}), 400
updates.append('role = ?')
params.append(role)
if 'is_active' in data:
# Prevent deactivating self
if user_id == session.get('user_id') and not data['is_active']:
return jsonify({'error': 'Cannot deactivate your own account'}), 400
updates.append('is_active = ?')
params.append(1 if data['is_active'] else 0)
if not updates:
return jsonify({'error': 'No fields to update'}), 400
params.append(user_id)
try:
cursor.execute(f"UPDATE users SET {', '.join(updates)} WHERE id = ?", params)
db.commit()
return jsonify({'success': True, 'message': 'User updated successfully'})
except Exception as e:
if 'UNIQUE constraint' in str(e):
return jsonify({'error': 'Username already exists'}), 400
return jsonify({'error': str(e)}), 500
@admin_bp.route('/api/admin/users/<int:user_id>', methods=['DELETE'])
@admin_required
def delete_user(user_id):
"""Delete a user."""
# Prevent deleting self
if user_id == session.get('user_id'):
return jsonify({'error': 'Cannot delete your own account'}), 400
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id, username FROM users WHERE id = ?', (user_id,))
user = cursor.fetchone()
if not user:
return jsonify({'error': 'User not found'}), 404
cursor.execute('DELETE FROM users WHERE id = ?', (user_id,))
db.commit()
print(f"🗑️ User deleted: {user['username']}")
return jsonify({'success': True, 'message': f'User "{user["username"]}" deleted'})

113
routes/auth_routes.py Normal file
View File

@@ -0,0 +1,113 @@
# routes/auth_routes.py - Authentication Routes
from flask import Blueprint, request, jsonify, session, redirect, url_for, send_from_directory
from db import get_db
from auth import login_required, admin_required, get_current_user
auth_bp = Blueprint('auth', __name__)
@auth_bp.route('/login')
def login_page():
"""Serve login page."""
if 'user_id' in session:
return redirect(url_for('main.index'))
return send_from_directory('templates', 'login.html')
@auth_bp.route('/api/auth/login', methods=['POST'])
def login():
"""Handle user login."""
data = request.json
username = data.get('username', '').strip()
password = data.get('password', '')
if not username or not password:
return jsonify({'error': 'Username and password are required'}), 400
db = get_db()
cursor = db.cursor()
cursor.execute('''
SELECT id, username, password, role, is_active
FROM users WHERE username = ?
''', (username,))
user = cursor.fetchone()
if not user:
return jsonify({'error': 'Invalid username or password'}), 401
if not user['is_active']:
return jsonify({'error': 'Account is disabled. Contact your administrator.'}), 403
if user['password'] != password:
return jsonify({'error': 'Invalid username or password'}), 401
# Set session
session['user_id'] = user['id']
session['username'] = user['username']
session['user_role'] = user['role']
# Update last login
cursor.execute('''
UPDATE users SET last_login = CURRENT_TIMESTAMP WHERE id = ?
''', (user['id'],))
db.commit()
print(f"✅ User logged in: {username} (role: {user['role']})")
return jsonify({
'success': True,
'user': {
'id': user['id'],
'username': user['username'],
'role': user['role']
}
})
@auth_bp.route('/api/auth/logout', methods=['POST'])
def logout():
"""Handle user logout."""
username = session.get('username', 'Unknown')
session.clear()
print(f"👋 User logged out: {username}")
return jsonify({'success': True})
@auth_bp.route('/api/auth/me', methods=['GET'])
@login_required
def get_me():
"""Get current user info."""
user = get_current_user()
return jsonify({'user': user})
@auth_bp.route('/api/auth/change-password', methods=['POST'])
@login_required
def change_password():
"""Change current user's password."""
data = request.json
current_password = data.get('current_password', '')
new_password = data.get('new_password', '')
if not current_password or not new_password:
return jsonify({'error': 'Current password and new password are required'}), 400
if len(new_password) < 4:
return jsonify({'error': 'New password must be at least 4 characters'}), 400
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT password FROM users WHERE id = ?', (session['user_id'],))
user = cursor.fetchone()
if not user or user['password'] != current_password:
return jsonify({'error': 'Current password is incorrect'}), 401
cursor.execute('UPDATE users SET password = ? WHERE id = ?', (new_password, session['user_id']))
db.commit()
return jsonify({'success': True, 'message': 'Password changed successfully'})

57
routes/docx_routes.py Normal file
View File

@@ -0,0 +1,57 @@
# routes/docx_routes.py - DOCX/DOC Upload and Processing Routes
import json
from flask import Blueprint, request, jsonify
from db import get_db
from docx_processor import process_docx_to_markdown
from ai_processor import process_document_smartly
from auth import login_required
docx_bp = Blueprint('docx', __name__)
@docx_bp.route('/api/upload-docx', methods=['POST'])
@login_required
def upload_docx():
"""Upload and process a DOCX or DOC file."""
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
doc_file = request.files['file']
if not doc_file or not doc_file.filename:
return jsonify({'error': 'Invalid file'}), 400
filename = doc_file.filename.lower()
if not (filename.endswith('.docx') or filename.endswith('.doc')):
return jsonify({'error': 'File must be a .docx or .doc file'}), 400
try:
print(f"📄 Processing Word document: {doc_file.filename}")
file_bytes = doc_file.read()
print(f" 📏 File size: {len(file_bytes)} bytes")
result = process_docx_to_markdown(file_bytes, doc_file.filename)
# --- AI Powered Smart Reconstruction & Section Tagging ---
smart_blocks = process_document_smartly(result.get('markdown_blocks', []), result.get('metadata', {}))
block_count = len(smart_blocks)
image_count = sum(1 for b in smart_blocks if b.get('type') == 'image')
text_count = block_count - image_count
print(f"✅ Word document processed & reconstructed: {block_count} blocks ({text_count} text, {image_count} images)")
return jsonify({
'success': True,
'filename': doc_file.filename,
'metadata': result.get('metadata', {}),
'blocks': smart_blocks
})
except Exception as e:
import traceback
traceback.print_exc()
return jsonify({'error': str(e)}), 500

185
routes/export_routes.py Normal file
View File

@@ -0,0 +1,185 @@
# routes/export_routes.py - Export Routes
import io
import os
import json
import base64
import zipfile
import re
from flask import Blueprint, request, jsonify, send_file
from db import get_db
from utils import sanitize_filename, strip_markdown
from auth import login_required
export_bp = Blueprint('export', __name__)
@export_bp.route('/api/export/<int:project_id>', methods=['GET'])
@login_required
def export_project(project_id):
"""Export project as ZIP file. Only includes chapters with audio."""
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT * FROM projects WHERE id = ?', (project_id,))
project = cursor.fetchone()
if not project:
return jsonify({'error': 'Project not found'}), 404
project_name = sanitize_filename(project['name'])
cursor.execute('''
SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number
''', (project_id,))
chapters = cursor.fetchall()
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
manifest = {
'title': project['name'],
'assets': [],
'images': []
}
for chapter in chapters:
section_id = chapter['chapter_number']
section_title = chapter['title']
cursor.execute('''
SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
''', (chapter['id'],))
blocks = cursor.fetchall()
chapter_has_audio = False
for block in blocks:
is_image_block = (
(block['content'] and block['content'].strip().startswith('![')) or
block['block_type'] == 'image'
)
if not is_image_block and block['audio_data']:
chapter_has_audio = True
break
if not chapter_has_audio:
continue
for block in blocks:
block_order = block['block_order']
prefix = f"{section_id}.{block_order}"
content = block['content']
is_image_block = (
(content and content.strip().startswith('![')) or
block['block_type'] == 'image'
)
cursor.execute('''
SELECT * FROM block_images WHERE block_id = ? ORDER BY id
''', (block['id'],))
images = cursor.fetchall()
image_idx = 0
for img in images:
if img['position'] == 'before':
image_filename = f"book/{prefix}_img{image_idx}.{img['image_format']}"
image_bytes = base64.b64decode(img['image_data'])
zf.writestr(image_filename, image_bytes)
manifest['images'].append({
'sortKey': prefix,
'file': image_filename
})
image_idx += 1
if is_image_block:
for img in images:
if img['position'] == 'after':
next_prefix = f"{section_id}.{block_order + 1}"
image_filename = f"book/{next_prefix}_img{image_idx}.{img['image_format']}"
image_bytes = base64.b64decode(img['image_data'])
zf.writestr(image_filename, image_bytes)
manifest['images'].append({
'sortKey': next_prefix,
'file': image_filename
})
image_idx += 1
continue
plain_text = strip_markdown(content)
if not plain_text.strip():
continue
if not block['audio_data']:
continue
text_filename = f"book/{prefix}_{project_name}.txt"
zf.writestr(text_filename, plain_text)
asset_entry = {
'prefix': f"{prefix}_",
'sortKey': prefix,
'sectionName': section_title,
'textFile': text_filename,
'audioFile': None,
'jsonFile': None
}
audio_filename = f"book/{prefix}_{project_name}.{block['audio_format'] or 'mp3'}"
audio_bytes = base64.b64decode(block['audio_data'])
zf.writestr(audio_filename, audio_bytes)
asset_entry['audioFile'] = audio_filename
if block['transcription']:
json_filename = f"book/{prefix}_{project_name}.json"
zf.writestr(json_filename, block['transcription'])
asset_entry['jsonFile'] = json_filename
manifest['assets'].append(asset_entry)
for img in images:
if img['position'] == 'after':
next_prefix = f"{section_id}.{block_order + 1}"
image_filename = f"book/{next_prefix}_img{image_idx}.{img['image_format']}"
image_bytes = base64.b64decode(img['image_data'])
zf.writestr(image_filename, image_bytes)
manifest['images'].append({
'sortKey': next_prefix,
'file': image_filename
})
image_idx += 1
# Write manifest.json to zip root
manifest_json_str = json.dumps(manifest, indent=2)
zf.writestr('manifest.json', manifest_json_str)
# --- DYNAMIC INJECTION FOR Reader.html & index.html ---
reader_templates_dir = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
'reader_templates'
)
index_path = os.path.join(reader_templates_dir, 'index.html')
if os.path.exists(index_path):
with open(index_path, 'r', encoding='utf-8') as f:
html_content = f.read()
# Inject manifest into index.html safely
html_content = html_content.replace('/*{{MANIFEST_DATA}}*/ null', manifest_json_str)
zf.writestr('index.html', html_content)
reader_path = os.path.join(reader_templates_dir, 'Reader.html')
if os.path.exists(reader_path):
with open(reader_path, 'r', encoding='utf-8') as f:
html_content = f.read()
# Inject manifest into Reader.html safely
html_content = html_content.replace('/*{{MANIFEST_DATA}}*/ null', manifest_json_str)
zf.writestr('Reader.html', html_content)
zip_buffer.seek(0)
return send_file(
zip_buffer,
mimetype='application/zip',
as_attachment=True,
download_name=f"{project_name}.zip"
)

475
routes/generation_routes.py Normal file
View File

@@ -0,0 +1,475 @@
# routes/generation_routes.py - Combined Endpoint with Correct Task Polling
import json
import time
import base64
import requests
from flask import Blueprint, request, jsonify
from db import get_db
from config import BEAM_COMBINED_URL, BEAM_API_TOKEN, get_beam_headers_json
from utils import convert_to_mp3, strip_markdown
from auth import login_required
generation_bp = Blueprint('generation', __name__)
# ============================================
# Beam Task Polling Config
# ============================================
BEAM_TASK_API = "https://api.beam.cloud/v2/task/{task_id}/"
POLL_INTERVAL = 3
POLL_MAX_WAIT = 300
def get_beam_auth_headers():
"""Beam API headers — Bearer AND Basic উভয়ই try করবে।"""
return {
'Authorization': f'Bearer {BEAM_API_TOKEN}',
'Content-Type': 'application/json',
}
def poll_beam_task(task_id):
"""Beam task poll করে result আনে।"""
print(f"⏳ Polling task: {task_id}")
task_url = BEAM_TASK_API.format(task_id=task_id)
print(f" URL: {task_url}")
start_time = time.time()
# প্রথম কয়েকটা attempt এ 404 আসতে পারে — task register হতে delay
initial_delay = True
while True:
elapsed = time.time() - start_time
if elapsed > POLL_MAX_WAIT:
print(f"❌ Polling timeout after {POLL_MAX_WAIT}s")
return None, f'Task timed out after {int(POLL_MAX_WAIT)} seconds'
# প্রথম ২ সেকেন্ড wait করি task register হতে
if initial_delay and elapsed < 2:
time.sleep(2)
initial_delay = False
continue
try:
# ★ Bearer token দিয়ে try
resp = requests.get(task_url, headers=get_beam_auth_headers(), timeout=30)
# Bearer fail হলে Basic try করি
if resp.status_code in (401, 403):
print(f" Bearer auth failed, trying Basic...")
basic_headers = {
'Authorization': f'Basic {BEAM_API_TOKEN}',
'Content-Type': 'application/json',
}
resp = requests.get(task_url, headers=basic_headers, timeout=30)
print(f" [{int(elapsed)}s] HTTP {resp.status_code} | Body: {len(resp.text)} chars")
if resp.status_code == 404:
# Task এখনও register হয়নি — wait
if elapsed < 30:
print(f" Task not found yet, waiting...")
time.sleep(POLL_INTERVAL)
continue
else:
# ৩০ সেকেন্ড পরেও 404 — সমস্যা
print(f"❌ Task not found after {int(elapsed)}s")
# ★ Debug: response body দেখি
print(f" 404 body: {resp.text[:300]}")
# ★ Alternative: Beam API base URL ভিন্ন হতে পারে
# কিছু Beam setup এ URL format ভিন্ন
alt_urls = [
f"https://api.beam.cloud/v2/task/{task_id}/status/",
f"https://api.beam.cloud/v2/task/{task_id}",
f"https://api.beam.cloud/v1/task/{task_id}/",
]
for alt_url in alt_urls:
try:
alt_resp = requests.get(alt_url, headers=get_beam_auth_headers(), timeout=10)
print(f" Alt URL {alt_url}: HTTP {alt_resp.status_code}")
if alt_resp.status_code == 200:
print(f" ✅ Found working URL!")
resp = alt_resp
break
except Exception:
pass
if resp.status_code == 404:
return None, f'Task {task_id} not found on Beam API after {int(elapsed)}s'
if resp.status_code == 200 and resp.text:
try:
data = resp.json()
except Exception as e:
print(f" JSON parse error: {e}")
print(f" Body: {resp.text[:300]}")
time.sleep(POLL_INTERVAL)
continue
status = data.get('status', '').upper()
print(f" [{int(elapsed)}s] Task status: {status}")
if status in ('COMPLETE', 'COMPLETED', 'SUCCESS'):
print(f"✅ Task complete!")
# ★ Result বের করা — Beam বিভিন্ন জায়গায় result রাখে
# 1. 'output' key
# 2. 'result' key
# 3. 'outputs' list (file-based)
# 4. response body তেই (endpoint mode)
actual_result = None
# Check 'output' (endpoint mode — function return value)
if data.get('output') and isinstance(data['output'], dict):
actual_result = data['output']
print(f" Result found in 'output' key")
# Check 'result'
elif data.get('result') and isinstance(data['result'], dict):
actual_result = data['result']
print(f" Result found in 'result' key")
# Check if top-level has audio_base64 (unlikely but possible)
elif data.get('audio_base64'):
actual_result = data
print(f" Result found in top-level data")
if actual_result and actual_result.get('audio_base64'):
return actual_result, None
elif actual_result and actual_result.get('success'):
return actual_result, None
# ★ Outputs (file-based) — need to download
outputs = data.get('outputs', [])
if outputs:
print(f" Task has {len(outputs)} output files")
# For our use case, result should be in 'output' not files
# But log it for debug
for out in outputs:
print(f" Output: {out.get('name', '?')}{out.get('url', '?')}")
# No usable result found
print(f" ⚠️ Task complete but no audio in response")
print(f" Response keys: {list(data.keys())}")
print(f" Full response (first 500): {json.dumps(data, default=str)[:500]}")
return None, 'Task completed but no audio data in result. Check Beam logs.'
elif status in ('FAILED', 'ERROR'):
error_msg = data.get('error', 'Task failed')
print(f"❌ Task failed: {error_msg}")
return None, f'Task failed: {error_msg}'
elif status in ('CANCELLED', 'CANCELED'):
return None, 'Task was cancelled'
elif status in ('TIMEOUT', 'EXPIRED'):
return None, f'Task {status.lower()} on Beam. Container may not have started in time.'
elif status in ('PENDING', 'RUNNING', 'RETRY'):
pass # Keep polling
else:
print(f" Unknown status: {status}")
elif resp.status_code != 404:
print(f" Unexpected HTTP {resp.status_code}: {resp.text[:200]}")
except requests.exceptions.RequestException as e:
print(f" Poll error: {e}")
time.sleep(POLL_INTERVAL)
# ============================================
# Beam Call + Smart Response Handler
# ============================================
def call_beam_and_get_result(text, voice='af_heart', speed=1.0):
"""Beam combined endpoint call + async polling।"""
if not BEAM_COMBINED_URL:
return None, 'BEAM_COMBINED_URL is not configured in .env'
print(f"📞 Calling: {BEAM_COMBINED_URL}")
print(f" text={len(text)} chars, voice={voice}")
response = requests.post(
BEAM_COMBINED_URL,
headers=get_beam_headers_json(),
json={
'text': text,
'voice': voice,
'speed': speed,
'skip_alignment': False,
},
timeout=300
)
print(f"📡 Status: {response.status_code}")
print(f"📡 Content-Length: {response.headers.get('Content-Length', 'N/A')}")
task_id = response.headers.get('X-Task-Id', '')
# ========================================
# CASE 1: Task ID + empty/no body → Async → Poll
# ========================================
if task_id and (not response.text or not response.text.strip() or response.headers.get('Content-Length') == '0'):
print(f"📋 Async mode — Task ID: {task_id}")
return poll_beam_task(task_id)
# ========================================
# CASE 2: Task ID + body
# ========================================
if task_id and response.text and response.text.strip():
print(f"📋 Task ID: {task_id} + body ({len(response.text)} chars)")
try:
result = response.json()
if result.get('success') and result.get('audio_base64'):
print(f"✅ Direct sync result")
return _extract(result), None
# Body isn't the final result — poll
return poll_beam_task(task_id)
except Exception:
return poll_beam_task(task_id)
# ========================================
# CASE 3: No task_id + empty body → Error
# ========================================
if not response.text or not response.text.strip():
return None, 'Empty response from Beam with no task ID'
# ========================================
# CASE 4: Synchronous response
# ========================================
if response.status_code != 200:
try:
err = response.json().get('error', response.text[:200])
except Exception:
err = response.text[:200]
return None, f'Beam Error ({response.status_code}): {err}'
try:
result = response.json()
except Exception as e:
return None, f'Invalid JSON: {response.text[:100]}'
if not result.get('success'):
return None, result.get('error', 'Unknown error')
return _extract(result), None
def _extract(result):
return {
'audio_base64': result.get('audio_base64', ''),
'audio_format': result.get('audio_format', 'wav'),
'sample_rate': result.get('sample_rate', 24000),
'transcription': result.get('timestamps', []),
}
# ============================================
# API Route: Single Block
# ============================================
@generation_bp.route('/api/generate', methods=['POST'])
@login_required
def generate_audio():
data = request.json
text = data.get('text', '')
voice = data.get('voice', 'af_heart')
block_id = data.get('block_id')
if not text:
return jsonify({'error': 'No text provided'}), 400
stripped = text.strip()
if stripped.startswith('![') and '](' in stripped and stripped.endswith(')'):
return jsonify({'error': 'Cannot generate audio for image content'}), 400
clean_text = strip_markdown(text)
if not clean_text.strip():
return jsonify({'error': 'No speakable text content'}), 400
try:
print(f"")
print(f"{'='*60}")
print(f"🔊 GENERATE REQUEST")
print(f" Voice: {voice}, Text: {len(clean_text)} chars")
print(f" Preview: {clean_text[:100]}...")
print(f"{'='*60}")
result, error = call_beam_and_get_result(clean_text, voice)
if error:
print(f"❌ Failed: {error}")
return jsonify({'error': error}), 500
audio_base64 = result.get('audio_base64', '')
source_format = result.get('audio_format', 'wav')
transcription = result.get('transcription', [])
if not audio_base64:
return jsonify({'error': 'No audio data received'}), 500
if source_format != 'mp3':
audio_base64 = convert_to_mp3(audio_base64, source_format)
if block_id:
db = get_db()
cursor = db.cursor()
cursor.execute('''
UPDATE markdown_blocks
SET audio_data = ?, audio_format = 'mp3', transcription = ?
WHERE id = ?
''', (audio_base64, json.dumps(transcription), block_id))
db.commit()
print(f"✅ DONE: audio={len(audio_base64)} bytes, words={len(transcription)}")
print(f"{'='*60}")
return jsonify({
'success': True,
'audio_data': audio_base64,
'audio_format': 'mp3',
'transcription': transcription
})
except requests.exceptions.ConnectionError as e:
print(f"❌ CONNECTION: {e}")
return jsonify({'error': 'Cannot connect to Beam Cloud.'}), 500
except requests.exceptions.Timeout:
return jsonify({'error': 'Request timed out. Try again in 1-2 minutes.'}), 500
except requests.exceptions.RequestException as e:
return jsonify({'error': f'API error: {str(e)}'}), 500
except Exception as e:
import traceback
traceback.print_exc()
return jsonify({'error': str(e)}), 500
# ============================================
# API Route: Chapter
# ============================================
@generation_bp.route('/api/generate-chapter', methods=['POST'])
@login_required
def generate_chapter_audio():
data = request.json
chapter_id = data.get('chapter_id')
voice = data.get('voice', 'af_heart')
if not chapter_id:
return jsonify({'error': 'Chapter ID required'}), 400
db = get_db()
cursor = db.cursor()
cursor.execute('''
SELECT id, content, tts_text, block_type FROM markdown_blocks
WHERE chapter_id = ? ORDER BY block_order
''', (chapter_id,))
blocks = cursor.fetchall()
if not blocks:
return jsonify({'error': 'No blocks found'}), 404
results = []
success_count = 0
error_count = 0
total = len(blocks)
print(f"\n{'='*60}")
print(f"📖 CHAPTER: {total} blocks, voice={voice}")
print(f"{'='*60}")
for idx, block in enumerate(blocks):
block_id = block['id']
block_type = block['block_type'] if 'block_type' in block.keys() else 'paragraph'
content = block['content'] or ''
text = block['tts_text'] if block['tts_text'] else content
if block_type == 'image':
results.append({'block_id': block_id, 'success': True, 'skipped': True})
continue
stripped = text.strip()
if stripped.startswith('![') and '](' in stripped and stripped.endswith(')'):
results.append({'block_id': block_id, 'success': True, 'skipped': True})
continue
clean_text = strip_markdown(text)
if not clean_text.strip():
results.append({'block_id': block_id, 'success': True, 'skipped': True})
continue
print(f"\n📖 Block {idx+1}/{total}: {len(clean_text)} chars")
try:
result, error = call_beam_and_get_result(clean_text, voice)
if error:
print(f"❌ Block {block_id}: {error}")
results.append({'block_id': block_id, 'success': False, 'error': error})
error_count += 1
continue
audio_base64 = result.get('audio_base64', '')
source_format = result.get('audio_format', 'wav')
transcription = result.get('transcription', [])
if not audio_base64:
results.append({'block_id': block_id, 'success': False, 'error': 'No audio'})
error_count += 1
continue
if source_format != 'mp3':
audio_base64 = convert_to_mp3(audio_base64, source_format)
cursor.execute('''
UPDATE markdown_blocks
SET audio_data = ?, audio_format = 'mp3', transcription = ?
WHERE id = ?
''', (audio_base64, json.dumps(transcription), block_id))
results.append({
'block_id': block_id,
'success': True,
'audio_data': audio_base64,
'transcription': transcription
})
success_count += 1
print(f"✅ Block {idx+1} done")
except Exception as e:
print(f"❌ Block {block_id}: {e}")
results.append({'block_id': block_id, 'success': False, 'error': str(e)})
error_count += 1
db.commit()
skipped = total - success_count - error_count
print(f"\n📖 COMPLETE: {success_count} ok, {error_count} fail, {skipped} skip")
print(f"{'='*60}\n")
return jsonify({
'success': True,
'results': results,
'summary': {
'total': total,
'generated': success_count,
'failed': error_count,
'skipped': skipped
}
})

61
routes/main_routes.py Normal file
View File

@@ -0,0 +1,61 @@
# routes/main_routes.py - Main Application Routes
import os
from flask import Blueprint, jsonify, send_from_directory, session
from config import DATABASE, VOICES
from auth import login_required, get_current_user
main_bp = Blueprint('main', __name__)
@main_bp.route('/')
@login_required
def index():
"""Serve main application page."""
return send_from_directory('templates', 'index.html')
@main_bp.route('/static/<path:filename>')
def serve_static(filename):
"""Serve static files."""
return send_from_directory('static', filename)
@main_bp.route('/api/voices', methods=['GET'])
@login_required
def get_voices():
"""Get available TTS voices."""
return jsonify({'voices': VOICES})
@main_bp.route('/api/stats', methods=['GET'])
@login_required
def get_stats():
"""Get database statistics."""
from db import get_db
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT COUNT(*) as count FROM projects')
project_count = cursor.fetchone()['count']
cursor.execute('SELECT COUNT(*) as count FROM chapters')
chapter_count = cursor.fetchone()['count']
cursor.execute('SELECT COUNT(*) as count FROM markdown_blocks')
block_count = cursor.fetchone()['count']
cursor.execute('SELECT COUNT(*) as count FROM pdf_documents')
pdf_count = cursor.fetchone()['count']
db_size = os.path.getsize(DATABASE) if os.path.exists(DATABASE) else 0
return jsonify({
'projects': project_count,
'chapters': chapter_count,
'blocks': block_count,
'pdf_documents': pdf_count,
'database_size_mb': round(db_size / (1024 * 1024), 2)
})

68
routes/pdf_routes.py Normal file
View File

@@ -0,0 +1,68 @@
# routes/pdf_routes.py - PDF Upload and Processing Routes
import json
from flask import Blueprint, request, jsonify
from db import get_db
from pdf_processor import process_pdf_to_markdown
from ai_processor import process_document_smartly
from auth import login_required
pdf_bp = Blueprint('pdf', __name__)
@pdf_bp.route('/api/upload-pdf', methods=['POST'])
@login_required
def upload_pdf():
"""Upload and process a PDF file."""
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
pdf_file = request.files['file']
if not pdf_file or not pdf_file.filename:
return jsonify({'error': 'Invalid file'}), 400
if not pdf_file.filename.lower().endswith('.pdf'):
return jsonify({'error': 'File must be a PDF'}), 400
try:
print(f"📄 Processing PDF: {pdf_file.filename}")
pdf_bytes = pdf_file.read()
result = process_pdf_to_markdown(pdf_bytes)
# --- AI Powered Smart Reconstruction & Section Tagging ---
smart_blocks = process_document_smartly(result['markdown_blocks'], result['metadata'])
# Save PDF document record
db = get_db()
cursor = db.cursor()
cursor.execute('''
INSERT INTO pdf_documents (filename, page_count, metadata)
VALUES (?, ?, ?)
''', (
pdf_file.filename,
result["page_count"],
json.dumps(result["metadata"])
))
db.commit()
doc_id = cursor.lastrowid
print(f"✅ PDF processed & reconstructed: {result['page_count']} pages, {len(smart_blocks)} blocks")
return jsonify({
'success': True,
'document_id': doc_id,
'filename': pdf_file.filename,
'page_count': result['page_count'],
'metadata': result['metadata'],
'blocks': smart_blocks
})
except Exception as e:
import traceback
traceback.print_exc()
return jsonify({'error': str(e)}), 500

417
routes/project_routes.py Normal file
View File

@@ -0,0 +1,417 @@
# routes/project_routes.py - Project Management Routes (v4.2)
import json
import base64
from flask import Blueprint, request, jsonify
from db import get_db, vacuum_db
from auth import login_required
project_bp = Blueprint('project', __name__)
@project_bp.route('/api/projects', methods=['GET'])
@login_required
def list_projects():
"""List all projects with publishing info."""
db = get_db()
cursor = db.cursor()
cursor.execute('''
SELECT p.id, p.name, p.created_at, p.updated_at,
p.is_published, p.published_at, p.thumbnail_data, p.thumbnail_format,
p.description, p.author, p.category, p.view_count,
(SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count,
(SELECT COUNT(*) FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE c.project_id = p.id) as block_count,
(SELECT COUNT(*) FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE c.project_id = p.id AND mb.audio_data IS NOT NULL AND mb.audio_data != '') as audio_count
FROM projects p
ORDER BY p.updated_at DESC
''')
projects = []
for row in cursor.fetchall():
projects.append({
'id': row['id'],
'name': row['name'],
'created_at': row['created_at'],
'updated_at': row['updated_at'],
'chapter_count': row['chapter_count'],
'block_count': row['block_count'],
'audio_count': row['audio_count'],
'is_published': bool(row['is_published']),
'published_at': row['published_at'],
'thumbnail_data': row['thumbnail_data'],
'thumbnail_format': row['thumbnail_format'] or 'png',
'description': row['description'] or '',
'author': row['author'] or '',
'category': row['category'] or '',
'view_count': row['view_count'] or 0
})
return jsonify({'projects': projects})
@project_bp.route('/api/projects', methods=['POST'])
@login_required
def create_project():
"""Create a new project."""
data = request.json
name = data.get('name', '').strip()
if not name:
return jsonify({'error': 'Project name is required'}), 400
db = get_db()
cursor = db.cursor()
try:
cursor.execute('INSERT INTO projects (name) VALUES (?)', (name,))
db.commit()
return jsonify({
'success': True,
'project_id': cursor.lastrowid,
'name': name
})
except Exception as e:
if 'UNIQUE constraint' in str(e):
return jsonify({'error': 'Project with this name already exists'}), 400
return jsonify({'error': str(e)}), 500
@project_bp.route('/api/projects/<int:project_id>', methods=['GET'])
@login_required
def get_project(project_id):
"""Get a project with all its chapters and blocks."""
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT * FROM projects WHERE id = ?', (project_id,))
project = cursor.fetchone()
if not project:
return jsonify({'error': 'Project not found'}), 404
cursor.execute('''
SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number
''', (project_id,))
chapters = cursor.fetchall()
chapters_data = []
for chapter in chapters:
cursor.execute('''
SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
''', (chapter['id'],))
blocks = cursor.fetchall()
blocks_data = []
for block in blocks:
cursor.execute('''
SELECT * FROM block_images WHERE block_id = ? ORDER BY id
''', (block['id'],))
images = cursor.fetchall()
blocks_data.append({
'id': block['id'],
'block_order': block['block_order'],
'block_type': block['block_type'],
'content': block['content'],
'tts_text': block['tts_text'],
'audio_data': block['audio_data'],
'audio_format': block['audio_format'],
'transcription': json.loads(block['transcription']) if block['transcription'] else [],
'images': [{
'id': img['id'],
'data': img['image_data'],
'format': img['image_format'],
'alt_text': img['alt_text'],
'position': img['position']
} for img in images]
})
chapters_data.append({
'id': chapter['id'],
'chapter_number': chapter['chapter_number'],
'title': chapter['title'],
'voice': chapter['voice'],
'blocks': blocks_data
})
return jsonify({
'id': project['id'],
'name': project['name'],
'created_at': project['created_at'],
'updated_at': project['updated_at'],
'chapters': chapters_data
})
@project_bp.route('/api/projects/<int:project_id>', methods=['PUT'])
@login_required
def update_project(project_id):
"""Update project name."""
data = request.json
name = data.get('name', '').strip()
if not name:
return jsonify({'error': 'Project name is required'}), 400
db = get_db()
cursor = db.cursor()
try:
cursor.execute('''
UPDATE projects SET name = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?
''', (name, project_id))
db.commit()
if cursor.rowcount == 0:
return jsonify({'error': 'Project not found'}), 404
return jsonify({'success': True})
except Exception as e:
if 'UNIQUE constraint' in str(e):
return jsonify({'error': 'A project with this name already exists'}), 400
return jsonify({'error': str(e)}), 500
@project_bp.route('/api/projects/<int:project_id>', methods=['DELETE'])
@login_required
def delete_project(project_id):
"""Delete a project and all its data."""
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
if not cursor.fetchone():
return jsonify({'error': 'Project not found'}), 404
cursor.execute('''
DELETE FROM block_images WHERE block_id IN (
SELECT mb.id FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE c.project_id = ?
)
''', (project_id,))
cursor.execute('''
DELETE FROM markdown_blocks WHERE chapter_id IN (
SELECT id FROM chapters WHERE project_id = ?
)
''', (project_id,))
cursor.execute('DELETE FROM chapters WHERE project_id = ?', (project_id,))
cursor.execute('DELETE FROM projects WHERE id = ?', (project_id,))
db.commit()
vacuum_db()
return jsonify({'success': True})
@project_bp.route('/api/projects/<int:project_id>/save', methods=['POST'])
@login_required
def save_project_content(project_id):
"""Save all chapters and blocks for a project."""
data = request.json
chapters = data.get('chapters', [])
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
if not cursor.fetchone():
return jsonify({'error': 'Project not found'}), 404
cursor.execute('''
DELETE FROM block_images WHERE block_id IN (
SELECT mb.id FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE c.project_id = ?
)
''', (project_id,))
cursor.execute('''
DELETE FROM markdown_blocks WHERE chapter_id IN (
SELECT id FROM chapters WHERE project_id = ?
)
''', (project_id,))
cursor.execute('DELETE FROM chapters WHERE project_id = ?', (project_id,))
for chapter in chapters:
cursor.execute('''
INSERT INTO chapters (project_id, chapter_number, title, voice)
VALUES (?, ?, ?, ?)
''', (
project_id,
chapter['chapter_number'],
chapter.get('title', 'Section'),
chapter.get('voice', 'af_heart')
))
chapter_id = cursor.lastrowid
for block in chapter.get('blocks', []):
cursor.execute('''
INSERT INTO markdown_blocks
(chapter_id, block_order, block_type, content, tts_text, audio_data, audio_format, transcription)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''', (
chapter_id,
block['block_order'],
block.get('block_type', 'paragraph'),
block['content'],
block.get('tts_text'),
block.get('audio_data'),
block.get('audio_format', 'mp3'),
json.dumps(block.get('transcription', []))
))
block_id = cursor.lastrowid
for img in block.get('images', []):
cursor.execute('''
INSERT INTO block_images (block_id, image_data, image_format, alt_text, position)
VALUES (?, ?, ?, ?, ?)
''', (
block_id,
img['data'],
img.get('format', 'png'),
img.get('alt_text', ''),
img.get('position', 'before')
))
cursor.execute('''
UPDATE projects SET updated_at = CURRENT_TIMESTAMP WHERE id = ?
''', (project_id,))
db.commit()
return jsonify({'success': True, 'message': 'Project saved successfully'})
# ============================================
# v4.2: Publishing Endpoints
# ============================================
@project_bp.route('/api/projects/<int:project_id>/publish', methods=['POST'])
@login_required
def publish_project(project_id):
"""Publish a project to make it visible on public homepage."""
data = request.json or {}
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id, name FROM projects WHERE id = ?', (project_id,))
project = cursor.fetchone()
if not project:
return jsonify({'error': 'Project not found'}), 404
# Verify project has at least one chapter with audio
cursor.execute('''
SELECT COUNT(*) as cnt FROM markdown_blocks mb
JOIN chapters c ON mb.chapter_id = c.id
WHERE c.project_id = ? AND mb.audio_data IS NOT NULL AND mb.audio_data != ''
''', (project_id,))
audio_count = cursor.fetchone()['cnt']
if audio_count == 0:
return jsonify({'error': 'Cannot publish: no audio generated yet'}), 400
description = (data.get('description') or '').strip()
author = (data.get('author') or '').strip()
category = (data.get('category') or '').strip()
cursor.execute('''
UPDATE projects
SET is_published = 1,
published_at = CURRENT_TIMESTAMP,
description = ?,
author = ?,
category = ?
WHERE id = ?
''', (description, author, category, project_id))
db.commit()
return jsonify({
'success': True,
'message': f'"{project["name"]}" published successfully!'
})
@project_bp.route('/api/projects/<int:project_id>/unpublish', methods=['POST'])
@login_required
def unpublish_project(project_id):
"""Unpublish a project (but keep author/description/category for easy republish)."""
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
if not cursor.fetchone():
return jsonify({'error': 'Project not found'}), 404
# Only flip is_published flag — keep author/description/category for republish
cursor.execute('UPDATE projects SET is_published = 0 WHERE id = ?', (project_id,))
db.commit()
return jsonify({'success': True, 'message': 'Project unpublished'})
@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['POST'])
@login_required
def upload_thumbnail(project_id):
"""Upload a thumbnail image for the project."""
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
img_file = request.files['file']
if not img_file or not img_file.filename:
return jsonify({'error': 'Invalid file'}), 400
filename = img_file.filename.lower()
if not any(filename.endswith(ext) for ext in ('.png', '.jpg', '.jpeg', '.webp', '.gif')):
return jsonify({'error': 'File must be an image (PNG/JPG/WEBP/GIF)'}), 400
img_bytes = img_file.read()
if len(img_bytes) > 5 * 1024 * 1024:
return jsonify({'error': 'Image too large (max 5MB)'}), 400
fmt = filename.rsplit('.', 1)[-1]
if fmt == 'jpg':
fmt = 'jpeg'
b64 = base64.b64encode(img_bytes).decode('utf-8')
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id FROM projects WHERE id = ?', (project_id,))
if not cursor.fetchone():
return jsonify({'error': 'Project not found'}), 404
cursor.execute('''
UPDATE projects SET thumbnail_data = ?, thumbnail_format = ? WHERE id = ?
''', (b64, fmt, project_id))
db.commit()
return jsonify({
'success': True,
'thumbnail_data': b64,
'thumbnail_format': fmt
})
@project_bp.route('/api/projects/<int:project_id>/thumbnail', methods=['DELETE'])
@login_required
def delete_thumbnail(project_id):
"""Remove project thumbnail."""
db = get_db()
cursor = db.cursor()
cursor.execute('UPDATE projects SET thumbnail_data = NULL WHERE id = ?', (project_id,))
db.commit()
return jsonify({'success': True})

133
routes/public_routes.py Normal file
View File

@@ -0,0 +1,133 @@
# routes/public_routes.py - Public (No Auth) Routes for Published Audiobooks
import json
from flask import Blueprint, jsonify, send_from_directory, abort
from db import get_db
public_bp = Blueprint('public', __name__)
@public_bp.route('/home')
def public_home():
"""Public homepage - Bookcase view of published audiobooks."""
return send_from_directory('templates', 'public_home.html')
@public_bp.route('/read/<int:project_id>')
def public_reader(project_id):
"""Public reader page for a published audiobook."""
db = get_db()
cursor = db.cursor()
cursor.execute('SELECT id, is_published FROM projects WHERE id = ?', (project_id,))
project = cursor.fetchone()
if not project or not project['is_published']:
abort(404)
# Increment view count
cursor.execute('UPDATE projects SET view_count = view_count + 1 WHERE id = ?', (project_id,))
db.commit()
return send_from_directory('templates', 'public_reader.html')
@public_bp.route('/api/public/books', methods=['GET'])
def list_published_books():
"""List all published audiobooks (no auth required)."""
db = get_db()
cursor = db.cursor()
cursor.execute('''
SELECT p.id, p.name, p.description, p.author, p.category,
p.thumbnail_data, p.thumbnail_format, p.published_at,
p.view_count, p.created_at,
(SELECT COUNT(*) FROM chapters WHERE project_id = p.id) as chapter_count
FROM projects p
WHERE p.is_published = 1
ORDER BY p.published_at DESC
''')
books = []
for row in cursor.fetchall():
books.append({
'id': row['id'],
'name': row['name'],
'description': row['description'] or '',
'author': row['author'] or '',
'category': row['category'] or '',
'thumbnail_data': row['thumbnail_data'],
'thumbnail_format': row['thumbnail_format'] or 'png',
'published_at': row['published_at'],
'view_count': row['view_count'] or 0,
'chapter_count': row['chapter_count']
})
return jsonify({'books': books})
@public_bp.route('/api/public/books/<int:project_id>', methods=['GET'])
def get_published_book(project_id):
"""Get full published book content for the reader."""
db = get_db()
cursor = db.cursor()
cursor.execute('''
SELECT * FROM projects WHERE id = ? AND is_published = 1
''', (project_id,))
project = cursor.fetchone()
if not project:
return jsonify({'error': 'Book not found or not published'}), 404
cursor.execute('''
SELECT * FROM chapters WHERE project_id = ? ORDER BY chapter_number
''', (project_id,))
chapters = cursor.fetchall()
chapters_data = []
for chapter in chapters:
cursor.execute('''
SELECT * FROM markdown_blocks WHERE chapter_id = ? ORDER BY block_order
''', (chapter['id'],))
blocks = cursor.fetchall()
blocks_data = []
for block in blocks:
cursor.execute('''
SELECT * FROM block_images WHERE block_id = ? ORDER BY id
''', (block['id'],))
images = cursor.fetchall()
blocks_data.append({
'id': block['id'],
'block_order': block['block_order'],
'block_type': block['block_type'],
'content': block['content'],
'audio_data': block['audio_data'],
'audio_format': block['audio_format'],
'transcription': json.loads(block['transcription']) if block['transcription'] else [],
'images': [{
'data': img['image_data'],
'format': img['image_format'],
'alt_text': img['alt_text'],
'position': img['position']
} for img in images]
})
chapters_data.append({
'id': chapter['id'],
'chapter_number': chapter['chapter_number'],
'title': chapter['title'],
'blocks': blocks_data
})
return jsonify({
'id': project['id'],
'name': project['name'],
'description': project['description'] or '',
'author': project['author'] or '',
'thumbnail_data': project['thumbnail_data'],
'thumbnail_format': project['thumbnail_format'] or 'png',
'chapters': chapters_data
})

View File

@@ -0,0 +1,827 @@
/* ============================================
Markdown Editor Styles
UPDATED: Added Audiobook Maker Panel (fixed)
UPDATED: Added starting-block highlight
UPDATED: Added Sidebar Outline & Section Dividers
============================================= */
/* ============================================
Audiobook Maker Panel (Fixed at top of editor)
============================================= */
.audiobook-maker-panel {
position: sticky;
top: 0;
z-index: 200;
background: linear-gradient(135deg, #1e1b4b 0%, #312e81 50%, #3730a3 100%);
border-radius: var(--border-radius);
padding: 16px 24px;
margin-bottom: 20px;
display: flex;
align-items: center;
justify-content: space-between;
flex-wrap: wrap;
gap: 12px;
box-shadow: 0 4px 20px rgba(30, 27, 75, 0.35);
color: white;
}
.amp-left {
display: flex;
align-items: center;
gap: 16px;
flex-wrap: wrap;
}
.amp-right {
display: flex;
align-items: center;
gap: 10px;
}
.amp-label {
font-weight: 700;
font-size: 0.82rem;
text-transform: uppercase;
letter-spacing: 0.8px;
color: rgba(255,255,255,0.7);
white-space: nowrap;
}
.amp-voice-select {
min-width: 200px;
padding: 8px 12px;
border: 2px solid rgba(255,255,255,0.25);
border-radius: 8px;
background: rgba(255,255,255,0.1);
color: white;
font-size: 0.88rem;
font-weight: 500;
cursor: pointer;
transition: border-color 0.2s;
}
.amp-voice-select:focus {
outline: none;
border-color: rgba(255,255,255,0.6);
background: rgba(255,255,255,0.15);
}
.amp-voice-select option {
background: #1e1b4b;
color: white;
}
/* Block count input group */
.amp-block-count-group {
display: flex;
align-items: center;
gap: 8px;
}
.amp-block-count-label {
font-size: 0.78rem;
font-weight: 600;
color: rgba(255,255,255,0.7);
white-space: nowrap;
}
.amp-block-count-input {
width: 72px;
padding: 8px 10px;
border: 2px solid rgba(255,255,255,0.25);
border-radius: 8px;
background: rgba(255,255,255,0.1);
color: white;
font-size: 0.95rem;
font-weight: 700;
text-align: center;
transition: border-color 0.2s;
-moz-appearance: textfield;
}
.amp-block-count-input::-webkit-outer-spin-button,
.amp-block-count-input::-webkit-inner-spin-button {
-webkit-appearance: none;
margin: 0;
}
.amp-block-count-input:focus {
outline: none;
border-color: rgba(255,255,255,0.6);
background: rgba(255,255,255,0.15);
}
.amp-count-arrows {
display: flex;
flex-direction: column;
gap: 2px;
}
.amp-count-arrow {
width: 24px;
height: 18px;
border: none;
background: rgba(255,255,255,0.12);
color: white;
border-radius: 4px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
font-size: 0.6rem;
transition: background 0.2s;
padding: 0;
}
.amp-count-arrow:hover {
background: rgba(255,255,255,0.25);
}
/* Starting block indicator */
.amp-start-indicator {
display: flex;
align-items: center;
gap: 6px;
padding: 6px 14px;
background: rgba(255,255,255,0.1);
border: 1.5px solid rgba(255,255,255,0.2);
border-radius: 20px;
font-size: 0.78rem;
font-weight: 600;
color: rgba(255,255,255,0.85);
white-space: nowrap;
cursor: pointer;
transition: all 0.2s;
}
.amp-start-indicator:hover {
background: rgba(255,255,255,0.18);
border-color: rgba(255,255,255,0.4);
}
.amp-start-indicator .start-block-num {
background: #fbbf24;
color: #1e1b4b;
font-weight: 800;
font-size: 0.72rem;
padding: 2px 8px;
border-radius: 10px;
min-width: 24px;
text-align: center;
}
.amp-start-indicator.pick-mode {
background: rgba(251, 191, 36, 0.2);
border-color: #fbbf24;
color: #fbbf24;
animation: pickPulse 1.5s ease-in-out infinite;
}
@keyframes pickPulse {
0%, 100% { box-shadow: 0 0 0 0 rgba(251, 191, 36, 0.3); }
50% { box-shadow: 0 0 0 6px rgba(251, 191, 36, 0); }
}
/* Generate button on panel */
.amp-generate-btn {
padding: 10px 24px;
border: none;
border-radius: 10px;
background: linear-gradient(135deg, #10b981 0%, #059669 100%);
color: white;
font-weight: 700;
font-size: 0.88rem;
cursor: pointer;
transition: all 0.2s;
display: flex;
align-items: center;
gap: 8px;
white-space: nowrap;
box-shadow: 0 2px 10px rgba(16, 185, 129, 0.3);
}
.amp-generate-btn:hover {
transform: translateY(-1px);
box-shadow: 0 4px 16px rgba(16, 185, 129, 0.4);
}
.amp-generate-btn:active {
transform: translateY(0);
}
.amp-generate-btn:disabled {
opacity: 0.5;
cursor: not-allowed;
transform: none;
box-shadow: none;
}
/* Stats/info line */
.amp-info {
width: 100%;
display: flex;
align-items: center;
gap: 16px;
padding-top: 8px;
border-top: 1px solid rgba(255,255,255,0.1);
margin-top: 4px;
}
.amp-stat {
font-size: 0.72rem;
color: rgba(255,255,255,0.5);
display: flex;
align-items: center;
gap: 4px;
}
.amp-stat strong {
color: rgba(255,255,255,0.8);
}
/* ============================================
Layout for Sidebar and Editor
============================================= */
.editor-layout {
display: flex;
gap: 24px;
align-items: flex-start;
}
.document-outline {
width: 250px;
flex-shrink: 0;
position: sticky;
top: 120px;
background: white;
border-radius: var(--border-radius);
padding: 16px;
box-shadow: var(--shadow-sm);
border: 1px solid var(--border-color);
max-height: calc(100vh - 140px);
overflow-y: auto;
}
.outline-title {
font-weight: 700;
font-size: 0.9rem;
color: var(--text-secondary);
text-transform: uppercase;
margin-bottom: 12px;
border-bottom: 1px solid var(--border-color);
padding-bottom: 8px;
}
.outline-list {
list-style: none;
padding: 0;
margin: 0;
}
.outline-list li {
padding: 8px 12px;
font-size: 0.85rem;
color: var(--text-primary);
cursor: pointer;
border-radius: 6px;
transition: background 0.2s;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.outline-list li:hover {
background: var(--bg-tertiary);
color: var(--primary-color);
}
.editor-container {
flex: 1;
background: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: var(--border-radius);
min-height: 500px;
padding: 24px 48px;
position: relative;
}
/* ============================================
Section Dividers (Automated Chapter Markers)
============================================= */
.section-divider {
display: flex;
align-items: center;
margin: 32px 0;
position: relative;
}
.divider-line {
flex: 1;
height: 1px;
background: linear-gradient(90deg, transparent, #cbd5e1, transparent);
}
.divider-content {
display: flex;
align-items: center;
gap: 12px;
padding: 0 16px;
position: relative;
}
.section-title {
font-size: 0.9rem;
font-weight: 700;
color: #475569;
text-transform: uppercase;
letter-spacing: 1px;
padding: 4px 12px;
background: #f8fafc;
border-radius: 20px;
border: 1px solid #e2e8f0;
outline: none;
transition: all 0.2s;
}
.section-title:focus {
border-color: var(--primary-color);
background: white;
box-shadow: 0 0 0 2px rgba(79,70,229,0.1);
}
.btn-merge-section {
opacity: 0;
background: #fee2e2;
color: #dc2626;
border: none;
border-radius: 6px;
font-size: 0.7rem;
padding: 4px 8px;
cursor: pointer;
transition: opacity 0.2s;
position: absolute;
left: 100%;
white-space: nowrap;
}
.section-divider:hover .btn-merge-section {
opacity: 1;
}
/* ============================================
Starting Block Highlight
============================================= */
.md-block.starting-block {
border-left: 4px solid #fbbf24 !important;
background: rgba(251, 191, 36, 0.06);
}
.md-block.starting-block::before {
content: 'START';
position: absolute;
top: -1px;
left: -4px;
background: #fbbf24;
color: #1e1b4b;
font-size: 0.58rem;
font-weight: 800;
letter-spacing: 0.5px;
padding: 1px 8px;
border-radius: 0 0 6px 0;
z-index: 5;
}
/* Blocks in generation range */
.md-block.in-gen-range {
border-left: 4px solid #a78bfa !important;
background: rgba(167, 139, 250, 0.04);
}
/* Pick mode: blocks glow on hover */
.editor-pick-mode .md-block:not(.editing):hover {
border-color: #fbbf24 !important;
background: rgba(251, 191, 36, 0.08);
cursor: crosshair;
}
/* ============================================
Markdown Block
============================================= */
.md-block {
position: relative;
margin: 8px 0;
padding: 12px 16px;
border-radius: var(--border-radius-sm);
border: 2px solid transparent;
transition: all 0.2s;
min-height: 48px;
}
.md-block:hover {
background: var(--bg-tertiary);
border-color: var(--border-color);
}
.md-block.editing {
background: #eff6ff;
border-color: var(--primary-color);
box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.1);
}
/* Block Content */
.md-block-content {
font-family: var(--font-serif);
font-size: 1.125rem;
line-height: 1.8;
color: var(--text-primary);
}
.md-block-content h1 {
font-size: 2rem;
font-weight: 700;
margin: 0;
padding-bottom: 8px;
}
.md-block-content h2 {
font-size: 1.5rem;
font-weight: 700;
margin: 0;
}
.md-block-content h3 {
font-size: 1.25rem;
font-weight: 600;
margin: 0;
}
.md-block-content p {
margin: 0;
}
.md-block-content blockquote {
border-left: 4px solid var(--primary-color);
padding-left: 16px;
margin: 0;
font-style: italic;
color: var(--text-secondary);
}
.md-block-content ul,
.md-block-content ol {
margin: 0;
padding-left: 24px;
}
.md-block-content img {
max-width: 100%;
height: auto;
border-radius: var(--border-radius-sm);
margin: 8px auto;
display: block;
}
.md-block-content table {
width: 100%;
border-collapse: collapse;
margin: 8px 0;
}
.md-block-content th,
.md-block-content td {
border: 1px solid var(--border-color);
padding: 8px 12px;
text-align: left;
}
.md-block-content th {
background: var(--bg-tertiary);
font-weight: 600;
}
/* Block Edit Mode */
.md-block-edit {
display: none;
}
.md-block.editing .md-block-content {
display: none;
}
.md-block.editing .md-block-edit {
display: block;
}
.md-block-textarea {
width: 100%;
min-height: 100px;
padding: 12px;
border: none;
border-radius: var(--border-radius-sm);
font-family: var(--font-mono);
font-size: 0.9375rem;
line-height: 1.6;
resize: vertical;
background: white;
}
.md-block-textarea:focus {
outline: none;
}
/* Block Toolbar */
.md-block-toolbar {
display: none;
position: absolute;
top: -45px; /* Adjust top for new buttons */
left: 0;
background: white;
border: 1px solid var(--border-color);
border-radius: var(--border-radius-sm);
padding: 4px;
box-shadow: var(--shadow-md);
z-index: 100;
gap: 4px;
}
.md-block.editing .md-block-toolbar {
display: flex;
}
.toolbar-btn {
width: 32px;
height: 32px;
border: none;
background: transparent;
border-radius: 4px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
color: var(--text-secondary);
transition: all 0.2s;
}
.toolbar-btn:hover {
background: var(--bg-tertiary);
color: var(--primary-color);
}
.toolbar-btn.active {
background: var(--primary-color);
color: white;
}
.toolbar-divider {
width: 1px;
background: var(--border-color);
margin: 4px;
}
.action-btn-text {
font-size: 0.75rem;
font-weight: 600;
padding: 0 6px;
width: auto;
}
/* Empty Block Placeholder */
.md-block-placeholder {
color: var(--text-muted);
font-style: italic;
}
/* ============================================
New Block Line
============================================= */
.new-block-line {
height: 24px;
position: relative;
cursor: text;
}
.new-block-line:hover::before {
content: '';
position: absolute;
left: 0;
right: 0;
top: 50%;
height: 2px;
background: var(--primary-color);
opacity: 0.3;
}
.new-block-line:hover .add-line-buttons {
display: flex;
}
.add-line-buttons {
display: none;
position: absolute;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
gap: 8px;
z-index: 50;
}
.add-line-btn {
width: 28px;
height: 28px;
border: none;
border-radius: 50%;
background: var(--primary-color);
color: white;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
font-size: 0.75rem;
transition: all 0.2s;
}
.add-line-btn:hover {
transform: scale(1.1);
}
/* Image add button - teal */
.add-line-btn.image-btn {
background: #06b6d4;
}
.add-line-btn.image-btn:hover {
background: #0891b2;
}
.add-line-btn.image-btn:hover {
background: #0891b2;
}
/* Section add button - purple */
.add-line-btn.section-btn {
background: #8b5cf6;
}
.add-line-btn.section-btn:hover {
background: #7c3aed;
}
/* ============================================
Image Block - Centered
============================================= */
.image-block {
text-align: center;
padding: 24px;
border: 2px dashed var(--border-color);
border-radius: var(--border-radius);
background: var(--bg-tertiary);
cursor: pointer;
transition: all 0.2s;
}
.image-block:hover {
border-color: var(--primary-color);
background: rgba(79, 70, 229, 0.05);
}
.image-block img {
max-width: 100%;
height: auto;
border-radius: var(--border-radius-sm);
margin: 0 auto;
display: block;
}
.image-upload-placeholder {
color: var(--text-muted);
}
.image-upload-placeholder i {
font-size: 2rem;
margin-bottom: 8px;
display: block;
}
/* ============================================
Audio Indicator
============================================= */
.audio-indicator {
position: absolute;
top: 8px;
right: 8px;
width: 24px;
height: 24px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-size: 0.75rem;
}
.audio-indicator.has-audio {
background: var(--success-color);
color: white;
}
.audio-indicator.no-audio {
background: var(--bg-tertiary);
color: var(--text-muted);
}
/* ============================================
Block Actions Indicator (Edit + Delete)
============================================= */
.block-actions-indicator {
position: absolute;
top: 8px;
right: 40px;
display: flex;
gap: 4px;
opacity: 0;
transition: opacity 0.2s;
z-index: 10;
}
.md-block:hover .block-actions-indicator {
opacity: 1;
}
.action-indicator-btn {
width: 28px;
height: 28px;
border: none;
background: var(--bg-tertiary);
border-radius: 6px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
color: var(--text-secondary);
transition: all 0.2s;
padding: 0;
}
.action-indicator-btn.edit-block-btn:hover {
background: var(--primary-color);
color: white;
}
.action-indicator-btn.delete-block-btn:hover {
background: var(--danger-color);
color: white;
}
/* Hide the old block-edit-indicator since we replaced it */
.block-edit-indicator {
display: none !important;
}
/* ============================================
Responsive
============================================= */
@media (max-width: 768px) {
.audiobook-maker-panel {
flex-direction: column;
align-items: stretch;
padding: 14px 16px;
}
.amp-left {
flex-direction: column;
gap: 10px;
}
.amp-right {
justify-content: center;
}
.amp-voice-select {
min-width: 100%;
}
.amp-info {
flex-wrap: wrap;
gap: 8px;
}
.editor-layout {
flex-direction: column;
}
.document-outline {
width: 100%;
position: relative;
top: 0;
max-height: 200px;
}
}

1398
static/css/style.css Normal file

File diff suppressed because it is too large Load Diff

1174
static/js/app.js Normal file

File diff suppressed because it is too large Load Diff

223
static/js/generation.js Normal file
View File

@@ -0,0 +1,223 @@
/**
* Audio Generation Module
* UPDATED: Panel-based generation (no chapter markers)
* Generates audio for N blocks starting from the selected starting block
*/
// ============================================
// Panel-Based Generation
// ============================================
async function generateFromPanel() {
const textBlocks = getTextBlocks();
if (textBlocks.length === 0) {
alert('No text blocks found to generate audio for.');
return;
}
if (!panelState.startingBlockId) {
alert('No starting block selected.');
return;
}
const startIdx = getTextBlockIndex(panelState.startingBlockId);
if (startIdx < 0) {
alert('Starting block not found. Please select a valid block.');
return;
}
const count = panelState.blockCount || 10;
const voice = panelState.voice || 'af_heart';
const endIdx = Math.min(startIdx + count, textBlocks.length);
const blocksToGenerate = [];
for (let i = startIdx; i < endIdx; i++) {
const blockEl = textBlocks[i];
const textarea = blockEl.querySelector('.md-block-textarea');
const content = textarea ? textarea.value : '';
if (!content.trim()) continue;
// Skip image content
if (content.trim().startsWith('![') && content.trim().indexOf('](') !== -1) continue;
const ttsText = (blockEl.dataset.ttsText && blockEl.dataset.ttsText.trim())
? blockEl.dataset.ttsText
: content;
blocksToGenerate.push({
id: blockEl.id,
text: ttsText,
element: blockEl
});
}
if (blocksToGenerate.length === 0) {
alert('No speakable text blocks found in the selected range.');
return;
}
// Disable generate button
const genBtn = document.getElementById('ampGenerateBtn');
if (genBtn) genBtn.disabled = true;
showLoader(`Generating Audio...`, `Processing ${blocksToGenerate.length} blocks`);
let successCount = 0;
let errorCount = 0;
for (let i = 0; i < blocksToGenerate.length; i++) {
const blockInfo = blocksToGenerate[i];
document.getElementById('loadingSubtext').textContent =
`Block ${i + 1} of ${blocksToGenerate.length}`;
try {
const response = await fetch('/api/generate', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
text: blockInfo.text,
voice: voice,
block_id: null
})
});
const data = await response.json();
if (data.error) {
console.error(`Block ${blockInfo.id} error:`, data.error);
errorCount++;
continue;
}
// Store audio data in editorBlocks
const blockData = editorBlocks.find(b => b.id === blockInfo.id);
if (blockData) {
blockData.audio_data = data.audio_data;
blockData.audio_format = data.audio_format;
blockData.transcription = data.transcription;
}
// Update visual indicator
const indicator = blockInfo.element.querySelector('.audio-indicator');
if (indicator) {
indicator.classList.remove('no-audio');
indicator.classList.add('has-audio');
indicator.title = 'Audio generated';
}
successCount++;
} catch (error) {
console.error(`Block ${blockInfo.id} error:`, error);
errorCount++;
}
}
hideLoader();
// Re-enable generate button
if (genBtn) genBtn.disabled = false;
if (errorCount > 0) {
showNotification(`Generated ${successCount} blocks, ${errorCount} failed`, 'warning');
} else {
showNotification(`Generated audio for ${successCount} blocks!`, 'success');
}
// Update workflow to show audio is ready
if (successCount > 0) {
updateWorkflowProgress('audio-ready');
// Advance starting block
advanceStartingBlockAfterGeneration(endIdx - startIdx);
}
}
// ============================================
// Single Block Generation (from toolbar button)
// ============================================
async function generateSingleBlockAudio(blockId) {
const block = document.getElementById(blockId);
if (!block) {
console.error('Block not found:', blockId);
return;
}
const blockType = block.dataset.blockType || 'paragraph';
if (blockType === 'image') {
alert('Cannot generate audio for image blocks.');
return;
}
const textarea = block.querySelector('.md-block-textarea');
const content = textarea ? textarea.value : '';
if (!content.trim()) {
alert('No text content to generate audio for.');
return;
}
if (content.trim().startsWith('![') && content.trim().indexOf('](') !== -1) {
alert('Cannot generate audio for image blocks.');
return;
}
const ttsText = (block.dataset.ttsText && block.dataset.ttsText.trim()) ? block.dataset.ttsText : content;
const voice = panelState.voice || 'af_heart';
showLoader('Generating Audio...', 'Creating speech and timestamps');
try {
const response = await fetch('/api/generate', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
text: ttsText,
voice: voice,
block_id: null
})
});
const data = await response.json();
if (data.error) {
throw new Error(data.error);
}
const blockData = editorBlocks.find(b => b.id === blockId);
if (blockData) {
blockData.audio_data = data.audio_data;
blockData.audio_format = data.audio_format;
blockData.transcription = data.transcription;
}
const indicator = block.querySelector('.audio-indicator');
if (indicator) {
indicator.classList.remove('no-audio');
indicator.classList.add('has-audio');
indicator.title = 'Audio generated';
}
hideLoader();
showNotification('Audio generated successfully!', 'success');
updateWorkflowProgress('audio-ready');
updatePanelUI();
} catch (error) {
hideLoader();
console.error('Generation error:', error);
alert('Failed to generate audio: ' + error.message);
}
}
// Keep old function name for backward compatibility
function generateBlockAudio(blockId) {
generateSingleBlockAudio(blockId);
}

View File

@@ -0,0 +1,917 @@
/**
* Interactive Reader Module — Smart Preload Architecture (v3)
*
* Loading Strategy:
* - Text and timestamps come from in-memory `editorBlocks` (already loaded).
* - Audio base64 → Blob URL conversion is DEFERRED until needed.
* - When block N plays, preload blob URLs for N+1, N+2 (background).
* - At 70% mark of N's audio, ensure N+1 is ready (safety net).
* - Memory cap: keep at most MAX_AUDIO_LOADED blob URLs alive;
* revoke distant past audio to free browser memory.
*
* Scroll Strategy:
* - Manual navigation (button / outline / word click): scroll block to top.
* - Auto-advance (audio ended → next block): NO block scroll — let the
* word highlighter smoothly carry the user. Prevents jarring jumps.
*/
// ============================================
// Reader State
// ============================================
let readerInstances = [];
let currentReaderInstance = null;
let currentReaderIndex = -1;
let readerStarted = false;
let readerUICreated = false;
// Tunables
const READER_PRELOAD_AHEAD = 2;
const READER_MID_PRELOAD_THRESHOLD = 0.7;
const READER_MAX_AUDIO_LOADED = 5;
const READER_KEEP_BEHIND = 1;
// ============================================
// Render Reader
// ============================================
function renderInteractiveReader() {
const container = document.getElementById('readerContainer');
if (container.style.maxWidth) container.style.maxWidth = '';
const chapters = collectEditorContent();
let hasAudio = false;
const allBlocks = [];
let outlineHtml = '';
let currentIndex = 0;
for (const chapter of chapters) {
if (chapter.blocks.length === 0) continue;
outlineHtml += `
<li onclick="scrollToReaderBlock(${currentIndex})" title="${escapeHtml(chapter.title)}">
${escapeHtml(chapter.title)}
</li>
`;
let isFirstBlockOfChapter = true;
for (const block of chapter.blocks) {
const blockData = findEditorBlockForContent(block);
const isImageBlock = block.block_type === 'image' ||
(block.content && block.content.trim().startsWith('![') && block.content.trim().includes(']('));
allBlocks.push({
...block,
_editorData: blockData || null,
_isImage: isImageBlock,
_chapterTitle: isFirstBlockOfChapter ? chapter.title : null
});
isFirstBlockOfChapter = false;
if (!isImageBlock && blockData && blockData.audio_data) {
hasAudio = true;
}
currentIndex++;
}
}
const readerOutlineSidebar = document.getElementById('readerOutlineSidebar');
const readerOutlineList = document.getElementById('readerOutlineList');
if (!hasAudio) {
container.innerHTML = `
<div class="reader-empty-state">
<i class="bi bi-book"></i>
<p>Generate audio to view the interactive reader</p>
<p class="text-muted">Go to the Editor tab and click "Generate" on the panel</p>
</div>
`;
removeReaderUI();
if (readerOutlineSidebar) readerOutlineSidebar.style.display = 'none';
return;
}
if (readerOutlineSidebar && readerOutlineList) {
readerOutlineSidebar.style.display = 'block';
readerOutlineList.innerHTML = outlineHtml || '<li class="text-muted small">No sections found.</li>';
}
let html = '<div class="reader-flow">';
// Cleanup any previous instances (revoke blob URLs)
cleanupAllReaderInstances();
readerInstances = [];
let globalBlockIndex = 0;
for (const block of allBlocks) {
const blockData = block._editorData;
const isImageBlock = block._isImage;
const hasBlockAudio = !isImageBlock && blockData && blockData.audio_data;
const blockId = blockData ? blockData.id : `reader_${Date.now()}_${Math.random().toString(36).substr(2, 5)}`;
html += `<div class="reader-block" data-block-id="${blockId}" data-reader-index="${globalBlockIndex}" data-has-audio="${!!hasBlockAudio}">`;
if (isImageBlock) {
const imageHtml = buildImageHtml(block, blockData);
html += `<div class="reader-content reader-image-block">${imageHtml}</div>`;
} else {
const blockImages = getBlockImages(block, blockData);
for (const img of blockImages) {
if (img.position === 'before' && img.data) {
html += `<div class="reader-image-block"><img src="data:image/${img.format || 'png'};base64,${img.data}" alt="${img.alt_text || 'Image'}"></div>`;
}
}
html += `<div class="reader-content" id="reader-content-${globalBlockIndex}"></div>`;
for (const img of blockImages) {
if (img.position === 'after' && img.data) {
html += `<div class="reader-image-block"><img src="data:image/${img.format || 'png'};base64,${img.data}" alt="${img.alt_text || 'Image'}"></div>`;
}
}
}
html += `</div>`;
readerInstances.push({
index: globalBlockIndex,
blockId: blockId,
blockData: blockData,
content: block.content,
hasAudio: !!hasBlockAudio,
isImage: isImageBlock,
wordSpans: [],
wordMap: [],
sentenceData: [],
audio: null,
audioUrl: null, // blob URL ref for cleanup
audioReady: false,
audioLoadingPromise: null,
midPreloadTriggered: false,
transcription: (!isImageBlock && blockData) ? (blockData.transcription || []) : [],
animFrameId: null,
lastWordSpan: null,
lastSentenceSpans: []
});
globalBlockIndex++;
}
html += '</div>';
container.innerHTML = html;
// Render words and run sync for every instance (text is cheap and already in memory)
for (const inst of readerInstances) {
if (inst.isImage || !inst.content) continue;
const contentEl = document.getElementById(`reader-content-${inst.index}`);
if (!contentEl) continue;
renderWordsIntoContainer(contentEl, inst);
if (inst.hasAudio && inst.transcription.length > 0) {
runReaderSmartSync(inst);
}
}
addReaderStyles();
setupReaderUI();
}
// ============================================
// Image Resolution Helpers
// ============================================
function findEditorBlockForContent(block) {
for (const eb of editorBlocks) {
const el = document.getElementById(eb.id);
if (el) {
const textarea = el.querySelector('.md-block-textarea');
if (textarea && textarea.value === block.content) {
return eb;
}
}
}
for (const eb of editorBlocks) {
if (eb.content === block.content) return eb;
}
return null;
}
function getBlockImages(block, blockData) {
if (block.images && block.images.length > 0) {
const valid = block.images.filter(img => img.data && img.data.length > 0);
if (valid.length > 0) return valid;
}
if (blockData && blockData.images && blockData.images.length > 0) {
const valid = blockData.images.filter(img => img.data && img.data.length > 0);
if (valid.length > 0) return valid;
}
return [];
}
function buildImageHtml(block, blockData) {
if (block.images && block.images.length > 0) {
let html = '';
for (const img of block.images) {
if (img.data && img.data.length > 0) {
html += `<img src="data:image/${img.format || 'png'};base64,${img.data}" alt="${img.alt_text || 'Image'}">`;
}
}
if (html) return html;
}
if (blockData && blockData.images && blockData.images.length > 0) {
let html = '';
for (const img of blockData.images) {
if (img.data && img.data.length > 0) {
html += `<img src="data:image/${img.format || 'png'};base64,${img.data}" alt="${img.alt_text || 'Image'}">`;
}
}
if (html) return html;
}
if (block.content) {
const dataUriMatch = block.content.match(/!\[([^\]]*)\]\((data:image\/[^)]+)\)/);
if (dataUriMatch) {
return `<img src="${dataUriMatch[2]}" alt="${dataUriMatch[1] || 'Image'}">`;
}
}
if (blockData && blockData.id) {
const editorBlock = document.getElementById(blockData.id);
if (editorBlock) {
const editorImg = editorBlock.querySelector('.image-block img, .md-block-content img');
if (editorImg && editorImg.src && editorImg.src.startsWith('data:image')) {
return `<img src="${editorImg.src}" alt="Image">`;
}
}
}
if (block.content) {
for (const eb of editorBlocks) {
if (eb.content === block.content && eb.images && eb.images.length > 0) {
let html = '';
for (const img of eb.images) {
if (img.data && img.data.length > 0) {
html += `<img src="data:image/${img.format || 'png'};base64,${img.data}" alt="${img.alt_text || 'Image'}">`;
}
}
if (html) return html;
}
}
}
return `<div class="reader-image-placeholder">
<i class="bi bi-image" style="font-size:2rem;color:#94a3b8;"></i>
<p style="color:#94a3b8;margin-top:8px;">Image not available</p>
</div>`;
}
// ============================================
// Word Rendering & Sync
// ============================================
function renderWordsIntoContainer(container, inst) {
const div = document.createElement('div');
div.innerHTML = marked.parse(inst.content, { breaks: true, gfm: true });
inst.wordSpans = [];
function processNode(node) {
if (node.nodeType === Node.TEXT_NODE) {
const words = node.textContent.split(/(\s+)/);
const fragment = document.createDocumentFragment();
words.forEach(part => {
if (part.trim().length > 0) {
const span = document.createElement('span');
span.className = 'reader-word';
span.textContent = part;
span.dataset.readerIndex = inst.index;
span.dataset.wordIdx = inst.wordSpans.length;
inst.wordSpans.push(span);
fragment.appendChild(span);
} else {
fragment.appendChild(document.createTextNode(part));
}
});
node.parentNode.replaceChild(fragment, node);
} else if (node.nodeType === Node.ELEMENT_NODE) {
Array.from(node.childNodes).forEach(processNode);
}
}
processNode(div);
while (div.firstChild) container.appendChild(div.firstChild);
}
function runReaderSmartSync(inst) {
const { wordSpans, transcription } = inst;
inst.wordMap = new Array(wordSpans.length).fill(undefined);
let aiIdx = 0;
wordSpans.forEach((span, i) => {
const textWord = span.textContent.toLowerCase().replace(/[^\w]/g, '');
for (let off = 0; off < 5; off++) {
if (aiIdx + off >= transcription.length) break;
const aiWord = transcription[aiIdx + off].word.toLowerCase().replace(/[^\w]/g, '');
if (textWord === aiWord) {
inst.wordMap[i] = aiIdx + off;
aiIdx += off + 1;
return;
}
}
});
inst.sentenceData = [];
let buffer = [];
let startIdx = 0;
wordSpans.forEach((span, i) => {
buffer.push(span);
if (/[.!?]["'\u201D\u2019]?$/.test(span.textContent.trim())) {
let startT = 0, endT = 0;
for (let k = startIdx; k <= i; k++) {
if (inst.wordMap[k] !== undefined) { startT = transcription[inst.wordMap[k]].start; break; }
}
for (let k = i; k >= startIdx; k--) {
if (inst.wordMap[k] !== undefined) { endT = transcription[inst.wordMap[k]].end; break; }
}
if (endT > startT) inst.sentenceData.push({ spans: [...buffer], startTime: startT, endTime: endT });
buffer = [];
startIdx = i + 1;
}
});
if (buffer.length > 0) {
let startT = 0, endT = 0;
for (let k = startIdx; k < wordSpans.length; k++) {
if (inst.wordMap[k] !== undefined) { startT = transcription[inst.wordMap[k]].start; break; }
}
for (let k = wordSpans.length - 1; k >= startIdx; k--) {
if (inst.wordMap[k] !== undefined) { endT = transcription[inst.wordMap[k]].end; break; }
}
if (endT > startT) inst.sentenceData.push({ spans: [...buffer], startTime: startT, endTime: endT });
}
}
// ============================================
// Reader UI
// ============================================
function setupReaderUI() {
removeReaderUI();
const btn = document.createElement('button');
btn.id = 'reader-floating-btn';
btn.innerHTML = `
<span id="reader-btn-text">Start</span>
<svg id="reader-btn-play" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" style="display:none;width:24px;height:24px;"><path d="M8 5v14l11-7z"/></svg>
<svg id="reader-btn-pause" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" style="display:none;width:24px;height:24px;"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>
<div id="reader-btn-spinner" class="reader-btn-spinner" style="display:none;"></div>
`;
document.body.appendChild(btn);
btn.addEventListener('click', handleReaderFloatingClick);
const container = document.getElementById('readerContainer');
container.addEventListener('click', handleReaderWordClick);
readerStarted = false;
currentReaderInstance = null;
currentReaderIndex = -1;
readerUICreated = true;
positionReaderUI();
window.addEventListener('resize', positionReaderUI);
window.addEventListener('scroll', positionReaderUI);
}
function positionReaderUI() {
const readerContainer = document.getElementById('readerContainer');
const btn = document.getElementById('reader-floating-btn');
if (!readerContainer || !btn) return;
const containerRect = readerContainer.getBoundingClientRect();
btn.style.position = 'fixed';
btn.style.top = '80px';
const rightPos = window.innerWidth - (containerRect.right + 8);
btn.style.right = Math.max(rightPos, 8) + 'px';
btn.style.left = 'auto';
}
function removeReaderUI() {
const oldBtn = document.getElementById('reader-floating-btn');
if (oldBtn) oldBtn.remove();
readerStarted = false;
currentReaderInstance = null;
currentReaderIndex = -1;
readerUICreated = false;
window.removeEventListener('resize', positionReaderUI);
window.removeEventListener('scroll', positionReaderUI);
cleanupAllReaderInstances();
}
function cleanupAllReaderInstances() {
for (const inst of readerInstances) {
if (inst.audio) {
try { inst.audio.pause(); } catch (e) {}
inst.audio = null;
}
if (inst.audioUrl) {
try { URL.revokeObjectURL(inst.audioUrl); } catch (e) {}
inst.audioUrl = null;
}
inst.audioReady = false;
inst.audioLoadingPromise = null;
if (inst.animFrameId) cancelAnimationFrame(inst.animFrameId);
}
}
function showReaderUI() {
const btn = document.getElementById('reader-floating-btn');
if (btn) btn.style.display = 'flex';
positionReaderUI();
}
function hideReaderUI() {
const btn = document.getElementById('reader-floating-btn');
if (btn) btn.style.display = 'none';
}
function setReaderButtonLoading(isLoading) {
const btn = document.getElementById('reader-floating-btn');
if (!btn) return;
btn.classList.toggle('loading', isLoading);
const spinner = document.getElementById('reader-btn-spinner');
if (spinner) spinner.style.display = isLoading ? 'block' : 'none';
}
// ============================================
// Audio Lazy Loading + Memory Management
// ============================================
function ensureReaderAudioLoaded(inst) {
if (inst.audioReady && inst.audio) return Promise.resolve(inst);
if (inst.audioLoadingPromise) return inst.audioLoadingPromise;
inst.audioLoadingPromise = new Promise((resolve, reject) => {
const blockData = inst.blockData;
if (!blockData || !blockData.audio_data) {
inst.audioLoadingPromise = null;
return reject(new Error('No audio data'));
}
try {
const audioBlob = base64ToBlob(blockData.audio_data, `audio/${blockData.audio_format || 'mp3'}`);
const audioUrl = URL.createObjectURL(audioBlob);
const audio = new Audio(audioUrl);
const onCanPlay = () => {
audio.removeEventListener('error', onError);
inst.audio = audio;
inst.audioUrl = audioUrl;
inst.audioReady = true;
wireReaderAudioEvents(inst);
resolve(inst);
};
const onError = () => {
audio.removeEventListener('canplay', onCanPlay);
try { URL.revokeObjectURL(audioUrl); } catch (e) {}
inst.audioLoadingPromise = null;
reject(new Error('Audio failed to load'));
};
audio.addEventListener('canplay', onCanPlay, { once: true });
audio.addEventListener('error', onError, { once: true });
// Audio.load is implicit; setting src starts loading metadata
audio.preload = 'auto';
audio.load();
} catch (err) {
inst.audioLoadingPromise = null;
reject(err);
}
});
return inst.audioLoadingPromise;
}
function wireReaderAudioEvents(inst) {
const audio = inst.audio;
audio.addEventListener('play', () => {
startReaderHighlightLoop(inst);
updateReaderButton('playing');
});
audio.addEventListener('pause', () => {
stopReaderHighlightLoop(inst);
updateReaderButton('paused');
});
audio.addEventListener('ended', () => {
stopReaderHighlightLoop(inst);
clearReaderHighlights(inst);
const nextIdx = findNextAudioIndex(inst.index);
if (nextIdx >= 0) {
playReaderInstanceByIndex(nextIdx, { autoAdvance: true });
} else {
updateReaderButton('paused');
currentReaderInstance = null;
currentReaderIndex = -1;
}
});
// Mid-play safety net: ensure next is ready by 70% of current
audio.addEventListener('timeupdate', () => {
if (inst.midPreloadTriggered) return;
if (!audio.duration || isNaN(audio.duration)) return;
if ((audio.currentTime / audio.duration) >= READER_MID_PRELOAD_THRESHOLD) {
inst.midPreloadTriggered = true;
const nextIdx = findNextAudioIndex(inst.index);
if (nextIdx >= 0) {
ensureReaderAudioLoaded(readerInstances[nextIdx]).catch(() => {});
}
}
});
}
function preloadReaderAhead(fromIndex) {
let preloadedCount = 0;
let idx = fromIndex + 1;
while (idx < readerInstances.length && preloadedCount < READER_PRELOAD_AHEAD) {
const inst = readerInstances[idx];
if (inst.hasAudio) {
ensureReaderAudioLoaded(inst).catch(() => {});
preloadedCount++;
}
idx++;
}
}
function pruneReaderLoadedAudio(currentIndex) {
const loaded = readerInstances.filter(i => i.audioReady && i.audio);
if (loaded.length <= READER_MAX_AUDIO_LOADED) return;
const keepLow = currentIndex - READER_KEEP_BEHIND;
const keepHigh = currentIndex + READER_PRELOAD_AHEAD;
const candidates = loaded
.filter(inst => inst !== currentReaderInstance)
.map(inst => ({
inst,
inWindow: inst.index >= keepLow && inst.index <= keepHigh,
distance: Math.abs(inst.index - currentIndex)
}))
.sort((a, b) => {
if (a.inWindow !== b.inWindow) return a.inWindow ? 1 : -1;
return b.distance - a.distance;
});
let toEvict = loaded.length - READER_MAX_AUDIO_LOADED;
for (const c of candidates) {
if (toEvict <= 0) break;
releaseReaderAudio(c.inst);
toEvict--;
}
}
function releaseReaderAudio(inst) {
if (!inst.audio) return;
try { inst.audio.pause(); } catch (e) {}
if (inst.audioUrl) {
try { URL.revokeObjectURL(inst.audioUrl); } catch (e) {}
inst.audioUrl = null;
}
inst.audio = null;
inst.audioReady = false;
inst.audioLoadingPromise = null;
inst.midPreloadTriggered = false;
}
// ============================================
// Playback & Navigation
// ============================================
function handleReaderFloatingClick() {
if (!readerStarted) {
readerStarted = true;
const firstIdx = findNextAudioIndex(-1);
if (firstIdx >= 0) playReaderInstanceByIndex(firstIdx);
return;
}
if (currentReaderInstance && currentReaderInstance.audio) {
if (currentReaderInstance.audio.paused) {
currentReaderInstance.audio.play().catch(console.error);
updateReaderButton('playing');
} else {
currentReaderInstance.audio.pause();
updateReaderButton('paused');
}
} else {
const firstIdx = findNextAudioIndex(-1);
if (firstIdx >= 0) playReaderInstanceByIndex(firstIdx);
}
}
async function handleReaderWordClick(event) {
const wordSpan = event.target.closest('.reader-word');
if (!wordSpan) return;
const readerIdx = parseInt(wordSpan.dataset.readerIndex, 10);
const wordIdx = parseInt(wordSpan.dataset.wordIdx, 10);
const inst = readerInstances[readerIdx];
if (!inst || !inst.hasAudio) return;
const aiIdx = inst.wordMap[wordIdx];
if (aiIdx === undefined) return;
const timestamp = inst.transcription[aiIdx].start;
playReaderInstanceByIndex(readerIdx, { timestamp });
}
function findNextAudioIndex(afterIndex) {
for (let i = afterIndex + 1; i < readerInstances.length; i++) {
if (readerInstances[i].hasAudio) return i;
}
return -1;
}
async function playReaderInstanceByIndex(index, opts = {}) {
if (index < 0 || index >= readerInstances.length) {
updateReaderButton('paused');
currentReaderInstance = null;
currentReaderIndex = -1;
return;
}
const inst = readerInstances[index];
if (!inst.hasAudio) {
// Skip non-audio blocks
playReaderInstanceByIndex(findNextAudioIndex(index), opts);
return;
}
const isAutoAdvance = opts.autoAdvance === true;
const timestamp = opts.timestamp != null ? opts.timestamp : 0;
if (currentReaderInstance && currentReaderInstance !== inst) {
stopReaderInstance(currentReaderInstance);
}
readerStarted = true;
currentReaderIndex = index;
currentReaderInstance = inst;
inst.midPreloadTriggered = false;
const needsLoad = !inst.audioReady;
if (needsLoad) setReaderButtonLoading(true);
try {
await ensureReaderAudioLoaded(inst);
if (needsLoad) setReaderButtonLoading(false);
inst.audio.currentTime = timestamp;
await inst.audio.play();
updateReaderButton('playing');
// Block-level scroll ONLY for manual navigation
if (!isAutoAdvance) {
const blockEl = document.querySelector(`.reader-block[data-reader-index="${index}"]`);
if (blockEl) {
const rect = blockEl.getBoundingClientRect();
if (rect.top < 0 || rect.top > window.innerHeight * 0.6) {
blockEl.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
}
}
preloadReaderAhead(index);
pruneReaderLoadedAudio(index);
} catch (err) {
console.error('Reader playback failed:', err);
setReaderButtonLoading(false);
updateReaderButton('paused');
if (typeof showNotification === 'function') {
showNotification('Failed to load audio. Tap again to retry.', 'warning');
}
}
}
function stopReaderInstance(inst) {
if (inst.audio) {
try {
inst.audio.pause();
inst.audio.currentTime = 0;
} catch (e) {}
}
stopReaderHighlightLoop(inst);
clearReaderHighlights(inst);
}
function scrollToReaderBlock(index) {
const blockEl = document.querySelector(`.reader-block[data-reader-index="${index}"]`);
if (blockEl) {
const headerOffset = 100;
const elementPosition = blockEl.getBoundingClientRect().top;
const offsetPosition = elementPosition + window.pageYOffset - headerOffset;
window.scrollTo({ top: offsetPosition, behavior: 'smooth' });
blockEl.classList.add('highlight-section');
setTimeout(() => blockEl.classList.remove('highlight-section'), 2000);
}
}
// ============================================
// Highlighting
// ============================================
function startReaderHighlightLoop(inst) {
cancelAnimationFrame(inst.animFrameId);
function loop() {
if (!inst.audio || inst.audio.paused) return;
const currentTime = inst.audio.currentTime;
const activeAiIndex = inst.transcription.findIndex(w => currentTime >= w.start && currentTime < w.end);
if (activeAiIndex !== -1) {
const activeTextIndex = inst.wordMap.findIndex(i => i === activeAiIndex);
if (activeTextIndex !== -1) {
const activeSpan = inst.wordSpans[activeTextIndex];
if (activeSpan !== inst.lastWordSpan) {
if (inst.lastWordSpan) inst.lastWordSpan.classList.remove('current-word');
activeSpan.classList.add('current-word');
const rect = activeSpan.getBoundingClientRect();
// Relaxed threshold for smoother scroll
if (rect.top < window.innerHeight * 0.2 || rect.bottom > window.innerHeight * 0.8) {
activeSpan.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
inst.lastWordSpan = activeSpan;
}
}
}
const activeSentence = inst.sentenceData.find(s => currentTime >= s.startTime && currentTime <= s.endTime);
if (activeSentence && activeSentence.spans !== inst.lastSentenceSpans) {
if (inst.lastSentenceSpans && inst.lastSentenceSpans.length) {
inst.lastSentenceSpans.forEach(s => s.classList.remove('current-sentence-bg'));
}
activeSentence.spans.forEach(s => s.classList.add('current-sentence-bg'));
inst.lastSentenceSpans = activeSentence.spans;
}
inst.animFrameId = requestAnimationFrame(loop);
}
inst.animFrameId = requestAnimationFrame(loop);
}
function stopReaderHighlightLoop(inst) {
cancelAnimationFrame(inst.animFrameId);
}
function clearReaderHighlights(inst) {
if (inst.lastWordSpan) {
inst.lastWordSpan.classList.remove('current-word');
inst.lastWordSpan = null;
}
if (inst.lastSentenceSpans && inst.lastSentenceSpans.length) {
inst.lastSentenceSpans.forEach(s => s.classList.remove('current-sentence-bg'));
inst.lastSentenceSpans = [];
}
}
// ============================================
// Button State
// ============================================
function updateReaderButton(state) {
const btn = document.getElementById('reader-floating-btn');
if (!btn) return;
const textEl = document.getElementById('reader-btn-text');
const playIcon = document.getElementById('reader-btn-play');
const pauseIcon = document.getElementById('reader-btn-pause');
// If loading, the spinner overrides icons
if (btn.classList.contains('loading')) return;
if (readerStarted) {
if (textEl) textEl.style.display = 'none';
btn.classList.add('active-mode');
if (state === 'playing') {
playIcon.style.display = 'none';
pauseIcon.style.display = 'block';
} else {
playIcon.style.display = 'block';
pauseIcon.style.display = 'none';
}
}
}
// ============================================
// Utility
// ============================================
function base64ToBlob(base64, mimeType) {
const byteCharacters = atob(base64);
const byteNumbers = new Array(byteCharacters.length);
for (let i = 0; i < byteCharacters.length; i++) {
byteNumbers[i] = byteCharacters.charCodeAt(i);
}
const byteArray = new Uint8Array(byteNumbers);
return new Blob([byteArray], { type: mimeType });
}
function addReaderStyles() {
if (document.getElementById('readerStyles')) return;
const style = document.createElement('style');
style.id = 'readerStyles';
style.textContent = `
@keyframes readerSpin { to { transform: rotate(360deg); } }
#readerContainer {
flex: 1;
background: var(--bg-secondary);
border: 1px solid var(--border-color);
border-radius: var(--border-radius);
min-height: 500px;
padding: 24px 48px !important;
position: relative;
max-width: 900px !important;
margin: 0 auto;
}
@media (max-width: 768px) {
#readerContainer { padding: 16px 24px !important; }
}
.reader-flow { margin-bottom: 48px; }
.reader-block { position: relative; margin-bottom: 16px; padding: 8px 16px; border-radius: var(--border-radius-sm); transition: background 0.2s; }
.reader-content { font-family: var(--font-serif); font-size: 1.125rem; line-height: 1.8; }
.reader-content p { margin-bottom: 1em; }
.reader-content h1, .reader-content h2, .reader-content h3 { font-family: var(--font-serif); margin-top: 1.5em; margin-bottom: 0.5em; }
.reader-image-block { text-align: center; margin: 24px 0; }
.reader-image-block img {
max-width: 100%; height: auto; border-radius: 12px;
box-shadow: 0 4px 12px rgba(0,0,0,0.1); display: block; margin: 0 auto;
}
.reader-image-placeholder { text-align: center; padding: 40px; background: #f8fafc; border: 2px dashed #e2e8f0; border-radius: 12px; }
.reader-word { cursor: pointer; padding: 1px 0; border-radius: 3px; transition: background 0.15s, color 0.15s; }
.reader-word:hover { background: #e3f2fd; }
.reader-word.current-word { color: #3d4e81; text-decoration: underline; text-decoration-thickness: 3px; text-underline-offset: 3px; font-weight: 700; }
.current-sentence-bg { -webkit-box-decoration-break: clone; box-decoration-break: clone; background-color: #e0e7ff; padding: 0.1em 0.2em; margin: 0 -0.15em; border-radius: 6px; }
#reader-floating-btn {
position: fixed; top: 80px; right: 24px; height: 56px; min-width: 56px; padding: 0 20px;
border-radius: 28px; background: linear-gradient(135deg, var(--primary-color) 0%, #7c3aed 100%);
border: none; color: white; box-shadow: 0 4px 15px rgba(0,0,0,0.25);
display: flex; align-items: center; justify-content: center; gap: 8px; cursor: pointer;
z-index: 1050; transition: transform 0.2s, box-shadow 0.2s, background 0.2s;
font-family: var(--font-sans); font-weight: 600; font-size: 1rem;
}
#reader-floating-btn:hover { transform: scale(1.05); box-shadow: 0 6px 20px rgba(0,0,0,0.3); }
#reader-floating-btn:active { transform: scale(0.95); }
#reader-floating-btn.active-mode { width: 56px; padding: 0; border-radius: 50%; }
#reader-floating-btn.active-mode #reader-btn-text { display: none; }
#reader-floating-btn.loading {
background: linear-gradient(135deg, #6b7280, #9ca3af);
cursor: wait;
}
#reader-floating-btn.loading #reader-btn-text,
#reader-floating-btn.loading #reader-btn-play,
#reader-floating-btn.loading #reader-btn-pause {
display: none !important;
}
.reader-btn-spinner {
width: 24px; height: 24px;
border: 3px solid rgba(255,255,255,0.3);
border-top-color: white;
border-radius: 50%;
animation: readerSpin 0.8s linear infinite;
}
@media (max-width: 768px) {
#reader-floating-btn { top: auto; bottom: 20px; right: 20px !important; left: auto !important; }
}
.highlight-section {
animation: highlightPulse 2s ease-out;
}
@keyframes highlightPulse {
0% { background-color: rgba(79, 70, 229, 0.15); border-left: 4px solid #4f46e5; border-radius: var(--border-radius-sm); }
100% { background-color: transparent; border-left: 4px solid transparent; border-radius: var(--border-radius-sm); }
}
`;
document.head.appendChild(style);
}

1164
static/js/markdown-editor.js Normal file

File diff suppressed because it is too large Load Diff

270
static/js/pdf-handler.js Normal file
View File

@@ -0,0 +1,270 @@
/**
* Document Handler Module (PDF, DOCX, DOC)
* AUTHORITATIVE renderDocumentBlocks() — single source of truth
* Section markers are data-driven via editorBlocks[].sectionStart
*/
function initPdfHandler() {
const uploadZone = document.getElementById('uploadZone');
const docInput = document.getElementById('docInput');
if (!uploadZone || !docInput) return;
uploadZone.addEventListener('click', (e) => {
if (e.target.closest('button')) return;
docInput.click();
});
docInput.addEventListener('change', (e) => {
const file = e.target.files[0];
if (file) {
handleDocumentFile(file);
}
});
uploadZone.addEventListener('dragover', (e) => {
e.preventDefault();
uploadZone.classList.add('drag-over');
});
uploadZone.addEventListener('dragleave', () => {
uploadZone.classList.remove('drag-over');
});
uploadZone.addEventListener('drop', (e) => {
e.preventDefault();
uploadZone.classList.remove('drag-over');
const files = e.dataTransfer.files;
if (files.length > 0) {
const file = files[0];
const name = file.name.toLowerCase();
if (name.endsWith('.pdf') || name.endsWith('.docx') || name.endsWith('.doc')) {
handleDocumentFile(file);
} else {
alert('Please drop a valid PDF, DOCX, or DOC file.');
}
}
});
console.log('📄 Document handler initialized (PDF, DOCX, DOC)');
}
function handleDocumentFile(file) {
const name = file.name.toLowerCase();
if (name.endsWith('.pdf')) {
handlePdfFile(file);
} else if (name.endsWith('.docx') || name.endsWith('.doc')) {
handleWordFile(file);
} else {
alert('Unsupported file type. Please upload a PDF, DOCX, or DOC file.');
}
}
async function handlePdfFile(file) {
if (!file.name.toLowerCase().endsWith('.pdf')) {
alert('Please select a valid PDF file.');
return;
}
showLoader('Processing PDF...', `Extracting content from ${file.name}`);
const formData = new FormData();
formData.append('file', file);
try {
const response = await fetch('/api/upload-pdf', {
method: 'POST',
body: formData
});
const data = await response.json();
if (data.error) {
throw new Error(data.error);
}
console.log(`✅ PDF processed: ${data.page_count} pages, ${data.blocks.length} blocks`);
const projectName = file.name.replace('.pdf', '');
document.getElementById('projectName').value = projectName;
currentProject.name = projectName;
renderDocumentBlocks(data.blocks);
document.getElementById('uploadSection').style.display = 'none';
document.getElementById('editorSection').style.display = 'block';
const panel = document.getElementById('audiobookMakerPanel');
if (panel) panel.style.display = 'flex';
const sidebar = document.getElementById('documentOutlineSidebar');
if (sidebar) sidebar.style.display = 'block';
hideLoader();
showNotification(`PDF processed: ${data.blocks.length} blocks extracted`, 'success');
updateWorkflowProgress('edit');
} catch (error) {
hideLoader();
console.error('PDF processing error:', error);
alert('Failed to process PDF: ' + error.message);
}
}
async function handleWordFile(file) {
const name = file.name.toLowerCase();
if (!name.endsWith('.docx') && !name.endsWith('.doc')) {
alert('Please select a valid DOCX or DOC file.');
return;
}
const fileType = name.endsWith('.docx') ? 'DOCX' : 'DOC';
showLoader(`Processing ${fileType}...`, `Extracting content from ${file.name}`);
const formData = new FormData();
formData.append('file', file);
try {
const response = await fetch('/api/upload-docx', {
method: 'POST',
body: formData
});
const data = await response.json();
if (data.error) {
throw new Error(data.error);
}
console.log(`${fileType} processed: ${data.blocks.length} blocks`);
const projectName = file.name.replace(/\.(docx|doc)$/i, '');
document.getElementById('projectName').value = projectName;
currentProject.name = projectName;
renderDocumentBlocks(data.blocks);
document.getElementById('uploadSection').style.display = 'none';
document.getElementById('editorSection').style.display = 'block';
const panel = document.getElementById('audiobookMakerPanel');
if (panel) panel.style.display = 'flex';
const sidebar = document.getElementById('documentOutlineSidebar');
if (sidebar) sidebar.style.display = 'block';
hideLoader();
showNotification(`${fileType} processed: ${data.blocks.length} blocks extracted`, 'success');
updateWorkflowProgress('edit');
} catch (error) {
hideLoader();
console.error(`${fileType} processing error:`, error);
alert(`Failed to process ${fileType}: ` + error.message);
}
}
/**
* AUTHORITATIVE renderDocumentBlocks()
* This is the ONLY version of this function in the entire app.
* Section dividers are rendered from editorBlocks[].sectionStart data.
*/
function renderDocumentBlocks(blocks) {
const editor = document.getElementById('markdownEditor');
if (!editor) return;
editor.innerHTML = '';
editorBlocks = [];
const emptyMessage = document.getElementById('emptyEditorMessage');
if (emptyMessage) {
emptyMessage.style.display = 'none';
}
for (const block of blocks) {
let type = 'paragraph';
let content = block.content || '';
if (block.type === 'image') {
type = 'image';
} else if (block.type === 'heading1' || content.startsWith('# ')) {
type = 'heading1';
} else if (block.type === 'heading2' || content.startsWith('## ')) {
type = 'heading2';
} else if (block.type === 'heading3' || content.startsWith('### ')) {
type = 'heading3';
} else if (block.type === 'list_item' || content.startsWith('- ')) {
type = 'bulletList';
} else if (block.type === 'quote' || content.startsWith('> ')) {
type = 'quote';
} else if (block.type === 'table') {
type = 'table';
}
let images = [];
if (block.type === 'image' && block.data) {
images = [{
data: block.data,
format: block.format || 'png',
alt_text: 'Document Image',
position: 'before'
}];
content = `![Document Image](embedded-image.${block.format || 'png'})`;
}
const lastChild = editor.lastElementChild;
const blockId = addBlock(type, content, lastChild, images);
// Store section info in editorBlocks data (data-driven approach)
if (block.is_section_start) {
const blockData = editorBlocks.find(b => b.id === blockId);
if (blockData) {
blockData.sectionStart = true;
blockData.sectionName = block.section_name || 'Section';
}
}
if (block.type === 'image' && block.data) {
const blockEl = document.getElementById(blockId);
if (blockEl) {
const contentDiv = blockEl.querySelector('.md-block-content');
if (contentDiv) {
contentDiv.innerHTML = `
<div class="image-block">
<img src="data:image/${block.format || 'png'};base64,${block.data}" alt="Document Image">
</div>
`;
}
}
}
const blockEl = document.getElementById(blockId);
if (blockEl) {
ensureNewBlockLineAfter(blockEl);
}
}
// Now render all section dividers from data
renderAllSectionDividers();
repairAllNewBlockLines();
// Initialize panel state for new document
const textBlocks = getTextBlocks();
if (textBlocks.length > 0) {
panelState.startingBlockId = textBlocks[0].id;
panelState.blockCount = textBlocks.length; // Modified: set to total blocks
}
updatePanelUI();
renderDocumentOutline();
checkEmptyEditor();
}
// Keep backward compatibility alias
function renderPdfBlocks(blocks) {
renderDocumentBlocks(blocks);
}

679
templates/admin.html Normal file
View File

@@ -0,0 +1,679 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Admin Dashboard - Audiobook Maker Pro</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
<style>
:root {
--primary-color: #4f46e5;
--primary-hover: #4338ca;
--success-color: #10b981;
--danger-color: #ef4444;
--warning-color: #f59e0b;
--bg-primary: #f8fafc;
--text-primary: #1e293b;
--text-secondary: #64748b;
--text-muted: #94a3b8;
--border-color: #e2e8f0;
}
* { box-sizing: border-box; }
body {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
background-color: var(--bg-primary);
color: var(--text-primary);
margin: 0;
}
.admin-header {
background: linear-gradient(135deg, var(--primary-color) 0%, #7c3aed 100%);
color: white;
padding: 16px 24px;
display: flex;
justify-content: space-between;
align-items: center;
}
.admin-header h1 {
font-size: 1.25rem;
font-weight: 700;
margin: 0;
display: flex;
align-items: center;
gap: 10px;
}
.admin-header-actions {
display: flex;
gap: 10px;
align-items: center;
}
.admin-header-actions .btn {
border-radius: 8px;
font-weight: 500;
font-size: 0.85rem;
}
.btn-header-light {
background: rgba(255,255,255,0.15);
border: 1.5px solid rgba(255,255,255,0.4);
color: white;
}
.btn-header-light:hover {
background: rgba(255,255,255,0.25);
color: white;
}
.admin-container {
max-width: 900px;
margin: 32px auto;
padding: 0 24px;
}
.admin-card {
background: white;
border-radius: 16px;
box-shadow: 0 4px 12px rgba(0,0,0,0.06);
border: 1px solid var(--border-color);
overflow: hidden;
}
.admin-card-header {
padding: 20px 24px;
border-bottom: 1px solid var(--border-color);
display: flex;
justify-content: space-between;
align-items: center;
}
.admin-card-header h2 {
font-size: 1.1rem;
font-weight: 700;
margin: 0;
display: flex;
align-items: center;
gap: 8px;
}
.user-count-badge {
background: var(--bg-primary);
color: var(--text-secondary);
font-size: 0.75rem;
font-weight: 600;
padding: 2px 10px;
border-radius: 12px;
border: 1px solid var(--border-color);
}
.user-table {
width: 100%;
border-collapse: collapse;
}
.user-table th {
background: var(--bg-primary);
padding: 12px 20px;
font-size: 0.75rem;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.5px;
color: var(--text-secondary);
text-align: left;
border-bottom: 1px solid var(--border-color);
}
.user-table td {
padding: 14px 20px;
border-bottom: 1px solid #f1f5f9;
font-size: 0.88rem;
vertical-align: middle;
}
.user-table tr:last-child td {
border-bottom: none;
}
.user-table tr:hover td {
background: #fafbfd;
}
.role-badge {
display: inline-flex;
align-items: center;
gap: 4px;
padding: 3px 10px;
border-radius: 12px;
font-size: 0.72rem;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.3px;
}
.role-badge.admin {
background: #ede9fe;
color: #5b21b6;
}
.role-badge.user {
background: #e0f2fe;
color: #0369a1;
}
.status-badge {
display: inline-flex;
align-items: center;
gap: 4px;
font-size: 0.8rem;
font-weight: 500;
}
.status-badge .status-dot {
width: 8px;
height: 8px;
border-radius: 50%;
}
.status-badge.active .status-dot { background: var(--success-color); }
.status-badge.active { color: #065f46; }
.status-badge.disabled .status-dot { background: var(--danger-color); }
.status-badge.disabled { color: #991b1b; }
.user-actions .btn {
padding: 4px 10px;
font-size: 0.78rem;
border-radius: 6px;
}
.date-text {
font-size: 0.8rem;
color: var(--text-muted);
}
.empty-state {
text-align: center;
padding: 60px 20px;
color: var(--text-muted);
}
.empty-state i {
font-size: 3rem;
margin-bottom: 12px;
display: block;
}
/* Modal tweaks */
.modal-content {
border: none;
border-radius: 16px;
box-shadow: 0 20px 50px rgba(0,0,0,0.2);
}
.modal-header {
border-bottom: 1px solid var(--border-color);
padding: 20px 24px;
}
.modal-body { padding: 24px; }
.modal-footer { border-top: 1px solid var(--border-color); padding: 16px 24px; }
.form-label {
font-weight: 600;
font-size: 0.85rem;
color: #374151;
}
.form-control, .form-select {
padding: 10px 14px;
border: 2px solid var(--border-color);
border-radius: 8px;
font-size: 0.9rem;
}
.form-control:focus, .form-select:focus {
border-color: var(--primary-color);
box-shadow: 0 0 0 3px rgba(79,70,229,0.1);
}
.toast-container {
position: fixed;
top: 20px;
right: 20px;
z-index: 9999;
}
@media (max-width: 768px) {
.admin-header {
flex-direction: column;
gap: 12px;
text-align: center;
}
.admin-card-header {
flex-direction: column;
gap: 12px;
}
.user-table {
font-size: 0.82rem;
}
.user-table th, .user-table td {
padding: 10px 12px;
}
}
</style>
</head>
<body>
<!-- Header -->
<header class="admin-header">
<h1>
<i class="bi bi-shield-lock"></i>
Admin Dashboard
</h1>
<div class="admin-header-actions">
<span class="text-white-50" id="currentUserLabel"></span>
<a href="/" class="btn btn-header-light btn-sm">
<i class="bi bi-arrow-left me-1"></i> Back to App
</a>
<button class="btn btn-header-light btn-sm" onclick="handleLogout()">
<i class="bi bi-box-arrow-right me-1"></i> Logout
</button>
</div>
</header>
<!-- Main Content -->
<div class="admin-container">
<div class="admin-card">
<div class="admin-card-header">
<h2>
<i class="bi bi-people"></i>
User Management
<span class="user-count-badge" id="userCountBadge">0 users</span>
</h2>
<button class="btn btn-primary btn-sm" onclick="openCreateModal()">
<i class="bi bi-person-plus me-1"></i> Create User
</button>
</div>
<div id="userTableContainer">
<div class="empty-state">
<div class="spinner-border text-primary" role="status"></div>
<p class="mt-2">Loading users...</p>
</div>
</div>
</div>
</div>
<!-- Toast Container -->
<div class="toast-container" id="toastContainer"></div>
<!-- Create/Edit User Modal -->
<div class="modal fade" id="userModal" tabindex="-1">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="userModalTitle">
<i class="bi bi-person-plus me-2"></i>Create New User
</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal"></button>
</div>
<div class="modal-body">
<input type="hidden" id="editUserId">
<div class="mb-3">
<label for="modalUsername" class="form-label">Username</label>
<input type="text" class="form-control" id="modalUsername"
placeholder="Enter username" minlength="3" required>
</div>
<div class="mb-3">
<label for="modalPassword" class="form-label">
Password
<span class="text-muted fw-normal" id="passwordHint" style="display:none;">(leave blank to keep current)</span>
</label>
<input type="text" class="form-control" id="modalPassword"
placeholder="Enter password" minlength="4">
</div>
<div class="mb-3">
<label for="modalRole" class="form-label">Role</label>
<select class="form-select" id="modalRole">
<option value="user">User</option>
<option value="admin">Admin</option>
</select>
</div>
<div class="mb-3" id="activeField" style="display:none;">
<label class="form-label">Status</label>
<div class="form-check form-switch">
<input class="form-check-input" type="checkbox" id="modalActive" checked>
<label class="form-check-label" for="modalActive">Account Active</label>
</div>
</div>
<div class="alert alert-danger" id="modalError" style="display:none;"></div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Cancel</button>
<button type="button" class="btn btn-primary" id="modalSaveBtn" onclick="saveUser()">
<i class="bi bi-check-lg me-1"></i> Create User
</button>
</div>
</div>
</div>
</div>
<!-- Delete Confirmation Modal -->
<div class="modal fade" id="deleteModal" tabindex="-1">
<div class="modal-dialog modal-sm">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title"><i class="bi bi-exclamation-triangle text-danger me-2"></i>Delete User</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal"></button>
</div>
<div class="modal-body">
<p>Are you sure you want to delete <strong id="deleteUserName"></strong>?</p>
<p class="text-muted small mb-0">This action cannot be undone.</p>
<input type="hidden" id="deleteUserId">
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary btn-sm" data-bs-dismiss="modal">Cancel</button>
<button type="button" class="btn btn-danger btn-sm" onclick="confirmDelete()">
<i class="bi bi-trash me-1"></i> Delete
</button>
</div>
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
<script>
let userModal, deleteModal;
let currentUsers = [];
document.addEventListener('DOMContentLoaded', function() {
userModal = new bootstrap.Modal(document.getElementById('userModal'));
deleteModal = new bootstrap.Modal(document.getElementById('deleteModal'));
loadCurrentUser();
loadUsers();
});
async function loadCurrentUser() {
try {
const resp = await fetch('/api/auth/me');
const data = await resp.json();
if (data.user) {
document.getElementById('currentUserLabel').textContent =
`Logged in as ${data.user.username}`;
}
} catch(e) { /* ignore */ }
}
async function loadUsers() {
try {
const resp = await fetch('/api/admin/users');
const data = await resp.json();
currentUsers = data.users || [];
renderUserTable(currentUsers);
} catch(e) {
document.getElementById('userTableContainer').innerHTML =
'<div class="empty-state"><i class="bi bi-exclamation-circle"></i><p>Failed to load users</p></div>';
}
}
function renderUserTable(users) {
const container = document.getElementById('userTableContainer');
const badge = document.getElementById('userCountBadge');
badge.textContent = `${users.length} user${users.length !== 1 ? 's' : ''}`;
if (users.length === 0) {
container.innerHTML = '<div class="empty-state"><i class="bi bi-people"></i><p>No users found</p></div>';
return;
}
let html = `<table class="user-table">
<thead>
<tr>
<th>User</th>
<th>Role</th>
<th>Status</th>
<th>Last Login</th>
<th>Created</th>
<th style="text-align:right;">Actions</th>
</tr>
</thead>
<tbody>`;
for (const user of users) {
const statusClass = user.is_active ? 'active' : 'disabled';
const statusText = user.is_active ? 'Active' : 'Disabled';
const lastLogin = user.last_login ? formatDate(user.last_login) : '<span class="text-muted">Never</span>';
html += `<tr>
<td>
<strong>${escapeHtml(user.username)}</strong>
</td>
<td>
<span class="role-badge ${user.role}">
<i class="bi bi-${user.role === 'admin' ? 'shield-check' : 'person'}"></i>
${user.role}
</span>
</td>
<td>
<span class="status-badge ${statusClass}">
<span class="status-dot"></span>
${statusText}
</span>
</td>
<td class="date-text">${lastLogin}</td>
<td class="date-text">${formatDate(user.created_at)}</td>
<td style="text-align:right;">
<div class="user-actions d-flex gap-1 justify-content-end">
<button class="btn btn-outline-primary btn-sm" onclick="openEditModal(${user.id})" title="Edit user">
<i class="bi bi-pencil"></i>
</button>
<button class="btn btn-outline-danger btn-sm" onclick="openDeleteModal(${user.id}, '${escapeHtml(user.username)}')" title="Delete user">
<i class="bi bi-trash"></i>
</button>
</div>
</td>
</tr>`;
}
html += '</tbody></table>';
container.innerHTML = html;
}
// --- Create / Edit ---
function openCreateModal() {
document.getElementById('userModalTitle').innerHTML = '<i class="bi bi-person-plus me-2"></i>Create New User';
document.getElementById('editUserId').value = '';
document.getElementById('modalUsername').value = '';
document.getElementById('modalPassword').value = '';
document.getElementById('modalPassword').required = true;
document.getElementById('modalPassword').placeholder = 'Enter password';
document.getElementById('passwordHint').style.display = 'none';
document.getElementById('modalRole').value = 'user';
document.getElementById('modalActive').checked = true;
document.getElementById('activeField').style.display = 'none';
document.getElementById('modalSaveBtn').innerHTML = '<i class="bi bi-check-lg me-1"></i> Create User';
document.getElementById('modalError').style.display = 'none';
userModal.show();
}
function openEditModal(userId) {
const user = currentUsers.find(u => u.id === userId);
if (!user) return;
document.getElementById('userModalTitle').innerHTML = '<i class="bi bi-pencil me-2"></i>Edit User';
document.getElementById('editUserId').value = userId;
document.getElementById('modalUsername').value = user.username;
document.getElementById('modalPassword').value = '';
document.getElementById('modalPassword').required = false;
document.getElementById('modalPassword').placeholder = 'Leave blank to keep current';
document.getElementById('passwordHint').style.display = 'inline';
document.getElementById('modalRole').value = user.role;
document.getElementById('modalActive').checked = user.is_active;
document.getElementById('activeField').style.display = 'block';
document.getElementById('modalSaveBtn').innerHTML = '<i class="bi bi-check-lg me-1"></i> Save Changes';
document.getElementById('modalError').style.display = 'none';
userModal.show();
}
async function saveUser() {
const userId = document.getElementById('editUserId').value;
const username = document.getElementById('modalUsername').value.trim();
const password = document.getElementById('modalPassword').value;
const role = document.getElementById('modalRole').value;
const isActive = document.getElementById('modalActive').checked;
const errorDiv = document.getElementById('modalError');
errorDiv.style.display = 'none';
if (!username || username.length < 3) {
errorDiv.textContent = 'Username must be at least 3 characters';
errorDiv.style.display = 'block';
return;
}
if (!userId && (!password || password.length < 4)) {
errorDiv.textContent = 'Password must be at least 4 characters';
errorDiv.style.display = 'block';
return;
}
if (userId && password && password.length < 4) {
errorDiv.textContent = 'Password must be at least 4 characters';
errorDiv.style.display = 'block';
return;
}
try {
let url, method, body;
if (userId) {
url = `/api/admin/users/${userId}`;
method = 'PUT';
body = { username, role, is_active: isActive };
if (password) body.password = password;
} else {
url = '/api/admin/users';
method = 'POST';
body = { username, password, role };
}
const resp = await fetch(url, {
method: method,
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body)
});
const data = await resp.json();
if (data.error) {
errorDiv.textContent = data.error;
errorDiv.style.display = 'block';
return;
}
userModal.hide();
showToast(data.message || 'User saved successfully', 'success');
loadUsers();
} catch(e) {
errorDiv.textContent = 'Network error. Please try again.';
errorDiv.style.display = 'block';
}
}
// --- Delete ---
function openDeleteModal(userId, username) {
document.getElementById('deleteUserId').value = userId;
document.getElementById('deleteUserName').textContent = username;
deleteModal.show();
}
async function confirmDelete() {
const userId = document.getElementById('deleteUserId').value;
try {
const resp = await fetch(`/api/admin/users/${userId}`, { method: 'DELETE' });
const data = await resp.json();
deleteModal.hide();
if (data.error) {
showToast(data.error, 'error');
return;
}
showToast(data.message || 'User deleted', 'success');
loadUsers();
} catch(e) {
deleteModal.hide();
showToast('Failed to delete user', 'error');
}
}
// --- Logout ---
async function handleLogout() {
try {
await fetch('/api/auth/logout', { method: 'POST' });
} catch(e) { /* ignore */ }
window.location.href = '/login';
}
// --- Utilities ---
function escapeHtml(text) {
const div = document.createElement('div');
div.textContent = text;
return div.innerHTML;
}
function formatDate(dateStr) {
if (!dateStr) return '';
const d = new Date(dateStr);
return d.toLocaleDateString() + ' ' + d.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
}
function showToast(message, type = 'info') {
const container = document.getElementById('toastContainer');
const colors = { success: 'success', error: 'danger', info: 'primary' };
const icons = { success: 'check-circle', error: 'exclamation-circle', info: 'info-circle' };
const toast = document.createElement('div');
toast.className = `alert alert-${colors[type] || 'info'} d-flex align-items-center gap-2`;
toast.style.cssText = 'min-width:280px; animation: slideIn 0.3s ease; margin-bottom: 8px;';
toast.innerHTML = `<i class="bi bi-${icons[type] || 'info-circle'}"></i> ${escapeHtml(message)}`;
container.appendChild(toast);
setTimeout(() => {
toast.style.animation = 'slideOut 0.3s ease';
setTimeout(() => toast.remove(), 300);
}, 3000);
}
// Inject animation styles
const s = document.createElement('style');
s.textContent = `
@keyframes slideIn { from { transform: translateX(100%); opacity: 0; } to { transform: translateX(0); opacity: 1; } }
@keyframes slideOut { from { transform: translateX(0); opacity: 1; } to { transform: translateX(100%); opacity: 0; } }
`;
document.head.appendChild(s);
</script>
</body>
</html>

478
templates/index.html Normal file
View File

@@ -0,0 +1,478 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Audiobook Maker Pro v4.1</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Merriweather:ital,wght@0,400;0,700;1,400&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<link rel="stylesheet" href="/static/css/style.css">
<link rel="stylesheet" href="/static/css/markdown-editor.css">
</head>
<body>
<div class="loading-overlay" id="loadingOverlay">
<div class="loading-content">
<div class="spinner-border text-primary" role="status"></div>
<h5 id="loadingText">Processing...</h5>
<p id="loadingSubtext" class="text-muted">Please wait</p>
</div>
</div>
<div class="welcome-overlay" id="welcomeOverlay">
<div class="welcome-card">
<div class="welcome-header">
<i class="bi bi-soundwave"></i>
<h2>Welcome to Audiobook Maker Pro</h2>
<p>Turn any document into an interactive audiobook in 3 simple steps</p>
</div>
<div class="welcome-steps">
<div class="welcome-step">
<div class="welcome-step-number">Step-1</div>
<div class="welcome-step-icon"><i class="bi bi-upload"></i></div>
<h4>Upload Document</h4>
<div class="welcome-step-details">
<p>Drag & drop a <strong>PDF</strong>, <strong>DOCX</strong>, or <strong>DOC</strong> file into the upload area</p>
</div>
</div>
<div class="welcome-step-arrow"><i class="bi bi-arrow-right"></i></div>
<div class="welcome-step">
<div class="welcome-step-number">Step-2</div>
<div class="welcome-step-icon"><i class="bi bi-soundwave"></i></div>
<h4>Edit & Generate Audio</h4>
<div class="welcome-step-details">
<p>Use the <strong>Audiobook Maker Panel</strong> at the top to select a voice and set how many blocks to generate</p>
<p>Click <span class="welcome-ui-btn success-btn"><i class="bi bi-play-fill"></i> Generate</span> to create audio with word-level timestamps</p>
</div>
</div>
<div class="welcome-step-arrow"><i class="bi bi-arrow-right"></i></div>
<div class="welcome-step">
<div class="welcome-step-number">Step-3</div>
<div class="welcome-step-icon"><i class="bi bi-book"></i></div>
<h4>Read & Export</h4>
<div class="welcome-step-details">
<p>Switch to the <span class="welcome-ui-btn"><i class="bi bi-book"></i> Interactive Reader</span> tab to listen with word-by-word highlighting</p>
<p>Click <span class="welcome-ui-btn primary-btn"><i class="bi bi-download"></i> Export</span> in the header to download your audiobook as a ZIP</p>
</div>
</div>
</div>
<div class="welcome-actions">
<button class="btn btn-primary btn-lg" onclick="dismissWelcome()">
<i class="bi bi-rocket-takeoff me-2"></i>Get Started
</button>
<label class="welcome-dismiss-label">
<input type="checkbox" id="welcomeDontShow"> Don't show this again
</label>
</div>
</div>
</div>
<div class="floating-guide-panel" id="floatingGuidePanel">
<div class="guide-panel-header" id="guidePanelHeader">
<div class="guide-panel-title">
<i class="bi bi-lightbulb"></i>
<span>Quick Guide</span>
</div>
<div class="guide-panel-controls">
<button class="guide-panel-btn" id="guidePanelCollapse" onclick="toggleGuideCollapse()" title="Collapse / Expand">
<i class="bi bi-chevron-up" id="guideCollapseIcon"></i>
</button>
<button class="guide-panel-btn" onclick="hideGuidePanel()" title="Hide guide panel">
<i class="bi bi-x-lg"></i>
</button>
</div>
</div>
<div class="guide-panel-body" id="guidePanelBody">
<div class="guide-section">
<div class="guide-section-badge">Step 2 — Edit & Generate</div>
<div class="guide-instruction">
<span class="guide-step-num">1</span>
<div class="guide-instruction-text">
Use the <strong>panel at the top</strong> to select a <strong>voice</strong> and set the number of blocks to generate
</div>
</div>
<div class="guide-instruction">
<span class="guide-step-num">2</span>
<div class="guide-instruction-text">
Click the <strong>Starting from</strong> indicator to pick which block to start from, then click
<span class="guide-ui generate-btn-ui"><i class="bi bi-play-fill"></i> Generate</span>
to create audio
</div>
</div>
<div class="guide-instruction">
<span class="guide-step-num">3</span>
<div class="guide-instruction-text">
Click
<span class="guide-ui image-btn-ui"><i class="bi bi-image"></i></span>
between blocks to <strong>add an image</strong>, or
<span class="guide-ui add-block-btn-ui"><i class="bi bi-plus"></i></span>
to <strong>add a text block</strong>
</div>
</div>
<div class="guide-instruction">
<span class="guide-step-num">4</span>
<div class="guide-instruction-text">
After generating, the starting block <strong>auto-advances</strong> so you can keep clicking Generate for the next batch
</div>
</div>
</div>
<div class="guide-section">
<div class="guide-section-badge step3-badge">Step 3 — Read & Export</div>
<div class="guide-instruction">
<span class="guide-step-num">5</span>
<div class="guide-instruction-text">
Switch to the
<span class="guide-ui reader-tab-ui"><i class="bi bi-book"></i> Interactive Reader</span>
tab to <strong>listen</strong> with word-by-word highlighting
</div>
</div>
<div class="guide-instruction">
<span class="guide-step-num">6</span>
<div class="guide-instruction-text">
Click
<span class="guide-ui export-btn-ui"><i class="bi bi-download"></i> Export</span>
in the header to <strong>download</strong> your audiobook as a ZIP
</div>
</div>
</div>
<div class="guide-panel-footer">
<label class="guide-dont-show">
<input type="checkbox" id="guidePanelDontShow" onchange="handleGuideDontShow()">
Don't show this again
</label>
</div>
</div>
</div>
<button class="floating-guide-toggle" id="floatingGuideToggle" onclick="showGuidePanel()" title="Show Quick Guide">
<i class="bi bi-lightbulb"></i>
</button>
<div class="app-container">
<header class="app-header">
<div class="header-left">
<h1 class="app-title">
<i class="bi bi-soundwave"></i>
Audiobook Maker Pro
<span class="version-badge">v4.2</span>
</h1>
</div>
<div class="header-right">
<div class="project-controls">
<input type="text" id="projectName" class="form-control project-name-input"
placeholder="Project Name" value="My Audiobook">
<button class="btn btn-success" id="saveProjectBtn" onclick="saveProject()" title="Save project (Ctrl+S)">
<i class="bi bi-save me-1"></i> Save
</button>
<button class="btn btn-primary" onclick="exportProject()" title="Export as ZIP with audio + reader">
<i class="bi bi-download me-1"></i> Export
</button>
<button class="btn btn-header-archive" onclick="openProjectArchive()" title="Load or manage saved projects">
<i class="bi bi-archive me-1"></i> Archive
</button>
<button class="btn btn-header-help" id="headerHelpBtn" onclick="handleHeaderHelp()" title="Show quick start guide">
<i class="bi bi-question-circle me-1"></i>
<span id="headerHelpLabel">Quick Start</span>
</button>
<div class="dropdown">
<button class="btn btn-header-user dropdown-toggle" type="button"
data-bs-toggle="dropdown" aria-expanded="false" id="userMenuBtn">
<i class="bi bi-person-circle me-1"></i>
<span id="headerUsername">User</span>
</button>
<ul class="dropdown-menu dropdown-menu-end">
<li id="adminMenuItem" style="display:none;">
<a class="dropdown-item" href="/admin">
<i class="bi bi-shield-lock me-2"></i>Admin Dashboard
</a>
</li>
<li id="adminDivider" style="display:none;"><hr class="dropdown-divider"></li>
<li>
<a class="dropdown-item" href="#" onclick="openChangePassword()">
<i class="bi bi-key me-2"></i>Change Password
</a>
</li>
<li><hr class="dropdown-divider"></li>
<li>
<a class="dropdown-item text-danger" href="#" onclick="handleLogout()">
<i class="bi bi-box-arrow-right me-2"></i>Logout
</a>
</li>
</ul>
</div>
</div>
</div>
</header>
<div class="workflow-progress" id="workflowProgress">
<div class="workflow-step active" id="wpStep1">
<div class="workflow-step-dot">1</div>
<span>Upload</span>
</div>
<div class="workflow-connector" id="wpConn1"></div>
<div class="workflow-step" id="wpStep2">
<div class="workflow-step-dot">2</div>
<span>Edit & Generate</span>
</div>
<div class="workflow-connector" id="wpConn2"></div>
<div class="workflow-step" id="wpStep3">
<div class="workflow-step-dot">3</div>
<span>Read & Export</span>
</div>
</div>
<main class="main-content">
<ul class="nav nav-tabs main-tabs" id="mainTabs" role="tablist">
<li class="nav-item">
<button class="nav-link active" id="editor-tab" data-bs-toggle="tab"
data-bs-target="#editorPanel" type="button">
<i class="bi bi-pencil-square me-1"></i> Audiobook Maker Editor
</button>
</li>
<li class="nav-item">
<button class="nav-link" id="reader-tab" data-bs-toggle="tab"
data-bs-target="#readerPanel" type="button">
<i class="bi bi-book me-1"></i> Interactive Reader
<span class="reader-tab-badge" id="readerTabBadge" style="display:none;">
Ready
</span>
</button>
</li>
</ul>
<div class="tab-content" id="mainTabContent">
<div class="tab-pane fade show active" id="editorPanel" role="tabpanel">
<div class="upload-section" id="uploadSection">
<div class="upload-zone-wrapper" style="min-height: 380px; border: 2px dashed var(--border-color); border-radius: var(--border-radius-lg); overflow: hidden; background: var(--bg-tertiary); transition: border-color 0.3s;">
<div class="upload-zone" id="uploadZone" style="width: 100%; display: flex; align-items: center; justify-content: center; padding: 48px 32px; cursor: pointer; transition: all 0.3s;">
<input type="file" id="docInput" accept=".pdf,.doc,.docx" hidden>
<div class="upload-content" style="text-align: center; max-width: 500px;">
<i class="bi bi-file-earmark-arrow-up upload-icon"></i>
<h4>Upload Your Document</h4>
<p class="text-muted mb-3">
Drag & drop a file here, or click the button below to browse
</p>
<div class="d-flex gap-2 justify-content-center flex-wrap mb-3">
<span class="badge bg-danger bg-opacity-10 text-danger px-3 py-2">
<i class="bi bi-file-earmark-pdf me-1"></i>PDF
</span>
<span class="badge bg-primary bg-opacity-10 text-primary px-3 py-2">
<i class="bi bi-file-earmark-word me-1"></i>DOC
</span>
</div>
<button class="btn btn-primary btn-lg" onclick="event.stopPropagation(); document.getElementById('docInput').click()">
<i class="bi bi-upload me-2"></i>Choose File
</button>
<p class="upload-hint-text">
<i class="bi bi-magic me-1"></i>
AI will auto-detect chapters, fix broken paragraphs, and structure your book
</p>
</div>
</div>
</div>
</div>
<div class="editor-section" id="editorSection" style="display:none">
<div class="audiobook-maker-panel" id="audiobookMakerPanel" style="display:none;">
<div class="amp-left">
<div>
<div class="amp-label">Voice</div>
<select class="amp-voice-select" id="ampVoiceSelect"
onchange="handleVoiceChange(this.value)">
<option value="af_heart">Heart (US Female)</option>
</select>
</div>
<div>
<div class="amp-label">Blocks to generate</div>
<div class="amp-block-count-group">
<input type="number" class="amp-block-count-input" id="ampBlockCount"
value="10" min="1" max="999"
onchange="handleBlockCountChange(this.value)"
onkeyup="handleBlockCountChange(this.value)">
<div class="amp-count-arrows">
<button class="amp-count-arrow" onclick="adjustBlockCount(1)" title="Increase">
<i class="bi bi-chevron-up"></i>
</button>
<button class="amp-count-arrow" onclick="adjustBlockCount(-1)" title="Decrease">
<i class="bi bi-chevron-down"></i>
</button>
</div>
</div>
</div>
<div>
<div class="amp-label">Starting from</div>
<div class="amp-start-indicator" id="ampStartIndicator"
onclick="togglePickMode()" title="Click to pick a different starting block">
<i class="bi bi-cursor-fill"></i>
Block <span class="start-block-num" id="ampStartBlockNum">1</span>
</div>
</div>
</div>
<div class="amp-right">
<button class="amp-generate-btn" id="ampGenerateBtn" onclick="generateFromPanel()">
<i class="bi bi-play-fill"></i>
Generate
</button>
</div>
<div class="amp-info">
<div class="amp-stat">
<i class="bi bi-layers"></i>
Total Blocks: <strong id="ampTotalBlocks">0</strong>
</div>
<div class="amp-stat">
<i class="bi bi-check-circle"></i>
Generated: <strong id="ampGeneratedBlocks">0</strong>
</div>
<div class="amp-stat">
<i class="bi bi-hourglass-split"></i>
Remaining: <strong id="ampRemainingBlocks">0</strong>
</div>
</div>
</div>
<div class="editor-layout">
<aside class="document-outline" id="documentOutlineSidebar" style="display:none;">
<h6 class="outline-title"><i class="bi bi-list-nested"></i> Document Outline</h6>
<ul class="outline-list" id="documentOutlineList">
<li class="text-muted small">No sections found.</li>
</ul>
</aside>
<div class="editor-container" id="markdownEditor">
<div class="empty-editor-message" id="emptyEditorMessage">
<i class="bi bi-file-text"></i>
<p>Upload a document to create content</p>
<p class="text-muted small">Click anywhere to add a new block</p>
</div>
</div>
</div>
</div>
</div>
<div class="tab-pane fade" id="readerPanel" role="tabpanel">
<div class="editor-layout">
<aside class="document-outline" id="readerOutlineSidebar" style="display:none;">
<h6 class="outline-title"><i class="bi bi-list-nested"></i> Outline</h6>
<ul class="outline-list" id="readerOutlineList">
<li class="text-muted small">No sections found.</li>
</ul>
</aside>
<div class="reader-container" id="readerContainer" style="flex: 1; max-width: 100%;">
<div class="reader-empty-state">
<i class="bi bi-book"></i>
<p>Generate audio to view the interactive reader</p>
<p class="text-muted">Go to the Editor tab, then click <strong>"Generate"</strong> on the panel</p>
<button class="btn btn-primary mt-3" onclick="switchToEditorTab()">
<i class="bi bi-arrow-left me-1"></i> Go to Editor
</button>
</div>
</div>
</div>
</div>
</div>
</main>
</div>
<div class="modal fade" id="archiveModal" tabindex="-1">
<div class="modal-dialog modal-lg">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">
<i class="bi bi-archive me-2"></i>Project Archive
</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal"></button>
</div>
<div class="modal-body">
<div id="projectList"></div>
</div>
</div>
</div>
</div>
<div class="modal fade" id="ttsEditModal" tabindex="-1">
<div class="modal-dialog modal-lg">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">
<i class="bi bi-mic me-2"></i>Edit Text for TTS Generation
</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal"></button>
</div>
<div class="modal-body">
<p class="text-muted small mb-2">
<i class="bi bi-info-circle me-1"></i>
This text will be used for audio generation. The original markdown content will be preserved.
</p>
<textarea id="ttsTextInput" class="form-control" rows="8"></textarea>
<input type="hidden" id="ttsBlockId">
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Cancel</button>
<button type="button" class="btn btn-primary" onclick="saveTtsText()">
<i class="bi bi-check-lg me-1"></i>Save
</button>
</div>
</div>
</div>
</div>
<div class="modal fade" id="changePasswordModal" tabindex="-1">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title">
<i class="bi bi-key me-2"></i>Change Password
</h5>
<button type="button" class="btn-close" data-bs-dismiss="modal"></button>
</div>
<div class="modal-body">
<div class="mb-3">
<label for="currentPassword" class="form-label">Current Password</label>
<input type="password" class="form-control" id="currentPassword" required>
</div>
<div class="mb-3">
<label for="newPassword" class="form-label">New Password</label>
<input type="password" class="form-control" id="newPassword" required minlength="4">
</div>
<div class="mb-3">
<label for="confirmPassword" class="form-label">Confirm New Password</label>
<input type="password" class="form-control" id="confirmPassword" required>
</div>
<div class="alert alert-danger" id="changePasswordError" style="display:none;"></div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Cancel</button>
<button type="button" class="btn btn-primary" onclick="submitChangePassword()">
<i class="bi bi-check-lg me-1"></i>Change Password
</button>
</div>
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="/static/js/app.js"></script>
<script src="/static/js/markdown-editor.js"></script>
<script src="/static/js/pdf-handler.js"></script>
<script src="/static/js/generation.js"></script>
<script src="/static/js/interactive-reader.js"></script>
</body>
</html>

345
templates/login.html Normal file
View File

@@ -0,0 +1,345 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Login - Audiobook Maker Pro</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
<style>
:root {
--primary-color: #4f46e5;
--primary-hover: #4338ca;
}
* { box-sizing: border-box; }
body {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
display: flex;
align-items: center;
justify-content: center;
margin: 0;
padding: 24px;
}
.login-container {
width: 100%;
max-width: 420px;
}
.login-card {
background: white;
border-radius: 20px;
padding: 48px 40px;
box-shadow: 0 25px 60px rgba(0, 0, 0, 0.3);
animation: slideUp 0.4s ease;
}
@keyframes slideUp {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
.login-header {
text-align: center;
margin-bottom: 36px;
}
.login-logo {
width: 64px;
height: 64px;
border-radius: 16px;
background: linear-gradient(135deg, var(--primary-color) 0%, #7c3aed 100%);
display: flex;
align-items: center;
justify-content: center;
margin: 0 auto 16px;
font-size: 1.75rem;
color: white;
}
.login-header h1 {
font-size: 1.5rem;
font-weight: 700;
color: #1e293b;
margin-bottom: 4px;
}
.login-header p {
color: #64748b;
font-size: 0.9rem;
margin: 0;
}
.form-label {
font-weight: 600;
color: #374151;
font-size: 0.85rem;
margin-bottom: 6px;
}
.form-control {
padding: 12px 16px;
border: 2px solid #e2e8f0;
border-radius: 10px;
font-size: 0.95rem;
transition: all 0.2s;
}
.form-control:focus {
border-color: var(--primary-color);
box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.1);
}
.input-group-text {
background: #f8fafc;
border: 2px solid #e2e8f0;
border-right: none;
border-radius: 10px 0 0 10px;
color: #94a3b8;
}
.input-group .form-control {
border-left: none;
border-radius: 0 10px 10px 0;
}
.input-group:focus-within .input-group-text {
border-color: var(--primary-color);
}
.btn-login {
width: 100%;
padding: 12px;
background: linear-gradient(135deg, var(--primary-color) 0%, #7c3aed 100%);
border: none;
border-radius: 10px;
color: white;
font-weight: 600;
font-size: 1rem;
transition: all 0.2s;
display: flex;
align-items: center;
justify-content: center;
gap: 8px;
}
.btn-login:hover {
transform: translateY(-1px);
box-shadow: 0 8px 25px rgba(79, 70, 229, 0.4);
color: white;
}
.btn-login:active {
transform: translateY(0);
}
.btn-login:disabled {
opacity: 0.7;
transform: none;
box-shadow: none;
}
.login-error {
background: #fef2f2;
border: 1px solid #fecaca;
color: #dc2626;
border-radius: 10px;
padding: 12px 16px;
font-size: 0.85rem;
display: none;
align-items: center;
gap: 8px;
margin-bottom: 20px;
animation: shakeError 0.4s ease;
}
.login-error.visible {
display: flex;
}
@keyframes shakeError {
0%, 100% { transform: translateX(0); }
25% { transform: translateX(-8px); }
75% { transform: translateX(8px); }
}
.login-footer {
text-align: center;
margin-top: 24px;
color: #94a3b8;
font-size: 0.78rem;
}
.password-toggle {
position: absolute;
right: 12px;
top: 50%;
transform: translateY(-50%);
background: none;
border: none;
color: #94a3b8;
cursor: pointer;
padding: 4px;
z-index: 5;
}
.password-toggle:hover {
color: #64748b;
}
.password-wrapper {
position: relative;
}
.password-wrapper .form-control {
padding-right: 44px;
}
.spinner-border-sm {
width: 1rem;
height: 1rem;
}
</style>
</head>
<body>
<div class="login-container">
<div class="login-card">
<div class="login-header">
<div class="login-logo">
<i class="bi bi-soundwave"></i>
</div>
<h1>Audiobook Maker Pro</h1>
<p>Sign in to your account</p>
</div>
<div class="login-error" id="loginError">
<i class="bi bi-exclamation-triangle-fill"></i>
<span id="loginErrorText"></span>
</div>
<form id="loginForm" onsubmit="handleLogin(event)">
<div class="mb-3">
<label for="username" class="form-label">Username</label>
<div class="input-group">
<span class="input-group-text"><i class="bi bi-person"></i></span>
<input type="text" class="form-control" id="username"
placeholder="Enter your username" required autofocus
autocomplete="username">
</div>
</div>
<div class="mb-4">
<label for="password" class="form-label">Password</label>
<div class="input-group">
<span class="input-group-text"><i class="bi bi-lock"></i></span>
<div class="password-wrapper" style="flex:1;">
<input type="password" class="form-control" id="password"
placeholder="Enter your password" required
autocomplete="current-password">
<button type="button" class="password-toggle" onclick="togglePassword()">
<i class="bi bi-eye" id="passwordToggleIcon"></i>
</button>
</div>
</div>
</div>
<button type="submit" class="btn btn-login" id="loginBtn">
<span id="loginBtnText">Sign In</span>
<div class="spinner-border spinner-border-sm" id="loginSpinner" style="display:none;" role="status"></div>
</button>
</form>
<div class="login-footer">
<i class="bi bi-shield-lock me-1"></i>
Audiobook Maker Pro v3.1
</div>
</div>
</div>
<script>
function togglePassword() {
const input = document.getElementById('password');
const icon = document.getElementById('passwordToggleIcon');
if (input.type === 'password') {
input.type = 'text';
icon.classList.remove('bi-eye');
icon.classList.add('bi-eye-slash');
} else {
input.type = 'password';
icon.classList.remove('bi-eye-slash');
icon.classList.add('bi-eye');
}
}
async function handleLogin(event) {
event.preventDefault();
const username = document.getElementById('username').value.trim();
const password = document.getElementById('password').value;
const errorDiv = document.getElementById('loginError');
const errorText = document.getElementById('loginErrorText');
const loginBtn = document.getElementById('loginBtn');
const btnText = document.getElementById('loginBtnText');
const spinner = document.getElementById('loginSpinner');
// Hide previous error
errorDiv.classList.remove('visible');
// Show loading
loginBtn.disabled = true;
btnText.textContent = 'Signing in...';
spinner.style.display = 'inline-block';
try {
const response = await fetch('/api/auth/login', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ username, password })
});
const data = await response.json();
if (data.error) {
errorText.textContent = data.error;
errorDiv.classList.add('visible');
loginBtn.disabled = false;
btnText.textContent = 'Sign In';
spinner.style.display = 'none';
// Shake the password field
document.getElementById('password').select();
return;
}
// Success — redirect
btnText.textContent = 'Redirecting...';
window.location.href = '/';
} catch (error) {
errorText.textContent = 'Network error. Please try again.';
errorDiv.classList.add('visible');
loginBtn.disabled = false;
btnText.textContent = 'Sign In';
spinner.style.display = 'none';
}
}
// Handle Enter key
document.addEventListener('keydown', function(e) {
if (e.key === 'Enter') {
document.getElementById('loginForm').requestSubmit();
}
});
</script>
</body>
</html>

516
templates/public_home.html Normal file
View File

@@ -0,0 +1,516 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Audiobook Library</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Playfair+Display:wght@600;700;900&display=swap" rel="stylesheet">
<style>
* { box-sizing: border-box; }
body {
font-family: 'Inter', sans-serif;
margin: 0;
background: #f5e9d6;
min-height: 100vh;
color: #3e2723;
}
/* Header */
.library-header {
background: linear-gradient(135deg, #4a2c2a 0%, #6b4226 100%);
color: #f5e9d6;
padding: 24px 32px;
box-shadow: 0 4px 20px rgba(0,0,0,0.15);
position: sticky;
top: 0;
z-index: 100;
}
.header-container {
max-width: 1400px;
margin: 0 auto;
display: flex;
align-items: center;
justify-content: space-between;
gap: 24px;
flex-wrap: wrap;
}
.library-title {
font-family: 'Playfair Display', serif;
font-weight: 900;
font-size: 1.8rem;
margin: 0;
display: flex;
align-items: center;
gap: 12px;
}
.library-title i { color: #f0c97a; }
.header-actions {
display: flex;
gap: 12px;
align-items: center;
}
.search-box {
background: rgba(255,255,255,0.1);
border: 1.5px solid rgba(255,255,255,0.25);
color: #f5e9d6;
border-radius: 24px;
padding: 8px 18px 8px 40px;
min-width: 240px;
font-size: 0.9rem;
position: relative;
}
.search-wrapper {
position: relative;
}
.search-wrapper i {
position: absolute;
left: 14px;
top: 50%;
transform: translateY(-50%);
color: rgba(245,233,214,0.6);
font-size: 0.9rem;
}
.search-box::placeholder { color: rgba(245,233,214,0.5); }
.search-box:focus {
outline: none;
border-color: #f0c97a;
background: rgba(255,255,255,0.15);
}
.btn-login-link {
background: rgba(255,255,255,0.15);
border: 1.5px solid rgba(255,255,255,0.3);
color: #f5e9d6;
padding: 8px 20px;
border-radius: 24px;
font-weight: 600;
font-size: 0.88rem;
text-decoration: none;
transition: all 0.2s;
display: flex;
align-items: center;
gap: 6px;
}
.btn-login-link:hover {
background: rgba(255,255,255,0.25);
color: #fff;
}
/* Bookcase container */
.bookcase-container {
max-width: 1400px;
margin: 0 auto;
padding: 40px 24px;
}
.library-intro {
text-align: center;
margin-bottom: 32px;
}
.library-intro h2 {
font-family: 'Playfair Display', serif;
font-size: 2.2rem;
font-weight: 700;
color: #4a2c2a;
margin-bottom: 8px;
}
.library-intro p {
color: #6b4226;
font-size: 1.05rem;
opacity: 0.85;
}
/* Bookcase shelf */
.bookcase {
background: linear-gradient(180deg, #c8a87b 0%, #a67c52 100%);
border-radius: 16px;
padding: 24px;
box-shadow: 0 12px 40px rgba(74,44,42,0.3), inset 0 2px 4px rgba(255,255,255,0.2);
position: relative;
}
.shelf {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
gap: 24px;
padding: 20px 16px 36px;
position: relative;
border-bottom: 8px solid #6b4226;
box-shadow: 0 6px 0 #5a3520, 0 8px 12px rgba(0,0,0,0.2);
margin-bottom: 24px;
border-radius: 6px;
}
.shelf:last-child {
margin-bottom: 0;
}
/* Book card */
.book-card {
cursor: pointer;
transition: all 0.35s cubic-bezier(0.4, 0, 0.2, 1);
transform-origin: bottom center;
position: relative;
}
.book-card:hover {
transform: translateY(-12px) scale(1.04);
}
.book-cover {
width: 100%;
aspect-ratio: 2 / 3;
border-radius: 4px 8px 8px 4px;
overflow: hidden;
box-shadow:
-2px 2px 0 rgba(0,0,0,0.1),
-4px 4px 0 rgba(0,0,0,0.08),
4px 6px 16px rgba(0,0,0,0.3);
position: relative;
background: linear-gradient(135deg, #2c3e50, #4a6278);
display: flex;
flex-direction: column;
justify-content: flex-end;
padding: 16px;
color: white;
}
.book-cover::before {
content: '';
position: absolute;
left: 0;
top: 0;
bottom: 0;
width: 4px;
background: linear-gradient(90deg, rgba(0,0,0,0.3), transparent);
}
.book-cover img {
position: absolute;
inset: 0;
width: 100%;
height: 100%;
object-fit: cover;
z-index: 0;
}
.book-cover-overlay {
position: absolute;
inset: 0;
background: linear-gradient(to bottom, transparent 40%, rgba(0,0,0,0.85));
z-index: 1;
}
.book-cover-content {
position: relative;
z-index: 2;
}
.book-cover-default {
background: linear-gradient(135deg, #2c3e50 0%, #4a6278 100%);
}
.book-cover-default::after {
content: '';
position: absolute;
top: 12px;
right: 12px;
left: 12px;
bottom: 12px;
border: 2px solid rgba(255,255,255,0.2);
border-radius: 2px;
z-index: 1;
}
.book-title {
font-family: 'Playfair Display', serif;
font-size: 1rem;
font-weight: 700;
line-height: 1.25;
margin-bottom: 4px;
text-shadow: 0 2px 4px rgba(0,0,0,0.5);
display: -webkit-box;
-webkit-line-clamp: 3;
-webkit-box-orient: vertical;
overflow: hidden;
}
.book-author {
font-size: 0.75rem;
opacity: 0.85;
font-weight: 500;
font-style: italic;
}
.book-meta {
margin-top: 12px;
padding: 0 4px;
text-align: center;
}
.book-meta-title {
font-size: 0.85rem;
font-weight: 600;
color: #4a2c2a;
margin-bottom: 2px;
display: -webkit-box;
-webkit-line-clamp: 2;
-webkit-box-orient: vertical;
overflow: hidden;
}
.book-meta-stats {
font-size: 0.72rem;
color: #6b4226;
opacity: 0.7;
display: flex;
justify-content: center;
gap: 8px;
align-items: center;
}
.book-meta-stats i { font-size: 0.7rem; }
/* Empty state */
.empty-state {
text-align: center;
padding: 80px 24px;
color: #6b4226;
}
.empty-state i {
font-size: 4rem;
opacity: 0.4;
margin-bottom: 16px;
}
.empty-state h3 {
font-family: 'Playfair Display', serif;
font-weight: 700;
margin-bottom: 8px;
}
/* Loading */
.loading-state {
text-align: center;
padding: 80px 24px;
color: #6b4226;
}
/* Responsive */
@media (max-width: 768px) {
.header-container {
flex-direction: column;
align-items: stretch;
}
.header-actions {
justify-content: space-between;
}
.search-box {
min-width: 0;
width: 100%;
}
.shelf {
grid-template-columns: repeat(auto-fill, minmax(140px, 1fr));
gap: 16px;
padding: 16px 12px 28px;
}
.library-intro h2 {
font-size: 1.6rem;
}
.bookcase {
padding: 16px;
}
}
@media (max-width: 480px) {
.shelf {
grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
}
.book-title { font-size: 0.85rem; }
.book-author { font-size: 0.7rem; }
}
/* Footer */
.library-footer {
text-align: center;
padding: 32px 24px;
color: #6b4226;
font-size: 0.85rem;
opacity: 0.7;
}
</style>
</head>
<body>
<header class="library-header">
<div class="header-container">
<h1 class="library-title">
<i class="bi bi-book-half"></i>
Audiobook Library
</h1>
<div class="header-actions">
<div class="search-wrapper">
<i class="bi bi-search"></i>
<input type="text" class="search-box" id="searchInput"
placeholder="Search books..." oninput="filterBooks(this.value)">
</div>
<a href="/login" class="btn-login-link">
<i class="bi bi-person-circle"></i>
<span>Sign In</span>
</a>
</div>
</div>
</header>
<main class="bookcase-container">
<div class="library-intro">
<h2>Discover Stories That Speak</h2>
<p>Browse our collection of interactive audiobooks</p>
</div>
<div id="bookcaseContainer">
<div class="loading-state">
<div class="spinner-border" role="status" style="color: #6b4226;"></div>
<p class="mt-3">Loading library...</p>
</div>
</div>
</main>
<footer class="library-footer">
<p>Powered by Audiobook Maker Pro v4.2</p>
</footer>
<script>
let allBooks = [];
async function loadBooks() {
try {
const resp = await fetch('/api/public/books');
const data = await resp.json();
allBooks = data.books || [];
renderBookcase(allBooks);
} catch (e) {
document.getElementById('bookcaseContainer').innerHTML = `
<div class="empty-state">
<i class="bi bi-exclamation-circle"></i>
<h3>Failed to load library</h3>
<p>Please refresh the page to try again.</p>
</div>
`;
}
}
function renderBookcase(books) {
const container = document.getElementById('bookcaseContainer');
if (!books || books.length === 0) {
container.innerHTML = `
<div class="empty-state">
<i class="bi bi-book"></i>
<h3>No books yet</h3>
<p>The library is being curated. Check back soon!</p>
</div>
`;
return;
}
// Group books into shelves (6 books per shelf approx)
const booksPerShelf = 6;
const shelves = [];
for (let i = 0; i < books.length; i += booksPerShelf) {
shelves.push(books.slice(i, i + booksPerShelf));
}
let html = '<div class="bookcase">';
for (const shelf of shelves) {
html += '<div class="shelf">';
for (const book of shelf) {
html += renderBookCard(book);
}
html += '</div>';
}
html += '</div>';
container.innerHTML = html;
}
function renderBookCard(book) {
const thumbnailHtml = book.thumbnail_data
? `<img src="data:image/${book.thumbnail_format};base64,${book.thumbnail_data}" alt="${escapeHtml(book.name)}">
<div class="book-cover-overlay"></div>`
: '';
const coverClass = book.thumbnail_data ? '' : 'book-cover-default';
const author = book.author || 'Unknown Author';
return `
<div class="book-card" onclick="openBook(${book.id})" title="${escapeHtml(book.name)}">
<div class="book-cover ${coverClass}">
${thumbnailHtml}
<div class="book-cover-content">
<div class="book-title">${escapeHtml(book.name)}</div>
<div class="book-author">by ${escapeHtml(author)}</div>
</div>
</div>
<!--<div class="book-meta">
<div class="book-meta-stats">
<span><i class="bi bi-bookmark"></i> ${book.chapter_count} ch</span>
<span><i class="bi bi-eye"></i> ${book.view_count}</span>
</div>
</div>-->
</div>
`;
}
function openBook(bookId) {
window.location.href = `/read/${bookId}`;
}
function filterBooks(query) {
query = query.toLowerCase().trim();
if (!query) {
renderBookcase(allBooks);
return;
}
const filtered = allBooks.filter(b =>
b.name.toLowerCase().includes(query) ||
(b.author && b.author.toLowerCase().includes(query)) ||
(b.description && b.description.toLowerCase().includes(query))
);
renderBookcase(filtered);
}
function escapeHtml(text) {
const div = document.createElement('div');
div.textContent = text || '';
return div.innerHTML;
}
loadBooks();
</script>
</body>
</html>

View File

@@ -0,0 +1,827 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Audiobook Reader</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Lora:ital,wght@0,400..700;1,400..700&family=Poppins:wght@500;700&display=swap" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<style>
@keyframes fadeIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } }
@keyframes spin { to { transform: rotate(360deg); } }
html { scroll-behavior: smooth; }
body {
background-image: linear-gradient(to top, #f3e7e9 0%, #e3eeff 99%, #e3eeff 100%);
color: #1f2937; font-family: "Lora", serif; margin: 0;
}
.top-nav {
background: rgba(255,255,255,0.9);
backdrop-filter: blur(12px);
padding: 12px 24px;
box-shadow: 0 2px 8px rgba(0,0,0,0.05);
display: flex;
align-items: center;
justify-content: space-between;
position: sticky;
top: 0;
z-index: 50;
}
.back-btn {
display: flex;
align-items: center;
gap: 8px;
color: #4a2c2a;
text-decoration: none;
font-weight: 600;
font-family: 'Inter', sans-serif;
font-size: 0.9rem;
padding: 8px 14px;
border-radius: 8px;
transition: background 0.2s;
}
.back-btn:hover { background: rgba(74,44,42,0.08); color: #4a2c2a; }
.story-title { font-family: "Poppins", sans-serif; font-weight: 700; font-size: 2.5rem; color: #111827; }
.story-subtitle { font-family: "Poppins", sans-serif; color: #4b5563; font-weight: 500; font-size: 1.1rem; }
.main-content-card {
background-color: rgba(255,255,255,0.9); backdrop-filter: blur(12px);
border-radius: 1rem; padding: 3rem 4rem; box-shadow: 0 10px 35px rgba(0,0,0,0.08);
border: 1px solid rgba(255,255,255,0.2); max-width: 1400px; margin: 0 auto;
animation: fadeIn 0.5s ease-in-out;
}
.reader-layout { display: flex; gap: 32px; align-items: flex-start; text-align: left; }
.document-outline {
width: 280px; flex-shrink: 0; position: sticky; top: 80px;
background: white; border-radius: 12px; padding: 20px;
box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05);
border: 1px solid #e2e8f0; max-height: calc(100vh - 100px); overflow-y: auto;
}
.outline-title {
font-family: "Poppins", sans-serif; font-weight: 700; font-size: 0.95rem;
color: #64748b; text-transform: uppercase; margin-bottom: 16px;
border-bottom: 1px solid #e2e8f0; padding-bottom: 12px;
letter-spacing: 0.5px;
}
.outline-list { list-style: none; padding: 0; margin: 0; }
.outline-list li {
padding: 10px 14px; font-size: 0.9rem; color: #334155; cursor: pointer;
border-radius: 8px; transition: all 0.2s; white-space: nowrap;
overflow: hidden; text-overflow: ellipsis; font-family: "Inter", sans-serif;
margin-bottom: 4px;
border-left: 3px solid transparent;
}
.outline-list li:hover { background: #f8fafc; color: #4f46e5; }
.outline-list li.active {
background: #e0e7ff;
color: #3730a3;
font-weight: 600;
border-left: 3px solid #4f46e5;
padding-left: 11px;
}
.reader-content-area { flex: 1; min-width: 0; }
.story-text-container { font-size: 32px; line-height: 2.1; color: #1f2937; cursor: pointer; }
.story-text-container h1, .story-text-container h2, .story-text-container h3 {
font-family: "Poppins", sans-serif; color: #111827; line-height: 1.8;
margin-top: 1.5em; margin-bottom: 0.8em;
}
.story-text-container p { margin-bottom: 1.2em; }
.story-text-container img { max-width: 100%; height: auto; border-radius: 8px; margin: 16px auto; display: block; }
.block-loading-spinner {
display: inline-flex; align-items: center; gap: 8px;
color: #6b7280; font-size: 0.9rem; font-family: "Inter", sans-serif;
padding: 8px 0;
}
.block-loading-spinner::before {
content: ''; width: 16px; height: 16px;
border: 2px solid #e2e8f0; border-top-color: #5753c9;
border-radius: 50%; animation: spin 0.8s linear infinite;
}
.word { transition: all 0.15s ease; border-radius: 3px; cursor: pointer; }
.word:hover { background-color: #f1f5f9; }
.current-sentence-bg {
-webkit-box-decoration-break: clone; box-decoration-break: clone;
background-color: #e0e7ff; padding: 0.1em 0.25em; margin: 0 -0.2em; border-radius: 8px;
}
.current-word { color: #3d4e81; text-decoration: underline; text-decoration-thickness: 3px; text-underline-offset: 3px; font-weight: 700; }
.story-image-block { text-align: center; margin: 24px 0; }
.story-image-block img { max-width: 100%; height: auto; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }
/* Floating Player Button — Fixed RIGHT side */
#floating-player-btn {
position: fixed;
top: 5rem;
right: 24px;
height: 56px;
min-width: 56px;
padding: 0 24px;
border-radius: 30px;
background-image: linear-gradient(45deg, #3d4e81 0%, #5753c9 50%, #6e78da 100%);
border: none;
color: white;
box-shadow: 0 8px 25px rgba(0,0,0,0.3);
display: none;
align-items: center;
justify-content: center;
cursor: pointer;
z-index: 60;
transition: transform 0.2s, opacity 0.3s, width 0.3s, padding 0.3s, border-radius 0.3s;
gap: 8px;
}
#floating-player-btn.visible { display: flex; }
#floating-player-btn:hover { transform: scale(1.05); }
#floating-player-btn:active { transform: scale(0.95); }
#floating-player-btn svg { width: 24px; height: 24px; }
#fp-start-text {
font-weight: 600;
font-family: "Poppins", sans-serif;
font-size: 1rem;
}
#floating-player-btn.active-mode {
width: 56px;
padding: 0;
border-radius: 50%;
gap: 0;
}
#floating-player-btn.active-mode #fp-start-text { display: none; }
#floating-player-btn.loading { background: linear-gradient(45deg, #6b7280, #9ca3af); cursor: wait; }
#fp-loading-spinner {
width: 24px; height: 24px;
border: 3px solid rgba(255,255,255,0.3); border-top-color: white;
border-radius: 50%; animation: spin 0.8s linear infinite;
display: none;
}
#floating-player-btn.loading #fp-loading-spinner { display: block; }
#floating-player-btn.loading #fp-play-icon,
#floating-player-btn.loading #fp-pause-icon,
#floating-player-btn.loading #fp-start-text { display: none !important; }
.reader-toast {
position: fixed; bottom: 2rem; left: 50%; transform: translateX(-50%);
background: rgba(30, 41, 59, 0.95); color: white;
padding: 12px 24px; border-radius: 24px;
font-family: "Inter", sans-serif; font-size: 0.9rem;
box-shadow: 0 8px 24px rgba(0,0,0,0.2);
z-index: 1100; opacity: 0; transition: opacity 0.3s;
pointer-events: none; display: flex; align-items: center; gap: 8px;
}
.reader-toast.visible { opacity: 1; }
.reader-toast .spinner {
width: 14px; height: 14px;
border: 2px solid rgba(255,255,255,0.3); border-top-color: white;
border-radius: 50%; animation: spin 0.8s linear infinite;
}
/* Block highlight on outline click */
.story-block.highlight-section,
.story-image-block.highlight-section {
animation: highlightPulse 2s ease-out;
border-radius: 12px;
}
@keyframes highlightPulse {
0% {
background-color: rgba(79, 70, 229, 0.18);
box-shadow: 0 0 0 4px rgba(79, 70, 229, 0.25);
}
100% {
background-color: transparent;
box-shadow: 0 0 0 0 transparent;
}
}
@media (max-width: 992px) {
.reader-layout { flex-direction: column; }
.document-outline { width: 100%; position: relative; top: 0; max-height: 300px; }
}
@media (max-width: 768px) {
.main-content-card { padding: 1.5rem; }
.story-text-container { font-size: 22px; line-height: 1.9; }
#floating-player-btn {
top: auto;
bottom: 1rem;
right: 1rem;
height: 50px;
min-width: 50px;
}
#floating-player-btn.active-mode { width: 50px; }
}
</style>
</head>
<body>
<nav class="top-nav">
<a href="/home" class="back-btn">
<svg width="18" height="18" fill="currentColor" viewBox="0 0 16 16">
<path d="M15 8a.5.5 0 0 0-.5-.5H2.707l3.147-3.146a.5.5 0 1 0-.708-.708l-4 4a.5.5 0 0 0 0 .708l4 4a.5.5 0 0 0 .708-.708L2.707 8.5H14.5A.5.5 0 0 0 15 8z"/>
</svg>
Back to Library
</a>
</nav>
<button id="floating-player-btn">
<svg id="fp-play-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor"><path d="M8 5v14l11-7z"/></svg>
<svg id="fp-pause-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" style="display:none;"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>
<span id="fp-start-text">Start</span>
<div id="fp-loading-spinner"></div>
</button>
<div id="reader-toast" class="reader-toast">
<div class="spinner"></div>
<span id="reader-toast-text">Loading…</span>
</div>
<div class="container-fluid my-4 px-md-5">
<main id="main-content" class="main-content-card">
<header class="text-center mb-5" id="main-header">
<h1 class="story-title" id="book-title">Loading...</h1>
<p class="story-subtitle" id="book-subtitle">Please wait</p>
</header>
<div class="reader-layout">
<aside class="document-outline" id="story-nav" style="display: none;">
<h6 class="outline-title">Table of Sections</h6>
<ul class="outline-list" id="story-nav-list"></ul>
</aside>
<div class="reader-content-area">
<div id="stories-main-container"></div>
</div>
</div>
</main>
</div>
<script>
/**
* Public Reader — Smart Preload Architecture (v3)
*
* Loading Strategy:
* 1. TEXT + TIMESTAMPS: loaded eagerly from /api/public/books/<id> in single batch.
* 2. AUDIO: base64 → Blob URL conversion is DEFERRED until needed.
* 3. Smart preload: when block N plays, preload blob URLs for N+1, N+2.
* At 70% mark of N, ensure N+1 is ready (safety net).
* 4. Memory cap: keep at most MAX_AUDIO_LOADED blob URLs alive;
* revoke distant past audio to free browser memory.
*
* Scroll Strategy:
* - Manual navigation (Start / outline click / word click): scroll to block.
* - Auto-advance (audio ended → next): NO block scroll — word highlighter carries user.
*/
const pathParts = window.location.pathname.split('/');
const BOOK_ID = parseInt(pathParts[pathParts.length - 1], 10);
document.addEventListener("DOMContentLoaded", async () => {
const mainContainer = document.getElementById("stories-main-container");
const floatingPlayerBtn = document.getElementById("floating-player-btn");
const storyNav = document.getElementById("story-nav");
const storyNavList = document.getElementById("story-nav-list");
const toastEl = document.getElementById("reader-toast");
const toastText = document.getElementById("reader-toast-text");
let storyInstances = [];
let currentlyPlayingInstance = null;
let currentlyPlayingIndex = -1;
let hasStarted = false;
let navObserver = null;
// Tunables (matches reader_templates/index.html)
const PRELOAD_AHEAD = 2;
const MID_PRELOAD_THRESHOLD = 0.7;
const MAX_AUDIO_LOADED = 5;
const KEEP_BEHIND = 1;
floatingPlayerBtn.addEventListener("click", handleFloatingBtnClick);
mainContainer.addEventListener("click", handleTextClick);
// ===================================================
// UI Helpers
// ===================================================
function showToast(msg) { toastText.textContent = msg; toastEl.classList.add("visible"); }
function hideToast() { toastEl.classList.remove("visible"); }
function setButtonLoading(b) { floatingPlayerBtn.classList.toggle("loading", b); }
function updateFloatingButton(state) {
const fpPauseIcon = document.getElementById("fp-pause-icon");
const fpPlayIcon = document.getElementById("fp-play-icon");
if (hasStarted) {
floatingPlayerBtn.classList.add("active-mode");
}
if (state === "playing") {
fpPauseIcon.style.display = "block";
fpPlayIcon.style.display = "none";
} else {
fpPauseIcon.style.display = "none";
fpPlayIcon.style.display = "block";
}
}
// ===================================================
// INITIAL LOAD
// ===================================================
try {
const resp = await fetch(`/api/public/books/${BOOK_ID}`);
if (!resp.ok) throw new Error('Failed to load book');
const book = await resp.json();
document.getElementById("book-title").textContent = book.name;
document.getElementById("book-subtitle").textContent = book.author ? `by ${book.author}` : 'An interactive audiobook';
document.title = book.name + ' - Audiobook Reader';
// Build flat list of blocks across chapters
let globalIdx = 0;
for (const chapter of book.chapters) {
let firstInChapter = true;
for (const block of chapter.blocks) {
const isImage = block.block_type === 'image' ||
(block.content && block.content.trim().startsWith('![') && block.content.includes(']('));
if (isImage) {
const imgBlockId = `img-block-${globalIdx}`;
let imgHtml = `<div id="${imgBlockId}" class="story-image-block" data-chapter="${chapter.chapter_number}">`;
if (block.images && block.images.length > 0) {
for (const img of block.images) {
if (img.data) {
imgHtml += `<img src="data:image/${img.format};base64,${img.data}" alt="Image" loading="lazy">`;
}
}
}
imgHtml += '</div>';
mainContainer.insertAdjacentHTML("beforeend", imgHtml);
if (firstInChapter) addOutlineEntry(chapter.title, chapter.chapter_number, imgBlockId);
firstInChapter = false;
globalIdx++;
continue;
}
if (!block.audio_data) continue;
const blockId = `story-block-${globalIdx}`;
if (firstInChapter) addOutlineEntry(chapter.title, chapter.chapter_number, blockId);
firstInChapter = false;
mainContainer.insertAdjacentHTML("beforeend", `
<div id="${blockId}" class="story-block mt-4" data-instance-index="${storyInstances.length}" data-chapter="${chapter.chapter_number}">
<article class="story-text-container"></article>
<audio class="audio-player" preload="none" style="display:none;"></audio>
</div>
`);
storyInstances.push({
index: storyInstances.length,
blockEl: document.getElementById(blockId),
block: block,
chapter: chapter,
audio: null,
audioUrl: null, // blob URL ref for cleanup
audioReady: false,
audioLoadingPromise: null,
midPreloadTriggered: false,
wordSpans: [],
wordMap: [],
sentenceData: [],
animFrameId: null,
lastWordSpan: null,
lastSentenceSpans: []
});
globalIdx++;
}
}
// Render text + sync for each instance (cheap, in-memory)
for (const inst of storyInstances) {
renderMarkdownInto(inst);
smartSync(inst);
}
storyNav.style.display = 'block';
floatingPlayerBtn.classList.add("visible");
setupNavObserver();
} catch (e) {
console.error(e);
mainContainer.innerHTML = `<p class="text-center text-danger">Error loading book: ${e.message}</p>`;
}
// ===================================================
// Outline / Navigation
// ===================================================
function addOutlineEntry(title, chapter, targetId) {
const li = document.createElement("li");
li.textContent = title;
li.title = title;
li.dataset.chapter = chapter;
li.dataset.targetId = targetId;
li.onclick = () => {
const target = document.getElementById(targetId);
if (!target) return;
const offset = target.getBoundingClientRect().top + window.pageYOffset - 80;
window.scrollTo({ top: offset, behavior: 'smooth' });
target.classList.remove('highlight-section');
void target.offsetWidth;
target.classList.add('highlight-section');
setTimeout(() => target.classList.remove('highlight-section'), 2000);
storyNavList.querySelectorAll('li').forEach(l => l.classList.remove('active'));
li.classList.add('active');
};
storyNavList.appendChild(li);
}
function setupNavObserver() {
if (navObserver) navObserver.disconnect();
navObserver = new IntersectionObserver((entries) => {
entries.forEach(entry => {
if (entry.isIntersecting) {
const chapter = entry.target.dataset.chapter;
if (chapter) {
storyNavList.querySelectorAll('li').forEach(l => l.classList.remove('active'));
const active = storyNavList.querySelector(`li[data-chapter='${chapter}']`);
if (active) active.classList.add('active');
}
}
});
}, { threshold: 0.3, rootMargin: '-80px 0px -40% 0px' });
document.querySelectorAll('.story-block, .story-image-block').forEach(b => navObserver.observe(b));
}
// ===================================================
// Render & Sync
// ===================================================
function renderMarkdownInto(inst) {
const container = inst.blockEl.querySelector(".story-text-container");
container.innerHTML = "";
inst.wordSpans = [];
const div = document.createElement("div");
div.innerHTML = marked.parse(inst.block.content, { breaks: true, gfm: true });
(function processNode(node) {
if (node.nodeType === Node.TEXT_NODE) {
const words = node.textContent.split(/(\s+)/);
const frag = document.createDocumentFragment();
words.forEach(part => {
if (part.trim().length > 0) {
const span = document.createElement("span");
span.className = "word";
span.textContent = part;
inst.wordSpans.push(span);
frag.appendChild(span);
} else frag.appendChild(document.createTextNode(part));
});
node.parentNode.replaceChild(frag, node);
} else if (node.nodeType === Node.ELEMENT_NODE) {
Array.from(node.childNodes).forEach(processNode);
}
})(div);
while (div.firstChild) container.appendChild(div.firstChild);
}
function smartSync(inst) {
const transcription = inst.block.transcription || [];
inst.wordMap = new Array(inst.wordSpans.length).fill(undefined);
let ai = 0;
inst.wordSpans.forEach((span, i) => {
const tw = span.textContent.toLowerCase().replace(/[^\w]/g, "");
for (let o = 0; o < 5; o++) {
if (ai + o >= transcription.length) break;
const aw = transcription[ai + o].word.toLowerCase().replace(/[^\w]/g, "");
if (tw === aw) { inst.wordMap[i] = ai + o; ai += o + 1; return; }
}
});
inst.sentenceData = [];
let buf = [], si = 0;
inst.wordSpans.forEach((s, i) => {
buf.push(s);
if (/[.!?]["'\u201D\u2019]?$/.test(s.textContent.trim())) {
let sT = 0, eT = 0;
for (let k = si; k <= i; k++) if (inst.wordMap[k] !== undefined) { sT = transcription[inst.wordMap[k]].start; break; }
for (let k = i; k >= si; k--) if (inst.wordMap[k] !== undefined) { eT = transcription[inst.wordMap[k]].end; break; }
if (eT > sT) inst.sentenceData.push({ spans: [...buf], startTime: sT, endTime: eT });
buf = []; si = i + 1;
}
});
if (buf.length > 0) {
let sT = 0, eT = 0;
for (let k = si; k < inst.wordSpans.length; k++) if (inst.wordMap[k] !== undefined) { sT = transcription[inst.wordMap[k]].start; break; }
for (let k = inst.wordSpans.length - 1; k >= si; k--) if (inst.wordMap[k] !== undefined) { eT = transcription[inst.wordMap[k]].end; break; }
if (eT > sT) inst.sentenceData.push({ spans: [...buf], startTime: sT, endTime: eT });
}
}
// ===================================================
// AUDIO LAZY LOADING + MEMORY MANAGEMENT
// ===================================================
function ensureAudioLoaded(inst) {
if (inst.audioReady && inst.audio) return Promise.resolve(inst);
if (inst.audioLoadingPromise) return inst.audioLoadingPromise;
inst.audioLoadingPromise = new Promise((resolve, reject) => {
if (!inst.block.audio_data) {
inst.audioLoadingPromise = null;
return reject(new Error('No audio data'));
}
try {
const audio = inst.blockEl.querySelector('.audio-player');
const blob = base64ToBlob(inst.block.audio_data, `audio/${inst.block.audio_format || 'mp3'}`);
const url = URL.createObjectURL(blob);
const onCanPlay = () => {
audio.removeEventListener('error', onError);
inst.audio = audio;
inst.audioUrl = url;
inst.audioReady = true;
wireAudioEvents(inst);
resolve(inst);
};
const onError = () => {
audio.removeEventListener('canplay', onCanPlay);
try { URL.revokeObjectURL(url); } catch (e) {}
inst.audioLoadingPromise = null;
reject(new Error('Audio failed to load'));
};
audio.addEventListener('canplay', onCanPlay, { once: true });
audio.addEventListener('error', onError, { once: true });
audio.preload = 'auto';
audio.src = url;
audio.load();
} catch (err) {
inst.audioLoadingPromise = null;
reject(err);
}
});
return inst.audioLoadingPromise;
}
function wireAudioEvents(inst) {
const audio = inst.audio;
audio.addEventListener('play', () => {
startHighlightLoop(inst);
updateFloatingButton('playing');
});
audio.addEventListener('pause', () => {
stopHighlightLoop(inst);
updateFloatingButton('paused');
});
audio.addEventListener('ended', () => {
stopHighlightLoop(inst);
clearHighlights(inst);
const nextIdx = inst.index + 1;
if (nextIdx < storyInstances.length) {
playInstance(nextIdx, 0, { autoAdvance: true });
} else {
currentlyPlayingInstance = null;
currentlyPlayingIndex = -1;
updateFloatingButton('paused');
}
});
// Mid-play safety net
audio.addEventListener('timeupdate', () => {
if (inst.midPreloadTriggered) return;
if (!audio.duration || isNaN(audio.duration)) return;
if ((audio.currentTime / audio.duration) >= MID_PRELOAD_THRESHOLD) {
inst.midPreloadTriggered = true;
const nextIdx = inst.index + 1;
if (nextIdx < storyInstances.length) {
ensureAudioLoaded(storyInstances[nextIdx]).catch(() => {});
}
}
});
}
/**
* Preload N audio blocks ahead (fire-and-forget).
*/
function preloadAhead(fromIndex) {
for (let i = 1; i <= PRELOAD_AHEAD; i++) {
const idx = fromIndex + i;
if (idx >= storyInstances.length) break;
ensureAudioLoaded(storyInstances[idx]).catch(() => {});
}
}
/**
* Memory management: keep only sliding window of audio elements loaded.
* Window = [currentIndex - KEEP_BEHIND, currentIndex + PRELOAD_AHEAD]
* Bounded to MAX_AUDIO_LOADED total.
*/
function pruneLoadedAudio(currentIndex) {
const loaded = storyInstances.filter(i => i.audioReady && i.audio && i.audioUrl);
if (loaded.length <= MAX_AUDIO_LOADED) return;
const keepLow = currentIndex - KEEP_BEHIND;
const keepHigh = currentIndex + PRELOAD_AHEAD;
const candidates = loaded
.filter(inst => inst !== currentlyPlayingInstance)
.map(inst => ({
inst,
inWindow: inst.index >= keepLow && inst.index <= keepHigh,
distance: Math.abs(inst.index - currentIndex)
}))
.sort((a, b) => {
if (a.inWindow !== b.inWindow) return a.inWindow ? 1 : -1;
return b.distance - a.distance;
});
let toEvict = loaded.length - MAX_AUDIO_LOADED;
for (const c of candidates) {
if (toEvict <= 0) break;
releaseAudio(c.inst);
toEvict--;
}
}
function releaseAudio(inst) {
if (!inst.audio) return;
try {
inst.audio.pause();
inst.audio.removeAttribute('src');
inst.audio.load();
} catch (e) { /* ignore */ }
if (inst.audioUrl) {
try { URL.revokeObjectURL(inst.audioUrl); } catch (e) {}
inst.audioUrl = null;
}
inst.audioReady = false;
inst.audioLoadingPromise = null;
inst.midPreloadTriggered = false;
}
// ===================================================
// PLAYBACK
// ===================================================
async function playInstance(idx, ts = 0, opts = {}) {
if (idx < 0 || idx >= storyInstances.length) return;
const inst = storyInstances[idx];
const isAutoAdvance = opts.autoAdvance === true;
if (currentlyPlayingInstance && currentlyPlayingInstance !== inst) {
stopAndResetInstance(currentlyPlayingInstance);
}
currentlyPlayingInstance = inst;
currentlyPlayingIndex = idx;
hasStarted = true;
inst.midPreloadTriggered = false;
const needsLoad = !inst.audioReady;
if (needsLoad) {
setButtonLoading(true);
showToast("Loading audio…");
}
try {
await ensureAudioLoaded(inst);
if (needsLoad) {
hideToast();
setButtonLoading(false);
}
inst.audio.currentTime = ts;
await inst.audio.play();
updateFloatingButton('playing');
// Block-level scroll ONLY for manual navigation
if (!isAutoAdvance) {
const rect = inst.blockEl.getBoundingClientRect();
if (rect.top < 0 || rect.top > window.innerHeight * 0.6) {
inst.blockEl.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
}
preloadAhead(idx);
pruneLoadedAudio(idx);
} catch (err) {
console.error(err);
setButtonLoading(false);
showToast("Failed to load audio. Tap again to retry.");
setTimeout(hideToast, 3000);
updateFloatingButton('paused');
}
}
function stopAndResetInstance(inst) {
if (inst.audio) {
try {
inst.audio.pause();
inst.audio.currentTime = 0;
} catch (e) {}
}
stopHighlightLoop(inst);
clearHighlights(inst);
}
function handleFloatingBtnClick() {
if (!hasStarted) { playInstance(0, 0); return; }
if (currentlyPlayingInstance && currentlyPlayingInstance.audio) {
const audio = currentlyPlayingInstance.audio;
if (audio.paused) {
audio.play().catch(console.error);
updateFloatingButton('playing');
} else {
audio.pause();
updateFloatingButton('paused');
}
} else {
playInstance(0, 0);
}
}
function handleTextClick(e) {
const wordSpan = e.target.closest('.word');
if (!wordSpan) return;
const block = e.target.closest('.story-block');
if (!block) return;
const idx = parseInt(block.dataset.instanceIndex, 10);
const inst = storyInstances[idx];
if (!inst) return;
const wordIdx = inst.wordSpans.indexOf(wordSpan);
const aiIdx = inst.wordMap[wordIdx];
if (aiIdx === undefined) return;
playInstance(idx, inst.block.transcription[aiIdx].start);
}
// ===================================================
// Highlighting
// ===================================================
function startHighlightLoop(inst) {
cancelAnimationFrame(inst.animFrameId);
inst.animFrameId = requestAnimationFrame(() => highlightLoop(inst));
}
function stopHighlightLoop(inst) { cancelAnimationFrame(inst.animFrameId); }
function highlightLoop(inst) {
if (!inst.audio || inst.audio.paused) return;
const t = inst.audio.currentTime;
const transcription = inst.block.transcription || [];
const ai = transcription.findIndex(w => t >= w.start && t < w.end);
if (ai !== -1) {
const ti = inst.wordMap.findIndex(i => i === ai);
if (ti !== -1) {
const sp = inst.wordSpans[ti];
if (sp !== inst.lastWordSpan) {
if (inst.lastWordSpan) inst.lastWordSpan.classList.remove('current-word');
sp.classList.add('current-word');
const r = sp.getBoundingClientRect();
if (r.top < window.innerHeight * 0.2 || r.bottom > window.innerHeight * 0.8) {
sp.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
inst.lastWordSpan = sp;
}
}
}
const sent = inst.sentenceData.find(s => t >= s.startTime && t <= s.endTime);
if (sent && sent.spans !== inst.lastSentenceSpans) {
inst.lastSentenceSpans.forEach(s => s.classList.remove('current-sentence-bg'));
sent.spans.forEach(s => s.classList.add('current-sentence-bg'));
inst.lastSentenceSpans = sent.spans;
}
inst.animFrameId = requestAnimationFrame(() => highlightLoop(inst));
}
function clearHighlights(inst) {
if (inst.lastWordSpan) inst.lastWordSpan.classList.remove('current-word');
inst.lastSentenceSpans.forEach(s => s.classList.remove('current-sentence-bg'));
inst.lastWordSpan = null;
inst.lastSentenceSpans = [];
}
// ===================================================
// Utility
// ===================================================
function base64ToBlob(b64, mime) {
const bin = atob(b64);
const arr = new Uint8Array(bin.length);
for (let i = 0; i < bin.length; i++) arr[i] = bin.charCodeAt(i);
return new Blob([arr], { type: mime });
}
});
</script>
</body>
</html>

83
utils.py Normal file
View File

@@ -0,0 +1,83 @@
# utils.py - Utility Functions
import io
import re
import base64
def convert_to_mp3(audio_base64, source_format='wav'):
"""
Convert audio from any format to MP3 base64.
"""
try:
from pydub import AudioSegment
audio_bytes = base64.b64decode(audio_base64)
audio_buffer = io.BytesIO(audio_bytes)
audio = AudioSegment.from_file(audio_buffer, format=source_format)
mp3_buffer = io.BytesIO()
audio.export(mp3_buffer, format='mp3', bitrate='192k')
mp3_buffer.seek(0)
mp3_base64 = base64.b64encode(mp3_buffer.read()).decode('utf-8')
return mp3_base64
except ImportError:
print("⚠️ pydub not installed, returning original format")
return audio_base64
except Exception as e:
print(f"⚠️ MP3 conversion failed: {e}, returning original")
return audio_base64
def sanitize_filename(name):
"""
Sanitize a string for use as a filename.
"""
if not name:
return 'unnamed'
return "".join(c for c in name if c.isalnum() or c in ('_', '-', ' '))
def strip_markdown(text):
"""
Strip markdown formatting from text to get plain text for TTS.
"""
if not text:
return ""
# Remove image references completely
text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text)
# Remove headings markers
text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE)
# Remove bold/italic
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
text = re.sub(r'\*(.+?)\*', r'\1', text)
text = re.sub(r'__(.+?)__', r'\1', text)
text = re.sub(r'_(.+?)_', r'\1', text)
# Remove strikethrough
text = re.sub(r'~~(.+?)~~', r'\1', text)
# Remove inline code
text = re.sub(r'`([^`]+)`', r'\1', text)
# Remove links but keep text
text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
# Remove blockquote markers
text = re.sub(r'^>\s*', '', text, flags=re.MULTILINE)
# Remove list markers
text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE)
text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
# Remove horizontal rules
text = re.sub(r'^(-{3,}|\*{3,}|_{3,})$', '', text, flags=re.MULTILINE)
# Clean up whitespace
text = re.sub(r'\n{3,}', '\n\n', text)
return text.strip()