Source Code - SWMM5 Manual Search

app.py

from fastapi import FastAPI, Query, Request, BackgroundTasks
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse, Response
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
import httpx
from whoosh import index
from whoosh.qparser import MultifieldParser, OrGroup
from whoosh.highlight import ContextFragmenter, UppercaseFormatter
from whoosh.query import Every

from pathlib import Path
import subprocess
import asyncio
import json
import time
import queue
import threading
import re
import hashlib
from datetime import date
from collections import defaultdict, Counter
from inp_reference import find_matching_inp_sections

# Directory of the Whoosh index opened by the search and stats endpoints.
INDEX_DIR = Path("index")
# JSON-lines file of crawled documents read by /stats, /chapters, /toc and /glossary.
DOCS_PATH = Path("data/docs.jsonl")

# Pool of queries for the /featured-search endpoint; get_todays_search()
# selects one entry per calendar day by indexing into this list.
FEATURED_SEARCHES = [
    "LID controls",
    "dynamic wave routing",
    "infiltration",
    "Manning roughness",
    "subcatchment",
    "Green-Ampt",
    "runoff coefficient",
    "Horton equation",
    "storm sewer",
    "rainfall hyetograph",
    "detention pond",
    "water quality",
    "pollutant buildup",
    "groundwater",
    "snowmelt",
    "pump station",
    "flow routing",
    "Curve Number",
    "hydraulic grade line",
    "bioretention",
]

# Concept keyword -> neighboring terms. get_related_concepts() matches the
# user's query against both the keys and the neighbor lists to build the
# "related" suggestions returned by /search.
CONCEPT_MAP = {
    "infiltration": ["percolation", "groundwater", "soil moisture", "losses", "Green-Ampt", "Horton", "Curve Number"],
    "runoff": ["rainfall", "impervious", "subcatchment", "overland flow", "hydrograph", "peak flow"],
    "routing": ["dynamic wave", "kinematic wave", "conduit", "flow", "Saint-Venant", "hydraulic"],
    "LID": ["bioretention", "rain garden", "permeable pavement", "green roof", "swale", "low impact"],
    "conduit": ["pipe", "Manning", "roughness", "cross section", "diameter", "slope"],
    "pump": ["pump curve", "pump station", "wet well", "force main", "lift station"],
    "pollutant": ["buildup", "washoff", "water quality", "concentration", "treatment", "EMC"],
    "groundwater": ["aquifer", "infiltration", "percolation", "water table", "baseflow", "lateral flow"],
    "subcatchment": ["area", "width", "slope", "impervious", "pervious", "runoff", "outlet"],
    "flooding": ["surcharge", "ponding", "overflow", "depth", "volume", "node"],
    "rainfall": ["hyetograph", "rain gage", "time series", "intensity", "duration", "frequency"],
    "snowmelt": ["snow pack", "cold content", "melt coefficient", "temperature", "plowing"],
    "detention": ["storage", "pond", "outflow", "stage", "volume", "weir", "orifice"],
    "calibration": ["validation", "sensitivity", "parameters", "observed", "simulated", "error"],
    "weir": ["orifice", "outlet", "transverse", "side flow", "V-notch", "trapezoidal"],
    "junction": ["node", "manhole", "invert", "surcharge", "inflow", "depth"],
    "treatment": ["removal", "pollutant", "BMP", "concentration", "effluent"],
    "hydraulic": ["head", "pressure", "velocity", "energy", "friction", "losses"],
}

def get_todays_search():
    """Return the featured query for the current date.

    The proleptic ordinal of today's date, taken modulo the number of
    featured searches, selects the entry, so the pick advances by one
    position each day and wraps around at the end of the list.
    """
    ordinal = date.today().toordinal()
    return FEATURED_SEARCHES[ordinal % len(FEATURED_SEARCHES)]

# Global queue for streaming indexing progress
# (filled by run_indexing_with_progress, drained by the /indexing-progress stream)
progress_queue = queue.Queue()

# FastAPI application object. Static assets are served from ./static under
# the /static URL prefix; HTML pages are rendered from ./templates.
app = FastAPI(title="SWMM5 Manual Search - Local search for swmm-manual.netlify.app")
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

def have_index() -> bool:
    """Report whether a Whoosh index is present in ``INDEX_DIR``.

    Any exception raised by the existence check is treated as "no index"
    so callers can use the result directly as a readiness flag.
    """
    try:
        present = index.exists_in(INDEX_DIR)
    except Exception:
        return False
    return present

def search_whoosh(q: str, limit: int = 20, chapter: str = ""):
    """Search the Whoosh index and return a list of result dicts.

    Args:
        q: User query. Comma-separated parts are parsed independently and
            combined with AND; inside one part, words are OR-ed across the
            ``title``, ``section`` and ``content`` fields. A query with no
            non-blank part matches every document.
        limit: Maximum number of hits to return. Values below 1 are raised
            to 1 so a bad ``limit`` query parameter does not become a
            server error.
        chapter: Optional chapter title. When non-blank, results are
            restricted to documents whose ``title`` matches it as a phrase.

    Returns:
        A list of dicts with the keys ``url``, ``title``, ``section`` and
        ``snippet`` (highlighted text from content, title or section, or an
        empty string when nothing could be highlighted).
    """
    from whoosh.qparser import QueryParser
    from whoosh.query import And

    # Whoosh rejects a limit below 1; clamp here rather than fail the request.
    if limit is not None:
        limit = max(1, limit)

    ix = index.open_dir(INDEX_DIR)
    with ix.searcher() as s:
        parser = MultifieldParser(["title", "section", "content"], schema=ix.schema, group=OrGroup)
        terms = [term.strip() for term in q.split(',') if term.strip()]

        if not terms:
            query = Every()
        elif len(terms) == 1:
            query = parser.parse(terms[0])
        else:
            queries = []
            for term in terms:
                # Best effort: a part that cannot be parsed is skipped so the
                # remaining parts still produce results.
                try:
                    queries.append(parser.parse(term))
                except Exception:
                    continue
            query = And(queries) if queries else Every()

        # Double quotes are removed from the chapter text so user input
        # cannot terminate the phrase that is built around it.
        chapter_phrase = chapter.replace('"', ' ').strip() if chapter else ""
        if chapter_phrase:
            ch_parser = QueryParser("title", schema=ix.schema)
            ch_query = ch_parser.parse(f'"{chapter_phrase}"')
            query = And([query, ch_query])

        results = s.search(query, limit=limit)
        results.fragmenter = ContextFragmenter(maxchars=200, surround=60)
        results.formatter = UppercaseFormatter()

        payload = []
        for hit in results:
            snippet = hit.highlights("content") or hit.highlights("title") or hit.highlights("section") or ""
            payload.append({
                "url": hit["url"],
                "title": hit.get("title", ""),
                "section": hit.get("section", ""),
                "snippet": snippet
            })
        return payload

@app.get("/")
async def home(request: Request, q: str = "", results: int = 20, chapter: str = ""):
    """Serve the landing page, or a small JSON status for probes.

    A request is answered with JSON when its Accept header mentions
    ``application/json`` or its User-Agent contains one of the words
    "health", "probe", "monitor" or "check" (case-insensitive). All other
    requests receive the rendered ``index.html`` template. The ``q``,
    ``results`` and ``chapter`` parameters are accepted but not used here.
    """
    accept = request.headers.get("accept", "")
    agent = request.headers.get("user-agent", "").lower()
    probe_markers = ("health", "probe", "monitor", "check")

    wants_json = "application/json" in accept
    if wants_json or any(marker in agent for marker in probe_markers):
        return {"status": "ok", "service": "SWMM Search", "index": have_index()}

    context = {"request": request, "ready": have_index()}
    return templates.TemplateResponse("index.html", context)

@app.get("/search")
async def search(q: str = Query("", min_length=0, max_length=200), limit: int = 20, chapter: str = Query("", max_length=200)):
    """Run a search and return hits plus related concepts and INP references.

    Returns ``{"error": "index_missing"}`` when no index exists. Otherwise
    the response carries the hits, the related-concept suggestions, the
    matching INP sections, and the time spent in the index search in
    seconds (rounded to milliseconds).
    """
    if not have_index():
        return JSONResponse({"error": "index_missing"})

    started = time.time()
    hits = search_whoosh(q, limit, chapter)
    took = round(time.time() - started, 3)

    return {
        "results": hits,
        "related": get_related_concepts(q),
        "inp_references": find_matching_inp_sections(q),
        "elapsed": took,
    }

@app.get("/stats")
async def stats():
    """Return summary statistics about the indexed manual.

    Returns ``{"error": "index_missing"}`` when no index exists. Otherwise:

    * ``chapters`` - number of distinct document titles in ``DOCS_PATH``
    * ``sections`` - number of documents in the Whoosh index
    * ``words`` - whitespace-separated word count over all content
    * ``indexed_at`` - modification time of ``DOCS_PATH`` as a POSIX
      timestamp, or ``None`` when the file does not exist
    * ``version`` - fixed string ``"5.2"``
    """
    if not have_index():
        return JSONResponse({"error": "index_missing"})
    ix = index.open_dir(INDEX_DIR)
    doc_count = ix.doc_count()

    chapters = set()
    total_words = 0
    idx_time = None
    if DOCS_PATH.exists():
        idx_time = DOCS_PATH.stat().st_mtime
        with open(DOCS_PATH, 'r', encoding='utf-8') as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline) in the JSONL file.
                if not line.strip():
                    continue
                d = json.loads(line)
                chapters.add(d.get("title", ""))
                total_words += len(d.get("content", "").split())

    return {
        "chapters": len(chapters),
        "sections": doc_count,
        "words": total_words,
        "indexed_at": idx_time,
        "version": "5.2",
    }

@app.get("/chapters")
async def chapters():
    """List every document title with the number of sections it contains.

    Returns ``{"error": "no_data"}`` when ``DOCS_PATH`` does not exist.
    Otherwise returns ``{"chapters": [{"title", "sections"}, ...]}`` sorted
    by title. Documents without a title are counted under "Unknown".
    """
    if not DOCS_PATH.exists():
        return JSONResponse({"error": "no_data"})
    with open(DOCS_PATH, 'r', encoding='utf-8') as f:
        # Blank lines are skipped so a stray newline cannot break parsing.
        counts = Counter(
            json.loads(line).get("title", "Unknown")
            for line in f
            if line.strip()
        )
    result = [{"title": t, "sections": c} for t, c in sorted(counts.items())]
    return {"chapters": result}

@app.get("/featured-search")
async def featured_search():
    """Return today's featured query together with its top five hits.

    Responds with ``{"error": "index_missing"}`` when no index exists.
    ``total_available`` is the size of the featured-search pool.
    """
    if not have_index():
        return JSONResponse({"error": "index_missing"})
    todays_query = get_todays_search()
    return {
        "query": todays_query,
        "results": search_whoosh(todays_query, 5),
        "total_available": len(FEATURED_SEARCHES),
    }

@app.get("/toc")
async def table_of_contents():
    """Build a table of contents grouped by document title.

    Returns ``{"error": "no_data"}`` when ``DOCS_PATH`` does not exist.
    Otherwise each entry of ``toc`` holds the title, its section names in
    file order, the section count and the total word count; entries are
    sorted by title. ``total_documents`` is the sum of all section counts.
    """
    if not DOCS_PATH.exists():
        return JSONResponse({"error": "no_data"})
    toc: dict[str, dict] = {}
    with open(DOCS_PATH, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline) in the JSONL file.
            if not line.strip():
                continue
            d = json.loads(line)
            entry = toc.setdefault(d.get("title", "Unknown"), {"sections": [], "word_count": 0})
            entry["sections"].append(d.get("section", ""))
            entry["word_count"] += len(d.get("content", "").split())
    result = [
        {
            "title": title,
            "section_count": len(info["sections"]),
            "word_count": info["word_count"],
            "sections": info["sections"],
        }
        for title, info in sorted(toc.items())
    ]
    return {"toc": result, "total_documents": sum(t["section_count"] for t in result)}

@app.get("/glossary")
async def glossary():
    """Build an A-Z glossary from bold terms and section headings.

    Terms come from two sources, collected in this order:

    1. ``**Bold**`` spans in document content that start with a capital
       letter, are longer than two characters and are not all upper-case.
    2. Section names with any leading numbering removed, excluding names
       that start with "CHAPTER".

    Returns ``{"error": "no_data"}`` when ``DOCS_PATH`` does not exist.
    Otherwise ``glossary`` maps each initial letter to its terms (sorted
    case-insensitively); every term carries the count of distinct
    title/section locations and at most five of them. ``total_terms`` is
    the number of terms across all letters.
    """
    if not DOCS_PATH.exists():
        return JSONResponse({"error": "no_data"})

    # Read the file once so both passes below see the same snapshot, even if
    # a re-index rewrites it while this request is running.
    with open(DOCS_PATH, 'r', encoding='utf-8') as f:
        docs = [json.loads(line) for line in f if line.strip()]

    term_locations = defaultdict(list)
    bold_pattern = re.compile(r'\*\*([A-Z][A-Za-z\s\-/()]{2,40})\*\*')
    for d in docs:
        for match in bold_pattern.findall(d.get("content", "")):
            term = match.strip()
            if len(term) > 2 and not term.isupper():
                term_locations[term].append({
                    "title": d.get("title", ""),
                    "section": d.get("section", ""),
                })

    for d in docs:
        section = d.get("section", "")
        if section and len(section) > 2:
            # Drop leading numbering such as "3.2 " from the heading.
            clean = re.sub(r'^\d+[\.\d]*\s*', '', section).strip()
            if clean and not clean.startswith("CHAPTER"):
                term_locations[clean].append({
                    "title": d.get("title", ""),
                    "section": section,
                })

    glossary_by_letter = defaultdict(list)
    for term in sorted(term_locations, key=str.lower):
        first_letter = term[0].upper()
        if not first_letter.isalpha():
            continue
        unique_locs = []
        seen = set()
        for loc in term_locations[term]:
            key = (loc["title"], loc["section"])
            if key not in seen:
                seen.add(key)
                unique_locs.append(loc)
        glossary_by_letter[first_letter].append({
            "term": term,
            "count": len(unique_locs),
            "locations": unique_locs[:5]
        })
    return {
        "glossary": dict(sorted(glossary_by_letter.items())),
        "total_terms": sum(len(v) for v in glossary_by_letter.values())
    }

def get_related_concepts(query: str) -> list:
    """Suggest up to eight terms related to ``query`` using ``CONCEPT_MAP``.

    For every concept whose name contains the lower-cased query (or is
    contained in it), all of its neighbors not already present in the query
    are suggested. Otherwise, if one of the concept's neighbors matches the
    query in the same way, the concept itself and its remaining neighbors
    are suggested. Duplicates are removed case-insensitively, keeping the
    first occurrence. A blank query yields an empty list.
    """
    if not query or not query.strip():
        return []

    needle = query.lower()
    candidates = []
    for concept, neighbors in CONCEPT_MAP.items():
        concept_lc = concept.lower()
        if concept_lc in needle or needle in concept_lc:
            candidates.extend(n for n in neighbors if n.lower() not in needle)
            continue

        # Only the first matching neighbor counts for this concept.
        matched = next(
            (n for n in neighbors if n.lower() in needle or needle in n.lower()),
            None,
        )
        if matched is not None:
            candidates.append(concept)
            candidates.extend(
                n for n in neighbors if n.lower() not in needle and n != matched
            )

    # Insertion-ordered dict keeps the first spelling seen for each term.
    first_seen = {}
    for candidate in candidates:
        first_seen.setdefault(candidate.lower(), candidate)
    return list(first_seen.values())[:8]

def _stream_script_output(script: str) -> None:
    """Run ``python <script>`` and forward each non-blank output line to the progress queue."""
    process = subprocess.Popen(
        ["python", script],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1
    )
    if process.stdout:
        for line in process.stdout:
            line = line.strip()
            if line:
                progress_queue.put(line)
    process.wait()


def run_indexing_with_progress():
    """Run the crawler and then the indexer, streaming their output.

    Every non-blank line printed by ``crawler.py`` and ``indexer.py``
    (stdout and stderr combined) is pushed onto ``progress_queue``, framed
    by ``STATUS:`` messages. Any exception is reported as a single
    ``ERROR:`` message and not raised, because this function runs in a
    background thread.
    """
    try:
        # Clear the queue. get_nowait() is used so that a concurrent consumer
        # taking the last item between the check and the read cannot leave
        # this thread blocked forever.
        while True:
            try:
                progress_queue.get_nowait()
            except queue.Empty:
                break

        progress_queue.put("STATUS: Starting crawling process...")
        _stream_script_output("crawler.py")

        progress_queue.put("STATUS: Crawling complete, building search index...")
        _stream_script_output("indexer.py")

        progress_queue.put("STATUS: Indexing complete!")

    except Exception as e:
        progress_queue.put(f"ERROR: {str(e)}")

@app.post("/reindex")
async def reindex():
    """Kick off crawling and indexing in a daemon thread and return at once."""
    worker = threading.Thread(target=run_indexing_with_progress, daemon=True)
    worker.start()
    return {"status": "started"}

@app.get("/indexing-progress")
async def indexing_progress():
    """Stream indexing progress via Server-Sent Events.

    Each queued progress message is sent as ``{"message": ...}``. When no
    message arrives within one second a ``{"heartbeat": true}`` event is
    sent to keep the connection alive. The stream ends after a message
    starting with "STATUS: Indexing complete" or on any unexpected error.
    """
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    }

    def event_stream():
        finished = False
        while not finished:
            try:
                message = progress_queue.get(timeout=1)
                yield f"data: {json.dumps({'message': message})}\n\n"
                # The completion message is delivered first, then the loop stops.
                finished = message.startswith("STATUS: Indexing complete")
            except queue.Empty:
                yield f"data: {json.dumps({'heartbeat': True})}\n\n"
            except Exception:
                break

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers=sse_headers,
    )

def _static_screenshot_response() -> Response:
    """Serve the bundled screenshot image, or an empty 404 if it cannot be read."""
    try:
        with open("static/swmm-manual-collection.png", "rb") as f:
            static_content = f.read()
    except OSError:
        return Response(content=b"", status_code=404)
    return Response(
        content=static_content,
        media_type="image/png",
        headers={"Content-Type": "image/png"}
    )


@app.get("/live-screenshot")
async def live_screenshot():
    """Capture a live screenshot of the SWMM manual website.

    The image is requested from the screenshotapi.net service. A 200
    response is passed through as a PNG that clients may cache for five
    minutes. Any other status, or any error while contacting the service,
    falls back to the static image shipped with the app; if that file
    cannot be read either, an empty 404 response is returned.
    """
    # Using screenshotapi.net - free tier available
    screenshot_url = "https://shot.screenshotapi.net/screenshot"
    params = {
        "url": "https://swmm-manual.netlify.app/",
        "width": "1200",
        "height": "800",
        "output": "image",
        "file_type": "png",
        "wait_for_event": "load",
        "fresh": "true"  # Get a fresh screenshot, not cached
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(screenshot_url, params=params)
    except Exception:
        # Deliberately broad: this endpoint is best effort and must always
        # answer with some image response.
        return _static_screenshot_response()

    if response.status_code != 200:
        return _static_screenshot_response()

    return Response(
        content=response.content,
        media_type="image/png",
        headers={
            "Cache-Control": "public, max-age=300",  # Cache for 5 minutes
            "Content-Type": "image/png"
        }
    )

@app.get("/healthz")
async def health():
    """Liveness endpoint; also reports whether the search index exists."""
    index_ready = have_index()
    return {"status": "ok", "index": index_ready}

@app.get("/source", response_class=HTMLResponse)
async def view_source(request: Request):
    """Display the source code of the application.

    The core source files are always listed; a missing one is shown as a
    "# <name> not found" placeholder. The HTML templates and the JavaScript
    file are listed only when they exist.
    """
    required_files = ("app.py", "main.py", "crawler.py", "indexer.py", "utils.py", "requirements.txt")
    optional_files = ("templates/index.html", "templates/source.html", "static/app.js")

    file_contents = {}
    for filename in required_files:
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                file_contents[filename] = f.read()
        except FileNotFoundError:
            file_contents[filename] = f"# {filename} not found"

    for filename in optional_files:
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                file_contents[filename] = f.read()
        except FileNotFoundError:
            continue

    context = {
        "request": request,
        "files": file_contents,
        "title": "Source Code - SWMM5 Manual Search"
    }
    return templates.TemplateResponse("source.html", context)

main.py

import os
import uvicorn
from app import app

# Script entry point: serve the app with uvicorn on all network interfaces.
# The port is read from the PORT environment variable and defaults to 5000.
if __name__ == "__main__":
    port = int(os.getenv("PORT", 5000))
    uvicorn.run(app, host="0.0.0.0", port=port)

crawler.py

import asyncio
import json
import os
from pathlib import Path
from typing import Iterable, Dict
from urllib.parse import urlparse, urljoin

import httpx
from bs4 import BeautifulSoup
from urllib.robotparser import RobotFileParser

from utils import normalize_url, pick_best_container, textify, SAME_HOST

# Root of the site being crawled; relative markdown paths are joined to it.
BASE_URL = "https://swmm-manual.netlify.app/"
# JSON-lines output file written by crawl(), one document per line.
OUT_PATH = Path("data/docs.jsonl")
# Read from the MAX_PAGES environment variable, defaulting to 250.
MAX_PAGES = int(os.getenv("MAX_PAGES", "250"))

# Global progress reporting
progress_callback = None

def set_progress_callback(callback):
    """Install ``callback`` as the receiver of progress messages.

    Passing ``None`` (or any falsy value) restores printing to stdout.
    """
    global progress_callback
    progress_callback = callback

def report_progress(message):
    """Send ``message`` to the installed callback, or print it if none is set."""
    sink = progress_callback or print
    sink(message)

def ensure_dirs():
    """Create the ``data`` and ``index`` directories if they do not exist."""
    for folder in ("data", "index"):
        Path(folder).mkdir(exist_ok=True)

async def fetch_robots_allowed(client: httpx.AsyncClient) -> RobotFileParser:
    """Download the site's robots.txt and return a parser loaded with it.

    If the download or parsing fails for any reason, the parser is loaded
    with a permissive rule set that allows every path for every agent.
    """
    parser = RobotFileParser()
    try:
        response = await client.get("https://swmm-manual.netlify.app/robots.txt", timeout=20)
        parser.parse(response.text.splitlines())
    except Exception:
        allow_all = ["User-agent: *", "Allow: /"]
        parser.parse(allow_all)
    return parser

def split_into_sections(url: str, title: str, container) -> Iterable[Dict]:
    """
    Split a page into sub-documents at its H2/H3 headings.

    Each heading starts a section whose content is the text of the sibling
    elements that follow it, up to the next H2/H3 sibling. A page without
    such headings becomes a single document with an empty section name.
    Sections and pages whose text is empty are omitted.
    """
    page_title = title or ""

    headings = container.select("h2, h3")
    if not headings:
        # No headings: treat the whole container as one blob.
        whole_text = textify(container.get_text(" ", strip=True))
        if not whole_text:
            return []
        return [{"url": url, "title": page_title, "section": "", "content": whole_text}]

    docs = []

    def emit(section_name, pieces):
        # Append one section document unless its text is empty.
        body = textify(" ".join(pieces))
        if body:
            docs.append({
                "url": url,
                "title": page_title,
                "section": section_name,
                "content": body
            })

    section_name = page_title
    pieces = []
    for heading in headings:
        # Close the previous section before starting the next one.
        if pieces:
            emit(section_name, pieces)
            pieces = []
        section_name = textify(heading.get_text(" ", strip=True))
        for sibling in heading.next_siblings:
            if getattr(sibling, "name", None) in {"h2", "h3"}:
                break
            if hasattr(sibling, "get_text"):
                pieces.append(sibling.get_text(" ", strip=True))

    # Flush the final section.
    if pieces:
        emit(section_name, pieces)
    return docs

async def discover_markdown_files(client: httpx.AsyncClient) -> list:
    """Return the markdown file paths referenced by the site's main page.

    The main page is scanned for elements whose ``data-src`` attribute ends
    in ".md". If none are found, a built-in list of known files is returned
    instead. If the page cannot be fetched or parsed, the failure is
    reported and an empty list is returned.
    """
    known_files = [
        "Manual/manual/Intro.md",
        "Manual/manual/Chapter1.md",
        "Manual/manual/Chapter2.md",
        "Manual/manual/Chapter3.md",
        "Manual/manual/Chapter4.md",
        "Manual/manual/Chapter5.md",
        "Manual/manual/Chapter6.md",
        "Manual/manual/Chapter7.md",
        "Manual/manual/Chapter8.md",
        "Manual/manual/Chapter9.md",
        "Manual/manual/Chapter10.md",
        "Manual/manual/Chapter11.md",
        "Manual/manual/Chapter12.md",
        "Manual/manual/AppendixA.md",
        "Manual/manual/AppendixB.md",
        "Manual/manual/AppendixC.md",
        "Manual/manual/AppendixD.md",
        "Manual/manual/AppendixE.md",
        "VolumeI/sections/Disclaimer.md"
    ]
    try:
        page = await client.get(BASE_URL)
        page.raise_for_status()
        soup = BeautifulSoup(page.text, "html.parser")

        sources = (tag.get("data-src") for tag in soup.find_all(attrs={"data-src": True}))
        discovered = [src for src in sources if src and src.endswith(".md")]

        if not discovered:
            # Fallback to known files if discovery finds nothing.
            report_progress("STATUS: Using fallback file list")
            return known_files

        report_progress(f"STATUS: Discovered {len(discovered)} markdown files automatically")
        return discovered
    except Exception as e:
        report_progress(f"FAILED: Could not discover files - {e}")
        return []

def slugify(text: str) -> str:
    """Turn heading text into a lower-case, hyphen-separated anchor slug.

    Characters other than word characters, whitespace and hyphens are
    dropped; the result is trimmed and lower-cased; every run of hyphens
    and/or whitespace collapses to a single hyphen.
    """
    import re
    kept = re.sub(r'[^\w\s-]', '', text)
    normalized = kept.strip().lower()
    return re.sub(r'[-\s]+', '-', normalized)

def process_markdown_content(markdown_text: str, title: str, base_url: str) -> list:
    """Split markdown text into one document per heading-delimited section.

    Every line that starts with ``#`` (after trimming) begins a new section
    named by the text after the leading ``#`` characters. Each section with
    non-empty content becomes a dict with ``url``, ``title``, ``section``
    and ``content``; the url is ``base_url`` plus ``#<slug>`` of the section
    name, or just ``base_url`` for content before the first heading.
    """
    docs = []

    def emit(section_name, body_lines):
        # Append one section document unless it has no content.
        if not body_lines:
            return
        content_text = textify('\n'.join(body_lines))
        if not content_text.strip():
            return
        section_url = base_url + "#" + slugify(section_name) if section_name else base_url
        docs.append({
            "url": section_url,
            "title": title,
            "section": section_name,
            "content": content_text
        })

    section_name = ""
    body_lines = []
    for raw_line in markdown_text.split('\n'):
        stripped = raw_line.strip()
        if stripped.startswith('#'):
            # A heading closes the previous section and opens a new one.
            emit(section_name, body_lines)
            body_lines = []
            section_name = stripped.lstrip('#').strip()
        else:
            body_lines.append(stripped)

    # Flush the final section.
    emit(section_name, body_lines)
    return docs

async def crawl():
    """Download every discovered markdown file and write the sections to disk.

    Each file is fetched relative to ``BASE_URL``, split into sections and
    collected; files that fail to download are reported and skipped. All
    collected sections are written to ``OUT_PATH`` as JSON lines.

    Returns:
        The number of section documents written, or 0 when no markdown
        files were found (in which case ``OUT_PATH`` is not touched).
    """
    ensure_dirs()
    collected = []

    report_progress("STATUS: Starting to crawl SWMM manual markdown files...")

    async with httpx.AsyncClient(
        follow_redirects=True,
        headers={"User-Agent": "SWMM-SearchBot/1.0"},
        timeout=30.0
    ) as client:

        # Automatically discover markdown files
        md_files = await discover_markdown_files(client)
        if not md_files:
            report_progress("ERROR: No markdown files found")
            return 0

        for md_file in md_files:
            url = urljoin(BASE_URL, md_file)
            report_progress(f"CRAWLING: {url}")

            try:
                response = await client.get(url)
                response.raise_for_status()
                markdown_content = response.text
            except Exception as e:
                report_progress(f"FAILED: {url} - {str(e)}")
                continue

            # The title is derived from the file name without its extension;
            # chapters, appendices and other files all use the same format.
            filename = md_file.split('/')[-1].replace('.md', '')
            title = f"SWMM Manual {filename}"

            page_docs = process_markdown_content(markdown_content, title, url)
            collected.extend(page_docs)

            report_progress(f"INDEXED: {title} ({len(page_docs)} sections)")

    # write docs
    with OUT_PATH.open("w", encoding="utf-8") as out:
        for doc in collected:
            out.write(json.dumps(doc, ensure_ascii=False) + "\n")

    report_progress(f"STATUS: Completed! Wrote {len(collected)} docs to {OUT_PATH}")
    return len(collected)

# Script entry point: run the crawl to completion and print how many
# documents it produced.
if __name__ == "__main__":
    result = asyncio.run(crawl())
    print(f"Crawling completed with {result} documents")

indexer.py

import json
from pathlib import Path
from whoosh import index
from whoosh.fields import Schema, TEXT, ID
from whoosh.analysis import StemmingAnalyzer

# JSON-lines input file read by build_index(), one document per line.
DOCS_PATH = Path("data/docs.jsonl")
# Directory in which build_index() creates the Whoosh index.
INDEX_DIR = Path("index")

def build_index():
    """Rebuild the Whoosh index in ``INDEX_DIR`` from ``DOCS_PATH``.

    Any existing index files are removed first so the result depends only on
    the current contents of ``DOCS_PATH``. Each JSON line becomes one
    document keyed by its url plus section name; a later line with the same
    key replaces the earlier one. Progress is printed every 50 documents.

    Raises:
        FileNotFoundError: if ``DOCS_PATH`` does not exist.
        KeyError: if a document has no ``url``.
    """
    INDEX_DIR.mkdir(exist_ok=True)
    schema = Schema(
        url=ID(stored=True, unique=True),
        title=TEXT(stored=True, analyzer=StemmingAnalyzer()),
        section=TEXT(stored=True, analyzer=StemmingAnalyzer()),
        content=TEXT(stored=True, analyzer=StemmingAnalyzer())
    )
    if index.exists_in(INDEX_DIR):
        ix = index.open_dir(INDEX_DIR)
        ix.close()
        # recreate for deterministic rebuild
        for p in INDEX_DIR.glob("*"):
            # Only files are removed; unlink() would fail on a directory.
            if p.is_file():
                p.unlink()
    ix = index.create_in(INDEX_DIR, schema)

    doc_count = 0
    # The writer is used as a context manager so a failure while reading the
    # documents cancels the write and does not leave it open.
    with ix.writer(limitmb=256, procs=1, multisegment=True) as writer:
        with DOCS_PATH.open("r", encoding="utf-8") as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline) in the JSONL file.
                if not line.strip():
                    continue
                d = json.loads(line)
                doc_count += 1
                # Deduplicate by URL + section
                # NOTE(review): this key is also what gets stored and returned
                # as "url"; when the incoming url already has a "#anchor" the
                # stored value ends up with two fragments - confirm that the
                # consumers of search results expect this.
                unique_url = d["url"] + ("#" + d["section"] if d.get("section") else "")
                writer.update_document(
                    url=unique_url,
                    title=d.get("title", ""),
                    section=d.get("section", ""),
                    content=d.get("content", "")
                )
                if doc_count % 50 == 0:
                    print(f"BUILDING: Processed {doc_count} document sections")
    print("Index built at", INDEX_DIR)

# Script entry point: rebuild the search index from the crawled documents.
if __name__ == "__main__":
    build_index()

utils.py

import re
from urllib.parse import urljoin, urlparse, urldefrag

SAME_HOST = "swmm-manual.netlify.app"

def normalize_url(base: str, href: str) -> str | None:
    if not href:
        return None
    # remove in-page fragments
    href, _ = urldefrag(href)
    if not href:
        return None
    # no javascript/mailto/tel
    if href.startswith(("javascript:", "mailto:", "tel:")):
        return None
    url = urljoin(base, href)
    parsed = urlparse(url)
    if parsed.netloc != SAME_HOST:
        return None
    # stay on https
    if parsed.scheme not in ("http", "https"):
        return None
    return parsed.geturl()

def textify(s) -> str:
    """Collapse every run of whitespace in ``s`` to one space and trim the ends.

    A falsy ``s`` (such as ``None`` or ``""``) yields an empty string.
    """
    collapsed = re.sub(r"\s+", " ", s or "")
    return collapsed.strip()

# CSS selectors tried in order by pick_best_container(); the first one whose
# element holds more than 200 characters of text is used.
MAIN_SELECTORS = [
    "main",
    "article",
    "#content",
    ".content",
    "#app main",
    "body"  # fallback
]

def pick_best_container(soup):
    """Return the first main-content element holding more than 200 characters.

    The selectors in ``MAIN_SELECTORS`` are tried in order; if none of them
    yields an element with enough text, ``soup`` itself is returned.
    """
    candidates = (soup.select_one(selector) for selector in MAIN_SELECTORS)
    substantial = (el for el in candidates if el and text_len(el) > 200)
    return next(substantial, soup)  # fallback

def text_len(el) -> int:
    """Return the length of the element's space-joined, stripped text (0 for a falsy element)."""
    if not el:
        return 0
    return len(el.get_text(" ", strip=True))

requirements.txt

fastapi==0.115.0
uvicorn[standard]==0.30.6
jinja2==3.1.4
httpx[http2]==0.27.2
beautifulsoup4==4.12.3
whoosh==2.7.4
playwright==1.47.0
python-multipart==0.0.9
beautifulsoup4==4.12.3
fastapi==0.115.0
httpx[http2]==0.27.2
jinja2==3.1.4
playwright==1.47.0
python-multipart==0.0.9
uvicorn[standard]==0.30.6
whoosh==2.7.4

templates/index.html

<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width,initial-scale=1"/>
  <link rel="icon" type="image/png" href="/static/favicon.png"/>
  <title>SWMM5 Manual Search</title>
  <style>
    :root {
      --bg: #f4f8fb;
      --bg-card: #ffffff;
      --bg-surface: #eaf2f8;
      --bg-input: #ffffff;
      --text: #1a2a3a;
      --text-muted: #5a7a8a;
      --text-section: #3a6a7a;
      --border: #c8dae8;
      --border-light: #dfe9f0;
      --accent: #0277bd;
      --accent-hover: #01579b;
      --accent-light: #e1f0fa;
      --header-gradient-start: #0277bd;
      --header-gradient-end: #00838f;
      --tab-active-bg: var(--bg-card);
      --tab-hover: #dce8f0;
      --featured-bg-start: #e0f2f1;
      --featured-bg-end: #b2dfdb;
      --featured-border: #80cbc4;
      --featured-text: #004d40;
      --featured-link: #00695c;
      --featured-snippet-border: #a5d6d0;
      --related-bg: #e0f7fa;
      --related-border: #80deea;
      --related-tag-bg: #ffffff;
      --related-tag-border: #80deea;
      --related-tag-text: #006064;
      --related-heading: #00838f;
      --toc-header-bg: var(--bg-surface);
      --toc-header-hover: #d4e4ef;
      --glossary-term-bg: transparent;
      --glossary-term-hover-bg: var(--accent);
      --glossary-term-hover-text: #fff;
      --result-border: #e0ecf2;
      --inp-bg: #f0f7ed;
      --inp-border: #b5d6a7;
      --inp-header: #2e7d32;
      --inp-field-bg: #fafdf8;
      --copy-btn: #546e7a;
      --copy-btn-hover: #37474f;
      --chip-bg: #e8f4fd;
      --chip-border: #90caf9;
      --chip-text: #0277bd;
      --stats-bg: linear-gradient(135deg, #e3f2fd, #bbdefb);
      --stats-border: #90caf9;
      --stats-text: #1565c0;
      --shadow: 0 2px 8px rgba(0,40,80,0.08);
      --shadow-hover: 0 4px 16px rgba(0,40,80,0.12);
      --footer-bg: #e8f0f5;
      --footer-text: #5a7a8a;
    }

    [data-theme="dark"] {
      --bg: #0a1929;
      --bg-card: #0d2137;
      --bg-surface: #112840;
      --bg-input: #112840;
      --text: #d4e4f4;
      --text-muted: #7a9ab8;
      --text-section: #90caf9;
      --border: #1e3a52;
      --border-light: #1a3348;
      --accent: #4fc3f7;
      --accent-hover: #81d4fa;
      --accent-light: #0d2a42;
      --header-gradient-start: #0d2a42;
      --header-gradient-end: #0a2233;
      --tab-active-bg: var(--bg-card);
      --tab-hover: #163050;
      --featured-bg-start: #0d2a2e;
      --featured-bg-end: #0a3030;
      --featured-border: #1a5050;
      --featured-text: #80cbc4;
      --featured-link: #4db6ac;
      --featured-snippet-border: #1a4040;
      --related-bg: #0d2a35;
      --related-border: #1a4a5a;
      --related-tag-bg: #112840;
      --related-tag-border: #1a4a5a;
      --related-tag-text: #4dd0e1;
      --related-heading: #4dd0e1;
      --toc-header-bg: var(--bg-surface);
      --toc-header-hover: #163050;
      --glossary-term-bg: var(--bg-surface);
      --glossary-term-hover-bg: var(--accent);
      --glossary-term-hover-text: #0a1929;
      --result-border: #1e3a52;
      --inp-bg: #0d2a1e;
      --inp-border: #1a5030;
      --inp-header: #66bb6a;
      --inp-field-bg: #0a2218;
      --copy-btn: #90a4ae;
      --copy-btn-hover: #cfd8dc;
      --chip-bg: #112840;
      --chip-border: #1a4a6a;
      --chip-text: #4fc3f7;
      --stats-bg: linear-gradient(135deg, #0d2137, #112840);
      --stats-border: #1a4a6a;
      --stats-text: #4fc3f7;
      --shadow: 0 2px 8px rgba(0,0,0,0.3);
      --shadow-hover: 0 4px 16px rgba(0,0,0,0.4);
      --footer-bg: #0d2137;
      --footer-text: #5a7a8a;
    }

    * { box-sizing: border-box; }
    body {
      font-family: system-ui, -apple-system, Segoe UI, Roboto, sans-serif;
      margin: 0; padding: 0;
      background: var(--bg);
      color: var(--text);
      line-height: 1.5;
      min-height: 100vh;
      transition: background 0.3s, color 0.3s;
      display: flex;
      flex-direction: column;
    }
    .container { max-width: 1060px; margin: 0 auto; padding: 1.5rem 1.5rem 2rem; width: 100%; }
    .main-content { flex: 1; }

    .water-header {
      background: linear-gradient(135deg, var(--header-gradient-start), var(--header-gradient-end));
      padding: 1.2rem 0;
      position: relative;
      overflow: hidden;
    }
    .water-header::before {
      content: '';
      position: absolute;
      bottom: -5px;
      left: 0; right: 0;
      height: 20px;
      background: var(--bg);
      border-radius: 50% 50% 0 0 / 100% 100% 0 0;
    }
    .water-header .container {
      display: flex;
      align-items: center;
      justify-content: space-between;
      flex-wrap: wrap;
      gap: .5rem;
      padding-top: 0; padding-bottom: 0;
    }
    .header-left { display: flex; align-items: baseline; gap: .8rem; flex-wrap: wrap; }
    .header-left h1 { font-size: 1.4rem; margin: 0; color: #fff; font-weight: 700; }
    .header-left p { margin: 0; color: rgba(255,255,255,0.8); font-size: .82rem; }
    .header-left a { color: rgba(255,255,255,0.95); }
    .header-right { display: flex; align-items: center; gap: .5rem; }
    .pill { display: inline-block; padding: .2rem .55rem; border: 1px solid rgba(255,255,255,0.3); border-radius: 999px; font-size: .72rem; color: rgba(255,255,255,0.85); text-decoration: none; }
    .pill:hover { background: rgba(255,255,255,0.15); }

    .theme-toggle {
      background: rgba(255,255,255,0.15);
      border: 1px solid rgba(255,255,255,0.3);
      border-radius: 999px;
      padding: .25rem .6rem;
      color: #fff;
      cursor: pointer;
      font-size: .78rem;
      display: flex;
      align-items: center;
      gap: .3rem;
      transition: background 0.2s;
    }
    .theme-toggle:hover { background: rgba(255,255,255,0.25); }
    .theme-icon { font-size: 1rem; }

    .stats-bar {
      background: var(--stats-bg);
      border: 1px solid var(--stats-border);
      border-radius: 10px;
      padding: .7rem 1rem;
      margin-bottom: 1rem;
      display: flex;
      flex-wrap: wrap;
      gap: .4rem 1.2rem;
      align-items: center;
      font-size: .82rem;
      color: var(--stats-text);
    }
    .stats-bar .stat-item { white-space: nowrap; }
    .stats-bar .stat-divider { color: var(--border); }

    .search-hero {
      text-align: center;
      margin-bottom: .8rem;
    }
    .search-hero h2 {
      font-size: 1.15rem;
      margin: 0 0 .2rem 0;
      color: var(--text);
      font-weight: 600;
    }
    .search-hero p {
      margin: 0;
      font-size: .85rem;
      color: var(--text-muted);
    }

    .search-form-wrapper {
      background: var(--bg-card);
      border: 1px solid var(--border);
      border-radius: 12px;
      padding: 1rem;
      box-shadow: var(--shadow);
      margin-bottom: .6rem;
    }
    form { display: flex; gap: .4rem; width: 100%; align-items: stretch; }
    input[type="text"] {
      flex: 1; padding: .65rem .85rem; font-size: 1rem;
      border: 2px solid var(--border); border-radius: .5rem;
      background: var(--bg-input); color: var(--text);
      transition: border-color 0.2s, box-shadow 0.2s;
    }
    input[type="text"]:focus { outline: none; border-color: var(--accent); box-shadow: 0 0 0 3px rgba(2,119,189,0.15); }
    select {
      padding: .65rem .4rem; font-size: .88rem;
      border: 2px solid var(--border); border-radius: .5rem;
      background: var(--bg-input); color: var(--text); cursor: pointer;
    }
    button[type="submit"], .btn {
      padding: .65rem 1.1rem; font-size: .95rem;
      border: none; background: var(--accent); color: #fff;
      border-radius: .5rem; cursor: pointer; font-weight: 600;
      transition: background 0.2s, transform 0.1s;
    }
    button[type="submit"]:hover, .btn:hover { background: var(--accent-hover); }
    button[type="submit"]:active { transform: scale(0.97); }

    .filter-row {
      display: flex; gap: .4rem; margin-top: .5rem; align-items: center; flex-wrap: wrap;
    }
    .filter-row label { font-size: .82rem; color: var(--text-muted); white-space: nowrap; }
    .filter-row select { font-size: .82rem; padding: .35rem .4rem; flex: 1; max-width: 280px; }

    .example-queries {
      display: flex; flex-wrap: wrap; gap: .35rem; align-items: center;
      margin-top: .5rem; padding: 0 .2rem;
    }
    .example-queries .label { font-size: .8rem; color: var(--text-muted); margin-right: .1rem; }
    .example-chip {
      padding: .2rem .55rem;
      background: var(--chip-bg);
      border: 1px solid var(--chip-border);
      border-radius: 999px;
      font-size: .78rem;
      color: var(--chip-text);
      cursor: pointer;
      transition: all 0.15s;
      text-decoration: none;
    }
    .example-chip:hover { background: var(--accent); color: #fff; border-color: var(--accent); }

    .hint { color: var(--text-muted); font-size: .95rem; margin-top: .5rem; }
    .muted { color: var(--text-muted); font-size: .88rem; }

    .results-header { display: flex; justify-content: space-between; align-items: center; margin: .6rem 0 .3rem; }

    .result { padding: .9rem; margin: .5rem 0; border: 1px solid var(--result-border); border-radius: 8px; background: var(--bg-card); transition: box-shadow 0.2s; }
    .result:hover { box-shadow: var(--shadow-hover); }
    .result-title-row { display: flex; justify-content: space-between; align-items: flex-start; gap: .5rem; }
    .result a.result-link { text-decoration: none; font-weight: 600; color: var(--accent); font-size: .93rem; }
    .result a.result-link:hover { text-decoration: underline; }
    .section { color: var(--text-section); font-size: .82rem; margin: .15rem 0; }
    .snippet { margin-top: .25rem; color: var(--text); font-size: .88rem; line-height: 1.5; }
    .result-actions { display: flex; gap: .5rem; margin-top: .4rem; flex-wrap: wrap; }
    .result-action {
      font-size: .75rem; padding: .15rem .5rem;
      border: 1px solid var(--border);
      border-radius: 4px;
      color: var(--copy-btn);
      cursor: pointer;
      background: transparent;
      transition: all 0.15s;
      text-decoration: none;
      display: inline-flex; align-items: center; gap: .25rem;
    }
    .result-action:hover { color: var(--accent); border-color: var(--accent); }

    .dual-pane { display: grid; grid-template-columns: 1fr; gap: 1rem; margin-top: .5rem; }
    @media (min-width: 760px) { .dual-pane { grid-template-columns: 1fr 360px; } }

    .inp-panel {
      background: var(--inp-bg);
      border: 1px solid var(--inp-border);
      border-radius: 10px;
      padding: .9rem;
      position: sticky;
      top: 1rem;
      max-height: calc(100vh - 2rem);
      overflow-y: auto;
    }
    .inp-panel h3 { margin: 0 0 .5rem 0; font-size: .9rem; color: var(--inp-header); }
    .inp-section { margin-bottom: .8rem; }
    .inp-section-title { font-weight: 700; font-size: .85rem; color: var(--inp-header); margin-bottom: .3rem; }
    .inp-section-desc { font-size: .78rem; color: var(--text-muted); margin-bottom: .3rem; }
    .inp-table-wrap { overflow-x: auto; -webkit-overflow-scrolling: touch; }
    .inp-field-table { width: 100%; font-size: .75rem; border-collapse: collapse; min-width: 280px; }
    .inp-field-table th { text-align: left; padding: .2rem .3rem; border-bottom: 1px solid var(--inp-border); color: var(--text-muted); font-weight: 600; }
    .inp-field-table td { padding: .2rem .3rem; border-bottom: 1px solid var(--border-light); color: var(--text); }
    .inp-example { font-family: monospace; font-size: .72rem; background: var(--inp-field-bg); padding: .3rem .5rem; border-radius: 4px; margin-top: .3rem; white-space: pre-wrap; word-break: break-all; color: var(--text); }
    .inp-cross-links { display: flex; flex-wrap: wrap; gap: .3rem; margin-top: .3rem; }
    .inp-cross-link {
      font-size: .72rem; padding: .15rem .4rem;
      background: transparent; border: 1px solid var(--inp-border);
      border-radius: 4px; color: var(--inp-header);
      text-decoration: none; cursor: pointer;
      transition: all 0.15s;
    }
    .inp-cross-link:hover { background: var(--inp-header); color: #fff; }

    .tabs { display: flex; gap: 0; margin: .8rem 0 0 0; border-bottom: 2px solid var(--border); }
    .tab {
      padding: .5rem 1rem; cursor: pointer;
      border: 1px solid transparent; border-bottom: none;
      border-radius: .5rem .5rem 0 0; font-size: .85rem;
      color: var(--text-muted); background: transparent;
      transition: all 0.2s;
    }
    .tab:hover { color: var(--text); background: var(--tab-hover); }
    .tab.active { color: var(--accent); border-color: var(--border); border-bottom: 2px solid var(--bg); margin-bottom: -2px; background: var(--bg); font-weight: 600; }
    .tab-content { display: none; padding: .8rem 0; }
    .tab-content.active { display: block; }

    .featured-box {
      background: linear-gradient(135deg, var(--featured-bg-start), var(--featured-bg-end));
      border: 1px solid var(--featured-border);
      border-radius: 10px; padding: 1rem; margin: .8rem 0;
      box-shadow: var(--shadow);
    }
    .featured-box h3 { margin: 0 0 .2rem 0; font-size: .82rem; color: var(--featured-text); text-transform: uppercase; letter-spacing: .05em; }
    .featured-box .featured-query { font-size: 1.05rem; font-weight: 700; color: var(--featured-link); cursor: pointer; }
    .featured-box .featured-query:hover { text-decoration: underline; }
    .featured-result { padding: .4rem 0; border-bottom: 1px solid var(--featured-snippet-border); }
    .featured-result:last-child { border-bottom: none; }
    .featured-result a { color: var(--featured-link); text-decoration: none; font-weight: 500; font-size: .84rem; }
    .featured-result a:hover { text-decoration: underline; }
    .featured-result .snippet { font-size: .78rem; color: var(--text-muted); margin-top: .05rem; }
    .featured-meta { color: var(--text-muted); font-size: .75rem; margin-top: .4rem; }
    .see-all-link { display: inline-block; margin-top: .3rem; color: var(--accent); cursor: pointer; font-size: .84rem; font-weight: 500; }
    .see-all-link:hover { text-decoration: underline; }

    .toc-container { max-height: 550px; overflow-y: auto; }
    .toc-item { border: 1px solid var(--border-light); border-radius: 6px; margin: .4rem 0; overflow: hidden; transition: box-shadow 0.2s; }
    .toc-item:hover { box-shadow: var(--shadow); }
    .toc-header { display: flex; justify-content: space-between; align-items: center; padding: .5rem .7rem; cursor: pointer; background: var(--toc-header-bg); transition: background 0.2s; }
    .toc-header:hover { background: var(--toc-header-hover); }
    .toc-title { font-weight: 500; font-size: .87rem; color: var(--text); }
    .toc-meta { font-size: .73rem; color: var(--text-muted); white-space: nowrap; }
    .toc-sections { display: none; padding: .4rem .7rem; background: var(--bg-card); border-top: 1px solid var(--border-light); }
    .toc-sections.open { display: block; }
    .toc-section-item { padding: .2rem 0; font-size: .82rem; color: var(--accent); cursor: pointer; }
    .toc-section-item:hover { text-decoration: underline; }
    .toc-stats { display: flex; gap: .6rem; margin-bottom: .6rem; color: var(--text-muted); font-size: .82rem; flex-wrap: wrap; }
    .toc-stat { background: var(--bg-surface); padding: .25rem .6rem; border-radius: 4px; border: 1px solid var(--border-light); }

    .glossary-container { max-height: 550px; overflow-y: auto; }
    .glossary-letter { font-size: 1rem; font-weight: 700; color: var(--accent); margin: .7rem 0 .25rem 0; padding-bottom: .15rem; border-bottom: 2px solid var(--border); }
    .glossary-term {
      display: inline-block; padding: .18rem .45rem; margin: .12rem;
      border: 1px solid var(--border); border-radius: 4px;
      font-size: .78rem; color: var(--text);
      cursor: pointer; transition: all 0.15s;
      background: var(--glossary-term-bg);
    }
    .glossary-term:hover { background: var(--glossary-term-hover-bg); color: var(--glossary-term-hover-text); border-color: var(--accent); }
    .glossary-count { font-size: .65rem; color: var(--text-muted); margin-left: .15rem; }

    .related-box { margin: .6rem 0; padding: .6rem .8rem; background: var(--related-bg); border: 1px solid var(--related-border); border-radius: 8px; }
    .related-box h4 { margin: 0 0 .3rem 0; font-size: .78rem; color: var(--related-heading); text-transform: uppercase; letter-spacing: .04em; }
    .related-tags { display: flex; flex-wrap: wrap; gap: .25rem; }
    .related-tag {
      padding: .18rem .5rem;
      background: var(--related-tag-bg); border: 1px solid var(--related-tag-border);
      border-radius: 999px; font-size: .76rem;
      color: var(--related-tag-text); cursor: pointer; transition: all 0.15s;
    }
    .related-tag:hover { background: var(--accent); color: #fff; border-color: var(--accent); }

    .largest-badge { background: #ef5350; color: #fff; font-size: .58rem; padding: .08rem .25rem; border-radius: 3px; margin-left: .25rem; vertical-align: middle; font-weight: 600; }

    .source-ref {
      display: flex; align-items: center; gap: .5rem; margin-top: .8rem;
      font-size: .78rem; color: var(--text-muted);
    }
    .source-ref a { color: var(--accent); text-decoration: none; }
    .source-ref a:hover { text-decoration: underline; }
    .source-toggle {
      background: transparent; border: 1px solid var(--border);
      border-radius: 4px; padding: .15rem .5rem;
      font-size: .72rem; color: var(--text-muted); cursor: pointer;
      transition: all 0.15s;
    }
    .source-toggle:hover { border-color: var(--accent); color: var(--accent); }
    .source-preview { display: none; margin-top: .5rem; }
    .source-preview img { max-width: 100%; border-radius: 8px; border: 1px solid var(--border); }

    .site-footer {
      background: var(--footer-bg);
      border-top: 1px solid var(--border);
      padding: .8rem 0;
      margin-top: auto;
      text-align: center;
      font-size: .75rem;
      color: var(--footer-text);
      transition: background 0.3s;
    }
    .site-footer a { color: var(--accent); text-decoration: none; }
    .site-footer a:hover { text-decoration: underline; }

    mark {
      background: #fff176;
      color: #1a2a3a;
      padding: 0 .1rem;
      border-radius: 2px;
    }
    [data-theme="dark"] mark {
      background: #f9a825;
      color: #0a1929;
    }

    .toast {
      position: fixed; bottom: 1.5rem; left: 50%; transform: translateX(-50%);
      background: var(--accent); color: #fff;
      padding: .5rem 1.2rem; border-radius: 8px;
      font-size: .85rem; font-weight: 500;
      box-shadow: 0 4px 16px rgba(0,0,0,0.2);
      z-index: 9999; opacity: 0;
      transition: opacity 0.3s;
      pointer-events: none;
    }
    .toast.show { opacity: 1; }

    #build {
      padding: .7rem 1.4rem; font-size: 1rem;
      border: none; background: var(--accent); color: #fff;
      border-radius: .5rem; cursor: pointer; font-weight: 600;
    }
    #build:hover { background: var(--accent-hover); }

    @media (max-width: 768px) {
      .container { padding: .8rem; }
      .water-header { padding: .8rem 0; }
      .water-header .container { flex-direction: column; align-items: flex-start; gap: .4rem; }
      .header-left { width: 100%; }
      .header-left h1 { font-size: 1.15rem; }
      .header-left p { font-size: .75rem; }
      .header-right { width: 100%; justify-content: flex-end; }
      .stats-bar { font-size: .72rem; gap: .25rem .6rem; padding: .5rem .7rem; border-radius: 8px; justify-content: center; }
      .search-form-wrapper { padding: .7rem; border-radius: 10px; }
      form { flex-wrap: wrap; gap: .35rem; }
      input[type="text"] { width: 100%; flex: 1 1 100%; font-size: .95rem; padding: .6rem .7rem; }
      select { font-size: .82rem; }
      button[type="submit"], .btn { flex: 1; min-width: 0; padding: .6rem .8rem; font-size: .9rem; }
      #limit { flex: 0 0 auto; width: 60px; }
      .filter-row { margin-top: .4rem; }
      .filter-row label { font-size: .78rem; }
      .filter-row select { max-width: 100%; flex: 1; font-size: .8rem; }
      .example-queries { gap: .25rem; margin-top: .4rem; }
      .example-chip { font-size: .72rem; padding: .18rem .45rem; }
      .tabs { overflow-x: auto; -webkit-overflow-scrolling: touch; scrollbar-width: none; gap: 0; flex-wrap: nowrap; }
      .tabs::-webkit-scrollbar { display: none; }
      .tab { padding: .4rem .65rem; font-size: .78rem; white-space: nowrap; flex-shrink: 0; }
      .featured-box { padding: .7rem; border-radius: 8px; margin: .6rem 0; }
      .featured-box h3 { font-size: .75rem; }
      .featured-box .featured-query { font-size: .95rem; }
      .featured-result a { font-size: .8rem; }
      .featured-result .snippet { font-size: .74rem; }
      .result { padding: .7rem; margin: .4rem 0; border-radius: 6px; }
      .result a.result-link { font-size: .87rem; }
      .section { font-size: .76rem; }
      .snippet { font-size: .82rem; }
      .result-actions { gap: .35rem; }
      .result-action { font-size: .7rem; padding: .12rem .4rem; }
      .dual-pane { grid-template-columns: 1fr; }
      .inp-panel { position: static; max-height: none; border-radius: 8px; padding: .7rem; margin-top: .5rem; }
      .inp-panel h3 { font-size: .85rem; }
      .inp-field-table { font-size: .7rem; }
      .inp-field-table th, .inp-field-table td { padding: .15rem .2rem; }
      .inp-example { font-size: .68rem; }
      .related-box { padding: .5rem .6rem; }
      .related-box h4 { font-size: .72rem; }
      .related-tag { font-size: .7rem; padding: .15rem .4rem; }
      .toc-container { max-height: 400px; }
      .toc-header { padding: .4rem .55rem; }
      .toc-title { font-size: .8rem; }
      .toc-meta { font-size: .68rem; }
      .toc-stats { font-size: .75rem; gap: .4rem; }
      .toc-stat { padding: .2rem .45rem; font-size: .73rem; }
      .toc-section-item { font-size: .78rem; }
      .glossary-container { max-height: 400px; }
      .glossary-letter { font-size: .9rem; }
      .glossary-term { font-size: .72rem; padding: .15rem .35rem; }
      .source-ref { flex-wrap: wrap; font-size: .72rem; }
      .site-footer { font-size: .7rem; padding: .6rem 0; }
      .toast { font-size: .78rem; padding: .4rem 1rem; bottom: 1rem; }
      .results-header { flex-wrap: wrap; gap: .3rem; }
    }

    @media (max-width: 380px) {
      .container { padding: .6rem; }
      .header-left h1 { font-size: 1rem; }
      .stats-bar { font-size: .65rem; gap: .2rem .5rem; }
      .stats-bar .stat-divider { display: none; }
      .stats-bar .stat-item { border-right: 1px solid var(--border); padding-right: .5rem; }
      .stats-bar .stat-item:last-child { border-right: none; padding-right: 0; }
      input[type="text"] { font-size: .88rem; }
      .example-queries .label { display: none; }
      .tab { padding: .35rem .5rem; font-size: .72rem; }
    }

    .kbd-hint {
      display: none;
      font-size: .68rem;
      color: var(--text-muted);
      padding: .1rem .35rem;
      border: 1px solid var(--border);
      border-radius: 4px;
      margin-left: auto;
      pointer-events: none;
      font-family: monospace;
      position: absolute;
      right: .7rem;
      top: 50%;
      transform: translateY(-50%);
    }
    @media (min-width: 769px) {
      .kbd-hint { display: inline-block; }
      .search-input-wrap { position: relative; }
      .search-input-wrap input { padding-right: 3rem; }
    }
  </style>
</head>
<body>
  <div class="water-header">
    <div class="container">
      <div class="header-left">
        <h1>SWMM5 Manual Search</h1>
        <p>Search <a href="https://swmm-manual.netlify.app/" target="_blank">SWMM Manual Collection</a></p>
      </div>
      <div class="header-right">
        <a href="/source" class="pill">Source</a>
        <button class="theme-toggle" id="theme-toggle" title="Toggle dark mode">
          <span class="theme-icon" id="theme-icon">&#9790;</span>
          <span id="theme-label">Dark</span>
        </button>
      </div>
    </div>
  </div>

  <div class="main-content">
  <div class="container">
  {% if not ready %}
    <p class="hint">No index yet. Click <strong>"Build index"</strong> to crawl and index the site the first time.</p>
    <p class="hint" style="font-size: 0.85rem; margin-top: 0.25rem;">
      <strong>Estimated time:</strong> 15-25 minutes to crawl ~250 pages from the SWMM Manual Collection
    </p>
    <button id="build">Build index</button>
    <div id="status" class="muted"></div>
    <div id="progress" style="position: fixed; bottom: 0; left: 0; right: 0; background: var(--bg-card); border-top: 1px solid var(--border); padding: 1rem; display: none; max-height: 200px; overflow-y: auto; font-family: monospace; font-size: 0.8rem; z-index: 1000;"></div>
    <script>
      // Start a crawl + index build and stream the server's progress log into #progress.
      document.getElementById("build").onclick = async () => {
        const buildBtn = document.getElementById("build");
        const statusEl = document.getElementById("status");
        const progressEl = document.getElementById("progress");
        // Log-line prefix -> text colour; the first matching prefix wins.
        const prefixColors = [
          ["CRAWLING:", "#29b6f6"],
          ["INDEXED:", "#66bb6a"],
          ["BUILDING:", "#ffa726"],
          ["STATUS:", "var(--text-muted)"],
          ["FAILED:", "#ef5350"],
          ["ERROR:", "#ef5350"],
        ];
        // Shared failure path: show the reason and let the user retry.
        const fail = (text) => { statusEl.textContent = text; buildBtn.disabled = false; };

        // Without this, every extra click would POST /reindex again and open
        // another progress stream alongside the first one.
        buildBtn.disabled = true;
        statusEl.textContent = "Starting indexing...";
        progressEl.style.display = "block";
        progressEl.innerHTML = "";
        try {
          const r = await fetch("/reindex", { method: "POST" });
          const j = await r.json();
          if (j.status !== "started") { fail("Failed to start indexing."); return; }

          const eventSource = new EventSource("/indexing-progress");
          eventSource.onmessage = function(event) {
            let data;
            try {
              data = JSON.parse(event.data);
            } catch (parseError) {
              // One malformed frame should not abort the whole progress stream.
              console.warn("Skipping unparsable progress event", parseError);
              return;
            }
            // Ignore heartbeats and anything without a usable text message.
            if (!data || typeof data.message !== "string" || !data.message || data.heartbeat) return;

            const messageEl = document.createElement("div");
            messageEl.textContent = data.message;
            const match = prefixColors.find(([prefix]) => data.message.startsWith(prefix));
            if (match) {
              messageEl.style.color = match[1];
              if (match[0] === "STATUS:") messageEl.style.fontWeight = "bold";
            }
            progressEl.appendChild(messageEl);
            // Keep the newest log line in view.
            progressEl.scrollTop = progressEl.scrollHeight;
            if (data.message.startsWith("STATUS: Indexing complete")) {
              statusEl.textContent = "Done! Reload the page to start searching.";
              // Close explicitly so the browser does not auto-reconnect and fire onerror.
              eventSource.close();
            }
          };
          eventSource.onerror = function() { eventSource.close(); fail("Error occurred during indexing."); };
        } catch (error) { fail("Error: " + error.message); }
      };
    </script>
  {% else %}

    <div id="stats-bar" class="stats-bar" style="display:none;"></div>

    <div class="search-form-wrapper">
      <form id="f">
        <div class="search-input-wrap" style="flex:1;display:flex;">
          <input id="q" type="text" placeholder="Search the SWMM manual (use commas for AND logic)..." autofocus style="width:100%;"/>
          <span class="kbd-hint">/</span>
        </div>
        <select id="limit" title="Results per page">
          <option value="20">20</option>
          <option value="50">50</option>
          <option value="100">100</option>
          <option value="200">200</option>
        </select>
        <button type="submit">Search</button>
      </form>
      <div class="filter-row">
        <label for="chapter-filter">Filter by chapter:</label>
        <select id="chapter-filter">
          <option value="">All Chapters</option>
        </select>
      </div>
    </div>

    <div class="example-queries">
      <span class="label">Try:</span>
      <span class="example-chip" data-query="Manning roughness">Manning roughness</span>
      <span class="example-chip" data-query="Green-Ampt infiltration">Green-Ampt infiltration</span>
      <span class="example-chip" data-query="LID controls">LID controls</span>
      <span class="example-chip" data-query="dynamic wave routing">dynamic wave routing</span>
      <span class="example-chip" data-query="pollutant buildup">pollutant buildup</span>
      <span class="example-chip" data-query="storage unit">storage unit</span>
    </div>

    <div id="featured-section"></div>

    <div class="tabs">
      <div class="tab active" data-tab="search">Search</div>
      <div class="tab" data-tab="toc">Table of Contents</div>
      <div class="tab" data-tab="glossary">Glossary</div>
    </div>

    <div id="tab-search" class="tab-content active">
      <div id="related-concepts"></div>
      <div class="results-header">
        <div id="count" class="muted"></div>
        <div id="elapsed" class="muted"></div>
      </div>
      <div id="search-area"></div>
    </div>

    <div id="tab-toc" class="tab-content">
      <div id="toc-loading" class="muted">Loading table of contents...</div>
      <div id="toc-content"></div>
    </div>

    <div id="tab-glossary" class="tab-content">
      <div id="glossary-loading" class="muted">Loading glossary...</div>
      <div id="glossary-content"></div>
    </div>

    <div class="source-ref">
      <span>Source: <a href="https://swmm-manual.netlify.app/" target="_blank">swmm-manual.netlify.app</a></span>
      <button class="source-toggle" onclick="toggleSourcePreview()">Show source preview</button>
    </div>
    <div class="source-preview" id="source-preview">
      <img src="/live-screenshot" alt="SWMM Manual website preview" loading="lazy"/>
    </div>

    <div id="toast" class="toast"></div>

    <script src="/static/app.js?v=6"></script>
  {% endif %}
  </div>
  </div>

  <footer class="site-footer">
    <div class="container">
      App by <a href="https://www.linkedin.com/in/robertdickinson/" target="_blank" rel="noopener">Robert Dickinson</a>
      &middot; Built by <a href="https://www.linkedin.com/in/scott-jeffers-pe-phd-a7638717/" target="_blank" rel="noopener">Scott Jeffers, PE, PhD</a>
      &middot; SWMM5 Manual Search Engine &middot; Data sourced from EPA SWMM 5.2 documentation
    </div>
  </footer>

  <script>
    // Apply the saved colour theme and wire up the header toggle button.
    (function() {
      const root = document.documentElement;

      // localStorage access can throw (for example a SecurityError when storage
      // is blocked), so persistence is best-effort and must never stop the
      // toggle itself from working.
      function readSavedTheme() {
        try { return localStorage.getItem('swmm-theme'); } catch (e) { return null; }
      }
      function saveTheme(value) {
        try { localStorage.setItem('swmm-theme', value); } catch (e) { /* storage unavailable */ }
      }
      function isDark() {
        return root.getAttribute('data-theme') === 'dark';
      }
      // The icon and label advertise the theme the next click will switch to.
      function updateToggleUI() {
        const dark = isDark();
        document.getElementById('theme-icon').innerHTML = dark ? '&#9788;' : '&#9790;';
        document.getElementById('theme-label').textContent = dark ? 'Light' : 'Dark';
      }

      if (readSavedTheme() === 'dark') root.setAttribute('data-theme', 'dark');

      const toggle = document.getElementById('theme-toggle');
      if (!toggle) return;
      updateToggleUI();
      toggle.addEventListener('click', function() {
        if (isDark()) {
          root.removeAttribute('data-theme');
          saveTheme('light');
        } else {
          root.setAttribute('data-theme', 'dark');
          saveTheme('dark');
        }
        updateToggleUI();
      });
    })();
  </script>
</body>
</html>

templates/source.html

<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width,initial-scale=1"/>
  <link rel="icon" type="image/png" href="/static/favicon.png"/>
  <title>{{ title }}</title>
  <style>
    body { font-family: system-ui, -apple-system, Segoe UI, Roboto, sans-serif; margin: 2rem auto; max-width: 1200px; line-height: 1.45; }
    header { display: flex; gap: 1rem; align-items: baseline; flex-wrap: wrap; margin-bottom: 2rem; }
    h1 { font-size: 1.6rem; margin: 0; }
    .back-link { color: #0066cc; text-decoration: none; font-size: 0.9rem; }
    .back-link:hover { text-decoration: underline; }
    .file-section { margin-bottom: 2rem; border: 1px solid #ddd; border-radius: 8px; overflow: hidden; }
    .file-header { background: #f5f5f5; padding: 0.75rem 1rem; font-weight: 600; font-size: 0.9rem; border-bottom: 1px solid #ddd; }
    .file-content { overflow-x: auto; }
    pre { margin: 0; padding: 1rem; background: #fff; font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; font-size: 0.85rem; line-height: 1.4; }
    code { font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; }
    .description { color: #666; font-size: 0.9rem; margin-bottom: 1.5rem; }
  </style>
</head>
<body>
  <header>
    <h1>{{ title }}</h1>
    <a href="/" class="back-link">← Back to search</a>
  </header>
  
  <div class="description">
    <p>This SWMM Manual Search application crawls <a href="https://swmm-manual.netlify.app/" target="_blank">swmm-manual.netlify.app</a>, builds a local search index, and provides instant search functionality.</p>
  </div>

  <!-- One bordered panel per entry of `files`: the key is shown as the header and the value inside a <pre><code> block. -->
  {% for filename, content in files.items() %}
  <div class="file-section">
    <div class="file-header">{{ filename }}</div>
    <div class="file-content">
      <pre><code>{{ content }}</code></pre>
    </div>
  </div>
  {% endfor %}
</body>
</html>

static/app.js

// Page elements looked up once, by id, when the script is evaluated.
// getElementById returns null for an id that is not in the page; some code
// below guards chapterFilter for that reason.

// Search form; its submit event triggers a search.
const form = document.getElementById("f");
// Text box holding the query.
const input = document.getElementById("q");
// Select whose value is passed to the server as the result limit.
const limitSelect = document.getElementById("limit");
// Container that receives the rendered results (and the INP reference panel).
const searchArea = document.getElementById("search-area");
// Status line: result count, "Searching...", or an error message.
const countEl = document.getElementById("count");
// Shows the elapsed time reported by the server for the last search.
const elapsedEl = document.getElementById("elapsed");
// Container for the "Related Concepts" box.
const relatedEl = document.getElementById("related-concepts");
// Container for the "Featured Search of the Day" box.
const featuredEl = document.getElementById("featured-section");
// Select used to restrict a search to one chapter; options are added by loadChapters().
const chapterFilter = document.getElementById("chapter-filter");
// Bar that displays index statistics once loadStats() succeeds.
const statsBar = document.getElementById("stats-bar");

/**
 * Escape the five HTML-significant characters (& < > " ') so a value can be
 * placed in element content or in a quoted attribute value.
 *
 * Falsy input (null, undefined, "", 0, false) yields "", as before. Any other
 * non-string value is converted with String() first; previously a truthy
 * non-string such as a number threw because it has no .replace method.
 *
 * @param {*} s value to escape
 * @returns {string} the escaped text
 */
function escapeHtml(s) {
  if (!s) return "";
  const entities = {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  return String(s).replace(/[&<>"']/g, ch => entities[ch]);
}

/**
 * Show a transient message in the #toast element for two seconds.
 *
 * The pending hide timer is kept on the function object and cancelled on each
 * call. Without that, the timer started by an earlier toast would hide a later
 * one before its own two seconds had elapsed.
 *
 * @param {string} msg text to display
 */
function showToast(msg) {
  const toastEl = document.getElementById("toast");
  if (!toastEl) return;
  toastEl.textContent = msg;
  toastEl.classList.add("show");
  clearTimeout(showToast.hideTimer);
  showToast.hideTimer = setTimeout(() => toastEl.classList.remove("show"), 2000);
}

/**
 * Flip the #source-preview element between shown and hidden.
 *
 * Only the inline style is inspected: anything other than an inline
 * display of "block" counts as hidden, so the first call shows the preview.
 */
function toggleSourcePreview() {
  const preview = document.getElementById("source-preview");
  const currentlyShown = preview.style.display === "block";
  preview.style.display = currentlyShown ? "none" : "block";
}

/**
 * Copy a passage to the clipboard and report the outcome with a toast.
 *
 * navigator.clipboard may be absent (for example on a page not served from a
 * secure context) and writeText() returns a promise that can reject. Both
 * cases now produce a visible message instead of an uncaught error.
 *
 * @param {string} text passage to copy
 */
function copyPassage(text) {
  const clipboard = navigator.clipboard;
  if (!clipboard || typeof clipboard.writeText !== "function") {
    showToast("Copying is not supported in this browser");
    return;
  }
  clipboard.writeText(text)
    .then(() => showToast("Passage copied to clipboard"))
    .catch(() => showToast("Could not copy passage"));
}

/**
 * Build the HTML for the "INP File Reference" side panel.
 *
 * Each reference renders its section title, description, an optional field
 * table, then either per-method tables/examples or a single example, an
 * optional list of layers, and a link that searches the manual for the section.
 *
 * @param {Array<Object>} refs reference objects from the search response
 * @returns {string} panel markup, or "" when there is nothing to show
 */
function renderInpPanel(refs) {
  if (!refs || refs.length === 0) return "";

  const SUBHEAD_STYLE = 'margin-top:.4rem;font-size:.78rem;font-weight:600;color:var(--inp-header);';

  // Field / Description / Unit table shared by the section and by each method.
  // Returns "" for a missing or empty field list.
  const fieldTable = fields => {
    if (!fields || fields.length === 0) return "";
    const rows = fields.map(f =>
      `<tr><td><strong>${escapeHtml(f.name)}</strong></td><td>${escapeHtml(f.description)}</td><td>${escapeHtml(f.unit || f.type || "")}</td></tr>`
    ).join("");
    return '<div class="inp-table-wrap"><table class="inp-field-table"><thead><tr><th>Field</th><th>Description</th><th>Unit</th></tr></thead><tbody>'
      + rows
      + '</tbody></table></div>';
  };

  let html = '<div class="inp-panel"><h3>INP File Reference</h3>';
  refs.forEach(ref => {
    const sectionLabel = escapeHtml(ref.section);
    // The handler argument passes through two parsers: the HTML attribute
    // parser, then the JS parser. JSON.stringify yields a valid JS string
    // literal and escapeHtml protects it inside the attribute. HTML-escaping
    // alone is not enough, because &#39; is decoded back to ' before the
    // script runs and would end the string early.
    const searchArg = escapeHtml(JSON.stringify(String(ref.section || "")));

    html += '<div class="inp-section">';
    html += `<div class="inp-section-title">[${sectionLabel}]</div>`;
    html += `<div class="inp-section-desc">${escapeHtml(ref.description || "")}</div>`;
    html += fieldTable(ref.fields);

    if (ref.methods) {
      Object.entries(ref.methods).forEach(([method, data]) => {
        html += `<div style="${SUBHEAD_STYLE}">${escapeHtml(method)} method:</div>`;
        html += fieldTable(data.fields);
        if (data.example) {
          html += `<div class="inp-example">${escapeHtml(data.example)}</div>`;
        }
      });
    } else if (ref.example) {
      // The section-level example is shown only when there are no methods.
      html += `<div class="inp-example">${escapeHtml(ref.example)}</div>`;
    }

    if (ref.layers) {
      html += `<div style="${SUBHEAD_STYLE}">Layers:</div>`;
      Object.entries(ref.layers).forEach(([layer, params]) => {
        html += `<div style="font-size:.72rem;margin-top:.2rem;"><strong>${escapeHtml(layer)}:</strong> ${params.map(p => escapeHtml(p)).join(", ")}</div>`;
      });
    }

    html += '<div class="inp-cross-links">';
    html += `<span class="inp-cross-link" onclick="searchFor(${searchArg})">Search manual for ${sectionLabel}</span>`;
    html += '</div>';
    html += '</div>';
  });
  html += '</div>';
  return html;
}

/**
 * Run a search and render results, the INP reference panel and related
 * concepts, then mirror the search into the address bar.
 *
 * A blank query clears the result area instead of contacting the server.
 *
 * @param {string} q      query text
 * @param {string} limit  maximum number of results (value of the limit select)
 * @returns {Promise<void>}
 */
async function run(q, limit) {
  if (!q.trim()) {
    searchArea.innerHTML = "";
    countEl.textContent = "";
    elapsedEl.textContent = "";
    relatedEl.innerHTML = "";
    return;
  }

  countEl.textContent = "Searching...";
  elapsedEl.textContent = "";
  const chapter = chapterFilter ? chapterFilter.value : "";
  let url = `/search?q=${encodeURIComponent(q)}&limit=${limit}`;
  if (chapter) url += `&chapter=${encodeURIComponent(chapter)}`;

  let json;
  try {
    const res = await fetch(url);
    json = await res.json();
  } catch(e) {
    // Network failure or a body that is not JSON.
    countEl.textContent = "Search failed. Please try again.";
    return;
  }
  if (json.error) {
    countEl.textContent = json.error === "index_missing" ? "Index not built yet." : "Search error.";
    return;
  }

  const items = json.results || [];
  const related = json.related || [];
  const inpRefs = json.inp_references || [];
  const elapsed = json.elapsed;

  countEl.textContent = `${items.length} result${items.length === 1 ? "" : "s"}`;
  if (elapsed !== undefined) {
    elapsedEl.textContent = `${elapsed}s`;
  }

  const resultsHtml = items.map(it => {
    const rawSnippet = it.snippet || "";
    // The snippet is inserted as markup; highlightTerms only touches the text
    // between tags.
    const highlightedSnippet = highlightTerms(rawSnippet, q);
    // Plain-text version for the clipboard: drop tags and blank out character
    // references. The entity pattern is deliberately strict so that a stray
    // "&" followed much later by ";" does not swallow ordinary text.
    const cleanSnippet = rawSnippet
      .replace(/<[^>]*>/g, "")
      .replace(/&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);/gi, " ");
    // URLs are data too: escape them so a quote cannot end the href attribute.
    const href = escapeHtml(it.url);
    // JSON.stringify produces a valid JS string literal; escapeHtml then makes
    // it safe inside the double-quoted onclick attribute. Embedding the text in
    // a template literal broke on '"', on "${" and on a trailing backslash.
    const copyArg = escapeHtml(JSON.stringify(cleanSnippet));
    return `
    <div class="result">
      <div class="result-title-row">
        <a class="result-link" href="${href}" target="_blank" rel="noopener">${escapeHtml(it.title || it.section || it.url)}</a>
      </div>
      ${it.section ? `<div class="section">${escapeHtml(it.section)}</div>` : ``}
      <div class="snippet">${highlightedSnippet}</div>
      <div class="result-actions">
        <a class="result-action" href="${href}" target="_blank" rel="noopener">View in manual</a>
        <button class="result-action" onclick="copyPassage(${copyArg})">Copy passage</button>
      </div>
    </div>`;
  }).join("");

  if (inpRefs.length > 0) {
    searchArea.innerHTML = `<div class="dual-pane"><div>${resultsHtml}</div>${renderInpPanel(inpRefs)}</div>`;
  } else {
    searchArea.innerHTML = resultsHtml;
  }

  if (related.length > 0) {
    relatedEl.innerHTML = `
      <div class="related-box">
        <h4>Related Concepts</h4>
        <div class="related-tags">
          ${related.map(r => `<span class="related-tag" onclick="searchFor(${escapeHtml(JSON.stringify(String(r)))})">${escapeHtml(r)}</span>`).join("")}
        </div>
      </div>`;
  } else {
    relatedEl.innerHTML = "";
  }

  updateUrl(q, limit, chapter);
}

/**
 * Reflect the current search in the address bar without adding a history entry.
 *
 * Empty values are left out, and so is a limit of "20". When nothing remains,
 * the URL is reset to the bare pathname.
 *
 * @param {string} q        query text
 * @param {string} limit    result limit, stored under the "results" key
 * @param {string} chapter  chapter filter value
 */
function updateUrl(q, limit, chapter) {
  const search = new URLSearchParams();
  const candidates = [
    ["q", q, Boolean(q)],
    ["results", limit, Boolean(limit) && limit !== "20"],
    ["chapter", chapter, Boolean(chapter)],
  ];
  candidates.forEach(([key, value, include]) => {
    if (include) search.set(key, value);
  });

  const queryString = search.toString();
  const target = queryString ? `?${queryString}` : window.location.pathname;
  history.replaceState(null, "", target);
}

/**
 * Search for a term programmatically: fill the search box, start the search,
 * switch to the first tab and its #tab-search pane, and scroll the search form
 * into view.
 *
 * @param {string} term text to search for
 */
function searchFor(term) {
  input.value = term;
  run(term, limitSelect.value);

  const tabButtons = document.querySelectorAll(".tab");
  const tabPanes = document.querySelectorAll(".tab-content");
  for (const button of tabButtons) button.classList.remove("active");
  for (const pane of tabPanes) pane.classList.remove("active");

  tabButtons[0].classList.add("active");
  document.getElementById("tab-search").classList.add("active");

  const formWrapper = document.querySelector(".search-form-wrapper");
  formWrapper.scrollIntoView({ behavior: "smooth", block: "start" });
}

// Submitting the form searches in place instead of reloading the page.
form.addEventListener("submit", (e) => {
  e.preventDefault();
  run(input.value.trim(), limitSelect.value);
});

// Changing the result limit or the chapter filter repeats the current search.
// A control missing from the page is skipped: chapterFilter is treated as
// optional by run() and handleUrlParams(), so it must not break wiring here.
[limitSelect, chapterFilter].forEach(control => {
  if (!control) return;
  control.addEventListener("change", () => {
    const query = input.value.trim();
    if (query) run(query, limitSelect.value);
  });
});

// Example chips carry their query in a data-query attribute.
document.querySelectorAll(".example-chip").forEach(chip => {
  chip.addEventListener("click", () => {
    searchFor(chip.dataset.query);
  });
});

// Tab switching: activate the clicked tab and the pane whose id is
// "tab-" + data-tab. The TOC and glossary tabs fetch their content the first
// time they are opened.
for (const tabButton of document.querySelectorAll(".tab")) {
  tabButton.addEventListener("click", () => {
    const tabName = tabButton.dataset.tab;

    document.querySelectorAll(".tab").forEach(other => other.classList.remove("active"));
    document.querySelectorAll(".tab-content").forEach(panel => panel.classList.remove("active"));
    tabButton.classList.add("active");

    const activePane = document.getElementById("tab-" + tabName);
    if (activePane) activePane.classList.add("active");

    if (tabName === "toc") {
      if (!tocLoaded) loadToc();
    } else if (tabName === "glossary") {
      if (!glossaryLoaded) loadGlossary();
    }
  });
}

/**
 * Fetch /stats and fill the stats bar with chapter, section and word counts,
 * the SWMM version and, when available, the index date.
 *
 * Best effort: any failure leaves the stats bar untouched.
 *
 * @returns {Promise<void>}
 */
async function loadStats() {
  try {
    const response = await fetch("/stats");
    if (!response.ok) return;
    const stats = await response.json();
    if (stats.error) return;

    // indexed_at is multiplied by 1000 before being handed to Date.
    const indexedOn = stats.indexed_at
      ? new Date(stats.indexed_at * 1000).toLocaleDateString("en-US", { month: "short", day: "numeric", year: "numeric" })
      : "";

    statsBar.innerHTML = `
      <span class="stat-item">${stats.chapters} chapters</span>
      <span class="stat-divider">&middot;</span>
      <span class="stat-item">${stats.sections} sections</span>
      <span class="stat-divider">&middot;</span>
      <span class="stat-item">${stats.words.toLocaleString()} words indexed</span>
      <span class="stat-divider">&middot;</span>
      <span class="stat-item">SWMM ${stats.version}</span>
      ${indexedOn ? `<span class="stat-divider">&middot;</span><span class="stat-item">Indexed: ${indexedOn}</span>` : ""}
    `;
    statsBar.style.display = "flex";
  } catch(e) {}
}

/**
 * Fetch /chapters and append one option per chapter to the chapter filter.
 *
 * The option value is the full chapter title. The visible label drops the
 * "SWMM Manual " prefix and appends the section count in parentheses.
 * Best effort: failures are ignored.
 *
 * @returns {Promise<void>}
 */
async function loadChapters() {
  try {
    const response = await fetch("/chapters");
    if (!response.ok) return;
    const payload = await response.json();
    if (payload.error) return;

    const chapters = payload.chapters || [];
    chapters.forEach(chapter => {
      const option = document.createElement("option");
      option.value = chapter.title;
      const label = chapter.title.replace("SWMM Manual ", "");
      option.textContent = `${label} (${chapter.sections})`;
      chapterFilter.appendChild(option);
    });
  } catch(e) {}
}

/**
 * Fetch /featured-search and render the "Featured Search of the Day" box with
 * up to three preview results.
 *
 * Best effort: on any failure, or when there are no results, the box is left
 * empty.
 *
 * @returns {Promise<void>}
 */
async function loadFeatured() {
  try {
    const res = await fetch("/featured-search");
    // Same guard as loadStats/loadChapters: do not parse error responses.
    if (!res.ok) return;
    const json = await res.json();
    if (json.error) return;
    const items = json.results || [];
    if (items.length === 0) return;

    const queryLabel = escapeHtml(json.query);
    // JS-escape first (JSON.stringify), then HTML-escape for the attribute.
    // HTML-escaping alone lets a quote or backslash in the query end the JS
    // string literal once the attribute has been decoded.
    const searchCall = `searchFor(${escapeHtml(JSON.stringify(String(json.query || "")))})`;

    featuredEl.innerHTML = `
      <div class="featured-box">
        <h3>Featured Search of the Day</h3>
        <span class="featured-query" onclick="${searchCall}">"${queryLabel}"</span>
        <div class="featured-meta">${items.length} preview results</div>
        <div style="margin-top: .4rem;">
          ${items.slice(0, 3).map(it => `
            <div class="featured-result">
              <a href="${escapeHtml(it.url)}" target="_blank" rel="noopener">${escapeHtml(it.title || it.section || it.url)}</a>
              ${it.section ? `<div class="section" style="font-size:.75rem">${escapeHtml(it.section)}</div>` : ``}
              <div class="snippet">${it.snippet || ""}</div>
            </div>
          `).join("")}
        </div>
        <span class="see-all-link" onclick="${searchCall}">See all results &rarr;</span>
      </div>`;
  } catch(e) {}
}

// Set once the table of contents has been rendered, so the tab handler does
// not fetch it again.
let tocLoaded = false;

/**
 * Fetch /toc and render the collapsible table of contents.
 *
 * The loading placeholder is hidden, and tocLoaded set, only after rendering
 * succeeds. A failure therefore stays visible to the user and the next click
 * on the tab retries the load.
 *
 * @returns {Promise<void>}
 */
async function loadToc() {
  const loadingEl = document.getElementById("toc-loading");
  try {
    const res = await fetch("/toc");
    const json = await res.json();
    if (json.error) { loadingEl.textContent = "No data available."; return; }

    const toc = json.toc || [];
    // The document(s) with the highest word count get a "largest" badge.
    const maxWords = Math.max(...toc.map(t => t.word_count));
    let html = `
      <div class="toc-stats">
        <span class="toc-stat">${toc.length} documents</span>
        <span class="toc-stat">${json.total_documents} sections</span>
        <span class="toc-stat">${toc.reduce((sum, t) => sum + t.word_count, 0).toLocaleString()} total words</span>
      </div>
      <div class="toc-container">`;
    toc.forEach((item, idx) => {
      const isLargest = item.word_count === maxWords;
      // JSON.stringify + escapeHtml makes any section title safe as an inline
      // handler argument, so apostrophes no longer have to be stripped from
      // the term that is searched.
      const sectionsHtml = item.sections
        .filter(s => s)
        .map(s => `<div class="toc-section-item" onclick="searchFor(${escapeHtml(JSON.stringify(String(s)))})">${escapeHtml(s)}</div>`)
        .join("");
      html += `
        <div class="toc-item">
          <div class="toc-header" onclick="document.getElementById('toc-sec-${idx}').classList.toggle('open')">
            <span class="toc-title">${escapeHtml(item.title)}${isLargest ? '<span class="largest-badge">largest</span>' : ''}</span>
            <span class="toc-meta">${item.section_count} sections &middot; ${item.word_count.toLocaleString()} words</span>
          </div>
          <div class="toc-sections" id="toc-sec-${idx}">
            ${sectionsHtml}
          </div>
        </div>`;
    });
    html += '</div>';

    document.getElementById("toc-content").innerHTML = html;
    loadingEl.style.display = "none";
    tocLoaded = true;
  } catch(e) {
    loadingEl.textContent = "Failed to load table of contents.";
  }
}

// Set once the glossary has been rendered, so the tab handler does not fetch
// it again.
let glossaryLoaded = false;

/**
 * Fetch /glossary and render the terms grouped under their letter keys, with a
 * row of jump links at the top.
 *
 * The loading placeholder is hidden, and glossaryLoaded set, only after
 * rendering succeeds. A failure therefore stays visible to the user and the
 * next click on the tab retries the load.
 *
 * @returns {Promise<void>}
 */
async function loadGlossary() {
  const loadingEl = document.getElementById("glossary-loading");
  try {
    const res = await fetch("/glossary");
    const json = await res.json();
    if (json.error) { loadingEl.textContent = "No data available."; return; }

    const glossary = json.glossary || {};
    const letters = Object.keys(glossary).sort();
    let html = `
      <div style="margin-bottom:.4rem; font-size:.82rem; color:var(--text-muted);">
        ${json.total_terms} terms auto-extracted from the SWMM manual. Click any term to search.
      </div>
      <div style="margin-bottom:.6rem; font-size:.8rem;">
        ${letters.map(l => `<a href="#glossary-${escapeHtml(l)}" style="margin-right:.35rem; color:var(--accent); font-weight:600; text-decoration:none;">${escapeHtml(l)}</a>`).join("")}
      </div>
      <div class="glossary-container">`;
    letters.forEach(letter => {
      const terms = glossary[letter] || [];
      const safeLetter = escapeHtml(letter);
      html += `<div class="glossary-letter" id="glossary-${safeLetter}">${safeLetter}</div>`;
      terms.forEach(t => {
        // JSON.stringify + escapeHtml makes any term safe as an inline handler
        // argument, so apostrophes no longer have to be stripped from the
        // term that is searched.
        const searchArg = escapeHtml(JSON.stringify(String(t.term)));
        html += `<span class="glossary-term" onclick="searchFor(${searchArg})" title="${t.count} occurrence${t.count !== 1 ? 's' : ''}">${escapeHtml(t.term)}<span class="glossary-count">(${t.count})</span></span>`;
      });
    });
    html += '</div>';

    document.getElementById("glossary-content").innerHTML = html;
    loadingEl.style.display = "none";
    glossaryLoaded = true;
  } catch(e) {
    loadingEl.textContent = "Failed to load glossary.";
  }
}

/**
 * Restore a search from the page URL.
 *
 * Reads the "results", "chapter" and "q" parameters. A "results" value is
 * applied only if it is one of the four supported limits. The search itself
 * runs only when "q" is present.
 *
 * @returns {Promise<void>}
 */
async function handleUrlParams() {
  const urlParams = new URLSearchParams(window.location.search);
  const query = urlParams.get("q");
  const requestedLimit = urlParams.get("results");
  const requestedChapter = urlParams.get("chapter");

  const supportedLimits = ["20","50","100","200"];
  if (requestedLimit && supportedLimits.includes(requestedLimit)) {
    limitSelect.value = requestedLimit;
  }

  if (requestedChapter && chapterFilter) {
    chapterFilter.value = requestedChapter;
  }

  if (!query) return;
  input.value = query;
  run(query, limitSelect.value);
}

// Keyboard shortcuts for the search box:
//   "/"            focuses it, unless focus is already in an input, textarea or select
//   Ctrl+K / Cmd+K focuses it and selects its current text
document.addEventListener("keydown", (event) => {
  const focusSearchBox = (selectText) => {
    event.preventDefault();
    input.focus();
    if (selectText) input.select();
  };

  if (event.key === "/") {
    const focusedTag = document.activeElement.tagName;
    if (!["INPUT","TEXTAREA","SELECT"].includes(focusedTag)) focusSearchBox(false);
  }

  const withModifier = event.ctrlKey || event.metaKey;
  if (withModifier && event.key === "k") focusSearchBox(true);
});

/**
 * Wrap whole-word, case-insensitive occurrences of the query's words in <mark>.
 *
 * The query is split on commas and whitespace. Punctuation around each word
 * (quotes, brackets and the like) is removed, and words shorter than two
 * characters are ignored. Only text between tags is touched: tags and
 * character references such as &amp; pass through unchanged, so a query like
 * "amp" or "lt" can no longer corrupt an entity.
 *
 * @param {string} html   snippet markup
 * @param {string} query  the user's query
 * @returns {string} the snippet with matches wrapped in <mark>
 */
function highlightTerms(html, query) {
  if (!query) return html;

  const words = [];
  query.split(",").forEach(term => {
    term.trim().split(/\s+/).forEach(raw => {
      // Query syntax such as "dynamic wave" leaves quotes stuck to the words.
      // Trim them, or the \b-anchored pattern below can never match.
      const word = raw.replace(/^["'()\[\]{}.,;:!?]+|["'()\[\]{}.,;:!?]+$/g, "");
      if (word.length >= 2) words.push(word);
    });
  });
  if (words.length === 0) return html;

  const alternatives = words.map(w => w.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|');
  const pattern = new RegExp('\\b(' + alternatives + ')\\b', 'gi');

  // split() with a single capturing group puts the separators (tags and
  // character references) at the odd indexes; only even indexes are plain text.
  return html
    .split(/(<[^>]*>|&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);)/i)
    .map((part, i) => (i % 2 === 1 ? part : part.replace(pattern, '<mark>$1</mark>')))
    .join('');
}

/**
 * Page start-up sequence.
 *
 * Stats and the featured search are started without being awaited. The
 * chapter list is awaited before the URL parameters are applied, so the
 * chapter options already exist when handleUrlParams() sets the filter value.
 *
 * @returns {Promise<void>}
 */
async function init() {
  void loadStats();          // not awaited
  await loadChapters();      // must finish before the URL's chapter is applied
  void loadFeatured();       // not awaited
  void handleUrlParams();
}

init();