local-deep-research/scripts/generate_config_docs.py

#!/usr/bin/env python3
"""
Generate CONFIGURATION.md from default settings JSON files and env_definitions.

Usage:
    python scripts/generate_config_docs.py                    # Write to docs/CONFIGURATION.md
    python scripts/generate_config_docs.py --output /tmp/out  # Write to custom location
    python scripts/generate_config_docs.py --check            # Exit 1 if docs are stale
"""

import argparse
import ast
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional


def get_project_root() -> Path:
    """Return the project root directory."""
    return Path(__file__).resolve().parent.parent


def get_env_var_name(key: str) -> str:
    """Convert setting key to environment variable name."""
    return f"LDR_{key.replace('.', '_').upper()}"


def format_value(value: Any) -> str:
    """Format default value for markdown."""
    if value is None:
        return "null"
    if isinstance(value, bool):
        return str(value).lower()
    if isinstance(value, (dict, list)):
        return f"`{json.dumps(value)}`"
    return str(value)


def _discover_env_definition_files(env_defs_dir: Path) -> List[Path]:
    """Auto-discover env_definitions modules, excluding __init__.py and env_settings.py."""
    if not env_defs_dir.is_dir():
        return []
    return sorted(
        p
        for p in env_defs_dir.glob("*.py")
        if p.name not in ("__init__.py", "env_settings.py")
    )


def _category_from_filename(filename: str) -> str:
    """Derive a human-readable category name from a filename.

    Example: 'db_config.py' -> 'Db Config'
    """
    stem = filename.removesuffix(".py")
    return stem.replace("_", " ").title()


def _extract_setting_from_call(node: ast.Call) -> Optional[Dict[str, Any]]:
    """Extract a setting dict from a *Setting() AST call node."""
    if not (
        isinstance(node.func, ast.Name) and node.func.id.endswith("Setting")
    ):
        return None

    keywords = {k.arg: k.value for k in node.keywords if k.arg}

    if "key" not in keywords:
        return None

    key_node = keywords["key"]
    if not isinstance(key_node, ast.Constant):
        return None
    key = key_node.value  # gitleaks:allow

    # Description — may be a simple string or a parenthesised concatenation
    description = ""
    if "description" in keywords:
        desc_node = keywords["description"]
        if isinstance(desc_node, ast.Constant):
            description = desc_node.value
        else:
            description = ast.unparse(desc_node)

    # Default
    default_val = "None"
    if "default" in keywords:
        default_val = ast.unparse(keywords["default"])

    # Env var (auto-generated unless explicitly overridden)
    if "env_var" in keywords and isinstance(keywords["env_var"], ast.Constant):
        env_var = keywords["env_var"].value
    else:
        env_var = get_env_var_name(key)

    # Type from the class name (e.g. BooleanSetting -> Boolean)
    setting_type = node.func.id.replace("Setting", "")

    # Required
    required = False
    if "required" in keywords and isinstance(
        keywords["required"], ast.Constant
    ):
        required = bool(keywords["required"].value)

    # Min/max value
    min_value = None
    if "min_value" in keywords and isinstance(
        keywords["min_value"], ast.Constant
    ):
        min_value = keywords["min_value"].value

    max_value = None
    if "max_value" in keywords and isinstance(
        keywords["max_value"], ast.Constant
    ):
        max_value = keywords["max_value"].value

    # Allowed values (ast.Set of ast.Constant)
    allowed_values = None
    if "allowed_values" in keywords and isinstance(
        keywords["allowed_values"], ast.Set
    ):
        allowed_values = sorted(
            elt.value
            for elt in keywords["allowed_values"].elts
            if isinstance(elt, ast.Constant)
        )

    # Deprecated env var
    deprecated_env_var = None
    if "deprecated_env_var" in keywords and isinstance(
        keywords["deprecated_env_var"], ast.Constant
    ):
        deprecated_env_var = keywords["deprecated_env_var"].value

    return {
        "key": key,
        "env_var": env_var,
        "description": description,
        "default": default_val,
        "type": setting_type,
        "required": required,
        "min_value": min_value,
        "max_value": max_value,
        "allowed_values": allowed_values,
        "deprecated_env_var": deprecated_env_var,
    }


def get_env_only_settings(
    root_dir: Optional[Path] = None,
) -> List[Dict[str, Any]]:
    """
    Extract env-only settings from env_definitions/ by auto-discovering modules.

    These are settings required before database initialization.
    """
    root_dir = root_dir or get_project_root()
    env_defs_dir = (
        root_dir
        / "src"
        / "local_deep_research"
        / "settings"
        / "env_definitions"
    )

    env_only: List[Dict[str, Any]] = []

    for filepath in _discover_env_definition_files(env_defs_dir):
        category = _category_from_filename(filepath.name)

        try:
            content = filepath.read_text()
            tree = ast.parse(content)
        except Exception as e:
            print(f"Warning: Could not parse {filepath}: {e}")
            continue

        for node in ast.walk(tree):
            if not isinstance(node, ast.Call):
                continue

            setting = _extract_setting_from_call(node)
            if setting is None:
                continue

            setting["category"] = category
            env_only.append(setting)

    return env_only


def _format_constraints(setting: Dict[str, Any]) -> str:
    """Build a human-readable constraints string."""
    parts = []
    if (
        setting.get("min_value") is not None
        and setting.get("max_value") is not None
    ):
        parts.append(f"{setting['min_value']}..{setting['max_value']}")
    elif setting.get("min_value") is not None:
        parts.append(f">={setting['min_value']}")
    elif setting.get("max_value") is not None:
        parts.append(f"<={setting['max_value']}")

    if setting.get("allowed_values"):
        parts.append(", ".join(setting["allowed_values"]))

    return " | ".join(parts) if parts else ""


def generate_docs_content(root_dir: Optional[Path] = None) -> str:
    """Generate the full CONFIGURATION.md content as a string."""
    root_dir = root_dir or get_project_root()
    defaults_dir = root_dir / "src" / "local_deep_research" / "defaults"

    settings: Dict[str, Any] = {}

    # Recursively find all JSON files
    for json_file in sorted(defaults_dir.rglob("*.json")):
        try:
            with open(json_file, "r") as f:
                data = json.load(f)
                settings.update(data)
        except Exception as e:
            print(f"Warning: Could not load {json_file}: {e}")

    sorted_keys = sorted(settings.keys())

    # Get env-only settings
    env_only_settings = get_env_only_settings(root_dir)

    # Build markdown
    content = [
        "# Configuration Reference",
        "",
        "This document is automatically generated from the application's default settings.",
        "All settings can be configured via the Web UI (Settings page), or overridden via Environment Variables.",
        "",
        "## Environment Variables",
        "",
        "To override a setting using an environment variable, convert the key to uppercase, replace dots with underscores, and prefix with `LDR_`.",
        "For example, `app.debug` becomes `LDR_APP_DEBUG`.",
        "",
        "Configuration Priority: Web UI Config > Environment Variables > Default Values",
        "> Environmental Variables are used to override default values, easing installation, while allowing for adjustments to configuration via Web UI.",
        "",
        "### System Locking",
        "There is a special environment variable `LDR_LOCKED_SETTINGS` that allows administrators to strictly enforce specific settings.",
        "",
        "*   **Variable**: `LDR_LOCKED_SETTINGS`",
        "*   **Format**: Comma-separated list of setting keys (e.g., `llm.model,app.port`)",
        "*   **Behavior**:",
        "    1.  Any setting listed here **MUST** have a corresponding value defined in the environment variables (e.g., `LDR_LLM_MODEL`). If not, the application will fail to start.",
        "    2.  The setting becomes **read-only** in the Web UI.",
        "    3.  The **Environment Variable** value takes absolute precedence, ignoring any value in the database.",
        "",
        "**Priority for Locked Settings**: Environment Variable > Database (Ignored) > Default (Ignored)",
        "",
        "",
    ]

    # Env-only section with expanded columns
    if env_only_settings:
        content.extend(
            [
                "## Pre-Database (Env-Only) Settings",
                "",
                "These settings are **required before database initialization** and can only be set via environment variables.",
                "They are not available in the Web UI because they are needed to start the application.",
                "",
                "| Environment Variable | Type | Default | Required | Constraints | Description | Category | Deprecated Alias |",
                "|----------------------|------|---------|----------|-------------|-------------|----------|------------------|",
            ]
        )

        for setting in sorted(env_only_settings, key=lambda x: x["env_var"]):
            env_var = setting["env_var"]
            stype = setting["type"]
            default = setting["default"]
            required = "Yes" if setting.get("required") else "No"
            constraints = _format_constraints(setting).replace("|", "\\|")
            desc = setting["description"].replace("|", "\\|").replace("\n", " ")
            category = setting["category"]
            deprecated = setting.get("deprecated_env_var") or ""

            row = (
                f"| `{env_var}` | {stype} | `{default}` | {required} "
                f"| {constraints} | {desc} | {category} | {deprecated} |"
            )
            content.append(row)

        content.extend(["", ""])

    # Main settings list
    content.extend(
        [
            "## Settings List",
            "",
            "| Key | Environment Variable | Default Value | Description | Type |",
            "|-----|----------------------|---------------|-------------|------|",
        ]
    )

    for key in sorted_keys:
        setting = settings[key]
        env_var = setting.get("env_var") or get_env_var_name(key)
        default_val = format_value(setting.get("value"))
        description = (
            setting.get("description", "")
            .replace("\n", " ")
            .replace("|", "\\|")
        )
        setting_type = setting.get("type", "UNKNOWN")

        row = f"| `{key}` | `{env_var}` | `{default_val}` | {description} | {setting_type} |"
        content.append(row)

    content.append("")
    content.append("*Generated by scripts/generate_config_docs.py*")

    return "\n".join(content) + "\n"


def generate_docs(
    output_path: Optional[Path] = None,
    check: bool = False,
) -> int:
    """Generate (or check) CONFIGURATION.md.

    Returns 0 on success, 1 if check finds stale docs.
    """
    root_dir = get_project_root()
    output_file = output_path or (root_dir / "docs" / "CONFIGURATION.md")

    new_content = generate_docs_content(root_dir)

    if check:
        if not output_file.exists():
            print(
                f"FAIL: {output_file} does not exist. "
                "Run 'python scripts/generate_config_docs.py' to generate it."
            )
            return 1
        existing = output_file.read_text()
        if existing == new_content:
            print("OK: Configuration docs are up to date.")
            return 0
        print(
            f"FAIL: {output_file} is out of date. "
            "Run 'python scripts/generate_config_docs.py' to regenerate it."
        )
        return 1

    output_file.parent.mkdir(parents=True, exist_ok=True)
    output_file.write_text(new_content)
    print(f"Wrote {output_file}")
    return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate CONFIGURATION.md from defaults"
    )
    parser.add_argument(
        "--output",
        "-o",
        type=Path,
        help="Output file path (default: docs/CONFIGURATION.md)",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Check if docs are up to date (exit 1 if stale)",
    )
    args = parser.parse_args()
    sys.exit(generate_docs(output_path=args.output, check=args.check))