# mission-control/routers/models.py

"""
Modelle-Router: Status, Download, Einpflegen, Unload, Schnelltest-Chat.

Bildet den Kern von Mission Control ab — alles, was direkt mit den llama-swap-
Modellen und ihrer config.yaml zu tun hat.
"""

from pathlib import Path
from urllib.parse import quote

import httpx
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from ruamel.yaml.scalarstring import LiteralScalarString

from auth import auth
from config import CMD_TEMPLATE, CONFIG_PATH, DEFAULT_TTL, LLAMA_SWAP_URL, MODELS_DIR
from jobengine import JOBS, start_job
from llamaswap import _swap_get, read_config, write_config

router = APIRouter(prefix="/api", dependencies=[Depends(auth)])


# ---------------------------------------------------------------------------
# Request-Modelle
# ---------------------------------------------------------------------------
class DownloadReq(BaseModel):
    """Request body for ``POST /api/download``."""

    # Repository id handed to ``hf download``; its last "/"-separated segment
    # is used as the target sub-directory when ``subdir`` is not given.
    repo: str
    # File name handed to ``hf download``; also appended to the target
    # directory to form the expected result path.
    file: str
    # Optional directory name below MODELS_DIR to download into.
    subdir: str | None = None


class RegisterReq(BaseModel):
    """Request body for ``POST /api/register``."""

    # Key under which the entry is stored in the config's "models" mapping.
    alias: str
    # Path to the model; must exist on disk and replaces "{model}" in
    # CMD_TEMPLATE.
    model_path: str
    # Value substituted for "{ctx}" in CMD_TEMPLATE.
    ctx: int = 8192
    # TTL written to the entry; DEFAULT_TTL is used when this is None.
    ttl: int | None = None


class ChatReq(BaseModel):
    """Request body for ``POST /api/chat``."""

    # Model name sent as "model" in the chat-completions payload.
    model: str
    # Text sent as the content of the single "user" message.
    message: str


# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get("/status")
def status():
    """Return the configured models merged with llama-swap's running list.

    Every model from the config starts out as ``idle`` with no port. Entries
    reported by llama-swap's ``/running`` endpoint then overwrite state and
    port of matching models; running models that are not in the config are
    appended with an empty command and no TTL.

    Returns:
        A dict with ``swap_ok``, ``swap_url``, ``config_path``,
        ``models_dir`` and the list of ``models``. ``swap_ok`` is False when
        querying ``/running`` or processing its answer raised.
    """
    cfg = read_config()
    fallback_ttl = cfg.get("globalTTL", 0)

    def _describe(name, spec):
        # A model key with an empty/null body is treated as an empty spec.
        spec = spec or {}
        return {
            "name": name,
            "ttl": spec.get("ttl", fallback_ttl),
            "cmd": str(spec.get("cmd", "")).strip(),
            "state": "idle",
            "port": None,
        }

    by_name = {
        name: _describe(name, spec)
        for name, spec in (cfg.get("models") or {}).items()
    }

    reachable = True
    try:
        payload = _swap_get("/running")
        # The answer is either a dict wrapping the list under "running" or
        # the list itself.
        if isinstance(payload, dict):
            entries = payload.get("running", payload)
        else:
            entries = payload
        for entry in entries or []:
            model_id = entry.get("model") or entry.get("id") or entry.get("name")
            state = entry.get("state", "running")
            port = entry.get("port")
            if model_id in by_name:
                by_name[model_id].update(state=state, port=port)
            elif model_id:
                by_name[model_id] = {
                    "name": model_id,
                    "ttl": None,
                    "cmd": "",
                    "state": state,
                    "port": port,
                }
    except Exception:  # noqa: BLE001
        # Best effort: any failure here is reported through the flag only.
        reachable = False

    return {
        "swap_ok": reachable,
        "swap_url": LLAMA_SWAP_URL,
        "config_path": str(CONFIG_PATH),
        "models_dir": str(MODELS_DIR),
        "models": list(by_name.values()),
    }


@router.post("/download")
def download(req: DownloadReq):
    """Start an ``hf download`` job for one file of a repository.

    The file is stored below ``MODELS_DIR/<subdir>``; when no ``subdir`` is
    given, the last ``/``-separated segment of the repo id is used.

    Args:
        req: Repo id, file name and optional sub-directory.

    Returns:
        A dict with the ``job_id`` of the started job and the
        ``expected_path`` of the downloaded file.

    Raises:
        HTTPException: 400 when ``repo`` or ``file`` starts with ``-``, or
            when the sub-directory or ``file`` is absolute or contains
            ``..``.
    """
    # repo and file become positional arguments of the hf CLI; a leading "-"
    # would be parsed as an option instead of a value.
    for label, value in (("repo", req.repo), ("file", req.file)):
        if value.startswith("-"):
            raise HTTPException(400, f"Ungueltiger Wert fuer '{label}': {value}")

    sub = req.subdir or req.repo.split("/")[-1]

    # Lexical check only (symlinks are not resolved): absolute paths would
    # replace MODELS_DIR when joined, and ".." would climb out of it.
    for label, value in (("subdir", sub), ("file", req.file)):
        candidate = Path(value)
        if candidate.is_absolute() or ".." in candidate.parts:
            raise HTTPException(400, f"Ungueltiger Pfad in '{label}': {value}")

    target = MODELS_DIR / sub
    target.mkdir(parents=True, exist_ok=True)
    expected_path = str(target / req.file)

    args = ["hf", "download", req.repo, req.file, "--local-dir", str(target)]
    job_id = start_job(args, f"download {req.repo}/{req.file}",
                       env={"HF_XET_HIGH_PERFORMANCE": "1"})
    JOBS[job_id]["result_path"] = expected_path
    return {"job_id": job_id, "expected_path": expected_path}


@router.post("/register")
def register(req: RegisterReq):
    """Add or overwrite a model entry in the llama-swap config.

    The command is built from ``CMD_TEMPLATE`` by substituting ``{model}``
    and ``{ctx}``; the entry is stored under ``models[alias]`` and the config
    is written back.

    Args:
        req: Alias, model path, context size and optional TTL.

    Returns:
        A dict with ``ok``, the ``alias`` and a human-readable ``note``.

    Raises:
        HTTPException: 400 when the alias is blank, 404 when ``model_path``
            does not exist.
    """
    # A blank alias would end up as an unusable key in the config.
    if not req.alias.strip():
        raise HTTPException(400, "Alias darf nicht leer sein")
    if not Path(req.model_path).exists():
        raise HTTPException(404, f"Datei nicht gefunden: {req.model_path}")

    cfg = read_config()
    cmd = CMD_TEMPLATE.replace("{model}", req.model_path).replace("{ctx}", str(req.ctx))

    # The "models" key may be missing or null in the YAML; create the mapping
    # on demand instead of failing on the subscript.
    models = cfg.get("models")
    if models is None:
        models = cfg["models"] = {}

    models[req.alias] = {
        # Literal block scalar keeps the command readable in the YAML file.
        "cmd": LiteralScalarString(cmd + "\n"),
        "ttl": req.ttl if req.ttl is not None else DEFAULT_TTL,
    }
    write_config(cfg)
    return {"ok": True, "alias": req.alias,
            "note": "In config.yaml geschrieben. llama-swap mit -watch-config laedt automatisch neu."}


@router.post("/unload")
def unload(model: str | None = None):
    """Ask llama-swap to unload one model, or all models when none is named.

    Args:
        model: Name of the model to unload. When omitted or empty, the
            unload-all endpoint is called instead.

    Returns:
        A dict with ``ok`` (True when the upstream status code is below 400)
        and ``status`` (the upstream HTTP status code).

    Raises:
        HTTPException: 502 when the request to llama-swap fails.
    """
    if model:
        # Percent-encode the name so characters such as "?", "#" or "%" stay
        # part of the path instead of starting a query/fragment; "/" is kept
        # so names containing slashes produce the same path as before.
        path = f"/api/models/unload/{quote(model, safe='/')}"
    else:
        path = "/api/models/unload"

    try:
        with httpx.Client(timeout=10.0) as c:
            r = c.post(f"{LLAMA_SWAP_URL}{path}")
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"llama-swap nicht erreichbar: {exc}") from exc
    return {"ok": r.status_code < 400, "status": r.status_code}


@router.post("/chat")
def chat(req: ChatReq):
    """Send a single user message to a model and return its reply.

    Posts to llama-swap's ``/v1/chat/completions`` endpoint with one ``user``
    message and extracts the content of the first choice.

    Args:
        req: Model name and message text.

    Returns:
        A dict with the model's answer under ``reply``.

    Raises:
        HTTPException: 502 when the request fails, the upstream answers with
            an error status, the body is not valid JSON, or the JSON does not
            have the expected shape.
    """
    payload = {"model": req.model, "messages": [{"role": "user", "content": req.message}]}
    try:
        with httpx.Client(timeout=120.0) as c:
            r = c.post(f"{LLAMA_SWAP_URL}/v1/chat/completions", json=payload)
            r.raise_for_status()
            data = r.json()
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"Anfrage fehlgeschlagen: {exc}") from exc

    # Parsed separately so a malformed answer yields a readable message
    # instead of the bare text of a KeyError/IndexError.
    try:
        reply = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise HTTPException(
            502, "Anfrage fehlgeschlagen: unerwartetes Antwortformat von llama-swap"
        ) from exc
    return {"reply": reply}