"""
Models router: status, download, registration, unload, quick-test chat.

Covers the core of Mission Control — everything that deals directly with the
llama-swap models and their config.yaml.
"""
import os
import re
from pathlib import Path

import httpx
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from ruamel.yaml.scalarstring import LiteralScalarString

from auth import auth
from config import CMD_TEMPLATE, CONFIG_PATH, DEFAULT_TTL, LLAMA_SWAP_URL, MODELS_DIR
from jobengine import JOBS, start_job
from llamaswap import _swap_get, read_config, write_config

router = APIRouter(prefix="/api", dependencies=[Depends(auth)])

# Patterns used to pull metadata out of a model's launch command line.
# Compiled once at import time because /status applies them to every model.
_CTX_RE = re.compile(r'-(?:c|-ctx-size)\s+(\d+)')
_MODEL_RE = re.compile(r'-(?:m|-model)\s+([^\s]+)')
_QUANT_RE = re.compile(
    r'(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|fp16|bf16)\.gguf', flags=re.IGNORECASE
)

# Context size reported when the command line carries no -c / --ctx-size flag.
_FALLBACK_CTX = 8192


# ---------------------------------------------------------------------------
# Request models
# ---------------------------------------------------------------------------
class DownloadReq(BaseModel):
    """Body of POST /download."""

    repo: str  # repository id handed to `hf download`
    file: str  # file inside the repository
    subdir: str | None = None  # folder below MODELS_DIR; defaults to the repo name


class RegisterReq(BaseModel):
    """Body of POST /register."""

    alias: str  # key under `models` in the config
    model_path: str  # must exist on disk
    ctx: int = 8192  # substituted for {ctx} in CMD_TEMPLATE
    ttl: int | None = None  # falls back to DEFAULT_TTL when omitted


class ChatReq(BaseModel):
    """Body of POST /chat."""

    model: str
    message: str


class UpdateReq(BaseModel):
    """Body of POST /update_model."""

    alias: str
    ctx: int


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _parse_model_meta(name, cmd: str) -> dict:
    """Derive display metadata for one model from its name and command line.

    Args:
        name: Model key from the config.
        cmd: The model's launch command (already stripped).

    Returns:
        Dict with ``ctx``, ``size_bytes``, ``quant``, ``caps`` and
        ``filename``. ``size_bytes`` is None when no model path was found in
        the command or the file cannot be stat'ed.
    """
    ctx_match = _CTX_RE.search(cmd)
    ctx = int(ctx_match.group(1)) if ctx_match else _FALLBACK_CTX

    size_bytes = None
    quant = ""
    filename = ""
    raw_path = ""
    path_match = _MODEL_RE.search(cmd)
    if path_match:
        raw_path = path_match.group(1)
        # The path may be quoted inside the command string.
        path = raw_path.replace("'", "").replace('"', "")
        # Ask for the size directly instead of exists()+getsize(): the file
        # can disappear between the two calls.
        try:
            size_bytes = os.path.getsize(path)
        except (OSError, ValueError):
            size_bytes = None
        filename = os.path.basename(path)
        quant_match = _QUANT_RE.search(path)
        if quant_match:
            quant = quant_match.group(1).upper()

    # Capability tags are a heuristic on the alias and the model path.
    is_code = "coder" in str(name).lower() or "code" in raw_path.lower()
    caps = ["Code"] if is_code else ["Text"]
    if "--mmproj" in cmd:
        caps.append("Bild")

    return {
        "ctx": ctx,
        "size_bytes": size_bytes,
        "quant": quant,
        "caps": caps,
        "filename": filename,
    }


# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@router.get("/status")
def status():
    """Return all configured models merged with the proxy's running list.

    Configured models start as ``idle``; entries reported by
    ``_swap_get("/running")`` overwrite ``state`` and ``port``. Running models
    that are not in the config are listed as well. If querying or parsing the
    running list fails, ``swap_ok`` is False and the configured models are
    still returned.
    """
    cfg = read_config()
    configured = {}
    for name, spec in (cfg.get("models") or {}).items():
        spec = spec or {}
        cmd = str(spec.get("cmd", "")).strip()
        configured[name] = {
            "name": name,
            "ttl": spec.get("ttl", cfg.get("globalTTL", 0)),
            "cmd": cmd,
            "state": "idle",
            "port": None,
            "meta": _parse_model_meta(name, cmd),
        }

    swap_ok = True
    try:
        running = _swap_get("/running")
        # Accept both {"running": [...]} and a bare list.
        items = running.get("running", running) if isinstance(running, dict) else running
        for item in items or []:
            mid = item.get("model") or item.get("id") or item.get("name")
            if mid in configured:
                configured[mid]["state"] = item.get("state", "running")
                configured[mid]["port"] = item.get("port")
            elif mid:
                configured[mid] = {
                    "name": mid,
                    "ttl": None,
                    "cmd": "",
                    "state": item.get("state", "running"),
                    "port": item.get("port"),
                    # Same keys as configured entries so clients can rely on
                    # a uniform shape; nothing is known about this model.
                    "meta": {
                        "ctx": None,
                        "size_bytes": None,
                        "quant": "",
                        "caps": [],
                        "filename": "",
                    },
                }
    except Exception:  # noqa: BLE001
        # Best effort: the status page must still render without the proxy.
        swap_ok = False

    return {
        "swap_ok": swap_ok,
        "swap_url": LLAMA_SWAP_URL,
        "config_path": str(CONFIG_PATH),
        "models_dir": str(MODELS_DIR),
        "models": list(configured.values()),
    }


@router.post("/download")
def download(req: DownloadReq):
    """Start an `hf download` job for one file into a folder below MODELS_DIR.

    Returns:
        ``job_id`` of the started job and the ``expected_path`` of the file.

    Raises:
        HTTPException: 400 if repo/file look like CLI options or if the
            target would end up outside MODELS_DIR.
    """
    # repo and file become positional CLI arguments; a leading dash would be
    # parsed as an option by the downloader.
    if req.repo.startswith("-") or req.file.startswith("-"):
        raise HTTPException(400, "Ungueltiger Repo- oder Dateiname")

    sub = req.subdir or req.repo.split("/")[-1]
    target = MODELS_DIR / sub
    result_path = target / req.file

    # Lexical containment check (no symlink resolution, so symlinked model
    # folders keep working): rejects "..", and absolute paths, which would
    # otherwise replace MODELS_DIR when joined.
    root = os.path.abspath(MODELS_DIR)
    for candidate in (target, result_path):
        if not Path(os.path.abspath(candidate)).is_relative_to(root):
            raise HTTPException(400, "Zielpfad liegt ausserhalb des Modellverzeichnisses")

    target.mkdir(parents=True, exist_ok=True)
    args = ["hf", "download", req.repo, req.file, "--local-dir", str(target)]
    job_id = start_job(
        args,
        f"download {req.repo}/{req.file}",
        env={"HF_XET_HIGH_PERFORMANCE": "1"},
    )
    JOBS[job_id]["result_path"] = str(result_path)
    return {"job_id": job_id, "expected_path": str(result_path)}


@router.post("/register")
def register(req: RegisterReq):
    """Add or overwrite a model entry in the config.

    The command is built from CMD_TEMPLATE by substituting ``{model}`` and
    ``{ctx}``.

    Raises:
        HTTPException: 404 if ``model_path`` does not exist.
    """
    if not Path(req.model_path).exists():
        raise HTTPException(404, f"Datei nicht gefunden: {req.model_path}")

    cfg = read_config()
    cmd = CMD_TEMPLATE.replace("{model}", req.model_path).replace("{ctx}", str(req.ctx))

    # A config without a `models` section (or with `models:` left empty)
    # must not make the first registration fail.
    if cfg.get("models") is None:
        cfg["models"] = {}
    cfg["models"][req.alias] = {
        "cmd": LiteralScalarString(cmd + "\n"),
        "ttl": req.ttl if req.ttl is not None else DEFAULT_TTL,
    }
    write_config(cfg)
    return {
        "ok": True,
        "alias": req.alias,
        "note": "In config.yaml geschrieben. llama-swap mit -watch-config laedt automatisch neu.",
    }


@router.post("/update_model")
def update_model(req: UpdateReq):
    """Change the context size in an existing model's command.

    Every ``-c N`` / ``--ctx-size N`` occurrence is rewritten to ``-c <ctx>``;
    if the command has none, the flag is appended.

    Raises:
        HTTPException: 404 if the alias is not in the config.
    """
    cfg = read_config()
    # `models:` may be present but null, in which case .get() returns None.
    models = cfg.get("models") or {}
    if req.alias not in models:
        raise HTTPException(404, "Modell nicht gefunden")

    spec = models[req.alias]
    if spec is None:
        # Alias declared without a body; give it one so it can be updated.
        spec = models[req.alias] = {}

    cmd = str(spec.get("cmd", ""))
    if _CTX_RE.search(cmd):
        cmd = _CTX_RE.sub(f"-c {req.ctx}", cmd)
    else:
        cmd = cmd.strip() + f" -c {req.ctx}\n"

    spec["cmd"] = LiteralScalarString(cmd)
    write_config(cfg)
    return {"ok": True}


@router.post("/unload")
def unload(model: str | None = None):
    """Ask the proxy to unload one model, or all models when none is given.

    Returns:
        ``ok`` (True for HTTP status below 400) and the upstream ``status``.

    Raises:
        HTTPException: 502 if the request to the proxy fails.
    """
    path = f"/api/models/unload/{model}" if model else "/api/models/unload"
    try:
        with httpx.Client(timeout=10.0) as c:
            r = c.post(f"{LLAMA_SWAP_URL}{path}")
            return {"ok": r.status_code < 400, "status": r.status_code}
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"llama-swap nicht erreichbar: {exc}") from exc


@router.post("/chat")
def chat(req: ChatReq):
    """Send a single user message to a model and return its reply text.

    Raises:
        HTTPException: 502 on any failure — transport error, non-2xx
            response, or a response body without the expected fields.
    """
    payload = {
        "model": req.model,
        "messages": [{"role": "user", "content": req.message}],
    }
    try:
        with httpx.Client(timeout=120.0) as c:
            r = c.post(f"{LLAMA_SWAP_URL}/v1/chat/completions", json=payload)
            r.raise_for_status()
            data = r.json()
            return {"reply": data["choices"][0]["message"]["content"]}
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"Anfrage fehlgeschlagen: {exc}") from exc