# mission-control/routers/models.py
"""
Models router: status, download, registration, unload, quick-test chat.

Covers the core of Mission Control — everything that directly concerns the
llama-swap models and their config.yaml.
"""
from pathlib import Path
import httpx
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from ruamel.yaml.scalarstring import LiteralScalarString
from auth import auth
from config import CMD_TEMPLATE, CONFIG_PATH, DEFAULT_TTL, LLAMA_SWAP_URL, MODELS_DIR
from jobengine import JOBS, start_job
from llamaswap import _swap_get, read_config, write_config
import re
import os
# Every route registered on this router is served under the /api prefix and
# has the `auth` dependency applied.
router = APIRouter(prefix="/api", dependencies=[Depends(auth)])
# ---------------------------------------------------------------------------
# Request models
# ---------------------------------------------------------------------------
class DownloadReq(BaseModel):
    """Request body for POST /download."""

    # Repository identifier passed to `hf download`.
    repo: str
    # File name inside the repository to download.
    file: str
    # Optional sub-directory below MODELS_DIR; when omitted, the last
    # "/"-separated segment of `repo` is used instead.
    subdir: str | None = None
class RegisterReq(BaseModel):
    """Request body for POST /register."""

    # Key under which the model is stored in the config's "models" mapping.
    alias: str
    # Path of the model file; substituted for "{model}" in CMD_TEMPLATE.
    model_path: str
    # Context size; substituted for "{ctx}" in CMD_TEMPLATE.
    ctx: int = 8192
    # Per-model TTL; DEFAULT_TTL is used when this is None.
    ttl: int | None = None
class ChatReq(BaseModel):
    """Request body for POST /chat."""

    # Model name forwarded as the "model" field of the completion request.
    model: str
    # Text sent as the single user message of the conversation.
    message: str
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
# Patterns used to pull display metadata out of a model's `cmd` string.
_CTX_FLAG_RE = re.compile(r'-(?:c|-ctx-size)\s+(\d+)')
_MODEL_FLAG_RE = re.compile(r'-(?:m|-model)\s+([^\s]+)')
_QUANT_RE = re.compile(
    r'(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|fp16|bf16)\.gguf', flags=re.IGNORECASE
)


def _parse_model_meta(name, cmd):
    """Derive the display metadata of one configured model from its command.

    Args:
        name: The model's key in the config's "models" mapping.
        cmd: The model's command line as a stripped string.

    Returns:
        A dict with the keys "ctx", "size_bytes", "quant", "caps" and
        "filename".
    """
    # Context size: value of -c / --ctx-size, 8192 when the flag is absent.
    ctx_match = _CTX_FLAG_RE.search(cmd)
    ctx = int(ctx_match.group(1)) if ctx_match else 8192

    size_bytes = None
    quant = ""
    filename = ""
    model_arg = ""
    path_match = _MODEL_FLAG_RE.search(cmd)
    if path_match:
        model_arg = path_match.group(1)
        # Drop quote characters that may surround the path in the command.
        path = model_arg.replace("'", "").replace('"', "")
        # Ask for the size directly instead of exists()+getsize(): a file
        # that vanishes in between must not fail the whole status request.
        try:
            size_bytes = os.path.getsize(path)
        except OSError:
            size_bytes = None
        filename = os.path.basename(path)
        quant_match = _QUANT_RE.search(path)
        if quant_match:
            quant = quant_match.group(1).upper()

    # Capabilities are a heuristic on the alias and the raw model argument.
    # str() guards against non-string YAML keys (e.g. a purely numeric alias).
    is_code = "coder" in str(name).lower() or "code" in model_arg.lower()
    caps = ["Code"] if is_code else ["Text"]
    if "--mmproj" in cmd:
        caps.append("Bild")

    return {
        "ctx": ctx,
        "size_bytes": size_bytes,
        "quant": quant,
        "caps": caps,
        "filename": filename,
    }


@router.get("/status")
def status():
    """Report every known model together with its current llama-swap state.

    Configured models are read from the config file and start out as "idle";
    the result of llama-swap's "/running" query then overrides state and port.
    Models reported as running that are not in the config are listed as well,
    with an empty "meta" block so that every entry has the same shape.

    Returns:
        A dict with "swap_ok", "swap_url", "config_path", "models_dir" and
        "models" (a list of per-model dicts). "swap_ok" is False when
        querying or processing the running list raised an exception.
    """
    cfg = read_config()
    configured = {}
    for name, spec in (cfg.get("models") or {}).items():
        spec = spec or {}
        cmd = str(spec.get("cmd", "")).strip()
        configured[name] = {
            "name": name,
            "ttl": spec.get("ttl", cfg.get("globalTTL", 0)),
            "cmd": cmd,
            "state": "idle",
            "port": None,
            "meta": _parse_model_meta(name, cmd),
        }

    swap_ok = True
    try:
        running = _swap_get("/running")
        # The payload may be a bare list or a dict wrapping it under "running".
        items = running.get("running", running) if isinstance(running, dict) else running
        for item in items or []:
            mid = item.get("model") or item.get("id") or item.get("name")
            if mid in configured:
                configured[mid]["state"] = item.get("state", "running")
                configured[mid]["port"] = item.get("port")
            elif mid:
                configured[mid] = {
                    "name": mid,
                    "ttl": None,
                    "cmd": "",
                    "state": item.get("state", "running"),
                    "port": item.get("port"),
                    # Nothing is known about an unconfigured model, but the
                    # key must exist so consumers can rely on one schema.
                    "meta": {
                        "ctx": None,
                        "size_bytes": None,
                        "quant": "",
                        "caps": [],
                        "filename": "",
                    },
                }
    except Exception:  # noqa: BLE001
        # Best effort: the configured models are still reported.
        swap_ok = False

    return {
        "swap_ok": swap_ok,
        "swap_url": LLAMA_SWAP_URL,
        "config_path": str(CONFIG_PATH),
        "models_dir": str(MODELS_DIR),
        "models": list(configured.values()),
    }
@router.post("/download")
def download(req: DownloadReq):
    """Start a background job that downloads one file via the `hf` CLI.

    The file is stored below MODELS_DIR in ``req.subdir`` or, when that is
    not given, in a directory named after the last segment of ``req.repo``.

    Args:
        req: Repository, file name and optional target sub-directory.

    Returns:
        A dict with the "job_id" of the started job and the "expected_path"
        of the downloaded file.

    Raises:
        HTTPException: 400 when repo or file starts with "-", or when the
            target directory or file would lie outside MODELS_DIR.
    """
    # Both values become positional CLI arguments; a leading "-" would be
    # parsed as an option by the `hf` command.
    if req.repo.startswith("-") or req.file.startswith("-"):
        raise HTTPException(400, "repo/file duerfen nicht mit '-' beginnen")

    sub = req.subdir or req.repo.split("/")[-1]
    target = MODELS_DIR / sub
    expected = target / req.file

    # Reject "..", absolute paths and similar before touching the file
    # system: the directory must stay inside MODELS_DIR and the file inside
    # that directory.
    root = MODELS_DIR.resolve()
    resolved_target = target.resolve()
    if not resolved_target.is_relative_to(root):
        raise HTTPException(400, f"Zielverzeichnis ausserhalb von MODELS_DIR: {sub}")
    if not expected.resolve().is_relative_to(resolved_target):
        raise HTTPException(400, f"Ungueltiger Dateipfad: {req.file}")

    target.mkdir(parents=True, exist_ok=True)
    args = ["hf", "download", req.repo, req.file, "--local-dir", str(target)]
    job_id = start_job(args, f"download {req.repo}/{req.file}",
                       env={"HF_XET_HIGH_PERFORMANCE": "1"})
    JOBS[job_id]["result_path"] = str(expected)
    return {"job_id": job_id, "expected_path": str(expected)}
@router.post("/register")
def register(req: RegisterReq):
    """Add or replace a model entry in the config file.

    The command is built from CMD_TEMPLATE by substituting "{model}" and
    "{ctx}", and stored as a literal block scalar under ``req.alias``.

    Args:
        req: Alias, model path, context size and optional TTL.

    Returns:
        A dict with "ok", the "alias" and a human-readable "note".

    Raises:
        HTTPException: 404 when ``req.model_path`` does not exist.
    """
    if not Path(req.model_path).exists():
        raise HTTPException(404, f"Datei nicht gefunden: {req.model_path}")

    cfg = read_config()
    # A config without a "models" section (or with an empty one, which loads
    # as None) must not crash the first registration.
    models = cfg.get("models")
    if models is None:
        models = cfg["models"] = {}

    cmd = CMD_TEMPLATE.replace("{model}", req.model_path).replace("{ctx}", str(req.ctx))
    models[req.alias] = {
        "cmd": LiteralScalarString(cmd + "\n"),
        "ttl": req.ttl if req.ttl is not None else DEFAULT_TTL,
    }
    write_config(cfg)
    return {"ok": True, "alias": req.alias,
            "note": "In config.yaml geschrieben. llama-swap mit -watch-config laedt automatisch neu."}
@router.post("/unload")
def unload(model: str | None = None):
    """Ask llama-swap to unload one model, or all models when none is named.

    Args:
        model: Name of the model to unload. When omitted, the request goes
            to the unload endpoint without a model segment.

    Returns:
        A dict with "ok" (True for upstream status codes below 400) and the
        upstream "status" code.

    Raises:
        HTTPException: 502 when the request to llama-swap fails.
    """
    from urllib.parse import quote

    # Escape the name so characters such as "?" or "#" stay part of the path
    # segment; "/" is left untouched for names that contain it.
    path = f"/api/models/unload/{quote(model)}" if model else "/api/models/unload"
    try:
        with httpx.Client(timeout=10.0) as c:
            r = c.post(f"{LLAMA_SWAP_URL}{path}")
            return {"ok": r.status_code < 400, "status": r.status_code}
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"llama-swap nicht erreichbar: {exc}") from exc
@router.post("/chat")
def chat(req: ChatReq):
    """Send a single user message to a model and return its reply.

    The message is posted to llama-swap's "/v1/chat/completions" endpoint as
    a one-message conversation.

    Args:
        req: Model name and message text.

    Returns:
        A dict with the key "reply" holding the content of the first choice.

    Raises:
        HTTPException: 502 when the request fails, the upstream answers with
            an error status, or the response has an unexpected shape.
    """
    payload = {"model": req.model, "messages": [{"role": "user", "content": req.message}]}
    try:
        with httpx.Client(timeout=120.0) as c:
            r = c.post(f"{LLAMA_SWAP_URL}/v1/chat/completions", json=payload)
            r.raise_for_status()
            data = r.json()
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"Anfrage fehlgeschlagen: {exc}") from exc

    # Handled separately so a malformed response produces a readable message
    # instead of a bare key name such as "'choices'".
    try:
        reply = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise HTTPException(
            502, f"Anfrage fehlgeschlagen: unerwartetes Antwortformat ({exc!r})"
        ) from exc
    return {"reply": reply}