364939466f
Architektur auf Separation of Concerns umgestellt – ohne Build-Schritt,
ohne neues Framework, ohne DB (KISS bleibt). Endpoint-URLs unveraendert,
daher 1:1-kompatibel zum bisherigen Stand.
Backend (Top-Level-Helfer + ein Router je Bereich):
- app.py auf duennen Einstieg reduziert (FastAPI + include_router + static)
- config/auth/jobengine/llamaswap als getrennte Helfer-Module
- Endpoints in routers/{models,jobs,maintenance}.py
Frontend (native ES-Module statt Single-File):
- index.html = Huelle: Sidebar-Nav, Topbar, Alert-Banner, Hash-Routing
- css/{base,components}.css – Tokens + Komponenten
- js/core/{api,ui,nav}.js + js/panels/{overview,models,maintenance,jobs}.js + main.js
- Panel-Vertrag: { id, mount?(), onStatus?(s), onJobs?(jobs) }
- Optik an docs/mission-control-overview.png angelehnt (Hero, KPI-Kacheln,
Listen, Aktivitaets-Stream, getoente Karten)
Doku: CLAUDE.md + README auf die neue Struktur aktualisiert.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
134 lines
4.5 KiB
Python
134 lines
4.5 KiB
Python
"""
|
|
Modelle-Router: Status, Download, Einpflegen, Unload, Schnelltest-Chat.
|
|
|
|
Bildet den Kern von Mission Control ab — alles, was direkt mit den llama-swap-
|
|
Modellen und ihrer config.yaml zu tun hat.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from pydantic import BaseModel
|
|
from ruamel.yaml.scalarstring import LiteralScalarString
|
|
|
|
from auth import auth
|
|
from config import CMD_TEMPLATE, CONFIG_PATH, DEFAULT_TTL, LLAMA_SWAP_URL, MODELS_DIR
|
|
from jobengine import JOBS, start_job
|
|
from llamaswap import _swap_get, read_config, write_config
|
|
|
|
router = APIRouter(prefix="/api", dependencies=[Depends(auth)])
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Request-Modelle
|
|
# ---------------------------------------------------------------------------
|
|
class DownloadReq(BaseModel):
    """Request body of ``POST /api/download``."""

    # Repository identifier handed to ``hf download``.
    repo: str
    # Name of the file inside the repository to fetch.
    file: str
    # Target folder below MODELS_DIR; when omitted, the download endpoint
    # falls back to the last "/"-separated segment of ``repo``.
    subdir: str | None = None
|
|
|
|
|
|
class RegisterReq(BaseModel):
    """Request body of ``POST /api/register``."""

    # Key under which the model is stored in the config's ``models`` section.
    alias: str
    # Path of the model file; substituted for ``{model}`` in CMD_TEMPLATE.
    model_path: str
    # Substituted for ``{ctx}`` in CMD_TEMPLATE.
    ctx: int = 8192
    # Per-model TTL; the register endpoint uses DEFAULT_TTL when this is None.
    ttl: int | None = None
|
|
|
|
|
|
class ChatReq(BaseModel):
    """Request body of ``POST /api/chat`` (single-message quick test)."""

    # Model name forwarded as ``model`` in the chat-completions payload.
    model: str
    # Text sent as the one and only ``user`` message.
    message: str
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Endpoints
|
|
# ---------------------------------------------------------------------------
|
|
@router.get("/status")
def status():
    """Report configured models merged with llama-swap's running state.

    Every model from the config file starts out as ``idle`` with no port.
    The ``/running`` answer of llama-swap is then overlaid: known models get
    their ``state``/``port`` updated, unknown but named ones are appended
    with ``ttl=None`` and an empty ``cmd``.

    Any exception raised while querying or interpreting ``/running`` is
    swallowed and reported as ``swap_ok = False``; entries merged before the
    failure are kept.

    Returns:
        dict with ``swap_ok``, ``swap_url``, ``config_path``, ``models_dir``
        and ``models`` (a list of per-model dicts).
    """
    cfg = read_config()
    fallback_ttl = cfg.get("globalTTL", 0)

    models = {}
    for name, raw_spec in (cfg.get("models") or {}).items():
        spec = raw_spec or {}  # tolerate an empty model entry
        models[name] = {
            "name": name,
            "ttl": spec.get("ttl", fallback_ttl),
            "cmd": str(spec.get("cmd", "")).strip(),
            "state": "idle",
            "port": None,
        }

    reachable = True
    try:
        payload = _swap_get("/running")
        # The answer may be a bare list or a dict wrapping it in "running".
        if isinstance(payload, dict):
            payload = payload.get("running", payload)
        for entry in payload or []:
            model_id = entry.get("model") or entry.get("id") or entry.get("name")
            live = {
                "state": entry.get("state", "running"),
                "port": entry.get("port"),
            }
            if model_id in models:
                models[model_id].update(live)
            elif model_id:
                # Running but not present in the config file.
                models[model_id] = {"name": model_id, "ttl": None, "cmd": "", **live}
    except Exception:  # noqa: BLE001
        reachable = False

    return {
        "swap_ok": reachable,
        "swap_url": LLAMA_SWAP_URL,
        "config_path": str(CONFIG_PATH),
        "models_dir": str(MODELS_DIR),
        "models": list(models.values()),
    }
|
|
|
|
|
|
@router.post("/download")
def download(req: DownloadReq):
    """Start a background ``hf download`` job for one file of a repository.

    The file is placed in ``MODELS_DIR/<subdir>``; without an explicit
    ``subdir`` the last "/"-separated segment of ``req.repo`` is used.

    Returns:
        ``{"job_id": ..., "expected_path": ...}`` where ``expected_path`` is
        the target directory joined with ``req.file``. The same path is
        stored on the job as ``result_path``.

    Raises:
        HTTPException: 400 if ``subdir`` or ``file`` would resolve to a
            location outside ``MODELS_DIR``.
    """
    sub = req.subdir or req.repo.split("/")[-1]
    target = MODELS_DIR / sub
    expected = target / req.file

    # Both values come from the client. An absolute path or ".." segments
    # would otherwise let the request create directories and point the
    # download outside the models directory, so reject them before any
    # side effect (mkdir, job start) happens.
    root = MODELS_DIR.resolve()
    for candidate in (target, expected):
        if not candidate.resolve().is_relative_to(root):
            raise HTTPException(
                400, f"Pfad liegt ausserhalb des Modellverzeichnisses: {candidate}"
            )

    target.mkdir(parents=True, exist_ok=True)
    args = ["hf", "download", req.repo, req.file, "--local-dir", str(target)]
    job_id = start_job(
        args,
        f"download {req.repo}/{req.file}",
        env={"HF_XET_HIGH_PERFORMANCE": "1"},
    )
    JOBS[job_id]["result_path"] = str(expected)
    return {"job_id": job_id, "expected_path": str(expected)}
|
|
|
|
|
|
@router.post("/register")
def register(req: RegisterReq):
    """Add (or overwrite) a model entry in the config file.

    The command line is built from ``CMD_TEMPLATE`` by substituting
    ``{model}`` with ``req.model_path`` and ``{ctx}`` with ``req.ctx``; it is
    stored as a YAML literal block scalar. ``req.ttl`` falls back to
    ``DEFAULT_TTL`` when not given.

    Returns:
        ``{"ok": True, "alias": ..., "note": ...}``.

    Raises:
        HTTPException: 404 if ``req.model_path`` does not exist.
    """
    if not Path(req.model_path).exists():
        raise HTTPException(404, f"Datei nicht gefunden: {req.model_path}")

    cfg = read_config()
    cmd = CMD_TEMPLATE.replace("{model}", req.model_path).replace("{ctx}", str(req.ctx))

    # A config without a "models" section (key missing or an empty
    # ``models:``) is tolerated by the status endpoint; create the section
    # here instead of failing with KeyError/TypeError on the first register.
    if cfg.get("models") is None:
        cfg["models"] = {}

    cfg["models"][req.alias] = {
        "cmd": LiteralScalarString(cmd + "\n"),
        "ttl": req.ttl if req.ttl is not None else DEFAULT_TTL,
    }
    write_config(cfg)
    return {
        "ok": True,
        "alias": req.alias,
        "note": "In config.yaml geschrieben. llama-swap mit -watch-config laedt automatisch neu.",
    }
|
|
|
|
|
|
@router.post("/unload")
def unload(model: str | None = None):
    """Ask llama-swap to unload one model, or all models when none is named.

    Args:
        model: Name of the model to unload; ``None`` (or empty) posts to the
            unload-all path instead.

    Returns:
        ``{"ok": <status below 400>, "status": <HTTP status code>}``.

    Raises:
        HTTPException: 502 if the request to llama-swap fails.
    """
    from urllib.parse import quote

    # Percent-encode the name so characters such as "?", "#" or spaces stay
    # part of the path segment instead of being parsed as query/fragment.
    # "/" is left intact (quote's default) so names containing slashes keep
    # addressing the same path as before.
    path = f"/api/models/unload/{quote(model)}" if model else "/api/models/unload"
    try:
        with httpx.Client(timeout=10.0) as c:
            r = c.post(f"{LLAMA_SWAP_URL}{path}")
            return {"ok": r.status_code < 400, "status": r.status_code}
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"llama-swap nicht erreichbar: {exc}") from exc
|
|
|
|
|
|
@router.post("/chat")
def chat(req: ChatReq):
    """Send a single user message to a model and return its reply.

    Posts an OpenAI-style chat-completions payload to llama-swap and extracts
    the content of the first choice.

    Returns:
        ``{"reply": <content of choices[0].message>}``.

    Raises:
        HTTPException: 502 on any failure — transport error, non-success
            HTTP status, or a response lacking the expected fields.
    """
    user_turn = {"role": "user", "content": req.message}
    body = {"model": req.model, "messages": [user_turn]}
    try:
        with httpx.Client(timeout=120.0) as client:
            response = client.post(f"{LLAMA_SWAP_URL}/v1/chat/completions", json=body)
            response.raise_for_status()
            choices = response.json()["choices"]
        reply = choices[0]["message"]["content"]
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"Anfrage fehlgeschlagen: {exc}")
    return {"reply": reply}
|