diff --git a/app.py b/app.py
index 859afdf..a830675 100644
--- a/app.py
+++ b/app.py
@@ -20,7 +20,7 @@ from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
-from routers import jobs, maintenance, models, system
+from routers import jobs, maintenance, models, system, cookbook
app = FastAPI(title="Mission Control")
@@ -28,6 +28,7 @@ app.include_router(models.router)
app.include_router(jobs.router)
app.include_router(maintenance.router)
app.include_router(system.router)
+app.include_router(cookbook.router)
_STATIC = Path(__file__).parent / "static"
diff --git a/hw_math.py b/hw_math.py
new file mode 100644
index 0000000..6d1704b
--- /dev/null
+++ b/hw_math.py
@@ -0,0 +1,71 @@
+"""
+Extrahierte Mathematik aus dem Odysseus Projekt zur VRAM/RAM Berechnung.
+Abgestimmt auf APUs mit Unified Memory (Bosgame M5 / Strix Halo).
+"""
+
+# Assumed bytes per parameter for each GGUF quantization label.
+# Keys are upper-case because estimate_memory_gb() upper-cases its lookup key.
+QUANT_BYTES_PER_PARAM = {
+    "Q2_K": 0.35,
+    "Q3_K_S": 0.38,
+    "Q3_K_M": 0.42,
+    "Q3_K_L": 0.45,
+    "Q4_0": 0.50,
+    "Q4_1": 0.55,
+    "Q4_K_S": 0.50,
+    "Q4_K_M": 0.55,
+    "Q5_0": 0.62,
+    "Q5_1": 0.68,
+    "Q5_K_S": 0.62,
+    "Q5_K_M": 0.65,
+    "Q6_K": 0.75,
+    "Q8_0": 1.00,
+    "F16": 2.00,
+    # Alias: the filename regex in routers/cookbook.py matches "FP16", which
+    # would otherwise fall through to the 0.65 default and be underestimated.
+    "FP16": 2.00,
+    "BF16": 2.00,
+    # Unquantized 32-bit floats: 4 bytes per parameter.
+    "F32": 4.00,
+}
+
+def estimate_memory_gb(params_b: float, quant: str, ctx: int) -> float:
+    """Return the estimated memory need in GB (weights plus context).
+
+    Args:
+        params_b: Model size in billions of parameters.
+        quant: Quantization label; looked up case-insensitively.
+        ctx: Context length.
+
+    Returns:
+        Weight size plus the heuristic context overhead, in GB.
+    """
+    # Unknown labels fall back to the Q5_K_M figure (0.65) to stay on the safe side.
+    bytes_per_param = QUANT_BYTES_PER_PARAM.get(quant.upper(), 0.65)
+
+    # Heuristic: an 8k context on a 7B model costs roughly 0.8 GB; models
+    # smaller than 7B are treated like 7B for this term.
+    ctx_scale = ctx / 8192
+    size_scale = max(params_b, 7) / 7
+    context_gb = ctx_scale * size_scale * 0.8
+
+    return params_b * bytes_per_param + context_gb
+
+def estimate_speed(req_gb: float, sys_ram_gb: float) -> float:
+    """Return the estimated tokens/s for a model that needs ``req_gb`` GB.
+
+    Args:
+        req_gb: Estimated memory footprint of the model in GB.
+        sys_ram_gb: Total system RAM in GB; selects the assumed bandwidth.
+
+    Returns:
+        Estimated tokens per second, or 0.0 when ``req_gb`` is not positive.
+    """
+    # More than 8 GB of RAM is taken to mean the APU (~273 GB/s unified
+    # memory); anything else gets the 70 GB/s figure.
+    bandwidth_gb_s = 273 if sys_ram_gb > 8 else 70
+    if req_gb <= 0:
+        return 0.0
+
+    # (bandwidth / model size) scaled by a 0.55 efficiency factor.
+    return (bandwidth_gb_s / req_gb) * 0.55
+
+def evaluate_fit(params_b: float, quant: str, ctx: int, sys_ram_gb: float) -> dict:
+    """Classify how well a model fits a shared-memory (APU) system.
+
+    Args:
+        params_b: Model size in billions of parameters.
+        quant: Quantization label.
+        ctx: Context length.
+        sys_ram_gb: Total system RAM in GB.
+
+    Returns:
+        Dict with ``level`` ("too_tight", "marginal" or "perfect"), the
+        display ``text``, ``req_gb`` rounded to one decimal and ``tps``
+        rounded to a whole number.
+    """
+    needed_gb = estimate_memory_gb(params_b, quant, ctx)
+    tokens_per_s = estimate_speed(needed_gb, sys_ram_gb)
+
+    # Keep a 4 GB buffer for the OS and other processes.
+    budget_gb = max(sys_ram_gb - 4.0, 0)
+
+    if needed_gb > budget_gb:
+        level, label = "too_tight", "Zu groß (OOM)"
+    elif needed_gb > budget_gb * 0.8:
+        level, label = "marginal", "Könnte knapp werden"
+    else:
+        level, label = "perfect", "Passt perfekt"
+
+    return {
+        "level": level,
+        "text": label,
+        "req_gb": round(needed_gb, 1),
+        "tps": round(tokens_per_s, 0),
+    }
diff --git a/routers/cookbook.py b/routers/cookbook.py
new file mode 100644
index 0000000..f99e70f
--- /dev/null
+++ b/routers/cookbook.py
@@ -0,0 +1,100 @@
+"""
+Cookbook Router: Verbindet die HuggingFace API mit der Odysseus-Hardware-Berechnung.
+"""
+
+import httpx
+import re
+from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel
+import psutil
+
+from auth import auth
+from hw_math import evaluate_fit
+
+router = APIRouter(prefix="/api/cookbook", dependencies=[Depends(auth)])
+
+class AnalyzeRequest(BaseModel):
+ """Request body for POST /api/cookbook/analyze."""
+ # Repo identifier; analyze_repo interpolates it into the HuggingFace tree URL.
+ repo_id: str
+ # Context length handed to evaluate_fit; defaults to 8192 when omitted.
+ ctx: int = 8192
+
+class EvaluateRequest(BaseModel):
+ """Request body for POST /api/cookbook/evaluate."""
+ # Model size in billions of parameters (the unit extract_params_b returns).
+ params_b: float
+ # Quantization label, e.g. "Q4_K_M"; hw_math upper-cases it before lookup.
+ quant: str
+ # Context length; passed unchanged to evaluate_fit.
+ ctx: int
+
+def extract_params_b(repo_id: str) -> float:
+    """Extract the parameter count (in billions) from a repo name.
+
+    Only the model-name segment after the last "/" is inspected, so digits in
+    the owner part (e.g. "team13b/tiny-1.5B") are never mistaken for the size.
+
+    Examples: "Qwen2.5-Coder-32B" -> 32.0, "Mixtral-8x7B" -> 56.0 (MoE:
+    experts times per-expert size).
+
+    Args:
+        repo_id: Repo identifier, either "owner/name" or just "name".
+
+    Returns:
+        The size in billions, or 7.0 when no size can be found.
+    """
+    model_name = repo_id.rpartition("/")[2]
+
+    # MoE notation such as 8x7B is checked first; otherwise the plain
+    # pattern would only pick up the "7B" part.
+    moe = re.search(r"(\d+)x(\d+(?:\.\d+)?)[bB]", model_name)
+    if moe:
+        return float(moe.group(1)) * float(moe.group(2))
+
+    # The lookahead rejects tokens like "8bit" where "b" starts a word.
+    dense = re.search(r"(\d+(?:\.\d+)?)[bB](?![a-zA-Z])", model_name)
+    if dense:
+        return float(dense.group(1))
+
+    return 7.0  # Fallback when the name carries no size.
+
+def extract_quant(filename: str) -> str:
+    """Extract the quantization label from a GGUF filename.
+
+    Recognises K-/legacy quants (Q4_K_M, Q8_0, ...), I-quants (IQ4_XS, ...)
+    and the float formats F16/FP16, BF16 and F32. "FP16" is normalised to
+    "F16", the spelling used as a key in hw_math.QUANT_BYTES_PER_PARAM.
+
+    Args:
+        filename: File name or repo-relative path of the GGUF file.
+
+    Returns:
+        The upper-cased label, or "Q4_K_M" when nothing is recognised.
+    """
+    # Bare F16/F32 need alphanumeric boundaries so they do not fire inside
+    # longer tokens; BF16/FP16 are listed first and win at their own position.
+    m = re.search(
+        r"(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|FP16|BF16"
+        r"|(?<![A-Z0-9])F(?:16|32)(?![A-Z0-9]))",
+        filename,
+        re.IGNORECASE,
+    )
+    if not m:
+        return "Q4_K_M"
+    label = m.group(1).upper()
+    return "F16" if label == "FP16" else label
+
+# Accepts "name" or "owner/name". Every segment must start with a word
+# character, so "." / ".." path segments cannot reach the request URL.
+_REPO_ID_RE = re.compile(r"\w[\w.\-]*(?:/\w[\w.\-]*)?", re.ASCII)
+
+
+def _file_priority(quant: str, level: str) -> int:
+    """Return the ranking score for one file: fit bonus plus quant bonus."""
+    priority = 10 if level == "perfect" else 0
+    # "Q4_K_M" is often the sweet spot, followed by other Q4 and Q5 variants.
+    if quant == "Q4_K_M":
+        priority += 5
+    elif quant.startswith("Q4"):
+        priority += 4
+    elif quant.startswith("Q5"):
+        priority += 3
+    return priority
+
+
+@router.post("/analyze")
+async def analyze_repo(req: AnalyzeRequest):
+    """Fetch a repo's GGUF files from HuggingFace and compute the hardware fit.
+
+    Args:
+        req: Repo id and context length to evaluate.
+
+    Returns:
+        Dict with ``repo``, ``params_b``, ``sys_ram_gb`` and ``files``. Each
+        file entry has ``filename``, ``quant``, ``fit`` and ``priority``.
+        Files are sorted best first; ``files`` is empty when the repo has no
+        .gguf file.
+
+    Raises:
+        HTTPException: 400 for a malformed repo id, 404 when HuggingFace
+            does not know the repo, 502 for any other upstream failure.
+    """
+    # repo_id ends up in a URL path, so reject anything that is not a plain id.
+    if not _REPO_ID_RE.fullmatch(req.repo_id):
+        raise HTTPException(status_code=400, detail="Ungültige Repo-ID")
+
+    url = f"https://huggingface.co/api/models/{req.repo_id}/tree/main"
+
+    try:
+        async with httpx.AsyncClient() as client:
+            resp = await client.get(url, timeout=10.0)
+            resp.raise_for_status()
+            tree = resp.json()
+    except httpx.HTTPStatusError as e:
+        # Only "not found" is passed through. Other upstream codes (401/403
+        # for gated repos, 5xx) become 502 so the client does not mistake
+        # them for a failure of this API's own auth.
+        status = 404 if e.response.status_code == 404 else 502
+        raise HTTPException(
+            status_code=status, detail=f"HuggingFace Fehler: {e}"
+        ) from e
+    except (httpx.HTTPError, ValueError) as e:
+        # Transport errors and timeouts, or a body that is not valid JSON.
+        raise HTTPException(
+            status_code=502, detail=f"HuggingFace Fehler: {e}"
+        ) from e
+
+    # An unexpected (non-list) payload is treated as "no files".
+    entries = tree if isinstance(tree, list) else []
+    gguf_files = [
+        entry["path"]
+        for entry in entries
+        if isinstance(entry, dict)
+        and str(entry.get("path", "")).endswith(".gguf")
+    ]
+
+    params_b = extract_params_b(req.repo_id)
+
+    # The APU uses shared memory, so total system RAM is the budget.
+    ram_gb = psutil.virtual_memory().total / (1024**3)
+
+    results = []
+    for path in gguf_files:
+        quant = extract_quant(path)
+        fit = evaluate_fit(params_b, quant, req.ctx, ram_gb)
+        results.append({
+            "filename": path,
+            "quant": quant,
+            "fit": fit,
+            "priority": _file_priority(quant, fit["level"]),
+        })
+
+    # Files that fit come first, so a "too_tight" Q4 can never outrank a file
+    # that actually fits; then highest priority, then fastest.
+    results.sort(
+        key=lambda r: (
+            r["fit"]["level"] != "too_tight",
+            r["priority"],
+            r["fit"]["tps"],
+        ),
+        reverse=True,
+    )
+
+    # Same shape for empty and non-empty results, so callers can always
+    # read "repo".
+    return {
+        "repo": req.repo_id,
+        "params_b": params_b,
+        "sys_ram_gb": round(ram_gb, 1),
+        "files": results,
+    }
+
+@router.post("/evaluate")
+def evaluate_single(req: EvaluateRequest):
+    """Return the fit evaluation for one explicit model configuration.
+
+    The result is whatever ``evaluate_fit`` returns for the requested
+    size, quantization and context length against this machine's total RAM.
+    """
+    total_ram_gb = psutil.virtual_memory().total / (1024**3)
+    return evaluate_fit(req.params_b, req.quant, req.ctx, total_ram_gb)
+
diff --git a/static/js/panels/cookbook.js b/static/js/panels/cookbook.js
index e6a7957..5e2fc89 100644
--- a/static/js/panels/cookbook.js
+++ b/static/js/panels/cookbook.js
@@ -37,38 +37,7 @@ const CURATED_MODELS = [
}
];
-function estimateMemoryGB(params_b, quant, ctx) {
- const bpp = 0.6;
- const weights = params_b * bpp;
- const context = (ctx / 8192) * (params_b / 7) * 0.8;
- return weights + context;
-}
-
-function estimateSpeed(req_gb, vram_gb) {
- // Heuristic for speed in tokens/s
- // Bosgame APU (Strix Halo) has unified memory with ~273 GB/s bandwidth.
- // We approximate bandwidth: if huge VRAM/GTT, it's the APU.
- const bw = (vram_gb > 32) ? 250 : 70; // 250 GB/s for APU, 70 GB/s for standard CPU
- if (req_gb <= 0) return 0;
- return (bw / req_gb) * 0.55; // 55% efficiency
-}
-
-function getFit(m, sys) {
- const req = estimateMemoryGB(m.params_b, m.quant, m.ctx);
- const vram_bytes = (sys?.gpu?.vram?.total || 0) + (sys?.gpu?.gtt?.total || 0);
- const vram = vram_bytes / (1024 ** 3);
- const ram_bytes = sys?.ram?.total || 0;
- const ram_used = sys?.ram?.used || 0;
- const ram = ram_bytes / (1024 ** 3);
- const freeRam = (ram_bytes - ram_used) / (1024 ** 3);
-
- const tps = estimateSpeed(req, vram);
-
- if (vram === 0 && ram === 0) return { level: "perfect", class: "b-run", text: "Lade...", req, tps };
- if (vram > 0 && req <= vram) return { level: "perfect", class: "b-run", text: "Passt in VRAM", req, tps };
- if (req <= (vram + freeRam)) return { level: "good", class: "b-load", text: "RAM Offload", req, tps };
- return { level: "too_tight", class: "b-err", text: "Zu groß (OOM)", req, tps };
-}
+// Lokale Mathe entfernt. Wir nutzen jetzt das Backend.
let lastSys = null;
let currentResults = [];
@@ -134,43 +103,58 @@ function mount() {
$("#cb-m-download").addEventListener("click", doDownload);
$("#cb-m-files").addEventListener("change", updateLiveFit);
- $("#cb-m-ctx").addEventListener("input", updateLiveFit);
+ $("#cb-m-ctx").addEventListener("change", reanalyzeCtx);
renderCurated();
}
-function extractParamsB(name) {
- const moe = name.match(/(\d+)x(\d+(?:\.\d+)?)[bB]/i);
- if (moe) return parseInt(moe[1]) * parseFloat(moe[2]);
- const m = name.match(/(\d+(?:\.\d+)?)[bB](?![a-zA-Z])/i);
- if (m) return parseFloat(m[1]);
- return 7; // Fallback
-}
-
-function extractQuant(filename) {
- const m = filename.match(/(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|FP16|BF16)/i);
- return m ? m[1].toUpperCase() : "Q4_K_M";
-}
+// Aktuelle Analyse-Daten vom Backend
+let currentAnalysis = null;
+// Refreshes the fit summary, badge and download button for the file currently
+// selected in #cb-m-files, using the stored backend result (currentAnalysis).
function updateLiveFit() {
- const repo = $("#cb-m-repo").textContent;
const file = $("#cb-m-files").value;
- const ctx = parseInt($("#cb-m-ctx").value) || 8192;
-
- if (!repo || !file) {
+ if (!currentAnalysis || !file) {
$("#cb-m-fit-container").style.display = "none";
return;
}
- const params_b = extractParamsB(repo);
- const quant = extractQuant(file);
+ // Look up the backend-computed entry for the selected filename.
+ const fData = currentAnalysis.files.find(f => f.filename === file);
+ if (!fData) return;
- const m = { params_b, quant, ctx };
- const fit = getFit(m, lastSys);
+ const fit = fData.fit;
+ // CSS class name chosen from the fit level.
+ const cls = fit.level === "perfect" ? "b-run" : (fit.level === "marginal" ? "b-load" : "b-err");
$("#cb-m-fit-container").style.display = "flex";
- $("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: ~${fit.req.toFixed(1)} GB RAM/VRAM
${params_b}B Params · ${quant} · ~${Math.round(fit.tps)} t/s`;
- $("#cb-m-fit-badge").innerHTML = `${fit.text}`;
+ $("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: ~${fit.req_gb.toFixed(1)} GB RAM/VRAM
${currentAnalysis.params_b}B Params · ${fData.quant} · ~${Math.round(fit.tps)} t/s`;
+ $("#cb-m-fit-badge").innerHTML = `${fit.text}`;
+
+ // For "too_tight" the download button gets the warning style and label, but stays usable.
+ const btn = $("#cb-m-download");
+ if (fit.level === "too_tight") {
+ btn.className = "primary warn";
+ btn.innerHTML = "Trotzdem herunterladen (OOM Risiko!)";
+ } else {
+ btn.className = "primary";
+ btn.innerHTML = "Herunterladen & Einpflegen";
+ }
+}
+
+/**
+ * Re-runs the backend analysis for the current repo with the context length
+ * from #cb-m-ctx, then refreshes the fit display for the selected file.
+ * Does nothing when no analysis with a repo id is stored.
+ */
+async function reanalyzeCtx() {
+  // Without a repo id there is nothing to re-query.
+  if (!currentAnalysis || !currentAnalysis.repo) return;
+  const ctx = parseInt($("#cb-m-ctx").value, 10) || 8192;
+  const repo = currentAnalysis.repo;
+  const selectedFile = $("#cb-m-files").value;
+
+  $("#cb-m-fit-text").innerHTML = "Berechne neues Context-Limit...";
+  try {
+    const res = await api("/api/cookbook/analyze", {
+      method: "POST", body: JSON.stringify({ repo_id: repo, ctx })
+    });
+    currentAnalysis = res;
+    // Keep the user's file selection across the refresh.
+    $("#cb-m-files").value = selectedFile;
+    updateLiveFit();
+  } catch (e) {
+    // Show the failure instead of leaving the "Berechne..." placeholder up
+    // forever. textContent keeps the message from being parsed as HTML.
+    console.error("cookbook re-analysis failed", e);
+    $("#cb-m-fit-text").textContent = "Fehler: " + (e && e.message ? e.message : String(e));
+  }
}
async function doSearch() {
@@ -226,27 +210,37 @@ window.openModelModal = async (index) => {
$("#cb-m-files").style.display = "none";
$("#cb-m-loading").style.display = "block";
+ $("#cb-m-download").disabled = true;
try {
- const url = `https://huggingface.co/api/models/${m.id}/tree/main`;
- const r = await fetch(url);
- const tree = await r.json();
- const files = tree.filter(f => f.path.endsWith('.gguf')).map(f => f.path);
+ const ctx = parseInt($("#cb-m-ctx").value) || 8192;
+ const res = await api("/api/cookbook/analyze", {
+ method: "POST", body: JSON.stringify({ repo_id: m.id, ctx })
+ });
+
+ currentAnalysis = res;
$("#cb-m-loading").style.display = "none";
$("#cb-m-files").style.display = "block";
- if (files.length === 0) {
- $("#cb-m-files").innerHTML = "";
- $("#cb-m-download").disabled = true;
+ if (!res.files || res.files.length === 0) {
+ $("#cb-m-files").innerHTML = "";
$("#cb-m-fit-container").style.display = "none";
} else {
- $("#cb-m-files").innerHTML = files.map(f => ``).join("");
+ // Optische Indikatoren im Dropdown
+ $("#cb-m-files").innerHTML = res.files.map(f => {
+ let mark = "";
+ if (f.fit.level === "perfect") mark = "🟢";
+ else if (f.fit.level === "marginal") mark = "🟡";
+ else mark = "🔴";
+ return ``;
+ }).join("");
+
$("#cb-m-download").disabled = false;
updateLiveFit();
}
} catch(e) {
- $("#cb-m-loading").textContent = "Fehler beim Laden der Dateien.";
+ $("#cb-m-loading").textContent = "Fehler: " + e.message;
}
};
@@ -287,31 +281,46 @@ async function doDownload() {
$("#cb-m-download").textContent = "Herunterladen & Einpflegen";
}
-function renderCurated() {
+async function renderCurated() {
$("#cb-section-title").textContent = "Kuratierte Empfehlungen";
const grid = $("#cb-grid");
if (!grid) return;
- grid.innerHTML = CURATED_MODELS.map((m, i) => {
- const fit = getFit(m, lastSys);
- return `
-