feat: smart cookbook MVP mit odysseus fit logik
This commit is contained in:
@@ -20,7 +20,7 @@ from fastapi import FastAPI, HTTPException
|
|||||||
from fastapi.responses import FileResponse, JSONResponse
|
from fastapi.responses import FileResponse, JSONResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
||||||
from routers import jobs, maintenance, models, system
|
from routers import jobs, maintenance, models, system, cookbook
|
||||||
|
|
||||||
app = FastAPI(title="Mission Control")
|
app = FastAPI(title="Mission Control")
|
||||||
|
|
||||||
@@ -28,6 +28,7 @@ app.include_router(models.router)
|
|||||||
app.include_router(jobs.router)
|
app.include_router(jobs.router)
|
||||||
app.include_router(maintenance.router)
|
app.include_router(maintenance.router)
|
||||||
app.include_router(system.router)
|
app.include_router(system.router)
|
||||||
|
app.include_router(cookbook.router)
|
||||||
|
|
||||||
_STATIC = Path(__file__).parent / "static"
|
_STATIC = Path(__file__).parent / "static"
|
||||||
|
|
||||||
|
|||||||
+71
@@ -0,0 +1,71 @@
|
|||||||
|
"""
|
||||||
|
Extrahierte Mathematik aus dem Odysseus Projekt zur VRAM/RAM Berechnung.
|
||||||
|
Abgestimmt auf APUs mit Unified Memory (Bosgame M5 / Strix Halo).
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Assumed bytes per parameter for each GGUF quantisation / float format.
# Keys are upper-case; lookups normalise the incoming tag before matching.
QUANT_BYTES_PER_PARAM = {
    "Q2_K": 0.35,
    "Q3_K_S": 0.38,
    "Q3_K_M": 0.42,
    "Q3_K_L": 0.45,
    "Q4_0": 0.50,
    "Q4_1": 0.55,
    "Q4_K_S": 0.50,
    "Q4_K_M": 0.55,
    "Q5_0": 0.62,
    "Q5_1": 0.68,
    "Q5_K_S": 0.62,
    "Q5_K_M": 0.65,
    "Q6_K": 0.75,
    "Q8_0": 1.00,
    "F16": 2.00,
    # "FP16" is an alternative spelling of F16 that appears in file names;
    # without this alias it would silently fall back to the 0.65 default.
    "FP16": 2.00,
    "BF16": 2.00,
    "F32": 4.00,
    "FP32": 4.00,
}

# Used for formats missing from the table: the Q5_K_M value, chosen as a
# cautious middle ground.
_DEFAULT_BYTES_PER_PARAM = 0.65


def estimate_memory_gb(params_b: float, quant: str, ctx: int) -> float:
    """Estimate the memory footprint in GB (weights plus context).

    Args:
        params_b: Model size in billions of parameters.
        quant: Quantisation tag such as ``"Q4_K_M"`` or ``"F16"``; matching
            ignores case and surrounding whitespace.
        ctx: Context length in tokens.

    Returns:
        Estimated GB needed for the weights plus the context buffer.
    """
    bpp = QUANT_BYTES_PER_PARAM.get(quant.strip().upper(), _DEFAULT_BYTES_PER_PARAM)
    weights = params_b * bpp

    # Heuristic for context memory: 8k context on a 7B model costs about
    # 0.8 GB. Models below 7B are treated as 7B for this term.
    context_vram = (ctx / 8192) * (max(params_b, 7) / 7) * 0.8

    return weights + context_vram
|
||||||
|
|
||||||
|
def estimate_speed(req_gb: float, sys_ram_gb: float) -> float:
    """Estimate tokens per second from memory bandwidth and model footprint.

    Systems reporting more than 8 GB of RAM are assumed to be the Strix Halo
    APU with roughly 273 GB/s of unified-memory bandwidth; anything else is
    given 70 GB/s.

    Args:
        req_gb: Estimated memory footprint of the model in GB.
        sys_ram_gb: Total system RAM in GB.

    Returns:
        Estimated tokens per second, or ``0.0`` when ``req_gb`` is not positive.
    """
    if sys_ram_gb > 8:
        bandwidth_gbps = 273
    else:
        bandwidth_gbps = 70

    if req_gb <= 0:
        return 0.0

    # (bandwidth / model size), scaled by an assumed efficiency of 0.55.
    return (bandwidth_gbps / req_gb) * 0.55
|
||||||
|
|
||||||
|
def evaluate_fit(params_b: float, quant: str, ctx: int, sys_ram_gb: float) -> dict:
    """Classify how well a model fits a shared-memory (APU) system.

    Args:
        params_b: Model size in billions of parameters.
        quant: Quantisation tag of the model file.
        ctx: Context length in tokens.
        sys_ram_gb: Total system RAM in GB.

    Returns:
        A dict with ``level`` (``"too_tight"``, ``"marginal"`` or
        ``"perfect"``), a display ``text``, the rounded requirement
        ``req_gb`` and the rounded speed estimate ``tps``.
    """
    required = estimate_memory_gb(params_b, quant, ctx)
    speed = estimate_speed(required, sys_ram_gb)

    # The OS and other processes need memory too: keep 4 GB in reserve.
    budget = max(sys_ram_gb - 4.0, 0)

    if required > budget:
        verdict = ("too_tight", "Zu groß (OOM)")
    elif required > budget * 0.8:
        verdict = ("marginal", "Könnte knapp werden")
    else:
        verdict = ("perfect", "Passt perfekt")

    level, label = verdict
    return {
        "level": level,
        "text": label,
        "req_gb": round(required, 1),
        "tps": round(speed, 0),
    }
|
||||||
@@ -0,0 +1,100 @@
|
|||||||
|
"""
|
||||||
|
Cookbook Router: Verbindet die HuggingFace API mit der Odysseus-Hardware-Berechnung.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import re
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
from auth import auth
|
||||||
|
from hw_math import evaluate_fit
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api/cookbook", dependencies=[Depends(auth)])
|
||||||
|
|
||||||
|
class AnalyzeRequest(BaseModel):
    """Request body of the ``/analyze`` endpoint."""

    # Hugging Face repository id; it is inserted into the tree-listing URL.
    repo_id: str
    # Context length handed to the fit estimate; 8192 when omitted.
    ctx: int = 8192
|
||||||
|
|
||||||
|
class EvaluateRequest(BaseModel):
    """Request body of the ``/evaluate`` endpoint.

    All three fields are forwarded unchanged to ``evaluate_fit``.
    """

    # Model size in billions of parameters.
    params_b: float
    # Quantisation tag, e.g. "Q4_K_M".
    quant: str
    # Context length in tokens.
    ctx: int
|
||||||
|
|
||||||
|
# "<experts>x<size>B" for mixture-of-experts names, e.g. "8x7B".
# The trailing guard rejects tokens such as "2x4bit", matching the guard the
# dense pattern already applies.
_MOE_SIZE_RE = re.compile(r"(\d+)x(\d+(?:\.\d+)?)[bB](?![a-zA-Z])")
# "<size>B" for dense models, e.g. "32B" or "1.5b"; "4bit" is rejected.
_DENSE_SIZE_RE = re.compile(r"(\d+(?:\.\d+)?)[bB](?![a-zA-Z])")


def extract_params_b(repo_id: str) -> float:
    """Extract the parameter count (in billions) from a repository name.

    Examples: ``Qwen2.5-Coder-32B`` gives 32, ``Mixtral-8x7B`` gives 56
    (experts multiplied by the per-expert size).

    Args:
        repo_id: Repository id or model name to scan.

    Returns:
        The first positive size found, preferring the mixture-of-experts
        form; ``7.0`` when the name carries no usable size token.
    """
    for match in _MOE_SIZE_RE.finditer(repo_id):
        total = float(match.group(1)) * float(match.group(2))
        # Skip degenerate tokens such as "0x7b" instead of reporting 0 params.
        if total > 0:
            return total

    for match in _DENSE_SIZE_RE.finditer(repo_id):
        size = float(match.group(1))
        if size > 0:
            return size

    return 7.0  # Fallback
|
||||||
|
|
||||||
|
# Quantisation tags as they appear in GGUF file names. Bare "F16"/"F32" must
# not be preceded by a letter or digit, so they only match as their own token.
_QUANT_RE = re.compile(
    r"(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|BF16|FP16|FP32"
    r"|(?<![A-Z0-9])F(?:16|32)(?![0-9]))",
    re.IGNORECASE,
)

# Alternative spellings mapped onto the canonical tag.
_QUANT_ALIASES = {"FP16": "F16", "FP32": "F32"}


def extract_quant(filename: str) -> str:
    """Extract the quantisation tag from a GGUF file name.

    Args:
        filename: File name or repository path of the GGUF file.

    Returns:
        The upper-cased tag of the first match, with ``FP16``/``FP32``
        normalised to ``F16``/``F32``; ``"Q4_K_M"`` when no tag is found.
    """
    match = _QUANT_RE.search(filename)
    if match is None:
        return "Q4_K_M"
    tag = match.group(1).upper()
    return _QUANT_ALIASES.get(tag, tag)
|
||||||
|
|
||||||
|
# Accepts "name" or "namespace/name" built from letters, digits, "_", "." and
# "-". Anything else (extra slashes, "..", "?", "#", whitespace) is rejected
# before the id is placed into the upstream URL.
_REPO_ID_RE = re.compile(
    r"[A-Za-z0-9_][A-Za-z0-9_.\-]*(?:/[A-Za-z0-9_][A-Za-z0-9_.\-]*)?"
)


def _rank_priority(quant: str, fit_level: str) -> int:
    """Return the sort priority of one file; only files that fit score points."""
    if fit_level != "perfect":
        return 0
    priority = 10
    # "Q4_K_M" is treated as the sweet spot, followed by other Q4 and Q5 variants.
    if quant == "Q4_K_M":
        priority += 5
    elif quant.startswith("Q4"):
        priority += 4
    elif quant.startswith("Q5"):
        priority += 3
    return priority


@router.post("/analyze")
async def analyze_repo(req: AnalyzeRequest):
    """List the GGUF files of a Hugging Face repo and rate their hardware fit.

    Args:
        req: Repository id and the context length to evaluate.

    Returns:
        A dict with ``repo``, ``params_b``, ``sys_ram_gb`` and ``files``.
        ``files`` holds one entry per GGUF file (``filename``, ``quant``,
        ``fit``, ``priority``), best candidate first; it is empty when the
        repo root contains no GGUF file.

    Raises:
        HTTPException: 400 for a malformed repo id, 422 for a non-positive
            context length, 404 when Hugging Face reports the repo as
            missing, 502 for any other upstream failure.
    """
    if not _REPO_ID_RE.fullmatch(req.repo_id):
        raise HTTPException(status_code=400, detail="Ungültige Repo-ID")
    if req.ctx <= 0:
        raise HTTPException(status_code=422, detail="ctx muss größer als 0 sein")

    url = f"https://huggingface.co/api/models/{req.repo_id}/tree/main"

    try:
        async with httpx.AsyncClient() as client:
            resp = await client.get(url, timeout=10.0)
            resp.raise_for_status()
            tree = resp.json()
    except httpx.HTTPStatusError as e:
        # Pass "not found" through; every other upstream status is a gateway error.
        status = 404 if e.response.status_code == 404 else 502
        raise HTTPException(status_code=status, detail=f"HuggingFace Fehler: {e}") from e
    except (httpx.HTTPError, ValueError) as e:
        # Transport problems and undecodable JSON bodies.
        raise HTTPException(status_code=502, detail=f"HuggingFace Fehler: {e}") from e

    if not isinstance(tree, list):
        raise HTTPException(
            status_code=502,
            detail="HuggingFace Fehler: unerwartetes Antwortformat",
        )

    gguf_files = [
        entry["path"]
        for entry in tree
        if isinstance(entry, dict)
        and isinstance(entry.get("path"), str)
        and entry["path"].endswith(".gguf")
    ]

    params_b = extract_params_b(req.repo_id)

    # Total system RAM is the budget, because the APU shares memory with the GPU.
    ram_gb = psutil.virtual_memory().total / (1024**3)

    results = []
    for path in gguf_files:
        quant = extract_quant(path)
        fit = evaluate_fit(params_b, quant, req.ctx, ram_gb)
        results.append({
            "filename": path,
            "quant": quant,
            "fit": fit,
            "priority": _rank_priority(quant, fit["level"]),
        })

    # Highest priority first, then fastest estimated tokens/s.
    results.sort(key=lambda item: (item["priority"], item["fit"]["tps"]), reverse=True)

    # The same shape is returned whether or not any file was found.
    return {
        "repo": req.repo_id,
        "params_b": params_b,
        "sys_ram_gb": round(ram_gb, 1),
        "files": results,
    }
|
||||||
|
|
||||||
|
@router.post("/evaluate")
def evaluate_single(req: EvaluateRequest):
    """Rate the hardware fit of one model configuration on this machine.

    Args:
        req: Parameter count, quantisation tag and context length.

    Returns:
        The fit dict produced by ``evaluate_fit`` for the total system RAM.

    Raises:
        HTTPException: 422 when ``params_b`` or ``ctx`` is not positive.
    """
    # Non-positive sizes would produce a non-positive memory estimate, which
    # the fit check would report as a perfect fit.
    if not (req.params_b > 0 and req.ctx > 0):
        raise HTTPException(
            status_code=422,
            detail="params_b und ctx müssen größer als 0 sein",
        )

    ram_gb = psutil.virtual_memory().total / (1024**3)
    return evaluate_fit(req.params_b, req.quant, req.ctx, ram_gb)
|
||||||
|
|
||||||
+105
-84
@@ -37,38 +37,7 @@ const CURATED_MODELS = [
|
|||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
function estimateMemoryGB(params_b, quant, ctx) {
|
// Lokale Mathe entfernt. Wir nutzen jetzt das Backend.
|
||||||
const bpp = 0.6;
|
|
||||||
const weights = params_b * bpp;
|
|
||||||
const context = (ctx / 8192) * (params_b / 7) * 0.8;
|
|
||||||
return weights + context;
|
|
||||||
}
|
|
||||||
|
|
||||||
function estimateSpeed(req_gb, vram_gb) {
|
|
||||||
// Heuristic for speed in tokens/s
|
|
||||||
// Bosgame APU (Strix Halo) has unified memory with ~273 GB/s bandwidth.
|
|
||||||
// We approximate bandwidth: if huge VRAM/GTT, it's the APU.
|
|
||||||
const bw = (vram_gb > 32) ? 250 : 70; // 250 GB/s for APU, 70 GB/s for standard CPU
|
|
||||||
if (req_gb <= 0) return 0;
|
|
||||||
return (bw / req_gb) * 0.55; // 55% efficiency
|
|
||||||
}
|
|
||||||
|
|
||||||
function getFit(m, sys) {
|
|
||||||
const req = estimateMemoryGB(m.params_b, m.quant, m.ctx);
|
|
||||||
const vram_bytes = (sys?.gpu?.vram?.total || 0) + (sys?.gpu?.gtt?.total || 0);
|
|
||||||
const vram = vram_bytes / (1024 ** 3);
|
|
||||||
const ram_bytes = sys?.ram?.total || 0;
|
|
||||||
const ram_used = sys?.ram?.used || 0;
|
|
||||||
const ram = ram_bytes / (1024 ** 3);
|
|
||||||
const freeRam = (ram_bytes - ram_used) / (1024 ** 3);
|
|
||||||
|
|
||||||
const tps = estimateSpeed(req, vram);
|
|
||||||
|
|
||||||
if (vram === 0 && ram === 0) return { level: "perfect", class: "b-run", text: "Lade...", req, tps };
|
|
||||||
if (vram > 0 && req <= vram) return { level: "perfect", class: "b-run", text: "Passt in VRAM", req, tps };
|
|
||||||
if (req <= (vram + freeRam)) return { level: "good", class: "b-load", text: "RAM Offload", req, tps };
|
|
||||||
return { level: "too_tight", class: "b-err", text: "Zu groß (OOM)", req, tps };
|
|
||||||
}
|
|
||||||
|
|
||||||
let lastSys = null;
|
let lastSys = null;
|
||||||
let currentResults = [];
|
let currentResults = [];
|
||||||
@@ -134,43 +103,58 @@ function mount() {
|
|||||||
$("#cb-m-download").addEventListener("click", doDownload);
|
$("#cb-m-download").addEventListener("click", doDownload);
|
||||||
|
|
||||||
$("#cb-m-files").addEventListener("change", updateLiveFit);
|
$("#cb-m-files").addEventListener("change", updateLiveFit);
|
||||||
$("#cb-m-ctx").addEventListener("input", updateLiveFit);
|
$("#cb-m-ctx").addEventListener("change", reanalyzeCtx);
|
||||||
|
|
||||||
renderCurated();
|
renderCurated();
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractParamsB(name) {
|
// Aktuelle Analyse-Daten vom Backend
|
||||||
const moe = name.match(/(\d+)x(\d+(?:\.\d+)?)[bB]/i);
|
let currentAnalysis = null;
|
||||||
if (moe) return parseInt(moe[1]) * parseFloat(moe[2]);
|
|
||||||
const m = name.match(/(\d+(?:\.\d+)?)[bB](?![a-zA-Z])/i);
|
|
||||||
if (m) return parseFloat(m[1]);
|
|
||||||
return 7; // Fallback
|
|
||||||
}
|
|
||||||
|
|
||||||
function extractQuant(filename) {
|
|
||||||
const m = filename.match(/(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|FP16|BF16)/i);
|
|
||||||
return m ? m[1].toUpperCase() : "Q4_K_M";
|
|
||||||
}
|
|
||||||
|
|
||||||
function updateLiveFit() {
|
function updateLiveFit() {
|
||||||
const repo = $("#cb-m-repo").textContent;
|
|
||||||
const file = $("#cb-m-files").value;
|
const file = $("#cb-m-files").value;
|
||||||
const ctx = parseInt($("#cb-m-ctx").value) || 8192;
|
if (!currentAnalysis || !file) {
|
||||||
|
|
||||||
if (!repo || !file) {
|
|
||||||
$("#cb-m-fit-container").style.display = "none";
|
$("#cb-m-fit-container").style.display = "none";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const params_b = extractParamsB(repo);
|
const fData = currentAnalysis.files.find(f => f.filename === file);
|
||||||
const quant = extractQuant(file);
|
if (!fData) return;
|
||||||
|
|
||||||
const m = { params_b, quant, ctx };
|
const fit = fData.fit;
|
||||||
const fit = getFit(m, lastSys);
|
const cls = fit.level === "perfect" ? "b-run" : (fit.level === "marginal" ? "b-load" : "b-err");
|
||||||
|
|
||||||
$("#cb-m-fit-container").style.display = "flex";
|
$("#cb-m-fit-container").style.display = "flex";
|
||||||
$("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${params_b}B Params · ${quant} · ~${Math.round(fit.tps)} t/s</small>`;
|
$("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req_gb.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${currentAnalysis.params_b}B Params · ${fData.quant} · ~${Math.round(fit.tps)} t/s</small>`;
|
||||||
$("#cb-m-fit-badge").innerHTML = `<span class="badge ${fit.class}">${fit.text}</span>`;
|
$("#cb-m-fit-badge").innerHTML = `<span class="badge ${cls}">${fit.text}</span>`;
|
||||||
|
|
||||||
|
// Wenn "too_tight", machen wir den Download-Button gelb zur Warnung, erlauben ihn aber
|
||||||
|
const btn = $("#cb-m-download");
|
||||||
|
if (fit.level === "too_tight") {
|
||||||
|
btn.className = "primary warn";
|
||||||
|
btn.innerHTML = "Trotzdem herunterladen (OOM Risiko!)";
|
||||||
|
} else {
|
||||||
|
btn.className = "primary";
|
||||||
|
btn.innerHTML = "Herunterladen & Einpflegen";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function reanalyzeCtx() {
|
||||||
|
if (!currentAnalysis) return;
|
||||||
|
const ctx = parseInt($("#cb-m-ctx").value) || 8192;
|
||||||
|
const repo = currentAnalysis.repo;
|
||||||
|
const file = $("#cb-m-files").value;
|
||||||
|
|
||||||
|
$("#cb-m-fit-text").innerHTML = "Berechne neues Context-Limit...";
|
||||||
|
try {
|
||||||
|
const res = await api("/api/cookbook/analyze", {
|
||||||
|
method: "POST", body: JSON.stringify({ repo_id: repo, ctx })
|
||||||
|
});
|
||||||
|
currentAnalysis = res;
|
||||||
|
// Auswahl beibehalten
|
||||||
|
$("#cb-m-files").value = file;
|
||||||
|
updateLiveFit();
|
||||||
|
} catch(e) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function doSearch() {
|
async function doSearch() {
|
||||||
@@ -226,27 +210,37 @@ window.openModelModal = async (index) => {
|
|||||||
|
|
||||||
$("#cb-m-files").style.display = "none";
|
$("#cb-m-files").style.display = "none";
|
||||||
$("#cb-m-loading").style.display = "block";
|
$("#cb-m-loading").style.display = "block";
|
||||||
|
$("#cb-m-download").disabled = true;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const url = `https://huggingface.co/api/models/${m.id}/tree/main`;
|
const ctx = parseInt($("#cb-m-ctx").value) || 8192;
|
||||||
const r = await fetch(url);
|
const res = await api("/api/cookbook/analyze", {
|
||||||
const tree = await r.json();
|
method: "POST", body: JSON.stringify({ repo_id: m.id, ctx })
|
||||||
const files = tree.filter(f => f.path.endsWith('.gguf')).map(f => f.path);
|
});
|
||||||
|
|
||||||
|
currentAnalysis = res;
|
||||||
|
|
||||||
$("#cb-m-loading").style.display = "none";
|
$("#cb-m-loading").style.display = "none";
|
||||||
$("#cb-m-files").style.display = "block";
|
$("#cb-m-files").style.display = "block";
|
||||||
|
|
||||||
if (files.length === 0) {
|
if (!res.files || res.files.length === 0) {
|
||||||
$("#cb-m-files").innerHTML = "<option value=''>Keine GGUF-Dateien im Hauptverzeichnis gefunden.</option>";
|
$("#cb-m-files").innerHTML = "<option value=''>Keine GGUF-Dateien gefunden.</option>";
|
||||||
$("#cb-m-download").disabled = true;
|
|
||||||
$("#cb-m-fit-container").style.display = "none";
|
$("#cb-m-fit-container").style.display = "none";
|
||||||
} else {
|
} else {
|
||||||
$("#cb-m-files").innerHTML = files.map(f => `<option value="${esc(f)}">${esc(f)}</option>`).join("");
|
// Optische Indikatoren im Dropdown
|
||||||
|
$("#cb-m-files").innerHTML = res.files.map(f => {
|
||||||
|
let mark = "";
|
||||||
|
if (f.fit.level === "perfect") mark = "🟢";
|
||||||
|
else if (f.fit.level === "marginal") mark = "🟡";
|
||||||
|
else mark = "🔴";
|
||||||
|
return `<option value="${esc(f.filename)}">${mark} ${esc(f.filename)}</option>`;
|
||||||
|
}).join("");
|
||||||
|
|
||||||
$("#cb-m-download").disabled = false;
|
$("#cb-m-download").disabled = false;
|
||||||
updateLiveFit();
|
updateLiveFit();
|
||||||
}
|
}
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
$("#cb-m-loading").textContent = "Fehler beim Laden der Dateien.";
|
$("#cb-m-loading").textContent = "Fehler: " + e.message;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -287,31 +281,46 @@ async function doDownload() {
|
|||||||
$("#cb-m-download").textContent = "Herunterladen & Einpflegen";
|
$("#cb-m-download").textContent = "Herunterladen & Einpflegen";
|
||||||
}
|
}
|
||||||
|
|
||||||
function renderCurated() {
|
async function renderCurated() {
|
||||||
$("#cb-section-title").textContent = "Kuratierte Empfehlungen";
|
$("#cb-section-title").textContent = "Kuratierte Empfehlungen";
|
||||||
const grid = $("#cb-grid");
|
const grid = $("#cb-grid");
|
||||||
if (!grid) return;
|
if (!grid) return;
|
||||||
grid.innerHTML = CURATED_MODELS.map((m, i) => {
|
|
||||||
const fit = getFit(m, lastSys);
|
grid.innerHTML = "<div class='meta' style='grid-column:1/-1;text-align:center;padding:40px'>Berechne Hardware-Fit für Empfehlungen...</div>";
|
||||||
return `
|
|
||||||
<div class="card" style="display:flex; flex-direction:column; cursor:pointer" onclick="window.openCuratedModal(${i})">
|
try {
|
||||||
<div style="display:flex; justify-content:space-between; align-items:center;">
|
let html = "";
|
||||||
<h3 style="margin:0; font-size:16px">${esc(m.name)}</h3>
|
for (let i = 0; i < CURATED_MODELS.length; i++) {
|
||||||
<span class="badge ${fit.class}">${fit.text}</span>
|
const m = CURATED_MODELS[i];
|
||||||
|
const fit = await api("/api/cookbook/evaluate", {
|
||||||
|
method: "POST", body: JSON.stringify({ params_b: m.params_b, quant: m.quant, ctx: m.ctx })
|
||||||
|
});
|
||||||
|
|
||||||
|
const cls = fit.level === "perfect" ? "b-run" : (fit.level === "marginal" ? "b-load" : "b-err");
|
||||||
|
|
||||||
|
html += `
|
||||||
|
<div class="card" style="display:flex; flex-direction:column; cursor:pointer" onclick="window.openCuratedModal(${i})">
|
||||||
|
<div style="display:flex; justify-content:space-between; align-items:center;">
|
||||||
|
<h3 style="margin:0; font-size:16px">${esc(m.name)}</h3>
|
||||||
|
<span class="badge ${cls}">${fit.text}</span>
|
||||||
|
</div>
|
||||||
|
<div style="font-size:13px; color:var(--mut); margin-top:12px; flex:1; line-height:1.5;">
|
||||||
|
${m.desc}
|
||||||
|
</div>
|
||||||
|
<div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta">
|
||||||
|
<span>~${fit.req_gb.toFixed(1)} GB RAM/VRAM · ~${Math.round(fit.tps)} t/s</span>
|
||||||
|
<span>${m.quant}</span>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div style="font-size:13px; color:var(--mut); margin-top:12px; flex:1; line-height:1.5;">
|
`;
|
||||||
${m.desc}
|
}
|
||||||
</div>
|
grid.innerHTML = html;
|
||||||
<div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta">
|
} catch (e) {
|
||||||
<span>~${fit.req.toFixed(1)} GB RAM · ~${Math.round(fit.tps)} t/s</span>
|
grid.innerHTML = `<div class="alert err" style="grid-column:1/-1">Fehler beim Laden der Empfehlungen: ${e.message}</div>`;
|
||||||
<span>${m.quant}</span>
|
}
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
`;
|
|
||||||
}).join("");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
window.openCuratedModal = (index) => {
|
window.openCuratedModal = async (index) => {
|
||||||
const m = CURATED_MODELS[index];
|
const m = CURATED_MODELS[index];
|
||||||
if (!m) return;
|
if (!m) return;
|
||||||
$("#cb-modal").style.display = "flex";
|
$("#cb-modal").style.display = "flex";
|
||||||
@@ -323,7 +332,19 @@ window.openCuratedModal = (index) => {
|
|||||||
$("#cb-m-alias").value = m.alias;
|
$("#cb-m-alias").value = m.alias;
|
||||||
$("#cb-m-ctx").value = m.ctx;
|
$("#cb-m-ctx").value = m.ctx;
|
||||||
$("#cb-m-download").disabled = false;
|
$("#cb-m-download").disabled = false;
|
||||||
updateLiveFit();
|
|
||||||
|
// Wir nutzen die neue API Struktur auch für das simulierte Modal
|
||||||
|
try {
|
||||||
|
const fit = await api("/api/cookbook/evaluate", {
|
||||||
|
method: "POST", body: JSON.stringify({ params_b: m.params_b, quant: m.quant, ctx: m.ctx })
|
||||||
|
});
|
||||||
|
currentAnalysis = {
|
||||||
|
repo: m.repo,
|
||||||
|
params_b: m.params_b,
|
||||||
|
files: [{ filename: m.file, quant: m.quant, fit: fit }]
|
||||||
|
};
|
||||||
|
updateLiveFit();
|
||||||
|
} catch(e) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
function onSystem(sys) {
|
function onSystem(sys) {
|
||||||
|
|||||||
Reference in New Issue
Block a user