From a51f6ee88a4161742afbcca29d8ac8aaff920a89 Mon Sep 17 00:00:00 2001 From: Hitonabi Date: Sat, 20 Jun 2026 22:41:02 +0200 Subject: [PATCH] Cookbook RAM check and Speed Estimation --- static/js/panels/cookbook.js | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/static/js/panels/cookbook.js b/static/js/panels/cookbook.js index 3790c4b..e6a7957 100644 --- a/static/js/panels/cookbook.js +++ b/static/js/panels/cookbook.js @@ -44,19 +44,30 @@ function estimateMemoryGB(params_b, quant, ctx) { return weights + context; } +function estimateSpeed(req_gb, vram_gb) { + // Heuristic for speed in tokens/s + // Bosgame APU (Strix Halo) has unified memory with ~273 GB/s bandwidth. + // We approximate bandwidth: if huge VRAM/GTT, it's the APU. + const bw = (vram_gb > 32) ? 250 : 70; // 250 GB/s for APU, 70 GB/s for standard CPU + if (req_gb <= 0) return 0; + return (bw / req_gb) * 0.55; // 55% efficiency +} + function getFit(m, sys) { const req = estimateMemoryGB(m.params_b, m.quant, m.ctx); - const vram_bytes = sys?.gpu?.vram?.total || 0; + const vram_bytes = (sys?.gpu?.vram?.total || 0) + (sys?.gpu?.gtt?.total || 0); const vram = vram_bytes / (1024 ** 3); const ram_bytes = sys?.ram?.total || 0; const ram_used = sys?.ram?.used || 0; const ram = ram_bytes / (1024 ** 3); const freeRam = (ram_bytes - ram_used) / (1024 ** 3); - if (vram === 0 && ram === 0) return { level: "perfect", class: "b-run", text: "Fits (Mock)", req }; - if (vram > 0 && req <= vram) return { level: "perfect", class: "b-run", text: "Fits VRAM", req }; - if (req <= (vram + freeRam)) return { level: "good", class: "b-load", text: "RAM Offload", req }; - return { level: "too_tight", class: "b-err", text: "OOM (Zu groß)", req }; + const tps = estimateSpeed(req, vram); + + if (vram === 0 && ram === 0) return { level: "perfect", class: "b-run", text: "Lade...", req, tps }; + if (vram > 0 && req <= vram) return { level: "perfect", class: "b-run", text: "Passt in VRAM", req, tps }; + if (req <= (vram + freeRam)) return { level: "good", class: "b-load", text: "RAM Offload", req, tps }; + return { level: "too_tight", class: "b-err", text: "Zu groß (OOM)", req, tps }; } let lastSys = null; @@ -158,7 +169,7 @@ function updateLiveFit() { const fit = getFit(m, lastSys); $("#cb-m-fit-container").style.display = "flex"; - $("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: ~${fit.req.toFixed(1)} GB RAM/VRAM
${params_b}B Params · ${quant}`; + $("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: ~${fit.req.toFixed(1)} GB RAM/VRAM
${params_b}B Params · ${quant} · ~${Math.round(fit.tps)} t/s`; $("#cb-m-fit-badge").innerHTML = `${fit.text}`; } @@ -292,7 +303,7 @@ function renderCurated() { ${m.desc}
- ~${fit.req.toFixed(1)} GB RAM + ~${fit.req.toFixed(1)} GB RAM · ~${Math.round(fit.tps)} t/s ${m.quant}