Cookbook RAM check and Speed Estimation

This commit is contained in:
Hitonabi
2026-06-20 22:41:02 +02:00
parent 8e8d564469
commit a51f6ee88a
+18 -7
View File
@@ -44,19 +44,30 @@ function estimateMemoryGB(params_b, quant, ctx) {
return weights + context; return weights + context;
} }
/**
 * Rough estimate of generation speed in tokens per second.
 *
 * The model is treated as memory-bandwidth bound: the estimate is the assumed
 * memory bandwidth divided by the memory requirement, scaled by a fixed
 * efficiency factor.
 *
 * @param {number} req_gb - Estimated memory requirement of the model in GB.
 * @param {number} vram_gb - GPU-addressable memory in GB; used only to guess
 *   which bandwidth class the machine belongs to.
 * @returns {number} Estimated tokens/s, or 0 when `req_gb` is not a finite
 *   positive number.
 */
function estimateSpeed(req_gb, vram_gb) {
  // Bosgame APU (Strix Halo) has unified memory with ~273 GB/s bandwidth;
  // the heuristic uses a rounded-down 250 GB/s for it.
  const APU_BANDWIDTH_GBPS = 250;
  // Assumed bandwidth for a standard CPU / small-VRAM system.
  const DEFAULT_BANDWIDTH_GBPS = 70;
  // More than this much VRAM/GTT is taken as a sign that we are on the APU.
  const APU_VRAM_THRESHOLD_GB = 32;
  // Fraction of the theoretical bandwidth assumed to be usable (55%).
  const EFFICIENCY = 0.55;

  // NaN/undefined would slip past a plain `<= 0` check and surface in the UI
  // as "NaN t/s", so reject every non-finite requirement as well.
  if (!Number.isFinite(req_gb) || req_gb <= 0) return 0;

  const bw = (vram_gb > APU_VRAM_THRESHOLD_GB) ? APU_BANDWIDTH_GBPS : DEFAULT_BANDWIDTH_GBPS;
  return (bw / req_gb) * EFFICIENCY;
}
/**
 * Classify how well a model fits into the memory reported for this system.
 *
 * @param {{params_b: number, quant: string, ctx: number}} m - Model descriptor;
 *   its fields are passed straight to `estimateMemoryGB`.
 * @param {object|null|undefined} sys - System snapshot. Reads
 *   `gpu.vram.total`, `gpu.gtt.total`, `ram.total` and `ram.used`
 *   (presumably byte counts, since they are divided by 1024^3 — confirm
 *   against the producer of this object). Missing fields count as 0.
 * @returns {{level: string, class: string, text: string, req: number, tps: number}}
 *   Fit level, badge CSS class, badge label, estimated requirement in GB and
 *   estimated tokens/s.
 */
function getFit(m, sys) {
  const GIB = 1024 ** 3;
  const req = estimateMemoryGB(m.params_b, m.quant, m.ctx);

  // GTT is added to dedicated VRAM so both count as GPU-usable memory.
  const vram_bytes = (sys?.gpu?.vram?.total || 0) + (sys?.gpu?.gtt?.total || 0);
  const vram = vram_bytes / GIB;

  const ram_bytes = sys?.ram?.total || 0;
  const ram_used = sys?.ram?.used || 0;
  const ram = ram_bytes / GIB;
  const freeRam = (ram_bytes - ram_used) / GIB;

  // The speed estimate is attached to every result, whatever the fit level.
  const tps = estimateSpeed(req, vram);

  // No memory figures at all: system info has not arrived yet.
  if (vram === 0 && ram === 0) return { level: "perfect", class: "b-run", text: "Lade...", req, tps };
  if (vram > 0 && req <= vram) return { level: "perfect", class: "b-run", text: "Passt in VRAM", req, tps };
  if (req <= (vram + freeRam)) return { level: "good", class: "b-load", text: "RAM Offload", req, tps };
  return { level: "too_tight", class: "b-err", text: "Zu groß (OOM)", req, tps };
}
let lastSys = null; let lastSys = null;
@@ -158,7 +169,7 @@ function updateLiveFit() {
const fit = getFit(m, lastSys); const fit = getFit(m, lastSys);
$("#cb-m-fit-container").style.display = "flex"; $("#cb-m-fit-container").style.display = "flex";
$("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${params_b}B Params · ${quant}</small>`; $("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${params_b}B Params · ${quant} · ~${Math.round(fit.tps)} t/s</small>`;
$("#cb-m-fit-badge").innerHTML = `<span class="badge ${fit.class}">${fit.text}</span>`; $("#cb-m-fit-badge").innerHTML = `<span class="badge ${fit.class}">${fit.text}</span>`;
} }
@@ -292,7 +303,7 @@ function renderCurated() {
${m.desc} ${m.desc}
</div> </div>
<div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta"> <div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta">
<span>~${fit.req.toFixed(1)} GB RAM</span> <span>~${fit.req.toFixed(1)} GB RAM · ~${Math.round(fit.tps)} t/s</span>
<span>${m.quant}</span> <span>${m.quant}</span>
</div> </div>
</div> </div>