Cookbook RAM check and Speed Estimation
This commit is contained in:
@@ -44,19 +44,30 @@ function estimateMemoryGB(params_b, quant, ctx) {
|
|||||||
return weights + context;
|
return weights + context;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rough heuristic for generation speed in tokens per second.
 *
 * The estimate is memory-bandwidth bound: an assumed bandwidth is divided by
 * the model's memory footprint and scaled by a fixed efficiency factor.
 *
 * @param {number} req_gb - Estimated memory footprint of the model in GB.
 * @param {number} vram_gb - GPU-addressable memory in GB; only used to guess
 *   which bandwidth class applies.
 * @returns {number} Estimated tokens/s, or 0 when `req_gb` is not a positive
 *   number (zero, negative, NaN, undefined).
 */
function estimateSpeed(req_gb, vram_gb) {
  // Bosgame APU (Strix Halo) has unified memory with ~273 GB/s bandwidth;
  // the heuristic uses a slightly lower 250 GB/s figure for it.
  const APU_BANDWIDTH_GBPS = 250;
  // Assumed bandwidth for a standard CPU.
  const CPU_BANDWIDTH_GBPS = 70;
  // Above this much VRAM/GTT we approximate that the machine is the APU.
  const APU_VRAM_THRESHOLD_GB = 32;
  // Fraction of the theoretical bandwidth assumed to be achievable (55%).
  const EFFICIENCY = 0.55;

  // `!(x > 0)` also rejects NaN and undefined, which `x <= 0` lets through;
  // without it callers would end up rendering "~NaN t/s".
  if (!(req_gb > 0)) return 0;

  const bw = vram_gb > APU_VRAM_THRESHOLD_GB ? APU_BANDWIDTH_GBPS : CPU_BANDWIDTH_GBPS;
  return (bw / req_gb) * EFFICIENCY;
}
|
||||||
|
|
||||||
/**
 * Classifies how well a model fits into the memory reported for the system.
 *
 * @param {object} m - Model descriptor; `params_b`, `quant` and `ctx` are
 *   passed to `estimateMemoryGB`.
 * @param {object} [sys] - System snapshot; reads `gpu.vram.total`,
 *   `gpu.gtt.total`, `ram.total` and `ram.used` (treated as byte counts,
 *   missing values count as 0).
 * @returns {{level: string, class: string, text: string, req: number, tps: number}}
 *   Fit level, badge CSS class, badge label, required memory in GB and the
 *   estimated tokens/s.
 */
function getFit(m, sys) {
  const BYTES_PER_GIB = 1024 ** 3;

  const req = estimateMemoryGB(m.params_b, m.quant, m.ctx);

  // VRAM and GTT totals are summed into one GPU-addressable pool.
  // NOTE(review): if GTT is backed by system RAM, adding freeRam below may
  // count some memory twice — confirm against the data source.
  const gpu = sys?.gpu;
  const gpuBytes = (gpu?.vram?.total || 0) + (gpu?.gtt?.total || 0);
  const vram = gpuBytes / BYTES_PER_GIB;

  const ramTotalBytes = sys?.ram?.total || 0;
  const ramUsedBytes = sys?.ram?.used || 0;
  const ram = ramTotalBytes / BYTES_PER_GIB;
  const freeRam = (ramTotalBytes - ramUsedBytes) / BYTES_PER_GIB;

  const tps = estimateSpeed(req, vram);

  // Every verdict carries the same req/tps payload.
  const verdict = (level, cssClass, text) => ({ level, class: cssClass, text, req, tps });

  if (vram === 0 && ram === 0) {
    // No memory figures at all — presumably system info has not arrived yet.
    return verdict("perfect", "b-run", "Lade...");
  }
  if (vram > 0 && req <= vram) {
    return verdict("perfect", "b-run", "Passt in VRAM");
  }
  if (req <= vram + freeRam) {
    return verdict("good", "b-load", "RAM Offload");
  }
  return verdict("too_tight", "b-err", "Zu groß (OOM)");
}
|
||||||
|
|
||||||
let lastSys = null;
|
let lastSys = null;
|
||||||
@@ -158,7 +169,7 @@ function updateLiveFit() {
|
|||||||
const fit = getFit(m, lastSys);
|
const fit = getFit(m, lastSys);
|
||||||
|
|
||||||
$("#cb-m-fit-container").style.display = "flex";
|
$("#cb-m-fit-container").style.display = "flex";
|
||||||
$("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${params_b}B Params · ${quant}</small>`;
|
$("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${params_b}B Params · ${quant} · ~${Math.round(fit.tps)} t/s</small>`;
|
||||||
$("#cb-m-fit-badge").innerHTML = `<span class="badge ${fit.class}">${fit.text}</span>`;
|
$("#cb-m-fit-badge").innerHTML = `<span class="badge ${fit.class}">${fit.text}</span>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -292,7 +303,7 @@ function renderCurated() {
|
|||||||
${m.desc}
|
${m.desc}
|
||||||
</div>
|
</div>
|
||||||
<div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta">
|
<div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta">
|
||||||
<span>~${fit.req.toFixed(1)} GB RAM</span>
|
<span>~${fit.req.toFixed(1)} GB RAM · ~${Math.round(fit.tps)} t/s</span>
|
||||||
<span>${m.quant}</span>
|
<span>${m.quant}</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user