Cookbook RAM check and Speed Estimation

2026-06-20 22:41:02 +02:00
parent 8e8d564469
commit a51f6ee88a
1 changed files with 18 additions and 7 deletions
@@ -44,19 +44,30 @@ function estimateMemoryGB(params_b, quant, ctx) {
  return weights + context;
 }
 /**
  * Rough heuristic for generation speed in tokens per second.
  *
  * The estimate is (assumed memory bandwidth / required memory) scaled by a
  * fixed 55% efficiency factor. The bandwidth is chosen purely from the amount
  * of GPU-addressable memory: more than 32 GB is taken to be the unified-memory
  * APU, anything else a standard CPU.
  *
  * @param {number} req_gb - Estimated memory requirement of the model in GB.
  * @param {number} vram_gb - GPU-addressable memory in GB.
  * @returns {number} Estimated tokens/s; 0 when req_gb is not a positive number
  *   (zero, negative, NaN, undefined or null).
  */
 function estimateSpeed(req_gb, vram_gb) {
   // Bosgame APU (Strix Halo) has unified memory with ~273 GB/s bandwidth;
   // the heuristic uses a slightly lower 250 GB/s for it.
   const APU_BANDWIDTH_GBS = 250;
   const CPU_BANDWIDTH_GBS = 70;
   const EFFICIENCY = 0.55;

   // `!(x > 0)` instead of `x <= 0`: NaN and undefined compare false either
   // way, so the positive form is the one that also rejects them. Without it
   // the result would be NaN and callers would display "NaN t/s".
   if (!(req_gb > 0)) return 0;

   // We approximate bandwidth: if huge VRAM/GTT, it's the APU.
   const bw = vram_gb > 32 ? APU_BANDWIDTH_GBS : CPU_BANDWIDTH_GBS;
   return (bw / req_gb) * EFFICIENCY;
 }
 /**
  * Classifies how well model `m` fits into the memory of system `sys`.
  *
  * NOTE(review): this span is a diff hunk. Lines starting with `-` are the
  * removed version, lines starting with `+` the added version; the comments
  * here describe the added/context side.
  *
  * Reads m.params_b, m.quant and m.ctx, and (via optional chaining, so a
  * missing `sys` or field counts as 0) sys.gpu.vram.total, sys.gpu.gtt.total,
  * sys.ram.total and sys.ram.used. Byte totals are divided by 1024 ** 3.
  *
  * Returns an object { level, class, text, req, tps } where `req` is the
  * result of estimateMemoryGB and `tps` the result of estimateSpeed.
  */
 function getFit(m, sys) {
  const req = estimateMemoryGB(m.params_b, m.quant, m.ctx);
-  const vram_bytes = sys?.gpu?.vram?.total || 0;
  // Added side: the GTT total is counted together with the VRAM total.
+  const vram_bytes = (sys?.gpu?.vram?.total || 0) + (sys?.gpu?.gtt?.total || 0);
  const vram = vram_bytes / (1024 ** 3);
  const ram_bytes = sys?.ram?.total || 0;
  const ram_used = sys?.ram?.used || 0;
  const ram = ram_bytes / (1024 ** 3);
  const freeRam = (ram_bytes - ram_used) / (1024 ** 3);
-  if (vram === 0 && ram === 0) return { level: "perfect", class: "b-run", text: "Fits (Mock)", req };
  // Computed once, before the early returns, so every result carries `tps`.
+  const tps = estimateSpeed(req, vram);
-  if (vram > 0 && req <= vram) return { level: "perfect", class: "b-run", text: "Fits VRAM", req };
+  
-  if (req <= (vram + freeRam)) return { level: "good", class: "b-load", text: "RAM Offload", req };
  // Neither a VRAM/GTT nor a RAM total is known (both 0): placeholder result.
  // NOTE(review): presumably system info has not loaded yet — confirm with caller.
+  if (vram === 0 && ram === 0) return { level: "perfect", class: "b-run", text: "Lade...", req, tps };
-  return { level: "too_tight", class: "b-err", text: "OOM (Zu groß)", req };
  // Requirement fits entirely into the VRAM (+GTT) figure.
+  if (vram > 0 && req <= vram) return { level: "perfect", class: "b-run", text: "Passt in VRAM", req, tps };
  // Requirement fits once currently free RAM (total - used) is added.
  if (req <= (vram + freeRam)) return { level: "good", class: "b-load", text: "RAM Offload", req, tps };
  // Otherwise the requirement exceeds VRAM plus free RAM.
  return { level: "too_tight", class: "b-err", text: "Zu groß (OOM)", req, tps };
 }
 let lastSys = null;
@@ -158,7 +169,7 @@ function updateLiveFit() {
  const fit = getFit(m, lastSys);
  $("#cb-m-fit-container").style.display = "flex";
-  $("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${params_b}B Params · ${quant}</small>`;
+  $("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${params_b}B Params · ${quant} · ~${Math.round(fit.tps)} t/s</small>`;
  $("#cb-m-fit-badge").innerHTML = `<span class="badge ${fit.class}">${fit.text}</span>`;
 }
@@ -292,7 +303,7 @@ function renderCurated() {
          ${m.desc}
        </div>
        <div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta">
-          <span>~${fit.req.toFixed(1)} GB RAM</span>
+          <span>~${fit.req.toFixed(1)} GB RAM · ~${Math.round(fit.tps)} t/s</span>
          <span>${m.quant}</span>
        </div>
      </div>