feat: smart cookbook MVP mit odysseus fit logik

This commit is contained in:
Hitonabi
2026-06-20 23:13:05 +02:00
parent c76bcc7293
commit 0a81a9fe99
4 changed files with 278 additions and 85 deletions
+105 -84
View File
@@ -37,38 +37,7 @@ const CURATED_MODELS = [
}
];
/**
 * Heuristic memory estimate for a model: a weight term plus a term that
 * scales with the requested context length.
 *
 * @param {number} params_b - Model size figure (the name suggests billions of parameters).
 * @param {string} quant - Quantization label. Part of the signature, but this
 *   heuristic does not read it; the per-parameter factor is fixed.
 * @param {number} ctx - Context length, normalised against 8192.
 * @returns {number} Estimated requirement (GB, going by the function name).
 */
function estimateMemoryGB(params_b, quant, ctx) {
  // Weight term: fixed factor of 0.6 per unit of params_b.
  const weightPart = params_b * 0.6;
  // Context term: linear in ctx (relative to 8192) and in model size
  // (relative to 7), scaled by 0.8.
  const ctxScale = ctx / 8192;
  const sizeScale = params_b / 7;
  return weightPart + ctxScale * sizeScale * 0.8;
}
/**
 * Heuristic generation speed in tokens/s, derived from an assumed memory
 * bandwidth divided by the model's memory requirement.
 *
 * @param {number} req_gb - Estimated memory requirement of the model.
 * @param {number} vram_gb - Available VRAM (+GTT) figure used to pick the bandwidth tier.
 * @returns {number} Estimated tokens per second; 0 when req_gb is not positive.
 */
function estimateSpeed(req_gb, vram_gb) {
  // Nothing to stream means there is no meaningful speed to report.
  if (req_gb <= 0) {
    return 0;
  }
  // Bandwidth tier: the Bosgame APU (Strix Halo) has unified memory with
  // ~273 GB/s; a large VRAM/GTT pool is taken as the sign of that APU.
  // 250 GB/s is assumed for the APU, 70 GB/s for a standard CPU.
  const bandwidthGBs = vram_gb > 32 ? 250 : 70;
  // Only 55% of the theoretical bandwidth is assumed to be usable.
  return (bandwidthGBs / req_gb) * 0.55;
}
/**
 * Classify how well a model fits the reported hardware.
 *
 * @param {{params_b: number, quant: string, ctx: number}} m - Model descriptor.
 * @param {object|null|undefined} sys - System snapshot; reads `gpu.vram.total`,
 *   `gpu.gtt.total`, `ram.total` and `ram.used` (treated as bytes, since they
 *   are divided by 1024 ** 3). Missing fields count as 0.
 * @returns {{level: string, class: string, text: string, req: number, tps: number}}
 *   Fit verdict plus the estimated requirement and speed.
 */
function getFit(m, sys) {
  const GIB = 1024 ** 3;

  const req = estimateMemoryGB(m.params_b, m.quant, m.ctx);

  // VRAM and GTT are pooled into a single GPU-addressable figure.
  const vram = ((sys?.gpu?.vram?.total || 0) + (sys?.gpu?.gtt?.total || 0)) / GIB;

  const ramTotalBytes = sys?.ram?.total || 0;
  const ramUsedBytes = sys?.ram?.used || 0;
  const ram = ramTotalBytes / GIB;
  const freeRam = (ramTotalBytes - ramUsedBytes) / GIB;

  const tps = estimateSpeed(req, vram);

  // Every verdict carries the same req/tps pair; only the label differs.
  const verdict = (level, cls, text) => ({ level, class: cls, text, req, tps });

  // No hardware numbers at all: show a loading placeholder.
  if (vram === 0 && ram === 0) {
    return verdict("perfect", "b-run", "Lade...");
  }
  if (vram > 0 && req <= vram) {
    return verdict("perfect", "b-run", "Passt in VRAM");
  }
  // Spill-over into currently free system RAM is still acceptable.
  if (req <= vram + freeRam) {
    return verdict("good", "b-load", "RAM Offload");
  }
  return verdict("too_tight", "b-err", "Zu groß (OOM)");
}
// Lokale Mathe entfernt. Wir nutzen jetzt das Backend.
let lastSys = null;
let currentResults = [];
@@ -134,43 +103,58 @@ function mount() {
$("#cb-m-download").addEventListener("click", doDownload);
$("#cb-m-files").addEventListener("change", updateLiveFit);
$("#cb-m-ctx").addEventListener("input", updateLiveFit);
$("#cb-m-ctx").addEventListener("change", reanalyzeCtx);
renderCurated();
}
/**
 * Guess a model's size figure from its name.
 *
 * Recognises a mixture-of-experts form such as "8x7B" (returned as the
 * product, 56) and a plain form such as "7B" or "1.5b". A trailing letter
 * after the "b" (e.g. "8bit") disqualifies the plain form.
 *
 * @param {string} name - Repository or model name.
 * @returns {number} Parsed size, or 7 when nothing in the name matches.
 */
function extractParamsB(name) {
  const expertMatch = name.match(/(\d+)x(\d+(?:\.\d+)?)[bB]/i);
  if (expertMatch) {
    const [, expertCount, perExpert] = expertMatch;
    return Number.parseInt(expertCount, 10) * parseFloat(perExpert);
  }

  const sizeMatch = name.match(/(\d+(?:\.\d+)?)[bB](?![a-zA-Z])/i);
  // 7 is the fallback when the name carries no size marker.
  return sizeMatch ? parseFloat(sizeMatch[1]) : 7;
}
/**
 * Extract the quantization label from a model file name.
 *
 * Recognises K-/I-quant style labels ("Q4_K_M", "IQ4_XS", "Q8_0") and the
 * unquantized float formats. Besides "FP16"/"BF16" this also accepts the
 * "F16"/"F32" spellings used in GGUF file names; previously such files fell
 * through to the default and were reported as "Q4_K_M".
 *
 * @param {string} filename - File name (or path) to inspect.
 * @returns {string} Upper-cased label as found in the name, or "Q4_K_M" when
 *   no known label is present.
 */
function extractQuant(filename) {
  // The lookbehind keeps a bare "F16"/"F32" from matching inside a longer
  // alphanumeric token; "BF16"/"FP16" are matched by their own alternatives.
  const m = filename.match(
    /(IQ\d_[A-Z0-9_]+|Q\d_[A-Z0-9_]+|BF16|FP16|(?<![A-Z0-9])F(?:16|32))/i
  );
  return m ? m[1].toUpperCase() : "Q4_K_M";
}
// Aktuelle Analyse-Daten vom Backend
let currentAnalysis = null;
function updateLiveFit() {
const repo = $("#cb-m-repo").textContent;
const file = $("#cb-m-files").value;
const ctx = parseInt($("#cb-m-ctx").value) || 8192;
if (!repo || !file) {
if (!currentAnalysis || !file) {
$("#cb-m-fit-container").style.display = "none";
return;
}
const params_b = extractParamsB(repo);
const quant = extractQuant(file);
const fData = currentAnalysis.files.find(f => f.filename === file);
if (!fData) return;
const m = { params_b, quant, ctx };
const fit = getFit(m, lastSys);
const fit = fData.fit;
const cls = fit.level === "perfect" ? "b-run" : (fit.level === "marginal" ? "b-load" : "b-err");
$("#cb-m-fit-container").style.display = "flex";
$("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${params_b}B Params · ${quant} · ~${Math.round(fit.tps)} t/s</small>`;
$("#cb-m-fit-badge").innerHTML = `<span class="badge ${fit.class}">${fit.text}</span>`;
$("#cb-m-fit-text").innerHTML = `Geschätzter Bedarf: <b>~${fit.req_gb.toFixed(1)} GB RAM/VRAM</b> <br><small class="meta">${currentAnalysis.params_b}B Params · ${fData.quant} · ~${Math.round(fit.tps)} t/s</small>`;
$("#cb-m-fit-badge").innerHTML = `<span class="badge ${cls}">${fit.text}</span>`;
// Wenn "too_tight", machen wir den Download-Button gelb zur Warnung, erlauben ihn aber
const btn = $("#cb-m-download");
if (fit.level === "too_tight") {
btn.className = "primary warn";
btn.innerHTML = "Trotzdem herunterladen (OOM Risiko!)";
} else {
btn.className = "primary";
btn.innerHTML = "Herunterladen & Einpflegen";
}
}
/**
 * Re-request the backend analysis for the current repo with the context
 * length entered in `#cb-m-ctx`, then refresh the fit panel.
 *
 * Does nothing when no analysis is loaded. While the request is in flight a
 * placeholder is shown in `#cb-m-fit-text`. On failure the error is logged
 * and shown in that element instead of being swallowed, so the placeholder
 * does not stay on screen indefinitely.
 *
 * @returns {Promise<void>}
 */
async function reanalyzeCtx() {
  if (!currentAnalysis) return;

  // Empty or non-numeric input falls back to 8192.
  const ctx = Number.parseInt($("#cb-m-ctx").value, 10) || 8192;
  const repo = currentAnalysis.repo;
  // Remember the selected file so it can be re-selected afterwards.
  const file = $("#cb-m-files").value;

  $("#cb-m-fit-text").innerHTML = "Berechne neues Context-Limit...";

  try {
    const res = await api("/api/cookbook/analyze", {
      method: "POST", body: JSON.stringify({ repo_id: repo, ctx })
    });
    currentAnalysis = res;
    // Keep the previous selection.
    $("#cb-m-files").value = file;
    updateLiveFit();
  } catch (e) {
    console.error("reanalyzeCtx failed", e);
    // textContent, not innerHTML: the message may contain arbitrary text.
    $("#cb-m-fit-text").textContent = "Fehler: " + (e?.message ?? e);
  }
}
async function doSearch() {
@@ -226,27 +210,37 @@ window.openModelModal = async (index) => {
$("#cb-m-files").style.display = "none";
$("#cb-m-loading").style.display = "block";
$("#cb-m-download").disabled = true;
try {
const url = `https://huggingface.co/api/models/${m.id}/tree/main`;
const r = await fetch(url);
const tree = await r.json();
const files = tree.filter(f => f.path.endsWith('.gguf')).map(f => f.path);
const ctx = parseInt($("#cb-m-ctx").value) || 8192;
const res = await api("/api/cookbook/analyze", {
method: "POST", body: JSON.stringify({ repo_id: m.id, ctx })
});
currentAnalysis = res;
$("#cb-m-loading").style.display = "none";
$("#cb-m-files").style.display = "block";
if (files.length === 0) {
$("#cb-m-files").innerHTML = "<option value=''>Keine GGUF-Dateien im Hauptverzeichnis gefunden.</option>";
$("#cb-m-download").disabled = true;
if (!res.files || res.files.length === 0) {
$("#cb-m-files").innerHTML = "<option value=''>Keine GGUF-Dateien gefunden.</option>";
$("#cb-m-fit-container").style.display = "none";
} else {
$("#cb-m-files").innerHTML = files.map(f => `<option value="${esc(f)}">${esc(f)}</option>`).join("");
// Optische Indikatoren im Dropdown
$("#cb-m-files").innerHTML = res.files.map(f => {
let mark = "";
if (f.fit.level === "perfect") mark = "🟢";
else if (f.fit.level === "marginal") mark = "🟡";
else mark = "🔴";
return `<option value="${esc(f.filename)}">${mark} ${esc(f.filename)}</option>`;
}).join("");
$("#cb-m-download").disabled = false;
updateLiveFit();
}
} catch(e) {
$("#cb-m-loading").textContent = "Fehler beim Laden der Dateien.";
$("#cb-m-loading").textContent = "Fehler: " + e.message;
}
};
@@ -287,31 +281,46 @@ async function doDownload() {
$("#cb-m-download").textContent = "Herunterladen & Einpflegen";
}
function renderCurated() {
async function renderCurated() {
$("#cb-section-title").textContent = "Kuratierte Empfehlungen";
const grid = $("#cb-grid");
if (!grid) return;
grid.innerHTML = CURATED_MODELS.map((m, i) => {
const fit = getFit(m, lastSys);
return `
<div class="card" style="display:flex; flex-direction:column; cursor:pointer" onclick="window.openCuratedModal(${i})">
<div style="display:flex; justify-content:space-between; align-items:center;">
<h3 style="margin:0; font-size:16px">${esc(m.name)}</h3>
<span class="badge ${fit.class}">${fit.text}</span>
grid.innerHTML = "<div class='meta' style='grid-column:1/-1;text-align:center;padding:40px'>Berechne Hardware-Fit für Empfehlungen...</div>";
try {
let html = "";
for (let i = 0; i < CURATED_MODELS.length; i++) {
const m = CURATED_MODELS[i];
const fit = await api("/api/cookbook/evaluate", {
method: "POST", body: JSON.stringify({ params_b: m.params_b, quant: m.quant, ctx: m.ctx })
});
const cls = fit.level === "perfect" ? "b-run" : (fit.level === "marginal" ? "b-load" : "b-err");
html += `
<div class="card" style="display:flex; flex-direction:column; cursor:pointer" onclick="window.openCuratedModal(${i})">
<div style="display:flex; justify-content:space-between; align-items:center;">
<h3 style="margin:0; font-size:16px">${esc(m.name)}</h3>
<span class="badge ${cls}">${fit.text}</span>
</div>
<div style="font-size:13px; color:var(--mut); margin-top:12px; flex:1; line-height:1.5;">
${m.desc}
</div>
<div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta">
<span>~${fit.req_gb.toFixed(1)} GB RAM/VRAM · ~${Math.round(fit.tps)} t/s</span>
<span>${m.quant}</span>
</div>
</div>
<div style="font-size:13px; color:var(--mut); margin-top:12px; flex:1; line-height:1.5;">
${m.desc}
</div>
<div style="display:flex; justify-content:space-between; margin-top:16px; font-size:12px" class="meta">
<span>~${fit.req.toFixed(1)} GB RAM · ~${Math.round(fit.tps)} t/s</span>
<span>${m.quant}</span>
</div>
</div>
`;
}).join("");
`;
}
grid.innerHTML = html;
} catch (e) {
grid.innerHTML = `<div class="alert err" style="grid-column:1/-1">Fehler beim Laden der Empfehlungen: ${e.message}</div>`;
}
}
window.openCuratedModal = (index) => {
window.openCuratedModal = async (index) => {
const m = CURATED_MODELS[index];
if (!m) return;
$("#cb-modal").style.display = "flex";
@@ -323,7 +332,19 @@ window.openCuratedModal = (index) => {
$("#cb-m-alias").value = m.alias;
$("#cb-m-ctx").value = m.ctx;
$("#cb-m-download").disabled = false;
updateLiveFit();
// Wir nutzen die neue API Struktur auch für das simulierte Modal
try {
const fit = await api("/api/cookbook/evaluate", {
method: "POST", body: JSON.stringify({ params_b: m.params_b, quant: m.quant, ctx: m.ctx })
});
currentAnalysis = {
repo: m.repo,
params_b: m.params_b,
files: [{ filename: m.file, quant: m.quant, fit: fit }]
};
updateLiveFit();
} catch(e) {}
};
function onSystem(sys) {