feat: smart cookbook MVP mit odysseus fit logik

2026-06-20 23:13:05 +02:00
parent c76bcc7293
commit 0a81a9fe99
4 changed files with 278 additions and 85 deletions
@@ -0,0 +1,71 @@
+"""
+Extrahierte Mathematik aus dem Odysseus Projekt zur VRAM/RAM Berechnung.
+Abgestimmt auf APUs mit Unified Memory (Bosgame M5 / Strix Halo).
+"""
+
# Assumed bytes per parameter for GGUF quantization formats.
# Keys are upper-case quant labels; lookups normalize the label first.
QUANT_BYTES_PER_PARAM = {
    "Q2_K": 0.35,
    "Q3_K_S": 0.38,
    "Q3_K_M": 0.42,
    "Q3_K_L": 0.45,
    "Q4_0": 0.50,
    "Q4_1": 0.55,
    "Q4_K_S": 0.50,
    "Q4_K_M": 0.55,
    "Q5_0": 0.62,
    "Q5_1": 0.68,
    "Q5_K_S": 0.62,
    "Q5_K_M": 0.65,
    "Q6_K": 0.75,
    "Q8_0": 1.00,
    "F16": 2.00,
    "BF16": 2.00,
}

# Quant assumed when the requested label is missing or not in the table.
# Referencing the table entry keeps the fallback in sync with it.
_FALLBACK_QUANT = "Q5_K_M"


def estimate_memory_gb(params_b: float, quant: str, ctx: int) -> float:
    """Estimate the memory footprint in GB (weights plus context).

    Args:
        params_b: Model size in billions of parameters. Negative values
            are treated as 0 for the weight term.
        quant: Quantization label such as ``"Q4_K_M"``. Matching ignores
            case and surrounding whitespace. ``None``, non-string values
            and unknown labels fall back to the ``Q5_K_M`` figure.
        ctx: Context length in tokens. Negative values are treated as 0.

    Returns:
        Estimated memory in GB; never negative.
    """
    # Normalize the label so "q4_k_m" and " Q4_K_M " hit the same entry;
    # anything that is not a string counts as an unknown format.
    label = quant.strip().upper() if isinstance(quant, str) else ""
    bpp = QUANT_BYTES_PER_PARAM.get(label, QUANT_BYTES_PER_PARAM[_FALLBACK_QUANT])
    weights = max(params_b, 0) * bpp

    # Context heuristic: 8k tokens on a 7B model cost about 0.8 GB.
    # It scales linearly with context length and with model size, where
    # models below 7B are billed as if they were 7B.
    context_gb = (max(ctx, 0) / 8192) * (max(params_b, 7) / 7) * 0.8

    return weights + context_gb
+
def estimate_speed(req_gb: float, sys_ram_gb: float) -> float:
    """Estimate tokens per second from memory bandwidth and model footprint.

    Args:
        req_gb: Memory the model needs, in GB.
        sys_ram_gb: Total system RAM in GB; selects the assumed bandwidth.

    Returns:
        Estimated tokens/s, or ``0.0`` when ``req_gb`` is zero or negative.
    """
    # Strix Halo offers roughly 273 GB/s of unified memory bandwidth.
    # Systems with 8 GB or less are billed at 70 GB/s instead
    # (NOTE(review): presumably a conservative small-system figure - confirm).
    if sys_ram_gb > 8:
        bandwidth_gbs = 273
    else:
        bandwidth_gbs = 70

    if req_gb <= 0:
        return 0.0

    # Each token streams the whole footprint once; 0.55 is the assumed
    # fraction of peak bandwidth actually achieved.
    return (bandwidth_gbs / req_gb) * 0.55
+
def evaluate_fit(params_b: float, quant: str, ctx: int, sys_ram_gb: float) -> dict:
    """Classify how well a model fits a shared-memory (APU) system.

    Args:
        params_b: Model size in billions of parameters.
        quant: Quantization label passed through to ``estimate_memory_gb``.
        ctx: Context length in tokens.
        sys_ram_gb: Total system RAM in GB.

    Returns:
        A dict with ``level`` (``"too_tight"``, ``"marginal"`` or
        ``"perfect"``), a display ``text``, ``req_gb`` rounded to one
        decimal, and ``tps`` rounded to a whole number.
    """
    required = estimate_memory_gb(params_b, quant, ctx)
    tokens_per_s = estimate_speed(required, sys_ram_gb)

    # Keep 4 GB back for the OS and other processes; never go below zero.
    budget = max(sys_ram_gb - 4.0, 0)

    if required > budget:
        verdict = ("too_tight", "Zu groß (OOM)")
    elif required > budget * 0.8:
        # Fits, but uses more than 80% of the usable memory.
        verdict = ("marginal", "Könnte knapp werden")
    else:
        verdict = ("perfect", "Passt perfekt")

    level, label = verdict
    return {
        "level": level,
        "text": label,
        "req_gb": round(required, 1),
        "tps": round(tokens_per_s, 0),
    }