feat: smart cookbook MVP mit odysseus fit logik
This commit is contained in:
+71
@@ -0,0 +1,71 @@
|
||||
"""Math extracted from the Odysseus project for estimating VRAM/RAM usage.

Tuned for APUs with unified memory (Bosgame M5 / Strix Halo).
"""
|
||||
|
||||
# Assumed bytes per parameter for each GGUF quantization format.
# Keys are upper-case because lookups normalise the name with str.upper().
QUANT_BYTES_PER_PARAM = {
    "Q2_K": 0.35,
    "Q3_K_S": 0.38,
    "Q3_K_M": 0.42,
    "Q3_K_L": 0.45,
    "Q4_0": 0.50,
    "Q4_1": 0.55,
    "Q4_K_S": 0.50,
    "Q4_K_M": 0.55,
    "Q5_0": 0.62,
    "Q5_1": 0.68,
    "Q5_K_S": 0.62,
    "Q5_K_M": 0.65,
    "Q6_K": 0.75,
    "Q8_0": 1.00,
    "F16": 2.00,
    "BF16": 2.00,
}
|
||||
|
||||
def estimate_memory_gb(params_b: float, quant: str, ctx: int) -> float:
    """Estimate the memory footprint in GB (model weights plus context).

    Args:
        params_b: Model size in billions of parameters.
        quant: Quantization format name. It is stripped and upper-cased
            before being looked up in ``QUANT_BYTES_PER_PARAM``.
        ctx: Context length in tokens.

    Returns:
        The sum of the estimated weight size and the estimated context
        memory, in GB.
    """
    # Normalise the name first so that None, "" or " q4_k_m " do not crash
    # or miss the table. Anything still unknown falls back to the Q5_K_M
    # figure (0.65) to stay on the safe side.
    quant_key = (quant or "").strip().upper()
    bytes_per_param = QUANT_BYTES_PER_PARAM.get(quant_key, 0.65)
    weights_gb = params_b * bytes_per_param

    # Heuristic for context memory: an 8k context on a 7B model costs about
    # 0.8 GB. It scales linearly with context length and with model size;
    # models smaller than 7B are charged as if they were 7B.
    context_gb = (ctx / 8192) * (max(params_b, 7) / 7) * 0.8

    return weights_gb + context_gb
|
||||
|
||||
def estimate_speed(req_gb: float, sys_ram_gb: float) -> float:
    """Estimate generation speed in tokens per second from memory bandwidth.

    Args:
        req_gb: Memory the model needs, in GB.
        sys_ram_gb: Total system RAM in GB; it selects the bandwidth figure.

    Returns:
        Estimated tokens per second, or 0.0 when ``req_gb`` is not positive.
    """
    # A non-positive size would divide by zero or yield a negative speed.
    if req_gb <= 0:
        return 0.0

    # Strix Halo has roughly 273 GB/s of unified memory bandwidth. Systems
    # with 8 GB or less are rated at 70 GB/s instead -- presumably a
    # conservative figure for smaller machines (NOTE(review): confirm).
    bandwidth_gbs = 273 if sys_ram_gb > 8 else 70

    # (bandwidth / model size) scaled by an efficiency factor of 0.55.
    return (bandwidth_gbs / req_gb) * 0.55
|
||||
|
||||
def evaluate_fit(params_b: float, quant: str, ctx: int, sys_ram_gb: float) -> dict:
    """Rate how well a model fits on a shared-memory (APU) system.

    Args:
        params_b: Model size in billions of parameters.
        quant: Quantization format name, passed to ``estimate_memory_gb``.
        ctx: Context length in tokens.
        sys_ram_gb: Total system RAM in GB.

    Returns:
        A dict with the keys ``"level"`` (``"too_tight"``, ``"marginal"`` or
        ``"perfect"``), ``"text"`` (the matching user-facing label),
        ``"req_gb"`` (required memory rounded to one decimal) and ``"tps"``
        (estimated tokens per second rounded to a whole number).
    """
    req_gb = estimate_memory_gb(params_b, quant, ctx)
    tps = estimate_speed(req_gb, sys_ram_gb)

    # The OS and other processes need RAM as well, so 4 GB are held back;
    # the result is clamped so small systems never get a negative budget.
    usable_ram = max(sys_ram_gb - 4.0, 0)

    if req_gb > usable_ram:
        fit_level, text = "too_tight", "Zu groß (OOM)"
    elif req_gb > usable_ram * 0.8:
        # Fits, but uses more than 80% of the usable budget.
        fit_level, text = "marginal", "Könnte knapp werden"
    else:
        fit_level, text = "perfect", "Passt perfekt"

    return {
        "level": fit_level,
        "text": text,
        "req_gb": round(req_gb, 1),
        "tps": round(tps, 0),
    }
|
||||
Reference in New Issue
Block a user