feat: smart cookbook MVP mit odysseus fit logik

2026-06-20 23:13:05 +02:00
parent c76bcc7293
commit 0a81a9fe99
4 changed files with 278 additions and 85 deletions
@@ -0,0 +1,100 @@
+"""
+Cookbook Router: Verbindet die HuggingFace API mit der Odysseus-Hardware-Berechnung.
+"""
+
+import httpx
+import re
+from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel
+import psutil
+
+from auth import auth
+from hw_math import evaluate_fit
+
+# Every cookbook endpoint is mounted under /api/cookbook. The `auth`
+# dependency is attached at router level, so it applies to each route
+# registered on this router rather than being repeated per endpoint.
+router = APIRouter(prefix="/api/cookbook", dependencies=[Depends(auth)])
+
+class AnalyzeRequest(BaseModel):
+    # Request body of POST /api/cookbook/analyze.
+    # repo_id: repository identifier; analyze_repo interpolates it into the
+    #          HuggingFace tree-API URL and parses the model size from it.
+    repo_id: str
+    # ctx: context length forwarded to evaluate_fit; 8192 when omitted.
+    ctx: int = 8192
+
+class EvaluateRequest(BaseModel):
+    # Request body of POST /api/cookbook/evaluate. All three fields are
+    # forwarded unchanged to evaluate_fit by evaluate_single.
+    # params_b: parameter count -- presumably in billions, matching what
+    #           extract_params_b returns; confirm against hw_math.
+    params_b: float
+    # quant: quantization label, e.g. "Q4_K_M" (the set accepted by
+    #        evaluate_fit is not visible here -- verify in hw_math).
+    quant: str
+    # ctx: context length; no default, the client must supply it.
+    ctx: int
+
+def extract_params_b(repo_id: str) -> float:
+    """Guess the parameter count (in billions) from a repository name.
+
+    Only the last path segment of ``repo_id`` is inspected, so digits in the
+    owner part ("team8b/Foo-70B") cannot be mistaken for the model size.
+
+    Recognised forms, tried in this order:
+
+    * ``<experts>x<size>B`` -> experts * size  (e.g. "8x7B" -> 56.0). This is
+      an upper-bound estimate for MoE models, not the exact total.
+    * ``<size>B``           -> size            (e.g. "32B" -> 32.0)
+    * ``<size>M``           -> size / 1000     (e.g. "360M" -> 0.36)
+
+    Every pattern requires that no letter follows the unit, so tokens such
+    as "4bit" are not read as a size. The million form is tried last so a
+    trailing context marker like "-1M" never overrides a real "<size>B".
+
+    Args:
+        repo_id: Repository identifier, with or without an "owner/" prefix.
+
+    Returns:
+        The estimated size in billions of parameters, or 7.0 when the name
+        carries no recognisable size token.
+    """
+    # Drop the owner/namespace; only the model name describes the model.
+    name = repo_id.rsplit("/", 1)[-1]
+
+    moe = re.search(r"(\d+)x(\d+(?:\.\d+)?)[bB](?![a-zA-Z])", name)
+    if moe:
+        return float(moe.group(1)) * float(moe.group(2))
+
+    dense = re.search(r"(\d+(?:\.\d+)?)[bB](?![a-zA-Z])", name)
+    if dense:
+        return float(dense.group(1))
+
+    # Sub-billion models are conventionally named in millions ("125m").
+    small = re.search(r"(\d+(?:\.\d+)?)[mM](?![a-zA-Z])", name)
+    if small:
+        return float(small.group(1)) / 1000
+
+    return 7.0  # Fallback when the name carries no size hint.
+
+def extract_quant(filename: str) -> str:
+    """Derive the quantization label from a GGUF file name.
+
+    Matching is case-insensitive and the result is upper-cased. Recognised
+    tags are ``Q<d>_*``, ``IQ<d>_*``, ``FP16``, ``BF16`` and ``F16``. The
+    ``F16`` spelling is the one used by the GGUF naming convention; it is
+    reported as ``"FP16"`` so the result stays within the set of labels this
+    function already produced (assumes evaluate_fit keys half precision as
+    "FP16" -- confirm in hw_math).
+
+    Args:
+        filename: File name or repository-relative path of a .gguf file.
+
+    Returns:
+        The upper-cased quantization label, or "Q4_K_M" when none is found.
+    """
+    m = re.search(
+        # The look-behind keeps a bare "F16" from matching inside a longer
+        # alphanumeric token; "BF16"/"FP16" are tried first on purpose.
+        r"(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|FP16|BF16|(?<![A-Z0-9])F16)",
+        filename,
+        re.IGNORECASE,
+    )
+    if not m:
+        return "Q4_K_M"
+    quant = m.group(1).upper()
+    # Without this alias an unquantized F16 file would be sized as Q4_K_M.
+    return "FP16" if quant == "F16" else quant
+
+def _fit_priority(level: str, quant: str) -> int:
+    """Score a file for ordering: only "perfect" fits score; Q4_K_M ranks highest."""
+    if level != "perfect":
+        return 0
+    if quant == "Q4_K_M":
+        return 15
+    if quant.startswith("Q4"):
+        return 14
+    if quant.startswith("Q5"):
+        return 13
+    return 10
+
+@router.post("/analyze")
+async def analyze_repo(req: AnalyzeRequest):
+    """List a HuggingFace repo's GGUF files and rate each one's hardware fit.
+
+    Fetches the top level of the repo's ``main`` tree, keeps entries whose
+    path ends in ".gguf", derives a quantization label per file and calls
+    ``evaluate_fit`` with the size parsed from the repo name, the requested
+    context length and the machine's total RAM.
+
+    Args:
+        req: Request body with ``repo_id`` and ``ctx``.
+
+    Returns:
+        ``{"files": []}`` when the repo has no GGUF files; otherwise a dict
+        with ``repo``, ``params_b``, ``sys_ram_gb`` and ``files``. Each file
+        entry holds ``filename``, ``quant``, ``fit`` and ``priority``, sorted
+        by priority and then by ``fit["tps"]``, both descending.
+
+    Raises:
+        HTTPException: 400 for a malformed ``repo_id``; 404 when HuggingFace
+            reports the repo as not found; 502 for any other upstream
+            failure (network error, error status, unparsable or unexpected
+            payload).
+    """
+    # repo_id is client-supplied and lands in a URL path: accept only
+    # "name" or "owner/name" built from word characters, "." and "-", and
+    # reject ".." so the request cannot be steered to another endpoint.
+    if ".." in req.repo_id or not re.fullmatch(
+        r"\w[\w.-]*(?:/\w[\w.-]*)?", req.repo_id, re.ASCII
+    ):
+        raise HTTPException(
+            status_code=400, detail=f"Ungültige repo_id: {req.repo_id!r}"
+        )
+
+    url = f"https://huggingface.co/api/models/{req.repo_id}/tree/main"
+
+    async with httpx.AsyncClient() as client:
+        try:
+            resp = await client.get(url, timeout=10.0)
+            resp.raise_for_status()
+            tree = resp.json()
+        except httpx.HTTPStatusError as exc:
+            # Pass "not found" through; everything else is an upstream fault.
+            status = 404 if exc.response.status_code == 404 else 502
+            raise HTTPException(
+                status_code=status, detail=f"HuggingFace Fehler: {exc}"
+            ) from exc
+        except (httpx.HTTPError, ValueError) as exc:
+            # Transport errors/timeouts, or a body that is not valid JSON.
+            raise HTTPException(
+                status_code=502, detail=f"HuggingFace Fehler: {exc}"
+            ) from exc
+
+    if not isinstance(tree, list):
+        raise HTTPException(
+            status_code=502,
+            detail="HuggingFace Fehler: unerwartetes Antwortformat",
+        )
+
+    gguf_files = [
+        entry["path"]
+        for entry in tree
+        if isinstance(entry, dict)
+        and isinstance(entry.get("path"), str)
+        and entry["path"].endswith(".gguf")
+    ]
+
+    if not gguf_files:
+        return {"files": []}
+
+    params_b = extract_params_b(req.repo_id)
+
+    # Total system RAM is used as the memory budget (APU = shared memory).
+    ram_gb = psutil.virtual_memory().total / (1024**3)
+
+    results = []
+    for filename in gguf_files:
+        quant = extract_quant(filename)
+        fit = evaluate_fit(params_b, quant, req.ctx, ram_gb)
+        results.append({
+            "filename": filename,
+            "quant": quant,
+            "fit": fit,
+            "priority": _fit_priority(fit["level"], quant),
+        })
+
+    # Highest priority first; ties are broken by tps (fastest first).
+    results.sort(key=lambda r: (r["priority"], r["fit"]["tps"]), reverse=True)
+
+    return {
+        "repo": req.repo_id,
+        "params_b": params_b,
+        "sys_ram_gb": round(ram_gb, 1),
+        "files": results,
+    }
+
+@router.post("/evaluate")
+def evaluate_single(req: EvaluateRequest):
+    """Rate one explicit (params_b, quant, ctx) combination against this machine.
+
+    Reads the total system RAM, converts it to GiB and returns whatever
+    ``evaluate_fit`` produces for the requested configuration.
+    """
+    total_ram_bytes = psutil.virtual_memory().total
+    return evaluate_fit(
+        req.params_b,
+        req.quant,
+        req.ctx,
+        total_ram_bytes / (1024**3),
+    )
+