"""Cookbook router: connects the HuggingFace API with the Odysseus hardware-fit calculation."""

import re

import httpx
import psutil
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel

from auth import auth
from hw_math import evaluate_fit

router = APIRouter(prefix="/api/cookbook", dependencies=[Depends(auth)])

# Quantization label assumed when a filename carries no recognizable tag.
_DEFAULT_QUANT = "Q4_K_M"

# Parameter count assumed (in billions) when the repo name carries no size tag.
_DEFAULT_PARAMS_B = 7.0

# "<experts>x<size>B", e.g. "8x7B".
_MOE_PARAMS_RE = re.compile(r"(\d+)x(\d+(?:\.\d+)?)[bB]")

# "<size>B" not followed by a letter, e.g. "32B" in "Qwen2.5-Coder-32B".
_DENSE_PARAMS_RE = re.compile(r"(\d+(?:\.\d+)?)[bB](?![a-zA-Z])")

# Quantization tags found in GGUF filenames. A standalone "F16" is accepted in
# addition to the original set and normalized via _QUANT_ALIASES.
_QUANT_RE = re.compile(
    r"(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|FP16|BF16|(?<![A-Z0-9])F16(?![A-Z0-9]))",
    re.IGNORECASE,
)

# Alternative spellings mapped onto the labels this module already emits.
_QUANT_ALIASES = {"F16": "FP16"}

# "name" or "namespace/name"; each segment starts with a word character and
# contains only word characters, dots and dashes.
_REPO_ID_RE = re.compile(r"(?:\w[\w.-]*/)?\w[\w.-]*", re.ASCII)


class AnalyzeRequest(BaseModel):
    """Request body for ``/analyze``: a HuggingFace repo id and a context length."""

    repo_id: str
    ctx: int = 8192


class EvaluateRequest(BaseModel):
    """Request body for ``/evaluate``: explicit model size, quantization and context."""

    params_b: float
    quant: str
    ctx: int


def _system_ram_gb() -> float:
    """Return the total system RAM in GiB as reported by psutil."""
    return psutil.virtual_memory().total / (1024**3)


def _priority_score(fit_level: str, quant: str) -> int:
    """Return the ranking score for one file.

    A "perfect" fit adds 10. On top of that, Q4_K_M adds 5 (often the sweet
    spot), any other Q4 variant adds 4 and any Q5 variant adds 3.
    """
    score = 10 if fit_level == "perfect" else 0
    if quant == "Q4_K_M":
        score += 5
    elif quant.startswith("Q4"):
        score += 4
    elif quant.startswith("Q5"):
        score += 3
    return score


def extract_params_b(repo_id: str) -> float:
    """Extract the parameter count (in billions) from the repo name.

    Examples: ``Qwen2.5-Coder-32B`` -> 32, ``8x7B`` -> 56 (mixture-of-experts:
    expert count multiplied by the per-expert size). Returns 7.0 when the name
    carries no recognizable size tag.
    """
    moe = _MOE_PARAMS_RE.search(repo_id)
    if moe:
        return float(moe.group(1)) * float(moe.group(2))
    dense = _DENSE_PARAMS_RE.search(repo_id)
    if dense:
        return float(dense.group(1))
    return _DEFAULT_PARAMS_B  # fallback


def extract_quant(filename: str) -> str:
    """Extract the upper-cased quantization label from a GGUF filename.

    Recognizes ``Q<n>_*``, ``IQ<n>_*``, ``FP16``, ``BF16`` and a standalone
    ``F16`` (reported as ``FP16``). Returns ``"Q4_K_M"`` when no tag is found.
    """
    match = _QUANT_RE.search(filename)
    if match is None:
        return _DEFAULT_QUANT
    quant = match.group(1).upper()
    return _QUANT_ALIASES.get(quant, quant)


@router.post("/analyze")
async def analyze_repo(req: AnalyzeRequest):
    """Fetch the GGUF files of a HuggingFace repo and compute the hardware fit.

    Returns ``{"files": []}`` when the repo lists no ``.gguf`` file; otherwise
    the repo id, the parameter count derived from its name, the system RAM and
    one entry per file, best candidate first.

    Raises:
        HTTPException: 400 when ``repo_id`` is not a plain ``name`` or
            ``namespace/name``; 500 when the HuggingFace request fails or
            returns an unexpected payload.
    """
    # repo_id is untrusted and becomes part of the URL path: refuse anything
    # that could address a different endpoint (extra slashes, "..", etc.).
    if ".." in req.repo_id or _REPO_ID_RE.fullmatch(req.repo_id) is None:
        raise HTTPException(status_code=400, detail=f"Ungültige repo_id: {req.repo_id!r}")

    url = f"https://huggingface.co/api/models/{req.repo_id}/tree/main"
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(url, timeout=10.0)
            resp.raise_for_status()
            tree = resp.json()
        except Exception as e:
            # Boundary handler: any transport, status or decoding failure is
            # reported to the caller as a single upstream error.
            raise HTTPException(status_code=500, detail=f"HuggingFace Fehler: {e}") from e

    if not isinstance(tree, list):
        raise HTTPException(
            status_code=500,
            detail="HuggingFace Fehler: unerwartetes Antwortformat",
        )

    gguf_files = [
        entry["path"]
        for entry in tree
        if isinstance(entry, dict)
        and isinstance(entry.get("path"), str)
        and entry["path"].endswith(".gguf")
    ]
    if not gguf_files:
        return {"files": []}

    params_b = extract_params_b(req.repo_id)

    # Use the total system RAM as the memory budget (APU = shared memory).
    ram_gb = _system_ram_gb()

    results = []
    for filename in gguf_files:
        quant = extract_quant(filename)
        fit = evaluate_fit(params_b, quant, req.ctx, ram_gb)
        results.append({
            "filename": filename,
            "quant": quant,
            "fit": fit,
            # Priority score so that the best fit ends up at the top.
            "priority": _priority_score(fit["level"], quant),
        })

    # Sort: highest priority first, then by tps (fastest first).
    results.sort(key=lambda item: (item["priority"], item["fit"]["tps"]), reverse=True)

    return {
        "repo": req.repo_id,
        "params_b": params_b,
        "sys_ram_gb": round(ram_gb, 1),
        "files": results,
    }


@router.post("/evaluate")
def evaluate_single(req: EvaluateRequest):
    """Compute the hardware fit for one explicit parameter/quant/context combination."""
    return evaluate_fit(req.params_b, req.quant, req.ctx, _system_ram_gb())