feat: smart cookbook MVP mit odysseus fit logik
This commit is contained in:
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Cookbook router: connects the HuggingFace API with the Odysseus hardware-fit calculation.
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import re
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
import psutil
|
||||
|
||||
from auth import auth
|
||||
from hw_math import evaluate_fit
|
||||
|
||||
# Router for the cookbook endpoints: every route is mounted under
# /api/cookbook and runs the `auth` dependency before the handler.
router = APIRouter(prefix="/api/cookbook", dependencies=[Depends(auth)])
|
||||
|
||||
class AnalyzeRequest(BaseModel):
    """Request body of the ``/analyze`` route."""

    # Repository id; it is inserted into the HuggingFace model-tree URL.
    repo_id: str
    # Context length handed to the fit calculation; 8192 when omitted.
    ctx: int = 8192
|
||||
|
||||
class EvaluateRequest(BaseModel):
    """Request body of the ``/evaluate`` route; all fields are required."""

    # Model size in billions of parameters.
    params_b: float
    # Quantization label, passed to evaluate_fit unchanged.
    quant: str
    # Context length, passed to evaluate_fit unchanged.
    ctx: int
|
||||
|
||||
def extract_params_b(repo_id: str) -> float:
    """Guess a model's parameter count (in billions) from its repository id.

    The size token is looked up in the model-name segment (the part after the
    last ``/``) first, so that a size-like token in the owner name
    (``team8b/Llama-3-70B``) cannot shadow the real size. The full id is only
    consulted when the model name carries no size token at all.

    Recognized forms:
        * ``<experts>x<size>B`` (mixture of experts), e.g. ``8x7B`` -> 56.0.
          This is the plain product, i.e. an upper-bound estimate.
        * ``<size>B``, e.g. ``Qwen2.5-Coder-32B`` -> 32.0. A trailing letter
          disqualifies the token, so ``4bit`` is not read as 4B.

    Args:
        repo_id: Repository id such as ``"owner/Model-7B-GGUF"``.

    Returns:
        The parameter count in billions, or ``7.0`` when no size token is
        found.
    """
    model_name = repo_id.rsplit("/", 1)[-1]

    # Model name first, whole id as a fallback (identical when there is no
    # owner segment, in which case the second pass is a harmless repeat).
    for text in (model_name, repo_id):
        moe = re.search(r"(\d+)x(\d+(?:\.\d+)?)[bB]", text)
        if moe:
            return float(moe.group(1)) * float(moe.group(2))

        dense = re.search(r"(\d+(?:\.\d+)?)[bB](?![a-zA-Z])", text)
        if dense:
            return float(dense.group(1))

    return 7.0  # Fallback when the id carries no recognizable size.
|
||||
|
||||
def extract_quant(filename: str) -> str:
    """Extract the quantization label from a GGUF file name.

    Matching is case-insensitive and the result is upper-cased. Recognized
    labels are ``Q<d>_...``, ``IQ<d>_...``, ``FP16``, ``BF16`` and ``F16``.
    ``F16`` is a common spelling in GGUF file names (``model-f16.gguf``); it
    is reported as ``FP16`` so that callers keep seeing only the labels this
    function already produced.

    Args:
        filename: File name or repository path of a model file.

    Returns:
        The upper-cased label, or ``"Q4_K_M"`` when none is found.
    """
    # F16 is listed last; BF16/FP16 still win because they start at an
    # earlier (or the same) position in the string.
    m = re.search(
        r"(Q\d_[A-Z0-9_]+|IQ\d_[A-Z0-9_]+|FP16|BF16|F16)",
        filename,
        re.IGNORECASE,
    )
    if not m:
        return "Q4_K_M"

    quant = m.group(1).upper()
    return "FP16" if quant == "F16" else quant
|
||||
|
||||
@router.post("/analyze")
async def analyze_repo(req: AnalyzeRequest):
    """List a HuggingFace repository's GGUF files and rate each one's fit.

    The repository's file tree (branch ``main``) is fetched from the
    HuggingFace API. Every ``.gguf`` entry is passed to ``evaluate_fit``
    together with the parameter count guessed from the repository id, the
    requested context length and this machine's total RAM.

    Args:
        req: Repository id and context length.

    Returns:
        ``{"files": []}`` when the repository has no GGUF files. Otherwise a
        dict with ``repo``, ``params_b``, ``sys_ram_gb`` and ``files``, where
        ``files`` is sorted by priority and then by ``fit["tps"]``, both
        descending. Each entry holds ``filename``, ``quant``, ``fit`` and
        ``priority``.

    Raises:
        HTTPException: 400 when ``repo_id`` is not of the form ``name`` or
            ``owner/name``; 500 when the HuggingFace request fails or returns
            something other than a JSON list.
    """
    # repo_id becomes part of the request path, so only the "name" /
    # "owner/name" shape is accepted. This keeps "..", "?", "#" and extra
    # slashes from steering the call to a different endpoint.
    if not re.fullmatch(
        r"[A-Za-z0-9_][A-Za-z0-9._-]*(?:/[A-Za-z0-9_][A-Za-z0-9._-]*)?",
        req.repo_id,
    ):
        raise HTTPException(status_code=400, detail="Ungültige repo_id")

    url = f"https://huggingface.co/api/models/{req.repo_id}/tree/main"

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(url, timeout=10.0)
            resp.raise_for_status()
            tree = resp.json()
        except (httpx.HTTPError, ValueError) as e:
            # httpx.HTTPError covers transport and status errors; ValueError
            # covers a body that is not valid JSON.
            raise HTTPException(
                status_code=500, detail=f"HuggingFace Fehler: {str(e)}"
            ) from e

    if not isinstance(tree, list):
        raise HTTPException(
            status_code=500,
            detail="HuggingFace Fehler: unerwartetes Antwortformat",
        )

    # Skip malformed entries instead of failing on them.
    gguf_files = [
        entry["path"]
        for entry in tree
        if isinstance(entry, dict)
        and isinstance(entry.get("path"), str)
        and entry["path"].endswith(".gguf")
    ]

    if not gguf_files:
        return {"files": []}

    params_b = extract_params_b(req.repo_id)

    # Total system RAM in GiB (APU: memory is shared with the GPU).
    ram_gb = psutil.virtual_memory().total / (1024**3)

    results = []
    for path in gguf_files:
        quant = extract_quant(path)
        fit = evaluate_fit(params_b, quant, req.ctx, ram_gb)

        # Priority score that moves the best fit to the top; "Q4_K_M" is
        # often the sweet spot.
        # NOTE(review): indentation was lost in the reviewed copy. The quant
        # bonus is assumed to apply only to "perfect" fits - confirm.
        priority = 0
        if fit["level"] == "perfect":
            priority += 10
            if quant == "Q4_K_M":
                priority += 5
            elif quant.startswith("Q4"):
                priority += 4
            elif quant.startswith("Q5"):
                priority += 3

        results.append({
            "filename": path,
            "quant": quant,
            "fit": fit,
            "priority": priority,
        })

    # Highest priority first, then highest tps.
    results.sort(key=lambda x: (x["priority"], x["fit"]["tps"]), reverse=True)

    return {
        "repo": req.repo_id,
        "params_b": params_b,
        "sys_ram_gb": round(ram_gb, 1),
        "files": results,
    }
|
||||
|
||||
@router.post("/evaluate")
def evaluate_single(req: EvaluateRequest):
    """Rate one parameter/quant/context combination against this machine.

    Args:
        req: Model size in billions, quantization label and context length.

    Returns:
        Whatever ``evaluate_fit`` returns for these values and the machine's
        total RAM in GiB.
    """
    total_bytes = psutil.virtual_memory().total
    return evaluate_fit(
        req.params_b,
        req.quant,
        req.ctx,
        total_bytes / (1024**3),
    )
|
||||
|
||||
Reference in New Issue
Block a user