|
@@ -0,0 +1,98 @@
|
|
|
|
|
+#!/usr/bin/env python3
|
|
|
|
|
+import json
|
|
|
|
|
+import time
|
|
|
|
|
+import requests
|
|
|
|
|
+from statistics import mean
|
|
|
|
|
+
|
|
|
|
|
# Ollama chat endpoint on the local daemon (non-streaming /api/chat).
URL = "http://127.0.0.1:11434/api/chat"
# Model tag to benchmark.
MODEL = "glm-4.7-flash-128k"

# Eight-turn coding conversation (prompts in Chinese). Each turn builds on
# the previous assistant reply, so the context the model must track grows
# round by round — this is what stresses the different num_ctx profiles.
SCENARIO = [
    "你是资深Python工程师。请实现一个LRUCache类(get/put,O(1)),并附上类型标注。",
    "基于上一步代码,补充线程安全支持(使用RLock),并说明性能影响。",
    "增加TTL过期机制,要求懒清理+写入时清理,给出关键测试用例。",
    "把核心逻辑重构为两个类:Storage与Policy,保证接口不变。",
    "增加一个命令行演示入口:支持put/get/dump命令。",
    "修复潜在bug:高并发下TTL扫描可能导致长时间持锁,请优化。",
    "把项目拆成3个文件并给出最小可运行目录结构。",
    "最后给出pytest测试代码(覆盖率目标>90%)。"
]

# Option profiles to compare: identical temperature and output budget,
# only the context window (num_ctx) varies between runs.
PROFILES = [
    {"name": "ctx32k_temp0.4", "options": {"num_ctx": 32768, "temperature": 0.4, "num_predict": 1024}},
    {"name": "ctx64k_temp0.4", "options": {"num_ctx": 65536, "temperature": 0.4, "num_predict": 1024}},
    {"name": "ctx128k_temp0.4", "options": {"num_ctx": 131072, "temperature": 0.4, "num_predict": 1024}},
]
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def run_profile(profile):
    """Run the multi-turn SCENARIO against MODEL with one options profile.

    Sends each SCENARIO prompt as a follow-up user turn, carrying the full
    conversation history so the context grows each round. Records latency,
    output size and token counts per round; aborts the profile on the first
    failed round (later rounds depend on the missing assistant reply).

    Args:
        profile: dict with "name" (str) and "options" (dict forwarded as
            the Ollama /api/chat "options" field).

    Returns:
        dict with the profile name/options, the per-round records, and
        aggregate stats (avg/max latency are None when no round succeeded).
    """
    messages = [{"role": "system", "content": "你是严谨的代码助手,回答要给出可运行代码,避免空话。"}]
    rounds = []
    for i, prompt in enumerate(SCENARIO, start=1):
        messages.append({"role": "user", "content": prompt})
        payload = {
            "model": MODEL,
            "messages": messages,
            "stream": False,
            "options": profile["options"],
        }
        # Monotonic clock: immune to NTP/wall-clock jumps during long calls
        # (time.time() could make latency_s negative or wildly wrong).
        t0 = time.monotonic()
        ok = True
        err = ""
        content = ""
        eval_count = None
        prompt_eval_count = None
        done_reason = ""
        try:
            resp = requests.post(URL, json=payload, timeout=300)
            resp.raise_for_status()
            data = resp.json()
            content = data.get("message", {}).get("content", "")
            eval_count = data.get("eval_count")
            prompt_eval_count = data.get("prompt_eval_count")
            done_reason = data.get("done_reason", "")
        except Exception as e:
            # Deliberate best-effort: record the failure in the report
            # rather than crashing the whole benchmark sweep.
            ok = False
            err = str(e)
        dt = time.monotonic() - t0
        rounds.append({
            "round": i,
            "ok": ok,
            "latency_s": round(dt, 2),
            "chars": len(content),
            "eval_count": eval_count,
            "prompt_eval_count": prompt_eval_count,
            "done_reason": done_reason,
            "error": err,
        })
        if not ok:
            break
        messages.append({"role": "assistant", "content": content})
    # Renamed from `r` to avoid shadowing the HTTP response variable above.
    ok_rounds = [rec for rec in rounds if rec["ok"]]
    return {
        "profile": profile["name"],
        "options": profile["options"],
        "rounds": rounds,
        "success_rounds": len(ok_rounds),
        "all_success": len(ok_rounds) == len(SCENARIO),
        "avg_latency_s": round(mean(rec["latency_s"] for rec in ok_rounds), 2) if ok_rounds else None,
        "max_latency_s": max(rec["latency_s"] for rec in ok_rounds) if ok_rounds else None,
    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def main():
    """Run every profile in PROFILES and write a timestamped JSON report.

    Prints a progress line per profile and, last, the path of the report
    file so callers can capture it from stdout.
    """
    out = {"model": MODEL, "profiles": []}
    for p in PROFILES:
        print(f"Running {p['name']} ...", flush=True)
        out["profiles"].append(run_profile(p))
    # Single source of truth for the output directory — previously the path
    # was hard-coded twice (makedirs arg vs f-string) and could drift apart.
    report_dir = "/home/zhn/.openclaw/workspace/reports"
    os.makedirs(report_dir, exist_ok=True)
    ts = time.strftime("%Y%m%d-%H%M%S")
    path = os.path.join(report_dir, f"ollama-benchmark-{ts}.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)
    print(path)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Script entry point: run the full benchmark sweep when executed directly.
if __name__ == "__main__":
    main()
|