| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- #!/usr/bin/env python3
import json
import os
import time
from statistics import mean

import requests
# Local Ollama chat-completions endpoint (non-streaming /api/chat).
URL = "http://127.0.0.1:11434/api/chat"
# Model tag to benchmark; must already be pulled into the local Ollama store.
MODEL = "glm-4.7-flash-128k"

# Eight-round coding conversation (prompts are in Chinese). Each turn builds
# on the previous assistant reply, so later rounds exercise an increasingly
# long context — this is what differentiates the num_ctx profiles below.
SCENARIO = [
    "你是资深Python工程师。请实现一个LRUCache类(get/put,O(1)),并附上类型标注。",
    "基于上一步代码,补充线程安全支持(使用RLock),并说明性能影响。",
    "增加TTL过期机制,要求懒清理+写入时清理,给出关键测试用例。",
    "把核心逻辑重构为两个类:Storage与Policy,保证接口不变。",
    "增加一个命令行演示入口:支持put/get/dump命令。",
    "修复潜在bug:高并发下TTL扫描可能导致长时间持锁,请优化。",
    "把项目拆成3个文件并给出最小可运行目录结构。",
    "最后给出pytest测试代码(覆盖率目标>90%)。"
]

# Option profiles to compare: identical temperature and output budget
# (num_predict), context window doubling from 32k to 128k.
PROFILES = [
    {"name": "ctx32k_temp0.4", "options": {"num_ctx": 32768, "temperature": 0.4, "num_predict": 1024}},
    {"name": "ctx64k_temp0.4", "options": {"num_ctx": 65536, "temperature": 0.4, "num_predict": 1024}},
    {"name": "ctx128k_temp0.4", "options": {"num_ctx": 131072, "temperature": 0.4, "num_predict": 1024}},
]
def run_profile(profile):
    """Run the full multi-turn SCENARIO against the Ollama chat API with one
    option profile and collect per-round statistics.

    Args:
        profile: dict with "name" (str) and "options" (dict forwarded verbatim
            as the Ollama request "options", e.g. num_ctx / temperature).

    Returns:
        dict with the profile name/options, one record per attempted round
        (latency, reply size, token counts, error if any), and aggregate
        success/latency figures. Stops early at the first failed round.
    """
    messages = [{"role": "system", "content": "你是严谨的代码助手,回答要给出可运行代码,避免空话。"}]
    rounds = []
    for i, prompt in enumerate(SCENARIO, start=1):
        messages.append({"role": "user", "content": prompt})
        payload = {
            "model": MODEL,
            "messages": messages,
            "stream": False,
            "options": profile["options"],
        }
        # Monotonic clock for interval timing: unlike time.time(), it is not
        # affected by NTP steps/DST, so latency can never come out wrong or
        # negative mid-benchmark.
        t0 = time.monotonic()
        ok = True
        err = ""
        content = ""
        eval_count = None
        prompt_eval_count = None
        done_reason = ""
        try:
            resp = requests.post(URL, json=payload, timeout=300)
            resp.raise_for_status()
            data = resp.json()
            content = data.get("message", {}).get("content", "")
            eval_count = data.get("eval_count")
            prompt_eval_count = data.get("prompt_eval_count")
            # `or ""` also normalises an explicit JSON null (which .get's
            # default would not catch) so the report field is always a str.
            done_reason = data.get("done_reason") or ""
        except Exception as e:
            # Best-effort benchmark: record the failure, then break below so
            # later rounds don't run against a broken conversation.
            ok = False
            err = str(e)
        dt = time.monotonic() - t0
        rounds.append({
            "round": i,
            "ok": ok,
            "latency_s": round(dt, 2),
            "chars": len(content),
            "eval_count": eval_count,
            "prompt_eval_count": prompt_eval_count,
            "done_reason": done_reason,
            "error": err,
        })
        if not ok:
            break
        # Feed the assistant reply back so the next round builds on it.
        messages.append({"role": "assistant", "content": content})
    succeeded = [rec for rec in rounds if rec["ok"]]
    return {
        "profile": profile["name"],
        "options": profile["options"],
        "rounds": rounds,
        "success_rounds": len(succeeded),
        "all_success": len(succeeded) == len(SCENARIO),
        "avg_latency_s": round(mean([rec["latency_s"] for rec in succeeded]), 2) if succeeded else None,
        "max_latency_s": max([rec["latency_s"] for rec in succeeded]) if succeeded else None,
    }
def main():
    """Benchmark every profile in PROFILES and write a timestamped JSON
    report; prints the report path as the last line of output."""
    out = {"model": MODEL, "profiles": []}
    for p in PROFILES:
        print(f"Running {p['name']} ...", flush=True)
        out["profiles"].append(run_profile(p))
    # Single source of truth for the report location (the original repeated
    # this literal in two places, which could silently drift apart).
    report_dir = "/home/zhn/.openclaw/workspace/reports"
    os.makedirs(report_dir, exist_ok=True)
    ts = time.strftime("%Y%m%d-%H%M%S")
    path = f"{report_dir}/ollama-benchmark-{ts}.json"
    # ensure_ascii=False keeps the Chinese prompts readable in the report.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)
    print(path)


if __name__ == "__main__":
    main()
|