#!/usr/bin/env python3
"""Benchmark a local Ollama chat model across context-window profiles.

Drives a fixed multi-turn coding scenario against each profile in
PROFILES and writes per-round latency/token statistics to a
timestamped JSON report under REPORT_DIR.
"""
import json
import time
from pathlib import Path
from statistics import mean

import requests

URL = "http://127.0.0.1:11434/api/chat"
MODEL = "glm-4.7-flash-128k"
# Directory where JSON reports are written (created on demand in main()).
REPORT_DIR = Path("/home/zhn/.openclaw/workspace/reports")

# Multi-turn conversation: each prompt builds on the previous answer,
# so later rounds progressively stress the model's context window.
SCENARIO = [
    "你是资深Python工程师。请实现一个LRUCache类(get/put,O(1)),并附上类型标注。",
    "基于上一步代码,补充线程安全支持(使用RLock),并说明性能影响。",
    "增加TTL过期机制,要求懒清理+写入时清理,给出关键测试用例。",
    "把核心逻辑重构为两个类:Storage与Policy,保证接口不变。",
    "增加一个命令行演示入口:支持put/get/dump命令。",
    "修复潜在bug:高并发下TTL扫描可能导致长时间持锁,请优化。",
    "把项目拆成3个文件并给出最小可运行目录结构。",
    "最后给出pytest测试代码(覆盖率目标>90%)。",
]

# One entry per benchmark run; only num_ctx varies between profiles.
PROFILES = [
    {"name": "ctx32k_temp0.4", "options": {"num_ctx": 32768, "temperature": 0.4, "num_predict": 1024}},
    {"name": "ctx64k_temp0.4", "options": {"num_ctx": 65536, "temperature": 0.4, "num_predict": 1024}},
    {"name": "ctx128k_temp0.4", "options": {"num_ctx": 131072, "temperature": 0.4, "num_predict": 1024}},
]


def _chat_once(messages, options):
    """POST one non-streaming chat request and return the parsed JSON body.

    Raises a ``requests`` exception on transport failure and ``HTTPError``
    on a non-2xx status; callers are expected to handle both.
    """
    payload = {
        "model": MODEL,
        "messages": messages,
        "stream": False,
        "options": options,
    }
    resp = requests.post(URL, json=payload, timeout=300)
    resp.raise_for_status()
    return resp.json()


def run_profile(profile):
    """Run the full SCENARIO under one profile and return summary stats.

    The conversation history accumulates across rounds (user prompt +
    assistant reply each turn). Stops at the first failed round so a
    broken context size doesn't waste the remaining requests.

    Returns a dict with per-round records plus success/latency summary;
    latency fields are None when no round succeeded.
    """
    messages = [{"role": "system", "content": "你是严谨的代码助手,回答要给出可运行代码,避免空话。"}]
    rounds = []
    for i, prompt in enumerate(SCENARIO, start=1):
        messages.append({"role": "user", "content": prompt})
        ok = True
        err = ""
        content = ""
        eval_count = None
        prompt_eval_count = None
        done_reason = ""
        # monotonic clock: immune to wall-clock (NTP/DST) adjustments,
        # which would corrupt latency measurements taken with time.time().
        t0 = time.monotonic()
        try:
            data = _chat_once(messages, profile["options"])
            content = data.get("message", {}).get("content", "")
            eval_count = data.get("eval_count")
            prompt_eval_count = data.get("prompt_eval_count")
            done_reason = data.get("done_reason", "")
        except Exception as e:  # boundary: record the failure and stop this profile
            ok = False
            err = str(e)
        dt = time.monotonic() - t0
        rounds.append({
            "round": i,
            "ok": ok,
            "latency_s": round(dt, 2),
            "chars": len(content),
            "eval_count": eval_count,
            "prompt_eval_count": prompt_eval_count,
            "done_reason": done_reason,
            "error": err,
        })
        if not ok:
            break
        # Feed the assistant's reply back so the next round builds on it.
        messages.append({"role": "assistant", "content": content})
    oks = [r for r in rounds if r["ok"]]
    return {
        "profile": profile["name"],
        "options": profile["options"],
        "rounds": rounds,
        "success_rounds": len(oks),
        "all_success": len(oks) == len(SCENARIO),
        "avg_latency_s": round(mean([r["latency_s"] for r in oks]), 2) if oks else None,
        "max_latency_s": max([r["latency_s"] for r in oks]) if oks else None,
    }


def main():
    """Benchmark every profile and dump the combined report as JSON."""
    out = {"model": MODEL, "profiles": []}
    for p in PROFILES:
        print(f"Running {p['name']} ...", flush=True)
        out["profiles"].append(run_profile(p))
    ts = time.strftime("%Y%m%d-%H%M%S")
    REPORT_DIR.mkdir(parents=True, exist_ok=True)
    path = REPORT_DIR / f"ollama-benchmark-{ts}.json"
    with open(path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps the Chinese prompts human-readable.
        json.dump(out, f, ensure_ascii=False, indent=2)
    print(path)


if __name__ == "__main__":
    main()