
Deploy: Move 2026-02-24.md from staging to courseware

Daily Deploy Bot, 3 weeks ago
Parent
Commit
0acff11f8b

+ 98 - 0
bench_ollama_coding.py

@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+import json
+import time
+import requests
+from statistics import mean
+
+URL = "http://127.0.0.1:11434/api/chat"
+MODEL = "glm-4.7-flash-128k"
+
+# Eight escalating multi-turn coding tasks, issued in order.
+SCENARIO = [
+    "You are a senior Python engineer. Implement an LRUCache class (get/put, O(1)) with full type annotations.",
+    "Building on the previous code, add thread safety (using RLock) and explain the performance impact.",
+    "Add a TTL expiry mechanism with lazy cleanup plus cleanup on write, and give the key test cases.",
+    "Refactor the core logic into two classes, Storage and Policy, keeping the interface unchanged.",
+    "Add a command-line demo entry point supporting put/get/dump commands.",
+    "Fix a potential bug: under high concurrency the TTL scan can hold the lock for a long time; optimize it.",
+    "Split the project into 3 files and give a minimal runnable directory structure.",
+    "Finally, provide pytest test code (coverage target > 90%)."
+]
+
+PROFILES = [
+    {"name": "ctx32k_temp0.4", "options": {"num_ctx": 32768, "temperature": 0.4, "num_predict": 1024}},
+    {"name": "ctx64k_temp0.4", "options": {"num_ctx": 65536, "temperature": 0.4, "num_predict": 1024}},
+    {"name": "ctx128k_temp0.4", "options": {"num_ctx": 131072, "temperature": 0.4, "num_predict": 1024}},
+]
+
+
+def run_profile(profile):
+    messages = [{"role": "system", "content": "You are a rigorous coding assistant. Always answer with runnable code; no filler."}]
+    rounds = []
+    for i, prompt in enumerate(SCENARIO, start=1):
+        messages.append({"role": "user", "content": prompt})
+        payload = {
+            "model": MODEL,
+            "messages": messages,
+            "stream": False,
+            "options": profile["options"],
+        }
+        t0 = time.time()
+        ok = True
+        err = ""
+        content = ""
+        eval_count = None
+        prompt_eval_count = None
+        done_reason = ""
+        try:
+            r = requests.post(URL, json=payload, timeout=300)
+            r.raise_for_status()
+            data = r.json()
+            content = data.get("message", {}).get("content", "")
+            eval_count = data.get("eval_count")
+            prompt_eval_count = data.get("prompt_eval_count")
+            done_reason = data.get("done_reason", "")
+        except Exception as e:
+            ok = False
+            err = str(e)
+        dt = time.time() - t0
+        rounds.append({
+            "round": i,
+            "ok": ok,
+            "latency_s": round(dt, 2),
+            "chars": len(content),
+            "eval_count": eval_count,
+            "prompt_eval_count": prompt_eval_count,
+            "done_reason": done_reason,
+            "error": err,
+        })
+        if not ok:
+            break
+        messages.append({"role": "assistant", "content": content})
+    oks = [r for r in rounds if r["ok"]]
+    return {
+        "profile": profile["name"],
+        "options": profile["options"],
+        "rounds": rounds,
+        "success_rounds": len(oks),
+        "all_success": len(oks) == len(SCENARIO),
+        "avg_latency_s": round(mean([r["latency_s"] for r in oks]), 2) if oks else None,
+        "max_latency_s": max([r["latency_s"] for r in oks]) if oks else None,
+    }
+
+
+def main():
+    out = {"model": MODEL, "profiles": []}
+    for p in PROFILES:
+        print(f"Running {p['name']} ...", flush=True)
+        out["profiles"].append(run_profile(p))
+    ts = time.strftime("%Y%m%d-%H%M%S")
+    path = f"/home/zhn/.openclaw/workspace/reports/ollama-benchmark-{ts}.json"
+    import os
+    os.makedirs("/home/zhn/.openclaw/workspace/reports", exist_ok=True)
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(out, f, ensure_ascii=False, indent=2)
+    print(path)
+
+
+if __name__ == "__main__":
+    main()
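Round 1 of the scenario above asks the model for an O(1) LRUCache. As a sanity reference when grading model answers, a minimal sketch of what a passing solution might look like (this is illustrative only and not part of the benchmark script):

```python
from collections import OrderedDict
from typing import Optional


class LRUCache:
    """Least-recently-used cache with O(1) get/put, backed by an OrderedDict."""

    def __init__(self, capacity: int) -> None:
        self.capacity = capacity
        self._data: "OrderedDict[str, int]" = OrderedDict()

    def get(self, key: str) -> Optional[int]:
        if key not in self._data:
            return None
        self._data.move_to_end(key)  # mark as most recently used
        return self._data[key]

    def put(self, key: str, value: int) -> None:
        if key in self._data:
            self._data.move_to_end(key)
        self._data[key] = value
        if len(self._data) > self.capacity:
            self._data.popitem(last=False)  # evict least recently used
```

Rounds 2 onward (RLock, TTL, Storage/Policy split) then build on this shape.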

+ 12 - 0
courseware/2026-02-24.md

@@ -0,0 +1,12 @@
+# 2026-02-24
+
+## Today's work
+
+- Finished organizing the course materials
+- Updated the exercise question bank
+- Fixed a bug in the test cases
+
+## Tomorrow's plan
+
+- Start development of the new course
+- Refine the interaction design of the existing courses

+ 66 - 0
reports/TEST_REPORT_ollama_glm.md

@@ -0,0 +1,66 @@
+# GLM-4.7-flash-128k test report (coding scenario)
+
+Date: 2026-02-22
+
+## 1) Configuration changes applied
+
+- Provider API: `openai-completions` -> `ollama` (native)
+- Base URL: `http://127.0.0.1:11434/v1` -> `http://127.0.0.1:11434`
+- Model parameters:
+  - `contextWindow`: 131072 -> 65536
+  - `maxTokens`: 16384 -> 8192
+  - `agents.defaults.models[ollama/glm-4.7-flash-128k].params`:
+    - `temperature`: 0.2
+    - `num_ctx`: 65536
+    - `num_predict`: 4096
+
+## 2) Environment and hardware snapshot
+
+- CPU: i3-12100F (4C/8T)
+- RAM: 15GiB
+- GPU:
+  - RTX 2080 Ti 22GB
+  - Tesla P100 16GB
+- Ollama: 0.16.3
+- OpenClaw: 2026.2.19-2
+
+## 3) Stress-test results (multi-turn coding)
+
+Test file: `reports/ollama-coding-bench.json`
+
+Three profiles (ctx32k / ctx64k / ctx96k), five coding-task rounds each.
+
+Results:
+- ctx32k: timed out on round 1
+- ctx64k: timed out on round 1
+- ctx96k: timed out on round 1
+
+Extra single-round short-task check (ctx64k, num_predict=256):
+- Returned successfully, in about 13.19 s
+
+## 4) Conclusions
+
+1. The model does work, but under "long output + multi-turn coding" it hits timeouts very easily.
+2. The main bottleneck right now is not the message channel but inference throughput (long responses generate too slowly).
+3. The failed 5-round coding stress test shows the current parameters are still too aggressive for this hardware and model size.
+
+## 5) Recommended stable parameters (stability first)
+
+Suggested changes:
+- `num_ctx`: 32768
+- `num_predict`: 1024 (768 if needed)
+- `temperature`: 0.2
+
+Usage strategy:
+- In coding scenarios, default to short answers and ask to "continue" for the next chunk when needed
+- Avoid generating one very long code block in a single response
+
+## 6) Observability (so you can confirm I am working)
+
+Recommended standing commands:
+- `openclaw status`
+- `openclaw models status`
+- `ollama ps`
+- `tail -f /tmp/openclaw/openclaw-$(date +%F).log | grep -Ei "embedded run (start|done|timeout)|FailoverError|timed out"`
+
+With these you can see in real time whether a run is in progress, has timed out, or has failed over to the fallback.
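The recommended parameters from the report plug into Ollama's `/api/chat` request body as an `options` object, the same way the benchmark script builds its payloads. A sketch of assembling (not sending) such a request body with the "stability first" settings:

```python
import json

# "Stability first" settings recommended in section 5 of the report.
STABLE_OPTIONS = {
    "num_ctx": 32768,      # smaller context window to cut prompt-eval cost
    "num_predict": 1024,   # cap response length; drop to 768 if still timing out
    "temperature": 0.2,
}


def build_chat_payload(model: str, messages: list) -> dict:
    """Assemble an Ollama /api/chat request body using the stable options."""
    return {
        "model": model,
        "messages": messages,
        "stream": False,
        "options": STABLE_OPTIONS,
    }


payload = build_chat_payload(
    "glm-4.7-flash-128k",
    [{"role": "user", "content": "ping"}],
)
print(json.dumps(payload, ensure_ascii=False))
```

The same dict can be dropped into the `options` field of `PROFILES` in `bench_ollama_coding.py` to re-run the benchmark under the recommended settings.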

+ 54 - 0
reports/ollama-coding-bench.json

@@ -0,0 +1,54 @@
+{
+  "model": "glm-4.7-flash-128k",
+  "ts": "2026-02-22 05:56:00",
+  "profiles": [
+    {
+      "name": "ctx32k",
+      "options": {
+        "num_ctx": 32768,
+        "temperature": 0.2,
+        "num_predict": 1200
+      },
+      "all_success": false,
+      "rounds": [
+        {
+          "turn": 1,
+          "ok": false,
+          "error": "timed out"
+        }
+      ]
+    },
+    {
+      "name": "ctx64k",
+      "options": {
+        "num_ctx": 65536,
+        "temperature": 0.2,
+        "num_predict": 1200
+      },
+      "all_success": false,
+      "rounds": [
+        {
+          "turn": 1,
+          "ok": false,
+          "error": "timed out"
+        }
+      ]
+    },
+    {
+      "name": "ctx96k",
+      "options": {
+        "num_ctx": 98304,
+        "temperature": 0.2,
+        "num_predict": 1200
+      },
+      "all_success": false,
+      "rounds": [
+        {
+          "turn": 1,
+          "ok": false,
+          "error": "timed out"
+        }
+      ]
+    }
+  ]
+}
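A report JSON in this shape can be summarized without re-running the benchmark. A small sketch, using an inline copy of the structure above in place of reading the file:

```python
def summarize(report: dict) -> dict:
    """Count successful rounds per profile in a bench-report dict."""
    return {
        p["name"]: sum(1 for r in p["rounds"] if r["ok"])
        for p in report["profiles"]
    }


# Inline stand-in for json.load(open("reports/ollama-coding-bench.json")).
report = {
    "model": "glm-4.7-flash-128k",
    "profiles": [
        {"name": "ctx32k", "rounds": [{"turn": 1, "ok": False, "error": "timed out"}]},
        {"name": "ctx64k", "rounds": [{"turn": 1, "ok": False, "error": "timed out"}]},
        {"name": "ctx96k", "rounds": [{"turn": 1, "ok": False, "error": "timed out"}]},
    ],
}

print(summarize(report))  # every profile failed its first round
```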