|
@@ -0,0 +1,98 @@
|
|
|
|
|
+#!/usr/bin/env python3
|
|
|
|
|
+import json
|
|
|
|
|
+import time
|
|
|
|
|
+import requests
|
|
|
|
|
+from statistics import mean
|
|
|
|
|
+
|
|
|
|
|
# Ollama chat endpoint on the local daemon (non-streaming /api/chat).
URL = "http://127.0.0.1:11434/api/chat"
# Model tag to benchmark.
MODEL = "glm-4.7-flash-128k"

# Eight-turn coding conversation (prompts in Chinese). Each turn builds on
# the previous assistant reply, so the context the model must track grows
# round by round — this is what stresses the different num_ctx profiles.
SCENARIO = [
    "你是资深Python工程师。请实现一个LRUCache类(get/put,O(1)),并附上类型标注。",
    "基于上一步代码,补充线程安全支持(使用RLock),并说明性能影响。",
    "增加TTL过期机制,要求懒清理+写入时清理,给出关键测试用例。",
    "把核心逻辑重构为两个类:Storage与Policy,保证接口不变。",
    "增加一个命令行演示入口:支持put/get/dump命令。",
    "修复潜在bug:高并发下TTL扫描可能导致长时间持锁,请优化。",
    "把项目拆成3个文件并给出最小可运行目录结构。",
    "最后给出pytest测试代码(覆盖率目标>90%)。"
]

# Option profiles to compare: identical temperature and output budget,
# only the context window (num_ctx) varies between runs.
PROFILES = [
    {"name": "ctx32k_temp0.4", "options": {"num_ctx": 32768, "temperature": 0.4, "num_predict": 1024}},
    {"name": "ctx64k_temp0.4", "options": {"num_ctx": 65536, "temperature": 0.4, "num_predict": 1024}},
    {"name": "ctx128k_temp0.4", "options": {"num_ctx": 131072, "temperature": 0.4, "num_predict": 1024}},
]
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def run_profile(profile):
    """Run the multi-turn SCENARIO against MODEL with one options profile.

    Sends each SCENARIO prompt as a follow-up user turn, carrying the full
    conversation history so the context grows each round. Records latency,
    output size and token counts per round; aborts the profile on the first
    failed round (later rounds depend on the missing assistant reply).

    Args:
        profile: dict with "name" (str) and "options" (dict forwarded as
            the Ollama /api/chat "options" field).

    Returns:
        dict with the profile name/options, the per-round records, and
        aggregate stats (avg/max latency are None when no round succeeded).
    """
    messages = [{"role": "system", "content": "你是严谨的代码助手,回答要给出可运行代码,避免空话。"}]
    rounds = []
    for i, prompt in enumerate(SCENARIO, start=1):
        messages.append({"role": "user", "content": prompt})
        payload = {
            "model": MODEL,
            "messages": messages,
            "stream": False,
            "options": profile["options"],
        }
        # Monotonic clock: immune to NTP/wall-clock jumps during long calls
        # (time.time() could make latency_s negative or wildly wrong).
        t0 = time.monotonic()
        ok = True
        err = ""
        content = ""
        eval_count = None
        prompt_eval_count = None
        done_reason = ""
        try:
            resp = requests.post(URL, json=payload, timeout=300)
            resp.raise_for_status()
            data = resp.json()
            content = data.get("message", {}).get("content", "")
            eval_count = data.get("eval_count")
            prompt_eval_count = data.get("prompt_eval_count")
            done_reason = data.get("done_reason", "")
        except Exception as e:
            # Deliberate best-effort: record the failure in the report
            # rather than crashing the whole benchmark sweep.
            ok = False
            err = str(e)
        dt = time.monotonic() - t0
        rounds.append({
            "round": i,
            "ok": ok,
            "latency_s": round(dt, 2),
            "chars": len(content),
            "eval_count": eval_count,
            "prompt_eval_count": prompt_eval_count,
            "done_reason": done_reason,
            "error": err,
        })
        if not ok:
            break
        messages.append({"role": "assistant", "content": content})
    # Renamed from `r` to avoid shadowing the HTTP response variable above.
    ok_rounds = [rec for rec in rounds if rec["ok"]]
    return {
        "profile": profile["name"],
        "options": profile["options"],
        "rounds": rounds,
        "success_rounds": len(ok_rounds),
        "all_success": len(ok_rounds) == len(SCENARIO),
        "avg_latency_s": round(mean(rec["latency_s"] for rec in ok_rounds), 2) if ok_rounds else None,
        "max_latency_s": max(rec["latency_s"] for rec in ok_rounds) if ok_rounds else None,
    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def main():
    """Run every profile in PROFILES and write a timestamped JSON report.

    Prints a progress line per profile and, last, the path of the report
    file so callers can capture it from stdout.
    """
    out = {"model": MODEL, "profiles": []}
    for p in PROFILES:
        print(f"Running {p['name']} ...", flush=True)
        out["profiles"].append(run_profile(p))
    # Single source of truth for the output directory — previously the path
    # was hard-coded twice (makedirs arg vs f-string) and could drift apart.
    report_dir = "/home/zhn/.openclaw/workspace/reports"
    os.makedirs(report_dir, exist_ok=True)
    ts = time.strftime("%Y%m%d-%H%M%S")
    path = os.path.join(report_dir, f"ollama-benchmark-{ts}.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)
    print(path)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Script entry point: run the full benchmark sweep when executed directly.
if __name__ == "__main__":
    main()
|