Просмотр исходного кода

chore: 整理 arxiv-digest 脚本和配置

Daily Deploy Bot 6 дней назад
Родитель
Commit
6a39fb1ea0

+ 6 - 0
arxiv-digest/.env.example

@@ -1,6 +1,10 @@
 OPENCLAW_BIN=/home/zhn/.nvm/versions/node/v22.22.0/bin/openclaw
 OPENCLAW_BIN=/home/zhn/.nvm/versions/node/v22.22.0/bin/openclaw
 INSIGHT_MODELS=glm-4.7:cloud,qwen3.5:cloud,qwen3.5:27b,glm-4.7-flash-64k:latest
 INSIGHT_MODELS=glm-4.7:cloud,qwen3.5:cloud,qwen3.5:27b,glm-4.7-flash-64k:latest
 ROBOTDAILY_OUTPUT_DIR=/home/zhn/.openclaw/workspace/skills/robdaily/arxiv-digest/output
 ROBOTDAILY_OUTPUT_DIR=/home/zhn/.openclaw/workspace/skills/robdaily/arxiv-digest/output
+HUGO_CONTENT_DIR=/home/zhn/.openclaw/workspace/skills/robdaily/site/content/ai-daily
+# Or use HUGO_SITE_DIR + HUGO_CONTENT_SECTION instead:
+# HUGO_SITE_DIR=/home/zhn/.openclaw/workspace/skills/robdaily/site
+# HUGO_CONTENT_SECTION=ai-daily
 DISCORD_DELIVERY_MODE=fixed-channel
 DISCORD_DELIVERY_MODE=fixed-channel
 DISCORD_ACCOUNT_ID=codex
 DISCORD_ACCOUNT_ID=codex
 DISCORD_GUILD_ID=<guild id>
 DISCORD_GUILD_ID=<guild id>
@@ -8,3 +12,5 @@ DISCORD_TARGET_CHANNEL_NAME=robotdaily
 # DISCORD_CATEGORY_ID=<optional category id>
 # DISCORD_CATEGORY_ID=<optional category id>
 # DISCORD_BOT_TOKEN=<required only when a missing channel must be created>
 # DISCORD_BOT_TOKEN=<required only when a missing channel must be created>
 DISCORD_THREAD_AUTO_ARCHIVE_MIN=10080
 DISCORD_THREAD_AUTO_ARCHIVE_MIN=10080
+HUGO_SITE_DIR=/path/to/robdaily/site
+HUGO_CONTENT_SECTION=ai-daily

+ 14 - 4
arxiv-digest/SKILL.md

@@ -1,6 +1,6 @@
 ---
 ---
 name: arxiv-digest
 name: arxiv-digest
-description: Daily arXiv digest generation for embodied intelligence, representation learning, and reinforcement learning. Use when Codex needs to: (1) fetch recent papers from arXiv, (2) rank them with an applied-research bias, (3) pick 2-3 papers per domain, (4) translate abstracts into Chinese, add short explanations and tag keywords, (5) render mobile-friendly digest cards, or (6) publish the digest to Discord threads/channels on a schedule.
+description: "Daily arXiv digest generation for embodied intelligence, representation learning, and reinforcement learning. Use when Codex needs to: (1) fetch recent papers from arXiv, (2) rank them with an applied-research bias, (3) pick 2-3 papers per domain, (4) translate abstracts into Chinese, add short explanations and tag keywords, (5) render mobile-friendly digest cards, or (6) publish the digest to Discord threads/channels on a schedule."
 ---
 ---
 
 
 # arXiv Digest
 # arXiv Digest
@@ -21,8 +21,11 @@ Use `scripts/run_daily.py` as the single entry point.
    - 卡片标签
    - 卡片标签
 5. Render two outputs:
 5. Render two outputs:
    - mobile-friendly HTML digest with expandable cards
    - mobile-friendly HTML digest with expandable cards
-   - Markdown archive for Discord / quick search
-6. Publish to Discord:
+   - Markdown archive for Discord / quick search / Hugo import
+6. Publish to Hugo (optional):
+   - convert the Markdown digest into `site/content/ai-daily/YYYY-MM-DD.md`
+   - keep daily briefs separate from personal blog and resume content
+7. Publish to Discord:
    - `thread` mode: OpenClaw-native daily thread/forum post
    - `thread` mode: OpenClaw-native daily thread/forum post
    - `channel` mode: create one dated text channel per day via Discord REST + OpenClaw posting
    - `channel` mode: create one dated text channel per day via Discord REST + OpenClaw posting
    - `fixed-channel` mode: reuse one stable channel name such as `robotdaily`, and create it if missing
    - `fixed-channel` mode: reuse one stable channel name such as `robotdaily`, and create it if missing
@@ -42,6 +45,12 @@ Generate digest and publish to Discord:
 python3 scripts/run_daily.py --publish-discord
 python3 scripts/run_daily.py --publish-discord
 ```
 ```
 
 
+Generate digest and sync into Hugo content:
+
+```bash
+python3 scripts/run_daily.py --publish-hugo
+```
+
 Generate digest but skip LLM enrichment:
 Generate digest but skip LLM enrichment:
 
 
 ```bash
 ```bash
@@ -54,8 +63,9 @@ Read `references/selection-and-delivery.md` when you need to tune scoring or cho
 
 
 Common env vars in `arxiv-digest/.env`:
 Common env vars in `arxiv-digest/.env`:
 
 
-- `INSIGHT_MODELS=glm-4.7:cloud,qwen3.5:cloud,qwen3.5:27b,glm-4.7-flash-64k:latest`
+- `INSIGHT_MODELS=qwen3.5:27b`
 - `ROBOTDAILY_OUTPUT_DIR=/path/to/output`
 - `ROBOTDAILY_OUTPUT_DIR=/path/to/output`
+- `HUGO_CONTENT_DIR=/path/to/robdaily/site/content/ai-daily`
 - `DISCORD_DELIVERY_MODE=thread|channel|fixed-channel|existing-channel`
 - `DISCORD_DELIVERY_MODE=thread|channel|fixed-channel|existing-channel`
 - `DISCORD_ACCOUNT_ID=codex`
 - `DISCORD_ACCOUNT_ID=codex`
 - `DISCORD_GUILD_ID=...`
 - `DISCORD_GUILD_ID=...`

+ 13 - 3
arxiv-digest/scripts/enrich_papers.py

@@ -30,7 +30,7 @@ def build_prompt(paper: Dict[str, Any]) -> str:
 }}
 }}
 
 
 要求:
 要求:
-1. translated_abstract_zh:忠实翻译原摘要,不要增加原文没有的实验结果;控制在 180-320 个中文字符
+1. translated_abstract_zh:忠实翻译原摘要,不要增加原文没有的实验结果;控制在 180-400 个中文字符,必须完整覆盖原文摘要的所有要点
 2. brief_explanation_zh:40-90 个中文字符,说明为什么值得读,尽量偏应用价值和创新点。
 2. brief_explanation_zh:40-90 个中文字符,说明为什么值得读,尽量偏应用价值和创新点。
 3. tags:给 4-6 个适合直接贴在移动端卡片上的简短标签;尽量用中文,必要时保留通用英文术语,如 World Model、Offline RL。
 3. tags:给 4-6 个适合直接贴在移动端卡片上的简短标签;尽量用中文,必要时保留通用英文术语,如 World Model、Offline RL。
 4. 语气务实、技术导向,不要夸张。
 4. 语气务实、技术导向,不要夸张。
@@ -58,6 +58,7 @@ def fallback_enrichment(paper: Dict[str, Any]) -> Dict[str, Any]:
 def enrich_paper(paper: Dict[str, Any], model_names: List[str]) -> Dict[str, Any]:
 def enrich_paper(paper: Dict[str, Any], model_names: List[str]) -> Dict[str, Any]:
     prompt = build_prompt(paper)
     prompt = build_prompt(paper)
     result = None
     result = None
+    used_model = ""
     for model in model_names:
     for model in model_names:
         model = normalize_space(model)
         model = normalize_space(model)
         if not model:
         if not model:
@@ -65,6 +66,7 @@ def enrich_paper(paper: Dict[str, Any], model_names: List[str]) -> Dict[str, Any
         log(f"Enriching {paper['arxiv_id']} with {model}")
         log(f"Enriching {paper['arxiv_id']} with {model}")
         result = ollama_generate_json(prompt, model=model, timeout=150)
         result = ollama_generate_json(prompt, model=model, timeout=150)
         if result:
         if result:
+            used_model = model
             break
             break
 
 
     enriched = dict(paper)
     enriched = dict(paper)
@@ -76,6 +78,7 @@ def enrich_paper(paper: Dict[str, Any], model_names: List[str]) -> Dict[str, Any
     enriched["translated_abstract_zh"] = normalize_space(payload.get("translated_abstract_zh", "")) or fallback_enrichment(paper)["translated_abstract_zh"]
     enriched["translated_abstract_zh"] = normalize_space(payload.get("translated_abstract_zh", "")) or fallback_enrichment(paper)["translated_abstract_zh"]
     enriched["brief_explanation_zh"] = normalize_space(payload.get("brief_explanation_zh", "")) or fallback_enrichment(paper)["brief_explanation_zh"]
     enriched["brief_explanation_zh"] = normalize_space(payload.get("brief_explanation_zh", "")) or fallback_enrichment(paper)["brief_explanation_zh"]
     enriched["tags"] = tags[:6]
     enriched["tags"] = tags[:6]
+    enriched["enrichment_model"] = used_model or "fallback"
     return enriched
     return enriched
 
 
 
 
@@ -90,7 +93,14 @@ def enrich_selection(selection_payload: Dict[str, Any], model_names: List[str])
     output = dict(selection_payload)
     output = dict(selection_payload)
     output["papers"] = enriched_papers
     output["papers"] = enriched_papers
     output["selected_by_domain"] = by_domain
     output["selected_by_domain"] = by_domain
-    output["models_used"] = model_names
+    output["configured_models"] = model_names
+    output["effective_models_used"] = list(
+        dict.fromkeys(
+            paper.get("enrichment_model", "")
+            for paper in enriched_papers
+            if paper.get("enrichment_model")
+        )
+    )
     return output
     return output
 
 
 
 
@@ -98,7 +108,7 @@ def main() -> None:
     parser = argparse.ArgumentParser(description="Enrich RobotDaily papers with zh translation and tags")
     parser = argparse.ArgumentParser(description="Enrich RobotDaily papers with zh translation and tags")
     parser.add_argument("--input", required=True)
     parser.add_argument("--input", required=True)
     parser.add_argument("--output", default="")
     parser.add_argument("--output", default="")
-    parser.add_argument("--models", default="glm-4.7:cloud,qwen3.5:cloud,qwen3.5:27b,glm-4.7-flash-64k:latest")
+    parser.add_argument("--models", default="qwen3.5:27b")
     args = parser.parse_args()
     args = parser.parse_args()
 
 
     payload = read_json(args.input, default={}) or {}
     payload = read_json(args.input, default={}) or {}

+ 114 - 74
arxiv-digest/scripts/publish_discord.py

@@ -6,7 +6,6 @@ from __future__ import annotations
 import argparse
 import argparse
 import json
 import json
 import re
 import re
-from pathlib import Path
 from typing import Any, Dict, List, Optional
 from typing import Any, Dict, List, Optional
 from urllib.error import HTTPError
 from urllib.error import HTTPError
 from urllib.request import Request, urlopen
 from urllib.request import Request, urlopen
@@ -24,6 +23,11 @@ from utils import (
 
 
 DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
 DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
 DISCORD_API = "https://discord.com/api/v10"
 DISCORD_API = "https://discord.com/api/v10"
+DOMAIN_STYLE = {
+    "embodied": {"color": 0x4F8CFF, "emoji": "🤖"},
+    "representation": {"color": 0x9B59B6, "emoji": "🧠"},
+    "reinforcement": {"color": 0x2ECC71, "emoji": "🎯"},
+}
 
 
 
 
 class PublishError(RuntimeError):
 class PublishError(RuntimeError):
@@ -128,6 +132,7 @@ class DiscordPublisher:
             headers={
             headers={
                 "Authorization": f"Bot {self.bot_token}",
                 "Authorization": f"Bot {self.bot_token}",
                 "Content-Type": "application/json",
                 "Content-Type": "application/json",
+                "User-Agent": "DiscordBot (https://github.com/openclaw/openclaw, 1.0)",
             },
             },
         )
         )
         try:
         try:
@@ -203,19 +208,28 @@ class DiscordPublisher:
 
 
         raise PublishError(f"未知的投递模式: {self.mode}")
         raise PublishError(f"未知的投递模式: {self.mode}")
 
 
-
-    def send_embeds_via_rest(self, target_channel_id: str, content: str = "", embeds: List[Dict[str, Any]] = None) -> Dict[str, Any]:
+    def send_embeds_via_rest(
+        self,
+        target_channel_id: str,
+        content: str = "",
+        embeds: Optional[List[Dict[str, Any]]] = None,
+        components: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
         if self.dry_run:
         if self.dry_run:
-            log(f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}")
+            log(
+                f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}, components count {len(components or [])}"
+            )
             return {"id": "dry-run-msg-id"}
             return {"id": "dry-run-msg-id"}
         if not self.bot_token:
         if not self.bot_token:
-            raise PublishError("发送 Embed 需要 DISCORD_BOT_TOKEN")
+            raise PublishError("发送富文本卡片需要 DISCORD_BOT_TOKEN")
 
 
         body: Dict[str, Any] = {}
         body: Dict[str, Any] = {}
         if content:
         if content:
             body["content"] = content
             body["content"] = content
         if embeds:
         if embeds:
             body["embeds"] = embeds
             body["embeds"] = embeds
+        if components:
+            body["components"] = components
 
 
         request = Request(
         request = Request(
             url=f"{DISCORD_API}/channels/{target_channel_id}/messages",
             url=f"{DISCORD_API}/channels/{target_channel_id}/messages",
@@ -224,6 +238,7 @@ class DiscordPublisher:
             headers={
             headers={
                 "Authorization": f"Bot {self.bot_token}",
                 "Authorization": f"Bot {self.bot_token}",
                 "Content-Type": "application/json",
                 "Content-Type": "application/json",
+                "User-Agent": "DiscordBot (https://github.com/openclaw/openclaw, 1.0)",
             },
             },
         )
         )
         try:
         try:
@@ -245,62 +260,99 @@ class DiscordPublisher:
 def build_opening_message(payload: Dict[str, Any]) -> str:
 def build_opening_message(payload: Dict[str, Any]) -> str:
     total = len(payload.get("papers", []))
     total = len(payload.get("papers", []))
     counts = payload.get("counts", {})
     counts = payload.get("counts", {})
-    parts = [f"老大早安~今天给你挑了 {total} 篇偏应用论文。"]
+    breakdown = []
     for domain in DOMAIN_ORDER:
     for domain in DOMAIN_ORDER:
         count = counts.get(domain, 0)
         count = counts.get(domain, 0)
         if count:
         if count:
-            parts.append(f"{DOMAIN_CONFIGS[domain]['label_zh']} {count} 篇")
-    parts.append("下面每张卡片都带 DOI / arXiv / PDF,可直接点开读。")
-    return " | ".join(parts)
+            breakdown.append(f"{DOMAIN_CONFIGS[domain]['label_zh']} {count}")
+    suffix = " / ".join(breakdown)
+    if suffix:
+        return f"**RobotDaily | {now_local().strftime('%Y-%m-%d')}**\n共 {total} 篇偏应用论文,按方向整理成短卡片:{suffix}。"
+    return f"**RobotDaily | {now_local().strftime('%Y-%m-%d')}**\n共 {total} 篇偏应用论文。"
 
 
 
 
 def build_domain_header(domain: str, count: int) -> str:
 def build_domain_header(domain: str, count: int) -> str:
-    return f"## {DOMAIN_CONFIGS[domain]['label_zh']}({count} 篇)"
+    style = DOMAIN_STYLE.get(domain, {})
+    emoji = style.get("emoji", "📌")
+    return f"## {emoji} {DOMAIN_CONFIGS[domain]['label_zh']}({count} 篇)"
 
 
 
 
 def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
 def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
-    tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:6])
-    
-    # Title max 256
-    title = f"{paper.get('domain_rank', '?')}. {paper.get('title', '')}"
-    if len(title) > 256:
-        title = title[:253] + "..."
-        
-    description_lines = []
-    description_lines.append(f"**作者:** {format_authors(paper.get('authors', []), limit=4)}")
-    if tags:
-        description_lines.append(f"**关键词:** {tags}")
-        
-    brief = paper.get('brief_explanation_zh', '')
-    if brief:
-        description_lines.append(f"\n**💡 简析**\n{brief}")
-        
-    abstract = truncate(paper.get('translated_abstract_zh', ''), 700)
-    if abstract:
-        description_lines.append(f"\n**📖 摘要**\n{abstract}")
-        
-    links = []
-    if paper.get('doi_url'): links.append(f"[DOI]({paper.get('doi_url')})")
-    if paper.get('abs_url'): links.append(f"[arXiv]({paper.get('abs_url')})")
-    if paper.get('pdf_url'): links.append(f"[PDF]({paper.get('pdf_url')})")
-    
-    if links:
-        description_lines.append(f"\n**🔗 链接:** {' | '.join(links)}")
-        
-    description = "\n".join(description_lines)
-    if len(description) > 4096:
-        description = description[:4093] + "..."
-        
-    embed = {
+    domain = paper.get("domain", "")
+    label_zh = DOMAIN_CONFIGS.get(domain, {}).get("label_zh", domain or "未分类")
+    style = DOMAIN_STYLE.get(domain, {"color": 0x3498DB, "emoji": "📄"})
+
+    title_prefix = f"{paper.get('domain_rank', '?')}. "
+    title = truncate(title_prefix + str(paper.get("title", "")), 256)
+    brief = truncate(
+        paper.get("brief_explanation_zh", "")
+        or paper.get("selection_reason", "")
+        or paper.get("translated_abstract_zh", ""),
+        200,
+    )
+    authors = truncate(format_authors(paper.get("authors", []), limit=4), 120) or "—"
+    tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"
+    published = truncate(str(paper.get("published_local", ""))[:10], 32) or "—"
+    arxiv_id = truncate(paper.get("arxiv_id", ""), 64) or "—"
+
+    fields: List[Dict[str, Any]] = [
+        {"name": "💡 简析", "value": brief or "—", "inline": False},
+        {"name": "👤 作者", "value": authors, "inline": True},
+        {"name": "🏷️ 标签", "value": tags, "inline": True},
+    ]
+
+    embed: Dict[str, Any] = {
         "title": title,
         "title": title,
-        "description": description,
-        "url": paper.get('abs_url', ''),
-        "color": 0x3498db
+        "url": paper.get("abs_url", ""),
+        "description": f"{style['emoji']} **{label_zh}** · `{published}` · `arXiv:{arxiv_id}`",
+        "color": style["color"],
+        "fields": fields,
+        "footer": {"text": "RobotDaily 卡片视图"},
     }
     }
-    
     return embed
     return embed
 
 
 
 
+def build_link_buttons(paper: Dict[str, Any]) -> List[Dict[str, Any]]:
+    buttons: List[Dict[str, Any]] = []
+    for label, url in [
+        ("DOI", paper.get("doi_url", "")),
+        ("arXiv", paper.get("abs_url", "")),
+        ("PDF", paper.get("pdf_url", "")),
+    ]:
+        if not url:
+            continue
+        buttons.append({"type": 2, "style": 5, "label": label, "url": url})
+    if not buttons:
+        return []
+    return [{"type": 1, "components": buttons[:3]}]
+
+
+def build_fallback_paper_markdown(paper: Dict[str, Any]) -> str:
+    links = []
+    if paper.get("doi_url"):
+        links.append(f"[DOI]({paper.get('doi_url')})")
+    if paper.get("abs_url"):
+        links.append(f"[arXiv]({paper.get('abs_url')})")
+    if paper.get("pdf_url"):
+        links.append(f"[PDF]({paper.get('pdf_url')})")
+    tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"
+    brief = truncate(
+        paper.get("brief_explanation_zh", "") or paper.get("selection_reason", "") or paper.get("translated_abstract_zh", ""),
+        180,
+    )
+    abstract = truncate(paper.get("translated_abstract_zh", "") or paper.get("summary", ""), 220)
+    lines = [
+        f"### {paper.get('domain_rank', '?')}. {paper.get('title', '')}",
+        f"> {brief or '—'}",
+        f"- 作者:{truncate(format_authors(paper.get('authors', []), limit=4), 120) or '—'}",
+        f"- 标签:{tags}",
+        f"- 中文摘要:{abstract or '—'}",
+    ]
+    if links:
+        lines.append(f"- 链接:{' | '.join(links)}")
+    return "\n".join(lines)
+
+
 def publish_digest(
 def publish_digest(
     payload: Dict[str, Any],
     payload: Dict[str, Any],
     *,
     *,
@@ -311,11 +363,7 @@ def publish_digest(
     opening_message = build_opening_message(payload)
     opening_message = build_opening_message(payload)
     target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
     target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
 
 
-    attached_message = opening_message + "\n\n今天起换成了全新卡片式排版,直接在 Discord 里看中译摘要和核心内容啦!"
-    if html_path:
-        publisher.send_message(target_channel_id, attached_message, media=html_path)
-    else:
-        publisher.send_message(target_channel_id, attached_message)
+    publisher.send_message(target_channel_id, opening_message)
 
 
     grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
     grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
     for paper in payload.get("papers", []):
     for paper in payload.get("papers", []):
@@ -325,31 +373,23 @@ def publish_digest(
         papers = grouped.get(domain, [])
         papers = grouped.get(domain, [])
         if not papers:
         if not papers:
             continue
             continue
-        
-        # Build embeds for the domain
-        embeds = [build_paper_embed(paper) for paper in papers]
-        
-        # Discord limit is 10 embeds per message, we chunk them by 4 to be safe with total characters
-        chunk_size = 4
-        for i in range(0, len(embeds), chunk_size):
-            chunk_embeds = embeds[i:i + chunk_size]
-            
-            # Print domain header on the first chunk
-            msg_content = build_domain_header(domain, len(papers)) if i == 0 else ""
-            
+
+        publisher.send_message(target_channel_id, build_domain_header(domain, len(papers)))
+        for paper in papers:
             if publisher.bot_token:
             if publisher.bot_token:
-                # Use REST API to send rich embeds
-                publisher.send_embeds_via_rest(target_channel_id, content=msg_content, embeds=chunk_embeds)
-            else:
-                # Fallback to plain text if no bot token
-                if msg_content:
-                    publisher.send_message(target_channel_id, msg_content)
-                for paper in papers[i:i + chunk_size]:
-                    fallback_text = f"**{paper.get('title')}**\n{paper.get('abs_url')}"
-                    publisher.send_message(target_channel_id, fallback_text)
+                try:
+                    publisher.send_embeds_via_rest(
+                        target_channel_id,
+                        embeds=[build_paper_embed(paper)],
+                        components=build_link_buttons(paper),
+                    )
+                    continue
+                except PublishError as exc:
+                    log(f"富文本卡片发送失败,回退到纯 Markdown:{exc}")
+            publisher.send_message(target_channel_id, build_fallback_paper_markdown(paper))
 
 
     if markdown_path:
     if markdown_path:
-        publisher.send_message(target_channel_id, "附一份 Markdown 归档版,桌面端检索会更方便。", media=markdown_path)
+        publisher.send_message(target_channel_id, "附一份 Markdown 归档版,方便桌面端检索。", media=markdown_path)
 
 
     return target_channel_id
     return target_channel_id
 
 

+ 21 - 8
arxiv-digest/scripts/render_digest.py

@@ -10,7 +10,7 @@ from pathlib import Path
 from typing import Any, Dict, List
 from typing import Any, Dict, List
 
 
 from fetch_arxiv import DOMAIN_CONFIGS
 from fetch_arxiv import DOMAIN_CONFIGS
-from utils import SKILL_DIR, format_authors, html_escape, normalize_space, now_local, read_json, write_text
+from utils import SKILL_DIR, format_authors, html_escape, normalize_space, now_local, read_json, truncate, write_text
 
 
 DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
 DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
 TEMPLATE_PATH = SKILL_DIR / "assets" / "mobile_digest_template.html"
 TEMPLATE_PATH = SKILL_DIR / "assets" / "mobile_digest_template.html"
@@ -123,7 +123,7 @@ def render_markdown(payload: Dict[str, Any]) -> str:
     lines: List[str] = []
     lines: List[str] = []
     lines.append(f"# RobotDaily | {now_local().strftime('%Y-%m-%d')}")
     lines.append(f"# RobotDaily | {now_local().strftime('%Y-%m-%d')}")
     lines.append("")
     lines.append("")
-    lines.append("具身智能 / 表征学习 / 强化学习,每个方向 2-3 篇偏应用候选。")
+    lines.append("偏应用导向精选,按方向整理成短卡片式 Markdown 归档。")
     lines.append("")
     lines.append("")
     for domain in DOMAIN_ORDER:
     for domain in DOMAIN_ORDER:
         papers = [paper for paper in payload.get("papers", []) if paper.get("domain") == domain]
         papers = [paper for paper in payload.get("papers", []) if paper.get("domain") == domain]
@@ -132,16 +132,29 @@ def render_markdown(payload: Dict[str, Any]) -> str:
         lines.append(f"## {DOMAIN_CONFIGS[domain]['label_zh']}({len(papers)} 篇)")
         lines.append(f"## {DOMAIN_CONFIGS[domain]['label_zh']}({len(papers)} 篇)")
         lines.append("")
         lines.append("")
         for idx, paper in enumerate(papers, start=1):
         for idx, paper in enumerate(papers, start=1):
-            tags = " ".join(f"`{tag}`" for tag in paper.get("tags", []))
+            tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"
+            brief = truncate(
+                paper.get("brief_explanation_zh", "") or paper.get("selection_reason", "") or paper.get("translated_abstract_zh", ""),
+                180,
+            )
+            abstract = truncate(paper.get("translated_abstract_zh", "") or paper.get("summary", ""), 450)
+            links = [
+                part
+                for part in [
+                    f"[DOI]({paper.get('doi_url', '')})" if paper.get("doi_url") else "",
+                    f"[arXiv]({paper.get('abs_url', '')})" if paper.get("abs_url") else "",
+                    f"[PDF]({paper.get('pdf_url', '')})" if paper.get("pdf_url") else "",
+                ]
+                if part
+            ]
             lines.extend(
             lines.extend(
                 [
                 [
                     f"### {idx}. {paper.get('title', '')}",
                     f"### {idx}. {paper.get('title', '')}",
-                    f"- 作者:{format_authors(paper.get('authors', []), limit=4)}",
-                    f"- 亮点:{paper.get('brief_explanation_zh', '')}",
+                    f"> {brief or '—'}",
+                    f"- 作者:{format_authors(paper.get('authors', []), limit=4) or '—'}",
                     f"- 标签:{tags}",
                     f"- 标签:{tags}",
-                    f"- DOI:{paper.get('doi_url', '')}",
-                    f"- arXiv:{paper.get('abs_url', '')}",
-                    f"- PDF:{paper.get('pdf_url', '')}",
+                    f"- 中文摘要:{abstract or '—'}",
+                    f"- 链接:{' | '.join(links) if links else '—'}",
                     "",
                     "",
                 ]
                 ]
             )
             )

+ 24 - 2
arxiv-digest/scripts/run_daily.py

@@ -11,6 +11,7 @@ from typing import Any, Dict, List
 from enrich_papers import enrich_selection
 from enrich_papers import enrich_selection
 from fetch_arxiv import fetch_candidates
 from fetch_arxiv import fetch_candidates
 from publish_discord import DiscordPublisher, publish_digest
 from publish_discord import DiscordPublisher, publish_digest
+from publish_hugo import publish_markdown_to_hugo, publish_to_hugo
 from render_digest import render_html, render_markdown
 from render_digest import render_html, render_markdown
 from select_papers import select_papers
 from select_papers import select_papers
 from utils import DEFAULT_OUTPUT_DIR, ensure_dir, load_env, log, now_local, write_json, write_text
 from utils import DEFAULT_OUTPUT_DIR, ensure_dir, load_env, log, now_local, write_json, write_text
@@ -57,6 +58,8 @@ def main() -> None:
     parser.add_argument("--models", default="")
     parser.add_argument("--models", default="")
     parser.add_argument("--skip-enrich", action="store_true")
     parser.add_argument("--skip-enrich", action="store_true")
     parser.add_argument("--publish-discord", action="store_true")
     parser.add_argument("--publish-discord", action="store_true")
+    parser.add_argument("--publish-hugo", action="store_true")
+    parser.add_argument("--hugo-content-dir", default="")
     parser.add_argument("--dry-run", action="store_true")
     parser.add_argument("--dry-run", action="store_true")
     args = parser.parse_args()
     args = parser.parse_args()
 
 
@@ -73,7 +76,7 @@ def main() -> None:
     write_json(paths["candidates_json"], {"generated_at": now_local().isoformat(), "papers": selection.get("candidates", [])})
     write_json(paths["candidates_json"], {"generated_at": now_local().isoformat(), "papers": selection.get("candidates", [])})
     write_json(paths["selected_json"], {k: v for k, v in selection.items() if k != "candidates"})
     write_json(paths["selected_json"], {k: v for k, v in selection.items() if k != "candidates"})
 
 
-    models = parse_models(args.models or env.get("INSIGHT_MODELS", "qwen3.5:27b"))
+    models = parse_models(args.models or env.get("INSIGHT_MODELS", "qwen3.5:cloud,glm-4.7:cloud"))
     if args.skip_enrich:
     if args.skip_enrich:
         enriched = {k: v for k, v in selection.items() if k != "candidates"}
         enriched = {k: v for k, v in selection.items() if k != "candidates"}
         for paper in enriched.get("papers", []):
         for paper in enriched.get("papers", []):
@@ -101,6 +104,26 @@ def main() -> None:
     }
     }
     write_json(paths["manifest_json"], manifest)
     write_json(paths["manifest_json"], manifest)
 
 
+    if args.publish_hugo:
+        content_dir = args.hugo_content_dir or env.get("HUGO_CONTENT_DIR", "")
+        if content_dir:
+            hugo_target = publish_to_hugo(
+                markdown_path=str(paths["digest_md"]),
+                manifest_path=str(paths["manifest_json"]),
+                content_dir=content_dir,
+            )
+        else:
+            site_dir = env.get("HUGO_SITE_DIR", "")
+            if not site_dir:
+                raise SystemExit("--publish-hugo 需要设置 HUGO_CONTENT_DIR 或 HUGO_SITE_DIR")
+            hugo_target = publish_markdown_to_hugo(
+                str(paths["digest_md"]),
+                site_dir=site_dir,
+                section=env.get("HUGO_CONTENT_SECTION", "ai-daily"),
+            )
+        manifest["hugo_target"] = str(hugo_target)
+        write_json(paths["manifest_json"], manifest)
+
     if args.publish_discord:
     if args.publish_discord:
         publisher = DiscordPublisher(
         publisher = DiscordPublisher(
             openclaw_bin=env.get("OPENCLAW_BIN", "openclaw"),
             openclaw_bin=env.get("OPENCLAW_BIN", "openclaw"),
@@ -117,7 +140,6 @@ def main() -> None:
         )
         )
         target = publish_digest(
         target = publish_digest(
             enriched,
             enriched,
-            # html_path=str(paths["digest_html"]),
             markdown_path=str(paths["digest_md"]),
             markdown_path=str(paths["digest_md"]),
             publisher=publisher,
             publisher=publisher,
         )
         )