2 dni temu · 8df0a49d7c
--- a/arxiv-digest/scripts/publish_discord.py
+++ b/arxiv-digest/scripts/publish_discord.py
@@ -203,6 +203,38 @@ class DiscordPublisher:
 
				 
			
 
				         raise PublishError(f"未知的投递模式: {self.mode}")
			
 
				 
			
 
				+
			
 
    def send_embeds_via_rest(self, target_channel_id: str, content: str = "", embeds: List[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Post a message with optional embeds through the Discord REST API.

        Args:
            target_channel_id: ID interpolated into ``/channels/{id}/messages``.
            content: Plain-text message body; left out of the payload when empty.
            embeds: Embed dicts to attach; left out of the payload when empty
                or ``None``.

        Returns:
            The decoded JSON body of Discord's response, or the stub
            ``{"id": "dry-run-msg-id"}`` when ``self.dry_run`` is set.

        Raises:
            PublishError: If no bot token is configured, if there is nothing
                to send, or if Discord answers with an HTTP error status.
        """
        if self.dry_run:
            log(f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}")
            return {"id": "dry-run-msg-id"}
        if not self.bot_token:
            raise PublishError("发送 Embed 需要 DISCORD_BOT_TOKEN")

        body: Dict[str, Any] = {}
        if content:
            body["content"] = content
        if embeds:
            body["embeds"] = embeds
        if not body:
            # Discord rejects empty messages; fail before the network round
            # trip with the same exception type the HTTP error path raises.
            raise PublishError("发送 Discord 消息需要 content 或 embeds")

        request = Request(
            url=f"{DISCORD_API}/channels/{target_channel_id}/messages",
            method="POST",
            data=json.dumps(body).encode("utf-8"),
            headers={
                "Authorization": f"Bot {self.bot_token}",
                "Content-Type": "application/json",
                # Discord requires a "DiscordBot ($url, $version)" User-Agent;
                # urllib's default "Python-urllib/x.y" may be blocked with a
                # Cloudflare error. NOTE(review): replace the placeholder with
                # the project's real URL and version.
                "User-Agent": "DiscordBot (arxiv-digest, 1.0)",
            },
        )
        try:
            with urlopen(request, timeout=30) as response:
                return json.loads(response.read().decode("utf-8", errors="ignore"))
        except HTTPError as exc:
            # Surface Discord's error payload; it names the rejected field.
            detail = exc.read().decode("utf-8", errors="ignore")
            log(f"REST API 错误: {exc.code} {exc.reason} - {detail}")
            raise PublishError(f"创建 Discord 消息失败: {exc.code} {detail}") from exc
			
 
				+
			
 
				     def send_message(self, target_channel_id: str, message: str, media: str = "") -> Dict[str, Any]:
			
 
				         args = ["send", "--target", f"channel:{target_channel_id}", "--message", message]
			
 
				         if media:
			
@@ -226,33 +258,64 @@ def build_domain_header(domain: str, count: int) -> str:
 
				     return f"## {DOMAIN_CONFIGS[domain]['label_zh']}（{count} 篇）"
			
 
				 
			
 
				 
			
 
				-def build_paper_message(paper: Dict[str, Any]) -> str:
			
 
				+def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
			
 
				     tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:6])
			
 
				-    lines = [
			
 
				-        f"### {paper.get('domain_rank', '?')}. {paper.get('title', '')}",
			
 
				-        f"作者：{format_authors(paper.get('authors', []), limit=4)}",
			
 
				-        f"关键词：{tags}" if tags else "关键词：暂无",
			
 
				-        f"简析：{paper.get('brief_explanation_zh', '')}",
			
 
				-        f"摘要中译：{truncate(paper.get('translated_abstract_zh', ''), 700)}",
			
 
				-        f"DOI：{paper.get('doi_url', '')}",
			
 
				-        f"arXiv：{paper.get('abs_url', '')}",
			
 
				-        f"PDF：{paper.get('pdf_url', '')}",
			
 
				-    ]
			
 
				-    return "\n".join(lines)
			
 
				+    
			
 
				+    # Title max 256
			
 
				+    title = f"{paper.get('domain_rank', '?')}. {paper.get('title', '')}"
			
 
				+    if len(title) > 256:
			
 
				+        title = title[:253] + "..."
			
 
				+        
			
 
				+    description_lines = []
			
 
				+    description_lines.append(f"**作者：** {format_authors(paper.get('authors', []), limit=4)}")
			
 
				+    if tags:
			
 
				+        description_lines.append(f"**关键词：** {tags}")
			
 
				+        
			
 
				+    brief = paper.get('brief_explanation_zh', '')
			
 
				+    if brief:
			
 
				+        description_lines.append(f"\n**💡 简析**\n{brief}")
			
 
				+        
			
 
				+    abstract = truncate(paper.get('translated_abstract_zh', ''), 700)
			
 
				+    if abstract:
			
 
				+        description_lines.append(f"\n**📖 摘要**\n{abstract}")
			
 
				+        
			
 
				+    links = []
			
 
				+    if paper.get('doi_url'): links.append(f"[DOI]({paper.get('doi_url')})")
			
 
				+    if paper.get('abs_url'): links.append(f"[arXiv]({paper.get('abs_url')})")
			
 
				+    if paper.get('pdf_url'): links.append(f"[PDF]({paper.get('pdf_url')})")
			
 
				+    
			
 
				+    if links:
			
 
				+        description_lines.append(f"\n**🔗 链接：** {' | '.join(links)}")
			
 
				+        
			
 
				+    description = "\n".join(description_lines)
			
 
				+    if len(description) > 4096:
			
 
				+        description = description[:4093] + "..."
			
 
				+        
			
 
				+    embed = {
			
 
				+        "title": title,
			
 
				+        "description": description,
			
 
				+        "url": paper.get('abs_url', ''),
			
 
				+        "color": 0x3498db
			
 
				+    }
			
 
				+    
			
 
				+    return embed
			
 
				 
			
 
				 
			
 
				 def publish_digest(
			
 
				     payload: Dict[str, Any],
			
 
				     *,
			
 
				-    html_path: str,
			
 
				-    markdown_path: str,
			
 
				+    html_path: str = "",
			
 
				+    markdown_path: str = "",
			
 
				     publisher: DiscordPublisher,
			
 
				 ) -> str:
			
 
				     opening_message = build_opening_message(payload)
			
 
				     target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
			
 
				 
			
 
				-    attached_message = opening_message + "\n\n已附上移动端 HTML 晨读版，点开卡片能直接看中译摘要。"
			
 
				-    publisher.send_message(target_channel_id, attached_message, media=html_path)
			
 
				+    attached_message = opening_message + "\n\n今天起换成了全新卡片式排版，直接在 Discord 里看中译摘要和核心内容啦！"
			
 
				+    if html_path:
			
 
				+        publisher.send_message(target_channel_id, attached_message, media=html_path)
			
 
				+    else:
			
 
				+        publisher.send_message(target_channel_id, attached_message)
			
 
				 
			
 
				     grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
			
 
				     for paper in payload.get("papers", []):
			
@@ -262,9 +325,28 @@ def publish_digest(
 
				         papers = grouped.get(domain, [])
			
 
				         if not papers:
			
 
				             continue
			
 
				-        publisher.send_message(target_channel_id, build_domain_header(domain, len(papers)))
			
 
				-        for paper in papers:
			
 
				-            publisher.send_message(target_channel_id, build_paper_message(paper))
			
 
				+        
			
 
				+        # Build embeds for the domain
			
 
				+        embeds = [build_paper_embed(paper) for paper in papers]
			
 
				+        
			
 
				+        # Discord limit is 10 embeds per message, we chunk them by 4 to be safe with total characters
			
 
				+        chunk_size = 4
			
 
				+        for i in range(0, len(embeds), chunk_size):
			
 
				+            chunk_embeds = embeds[i:i + chunk_size]
			
 
				+            
			
 
				+            # Print domain header on the first chunk
			
 
				+            msg_content = build_domain_header(domain, len(papers)) if i == 0 else ""
			
 
				+            
			
 
				+            if publisher.bot_token:
			
 
				+                # Use REST API to send rich embeds
			
 
				+                publisher.send_embeds_via_rest(target_channel_id, content=msg_content, embeds=chunk_embeds)
			
 
				+            else:
			
 
				+                # Fallback to plain text if no bot token
			
 
				+                if msg_content:
			
 
				+                    publisher.send_message(target_channel_id, msg_content)
			
 
				+                for paper in papers[i:i + chunk_size]:
			
 
				+                    fallback_text = f"**{paper.get('title')}**\n{paper.get('abs_url')}"
			
 
				+                    publisher.send_message(target_channel_id, fallback_text)
			
 
				 
			
 
				     if markdown_path:
			
 
				         publisher.send_message(target_channel_id, "附一份 Markdown 归档版，桌面端检索会更方便。", media=markdown_path)
			
@@ -275,7 +357,7 @@ def publish_digest(
 
				 def main() -> None:
			
 
				     parser = argparse.ArgumentParser(description="Publish RobotDaily digest to Discord")
			
 
				     parser.add_argument("--input", required=True)
			
 
				-    parser.add_argument("--html", required=True)
			
 
				+    parser.add_argument("--html", default="")
			
 
				     parser.add_argument("--markdown", default="")
			
 
				     parser.add_argument("--mode", default="thread")
			
 
				     parser.add_argument("--openclaw-bin", default="openclaw")
			
--- a/arxiv-digest/scripts/run_daily.py
+++ b/arxiv-digest/scripts/run_daily.py
@@ -73,7 +73,7 @@ def main() -> None:
 
				     write_json(paths["candidates_json"], {"generated_at": now_local().isoformat(), "papers": selection.get("candidates", [])})
			
 
				     write_json(paths["selected_json"], {k: v for k, v in selection.items() if k != "candidates"})
			
 
				 
			
 
				-    models = parse_models(args.models or env.get("INSIGHT_MODELS", "glm-4.7:cloud,qwen3.5:cloud,qwen3.5:27b,glm-4.7-flash-64k:latest"))
			
 
				+    models = parse_models(args.models or env.get("INSIGHT_MODELS", "qwen3.5:27b"))
			
 
				     if args.skip_enrich:
			
 
				         enriched = {k: v for k, v in selection.items() if k != "candidates"}
			
 
				         for paper in enriched.get("papers", []):
			
@@ -96,6 +96,7 @@ def main() -> None:
 
				         "selected_count": len(enriched.get("papers", [])),
			
 
				         "counts": enriched.get("counts", {}),
			
 
				         "models": models,
			
 
				+        "effective_models_used": enriched.get("effective_models_used", []),
			
 
				         "paths": {name: str(path) for name, path in paths.items() if name != "bundle_dir"},
			
 
				     }
			
 
				     write_json(paths["manifest_json"], manifest)
			
@@ -116,7 +117,7 @@ def main() -> None:
 
				         )
			
 
				         target = publish_digest(
			
 
				             enriched,
			
 
				-            html_path=str(paths["digest_html"]),
			
 
				+            # html_path=str(paths["digest_html"]),
			
 
				             markdown_path=str(paths["digest_md"]),
			
 
				             publisher=publisher,
			
 
				         )