Przeglądaj źródła

feat(discord): switch to rich embeds and disable HTML generation

Daily Deploy Bot 2 dni temu
rodzic
commit
8df0a49d7c

+ 102 - 20
arxiv-digest/scripts/publish_discord.py

@@ -203,6 +203,38 @@ class DiscordPublisher:
 
         raise PublishError(f"未知的投递模式: {self.mode}")
 
+
+    def send_embeds_via_rest(self, target_channel_id: str, content: str = "", embeds: List[Dict[str, Any]] = None) -> Dict[str, Any]:
+        """Post a message with optional rich embeds to a channel via the Discord REST API.
+
+        Args:
+            target_channel_id: ID of the channel the message is posted to.
+            content: Plain-text message body; left out of the payload when empty.
+            embeds: Embed objects to attach; ``None`` or an empty list means no embeds.
+
+        Returns:
+            The decoded JSON response, or the stub ``{"id": "dry-run-msg-id"}``
+            when ``self.dry_run`` is set (no request is made in that case).
+
+        Raises:
+            PublishError: If no bot token is configured, if there is nothing to
+                send, if Discord answers with an HTTP error status, or if the
+                request fails at the network level.
+        """
+        embeds = embeds or []
+        if self.dry_run:
+            log(f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds)}")
+            return {"id": "dry-run-msg-id"}
+        if not self.bot_token:
+            raise PublishError("发送 Embed 需要 DISCORD_BOT_TOKEN")
+
+        body: Dict[str, Any] = {}
+        if content:
+            body["content"] = content
+        if embeds:
+            body["embeds"] = embeds
+        if not body:
+            # Discord rejects a message with neither content nor embeds; fail before spending a request.
+            raise PublishError("创建 Discord 消息失败: content 和 embeds 不能同时为空")
+
+        request = Request(
+            url=f"{DISCORD_API}/channels/{target_channel_id}/messages",
+            method="POST",
+            data=json.dumps(body).encode("utf-8"),
+            headers={
+                "Authorization": f"Bot {self.bot_token}",
+                "Content-Type": "application/json",
+                # Discord asks HTTP API clients to identify themselves as
+                # "DiscordBot ($url, $version)" instead of relying on urllib's default agent.
+                # NOTE(review): substitute the project's real homepage URL here.
+                "User-Agent": "DiscordBot (arxiv-digest, 1.0)",
+            },
+        )
+        try:
+            with urlopen(request, timeout=30) as response:
+                return json.loads(response.read().decode("utf-8", errors="ignore"))
+        except HTTPError as exc:
+            # Must precede the OSError handler: HTTPError is itself an OSError subclass.
+            detail = exc.read().decode("utf-8", errors="ignore")
+            log(f"REST API 错误: {exc.code} {exc.reason} - {detail}")
+            raise PublishError(f"创建 Discord 消息失败: {exc.code} {detail}") from exc
+        except OSError as exc:
+            # URLError and socket timeouts derive from OSError; report them as PublishError
+            # so callers see one exception type for every failed send.
+            log(f"REST API 网络错误: {exc}")
+            raise PublishError(f"创建 Discord 消息失败: {exc}") from exc
+
     def send_message(self, target_channel_id: str, message: str, media: str = "") -> Dict[str, Any]:
         args = ["send", "--target", f"channel:{target_channel_id}", "--message", message]
         if media:
@@ -226,33 +258,64 @@ def build_domain_header(domain: str, count: int) -> str:
     return f"## {DOMAIN_CONFIGS[domain]['label_zh']}({count} 篇)"
 
 
-def build_paper_message(paper: Dict[str, Any]) -> str:
+def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
+    """Build one Discord embed object describing a single paper.
+
+    Args:
+        paper: Paper record; the keys read here are ``tags``, ``domain_rank``,
+            ``title``, ``authors``, ``brief_explanation_zh``,
+            ``translated_abstract_zh``, ``doi_url``, ``abs_url`` and ``pdf_url``.
+            Every key is optional.
+
+    Returns:
+        A dict with ``title``, ``description`` and ``color``; ``url`` is added
+        only when the paper has a non-empty ``abs_url``.
+    """
     tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:6])
-    lines = [
-        f"### {paper.get('domain_rank', '?')}. {paper.get('title', '')}",
-        f"作者:{format_authors(paper.get('authors', []), limit=4)}",
-        f"关键词:{tags}" if tags else "关键词:暂无",
-        f"简析:{paper.get('brief_explanation_zh', '')}",
-        f"摘要中译:{truncate(paper.get('translated_abstract_zh', ''), 700)}",
-        f"DOI:{paper.get('doi_url', '')}",
-        f"arXiv:{paper.get('abs_url', '')}",
-        f"PDF:{paper.get('pdf_url', '')}",
-    ]
-    return "\n".join(lines)
+
+    # Embed titles are capped at 256 characters; keep room for the ellipsis.
+    title = f"{paper.get('domain_rank', '?')}. {paper.get('title', '')}"
+    if len(title) > 256:
+        title = title[:253] + "..."
+
+    description_lines = [f"**作者:** {format_authors(paper.get('authors', []), limit=4)}"]
+    if tags:
+        description_lines.append(f"**关键词:** {tags}")
+
+    brief = paper.get("brief_explanation_zh", "")
+    if brief:
+        description_lines.append(f"\n**💡 简析**\n{brief}")
+
+    abstract = truncate(paper.get("translated_abstract_zh", ""), 700)
+    if abstract:
+        description_lines.append(f"\n**📖 摘要**\n{abstract}")
+
+    # One markdown link per URL that is actually present, in a fixed display order.
+    link_fields = (("DOI", "doi_url"), ("arXiv", "abs_url"), ("PDF", "pdf_url"))
+    links = [f"[{label}]({paper[key]})" for label, key in link_fields if paper.get(key)]
+    if links:
+        description_lines.append(f"\n**🔗 链接:** {' | '.join(links)}")
+
+    # Embed descriptions are capped at 4096 characters; keep room for the ellipsis.
+    description = "\n".join(description_lines)
+    if len(description) > 4096:
+        description = description[:4093] + "..."
+
+    embed: Dict[str, Any] = {
+        "title": title,
+        "description": description,
+        "color": 0x3498DB,
+    }
+    # Only turn the title into a hyperlink when a URL exists; an empty "url"
+    # value is not a usable link target.
+    abs_url = paper.get("abs_url")
+    if abs_url:
+        embed["url"] = abs_url
+    return embed
 
 
 def publish_digest(
     payload: Dict[str, Any],
     *,
-    html_path: str,
-    markdown_path: str,
+    html_path: str = "",
+    markdown_path: str = "",
     publisher: DiscordPublisher,
 ) -> str:
     opening_message = build_opening_message(payload)
     target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
 
-    attached_message = opening_message + "\n\n已附上移动端 HTML 晨读版,点开卡片能直接看中译摘要。"
-    publisher.send_message(target_channel_id, attached_message, media=html_path)
+    attached_message = opening_message + "\n\n今天起换成了全新卡片式排版,直接在 Discord 里看中译摘要和核心内容啦!"
+    if html_path:
+        publisher.send_message(target_channel_id, attached_message, media=html_path)
+    else:
+        publisher.send_message(target_channel_id, attached_message)
 
     grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
     for paper in payload.get("papers", []):
@@ -262,9 +325,28 @@ def publish_digest(
         papers = grouped.get(domain, [])
         if not papers:
             continue
-        publisher.send_message(target_channel_id, build_domain_header(domain, len(papers)))
-        for paper in papers:
-            publisher.send_message(target_channel_id, build_paper_message(paper))
+        
+        # Build embeds for the domain
+        embeds = [build_paper_embed(paper) for paper in papers]
+        
+        # Discord limit is 10 embeds per message, we chunk them by 4 to be safe with total characters
+        chunk_size = 4
+        for i in range(0, len(embeds), chunk_size):
+            chunk_embeds = embeds[i:i + chunk_size]
+            
+            # Print domain header on the first chunk
+            msg_content = build_domain_header(domain, len(papers)) if i == 0 else ""
+            
+            if publisher.bot_token:
+                # Use REST API to send rich embeds
+                publisher.send_embeds_via_rest(target_channel_id, content=msg_content, embeds=chunk_embeds)
+            else:
+                # Fallback to plain text if no bot token
+                if msg_content:
+                    publisher.send_message(target_channel_id, msg_content)
+                for paper in papers[i:i + chunk_size]:
+                    fallback_text = f"**{paper.get('title')}**\n{paper.get('abs_url')}"
+                    publisher.send_message(target_channel_id, fallback_text)
 
     if markdown_path:
         publisher.send_message(target_channel_id, "附一份 Markdown 归档版,桌面端检索会更方便。", media=markdown_path)
@@ -275,7 +357,7 @@ def publish_digest(
 def main() -> None:
     parser = argparse.ArgumentParser(description="Publish RobotDaily digest to Discord")
     parser.add_argument("--input", required=True)
-    parser.add_argument("--html", required=True)
+    parser.add_argument("--html", default="")
     parser.add_argument("--markdown", default="")
     parser.add_argument("--mode", default="thread")
     parser.add_argument("--openclaw-bin", default="openclaw")

+ 3 - 2
arxiv-digest/scripts/run_daily.py

@@ -73,7 +73,7 @@ def main() -> None:
     write_json(paths["candidates_json"], {"generated_at": now_local().isoformat(), "papers": selection.get("candidates", [])})
     write_json(paths["selected_json"], {k: v for k, v in selection.items() if k != "candidates"})
 
-    models = parse_models(args.models or env.get("INSIGHT_MODELS", "glm-4.7:cloud,qwen3.5:cloud,qwen3.5:27b,glm-4.7-flash-64k:latest"))
+    models = parse_models(args.models or env.get("INSIGHT_MODELS", "qwen3.5:27b"))
     if args.skip_enrich:
         enriched = {k: v for k, v in selection.items() if k != "candidates"}
         for paper in enriched.get("papers", []):
@@ -96,6 +96,7 @@ def main() -> None:
         "selected_count": len(enriched.get("papers", [])),
         "counts": enriched.get("counts", {}),
         "models": models,
+        "effective_models_used": enriched.get("effective_models_used", []),
         "paths": {name: str(path) for name, path in paths.items() if name != "bundle_dir"},
     }
     write_json(paths["manifest_json"], manifest)
@@ -116,7 +117,7 @@ def main() -> None:
         )
         target = publish_digest(
             enriched,
-            html_path=str(paths["digest_html"]),
+            # html_path=str(paths["digest_html"]),
             markdown_path=str(paths["digest_md"]),
             publisher=publisher,
         )