|
@@ -6,7 +6,6 @@ from __future__ import annotations
|
|
|
import argparse
|
|
import argparse
|
|
|
import json
|
|
import json
|
|
|
import re
|
|
import re
|
|
|
-from pathlib import Path
|
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
from typing import Any, Dict, List, Optional
|
|
|
from urllib.error import HTTPError
|
|
from urllib.error import HTTPError
|
|
|
from urllib.request import Request, urlopen
|
|
from urllib.request import Request, urlopen
|
|
@@ -24,6 +23,11 @@ from utils import (
|
|
|
|
|
|
|
|
DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
|
|
DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
|
|
|
DISCORD_API = "https://discord.com/api/v10"
|
|
DISCORD_API = "https://discord.com/api/v10"
|
|
|
|
|
# Per-domain presentation settings: the integer used as the embed "color" and
# the emoji shown next to the domain label in headers and cards.
DOMAIN_STYLE = {
    "embodied": dict(color=0x4F8CFF, emoji="🤖"),
    "representation": dict(color=0x9B59B6, emoji="🧠"),
    "reinforcement": dict(color=0x2ECC71, emoji="🎯"),
}
|
|
|
|
|
|
|
|
|
|
|
|
|
class PublishError(RuntimeError):
|
|
class PublishError(RuntimeError):
|
|
@@ -128,6 +132,7 @@ class DiscordPublisher:
|
|
|
headers={
|
|
headers={
|
|
|
"Authorization": f"Bot {self.bot_token}",
|
|
"Authorization": f"Bot {self.bot_token}",
|
|
|
"Content-Type": "application/json",
|
|
"Content-Type": "application/json",
|
|
|
|
|
+ "User-Agent": "DiscordBot (https://github.com/openclaw/openclaw, 1.0)",
|
|
|
},
|
|
},
|
|
|
)
|
|
)
|
|
|
try:
|
|
try:
|
|
@@ -203,19 +208,28 @@ class DiscordPublisher:
|
|
|
|
|
|
|
|
raise PublishError(f"未知的投递模式: {self.mode}")
|
|
raise PublishError(f"未知的投递模式: {self.mode}")
|
|
|
|
|
|
|
|
-
|
|
|
|
|
- def send_embeds_via_rest(self, target_channel_id: str, content: str = "", embeds: List[Dict[str, Any]] = None) -> Dict[str, Any]:
|
|
|
|
|
|
|
+ def send_embeds_via_rest(
|
|
|
|
|
+ self,
|
|
|
|
|
+ target_channel_id: str,
|
|
|
|
|
+ content: str = "",
|
|
|
|
|
+ embeds: Optional[List[Dict[str, Any]]] = None,
|
|
|
|
|
+ components: Optional[List[Dict[str, Any]]] = None,
|
|
|
|
|
+ ) -> Dict[str, Any]:
|
|
|
if self.dry_run:
|
|
if self.dry_run:
|
|
|
- log(f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}")
|
|
|
|
|
|
|
+ log(
|
|
|
|
|
+ f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}, components count {len(components or [])}"
|
|
|
|
|
+ )
|
|
|
return {"id": "dry-run-msg-id"}
|
|
return {"id": "dry-run-msg-id"}
|
|
|
if not self.bot_token:
|
|
if not self.bot_token:
|
|
|
- raise PublishError("发送 Embed 需要 DISCORD_BOT_TOKEN")
|
|
|
|
|
|
|
+ raise PublishError("发送富文本卡片需要 DISCORD_BOT_TOKEN")
|
|
|
|
|
|
|
|
body: Dict[str, Any] = {}
|
|
body: Dict[str, Any] = {}
|
|
|
if content:
|
|
if content:
|
|
|
body["content"] = content
|
|
body["content"] = content
|
|
|
if embeds:
|
|
if embeds:
|
|
|
body["embeds"] = embeds
|
|
body["embeds"] = embeds
|
|
|
|
|
+ if components:
|
|
|
|
|
+ body["components"] = components
|
|
|
|
|
|
|
|
request = Request(
|
|
request = Request(
|
|
|
url=f"{DISCORD_API}/channels/{target_channel_id}/messages",
|
|
url=f"{DISCORD_API}/channels/{target_channel_id}/messages",
|
|
@@ -224,6 +238,7 @@ class DiscordPublisher:
|
|
|
headers={
|
|
headers={
|
|
|
"Authorization": f"Bot {self.bot_token}",
|
|
"Authorization": f"Bot {self.bot_token}",
|
|
|
"Content-Type": "application/json",
|
|
"Content-Type": "application/json",
|
|
|
|
|
+ "User-Agent": "DiscordBot (https://github.com/openclaw/openclaw, 1.0)",
|
|
|
},
|
|
},
|
|
|
)
|
|
)
|
|
|
try:
|
|
try:
|
|
@@ -245,62 +260,99 @@ class DiscordPublisher:
|
|
|
def build_opening_message(payload: Dict[str, Any]) -> str:
    """Compose the opening line of the digest.

    The message starts with a bold ``RobotDaily | <date>`` banner, followed by
    the total number of papers. When at least one domain in ``DOMAIN_ORDER``
    has a non-zero entry in ``payload["counts"]``, a per-domain breakdown is
    appended.
    """
    paper_total = len(payload.get("papers", []))
    per_domain = payload.get("counts", {})

    # Only domains with a non-zero count are listed, in DOMAIN_ORDER order.
    pieces = [
        f"{DOMAIN_CONFIGS[name]['label_zh']} {per_domain.get(name, 0)}"
        for name in DOMAIN_ORDER
        if per_domain.get(name, 0)
    ]
    joined = " / ".join(pieces)

    date_stamp = now_local().strftime('%Y-%m-%d')
    if not joined:
        return f"**RobotDaily | {date_stamp}**\n共 {paper_total} 篇偏应用论文。"
    return f"**RobotDaily | {date_stamp}**\n共 {paper_total} 篇偏应用论文,按方向整理成短卡片:{joined}。"
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_domain_header(domain: str, count: int) -> str:
    """Return the Markdown heading that introduces one domain's section.

    Args:
        domain: Domain key used to look up ``DOMAIN_STYLE`` and
            ``DOMAIN_CONFIGS``.
        count: Number of papers listed under this domain.

    Returns:
        A level-2 Markdown heading with the domain emoji, its Chinese label
        and the paper count.
    """
    emoji = DOMAIN_STYLE.get(domain, {}).get("emoji", "📌")
    # Fall back to the raw key (or a generic "uncategorised" label) instead of
    # raising KeyError for a domain missing from DOMAIN_CONFIGS, mirroring the
    # label fallback used when building paper cards.
    label_zh = DOMAIN_CONFIGS.get(domain, {}).get("label_zh", domain or "未分类")
    return f"## {emoji} {label_zh}({count} 篇)"
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
    """Build the Discord embed dict for a single paper card.

    The card carries a ranked title, a one-line description (domain label,
    publication date, arXiv id), and three fields: a short explanation, the
    authors and up to four tags. Missing values are rendered as "—".

    Args:
        paper: Paper record. Keys read here: ``domain``, ``domain_rank``,
            ``title``, ``brief_explanation_zh``, ``selection_reason``,
            ``translated_abstract_zh``, ``authors``, ``tags``,
            ``published_local``, ``arxiv_id`` and ``abs_url``.

    Returns:
        A dict with ``title``, ``description``, ``color``, ``fields`` and
        ``footer``; ``url`` is included only when ``abs_url`` is non-empty.
    """
    domain = paper.get("domain", "")
    label_zh = DOMAIN_CONFIGS.get(domain, {}).get("label_zh", domain or "未分类")
    style = DOMAIN_STYLE.get(domain, {"color": 0x3498DB, "emoji": "📄"})

    # `or` fallbacks keep explicit None values from crashing the slicing below
    # or being rendered as the literal text "None".
    title_prefix = f"{paper.get('domain_rank', '?')}. "
    title = truncate(title_prefix + str(paper.get("title") or ""), 256)

    # Prefer the short explanation, then the selection reason, then the abstract.
    brief = truncate(
        paper.get("brief_explanation_zh", "")
        or paper.get("selection_reason", "")
        or paper.get("translated_abstract_zh", ""),
        200,
    )
    authors = truncate(format_authors(paper.get("authors") or [], limit=4), 120) or "—"
    tags = " ".join(f"`{tag}`" for tag in (paper.get("tags") or [])[:4]) or "—"
    # Only the first 10 characters of the timestamp are shown.
    published = truncate(str(paper.get("published_local") or "")[:10], 32) or "—"
    arxiv_id = truncate(str(paper.get("arxiv_id") or ""), 64) or "—"

    fields: List[Dict[str, Any]] = [
        {"name": "💡 简析", "value": brief or "—", "inline": False},
        {"name": "👤 作者", "value": authors, "inline": True},
        {"name": "🏷️ 标签", "value": tags, "inline": True},
    ]

    embed: Dict[str, Any] = {
        "title": title,
        "description": f"{style['emoji']} **{label_zh}** · `{published}` · `arXiv:{arxiv_id}`",
        "color": style["color"],
        "fields": fields,
        "footer": {"text": "RobotDaily 卡片视图"},
    }

    # Only attach a title link when one exists: an empty "url" is not a valid
    # URL and risks the whole embed being rejected by the API.
    abs_url = paper.get("abs_url")
    if abs_url:
        embed["url"] = abs_url
    return embed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_link_buttons(paper: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build the link-button row (DOI / arXiv / PDF) for a paper card.

    Args:
        paper: Paper record; ``doi_url``, ``abs_url`` and ``pdf_url`` are read.

    Returns:
        A list containing one action row (``type`` 1) holding a link button
        (``type`` 2, ``style`` 5) per usable URL, or an empty list when no
        usable URL is present.
    """
    candidates = (
        ("DOI", paper.get("doi_url")),
        ("arXiv", paper.get("abs_url")),
        ("PDF", paper.get("pdf_url")),
    )

    buttons: List[Dict[str, Any]] = []
    for label, raw_url in candidates:
        url = str(raw_url or "").strip()
        # Skip anything Discord would refuse as a link-button target (not
        # http(s), or longer than 512 characters) so that one bad link does not
        # cause the entire card to be rejected.
        if not url.lower().startswith(("http://", "https://")) or len(url) > 512:
            continue
        buttons.append({"type": 2, "style": 5, "label": label, "url": url})

    if not buttons:
        return []
    return [{"type": 1, "components": buttons}]
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def build_fallback_paper_markdown(paper: Dict[str, Any]) -> str:
    """Render one paper as plain Markdown, for use when an embed cannot be sent.

    The output is a level-3 heading with rank and title, a quoted short
    explanation, then bullet lines for authors, tags and the Chinese abstract,
    plus a links bullet when any of the DOI / arXiv / PDF URLs is present.
    """
    link_keys = (("DOI", "doi_url"), ("arXiv", "abs_url"), ("PDF", "pdf_url"))
    link_parts = [
        f"[{caption}]({paper.get(key)})"
        for caption, key in link_keys
        if paper.get(key)
    ]

    tag_text = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"

    # Prefer the short explanation, then the selection reason, then the abstract.
    brief_source = (
        paper.get("brief_explanation_zh", "")
        or paper.get("selection_reason", "")
        or paper.get("translated_abstract_zh", "")
    )
    brief_text = truncate(brief_source, 180)
    abstract_text = truncate(
        paper.get("translated_abstract_zh", "") or paper.get("summary", ""),
        220,
    )
    author_text = truncate(format_authors(paper.get('authors', []), limit=4), 120) or '—'

    output = [f"### {paper.get('domain_rank', '?')}. {paper.get('title', '')}"]
    output.append(f"> {brief_text or '—'}")
    output.append(f"- 作者:{author_text}")
    output.append(f"- 标签:{tag_text}")
    output.append(f"- 中文摘要:{abstract_text or '—'}")
    if link_parts:
        output.append(f"- 链接:{' | '.join(link_parts)}")
    return "\n".join(output)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def publish_digest(
|
|
def publish_digest(
|
|
|
payload: Dict[str, Any],
|
|
payload: Dict[str, Any],
|
|
|
*,
|
|
*,
|
|
@@ -311,11 +363,7 @@ def publish_digest(
|
|
|
opening_message = build_opening_message(payload)
|
|
opening_message = build_opening_message(payload)
|
|
|
target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
|
|
target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
|
|
|
|
|
|
|
|
- attached_message = opening_message + "\n\n今天起换成了全新卡片式排版,直接在 Discord 里看中译摘要和核心内容啦!"
|
|
|
|
|
- if html_path:
|
|
|
|
|
- publisher.send_message(target_channel_id, attached_message, media=html_path)
|
|
|
|
|
- else:
|
|
|
|
|
- publisher.send_message(target_channel_id, attached_message)
|
|
|
|
|
|
|
+ publisher.send_message(target_channel_id, opening_message)
|
|
|
|
|
|
|
|
grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
|
|
grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
|
|
|
for paper in payload.get("papers", []):
|
|
for paper in payload.get("papers", []):
|
|
@@ -325,31 +373,23 @@ def publish_digest(
|
|
|
papers = grouped.get(domain, [])
|
|
papers = grouped.get(domain, [])
|
|
|
if not papers:
|
|
if not papers:
|
|
|
continue
|
|
continue
|
|
|
-
|
|
|
|
|
- # Build embeds for the domain
|
|
|
|
|
- embeds = [build_paper_embed(paper) for paper in papers]
|
|
|
|
|
-
|
|
|
|
|
- # Discord limit is 10 embeds per message, we chunk them by 4 to be safe with total characters
|
|
|
|
|
- chunk_size = 4
|
|
|
|
|
- for i in range(0, len(embeds), chunk_size):
|
|
|
|
|
- chunk_embeds = embeds[i:i + chunk_size]
|
|
|
|
|
-
|
|
|
|
|
- # Print domain header on the first chunk
|
|
|
|
|
- msg_content = build_domain_header(domain, len(papers)) if i == 0 else ""
|
|
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ publisher.send_message(target_channel_id, build_domain_header(domain, len(papers)))
|
|
|
|
|
+ for paper in papers:
|
|
|
if publisher.bot_token:
|
|
if publisher.bot_token:
|
|
|
- # Use REST API to send rich embeds
|
|
|
|
|
- publisher.send_embeds_via_rest(target_channel_id, content=msg_content, embeds=chunk_embeds)
|
|
|
|
|
- else:
|
|
|
|
|
- # Fallback to plain text if no bot token
|
|
|
|
|
- if msg_content:
|
|
|
|
|
- publisher.send_message(target_channel_id, msg_content)
|
|
|
|
|
- for paper in papers[i:i + chunk_size]:
|
|
|
|
|
- fallback_text = f"**{paper.get('title')}**\n{paper.get('abs_url')}"
|
|
|
|
|
- publisher.send_message(target_channel_id, fallback_text)
|
|
|
|
|
|
|
+ try:
|
|
|
|
|
+ publisher.send_embeds_via_rest(
|
|
|
|
|
+ target_channel_id,
|
|
|
|
|
+ embeds=[build_paper_embed(paper)],
|
|
|
|
|
+ components=build_link_buttons(paper),
|
|
|
|
|
+ )
|
|
|
|
|
+ continue
|
|
|
|
|
+ except PublishError as exc:
|
|
|
|
|
+ log(f"富文本卡片发送失败,回退到纯 Markdown:{exc}")
|
|
|
|
|
+ publisher.send_message(target_channel_id, build_fallback_paper_markdown(paper))
|
|
|
|
|
|
|
|
if markdown_path:
|
|
if markdown_path:
|
|
|
- publisher.send_message(target_channel_id, "附一份 Markdown 归档版,桌面端检索会更方便。", media=markdown_path)
|
|
|
|
|
|
|
+ publisher.send_message(target_channel_id, "附一份 Markdown 归档版,方便桌面端检索。", media=markdown_path)
|
|
|
|
|
|
|
|
return target_channel_id
|
|
return target_channel_id
|
|
|
|
|
|