publish_discord.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. #!/usr/bin/env python3
  2. """Publish RobotDaily digests to Discord via OpenClaw and optional Discord REST."""
  3. from __future__ import annotations
  4. import argparse
  5. import json
  6. import re
  7. from typing import Any, Dict, List, Optional
  8. from urllib.error import HTTPError
  9. from urllib.request import Request, urlopen
  10. from fetch_arxiv import DOMAIN_CONFIGS
  11. from utils import (
  12. format_authors,
  13. log,
  14. normalize_space,
  15. now_local,
  16. read_json,
  17. run_command_json,
  18. truncate,
  19. )
  20. DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
  21. DISCORD_API = "https://discord.com/api/v10"
  22. DOMAIN_STYLE = {
  23. "embodied": {"color": 0x4F8CFF, "emoji": "🤖"},
  24. "representation": {"color": 0x9B59B6, "emoji": "🧠"},
  25. "reinforcement": {"color": 0x2ECC71, "emoji": "🎯"},
  26. }
  27. class PublishError(RuntimeError):
  28. pass
  29. def normalize_channel_name(name: str) -> str:
  30. text = normalize_space(name).lstrip("#").lower()
  31. text = re.sub(r"\s+", "-", text)
  32. return text
  33. class DiscordPublisher:
  34. def __init__(
  35. self,
  36. *,
  37. openclaw_bin: str,
  38. account_id: str,
  39. mode: str,
  40. guild_id: str,
  41. parent_channel_id: str,
  42. target_channel_id: str,
  43. target_channel_name: str,
  44. category_id: str,
  45. bot_token: str,
  46. thread_auto_archive_min: int,
  47. dry_run: bool,
  48. ) -> None:
  49. self.openclaw_bin = openclaw_bin
  50. self.account_id = account_id
  51. self.mode = mode
  52. self.guild_id = guild_id
  53. self.parent_channel_id = parent_channel_id
  54. self.target_channel_id = target_channel_id
  55. self.target_channel_name = target_channel_name
  56. self.category_id = category_id
  57. self.bot_token = bot_token
  58. self.thread_auto_archive_min = thread_auto_archive_min
  59. self.dry_run = dry_run
  60. def openclaw(self, *args: str) -> Dict[str, Any]:
  61. command = [self.openclaw_bin, "message", *args, "--channel", "discord", "--account", self.account_id, "--json"]
  62. if self.dry_run:
  63. command.append("--dry-run")
  64. return run_command_json(command)
  65. def list_channels(self) -> List[Dict[str, Any]]:
  66. if not self.guild_id:
  67. return []
  68. payload = self.openclaw("channel", "list", "--guild-id", self.guild_id)
  69. return payload.get("payload", {}).get("channels", [])
  70. def list_threads(self) -> List[Dict[str, Any]]:
  71. if not self.guild_id or not self.parent_channel_id:
  72. return []
  73. payload = self.openclaw(
  74. "thread",
  75. "list",
  76. "--guild-id",
  77. self.guild_id,
  78. "--channel-id",
  79. self.parent_channel_id,
  80. "--include-archived",
  81. "--limit",
  82. "100",
  83. )
  84. return payload.get("payload", {}).get("threads", {}).get("threads", [])
  85. def find_existing_channel(self, name: str) -> Optional[str]:
  86. wanted_exact = normalize_space(name)
  87. wanted_normalized = normalize_channel_name(name)
  88. for channel in self.list_channels():
  89. current_name = str(channel.get("name", ""))
  90. if current_name == wanted_exact or normalize_channel_name(current_name) == wanted_normalized:
  91. return str(channel.get("id", ""))
  92. return None
  93. def find_existing_thread(self, name: str) -> Optional[str]:
  94. for thread in self.list_threads():
  95. if thread.get("name") == name:
  96. return str(thread.get("id", ""))
  97. return None
  98. def create_channel_via_rest(self, name: str, topic: str = "") -> str:
  99. if not self.guild_id:
  100. raise PublishError("channel 模式需要 DISCORD_GUILD_ID")
  101. if self.dry_run:
  102. return f"dry-run-channel-{normalize_channel_name(name) or 'robotdaily'}"
  103. if not self.bot_token:
  104. raise PublishError("创建 Discord 频道需要 DISCORD_BOT_TOKEN;当前 OpenClaw CLI 版本没有公开 channel create 子命令")
  105. body: Dict[str, Any] = {"name": normalize_channel_name(name) or "robotdaily", "type": 0}
  106. if self.category_id:
  107. body["parent_id"] = self.category_id
  108. if topic:
  109. body["topic"] = topic
  110. request = Request(
  111. url=f"{DISCORD_API}/guilds/{self.guild_id}/channels",
  112. method="POST",
  113. data=json.dumps(body).encode("utf-8"),
  114. headers={
  115. "Authorization": f"Bot {self.bot_token}",
  116. "Content-Type": "application/json",
  117. "User-Agent": "DiscordBot (https://github.com/openclaw/openclaw, 1.0)",
  118. },
  119. )
  120. try:
  121. with urlopen(request, timeout=30) as response:
  122. payload = json.loads(response.read().decode("utf-8", errors="ignore"))
  123. except HTTPError as exc:
  124. detail = exc.read().decode("utf-8", errors="ignore")
  125. raise PublishError(f"Discord channel create failed: {exc.code} {detail}") from exc
  126. return str(payload.get("id", ""))
  127. def create_thread(self, thread_name: str, opening_message: str) -> str:
  128. if not self.parent_channel_id:
  129. raise PublishError("thread 模式需要 DISCORD_PARENT_CHANNEL_ID")
  130. existing = self.find_existing_thread(thread_name)
  131. if existing:
  132. return existing
  133. payload = self.openclaw(
  134. "thread",
  135. "create",
  136. "--target",
  137. f"channel:{self.parent_channel_id}",
  138. "--thread-name",
  139. thread_name,
  140. "--message",
  141. opening_message,
  142. "--auto-archive-min",
  143. str(self.thread_auto_archive_min),
  144. )
  145. thread = payload.get("payload", {}).get("thread", {})
  146. thread_id = str(thread.get("id", ""))
  147. if not thread_id and self.dry_run:
  148. return f"dry-run-thread-{thread_name}"
  149. if not thread_id:
  150. raise PublishError("OpenClaw thread create 没有返回 thread id")
  151. return thread_id
  152. def create_fixed_channel(self, title: str) -> str:
  153. channel_name = normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"
  154. existing = self.find_existing_channel(channel_name)
  155. if existing:
  156. return existing
  157. topic = truncate(title, 180)
  158. channel_id = self.create_channel_via_rest(channel_name, topic=topic)
  159. if not channel_id:
  160. raise PublishError("Discord fixed channel create 返回空 id")
  161. return channel_id
  162. def create_or_resolve_target(self, title: str, opening_message: str) -> str:
  163. date_slug = now_local().strftime("%Y-%m-%d")
  164. if self.mode == "existing-channel":
  165. if not self.target_channel_id:
  166. raise PublishError("existing-channel 模式需要 DISCORD_TARGET_CHANNEL_ID")
  167. return self.target_channel_id
  168. if self.mode == "fixed-channel":
  169. return self.create_fixed_channel(title)
  170. if self.mode == "thread":
  171. thread_name = f"RobotDaily {date_slug}"
  172. return self.create_thread(thread_name, opening_message)
  173. if self.mode == "channel":
  174. prefix = normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"
  175. channel_name = f"{prefix}-{date_slug}"
  176. existing = self.find_existing_channel(channel_name)
  177. if existing:
  178. return existing
  179. topic = truncate(title, 180)
  180. channel_id = self.create_channel_via_rest(channel_name, topic=topic)
  181. if not channel_id:
  182. raise PublishError("Discord channel create 返回空 id")
  183. return channel_id
  184. raise PublishError(f"未知的投递模式: {self.mode}")
  185. def send_embeds_via_rest(
  186. self,
  187. target_channel_id: str,
  188. content: str = "",
  189. embeds: Optional[List[Dict[str, Any]]] = None,
  190. components: Optional[List[Dict[str, Any]]] = None,
  191. ) -> Dict[str, Any]:
  192. if self.dry_run:
  193. log(
  194. f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}, components count {len(components or [])}"
  195. )
  196. return {"id": "dry-run-msg-id"}
  197. if not self.bot_token:
  198. raise PublishError("发送富文本卡片需要 DISCORD_BOT_TOKEN")
  199. body: Dict[str, Any] = {}
  200. if content:
  201. body["content"] = content
  202. if embeds:
  203. body["embeds"] = embeds
  204. if components:
  205. body["components"] = components
  206. request = Request(
  207. url=f"{DISCORD_API}/channels/{target_channel_id}/messages",
  208. method="POST",
  209. data=json.dumps(body).encode("utf-8"),
  210. headers={
  211. "Authorization": f"Bot {self.bot_token}",
  212. "Content-Type": "application/json",
  213. "User-Agent": "DiscordBot (https://github.com/openclaw/openclaw, 1.0)",
  214. },
  215. )
  216. try:
  217. with urlopen(request, timeout=30) as response:
  218. result = json.loads(response.read().decode("utf-8", errors="ignore"))
  219. return result
  220. except HTTPError as exc:
  221. detail = exc.read().decode("utf-8", errors="ignore")
  222. log(f"REST API 错误: {exc.code} {exc.reason} - {detail}")
  223. raise PublishError(f"创建 Discord 消息失败: {exc.code} {detail}") from exc
  224. def send_message(self, target_channel_id: str, message: str, media: str = "") -> Dict[str, Any]:
  225. args = ["send", "--target", f"channel:{target_channel_id}", "--message", message]
  226. if media:
  227. args.extend(["--media", media])
  228. return self.openclaw(*args)
  229. def build_opening_message(payload: Dict[str, Any]) -> str:
  230. total = len(payload.get("papers", []))
  231. counts = payload.get("counts", {})
  232. breakdown = []
  233. for domain in DOMAIN_ORDER:
  234. count = counts.get(domain, 0)
  235. if count:
  236. breakdown.append(f"{DOMAIN_CONFIGS[domain]['label_zh']} {count}")
  237. suffix = " / ".join(breakdown)
  238. if suffix:
  239. return f"**RobotDaily | {now_local().strftime('%Y-%m-%d')}**\n共 {total} 篇偏应用论文,按方向整理成短卡片:{suffix}。"
  240. return f"**RobotDaily | {now_local().strftime('%Y-%m-%d')}**\n共 {total} 篇偏应用论文。"
  241. def build_domain_header(domain: str, count: int) -> str:
  242. style = DOMAIN_STYLE.get(domain, {})
  243. emoji = style.get("emoji", "📌")
  244. return f"## {emoji} {DOMAIN_CONFIGS[domain]['label_zh']}({count} 篇)"
  245. def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
  246. domain = paper.get("domain", "")
  247. label_zh = DOMAIN_CONFIGS.get(domain, {}).get("label_zh", domain or "未分类")
  248. style = DOMAIN_STYLE.get(domain, {"color": 0x3498DB, "emoji": "📄"})
  249. title_prefix = f"{paper.get('domain_rank', '?')}. "
  250. title = truncate(title_prefix + str(paper.get("title", "")), 256)
  251. brief = truncate(
  252. paper.get("brief_explanation_zh", "")
  253. or paper.get("selection_reason", "")
  254. or paper.get("translated_abstract_zh", ""),
  255. 200,
  256. )
  257. authors = truncate(format_authors(paper.get("authors", []), limit=4), 120) or "—"
  258. tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"
  259. published = truncate(str(paper.get("published_local", ""))[:10], 32) or "—"
  260. arxiv_id = truncate(paper.get("arxiv_id", ""), 64) or "—"
  261. fields: List[Dict[str, Any]] = [
  262. {"name": "💡 简析", "value": brief or "—", "inline": False},
  263. {"name": "👤 作者", "value": authors, "inline": True},
  264. {"name": "🏷️ 标签", "value": tags, "inline": True},
  265. ]
  266. embed: Dict[str, Any] = {
  267. "title": title,
  268. "url": paper.get("abs_url", ""),
  269. "description": f"{style['emoji']} **{label_zh}** · `{published}` · `arXiv:{arxiv_id}`",
  270. "color": style["color"],
  271. "fields": fields,
  272. "footer": {"text": "RobotDaily 卡片视图"},
  273. }
  274. return embed
  275. def build_link_buttons(paper: Dict[str, Any]) -> List[Dict[str, Any]]:
  276. buttons: List[Dict[str, Any]] = []
  277. for label, url in [
  278. ("DOI", paper.get("doi_url", "")),
  279. ("arXiv", paper.get("abs_url", "")),
  280. ("PDF", paper.get("pdf_url", "")),
  281. ]:
  282. if not url:
  283. continue
  284. buttons.append({"type": 2, "style": 5, "label": label, "url": url})
  285. if not buttons:
  286. return []
  287. return [{"type": 1, "components": buttons[:3]}]
  288. def build_fallback_paper_markdown(paper: Dict[str, Any]) -> str:
  289. links = []
  290. if paper.get("doi_url"):
  291. links.append(f"[DOI]({paper.get('doi_url')})")
  292. if paper.get("abs_url"):
  293. links.append(f"[arXiv]({paper.get('abs_url')})")
  294. if paper.get("pdf_url"):
  295. links.append(f"[PDF]({paper.get('pdf_url')})")
  296. tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"
  297. brief = truncate(
  298. paper.get("brief_explanation_zh", "") or paper.get("selection_reason", "") or paper.get("translated_abstract_zh", ""),
  299. 180,
  300. )
  301. abstract = truncate(paper.get("translated_abstract_zh", "") or paper.get("summary", ""), 220)
  302. lines = [
  303. f"### {paper.get('domain_rank', '?')}. {paper.get('title', '')}",
  304. f"> {brief or '—'}",
  305. f"- 作者:{truncate(format_authors(paper.get('authors', []), limit=4), 120) or '—'}",
  306. f"- 标签:{tags}",
  307. f"- 中文摘要:{abstract or '—'}",
  308. ]
  309. if links:
  310. lines.append(f"- 链接:{' | '.join(links)}")
  311. return "\n".join(lines)
  312. def publish_digest(
  313. payload: Dict[str, Any],
  314. *,
  315. html_path: str = "",
  316. markdown_path: str = "",
  317. publisher: DiscordPublisher,
  318. ) -> str:
  319. opening_message = build_opening_message(payload)
  320. target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
  321. publisher.send_message(target_channel_id, opening_message)
  322. grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
  323. for paper in payload.get("papers", []):
  324. grouped.setdefault(paper["domain"], []).append(paper)
  325. for domain in DOMAIN_ORDER:
  326. papers = grouped.get(domain, [])
  327. if not papers:
  328. continue
  329. publisher.send_message(target_channel_id, build_domain_header(domain, len(papers)))
  330. for paper in papers:
  331. if publisher.bot_token:
  332. try:
  333. publisher.send_embeds_via_rest(
  334. target_channel_id,
  335. embeds=[build_paper_embed(paper)],
  336. components=build_link_buttons(paper),
  337. )
  338. continue
  339. except PublishError as exc:
  340. log(f"富文本卡片发送失败,回退到纯 Markdown:{exc}")
  341. publisher.send_message(target_channel_id, build_fallback_paper_markdown(paper))
  342. if markdown_path:
  343. publisher.send_message(target_channel_id, "附一份 Markdown 归档版,方便桌面端检索。", media=markdown_path)
  344. return target_channel_id
  345. def main() -> None:
  346. parser = argparse.ArgumentParser(description="Publish RobotDaily digest to Discord")
  347. parser.add_argument("--input", required=True)
  348. parser.add_argument("--html", default="")
  349. parser.add_argument("--markdown", default="")
  350. parser.add_argument("--mode", default="thread")
  351. parser.add_argument("--openclaw-bin", default="openclaw")
  352. parser.add_argument("--account-id", default="codex")
  353. parser.add_argument("--guild-id", default="")
  354. parser.add_argument("--parent-channel-id", default="")
  355. parser.add_argument("--target-channel-id", default="")
  356. parser.add_argument("--target-channel-name", default="")
  357. parser.add_argument("--category-id", default="")
  358. parser.add_argument("--bot-token", default="")
  359. parser.add_argument("--thread-auto-archive-min", type=int, default=10080)
  360. parser.add_argument("--dry-run", action="store_true")
  361. args = parser.parse_args()
  362. payload = read_json(args.input, default={}) or {}
  363. publisher = DiscordPublisher(
  364. openclaw_bin=args.openclaw_bin,
  365. account_id=args.account_id,
  366. mode=args.mode,
  367. guild_id=args.guild_id,
  368. parent_channel_id=args.parent_channel_id,
  369. target_channel_id=args.target_channel_id,
  370. target_channel_name=args.target_channel_name,
  371. category_id=args.category_id,
  372. bot_token=args.bot_token,
  373. thread_auto_archive_min=args.thread_auto_archive_min,
  374. dry_run=args.dry_run,
  375. )
  376. target = publish_digest(
  377. payload,
  378. html_path=args.html,
  379. markdown_path=args.markdown,
  380. publisher=publisher,
  381. )
  382. log(f"Digest published to Discord target: {target}")
  383. if __name__ == "__main__":
  384. main()