publish_discord.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. #!/usr/bin/env python3
  2. """Publish RobotDaily digests to Discord via OpenClaw and optional Discord REST."""
  3. from __future__ import annotations
  4. import argparse
  5. import json
  6. import re
  7. from pathlib import Path
  8. from typing import Any, Dict, List, Optional
  9. from urllib.error import HTTPError
  10. from urllib.request import Request, urlopen
  11. from fetch_arxiv import DOMAIN_CONFIGS
  12. from utils import (
  13. format_authors,
  14. log,
  15. normalize_space,
  16. now_local,
  17. read_json,
  18. run_command_json,
  19. truncate,
  20. )
  21. DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
  22. DISCORD_API = "https://discord.com/api/v10"
  23. class PublishError(RuntimeError):
  24. pass
  25. def normalize_channel_name(name: str) -> str:
  26. text = normalize_space(name).lstrip("#").lower()
  27. text = re.sub(r"\s+", "-", text)
  28. return text
  29. class DiscordPublisher:
  30. def __init__(
  31. self,
  32. *,
  33. openclaw_bin: str,
  34. account_id: str,
  35. mode: str,
  36. guild_id: str,
  37. parent_channel_id: str,
  38. target_channel_id: str,
  39. target_channel_name: str,
  40. category_id: str,
  41. bot_token: str,
  42. thread_auto_archive_min: int,
  43. dry_run: bool,
  44. ) -> None:
  45. self.openclaw_bin = openclaw_bin
  46. self.account_id = account_id
  47. self.mode = mode
  48. self.guild_id = guild_id
  49. self.parent_channel_id = parent_channel_id
  50. self.target_channel_id = target_channel_id
  51. self.target_channel_name = target_channel_name
  52. self.category_id = category_id
  53. self.bot_token = bot_token
  54. self.thread_auto_archive_min = thread_auto_archive_min
  55. self.dry_run = dry_run
  56. def openclaw(self, *args: str) -> Dict[str, Any]:
  57. command = [self.openclaw_bin, "message", *args, "--channel", "discord", "--account", self.account_id, "--json"]
  58. if self.dry_run:
  59. command.append("--dry-run")
  60. return run_command_json(command)
  61. def list_channels(self) -> List[Dict[str, Any]]:
  62. if not self.guild_id:
  63. return []
  64. payload = self.openclaw("channel", "list", "--guild-id", self.guild_id)
  65. return payload.get("payload", {}).get("channels", [])
  66. def list_threads(self) -> List[Dict[str, Any]]:
  67. if not self.guild_id or not self.parent_channel_id:
  68. return []
  69. payload = self.openclaw(
  70. "thread",
  71. "list",
  72. "--guild-id",
  73. self.guild_id,
  74. "--channel-id",
  75. self.parent_channel_id,
  76. "--include-archived",
  77. "--limit",
  78. "100",
  79. )
  80. return payload.get("payload", {}).get("threads", {}).get("threads", [])
  81. def find_existing_channel(self, name: str) -> Optional[str]:
  82. wanted_exact = normalize_space(name)
  83. wanted_normalized = normalize_channel_name(name)
  84. for channel in self.list_channels():
  85. current_name = str(channel.get("name", ""))
  86. if current_name == wanted_exact or normalize_channel_name(current_name) == wanted_normalized:
  87. return str(channel.get("id", ""))
  88. return None
  89. def find_existing_thread(self, name: str) -> Optional[str]:
  90. for thread in self.list_threads():
  91. if thread.get("name") == name:
  92. return str(thread.get("id", ""))
  93. return None
  94. def create_channel_via_rest(self, name: str, topic: str = "") -> str:
  95. if not self.guild_id:
  96. raise PublishError("channel 模式需要 DISCORD_GUILD_ID")
  97. if self.dry_run:
  98. return f"dry-run-channel-{normalize_channel_name(name) or 'robotdaily'}"
  99. if not self.bot_token:
  100. raise PublishError("创建 Discord 频道需要 DISCORD_BOT_TOKEN;当前 OpenClaw CLI 版本没有公开 channel create 子命令")
  101. body: Dict[str, Any] = {"name": normalize_channel_name(name) or "robotdaily", "type": 0}
  102. if self.category_id:
  103. body["parent_id"] = self.category_id
  104. if topic:
  105. body["topic"] = topic
  106. request = Request(
  107. url=f"{DISCORD_API}/guilds/{self.guild_id}/channels",
  108. method="POST",
  109. data=json.dumps(body).encode("utf-8"),
  110. headers={
  111. "Authorization": f"Bot {self.bot_token}",
  112. "Content-Type": "application/json",
  113. },
  114. )
  115. try:
  116. with urlopen(request, timeout=30) as response:
  117. payload = json.loads(response.read().decode("utf-8", errors="ignore"))
  118. except HTTPError as exc:
  119. detail = exc.read().decode("utf-8", errors="ignore")
  120. raise PublishError(f"Discord channel create failed: {exc.code} {detail}") from exc
  121. return str(payload.get("id", ""))
  122. def create_thread(self, thread_name: str, opening_message: str) -> str:
  123. if not self.parent_channel_id:
  124. raise PublishError("thread 模式需要 DISCORD_PARENT_CHANNEL_ID")
  125. existing = self.find_existing_thread(thread_name)
  126. if existing:
  127. return existing
  128. payload = self.openclaw(
  129. "thread",
  130. "create",
  131. "--target",
  132. f"channel:{self.parent_channel_id}",
  133. "--thread-name",
  134. thread_name,
  135. "--message",
  136. opening_message,
  137. "--auto-archive-min",
  138. str(self.thread_auto_archive_min),
  139. )
  140. thread = payload.get("payload", {}).get("thread", {})
  141. thread_id = str(thread.get("id", ""))
  142. if not thread_id and self.dry_run:
  143. return f"dry-run-thread-{thread_name}"
  144. if not thread_id:
  145. raise PublishError("OpenClaw thread create 没有返回 thread id")
  146. return thread_id
  147. def create_fixed_channel(self, title: str) -> str:
  148. channel_name = normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"
  149. existing = self.find_existing_channel(channel_name)
  150. if existing:
  151. return existing
  152. topic = truncate(title, 180)
  153. channel_id = self.create_channel_via_rest(channel_name, topic=topic)
  154. if not channel_id:
  155. raise PublishError("Discord fixed channel create 返回空 id")
  156. return channel_id
  157. def create_or_resolve_target(self, title: str, opening_message: str) -> str:
  158. date_slug = now_local().strftime("%Y-%m-%d")
  159. if self.mode == "existing-channel":
  160. if not self.target_channel_id:
  161. raise PublishError("existing-channel 模式需要 DISCORD_TARGET_CHANNEL_ID")
  162. return self.target_channel_id
  163. if self.mode == "fixed-channel":
  164. return self.create_fixed_channel(title)
  165. if self.mode == "thread":
  166. thread_name = f"RobotDaily {date_slug}"
  167. return self.create_thread(thread_name, opening_message)
  168. if self.mode == "channel":
  169. prefix = normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"
  170. channel_name = f"{prefix}-{date_slug}"
  171. existing = self.find_existing_channel(channel_name)
  172. if existing:
  173. return existing
  174. topic = truncate(title, 180)
  175. channel_id = self.create_channel_via_rest(channel_name, topic=topic)
  176. if not channel_id:
  177. raise PublishError("Discord channel create 返回空 id")
  178. return channel_id
  179. raise PublishError(f"未知的投递模式: {self.mode}")
  180. def send_embeds_via_rest(self, target_channel_id: str, content: str = "", embeds: List[Dict[str, Any]] = None) -> Dict[str, Any]:
  181. if self.dry_run:
  182. log(f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}")
  183. return {"id": "dry-run-msg-id"}
  184. if not self.bot_token:
  185. raise PublishError("发送 Embed 需要 DISCORD_BOT_TOKEN")
  186. body: Dict[str, Any] = {}
  187. if content:
  188. body["content"] = content
  189. if embeds:
  190. body["embeds"] = embeds
  191. request = Request(
  192. url=f"{DISCORD_API}/channels/{target_channel_id}/messages",
  193. method="POST",
  194. data=json.dumps(body).encode("utf-8"),
  195. headers={
  196. "Authorization": f"Bot {self.bot_token}",
  197. "Content-Type": "application/json",
  198. },
  199. )
  200. try:
  201. with urlopen(request, timeout=30) as response:
  202. result = json.loads(response.read().decode("utf-8", errors="ignore"))
  203. return result
  204. except HTTPError as exc:
  205. detail = exc.read().decode("utf-8", errors="ignore")
  206. log(f"REST API 错误: {exc.code} {exc.reason} - {detail}")
  207. raise PublishError(f"创建 Discord 消息失败: {exc.code} {detail}") from exc
  208. def send_message(self, target_channel_id: str, message: str, media: str = "") -> Dict[str, Any]:
  209. args = ["send", "--target", f"channel:{target_channel_id}", "--message", message]
  210. if media:
  211. args.extend(["--media", media])
  212. return self.openclaw(*args)
  213. def build_opening_message(payload: Dict[str, Any]) -> str:
  214. total = len(payload.get("papers", []))
  215. counts = payload.get("counts", {})
  216. parts = [f"老大早安~今天给你挑了 {total} 篇偏应用论文。"]
  217. for domain in DOMAIN_ORDER:
  218. count = counts.get(domain, 0)
  219. if count:
  220. parts.append(f"{DOMAIN_CONFIGS[domain]['label_zh']} {count} 篇")
  221. parts.append("下面每张卡片都带 DOI / arXiv / PDF,可直接点开读。")
  222. return " | ".join(parts)
  223. def build_domain_header(domain: str, count: int) -> str:
  224. return f"## {DOMAIN_CONFIGS[domain]['label_zh']}({count} 篇)"
  225. def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
  226. tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:6])
  227. # Title max 256
  228. title = f"{paper.get('domain_rank', '?')}. {paper.get('title', '')}"
  229. if len(title) > 256:
  230. title = title[:253] + "..."
  231. description_lines = []
  232. description_lines.append(f"**作者:** {format_authors(paper.get('authors', []), limit=4)}")
  233. if tags:
  234. description_lines.append(f"**关键词:** {tags}")
  235. brief = paper.get('brief_explanation_zh', '')
  236. if brief:
  237. description_lines.append(f"\n**💡 简析**\n{brief}")
  238. abstract = truncate(paper.get('translated_abstract_zh', ''), 700)
  239. if abstract:
  240. description_lines.append(f"\n**📖 摘要**\n{abstract}")
  241. links = []
  242. if paper.get('doi_url'): links.append(f"[DOI]({paper.get('doi_url')})")
  243. if paper.get('abs_url'): links.append(f"[arXiv]({paper.get('abs_url')})")
  244. if paper.get('pdf_url'): links.append(f"[PDF]({paper.get('pdf_url')})")
  245. if links:
  246. description_lines.append(f"\n**🔗 链接:** {' | '.join(links)}")
  247. description = "\n".join(description_lines)
  248. if len(description) > 4096:
  249. description = description[:4093] + "..."
  250. embed = {
  251. "title": title,
  252. "description": description,
  253. "url": paper.get('abs_url', ''),
  254. "color": 0x3498db
  255. }
  256. return embed
  257. def publish_digest(
  258. payload: Dict[str, Any],
  259. *,
  260. html_path: str = "",
  261. markdown_path: str = "",
  262. publisher: DiscordPublisher,
  263. ) -> str:
  264. opening_message = build_opening_message(payload)
  265. target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
  266. attached_message = opening_message + "\n\n今天起换成了全新卡片式排版,直接在 Discord 里看中译摘要和核心内容啦!"
  267. if html_path:
  268. publisher.send_message(target_channel_id, attached_message, media=html_path)
  269. else:
  270. publisher.send_message(target_channel_id, attached_message)
  271. grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
  272. for paper in payload.get("papers", []):
  273. grouped.setdefault(paper["domain"], []).append(paper)
  274. for domain in DOMAIN_ORDER:
  275. papers = grouped.get(domain, [])
  276. if not papers:
  277. continue
  278. # Build embeds for the domain
  279. embeds = [build_paper_embed(paper) for paper in papers]
  280. # Discord limit is 10 embeds per message, we chunk them by 4 to be safe with total characters
  281. chunk_size = 4
  282. for i in range(0, len(embeds), chunk_size):
  283. chunk_embeds = embeds[i:i + chunk_size]
  284. # Print domain header on the first chunk
  285. msg_content = build_domain_header(domain, len(papers)) if i == 0 else ""
  286. if publisher.bot_token:
  287. # Use REST API to send rich embeds
  288. publisher.send_embeds_via_rest(target_channel_id, content=msg_content, embeds=chunk_embeds)
  289. else:
  290. # Fallback to plain text if no bot token
  291. if msg_content:
  292. publisher.send_message(target_channel_id, msg_content)
  293. for paper in papers[i:i + chunk_size]:
  294. fallback_text = f"**{paper.get('title')}**\n{paper.get('abs_url')}"
  295. publisher.send_message(target_channel_id, fallback_text)
  296. if markdown_path:
  297. publisher.send_message(target_channel_id, "附一份 Markdown 归档版,桌面端检索会更方便。", media=markdown_path)
  298. return target_channel_id
  299. def main() -> None:
  300. parser = argparse.ArgumentParser(description="Publish RobotDaily digest to Discord")
  301. parser.add_argument("--input", required=True)
  302. parser.add_argument("--html", default="")
  303. parser.add_argument("--markdown", default="")
  304. parser.add_argument("--mode", default="thread")
  305. parser.add_argument("--openclaw-bin", default="openclaw")
  306. parser.add_argument("--account-id", default="codex")
  307. parser.add_argument("--guild-id", default="")
  308. parser.add_argument("--parent-channel-id", default="")
  309. parser.add_argument("--target-channel-id", default="")
  310. parser.add_argument("--target-channel-name", default="")
  311. parser.add_argument("--category-id", default="")
  312. parser.add_argument("--bot-token", default="")
  313. parser.add_argument("--thread-auto-archive-min", type=int, default=10080)
  314. parser.add_argument("--dry-run", action="store_true")
  315. args = parser.parse_args()
  316. payload = read_json(args.input, default={}) or {}
  317. publisher = DiscordPublisher(
  318. openclaw_bin=args.openclaw_bin,
  319. account_id=args.account_id,
  320. mode=args.mode,
  321. guild_id=args.guild_id,
  322. parent_channel_id=args.parent_channel_id,
  323. target_channel_id=args.target_channel_id,
  324. target_channel_name=args.target_channel_name,
  325. category_id=args.category_id,
  326. bot_token=args.bot_token,
  327. thread_auto_archive_min=args.thread_auto_archive_min,
  328. dry_run=args.dry_run,
  329. )
  330. target = publish_digest(
  331. payload,
  332. html_path=args.html,
  333. markdown_path=args.markdown,
  334. publisher=publisher,
  335. )
  336. log(f"Digest published to Discord target: {target}")
  337. if __name__ == "__main__":
  338. main()