publish_discord.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. #!/usr/bin/env python3
  2. """Publish RobotDaily digests to Discord via OpenClaw and optional Discord REST."""
  3. from __future__ import annotations
  4. import argparse
  5. import json
  6. import re
  7. from pathlib import Path
  8. from typing import Any, Dict, List, Optional
  9. from urllib.error import HTTPError
  10. from urllib.request import Request, urlopen
  11. from fetch_arxiv import DOMAIN_CONFIGS
  12. from utils import (
  13. format_authors,
  14. log,
  15. normalize_space,
  16. now_local,
  17. read_json,
  18. run_command_json,
  19. truncate,
  20. )
  21. DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
  22. DISCORD_API = "https://discord.com/api/v10"
  23. class PublishError(RuntimeError):
  24. pass
  25. def normalize_channel_name(name: str) -> str:
  26. text = normalize_space(name).lstrip("#").lower()
  27. text = re.sub(r"\s+", "-", text)
  28. return text
  29. class DiscordPublisher:
  30. def __init__(
  31. self,
  32. *,
  33. openclaw_bin: str,
  34. account_id: str,
  35. mode: str,
  36. guild_id: str,
  37. parent_channel_id: str,
  38. target_channel_id: str,
  39. target_channel_name: str,
  40. category_id: str,
  41. bot_token: str,
  42. thread_auto_archive_min: int,
  43. dry_run: bool,
  44. ) -> None:
  45. self.openclaw_bin = openclaw_bin
  46. self.account_id = account_id
  47. self.mode = mode
  48. self.guild_id = guild_id
  49. self.parent_channel_id = parent_channel_id
  50. self.target_channel_id = target_channel_id
  51. self.target_channel_name = target_channel_name
  52. self.category_id = category_id
  53. self.bot_token = bot_token
  54. self.thread_auto_archive_min = thread_auto_archive_min
  55. self.dry_run = dry_run
  56. def openclaw(self, *args: str) -> Dict[str, Any]:
  57. command = [self.openclaw_bin, "message", *args, "--channel", "discord", "--account", self.account_id, "--json"]
  58. if self.dry_run:
  59. command.append("--dry-run")
  60. return run_command_json(command)
  61. def list_channels(self) -> List[Dict[str, Any]]:
  62. if not self.guild_id:
  63. return []
  64. payload = self.openclaw("channel", "list", "--guild-id", self.guild_id)
  65. return payload.get("payload", {}).get("channels", [])
  66. def list_threads(self) -> List[Dict[str, Any]]:
  67. if not self.guild_id or not self.parent_channel_id:
  68. return []
  69. payload = self.openclaw(
  70. "thread",
  71. "list",
  72. "--guild-id",
  73. self.guild_id,
  74. "--channel-id",
  75. self.parent_channel_id,
  76. "--include-archived",
  77. "--limit",
  78. "100",
  79. )
  80. return payload.get("payload", {}).get("threads", {}).get("threads", [])
  81. def find_existing_channel(self, name: str) -> Optional[str]:
  82. wanted_exact = normalize_space(name)
  83. wanted_normalized = normalize_channel_name(name)
  84. for channel in self.list_channels():
  85. current_name = str(channel.get("name", ""))
  86. if current_name == wanted_exact or normalize_channel_name(current_name) == wanted_normalized:
  87. return str(channel.get("id", ""))
  88. return None
  89. def find_existing_thread(self, name: str) -> Optional[str]:
  90. for thread in self.list_threads():
  91. if thread.get("name") == name:
  92. return str(thread.get("id", ""))
  93. return None
  94. def create_channel_via_rest(self, name: str, topic: str = "") -> str:
  95. if not self.guild_id:
  96. raise PublishError("channel 模式需要 DISCORD_GUILD_ID")
  97. if self.dry_run:
  98. return f"dry-run-channel-{normalize_channel_name(name) or 'robotdaily'}"
  99. if not self.bot_token:
  100. raise PublishError("创建 Discord 频道需要 DISCORD_BOT_TOKEN;当前 OpenClaw CLI 版本没有公开 channel create 子命令")
  101. body: Dict[str, Any] = {"name": normalize_channel_name(name) or "robotdaily", "type": 0}
  102. if self.category_id:
  103. body["parent_id"] = self.category_id
  104. if topic:
  105. body["topic"] = topic
  106. request = Request(
  107. url=f"{DISCORD_API}/guilds/{self.guild_id}/channels",
  108. method="POST",
  109. data=json.dumps(body).encode("utf-8"),
  110. headers={
  111. "Authorization": f"Bot {self.bot_token}",
  112. "Content-Type": "application/json",
  113. },
  114. )
  115. try:
  116. with urlopen(request, timeout=30) as response:
  117. payload = json.loads(response.read().decode("utf-8", errors="ignore"))
  118. except HTTPError as exc:
  119. detail = exc.read().decode("utf-8", errors="ignore")
  120. raise PublishError(f"Discord channel create failed: {exc.code} {detail}") from exc
  121. return str(payload.get("id", ""))
  122. def create_thread(self, thread_name: str, opening_message: str) -> str:
  123. if not self.parent_channel_id:
  124. raise PublishError("thread 模式需要 DISCORD_PARENT_CHANNEL_ID")
  125. existing = self.find_existing_thread(thread_name)
  126. if existing:
  127. return existing
  128. payload = self.openclaw(
  129. "thread",
  130. "create",
  131. "--target",
  132. f"channel:{self.parent_channel_id}",
  133. "--thread-name",
  134. thread_name,
  135. "--message",
  136. opening_message,
  137. "--auto-archive-min",
  138. str(self.thread_auto_archive_min),
  139. )
  140. thread = payload.get("payload", {}).get("thread", {})
  141. thread_id = str(thread.get("id", ""))
  142. if not thread_id and self.dry_run:
  143. return f"dry-run-thread-{thread_name}"
  144. if not thread_id:
  145. raise PublishError("OpenClaw thread create 没有返回 thread id")
  146. return thread_id
  147. def create_fixed_channel(self, title: str) -> str:
  148. channel_name = normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"
  149. existing = self.find_existing_channel(channel_name)
  150. if existing:
  151. return existing
  152. topic = truncate(title, 180)
  153. channel_id = self.create_channel_via_rest(channel_name, topic=topic)
  154. if not channel_id:
  155. raise PublishError("Discord fixed channel create 返回空 id")
  156. return channel_id
  157. def create_or_resolve_target(self, title: str, opening_message: str) -> str:
  158. date_slug = now_local().strftime("%Y-%m-%d")
  159. if self.mode == "existing-channel":
  160. if not self.target_channel_id:
  161. raise PublishError("existing-channel 模式需要 DISCORD_TARGET_CHANNEL_ID")
  162. return self.target_channel_id
  163. if self.mode == "fixed-channel":
  164. return self.create_fixed_channel(title)
  165. if self.mode == "thread":
  166. thread_name = f"RobotDaily {date_slug}"
  167. return self.create_thread(thread_name, opening_message)
  168. if self.mode == "channel":
  169. prefix = normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"
  170. channel_name = f"{prefix}-{date_slug}"
  171. existing = self.find_existing_channel(channel_name)
  172. if existing:
  173. return existing
  174. topic = truncate(title, 180)
  175. channel_id = self.create_channel_via_rest(channel_name, topic=topic)
  176. if not channel_id:
  177. raise PublishError("Discord channel create 返回空 id")
  178. return channel_id
  179. raise PublishError(f"未知的投递模式: {self.mode}")
  180. def send_message(self, target_channel_id: str, message: str, media: str = "") -> Dict[str, Any]:
  181. args = ["send", "--target", f"channel:{target_channel_id}", "--message", message]
  182. if media:
  183. args.extend(["--media", media])
  184. return self.openclaw(*args)
  185. def build_opening_message(payload: Dict[str, Any]) -> str:
  186. total = len(payload.get("papers", []))
  187. counts = payload.get("counts", {})
  188. parts = [f"老大早安~今天给你挑了 {total} 篇偏应用论文。"]
  189. for domain in DOMAIN_ORDER:
  190. count = counts.get(domain, 0)
  191. if count:
  192. parts.append(f"{DOMAIN_CONFIGS[domain]['label_zh']} {count} 篇")
  193. parts.append("下面每张卡片都带 DOI / arXiv / PDF,可直接点开读。")
  194. return " | ".join(parts)
  195. def build_domain_header(domain: str, count: int) -> str:
  196. return f"## {DOMAIN_CONFIGS[domain]['label_zh']}({count} 篇)"
  197. def build_paper_message(paper: Dict[str, Any]) -> str:
  198. tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:6])
  199. lines = [
  200. f"### {paper.get('domain_rank', '?')}. {paper.get('title', '')}",
  201. f"作者:{format_authors(paper.get('authors', []), limit=4)}",
  202. f"关键词:{tags}" if tags else "关键词:暂无",
  203. f"简析:{paper.get('brief_explanation_zh', '')}",
  204. f"摘要中译:{truncate(paper.get('translated_abstract_zh', ''), 700)}",
  205. f"DOI:{paper.get('doi_url', '')}",
  206. f"arXiv:{paper.get('abs_url', '')}",
  207. f"PDF:{paper.get('pdf_url', '')}",
  208. ]
  209. return "\n".join(lines)
  210. def publish_digest(
  211. payload: Dict[str, Any],
  212. *,
  213. html_path: str,
  214. markdown_path: str,
  215. publisher: DiscordPublisher,
  216. ) -> str:
  217. opening_message = build_opening_message(payload)
  218. target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
  219. attached_message = opening_message + "\n\n已附上移动端 HTML 晨读版,点开卡片能直接看中译摘要。"
  220. publisher.send_message(target_channel_id, attached_message, media=html_path)
  221. grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
  222. for paper in payload.get("papers", []):
  223. grouped.setdefault(paper["domain"], []).append(paper)
  224. for domain in DOMAIN_ORDER:
  225. papers = grouped.get(domain, [])
  226. if not papers:
  227. continue
  228. publisher.send_message(target_channel_id, build_domain_header(domain, len(papers)))
  229. for paper in papers:
  230. publisher.send_message(target_channel_id, build_paper_message(paper))
  231. if markdown_path:
  232. publisher.send_message(target_channel_id, "附一份 Markdown 归档版,桌面端检索会更方便。", media=markdown_path)
  233. return target_channel_id
  234. def main() -> None:
  235. parser = argparse.ArgumentParser(description="Publish RobotDaily digest to Discord")
  236. parser.add_argument("--input", required=True)
  237. parser.add_argument("--html", required=True)
  238. parser.add_argument("--markdown", default="")
  239. parser.add_argument("--mode", default="thread")
  240. parser.add_argument("--openclaw-bin", default="openclaw")
  241. parser.add_argument("--account-id", default="codex")
  242. parser.add_argument("--guild-id", default="")
  243. parser.add_argument("--parent-channel-id", default="")
  244. parser.add_argument("--target-channel-id", default="")
  245. parser.add_argument("--target-channel-name", default="")
  246. parser.add_argument("--category-id", default="")
  247. parser.add_argument("--bot-token", default="")
  248. parser.add_argument("--thread-auto-archive-min", type=int, default=10080)
  249. parser.add_argument("--dry-run", action="store_true")
  250. args = parser.parse_args()
  251. payload = read_json(args.input, default={}) or {}
  252. publisher = DiscordPublisher(
  253. openclaw_bin=args.openclaw_bin,
  254. account_id=args.account_id,
  255. mode=args.mode,
  256. guild_id=args.guild_id,
  257. parent_channel_id=args.parent_channel_id,
  258. target_channel_id=args.target_channel_id,
  259. target_channel_name=args.target_channel_name,
  260. category_id=args.category_id,
  261. bot_token=args.bot_token,
  262. thread_auto_archive_min=args.thread_auto_archive_min,
  263. dry_run=args.dry_run,
  264. )
  265. target = publish_digest(
  266. payload,
  267. html_path=args.html,
  268. markdown_path=args.markdown,
  269. publisher=publisher,
  270. )
  271. log(f"Digest published to Discord target: {target}")
  272. if __name__ == "__main__":
  273. main()