| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440 |
- #!/usr/bin/env python3
- """Publish RobotDaily digests to Discord via OpenClaw and optional Discord REST."""
- from __future__ import annotations
- import argparse
- import json
- import re
- from typing import Any, Dict, List, Optional
- from urllib.error import HTTPError
- from urllib.request import Request, urlopen
- from fetch_arxiv import DOMAIN_CONFIGS
- from utils import (
- format_authors,
- log,
- normalize_space,
- now_local,
- read_json,
- run_command_json,
- truncate,
- )
# Order in which the per-domain sections are walked when building the digest.
DOMAIN_ORDER = [
    "embodied",
    "representation",
    "reinforcement",
]

# Base URL prepended to every Discord REST route used in this file.
DISCORD_API = "https://discord.com/api/v10"

# Per-domain presentation: the embed "color" value and the emoji used in
# section headers and embed descriptions.
DOMAIN_STYLE = {
    domain: {"color": color, "emoji": emoji}
    for domain, color, emoji in (
        ("embodied", 0x4F8CFF, "🤖"),
        ("representation", 0x9B59B6, "🧠"),
        ("reinforcement", 0x2ECC71, "🎯"),
    )
}
class PublishError(RuntimeError):
    """Raised when the digest cannot be delivered to Discord.

    Used in this file for missing configuration, unknown delivery modes,
    and failed or empty responses from Discord / OpenClaw.
    """
def normalize_channel_name(name: str) -> str:
    """Turn a human-entered channel name into a lowercase, dash-separated slug.

    Leading ``#`` characters are dropped, the text is lowercased, and every
    run of whitespace becomes a single ``-``.

    Args:
        name: Raw channel name, e.g. ``"#Robot Daily"``.

    Returns:
        The slug, e.g. ``"robot-daily"``. May be empty when ``name`` holds
        nothing but ``#`` and whitespace; callers supply their own fallback.
    """
    # Strip again after removing the hashes: in "# general" the space that
    # followed "#" would otherwise survive and be turned into a leading "-".
    text = normalize_space(name).lstrip("#").strip().lower()
    return re.sub(r"\s+", "-", text)
class DiscordPublisher:
    """Deliver messages to Discord through the OpenClaw CLI and Discord REST.

    Plain messages, channel listing and thread handling go through the
    ``openclaw message ...`` command line. Channel creation and embed
    messages are sent straight to the Discord REST API and therefore need
    ``bot_token``.
    """

    def __init__(
        self,
        *,
        openclaw_bin: str,
        account_id: str,
        mode: str,
        guild_id: str,
        parent_channel_id: str,
        target_channel_id: str,
        target_channel_name: str,
        category_id: str,
        bot_token: str,
        thread_auto_archive_min: int,
        dry_run: bool,
    ) -> None:
        """Store the delivery configuration; performs no I/O.

        Args:
            openclaw_bin: Executable used as the first element of every CLI call.
            account_id: Value passed to the CLI ``--account`` flag.
            mode: Delivery mode: ``existing-channel``, ``fixed-channel``,
                ``thread`` or ``channel``.
            guild_id: Guild used for listing and creating channels/threads.
            parent_channel_id: Channel under which threads are listed/created.
            target_channel_id: Channel returned as-is in ``existing-channel`` mode.
            target_channel_name: Fixed channel name, or prefix of the dated
                channel name; ``robotdaily`` is used when empty.
            category_id: Sent as ``parent_id`` when a channel is created.
            bot_token: Bot token for the REST ``Authorization`` header.
            thread_auto_archive_min: Value passed to ``--auto-archive-min``.
            dry_run: Adds ``--dry-run`` to CLI calls and skips REST requests.
        """
        self.openclaw_bin = openclaw_bin
        self.account_id = account_id
        self.mode = mode
        self.guild_id = guild_id
        self.parent_channel_id = parent_channel_id
        self.target_channel_id = target_channel_id
        self.target_channel_name = target_channel_name
        self.category_id = category_id
        self.bot_token = bot_token
        self.thread_auto_archive_min = thread_auto_archive_min
        self.dry_run = dry_run

    def openclaw(self, *args: str) -> Dict[str, Any]:
        """Run ``<openclaw_bin> message <args>`` for Discord and return its JSON output."""
        command = [self.openclaw_bin, "message", *args, "--channel", "discord", "--account", self.account_id, "--json"]
        if self.dry_run:
            command.append("--dry-run")
        return run_command_json(command)

    def list_channels(self) -> List[Dict[str, Any]]:
        """Return the guild's channels, or ``[]`` when no guild is configured."""
        if not self.guild_id:
            return []
        reply = self.openclaw("channel", "list", "--guild-id", self.guild_id) or {}
        # ``or`` guards tolerate explicit nulls in the CLI reply.
        return (reply.get("payload") or {}).get("channels") or []

    def list_threads(self) -> List[Dict[str, Any]]:
        """Return up to 100 threads (archived included) of the parent channel.

        Returns ``[]`` when the guild or the parent channel is not configured.
        """
        if not self.guild_id or not self.parent_channel_id:
            return []
        reply = self.openclaw(
            "thread",
            "list",
            "--guild-id",
            self.guild_id,
            "--channel-id",
            self.parent_channel_id,
            "--include-archived",
            "--limit",
            "100",
        ) or {}
        threads_wrapper = (reply.get("payload") or {}).get("threads") or {}
        return threads_wrapper.get("threads") or []

    def find_existing_channel(self, name: str) -> Optional[str]:
        """Return the id of a channel matching ``name`` exactly or by slug, else ``None``."""
        wanted_exact = normalize_space(name)
        wanted_normalized = normalize_channel_name(name)
        for channel in self.list_channels():
            current_name = str(channel.get("name", ""))
            if current_name != wanted_exact and normalize_channel_name(current_name) != wanted_normalized:
                continue
            channel_id = str(channel.get("id") or "")
            # A match without an id is unusable; keep scanning instead of
            # handing the caller an empty string.
            if channel_id:
                return channel_id
        return None

    def find_existing_thread(self, name: str) -> Optional[str]:
        """Return the id of the thread whose name equals ``name``, else ``None``."""
        for thread in self.list_threads():
            if thread.get("name") != name:
                continue
            thread_id = str(thread.get("id") or "")
            if thread_id:
                return thread_id
        return None

    def _post_json(
        self,
        route: str,
        body: Dict[str, Any],
        *,
        error_prefix: str,
        log_http_error: bool = False,
    ) -> Dict[str, Any]:
        """POST ``body`` as JSON to ``DISCORD_API + route`` and return the decoded reply.

        Every failure is reported as ``PublishError`` so callers (and the
        Markdown fallback in ``publish_digest``) have a single exception
        type to handle.

        Raises:
            PublishError: On an HTTP error status, a network-level failure,
                or a reply that is not a JSON object.
        """
        request = Request(
            url=f"{DISCORD_API}{route}",
            method="POST",
            data=json.dumps(body).encode("utf-8"),
            headers={
                "Authorization": f"Bot {self.bot_token}",
                "Content-Type": "application/json",
                "User-Agent": "DiscordBot (https://github.com/openclaw/openclaw, 1.0)",
            },
        )
        try:
            with urlopen(request, timeout=30) as response:
                raw = response.read().decode("utf-8", errors="ignore")
        except HTTPError as exc:
            detail = exc.read().decode("utf-8", errors="ignore")
            if log_http_error:
                log(f"REST API 错误: {exc.code} {exc.reason} - {detail}")
            raise PublishError(f"{error_prefix}: {exc.code} {detail}") from exc
        except OSError as exc:
            # URLError and socket timeouts are OSError subclasses; HTTPError is
            # one too, which is why it is handled first above.
            raise PublishError(f"{error_prefix}: {exc}") from exc
        try:
            payload = json.loads(raw)
        except ValueError as exc:
            raise PublishError(f"{error_prefix}: invalid JSON response") from exc
        if not isinstance(payload, dict):
            raise PublishError(f"{error_prefix}: unexpected response shape")
        return payload

    def create_channel_via_rest(self, name: str, topic: str = "") -> str:
        """Create a guild text channel through Discord REST and return its id.

        Args:
            name: Desired channel name; it is slugified before sending.
            topic: Optional channel topic.

        Returns:
            The new channel id (possibly ``""`` if the reply has none), or a
            synthetic ``dry-run-channel-...`` id in dry-run mode.

        Raises:
            PublishError: If the guild id or bot token is missing, or the
                request fails.
        """
        if not self.guild_id:
            raise PublishError("channel 模式需要 DISCORD_GUILD_ID")
        if self.dry_run:
            return f"dry-run-channel-{normalize_channel_name(name) or 'robotdaily'}"
        if not self.bot_token:
            raise PublishError("创建 Discord 频道需要 DISCORD_BOT_TOKEN;当前 OpenClaw CLI 版本没有公开 channel create 子命令")
        body: Dict[str, Any] = {"name": normalize_channel_name(name) or "robotdaily", "type": 0}
        if self.category_id:
            body["parent_id"] = self.category_id
        if topic:
            body["topic"] = topic
        payload = self._post_json(
            f"/guilds/{self.guild_id}/channels",
            body,
            error_prefix="Discord channel create failed",
        )
        return str(payload.get("id", ""))

    def create_thread(self, thread_name: str, opening_message: str) -> str:
        """Return the id of the thread named ``thread_name``, creating it if needed.

        Raises:
            PublishError: If no parent channel is configured or the CLI
                returns no thread id outside dry-run mode.
        """
        if not self.parent_channel_id:
            raise PublishError("thread 模式需要 DISCORD_PARENT_CHANNEL_ID")
        existing = self.find_existing_thread(thread_name)
        if existing:
            return existing
        payload = self.openclaw(
            "thread",
            "create",
            "--target",
            f"channel:{self.parent_channel_id}",
            "--thread-name",
            thread_name,
            "--message",
            opening_message,
            "--auto-archive-min",
            str(self.thread_auto_archive_min),
        )
        thread = payload.get("payload", {}).get("thread", {})
        thread_id = str(thread.get("id", ""))
        if thread_id:
            return thread_id
        if self.dry_run:
            return f"dry-run-thread-{thread_name}"
        raise PublishError("OpenClaw thread create 没有返回 thread id")

    def _channel_prefix(self) -> str:
        """Return the slugified configured channel name, defaulting to ``robotdaily``."""
        return normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"

    def _find_or_create_channel(self, channel_name: str, title: str, empty_id_error: str) -> str:
        """Reuse the channel called ``channel_name`` or create it with ``title`` as topic."""
        existing = self.find_existing_channel(channel_name)
        if existing:
            return existing
        channel_id = self.create_channel_via_rest(channel_name, topic=truncate(title, 180))
        if not channel_id:
            raise PublishError(empty_id_error)
        return channel_id

    def create_fixed_channel(self, title: str) -> str:
        """Return the id of the fixed (undated) channel, creating it if missing.

        Raises:
            PublishError: If creation fails or yields an empty id.
        """
        return self._find_or_create_channel(
            self._channel_prefix(),
            title,
            "Discord fixed channel create 返回空 id",
        )

    def create_or_resolve_target(self, title: str, opening_message: str) -> str:
        """Resolve the channel/thread id to publish into, according to ``mode``.

        Args:
            title: Text used (truncated) as the topic of a newly created channel.
            opening_message: First message of a newly created thread.

        Returns:
            The id of the channel or thread that should receive the digest.

        Raises:
            PublishError: On missing configuration, a failed creation, or an
                unknown mode.
        """
        if self.mode == "existing-channel":
            if not self.target_channel_id:
                raise PublishError("existing-channel 模式需要 DISCORD_TARGET_CHANNEL_ID")
            return self.target_channel_id
        if self.mode == "fixed-channel":
            return self.create_fixed_channel(title)
        # Only the dated modes below need today's date.
        if self.mode == "thread":
            date_slug = now_local().strftime("%Y-%m-%d")
            return self.create_thread(f"RobotDaily {date_slug}", opening_message)
        if self.mode == "channel":
            date_slug = now_local().strftime("%Y-%m-%d")
            return self._find_or_create_channel(
                f"{self._channel_prefix()}-{date_slug}",
                title,
                "Discord channel create 返回空 id",
            )
        raise PublishError(f"未知的投递模式: {self.mode}")

    def send_embeds_via_rest(
        self,
        target_channel_id: str,
        content: str = "",
        embeds: Optional[List[Dict[str, Any]]] = None,
        components: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Post a message with embeds/components through Discord REST.

        Args:
            target_channel_id: Channel or thread id to post into.
            content: Optional plain text of the message.
            embeds: Optional list of embed objects.
            components: Optional list of message component rows.

        Returns:
            The decoded reply, or ``{"id": "dry-run-msg-id"}`` in dry-run mode.

        Raises:
            PublishError: If the bot token is missing or the request fails,
                including network-level failures.
        """
        if self.dry_run:
            log(
                f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}, components count {len(components or [])}"
            )
            return {"id": "dry-run-msg-id"}
        if not self.bot_token:
            raise PublishError("发送富文本卡片需要 DISCORD_BOT_TOKEN")
        body: Dict[str, Any] = {}
        if content:
            body["content"] = content
        if embeds:
            body["embeds"] = embeds
        if components:
            body["components"] = components
        return self._post_json(
            f"/channels/{target_channel_id}/messages",
            body,
            error_prefix="创建 Discord 消息失败",
            log_http_error=True,
        )

    def send_message(self, target_channel_id: str, message: str, media: str = "") -> Dict[str, Any]:
        """Send a plain message (optionally with a media attachment) via the CLI."""
        args = ["send", "--target", f"channel:{target_channel_id}", "--message", message]
        if media:
            args.extend(["--media", media])
        return self.openclaw(*args)
def build_opening_message(payload: Dict[str, Any]) -> str:
    """Compose the digest's first message: dated title plus paper totals.

    Args:
        payload: Digest data; ``papers`` supplies the total and ``counts``
            maps a domain key to its paper count.

    Returns:
        A two-line Markdown string. The per-domain breakdown is appended only
        when at least one domain in ``DOMAIN_ORDER`` has a non-zero count.
    """
    paper_total = len(payload.get("papers", []))
    per_domain = payload.get("counts", {})
    breakdown = " / ".join(
        f"{DOMAIN_CONFIGS[domain]['label_zh']} {per_domain.get(domain, 0)}"
        for domain in DOMAIN_ORDER
        if per_domain.get(domain, 0)
    )
    headline = f"**RobotDaily | {now_local().strftime('%Y-%m-%d')}**\n共 {paper_total} 篇偏应用论文"
    if not breakdown:
        return f"{headline}。"
    return f"{headline},按方向整理成短卡片:{breakdown}。"
def build_domain_header(domain: str, count: int) -> str:
    """Return the Markdown level-2 header that opens a domain section.

    Args:
        domain: Domain key, normally one of ``DOMAIN_ORDER``.
        count: Number of papers listed under the header.

    Returns:
        ``"## <emoji> <label>(<count> 篇)"``. Unknown domains fall back to a
        generic emoji and to the raw key (or ``未分类`` when the key is empty).
    """
    emoji = DOMAIN_STYLE.get(domain, {}).get("emoji", "📌")
    # Same label fallback as build_paper_embed, so an unknown domain gets a
    # header instead of a KeyError.
    label_zh = DOMAIN_CONFIGS.get(domain, {}).get("label_zh", domain or "未分类")
    return f"## {emoji} {label_zh}({count} 篇)"
def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
    """Build the Discord embed object (card) for one paper.

    Args:
        paper: Paper record. Keys read here: ``domain``, ``domain_rank``,
            ``title``, ``brief_explanation_zh``, ``selection_reason``,
            ``translated_abstract_zh``, ``authors``, ``tags``,
            ``published_local``, ``arxiv_id`` and ``abs_url``.

    Returns:
        A dict with ``title``, ``description``, ``color``, ``fields`` and
        ``footer``; ``url`` is present only when the paper has ``abs_url``.
    """
    domain = paper.get("domain", "")
    label_zh = DOMAIN_CONFIGS.get(domain, {}).get("label_zh", domain or "未分类")
    style = DOMAIN_STYLE.get(domain, {"color": 0x3498DB, "emoji": "📄"})

    title_prefix = f"{paper.get('domain_rank', '?')}. "
    title = truncate(title_prefix + str(paper.get("title", "")), 256)
    # First non-empty text wins: short explanation, selection reason, abstract.
    brief = truncate(
        paper.get("brief_explanation_zh", "")
        or paper.get("selection_reason", "")
        or paper.get("translated_abstract_zh", ""),
        200,
    )
    # Every field value falls back to a dash so it is never empty.
    authors = truncate(format_authors(paper.get("authors", []), limit=4), 120) or "—"
    tags = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"
    published = truncate(str(paper.get("published_local", ""))[:10], 32) or "—"
    arxiv_id = truncate(paper.get("arxiv_id", ""), 64) or "—"

    fields: List[Dict[str, Any]] = [
        {"name": "💡 简析", "value": brief or "—", "inline": False},
        {"name": "👤 作者", "value": authors, "inline": True},
        {"name": "🏷️ 标签", "value": tags, "inline": True},
    ]

    embed: Dict[str, Any] = {"title": title}
    abs_url = paper.get("abs_url", "")
    if abs_url:
        # Leave the optional key out rather than sending an empty string,
        # which is not a usable link for the embed title.
        embed["url"] = abs_url
    embed.update(
        {
            "description": f"{style['emoji']} **{label_zh}** · `{published}` · `arXiv:{arxiv_id}`",
            "color": style["color"],
            "fields": fields,
            "footer": {"text": "RobotDaily 卡片视图"},
        }
    )
    return embed
def build_link_buttons(paper: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build the message-component row holding a paper's link buttons.

    Args:
        paper: Paper record; ``doi_url``, ``abs_url`` and ``pdf_url`` are read.

    Returns:
        ``[]`` when the paper has none of the three URLs; otherwise a list
        with one row (``type`` 1) containing one button (``type`` 2,
        ``style`` 5) per available URL, ordered DOI, arXiv, PDF.
    """
    candidates = (
        ("DOI", paper.get("doi_url", "")),
        ("arXiv", paper.get("abs_url", "")),
        ("PDF", paper.get("pdf_url", "")),
    )
    link_buttons: List[Dict[str, Any]] = [
        {"type": 2, "style": 5, "label": label, "url": url}
        for label, url in candidates
        if url
    ]
    if link_buttons:
        return [{"type": 1, "components": link_buttons[:3]}]
    return []
def build_fallback_paper_markdown(paper: Dict[str, Any]) -> str:
    """Render one paper as plain Markdown, used when the embed card cannot be sent.

    Args:
        paper: Paper record; reads the title/rank, explanation and abstract
            texts, ``authors``, ``tags`` and the three link URLs.

    Returns:
        A newline-joined block: heading, quoted brief, author / tag / abstract
        bullets, plus a links bullet when at least one URL is present.
    """
    link_parts = [
        f"[{label}]({paper.get(key)})"
        for label, key in (("DOI", "doi_url"), ("arXiv", "abs_url"), ("PDF", "pdf_url"))
        if paper.get(key)
    ]
    tag_text = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"
    # First non-empty text wins: short explanation, selection reason, abstract.
    brief_source = (
        paper.get("brief_explanation_zh", "")
        or paper.get("selection_reason", "")
        or paper.get("translated_abstract_zh", "")
    )
    brief = truncate(brief_source, 180)
    abstract = truncate(paper.get("translated_abstract_zh", "") or paper.get("summary", ""), 220)
    author_text = truncate(format_authors(paper.get("authors", []), limit=4), 120)

    rendered = [
        f"### {paper.get('domain_rank', '?')}. {paper.get('title', '')}",
        f"> {brief or '—'}",
        f"- 作者:{author_text or '—'}",
        f"- 标签:{tag_text}",
        f"- 中文摘要:{abstract or '—'}",
    ]
    if link_parts:
        rendered.append(f"- 链接:{' | '.join(link_parts)}")
    return "\n".join(rendered)
def publish_digest(
    payload: Dict[str, Any],
    *,
    html_path: str = "",
    markdown_path: str = "",
    publisher: DiscordPublisher,
) -> str:
    """Publish the whole digest and return the id of the channel/thread used.

    Sends the opening message, then for each domain in ``DOMAIN_ORDER`` that
    has papers a header followed by one message per paper. Papers go out as
    embed cards when a bot token is configured, with a plain-Markdown
    fallback if the card fails. The Markdown archive is attached last.

    Args:
        payload: Digest data; ``papers`` is the list of paper records.
        html_path: Accepted for interface compatibility; not used here.
        markdown_path: Optional file attached as the final message.
        publisher: Configured publisher that performs the delivery.

    Returns:
        The target channel or thread id.

    Raises:
        PublishError: If the target cannot be resolved or created.
    """
    opening_message = build_opening_message(payload)
    # The opening message doubles as the title, i.e. the topic of a newly
    # created channel.
    target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)
    # NOTE(review): in thread mode the same text is also passed to thread
    # creation -- confirm it is not posted twice for a new thread.
    publisher.send_message(target_channel_id, opening_message)

    grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
    skipped = 0
    for paper in payload.get("papers", []):
        # .get() instead of indexing: a record without "domain" must not abort
        # the run after the opening message has already been posted.
        bucket = grouped.get(paper.get("domain", ""))
        if bucket is None:
            skipped += 1
        else:
            bucket.append(paper)
    if skipped:
        # Only domains in DOMAIN_ORDER are published; make the drop visible.
        log(f"Skipped {skipped} paper(s) whose domain is missing or not in DOMAIN_ORDER")

    for domain in DOMAIN_ORDER:
        papers = grouped[domain]
        if not papers:
            continue
        publisher.send_message(target_channel_id, build_domain_header(domain, len(papers)))
        for paper in papers:
            if publisher.bot_token:
                try:
                    publisher.send_embeds_via_rest(
                        target_channel_id,
                        embeds=[build_paper_embed(paper)],
                        components=build_link_buttons(paper),
                    )
                    continue
                except PublishError as exc:
                    log(f"富文本卡片发送失败,回退到纯 Markdown:{exc}")
            publisher.send_message(target_channel_id, build_fallback_paper_markdown(paper))

    if markdown_path:
        publisher.send_message(target_channel_id, "附一份 Markdown 归档版,方便桌面端检索。", media=markdown_path)
    return target_channel_id
def main() -> None:
    """Command-line entry point: parse flags, load the digest JSON, publish it.

    ``--input`` is required. Every other flag is optional and maps onto the
    ``DiscordPublisher`` setting of the same name.
    """
    parser = argparse.ArgumentParser(description="Publish RobotDaily digest to Discord")
    parser.add_argument("--input", required=True)
    # Optional string flags with their defaults, in help-output order.
    for flag, default in (
        ("--html", ""),
        ("--markdown", ""),
        ("--mode", "thread"),
        ("--openclaw-bin", "openclaw"),
        ("--account-id", "codex"),
        ("--guild-id", ""),
        ("--parent-channel-id", ""),
        ("--target-channel-id", ""),
        ("--target-channel-name", ""),
        ("--category-id", ""),
        ("--bot-token", ""),
    ):
        parser.add_argument(flag, default=default)
    parser.add_argument("--thread-auto-archive-min", type=int, default=10080)
    parser.add_argument("--dry-run", action="store_true")
    options = parser.parse_args()

    # A falsy result from read_json is replaced with an empty digest.
    digest = read_json(options.input, default={}) or {}

    publisher_fields = (
        "openclaw_bin",
        "account_id",
        "mode",
        "guild_id",
        "parent_channel_id",
        "target_channel_id",
        "target_channel_name",
        "category_id",
        "bot_token",
        "thread_auto_archive_min",
        "dry_run",
    )
    publisher = DiscordPublisher(**{field: getattr(options, field) for field in publisher_fields})

    destination = publish_digest(
        digest,
        html_path=options.html,
        markdown_path=options.markdown,
        publisher=publisher,
    )
    log(f"Digest published to Discord target: {destination}")


if __name__ == "__main__":
    main()
|