#!/usr/bin/env python3
# ClawLab / RobotDaily
"""Publish RobotDaily digests to Discord via OpenClaw and optional Discord REST."""

from __future__ import annotations

import argparse
import json
import re
from typing import Any, Dict, List, Optional
from urllib.error import HTTPError
from urllib.request import Request, urlopen

from fetch_arxiv import DOMAIN_CONFIGS
from utils import (
    format_authors,
    log,
    normalize_space,
    now_local,
    read_json,
    run_command_json,
    truncate,
)

# Order in which domain sections are emitted in the opening summary and in the digest body.
DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
# Base URL for direct Discord REST calls (API v10).
DISCORD_API = "https://discord.com/api/v10"
# Per-domain presentation: embed color (0xRRGGBB integer) and the emoji used in
# section headers and embed descriptions.
DOMAIN_STYLE = {
    "embodied": {"color": 0x4F8CFF, "emoji": "🤖"},
    "representation": {"color": 0x9B59B6, "emoji": "🧠"},
    "reinforcement": {"color": 0x2ECC71, "emoji": "🎯"},
}


class PublishError(RuntimeError):
    """Raised when a digest cannot be delivered to Discord.

    Covers configuration problems (missing ids or tokens, unknown delivery
    mode) as well as failed Discord REST calls.
    """


def normalize_channel_name(name: str) -> str:
    """Turn a human-entered channel name into a slug.

    The name is passed through ``normalize_space``, leading ``#`` characters
    are removed, the text is lower-cased and every run of whitespace becomes a
    single ``-``.
    """
    without_hash = normalize_space(name).lstrip("#")
    return re.sub(r"\s+", "-", without_hash.lower())


class DiscordPublisher:
    """Deliver RobotDaily messages to a Discord channel or thread.

    Plain messages, channel/thread listing and thread creation are issued
    through the OpenClaw CLI (``<openclaw_bin> message ...``). Channel creation
    and embed/component messages are sent directly to the Discord REST API and
    therefore require ``bot_token``.

    Every delivery failure raised by this class itself is a ``PublishError``,
    including network-level failures of the REST calls, so callers can rely on
    a single exception type for fallbacks.
    """

    # Settings shared by all direct Discord REST calls.
    _REST_TIMEOUT_SECONDS = 30
    _REST_USER_AGENT = "DiscordBot (https://github.com/openclaw/openclaw, 1.0)"
    # Channel name used when no target channel name is configured.
    _DEFAULT_CHANNEL_NAME = "robotdaily"

    def __init__(
        self,
        *,
        openclaw_bin: str,
        account_id: str,
        mode: str,
        guild_id: str,
        parent_channel_id: str,
        target_channel_id: str,
        target_channel_name: str,
        category_id: str,
        bot_token: str,
        thread_auto_archive_min: int,
        dry_run: bool,
    ) -> None:
        """Store the delivery configuration; no I/O happens here.

        Args:
            openclaw_bin: Executable used for OpenClaw CLI calls.
            account_id: Value passed to the CLI as ``--account``.
            mode: One of ``existing-channel``, ``fixed-channel``, ``thread``
                or ``channel`` (see ``create_or_resolve_target``).
            guild_id: Discord guild id; required for listing/creating channels.
            parent_channel_id: Channel under which threads are created.
            target_channel_id: Channel id used by ``existing-channel`` mode.
            target_channel_name: Channel name (or name prefix) for the
                ``fixed-channel`` and ``channel`` modes.
            category_id: Optional parent category for newly created channels.
            bot_token: Discord bot token for direct REST calls.
            thread_auto_archive_min: Auto-archive duration passed to thread
                creation, in minutes.
            dry_run: When true, CLI calls get ``--dry-run`` and REST calls are
                skipped.
        """
        self.openclaw_bin = openclaw_bin
        self.account_id = account_id
        self.mode = mode
        self.guild_id = guild_id
        self.parent_channel_id = parent_channel_id
        self.target_channel_id = target_channel_id
        self.target_channel_name = target_channel_name
        self.category_id = category_id
        self.bot_token = bot_token
        self.thread_auto_archive_min = thread_auto_archive_min
        self.dry_run = dry_run

    # ------------------------------------------------------------------
    # Low-level transports
    # ------------------------------------------------------------------

    def openclaw(self, *args: str) -> Dict[str, Any]:
        """Run ``<openclaw_bin> message <args...>`` for the Discord channel.

        ``--channel discord --account <account_id> --json`` is always appended,
        plus ``--dry-run`` when the publisher is in dry-run mode. The result is
        whatever ``run_command_json`` returns for that command line.
        """
        command = [self.openclaw_bin, "message", *args, "--channel", "discord", "--account", self.account_id, "--json"]
        if self.dry_run:
            command.append("--dry-run")
        return run_command_json(command)

    def _post_json(self, url: str, body: Dict[str, Any], *, failure_label: str) -> Dict[str, Any]:
        """POST ``body`` as JSON to a Discord REST ``url`` and return the decoded reply.

        Raises:
            PublishError: On an HTTP error status (message is
                ``"<failure_label>: <status> <response body>"``) or on any
                network-level failure such as DNS errors, refused connections
                or timeouts.
        """
        request = Request(
            url=url,
            method="POST",
            data=json.dumps(body).encode("utf-8"),
            headers={
                "Authorization": f"Bot {self.bot_token}",
                "Content-Type": "application/json",
                "User-Agent": self._REST_USER_AGENT,
            },
        )
        try:
            with urlopen(request, timeout=self._REST_TIMEOUT_SECONDS) as response:
                raw = response.read().decode("utf-8", errors="ignore")
        except HTTPError as exc:
            detail = exc.read().decode("utf-8", errors="ignore")
            log(f"REST API 错误: {exc.code} {exc.reason} - {detail}")
            raise PublishError(f"{failure_label}: {exc.code} {detail}") from exc
        except OSError as exc:
            # URLError, socket timeouts and connection resets are all OSError
            # subclasses; HTTPError (also one) is already handled above.
            raise PublishError(f"{failure_label}: {exc}") from exc
        return json.loads(raw)

    # ------------------------------------------------------------------
    # Lookups
    # ------------------------------------------------------------------

    def list_channels(self) -> List[Dict[str, Any]]:
        """Return the guild's channels as reported by OpenClaw, or ``[]`` without a guild id."""
        if not self.guild_id:
            return []
        response = self.openclaw("channel", "list", "--guild-id", self.guild_id)
        # ``or`` guards tolerate keys that are present but null.
        section = response.get("payload") or {}
        return section.get("channels") or []

    def list_threads(self) -> List[Dict[str, Any]]:
        """Return up to 100 threads (archived included) under the parent channel.

        Returns ``[]`` when either the guild id or the parent channel id is
        not configured.
        """
        if not self.guild_id or not self.parent_channel_id:
            return []
        response = self.openclaw(
            "thread",
            "list",
            "--guild-id",
            self.guild_id,
            "--channel-id",
            self.parent_channel_id,
            "--include-archived",
            "--limit",
            "100",
        )
        section = response.get("payload") or {}
        # The thread list is nested as payload.threads.threads.
        return (section.get("threads") or {}).get("threads") or []

    def find_existing_channel(self, name: str) -> Optional[str]:
        """Return the id of a channel matching ``name``, or ``None``.

        A channel matches when its name equals the whitespace-normalized
        ``name`` or when both names produce the same slug via
        ``normalize_channel_name``. Matches without a usable id are skipped.
        """
        wanted_exact = normalize_space(name)
        wanted_normalized = normalize_channel_name(name)
        for channel in self.list_channels():
            current_name = str(channel.get("name", ""))
            if current_name != wanted_exact and normalize_channel_name(current_name) != wanted_normalized:
                continue
            channel_id = channel.get("id")
            if channel_id:
                return str(channel_id)
        return None

    def find_existing_thread(self, name: str) -> Optional[str]:
        """Return the id of the first listed thread named exactly ``name``, or ``None``."""
        for thread in self.list_threads():
            if thread.get("name") != name:
                continue
            thread_id = thread.get("id")
            if thread_id:
                return str(thread_id)
        return None

    # ------------------------------------------------------------------
    # Target creation
    # ------------------------------------------------------------------

    def create_channel_via_rest(self, name: str, topic: str = "") -> str:
        """Create a guild channel through the Discord REST API and return its id.

        In dry-run mode nothing is sent and a ``dry-run-channel-<slug>``
        placeholder is returned. The returned id is ``""`` when Discord's
        reply carries no id.

        Raises:
            PublishError: If the guild id or bot token is missing, or the
                REST call fails.
        """
        if not self.guild_id:
            raise PublishError("channel 模式需要 DISCORD_GUILD_ID")
        slug = normalize_channel_name(name) or self._DEFAULT_CHANNEL_NAME
        if self.dry_run:
            return f"dry-run-channel-{slug}"
        if not self.bot_token:
            raise PublishError("创建 Discord 频道需要 DISCORD_BOT_TOKEN；当前 OpenClaw CLI 版本没有公开 channel create 子命令")

        # Channel type 0 is a guild text channel in the Discord API.
        body: Dict[str, Any] = {"name": slug, "type": 0}
        if self.category_id:
            body["parent_id"] = self.category_id
        if topic:
            body["topic"] = topic

        payload = self._post_json(
            f"{DISCORD_API}/guilds/{self.guild_id}/channels",
            body,
            failure_label="Discord channel create failed",
        )
        return str(payload.get("id") or "")

    def _ensure_channel(self, channel_name: str, title: str, *, empty_id_error: str) -> str:
        """Return the id of ``channel_name``, creating the channel if it does not exist.

        ``title`` (truncated to 180 characters) becomes the topic of a newly
        created channel. ``empty_id_error`` is the ``PublishError`` message
        used when creation yields no id.
        """
        existing = self.find_existing_channel(channel_name)
        if existing:
            return existing
        channel_id = self.create_channel_via_rest(channel_name, topic=truncate(title, 180))
        if not channel_id:
            raise PublishError(empty_id_error)
        return channel_id

    def create_thread(self, thread_name: str, opening_message: str) -> str:
        """Return the id of the thread ``thread_name``, creating it when absent.

        A new thread is created under the parent channel through OpenClaw with
        ``opening_message`` as its ``--message`` and the configured
        auto-archive duration.

        Raises:
            PublishError: If no parent channel is configured, or OpenClaw
                returns no thread id outside dry-run mode.
        """
        if not self.parent_channel_id:
            raise PublishError("thread 模式需要 DISCORD_PARENT_CHANNEL_ID")
        existing = self.find_existing_thread(thread_name)
        if existing:
            return existing
        response = self.openclaw(
            "thread",
            "create",
            "--target",
            f"channel:{self.parent_channel_id}",
            "--thread-name",
            thread_name,
            "--message",
            opening_message,
            "--auto-archive-min",
            str(self.thread_auto_archive_min),
        )
        thread = (response.get("payload") or {}).get("thread") or {}
        thread_id = str(thread.get("id") or "")
        if thread_id:
            return thread_id
        if self.dry_run:
            # A dry run has no real thread to report; hand back a placeholder.
            return f"dry-run-thread-{thread_name}"
        raise PublishError("OpenClaw thread create 没有返回 thread id")

    def create_fixed_channel(self, title: str) -> str:
        """Return the id of the single, reusable digest channel, creating it if needed.

        The channel name is the slug of ``target_channel_name`` (default
        ``robotdaily``).
        """
        channel_name = (
            normalize_channel_name(self.target_channel_name or self._DEFAULT_CHANNEL_NAME)
            or self._DEFAULT_CHANNEL_NAME
        )
        return self._ensure_channel(
            channel_name,
            title,
            empty_id_error="Discord fixed channel create 返回空 id",
        )

    def create_or_resolve_target(self, title: str, opening_message: str) -> str:
        """Resolve the channel or thread id to publish into, according to ``mode``.

        * ``existing-channel``: the configured ``target_channel_id``.
        * ``fixed-channel``: one reusable channel (see ``create_fixed_channel``).
        * ``thread``: a thread named ``RobotDaily <YYYY-MM-DD>``.
        * ``channel``: a channel named ``<prefix>-<YYYY-MM-DD>``.

        The date comes from ``now_local()`` and is only computed for the two
        dated modes.

        Raises:
            PublishError: On an unknown mode or missing mode-specific settings.
        """
        if self.mode == "existing-channel":
            if not self.target_channel_id:
                raise PublishError("existing-channel 模式需要 DISCORD_TARGET_CHANNEL_ID")
            return self.target_channel_id

        if self.mode == "fixed-channel":
            return self.create_fixed_channel(title)

        if self.mode == "thread":
            date_slug = now_local().strftime("%Y-%m-%d")
            return self.create_thread(f"RobotDaily {date_slug}", opening_message)

        if self.mode == "channel":
            date_slug = now_local().strftime("%Y-%m-%d")
            prefix = (
                normalize_channel_name(self.target_channel_name or self._DEFAULT_CHANNEL_NAME)
                or self._DEFAULT_CHANNEL_NAME
            )
            return self._ensure_channel(
                f"{prefix}-{date_slug}",
                title,
                empty_id_error="Discord channel create 返回空 id",
            )

        raise PublishError(f"未知的投递模式: {self.mode}")

    # ------------------------------------------------------------------
    # Sending
    # ------------------------------------------------------------------

    def send_embeds_via_rest(
        self,
        target_channel_id: str,
        content: str = "",
        embeds: Optional[List[Dict[str, Any]]] = None,
        components: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Post a message with optional embeds/components through the Discord REST API.

        Only non-empty parts are included in the request body. In dry-run mode
        the call is logged and ``{"id": "dry-run-msg-id"}`` is returned.

        Returns:
            The decoded JSON reply from Discord.

        Raises:
            PublishError: If the bot token is missing or the REST call fails.
        """
        if self.dry_run:
            log(
                f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}, components count {len(components or [])}"
            )
            return {"id": "dry-run-msg-id"}
        if not self.bot_token:
            raise PublishError("发送富文本卡片需要 DISCORD_BOT_TOKEN")

        body: Dict[str, Any] = {}
        if content:
            body["content"] = content
        if embeds:
            body["embeds"] = embeds
        if components:
            body["components"] = components

        return self._post_json(
            f"{DISCORD_API}/channels/{target_channel_id}/messages",
            body,
            failure_label="创建 Discord 消息失败",
        )

    def send_message(self, target_channel_id: str, message: str, media: str = "") -> Dict[str, Any]:
        """Send a plain message (optionally with a ``--media`` attachment) through OpenClaw."""
        args = ["send", "--target", f"channel:{target_channel_id}", "--message", message]
        if media:
            args.extend(["--media", media])
        return self.openclaw(*args)


def build_opening_message(payload: Dict[str, Any]) -> str:
    """Build the digest's opening line: a dated headline plus paper counts.

    The total is the length of ``payload["papers"]``. Per-domain counts are
    read from ``payload["counts"]`` and listed in ``DOMAIN_ORDER``; domains
    with a zero or missing count are left out, and the breakdown clause is
    dropped entirely when no domain has a count.
    """
    paper_total = len(payload.get("papers", []))
    per_domain = payload.get("counts", {})
    ordered_counts = ((domain, per_domain.get(domain, 0)) for domain in DOMAIN_ORDER)
    parts = [
        f"{DOMAIN_CONFIGS[domain]['label_zh']} {count}"
        for domain, count in ordered_counts
        if count
    ]
    headline = f"**RobotDaily | {now_local().strftime('%Y-%m-%d')}**"
    if not parts:
        return f"{headline}\n共 {paper_total} 篇偏应用论文。"
    return f"{headline}\n共 {paper_total} 篇偏应用论文，按方向整理成短卡片：{' / '.join(parts)}。"


def build_domain_header(domain: str, count: int) -> str:
    """Return the Markdown section header for one domain, e.g. ``## 🤖 <label>（3 篇）``.

    Both lookups degrade gracefully for a domain that is not configured: the
    emoji falls back to 📌 and the label falls back to the raw domain key (or
    ``未分类`` when the key is empty), the same label fallback that
    ``build_paper_embed`` uses, instead of raising ``KeyError``.
    """
    emoji = DOMAIN_STYLE.get(domain, {}).get("emoji", "📌")
    label_zh = DOMAIN_CONFIGS.get(domain, {}).get("label_zh", domain or "未分类")
    return f"## {emoji} {label_zh}（{count} 篇）"


def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
    """Build the Discord embed dict for a single paper card.

    The embed carries a ranked title (truncated to 256 characters), a
    description line with domain label, publication date and arXiv id, and
    three fields: brief analysis, authors (up to 4) and tags (up to 4).
    Missing values are rendered as ``—``. Unknown domains get a neutral color
    and a 📄 emoji.

    ``tags``, ``authors`` and ``arxiv_id`` may be absent or null. The ``url``
    key is only set when the paper has a non-empty ``abs_url``, so an empty
    string is never sent for this optional embed field.
    """
    domain = paper.get("domain", "")
    label_zh = DOMAIN_CONFIGS.get(domain, {}).get("label_zh", domain or "未分类")
    style = DOMAIN_STYLE.get(domain, {"color": 0x3498DB, "emoji": "📄"})

    title = truncate(f"{paper.get('domain_rank', '?')}. {paper.get('title', '')}", 256)
    # First non-empty of: brief explanation, selection reason, translated abstract.
    brief = truncate(
        paper.get("brief_explanation_zh", "")
        or paper.get("selection_reason", "")
        or paper.get("translated_abstract_zh", ""),
        200,
    )
    # ``or []`` / ``or ""`` keep explicit nulls in the payload from raising.
    authors = truncate(format_authors(paper.get("authors") or [], limit=4), 120) or "—"
    tags = " ".join(f"`{tag}`" for tag in (paper.get("tags") or [])[:4]) or "—"
    # Only the first 10 characters of the timestamp are shown (the date part
    # for an ISO-style string; the exact format is defined upstream).
    published = truncate(str(paper.get("published_local", ""))[:10], 32) or "—"
    arxiv_id = truncate(str(paper.get("arxiv_id") or ""), 64) or "—"

    fields: List[Dict[str, Any]] = [
        {"name": "💡 简析", "value": brief or "—", "inline": False},
        {"name": "👤 作者", "value": authors, "inline": True},
        {"name": "🏷️ 标签", "value": tags, "inline": True},
    ]

    embed: Dict[str, Any] = {
        "title": title,
        "description": f"{style['emoji']} **{label_zh}** · `{published}` · `arXiv:{arxiv_id}`",
        "color": style["color"],
        "fields": fields,
        "footer": {"text": "RobotDaily 卡片视图"},
    }
    abs_url = paper.get("abs_url")
    if abs_url:
        embed["url"] = abs_url
    return embed


def build_link_buttons(paper: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build the message ``components`` holding link buttons for a paper.

    One button is produced for each of ``doi_url``, ``abs_url`` and
    ``pdf_url`` that is non-empty, labelled DOI / arXiv / PDF in that order.
    The buttons are wrapped in a single row; with no links the result is an
    empty list.
    """
    link_sources = (("DOI", "doi_url"), ("arXiv", "abs_url"), ("PDF", "pdf_url"))
    candidates = [(label, paper.get(key, "")) for label, key in link_sources]
    row = [
        {"type": 2, "style": 5, "label": label, "url": url}
        for label, url in candidates
        if url
    ]
    # At most three buttons can exist; the slice mirrors that cap explicitly.
    return [{"type": 1, "components": row[:3]}] if row else []


def build_fallback_paper_markdown(paper: Dict[str, Any]) -> str:
    """Render one paper as plain Markdown, used when an embed cannot be sent.

    The output is a ranked ``###`` heading, a quoted brief (truncated to 180
    characters), bullet lines for authors (up to 4), tags (up to 4) and the
    translated abstract (truncated to 220 characters), and a final bullet of
    DOI / arXiv / PDF links when any are present. Missing values are shown
    as ``—``.
    """
    link_sources = (("DOI", "doi_url"), ("arXiv", "abs_url"), ("PDF", "pdf_url"))
    links = [f"[{label}]({paper.get(key)})" for label, key in link_sources if paper.get(key)]

    tag_text = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4]) or "—"
    brief = truncate(
        paper.get("brief_explanation_zh", "") or paper.get("selection_reason", "") or paper.get("translated_abstract_zh", ""),
        180,
    )
    abstract = truncate(paper.get("translated_abstract_zh", "") or paper.get("summary", ""), 220)
    author_text = truncate(format_authors(paper.get("authors", []), limit=4), 120) or "—"

    rendered = [
        f"### {paper.get('domain_rank', '?')}. {paper.get('title', '')}",
        f"> {brief or '—'}",
        f"- 作者：{author_text}",
        f"- 标签：{tag_text}",
        f"- 中文摘要：{abstract or '—'}",
    ]
    if links:
        rendered.append(f"- 链接：{' | '.join(links)}")
    return "\n".join(rendered)


def _send_paper(publisher: DiscordPublisher, target_channel_id: str, paper: Dict[str, Any]) -> None:
    """Send one paper: a rich embed when a bot token is set, plain Markdown otherwise or on failure."""
    if publisher.bot_token:
        try:
            publisher.send_embeds_via_rest(
                target_channel_id,
                embeds=[build_paper_embed(paper)],
                components=build_link_buttons(paper),
            )
        except (PublishError, OSError) as exc:
            # OSError covers network-level failures (DNS, refused connection,
            # timeout) so they also degrade to Markdown instead of aborting
            # the whole digest.
            log(f"富文本卡片发送失败，回退到纯 Markdown：{exc}")
        else:
            return
    publisher.send_message(target_channel_id, build_fallback_paper_markdown(paper))


def publish_digest(
    payload: Dict[str, Any],
    *,
    html_path: str = "",
    markdown_path: str = "",
    publisher: DiscordPublisher,
) -> str:
    """Publish a whole digest and return the id of the channel/thread used.

    Steps: resolve the target, post the opening message, then for each domain
    in ``DOMAIN_ORDER`` that has papers post a section header followed by one
    message per paper, and finally attach the Markdown archive when
    ``markdown_path`` is given.

    Args:
        payload: Digest data; ``papers`` is a list of paper dicts, each
            expected to carry a ``domain`` key.
        html_path: Accepted for interface compatibility; not used here.
        markdown_path: Optional file sent as a media attachment at the end.
        publisher: Configured ``DiscordPublisher``.

    Papers whose domain is missing or not listed in ``DOMAIN_ORDER`` are not
    published; their number is logged so the omission is visible.
    """
    opening_message = build_opening_message(payload)
    # The opening message doubles as the title (channel topic) for new targets.
    target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)

    # NOTE(review): in "thread" mode the opening message is also passed to the
    # thread-create call, so it may appear twice — confirm against OpenClaw's
    # `thread create --message` behavior.
    publisher.send_message(target_channel_id, opening_message)

    grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
    skipped = 0
    for paper in payload.get("papers") or []:
        bucket = grouped.get(paper.get("domain", ""))
        if bucket is None:
            skipped += 1
            continue
        bucket.append(paper)
    if skipped:
        log(f"Skipped {skipped} paper(s) whose domain is missing or not in DOMAIN_ORDER")

    for domain in DOMAIN_ORDER:
        papers = grouped[domain]
        if not papers:
            continue
        publisher.send_message(target_channel_id, build_domain_header(domain, len(papers)))
        for paper in papers:
            _send_paper(publisher, target_channel_id, paper)

    if markdown_path:
        publisher.send_message(target_channel_id, "附一份 Markdown 归档版，方便桌面端检索。", media=markdown_path)

    return target_channel_id


def main() -> None:
    """Command-line entry point: parse options, load the digest JSON and publish it."""
    parser = argparse.ArgumentParser(description="Publish RobotDaily digest to Discord")
    parser.add_argument("--input", required=True)

    # Plain string options and their defaults, in help-output order.
    string_options = (
        ("--html", ""),
        ("--markdown", ""),
        ("--mode", "thread"),
        ("--openclaw-bin", "openclaw"),
        ("--account-id", "codex"),
        ("--guild-id", ""),
        ("--parent-channel-id", ""),
        ("--target-channel-id", ""),
        ("--target-channel-name", ""),
        ("--category-id", ""),
        ("--bot-token", ""),
    )
    for flag, fallback in string_options:
        parser.add_argument(flag, default=fallback)

    # 10080 minutes = 7 days.
    parser.add_argument("--thread-auto-archive-min", type=int, default=10080)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    # A missing or empty input file degrades to an empty digest.
    payload = read_json(args.input, default={}) or {}

    # Every publisher setting maps 1:1 onto a parsed option of the same name.
    publisher_fields = (
        "openclaw_bin",
        "account_id",
        "mode",
        "guild_id",
        "parent_channel_id",
        "target_channel_id",
        "target_channel_name",
        "category_id",
        "bot_token",
        "thread_auto_archive_min",
        "dry_run",
    )
    publisher = DiscordPublisher(**{field: getattr(args, field) for field in publisher_fields})

    target = publish_digest(
        payload,
        html_path=args.html,
        markdown_path=args.markdown,
        publisher=publisher,
    )
    log(f"Digest published to Discord target: {target}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()