ClawLab
/
RobotDaily


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
							#!/usr/bin/env python3
"""Publish RobotDaily digests to Discord via OpenClaw and optional Discord REST."""

from __future__ import annotations

import argparse
import json
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.error import HTTPError
from urllib.request import Request, urlopen

from fetch_arxiv import DOMAIN_CONFIGS
from utils import (
    format_authors,
    log,
    normalize_space,
    now_local,
    read_json,
    run_command_json,
    truncate,
)

DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
DISCORD_API = "https://discord.com/api/v10"


class PublishError(RuntimeError):
    """Raised when a digest cannot be published to Discord.

    Used in this module for missing configuration, failed Discord REST
    calls, and OpenClaw responses that lack an expected id.
    """


def normalize_channel_name(name: str) -> str:
    """Turn a human-entered channel name into a lowercase, dash-separated slug.

    The name goes through ``normalize_space``, loses any leading ``#``
    characters, is lowercased, and has every remaining whitespace run
    replaced with a single ``-``.
    """
    cleaned = normalize_space(name)
    slug_source = cleaned.lstrip("#").lower()
    return re.sub(r"\s+", "-", slug_source)


class DiscordPublisher:
    """Resolve a Discord delivery target and post messages to it.

    Listing channels/threads, creating threads and sending plain messages
    shell out to the OpenClaw CLI (``<openclaw_bin> message ...``) through
    ``run_command_json``.  Creating channels and sending embeds call the
    Discord REST API directly and therefore require ``bot_token``.

    ``mode`` selects how the target is resolved; the accepted values are
    ``existing-channel``, ``fixed-channel``, ``thread`` and ``channel``
    (see ``create_or_resolve_target``).
    """

    def __init__(
        self,
        *,
        openclaw_bin: str,
        account_id: str,
        mode: str,
        guild_id: str,
        parent_channel_id: str,
        target_channel_id: str,
        target_channel_name: str,
        category_id: str,
        bot_token: str,
        thread_auto_archive_min: int,
        dry_run: bool,
    ) -> None:
        """Store the delivery configuration; no I/O happens here."""
        self.openclaw_bin = openclaw_bin  # executable used for every CLI call
        self.account_id = account_id  # passed to the CLI as --account
        self.mode = mode  # delivery mode, see create_or_resolve_target
        self.guild_id = guild_id  # needed for channel/thread listing and creation
        self.parent_channel_id = parent_channel_id  # channel that hosts threads
        self.target_channel_id = target_channel_id  # used by "existing-channel"
        self.target_channel_name = target_channel_name  # name / prefix for created channels
        self.category_id = category_id  # optional parent_id for created channels
        self.bot_token = bot_token  # Discord bot token for direct REST calls
        self.thread_auto_archive_min = thread_auto_archive_min
        self.dry_run = dry_run  # appends --dry-run to CLI calls, skips REST calls

    def openclaw(self, *args: str) -> Dict[str, Any]:
        """Run ``<openclaw_bin> message <args...>`` and return its JSON output.

        The Discord channel, the account id and ``--json`` are always added;
        ``--dry-run`` is appended when the publisher is in dry-run mode.
        """
        command = [self.openclaw_bin, "message", *args, "--channel", "discord", "--account", self.account_id, "--json"]
        if self.dry_run:
            command.append("--dry-run")
        return run_command_json(command)

    def list_channels(self) -> List[Dict[str, Any]]:
        """Return the guild's channels, or ``[]`` when no guild id is configured."""
        if not self.guild_id:
            return []
        payload = self.openclaw("channel", "list", "--guild-id", self.guild_id)
        return payload.get("payload", {}).get("channels", [])

    def list_threads(self) -> List[Dict[str, Any]]:
        """Return up to 100 threads (archived included) under the parent channel.

        Returns ``[]`` when either the guild id or the parent channel id is
        missing.
        """
        if not self.guild_id or not self.parent_channel_id:
            return []
        payload = self.openclaw(
            "thread",
            "list",
            "--guild-id",
            self.guild_id,
            "--channel-id",
            self.parent_channel_id,
            "--include-archived",
            "--limit",
            "100",
        )
        # The CLI response nests the list as payload.threads.threads.
        return payload.get("payload", {}).get("threads", {}).get("threads", [])

    def find_existing_channel(self, name: str) -> Optional[str]:
        """Return the id of a channel matching ``name``, or ``None``.

        A channel matches when its name equals the whitespace-normalized
        ``name`` or when both names normalize to the same channel slug.
        """
        wanted_exact = normalize_space(name)
        wanted_normalized = normalize_channel_name(name)
        for channel in self.list_channels():
            current_name = str(channel.get("name", ""))
            if current_name == wanted_exact or normalize_channel_name(current_name) == wanted_normalized:
                return str(channel.get("id", ""))
        return None

    def find_existing_thread(self, name: str) -> Optional[str]:
        """Return the id of the thread named exactly ``name``, or ``None``."""
        for thread in self.list_threads():
            if thread.get("name") == name:
                return str(thread.get("id", ""))
        return None

    def _post_json(self, path: str, body: Dict[str, Any]) -> Any:
        """POST ``body`` as JSON to ``DISCORD_API + path`` and return the decoded reply.

        The request is authenticated with ``self.bot_token`` and times out
        after 30 seconds.  ``HTTPError`` is intentionally not handled here so
        each endpoint can word its own ``PublishError``; other ``URLError``
        failures (DNS, timeouts) also propagate unchanged.
        """
        request = Request(
            url=f"{DISCORD_API}{path}",
            method="POST",
            data=json.dumps(body).encode("utf-8"),
            headers={
                "Authorization": f"Bot {self.bot_token}",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=30) as response:
            return json.loads(response.read().decode("utf-8", errors="ignore"))

    def create_channel_via_rest(self, name: str, topic: str = "") -> str:
        """Create a text channel through the Discord REST API and return its id.

        In dry-run mode nothing is sent and a ``dry-run-channel-<slug>``
        placeholder is returned.  The returned id may be an empty string when
        the API reply carries no ``id``.

        Raises:
            PublishError: if the guild id or bot token is missing, or Discord
                answers with an HTTP error.
        """
        if not self.guild_id:
            raise PublishError("channel 模式需要 DISCORD_GUILD_ID")
        if self.dry_run:
            return f"dry-run-channel-{normalize_channel_name(name) or 'robotdaily'}"
        if not self.bot_token:
            raise PublishError("创建 Discord 频道需要 DISCORD_BOT_TOKEN；当前 OpenClaw CLI 版本没有公开 channel create 子命令")

        # type 0 is sent as the channel type; parent_id/topic only when configured.
        body: Dict[str, Any] = {"name": normalize_channel_name(name) or "robotdaily", "type": 0}
        if self.category_id:
            body["parent_id"] = self.category_id
        if topic:
            body["topic"] = topic

        try:
            payload = self._post_json(f"/guilds/{self.guild_id}/channels", body)
        except HTTPError as exc:
            detail = exc.read().decode("utf-8", errors="ignore")
            raise PublishError(f"Discord channel create failed: {exc.code} {detail}") from exc
        return str(payload.get("id", ""))

    def create_thread(self, thread_name: str, opening_message: str) -> str:
        """Return the id of the thread ``thread_name``, creating it if needed.

        An existing thread with the same name is reused.  Otherwise the
        thread is created under the parent channel with ``opening_message``
        as its first message.  In dry-run mode a missing id is replaced by a
        ``dry-run-thread-<name>`` placeholder.

        Raises:
            PublishError: if no parent channel is configured, or the CLI
                reply contains no thread id outside dry-run mode.
        """
        if not self.parent_channel_id:
            raise PublishError("thread 模式需要 DISCORD_PARENT_CHANNEL_ID")
        existing = self.find_existing_thread(thread_name)
        if existing:
            return existing
        payload = self.openclaw(
            "thread",
            "create",
            "--target",
            f"channel:{self.parent_channel_id}",
            "--thread-name",
            thread_name,
            "--message",
            opening_message,
            "--auto-archive-min",
            str(self.thread_auto_archive_min),
        )
        thread = payload.get("payload", {}).get("thread", {})
        thread_id = str(thread.get("id", ""))
        if thread_id:
            return thread_id
        if self.dry_run:
            return f"dry-run-thread-{thread_name}"
        raise PublishError("OpenClaw thread create 没有返回 thread id")

    def _find_or_create_channel(self, channel_name: str, title: str, empty_id_error: str) -> str:
        """Reuse the channel called ``channel_name`` or create it.

        ``title`` (truncated to 180 via ``truncate``) becomes the topic of a
        newly created channel.  ``empty_id_error`` is the ``PublishError``
        message used when creation yields an empty id.
        """
        existing = self.find_existing_channel(channel_name)
        if existing:
            return existing
        channel_id = self.create_channel_via_rest(channel_name, topic=truncate(title, 180))
        if not channel_id:
            raise PublishError(empty_id_error)
        return channel_id

    def create_fixed_channel(self, title: str) -> str:
        """Return the id of the single long-lived digest channel, creating it if absent.

        The channel name is the normalized ``target_channel_name``, falling
        back to ``robotdaily``.

        Raises:
            PublishError: if channel creation fails or returns an empty id.
        """
        channel_name = normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"
        return self._find_or_create_channel(channel_name, title, "Discord fixed channel create 返回空 id")

    def create_or_resolve_target(self, title: str, opening_message: str) -> str:
        """Return the channel or thread id the digest should be posted to.

        Behaviour per ``self.mode``:

        * ``existing-channel`` - return the configured ``target_channel_id``.
        * ``fixed-channel`` - reuse or create one fixed channel.
        * ``thread`` - reuse or create the thread ``RobotDaily <YYYY-MM-DD>``.
        * ``channel`` - reuse or create the channel ``<prefix>-<YYYY-MM-DD>``.

        The date comes from ``now_local()`` and is only computed for the
        modes that need it.

        Raises:
            PublishError: for an unknown mode or when the selected mode's
                required configuration is missing or creation fails.
        """
        if self.mode == "existing-channel":
            if not self.target_channel_id:
                raise PublishError("existing-channel 模式需要 DISCORD_TARGET_CHANNEL_ID")
            return self.target_channel_id

        if self.mode == "fixed-channel":
            return self.create_fixed_channel(title)

        if self.mode == "thread":
            date_slug = now_local().strftime("%Y-%m-%d")
            return self.create_thread(f"RobotDaily {date_slug}", opening_message)

        if self.mode == "channel":
            date_slug = now_local().strftime("%Y-%m-%d")
            prefix = normalize_channel_name(self.target_channel_name or "robotdaily") or "robotdaily"
            return self._find_or_create_channel(
                f"{prefix}-{date_slug}",
                title,
                "Discord channel create 返回空 id",
            )

        raise PublishError(f"未知的投递模式: {self.mode}")

    def send_embeds_via_rest(
        self,
        target_channel_id: str,
        content: str = "",
        embeds: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Post a message with optional text and embeds via the Discord REST API.

        In dry-run mode the call is only logged and a placeholder
        ``{"id": "dry-run-msg-id"}`` is returned.

        Raises:
            PublishError: if the bot token is missing, if both ``content``
                and ``embeds`` are empty, or if Discord answers with an HTTP
                error.
        """
        if self.dry_run:
            log(f"[Dry Run] send_embeds_via_rest: channel {target_channel_id}, embeds count {len(embeds or [])}")
            return {"id": "dry-run-msg-id"}
        if not self.bot_token:
            raise PublishError("发送 Embed 需要 DISCORD_BOT_TOKEN")

        body: Dict[str, Any] = {}
        if content:
            body["content"] = content
        if embeds:
            body["embeds"] = embeds
        if not body:
            # Discord's Create Message endpoint needs at least one of these,
            # so fail with a clear message instead of a round trip to a 400.
            raise PublishError("Discord 消息不能为空：content 和 embeds 至少需要一个")

        try:
            return self._post_json(f"/channels/{target_channel_id}/messages", body)
        except HTTPError as exc:
            detail = exc.read().decode("utf-8", errors="ignore")
            log(f"REST API 错误: {exc.code} {exc.reason} - {detail}")
            raise PublishError(f"创建 Discord 消息失败: {exc.code} {detail}") from exc

    def send_message(self, target_channel_id: str, message: str, media: str = "") -> Dict[str, Any]:
        """Send a plain message (optionally with a media attachment) through OpenClaw.

        ``media`` is forwarded as ``--media`` only when non-empty.
        """
        args = ["send", "--target", f"channel:{target_channel_id}", "--message", message]
        if media:
            args.extend(["--media", media])
        return self.openclaw(*args)


def build_opening_message(payload: Dict[str, Any]) -> str:
    """Compose the one-line greeting that opens the digest.

    The line states the total number of papers, then lists the per-domain
    counts (in ``DOMAIN_ORDER``, skipping domains whose count is zero or
    absent), and ends with a hint about the links on each card.  Segments
    are joined with ``" | "``.
    """
    papers = payload.get("papers", [])
    per_domain = payload.get("counts", {})

    segments = [f"老大早安～今天给你挑了 {len(papers)} 篇偏应用论文。"]
    tallies = [(domain, per_domain.get(domain, 0)) for domain in DOMAIN_ORDER]
    segments.extend(
        f"{DOMAIN_CONFIGS[domain]['label_zh']} {tally} 篇"
        for domain, tally in tallies
        if tally
    )
    segments.append("下面每张卡片都带 DOI / arXiv / PDF，可直接点开读。")
    return " | ".join(segments)


def build_domain_header(domain: str, count: int) -> str:
    """Return the Markdown ``##`` heading for a domain section.

    The heading shows the domain's ``label_zh`` from ``DOMAIN_CONFIGS``
    followed by the paper count.
    """
    label = DOMAIN_CONFIGS[domain]["label_zh"]
    return f"## {label}（{count} 篇）"


def build_paper_embed(paper: Dict[str, Any]) -> Dict[str, Any]:
    """Build the Discord embed dict for a single paper.

    The embed title is ``"<domain_rank>. <title>"`` capped at 256
    characters.  The description lists authors (at most four, via
    ``format_authors``), up to six tags, the brief explanation, the
    translated abstract (truncated to 700 via ``truncate``) and DOI / arXiv /
    PDF links, capped at 4096 characters overall.

    Returns:
        A dict with ``title``, ``description`` and ``color``; ``url`` is
        included only when the paper has a non-empty ``abs_url``.
    """
    # ``tags`` may be present but null in the payload; treat that as no tags.
    tags = " ".join(f"`{tag}`" for tag in (paper.get("tags") or [])[:6])

    # Title max 256
    title = f"{paper.get('domain_rank', '?')}. {paper.get('title', '')}"
    if len(title) > 256:
        title = title[:253] + "..."

    description_lines = [f"**作者：** {format_authors(paper.get('authors', []), limit=4)}"]
    if tags:
        description_lines.append(f"**关键词：** {tags}")

    brief = paper.get("brief_explanation_zh", "")
    if brief:
        description_lines.append(f"\n**💡 简析**\n{brief}")

    abstract = truncate(paper.get("translated_abstract_zh", ""), 700)
    if abstract:
        description_lines.append(f"\n**📖 摘要**\n{abstract}")

    link_fields = (("DOI", "doi_url"), ("arXiv", "abs_url"), ("PDF", "pdf_url"))
    links = [f"[{label}]({paper[key]})" for label, key in link_fields if paper.get(key)]
    if links:
        description_lines.append(f"\n**🔗 链接：** {' | '.join(links)}")

    description = "\n".join(description_lines)
    if len(description) > 4096:
        description = description[:4093] + "..."

    embed: Dict[str, Any] = {
        "title": title,
        "description": description,
        "color": 0x3498DB,
    }
    # Leave ``url`` out entirely when there is no arXiv link: an empty string
    # is not a valid URL and should not be sent as one.
    abs_url = paper.get("abs_url")
    if abs_url:
        embed["url"] = abs_url

    return embed


def publish_digest(
    payload: Dict[str, Any],
    *,
    html_path: str = "",
    markdown_path: str = "",
    publisher: DiscordPublisher,
) -> str:
    """Publish the whole digest and return the id of the channel/thread used.

    Steps:

    1. Group ``payload["papers"]`` by their ``domain``.
    2. Resolve the target via ``publisher.create_or_resolve_target``.
    3. Send the opening message, attaching ``html_path`` when given.
    4. For every domain in ``DOMAIN_ORDER`` that has papers, send them in
       chunks of four: as embeds over REST when the publisher has a bot
       token, otherwise as plain ``title + abs_url`` messages.
    5. Optionally attach the Markdown archive.

    Papers whose domain is not listed in ``DOMAIN_ORDER`` are not published;
    this is reported through ``log`` instead of happening silently.

    Raises:
        KeyError: if a paper has no ``domain`` key (raised before anything
            is posted).
        PublishError: propagated from the publisher.
    """
    opening_message = build_opening_message(payload)

    # Group before any network side effect so a malformed payload fails early
    # rather than after the opening message has already been posted.
    grouped: Dict[str, List[Dict[str, Any]]] = {domain: [] for domain in DOMAIN_ORDER}
    skipped_domains: List[str] = []
    for paper in payload.get("papers", []):
        domain = paper["domain"]
        if domain in grouped:
            grouped[domain].append(paper)
        else:
            skipped_domains.append(str(domain))
    if skipped_domains:
        log(
            f"Skipping {len(skipped_domains)} paper(s) with domains outside DOMAIN_ORDER: "
            f"{', '.join(sorted(set(skipped_domains)))}"
        )

    target_channel_id = publisher.create_or_resolve_target(opening_message, opening_message)

    attached_message = opening_message + "\n\n今天起换成了全新卡片式排版，直接在 Discord 里看中译摘要和核心内容啦！"
    # An empty html_path is equivalent to sending without media.
    publisher.send_message(target_channel_id, attached_message, media=html_path)

    # Discord limit is 10 embeds per message, we chunk them by 4 to be safe with total characters
    chunk_size = 4
    for domain in DOMAIN_ORDER:
        papers = grouped[domain]
        if not papers:
            continue

        embeds = [build_paper_embed(paper) for paper in papers]
        for start in range(0, len(papers), chunk_size):
            # The domain header only accompanies the first chunk.
            msg_content = build_domain_header(domain, len(papers)) if start == 0 else ""

            if publisher.bot_token:
                # Use REST API to send rich embeds
                publisher.send_embeds_via_rest(
                    target_channel_id,
                    content=msg_content,
                    embeds=embeds[start:start + chunk_size],
                )
                continue

            # Fallback to plain text if no bot token
            if msg_content:
                publisher.send_message(target_channel_id, msg_content)
            for paper in papers[start:start + chunk_size]:
                fallback_text = f"**{paper.get('title')}**\n{paper.get('abs_url')}"
                publisher.send_message(target_channel_id, fallback_text)

    if markdown_path:
        publisher.send_message(target_channel_id, "附一份 Markdown 归档版，桌面端检索会更方便。", media=markdown_path)

    return target_channel_id


def main() -> None:
    """Command-line entry point.

    Parses the CLI options, loads the digest JSON given by ``--input``,
    builds a ``DiscordPublisher`` from the remaining options, publishes the
    digest and logs the resulting target id.
    """
    cli = argparse.ArgumentParser(description="Publish RobotDaily digest to Discord")
    cli.add_argument("--input", required=True)

    # Plain string options with their defaults, registered in this order.
    text_options = (
        ("--html", ""),
        ("--markdown", ""),
        ("--mode", "thread"),
        ("--openclaw-bin", "openclaw"),
        ("--account-id", "codex"),
        ("--guild-id", ""),
        ("--parent-channel-id", ""),
        ("--target-channel-id", ""),
        ("--target-channel-name", ""),
        ("--category-id", ""),
        ("--bot-token", ""),
    )
    for flag, fallback in text_options:
        cli.add_argument(flag, default=fallback)

    cli.add_argument("--thread-auto-archive-min", type=int, default=10080)
    cli.add_argument("--dry-run", action="store_true")
    opts = cli.parse_args()

    # A missing or empty input file yields an empty digest.
    digest = read_json(opts.input, default={}) or {}

    settings = {
        "openclaw_bin": opts.openclaw_bin,
        "account_id": opts.account_id,
        "mode": opts.mode,
        "guild_id": opts.guild_id,
        "parent_channel_id": opts.parent_channel_id,
        "target_channel_id": opts.target_channel_id,
        "target_channel_name": opts.target_channel_name,
        "category_id": opts.category_id,
        "bot_token": opts.bot_token,
        "thread_auto_archive_min": opts.thread_auto_archive_min,
        "dry_run": opts.dry_run,
    }
    publisher = DiscordPublisher(**settings)

    destination = publish_digest(
        digest,
        html_path=opts.html,
        markdown_path=opts.markdown,
        publisher=publisher,
    )
    log(f"Digest published to Discord target: {destination}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()