| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185 |
- #!/usr/bin/env python3
- """Render a mobile-friendly HTML digest and a Discord-friendly markdown digest."""
- from __future__ import annotations
- import argparse
- import json
- from collections import defaultdict
- from pathlib import Path
- from typing import Any, Dict, List
- from fetch_arxiv import DOMAIN_CONFIGS
- from utils import SKILL_DIR, format_authors, html_escape, normalize_space, now_local, read_json, truncate, write_text
# Domains are rendered in this order in both the HTML and the Markdown digest;
# papers whose domain is not listed here are not rendered at all.
DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
# HTML template read by render_html(); it is expected to contain the
# {{date}}, {{intro}}, {{nav}}, {{sections}} and {{generated_at}} placeholders.
TEMPLATE_PATH = SKILL_DIR / "assets" / "mobile_digest_template.html"
def render_tag(tag: str) -> str:
    """Return a ``<span class="tag">`` element showing the escaped tag after a ``#``."""
    escaped = html_escape(tag)
    return '<span class="tag">#{}</span>'.format(escaped)
def render_link(label: str, url: str) -> str:
    """Return an anchor styled as a link button, or ``""`` when *url* is empty.

    Both the label and the URL are passed through ``html_escape`` before
    being placed in the markup. The link opens in a new tab.
    """
    if url:
        escaped_label = html_escape(label)
        escaped_url = html_escape(url)
        return (
            '<a class="link-btn" href="{}" target="_blank" '
            'rel="noopener noreferrer">{}</a>'
        ).format(escaped_url, escaped_label)
    # No URL means no button at all, so callers can simply join the results.
    return ""
def render_paper_card(paper: Dict[str, Any]) -> str:
    """Render one paper as a collapsible ``<details class="paper-card">`` element.

    The summary row shows the domain label, score, publication date, title,
    teaser and tags; the body shows authors, arXiv id, selection reason, the
    translated and original abstracts, and the DOI / arXiv / PDF link buttons.

    Args:
        paper: Paper record. ``paper["domain"]`` must be a key of
            ``DOMAIN_CONFIGS``. ``tags``, ``authors``, ``score_total``,
            ``published_local`` and the three URL fields may be missing or
            ``None`` (JSON ``null``); they then fall back to empty values.

    Returns:
        The HTML fragment for the card, without surrounding whitespace.

    Raises:
        KeyError: If ``paper`` has no ``"domain"`` or the domain is unknown.
    """
    domain_label = DOMAIN_CONFIGS[paper["domain"]]["label_zh"]

    # ``.get(key, default)`` does not help when the key is present with a JSON
    # null, so fall back with ``or`` before iterating / formatting / slicing.
    tags_html = "".join(render_tag(tag) for tag in paper.get("tags") or [])
    # render_link() returns "" for an empty URL, so missing links simply vanish.
    links_html = "".join(
        render_link(label, paper.get(key) or "")
        for label, key in (
            ("打开 DOI", "doi_url"),
            ("打开 arXiv", "abs_url"),
            ("打开 PDF", "pdf_url"),
        )
    )
    authors = html_escape(format_authors(paper.get("authors") or [], limit=4))
    score = float(paper.get("score_total") or 0)
    # Only the leading YYYY-MM-DD part of the timestamp is shown.
    published_day = str(paper.get("published_local") or "")[:10]

    card_lines = [
        '<details class="paper-card">',
        "<summary>",
        '<div class="meta-row">',
        f'<span class="pill domain">{html_escape(domain_label)}</span>',
        f'<span class="pill score">综合分 {score:.1f}</span>',
        f'<span class="pill date">{html_escape(published_day)}</span>',
        "</div>",
        f'<h3 class="paper-title">{html_escape(paper.get("title", ""))}</h3>',
        f'<p class="teaser">{html_escape(paper.get("brief_explanation_zh", ""))}</p>',
        f'<div class="tag-row">{tags_html}</div>',
        "</summary>",
        '<div class="paper-body">',
        '<div class="info">',
        f"<strong>作者:</strong>{authors}<br />",
        f'<strong>arXiv:</strong>{html_escape(paper.get("arxiv_id", ""))}<br />',
        f'<strong>入选原因:</strong>{html_escape(paper.get("selection_reason", ""))}',
        "</div>",
        '<div class="block">',
        "<h3>中文摘要</h3>",
        f'<p>{html_escape(paper.get("translated_abstract_zh", ""))}</p>',
        "</div>",
        '<div class="block">',
        "<h3>原文摘要</h3>",
        f'<p>{html_escape(paper.get("summary", ""))}</p>',
        "</div>",
        f'<div class="links">{links_html}</div>',
        "</div>",
        "</details>",
    ]
    return "\n".join(card_lines)
def render_sections(papers: List[Dict[str, Any]]) -> Dict[str, str]:
    """Group papers by domain and render the navigation links and sections.

    Domains are emitted in ``DOMAIN_ORDER``; a domain with no papers produces
    neither a nav link nor a section. Papers whose domain is missing or not in
    ``DOMAIN_ORDER`` are skipped, which matches how ``render_markdown``
    filters papers.

    Args:
        papers: Paper records, each normally carrying a ``"domain"`` key.

    Returns:
        A dict with two HTML strings: ``"nav"`` (anchor links, concatenated)
        and ``"sections"`` (one ``<section>`` per domain, newline-separated).
    """
    by_domain: Dict[Any, List[Dict[str, Any]]] = defaultdict(list)
    for paper in papers:
        # .get() rather than ["domain"]: one record without a domain must not
        # abort the whole digest; it lands under None and is never rendered.
        by_domain[paper.get("domain")].append(paper)

    nav_parts: List[str] = []
    section_parts: List[str] = []
    for domain in DOMAIN_ORDER:
        domain_papers = by_domain.get(domain, [])
        if not domain_papers:
            continue
        label = html_escape(DOMAIN_CONFIGS[domain]["label_zh"])
        count = len(domain_papers)
        nav_parts.append(f'<a href="#{domain}">{label} · {count} 篇</a>')
        cards_html = "\n".join(render_paper_card(paper) for paper in domain_papers)
        section_parts.append(
            "\n".join(
                [
                    f'<section class="section" id="{domain}">',
                    '<div class="section-header">',
                    f"<h2>{label}</h2>",
                    f'<span class="count">{count} 篇</span>',
                    "</div>",
                    f'<div class="cards">{cards_html}</div>',
                    "</section>",
                ]
            )
        )
    return {"nav": "".join(nav_parts), "sections": "\n".join(section_parts)}
def render_html(payload: Dict[str, Any]) -> str:
    """Fill the mobile digest HTML template with the rendered papers.

    Reads the template at ``TEMPLATE_PATH`` and substitutes the ``{{date}}``,
    ``{{intro}}``, ``{{nav}}``, ``{{sections}}`` and ``{{generated_at}}``
    placeholders.

    Args:
        payload: Digest data; ``payload["papers"]`` is the list of paper
            records (treated as empty when absent).

    Returns:
        The complete HTML document as a string.
    """
    import re  # function-scope import keeps this block self-contained

    template = TEMPLATE_PATH.read_text(encoding="utf-8")
    rendered = render_sections(payload.get("papers", []))

    # Sample the clock once so the date, intro and footer cannot disagree
    # when rendering happens to straddle midnight.
    now = now_local()
    today = now.strftime("%Y-%m-%d")
    intro = f"{today} · 具身智能 / 表征学习 / 强化学习 · 每个方向 2-3 篇偏应用候选。点开卡片即可看中文摘要、原文摘要与 DOI 链接。"
    replacements = {
        "{{date}}": today,
        "{{intro}}": html_escape(intro),
        "{{nav}}": rendered["nav"],
        "{{sections}}": rendered["sections"],
        "{{generated_at}}": html_escape(now.strftime("%Y-%m-%d %H:%M %Z")),
    }

    # Substitute in a single pass over the template. Chained str.replace()
    # calls would re-scan already inserted content, so a paper title or
    # abstract containing e.g. "{{generated_at}}" would be rewritten too.
    placeholder_re = re.compile("|".join(re.escape(key) for key in replacements))
    # A function replacement avoids backslash/group-reference processing of
    # the inserted HTML.
    return placeholder_re.sub(lambda match: replacements[match.group(0)], template)
def render_markdown(payload: Dict[str, Any]) -> str:
    """Build the Markdown digest: a dated title, then one section per domain.

    Domains follow ``DOMAIN_ORDER`` and empty domains are omitted. Each paper
    becomes a numbered ``###`` heading followed by a quoted brief, authors,
    up to four tags, a truncated abstract and its DOI / arXiv / PDF links.
    Missing values are shown as an em dash.

    Args:
        payload: Digest data; ``payload["papers"]`` is the list of paper
            records (treated as empty when absent).

    Returns:
        The Markdown text, ending with exactly one newline.
    """
    out: List[str] = [
        f"# RobotDaily | {now_local().strftime('%Y-%m-%d')}",
        "",
        "偏应用导向精选,按方向整理成短卡片式 Markdown 归档。",
        "",
    ]
    all_papers = payload.get("papers", [])
    link_fields = (("DOI", "doi_url"), ("arXiv", "abs_url"), ("PDF", "pdf_url"))

    for domain in DOMAIN_ORDER:
        selected = [entry for entry in all_papers if entry.get("domain") == domain]
        if not selected:
            continue
        out.append(f"## {DOMAIN_CONFIGS[domain]['label_zh']}({len(selected)} 篇)")
        out.append("")

        number = 0
        for paper in selected:
            number += 1

            # At most four tags, each wrapped in backticks.
            tag_text = " ".join(f"`{tag}`" for tag in paper.get("tags", [])[:4])
            if not tag_text:
                tag_text = "—"

            # Brief: first non-empty of explanation, selection reason, abstract.
            brief_source = (
                paper.get("brief_explanation_zh", "")
                or paper.get("selection_reason", "")
                or paper.get("translated_abstract_zh", "")
            )
            brief = truncate(brief_source, 180)
            abstract = truncate(
                paper.get("translated_abstract_zh", "") or paper.get("summary", ""),
                450,
            )

            links: List[str] = []
            for label, key in link_fields:
                url = paper.get(key)
                if url:
                    links.append(f"[{label}]({url})")

            author_text = format_authors(paper.get("authors", []), limit=4) or "—"

            out.append(f"### {number}. {paper.get('title', '')}")
            out.append(f"> {brief or '—'}")
            out.append(f"- 作者:{author_text}")
            out.append(f"- 标签:{tag_text}")
            out.append(f"- 中文摘要:{abstract or '—'}")
            out.append(f"- 链接:{' | '.join(links) or '—'}")
            out.append("")

    return "\n".join(out).strip() + "\n"
def main() -> None:
    """Command-line entry point: render the digest from a JSON input file.

    ``--input`` (required) is the JSON payload. The HTML goes to
    ``--html-output`` when given and to stdout otherwise. The Markdown is
    written only when ``--md-output`` is given.
    """
    cli = argparse.ArgumentParser(description="Render RobotDaily digest HTML/markdown")
    cli.add_argument("--input", required=True)
    for flag in ("--html-output", "--md-output"):
        cli.add_argument(flag, default="")
    options = cli.parse_args()

    # A missing or empty input is rendered as an empty digest.
    data = read_json(options.input, default={}) or {}
    html_doc = render_html(data)
    markdown_doc = render_markdown(data)

    if not options.html_output:
        print(html_doc)
    else:
        write_text(options.html_output, html_doc)

    if options.md_output:
        write_text(options.md_output, markdown_doc)


if __name__ == "__main__":
    main()
|