render_digest.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. #!/usr/bin/env python3
  2. """Render a mobile-friendly HTML digest and a Discord-friendly markdown digest."""
  3. from __future__ import annotations
  4. import argparse
  5. import json
  6. from collections import defaultdict
  7. from pathlib import Path
  8. from typing import Any, Dict, List
  9. from fetch_arxiv import DOMAIN_CONFIGS
  10. from utils import SKILL_DIR, format_authors, html_escape, normalize_space, now_local, read_json, write_text
# Order in which domains are emitted in both digests; each entry is used as a
# key into DOMAIN_CONFIGS and as the HTML anchor id of its section.
DOMAIN_ORDER = ["embodied", "representation", "reinforcement"]
# HTML skeleton read by render_html; it carries the {{...}} placeholders
# ({{date}}, {{intro}}, {{nav}}, {{sections}}, {{generated_at}}) filled in there.
TEMPLATE_PATH = SKILL_DIR / "assets" / "mobile_digest_template.html"
  13. def render_tag(tag: str) -> str:
  14. return f'<span class="tag">#{html_escape(tag)}</span>'
  15. def render_link(label: str, url: str) -> str:
  16. if not url:
  17. return ""
  18. safe_label = html_escape(label)
  19. safe_url = html_escape(url)
  20. return f'<a class="link-btn" href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_label}</a>'
  21. def render_paper_card(paper: Dict[str, Any]) -> str:
  22. domain_label = DOMAIN_CONFIGS[paper["domain"]]["label_zh"]
  23. tags_html = "".join(render_tag(tag) for tag in paper.get("tags", []))
  24. links_html = "".join(
  25. item
  26. for item in [
  27. render_link("打开 DOI", paper.get("doi_url", "")),
  28. render_link("打开 arXiv", paper.get("abs_url", "")),
  29. render_link("打开 PDF", paper.get("pdf_url", "")),
  30. ]
  31. if item
  32. )
  33. authors = html_escape(format_authors(paper.get("authors", []), limit=4))
  34. return f"""
  35. <details class=\"paper-card\">
  36. <summary>
  37. <div class=\"meta-row\">
  38. <span class=\"pill domain\">{html_escape(domain_label)}</span>
  39. <span class=\"pill score\">综合分 {paper.get('score_total', 0):.1f}</span>
  40. <span class=\"pill date\">{html_escape(paper.get('published_local', '')[:10])}</span>
  41. </div>
  42. <h3 class=\"paper-title\">{html_escape(paper.get('title', ''))}</h3>
  43. <p class=\"teaser\">{html_escape(paper.get('brief_explanation_zh', ''))}</p>
  44. <div class=\"tag-row\">{tags_html}</div>
  45. </summary>
  46. <div class=\"paper-body\">
  47. <div class=\"info\">
  48. <strong>作者:</strong>{authors}<br />
  49. <strong>arXiv:</strong>{html_escape(paper.get('arxiv_id', ''))}<br />
  50. <strong>入选原因:</strong>{html_escape(paper.get('selection_reason', ''))}
  51. </div>
  52. <div class=\"block\">
  53. <h3>中文摘要</h3>
  54. <p>{html_escape(paper.get('translated_abstract_zh', ''))}</p>
  55. </div>
  56. <div class=\"block\">
  57. <h3>原文摘要</h3>
  58. <p>{html_escape(paper.get('summary', ''))}</p>
  59. </div>
  60. <div class=\"links\">{links_html}</div>
  61. </div>
  62. </details>
  63. """.strip()
  64. def render_sections(papers: List[Dict[str, Any]]) -> Dict[str, str]:
  65. grouped: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
  66. for paper in papers:
  67. grouped[paper["domain"]].append(paper)
  68. nav_parts: List[str] = []
  69. section_parts: List[str] = []
  70. for domain in DOMAIN_ORDER:
  71. domain_papers = grouped.get(domain, [])
  72. if not domain_papers:
  73. continue
  74. label = DOMAIN_CONFIGS[domain]["label_zh"]
  75. nav_parts.append(f'<a href="#{domain}">{html_escape(label)} · {len(domain_papers)} 篇</a>')
  76. cards_html = "\n".join(render_paper_card(paper) for paper in domain_papers)
  77. section_parts.append(
  78. f"""
  79. <section class=\"section\" id=\"{domain}\">
  80. <div class=\"section-header\">
  81. <h2>{html_escape(label)}</h2>
  82. <span class=\"count\">{len(domain_papers)} 篇</span>
  83. </div>
  84. <div class=\"cards\">{cards_html}</div>
  85. </section>
  86. """.strip()
  87. )
  88. return {"nav": "".join(nav_parts), "sections": "\n".join(section_parts)}
  89. def render_html(payload: Dict[str, Any]) -> str:
  90. template = TEMPLATE_PATH.read_text(encoding="utf-8")
  91. papers = payload.get("papers", [])
  92. rendered = render_sections(papers)
  93. intro = f"{now_local().strftime('%Y-%m-%d')} · 具身智能 / 表征学习 / 强化学习 · 每个方向 2-3 篇偏应用候选。点开卡片即可看中文摘要、原文摘要与 DOI 链接。"
  94. replacements = {
  95. "{{date}}": now_local().strftime("%Y-%m-%d"),
  96. "{{intro}}": html_escape(intro),
  97. "{{nav}}": rendered["nav"],
  98. "{{sections}}": rendered["sections"],
  99. "{{generated_at}}": html_escape(now_local().strftime("%Y-%m-%d %H:%M %Z")),
  100. }
  101. html = template
  102. for placeholder, value in replacements.items():
  103. html = html.replace(placeholder, value)
  104. return html
  105. def render_markdown(payload: Dict[str, Any]) -> str:
  106. lines: List[str] = []
  107. lines.append(f"# RobotDaily | {now_local().strftime('%Y-%m-%d')}")
  108. lines.append("")
  109. lines.append("具身智能 / 表征学习 / 强化学习,每个方向 2-3 篇偏应用候选。")
  110. lines.append("")
  111. for domain in DOMAIN_ORDER:
  112. papers = [paper for paper in payload.get("papers", []) if paper.get("domain") == domain]
  113. if not papers:
  114. continue
  115. lines.append(f"## {DOMAIN_CONFIGS[domain]['label_zh']}({len(papers)} 篇)")
  116. lines.append("")
  117. for idx, paper in enumerate(papers, start=1):
  118. tags = " ".join(f"`{tag}`" for tag in paper.get("tags", []))
  119. lines.extend(
  120. [
  121. f"### {idx}. {paper.get('title', '')}",
  122. f"- 作者:{format_authors(paper.get('authors', []), limit=4)}",
  123. f"- 亮点:{paper.get('brief_explanation_zh', '')}",
  124. f"- 标签:{tags}",
  125. f"- DOI:{paper.get('doi_url', '')}",
  126. f"- arXiv:{paper.get('abs_url', '')}",
  127. f"- PDF:{paper.get('pdf_url', '')}",
  128. "",
  129. ]
  130. )
  131. return "\n".join(lines).strip() + "\n"
  132. def main() -> None:
  133. parser = argparse.ArgumentParser(description="Render RobotDaily digest HTML/markdown")
  134. parser.add_argument("--input", required=True)
  135. parser.add_argument("--html-output", default="")
  136. parser.add_argument("--md-output", default="")
  137. args = parser.parse_args()
  138. payload = read_json(args.input, default={}) or {}
  139. html = render_html(payload)
  140. markdown = render_markdown(payload)
  141. if args.html_output:
  142. write_text(args.html_output, html)
  143. else:
  144. print(html)
  145. if args.md_output:
  146. write_text(args.md_output, markdown)
  147. if __name__ == "__main__":
  148. main()