|
|
@@ -0,0 +1,163 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+"""Publish RobotDaily markdown bundles into a Hugo content section."""
|
|
|
+
|
|
|
+from __future__ import annotations
|
|
|
+
|
|
|
+import argparse
|
|
|
+import json
|
|
|
+import re
|
|
|
+from pathlib import Path
|
|
|
+from typing import Any, Dict, List
|
|
|
+
|
|
|
+from utils import ensure_dir, normalize_space, now_local, read_json, slugify, write_text
|
|
|
+
|
|
|
# Default Hugo site root: the "site" directory under the third ancestor of
# this file's resolved path (i.e. two levels above the containing directory).
DEFAULT_SITE_DIR = Path(__file__).resolve().parents[2].joinpath("site")

# Default content section that receives the published daily posts.
DEFAULT_HUGO_CONTENT_DIR = DEFAULT_SITE_DIR.joinpath("content", "ai-daily")

# Manifest domain keys mapped to the Chinese labels used in summaries and tags.
DOMAIN_TAGS = dict(
    embodied="具身智能",
    representation="表征学习",
    reinforcement="强化学习",
)
|
|
|
+
|
|
|
+
|
|
|
def detect_date(markdown_path: Path, content: str) -> str:
    """Work out the ``YYYY-MM-DD`` date that a markdown bundle belongs to.

    Candidates are tried in this order:

    1. the name of the directory containing ``markdown_path``;
    2. the first ``20YY-MM-DD`` occurrence inside ``content``;
    3. the current date as reported by ``now_local()``.

    A candidate is only accepted when it is a real calendar date. Strings
    that merely look like a date (for example ``2024-13-45``) are skipped,
    because the result is used both as the output file name and as the
    front-matter date, where an impossible date would break the site build.

    Args:
        markdown_path: Path of the markdown file being published.
        content: Full text of that markdown file.

    Returns:
        The detected date formatted as ``YYYY-MM-DD``.
    """
    # Function-scope import keeps this block self-contained.
    from datetime import date

    def _is_calendar_date(text: str) -> bool:
        """Return True when *text* parses as an ISO calendar date."""
        try:
            date.fromisoformat(text)
        except ValueError:
            return False
        return True

    parent_name = markdown_path.parent.name
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", parent_name) and _is_calendar_date(parent_name):
        return parent_name

    # Scan every date-like token so one bogus match does not hide a valid one.
    for match in re.finditer(r"(20\d{2}-\d{2}-\d{2})", content):
        candidate = match.group(1)
        if _is_calendar_date(candidate):
            return candidate

    return now_local().strftime("%Y-%m-%d")
|
|
|
+
|
|
|
+
|
|
|
def strip_leading_title(markdown: str) -> str:
    """Drop a leading level-one heading from a markdown document.

    Lines that ``normalize_space`` reduces to an empty value are skipped at
    the top of the document. If the first remaining line starts with ``"# "``
    it is removed, along with any such empty lines that follow it.

    Args:
        markdown: Markdown text, possibly starting with a ``# Title`` line.

    Returns:
        The remaining text, stripped of surrounding whitespace and terminated
        by exactly one newline (``"\n"`` when nothing is left).
    """
    rows = markdown.splitlines()
    total = len(rows)

    # Walk an index forward instead of popping from the front of the list.
    start = 0
    while start < total and not normalize_space(rows[start]):
        start += 1

    if start < total and rows[start].startswith("# "):
        start += 1
        while start < total and not normalize_space(rows[start]):
            start += 1

    remainder = "\n".join(rows[start:])
    return remainder.strip() + "\n"
|
|
|
+
|
|
|
+
|
|
|
def build_summary_from_manifest(manifest: Dict[str, Any], fallback_body: str = "") -> str:
    """Produce the one-line summary used in the front matter and intro quote.

    With a non-empty ``manifest`` the summary is built from its ``date``,
    ``selected_count`` and per-domain ``counts`` entries. With an empty
    manifest, the first line of ``fallback_body`` that is non-empty after
    ``normalize_space`` and does not start with ``#``, ``-`` or ``>`` is used,
    truncated to 110 characters. If no such line exists a fixed placeholder
    sentence is returned.

    Args:
        manifest: Manifest mapping for the bundle; may be empty.
        fallback_body: Markdown body consulted only when ``manifest`` is empty.

    Returns:
        The summary sentence.
    """
    if not manifest:
        markup_prefixes = ("#", "-", ">")
        for raw_line in fallback_body.splitlines():
            text = normalize_space(raw_line)
            if text and not text.startswith(markup_prefixes):
                return text[:110]
        return "RobotDaily 当日 Markdown 归档。"

    date_slug = str(manifest.get("date") or now_local().strftime("%Y-%m-%d"))
    total = int(manifest.get("selected_count") or 0)
    counts = manifest.get("counts") or {}

    # Only domains with a truthy count are mentioned, in a fixed order.
    parts = [
        f"{DOMAIN_TAGS.get(key, key)} {counts.get(key)} 篇"
        for key in ("embodied", "representation", "reinforcement")
        if counts.get(key)
    ]
    if not parts:
        return f"RobotDaily {date_slug}:共 {total} 篇。"

    breakdown = ",".join(parts)
    return f"RobotDaily {date_slug}:共 {total} 篇,含 {breakdown}。"
|
|
|
+
|
|
|
+
|
|
|
def build_tags(manifest: Dict[str, Any]) -> List[str]:
    """Assemble the de-duplicated tag list for a published post.

    The list always starts with ``"robotdaily"`` and ``"ai-daily"``. A domain
    label from ``DOMAIN_TAGS`` is added for every domain whose entry in
    ``manifest["counts"]`` is truthy, and ``"llm"`` is added when
    ``manifest["effective_models_used"]`` is truthy.

    Args:
        manifest: Manifest mapping for the bundle; may be empty.

    Returns:
        Tags in first-seen order, each passed through ``normalize_space``,
        with empty values and duplicates removed.
    """
    counts = manifest.get("counts") or {}

    candidates = ["robotdaily", "ai-daily"]
    # The Chinese labels serve as taxonomy tags so Hugo can index by them.
    candidates.extend(
        DOMAIN_TAGS[key]
        for key in ("embodied", "representation", "reinforcement")
        if counts.get(key) and DOMAIN_TAGS.get(key)
    )
    if manifest.get("effective_models_used"):
        candidates.append("llm")

    # A dict keeps insertion order, giving order-preserving de-duplication.
    unique: Dict[str, None] = {}
    for raw in candidates:
        label = normalize_space(raw)
        if label:
            unique.setdefault(label, None)
    return list(unique)
|
|
|
+
|
|
|
+
|
|
|
def format_front_matter(*, title: str, date_text: str, summary: str, tags: List[str]) -> str:
    """Render the YAML front-matter block for a Hugo post.

    ``title`` and ``summary`` are emitted as JSON string literals, which are
    also valid YAML double-quoted scalars. This escapes double quotes,
    backslashes and line breaks, so arbitrary text (for example a summary
    lifted from a markdown body) cannot produce malformed front matter.
    ``tags`` is emitted as a JSON array, and non-ASCII characters are kept
    verbatim.

    Args:
        title: Post title.
        date_text: Value for the ``date`` field, written as given.
        summary: Post summary.
        tags: Tag list for the ``tags`` field.

    Returns:
        The front matter, delimited by ``---`` lines and followed by one
        blank line.
    """

    def _quote(text: str) -> str:
        """Return *text* as a double-quoted, fully escaped scalar."""
        return json.dumps(text, ensure_ascii=False)

    tag_json = json.dumps(tags, ensure_ascii=False)
    return (
        "---\n"
        f"title: {_quote(title)}\n"
        f"date: {date_text}\n"
        "draft: false\n"
        f"summary: {_quote(summary)}\n"
        f"tags: {tag_json}\n"
        "---\n\n"
    )
|
|
|
+
|
|
|
+
|
|
|
def build_hugo_document(source: Path, manifest: Dict[str, Any] | None = None) -> tuple[str, str]:
    """Turn a RobotDaily markdown file into a complete Hugo page.

    The page consists of front matter, a short block-quote intro that repeats
    the summary, and the original markdown with its leading title removed.

    Args:
        source: Markdown file to read (decoded as UTF-8).
        manifest: Optional manifest mapping. Its ``date`` and ``generated_at``
            entries take precedence over values derived from the file.

    Returns:
        A ``(date_slug, document)`` pair: the ``YYYY-MM-DD`` slug for the
        post and the full page text.
    """
    meta = manifest or {}
    raw = source.read_text(encoding="utf-8")

    date_slug = str(meta.get("date") or detect_date(source, raw))
    body = strip_leading_title(raw)
    summary = build_summary_from_manifest(meta, body)

    # Without a manifest timestamp, fall back to 10:30 (+08:00) on that date.
    date_text = str(meta.get("generated_at") or f"{date_slug}T10:30:00+08:00")
    front_matter = format_front_matter(
        title=f"{date_slug} · AI 每日简报",
        date_text=date_text,
        summary=summary,
        tags=build_tags(meta),
    )

    intro = (
        "> Hugo 归档版,来源于 RobotDaily 当日 Markdown 简报。\n"
        ">\n"
        f"> {summary}\n"
        "\n"
    )
    return date_slug, front_matter + intro + body
|
|
|
+
|
|
|
+
|
|
|
def publish_markdown_to_hugo(markdown_path: str, site_dir: str, section: str = "ai-daily", manifest_path: str = "") -> Path:
    """Publish a markdown bundle into ``<site_dir>/content/<section>``.

    Args:
        markdown_path: Path of the markdown file to publish.
        site_dir: Root directory of the Hugo site.
        section: Content section (sub-directory of ``content``) to write to.
        manifest_path: Optional path of the manifest JSON; when empty, an
            empty manifest is used.

    Returns:
        Path of the written ``<date>.md`` file.

    Raises:
        FileNotFoundError: If ``markdown_path`` does not exist.
    """
    source = Path(markdown_path)
    if not source.exists():
        raise FileNotFoundError(f"Markdown source not found: {source}")

    manifest = {}
    if manifest_path:
        manifest = read_json(manifest_path, default={})

    date_slug, document = build_hugo_document(source, manifest)

    # The value returned by ensure_dir is used as the target directory.
    section_dir = ensure_dir(Path(site_dir) / "content" / section)
    target = section_dir / f"{date_slug}.md"
    write_text(target, document)
    return target
|
|
|
+
|
|
|
+
|
|
|
def publish_to_hugo(markdown_path: str, manifest_path: str = "", content_dir: str = "") -> Path:
    """Publish a markdown bundle directly into a Hugo content directory.

    Args:
        markdown_path: Path of the markdown file to publish.
        manifest_path: Optional path of the manifest JSON; when empty, an
            empty manifest is used.
        content_dir: Directory to write into; when empty,
            ``DEFAULT_HUGO_CONTENT_DIR`` is used.

    Returns:
        Path of the written ``<date>.md`` file.

    Raises:
        FileNotFoundError: If ``markdown_path`` does not exist.
    """
    source = Path(markdown_path)
    if not source.exists():
        raise FileNotFoundError(f"Markdown source not found: {source}")

    manifest = {}
    if manifest_path:
        manifest = read_json(manifest_path, default={})

    date_slug, document = build_hugo_document(source, manifest)

    if content_dir:
        destination = Path(content_dir)
    else:
        destination = DEFAULT_HUGO_CONTENT_DIR
    # The value returned by ensure_dir is used as the target directory.
    target = ensure_dir(destination) / f"{date_slug}.md"
    write_text(target, document)
    return target
|
|
|
+
|
|
|
+
|
|
|
def main() -> None:
    """Command-line entry point: publish one markdown file and print its path.

    The source file comes from ``--input`` or, if that is empty,
    ``--markdown``. When ``--site-dir`` is given the post is written to
    ``<site-dir>/content/<section>``; otherwise it goes to ``--content-dir``
    (or the default content directory when that is empty).

    Raises:
        SystemExit: If neither ``--input`` nor ``--markdown`` is provided.
    """
    parser = argparse.ArgumentParser(description="Publish RobotDaily markdown into Hugo content")
    # All of these options are plain strings that default to empty.
    for flag in ("--input", "--markdown", "--manifest", "--content-dir", "--site-dir"):
        parser.add_argument(flag, default="")
    parser.add_argument("--section", default="ai-daily")
    args = parser.parse_args()

    markdown = args.input or args.markdown
    if not markdown:
        raise SystemExit("--input 或 --markdown 必填")

    if not args.site_dir:
        output = publish_to_hugo(
            markdown_path=markdown,
            manifest_path=args.manifest,
            content_dir=args.content_dir,
        )
    else:
        output = publish_markdown_to_hugo(
            markdown,
            site_dir=args.site_dir,
            section=args.section,
            manifest_path=args.manifest,
        )
    print(output)


if __name__ == "__main__":
    main()
|