#!/usr/bin/env python3
"""
Complete pipeline: RSS fetch -> Enhanced processing -> HTML generation
"""
import html
import json
import re
import subprocess
import sys
from datetime import date
from pathlib import Path
  9. def run_script(script_path, input_data=None):
  10. """Run a Python script and return its output"""
  11. cmd = [sys.executable, script_path]
  12. if input_data:
  13. result = subprocess.run(
  14. cmd,
  15. input=input_data,
  16. text=True,
  17. capture_output=True,
  18. check=True
  19. )
  20. else:
  21. result = subprocess.run(
  22. cmd,
  23. capture_output=True,
  24. text=True,
  25. check=True
  26. )
  27. return result.stdout
  28. def main():
  29. """Complete pipeline: RSS fetch -> Enhanced processing -> HTML generation"""
  30. try:
  31. # Step 1: Get papers from RSS feeds
  32. print("Step 1: Fetching papers from RSS feeds...", file=sys.stderr)
  33. rss_output = run_script('rss_arxiv_search.py')
  34. # Parse the RSS output
  35. try:
  36. papers = json.loads(rss_output)
  37. except json.JSONDecodeError:
  38. print("Error: Could not parse RSS output", file=sys.stderr)
  39. print("[]")
  40. return
  41. if not papers:
  42. print("No papers found", file=sys.stderr)
  43. print("[]")
  44. return
  45. print(f"Step 2: Found {len(papers)} papers, enhancing translations...", file=sys.stderr)
  46. # Step 2: Enhance with better translations and explanations
  47. enhanced_output = run_script(
  48. 'enhanced_translation.py',
  49. json.dumps(papers)
  50. )
  51. enhanced_papers = json.loads(enhanced_output)
  52. # Step 3: Generate HTML with enhanced information
  53. html_content = generate_enhanced_html(enhanced_papers)
  54. # Print the HTML content
  55. print(html_content)
  56. except subprocess.CalledProcessError as e:
  57. print(f"Error running script: {e}", file=sys.stderr)
  58. print("[]")
  59. except Exception as e:
  60. print(f"Error in processing: {e}", file=sys.stderr)
  61. print("[]")
  62. def generate_enhanced_html(papers):
  63. """Generate HTML with enhanced translations and explanations"""
  64. html = '''<!DOCTYPE html>
  65. <html lang="zh-CN">
  66. <head>
  67. <meta charset="UTF-8">
  68. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  69. <title>每日AI前沿速递 - 2026年1月30日</title>
  70. <style>
  71. * {
  72. margin: 0;
  73. padding: 0;
  74. box-sizing: border-box;
  75. }
  76. body {
  77. font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  78. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  79. padding: 20px;
  80. color: #333;
  81. }
  82. .container {
  83. max-width: 800px;
  84. margin: 0 auto;
  85. background: #fff;
  86. border-radius: 12px;
  87. box-shadow: 0 10px 40px rgba(0,0,0,0.2);
  88. overflow: hidden;
  89. }
  90. .header {
  91. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  92. color: white;
  93. padding: 30px 20px;
  94. text-align: center;
  95. }
  96. .header h1 {
  97. font-size: 24px;
  98. margin-bottom: 8px;
  99. }
  100. .header .date {
  101. font-size: 14px;
  102. opacity: 0.9;
  103. }
  104. .paper-card {
  105. padding: 20px;
  106. border-bottom: 1px solid #eee;
  107. }
  108. .paper-card:hover {
  109. background: #f8f9fa;
  110. }
  111. .paper-card:last-child {
  112. border-bottom: none;
  113. }
  114. .paper-card h2 {
  115. font-size: 18px;
  116. color: #2c3e50;
  117. margin-bottom: 8px;
  118. }
  119. .paper-card .author {
  120. font-size: 14px;
  121. color: #7f8c8d;
  122. margin-bottom: 12px;
  123. }
  124. .paper-card .label {
  125. display: inline-block;
  126. background: #e8f4fd;
  127. color: #3498db;
  128. padding: 4px 12px;
  129. border-radius: 16px;
  130. font-size: 12px;
  131. font-weight: bold;
  132. margin-bottom: 12px;
  133. }
  134. .paper-card .abstract {
  135. font-size: 14px;
  136. line-height: 1.6;
  137. color: #555;
  138. margin-bottom: 12px;
  139. background: #f9f9f9;
  140. padding: 10px;
  141. border-radius: 6px;
  142. }
  143. .paper-card .translation {
  144. background: #fff9e6;
  145. padding: 12px;
  146. border-radius: 8px;
  147. margin-bottom: 12px;
  148. font-size: 14px;
  149. line-height: 1.6;
  150. color: #555;
  151. border-left: 4px solid #f39c12;
  152. }
  153. .paper-card .translation b {
  154. color: #e67e22;
  155. }
  156. .paper-card .explanation {
  157. background: #e8f8f5;
  158. padding: 12px;
  159. border-radius: 8px;
  160. font-size: 14px;
  161. line-height: 1.6;
  162. color: #555;
  163. border-left: 4px solid #27ae60;
  164. }
  165. .paper-card .explanation b {
  166. color: #27ae60;
  167. }
  168. .tags {
  169. margin-top: 12px;
  170. }
  171. .tags span {
  172. display: inline-block;
  173. background: #f0f2f5;
  174. color: #7f8c8d;
  175. padding: 3px 10px;
  176. border-radius: 4px;
  177. font-size: 12px;
  178. margin-right: 6px;
  179. margin-bottom: 6px;
  180. }
  181. .links {
  182. margin-top: 12px;
  183. }
  184. .links a {
  185. display: inline-block;
  186. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  187. color: white;
  188. padding: 8px 16px;
  189. border-radius: 6px;
  190. text-decoration: none;
  191. font-size: 12px;
  192. margin-right: 8px;
  193. margin-bottom: 8px;
  194. }
  195. .links a:hover {
  196. opacity: 0.9;
  197. }
  198. .footer {
  199. background: #f8f9fa;
  200. padding: 20px;
  201. text-align: center;
  202. color: #7f8c8d;
  203. font-size: 12px;
  204. }
  205. .timestamp {
  206. font-size: 12px;
  207. color: #95a5a6;
  208. margin-top: 10px;
  209. }
  210. </style>
  211. </head>
  212. <body>
  213. <div class="container">
  214. <div class="header">
  215. <h1>🤖 每日AI前沿速递</h1>
  216. <div class="date">2026年1月30日</div>
  217. </div>'''
  218. # Add paper cards with enhanced content
  219. for paper in papers[:4]: # Limit to first 4 papers
  220. # Clean up the abstract
  221. import re
  222. clean_abstract = re.sub(r'arXiv:[^\\n]*\\nAbstract: ?', '', paper['abstract'])
  223. clean_abstract = re.sub(r'\\n', '<br>', clean_abstract)
  224. # Get category tag
  225. cat_map = {
  226. "embodied": "#具身智能",
  227. "representation": "#表征学习",
  228. "reinforcement": "#强化学习",
  229. "robotics": "#机器人",
  230. "general": "#综合"
  231. }
  232. category_tag = cat_map.get(paper['primary_category'], "#AI研究")
  233. # Create tags
  234. tags_html = " ".join([f"<span>{tag}</span>" for tag in paper.get('tags', [])[:6]])
  235. html += f'''
  236. <div class="paper-card">
  237. <div class="label">{category_tag}</div>
  238. <h2>{paper['title']}</h2>
  239. <div class="author">✍️ {", ".join(paper['authors'])} | 发布: {paper['published']}</div>
  240. <div class="abstract">
  241. <b>📝 英文摘要:</b><br>{clean_abstract[:500]}...
  242. </div>
  243. <div class="translation">
  244. <b>🇨🇳 中文翻译:</b><br>{paper.get('accurate_translation', '【待翻译】')}
  245. </div>
  246. <div class="explanation">
  247. <b>🔍 技术讲解:</b><br>{paper.get('technical_explanation', '【待讲解】')}
  248. </div>
  249. <div class="tags">
  250. {tags_html}
  251. </div>
  252. <div class="links">
  253. <a href="{paper['url']}">📄 论文链接</a>
  254. </div>
  255. </div>'''
  256. html += '''
  257. <div class="footer">
  258. ⏰ 每日定时推送 | 🤖 2026年1月30日自动生成 | 📊 使用RSS源获取最新论文
  259. </div>
  260. </div>
  261. </body>
  262. </html>'''
  263. return html
  264. if __name__ == "__main__":
  265. main()