| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290 |
- #!/usr/bin/env python3
- """
- Complete pipeline: RSS fetch -> Enhanced processing -> HTML generation
- """
- import json
- import sys
- import subprocess
- from pathlib import Path
def run_script(script_path, input_data=None):
    """Run a Python script with the current interpreter and return its stdout.

    Args:
        script_path: Path of the script to execute.
        input_data: Optional text piped to the script's stdin. ``None`` (the
            default) leaves stdin untouched; any string, including the empty
            string, is sent to the child.

    Returns:
        The script's captured standard output as text.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (``check=True``).
    """
    # ``input=None`` is subprocess.run's own "no stdin pipe" default, so one
    # call covers both cases. Passing the value through unchanged also means
    # an empty string reaches the child as an immediate EOF instead of the
    # child inheriting (and possibly blocking on) the parent's stdin.
    result = subprocess.run(
        [sys.executable, script_path],
        input=input_data,
        text=True,
        capture_output=True,
        check=True,
    )
    return result.stdout
def main():
    """Drive the pipeline: fetch RSS papers, enhance them, print the HTML page.

    Progress and error messages go to stderr. On success the generated HTML
    is printed to stdout; on any failure (or when no papers are found) the
    string ``[]`` is printed to stdout instead.
    """
    try:
        # Stage 1: collect the raw paper list from the RSS fetcher.
        print("Step 1: Fetching papers from RSS feeds...", file=sys.stderr)
        feed_json = run_script('rss_arxiv_search.py')

        try:
            papers = json.loads(feed_json)
        except json.JSONDecodeError:
            print("Error: Could not parse RSS output", file=sys.stderr)
            print("[]")
            return

        if not papers:
            print("No papers found", file=sys.stderr)
            print("[]")
            return

        print(f"Step 2: Found {len(papers)} papers, enhancing translations...", file=sys.stderr)

        # Stage 2: the translation script receives the papers as JSON on
        # stdin and is expected to emit the enriched list as JSON on stdout.
        enhanced_papers = json.loads(
            run_script('enhanced_translation.py', json.dumps(papers))
        )

        # Stage 3: render the page and emit it on stdout.
        print(generate_enhanced_html(enhanced_papers))

    except subprocess.CalledProcessError as err:
        print(f"Error running script: {err}", file=sys.stderr)
        print("[]")
    except Exception as err:
        print(f"Error in processing: {err}", file=sys.stderr)
        print("[]")
def generate_enhanced_html(papers, report_date=None, max_papers=4):
    """Render the daily digest for *papers* as a standalone HTML document.

    Args:
        papers: Sequence of paper dicts. Each rendered paper must provide
            ``title``, ``authors`` (a string or an iterable of names),
            ``published``, ``abstract``, ``primary_category`` and ``url``.
            ``tags``, ``accurate_translation`` and ``technical_explanation``
            are optional.
        report_date: Date shown in the title, header and footer. Either a
            ``datetime.date`` (formatted as ``YYYY年M月D日``) or an already
            formatted string. Defaults to today's date.
        max_papers: Maximum number of papers rendered from the front of
            *papers* (default 4). ``None`` renders all of them.

    Returns:
        The complete HTML document as a string.

    Raises:
        KeyError: If a rendered paper lacks one of the required keys.
    """
    # Function-scope imports keep this block self-contained. ``escape`` is
    # imported by name because the page was historically held in a local
    # called ``html``, which would shadow the stdlib module.
    import re
    from datetime import date
    from html import escape

    if report_date is None:
        report_date = date.today()
    if isinstance(report_date, str):
        date_text = escape(report_date)
    else:
        date_text = f"{report_date.year}年{report_date.month}月{report_date.day}日"

    # Feed abstracts start with "arXiv:<id> ...<newline>Abstract: ". Line
    # breaks are accepted both as real newlines and as the escaped
    # two-character sequence backslash + "n".
    header_re = re.compile(r'arXiv:.*?(?:\n|\\n)Abstract: ?')
    newline_re = re.compile(r'\n|\\n')

    category_labels = {
        "embodied": "#具身智能",
        "representation": "#表征学习",
        "reinforcement": "#强化学习",
        "robotics": "#机器人",
        "general": "#综合",
    }

    def render_card(paper):
        """Return the HTML fragment for a single paper."""
        # Truncate the plain text first, then escape, then insert <br>, so
        # the cut can never land inside an entity or a tag.
        abstract_text = header_re.sub('', paper['abstract'])[:500]
        abstract_html = newline_re.sub('<br>', escape(abstract_text))

        category_tag = category_labels.get(paper['primary_category'], "#AI研究")

        authors = paper['authors']
        if not isinstance(authors, str):
            authors = ", ".join(str(name) for name in authors)

        tags_html = " ".join(
            f"<span>{escape(str(tag))}</span>" for tag in paper.get('tags', [])[:6]
        )

        # NOTE(review): the translation and explanation fields are inserted
        # unescaped because the enhancement step may emit markup on purpose;
        # confirm with its author and escape them here if it does not.
        translation = paper.get('accurate_translation', '【待翻译】')
        explanation = paper.get('technical_explanation', '【待讲解】')

        return f'''
        <div class="paper-card">
            <div class="label">{category_tag}</div>
            <h2>{escape(str(paper['title']))}</h2>
            <div class="author">✍️ {escape(authors)} | 发布: {escape(str(paper['published']))}</div>
            <div class="abstract">
                <b>📝 英文摘要:</b><br>{abstract_html}...
            </div>
            <div class="translation">
                <b>🇨🇳 中文翻译:</b><br>{translation}
            </div>
            <div class="explanation">
                <b>🔍 技术讲解:</b><br>{explanation}
            </div>
            <div class="tags">
                {tags_html}
            </div>
            <div class="links">
                <a href="{escape(str(paper['url']), quote=True)}">📄 论文链接</a>
            </div>
        </div>'''

    # Plain (non f-) string so the CSS braces need no doubling; the date is
    # substituted through a placeholder instead.
    page_head = '''<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>每日AI前沿速递 - __REPORT_DATE__</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            padding: 20px;
            color: #333;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
            background: #fff;
            border-radius: 12px;
            box-shadow: 0 10px 40px rgba(0,0,0,0.2);
            overflow: hidden;
        }
        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 30px 20px;
            text-align: center;
        }
        .header h1 {
            font-size: 24px;
            margin-bottom: 8px;
        }
        .header .date {
            font-size: 14px;
            opacity: 0.9;
        }
        .paper-card {
            padding: 20px;
            border-bottom: 1px solid #eee;
        }
        .paper-card:hover {
            background: #f8f9fa;
        }
        .paper-card:last-child {
            border-bottom: none;
        }
        .paper-card h2 {
            font-size: 18px;
            color: #2c3e50;
            margin-bottom: 8px;
        }
        .paper-card .author {
            font-size: 14px;
            color: #7f8c8d;
            margin-bottom: 12px;
        }
        .paper-card .label {
            display: inline-block;
            background: #e8f4fd;
            color: #3498db;
            padding: 4px 12px;
            border-radius: 16px;
            font-size: 12px;
            font-weight: bold;
            margin-bottom: 12px;
        }
        .paper-card .abstract {
            font-size: 14px;
            line-height: 1.6;
            color: #555;
            margin-bottom: 12px;
            background: #f9f9f9;
            padding: 10px;
            border-radius: 6px;
        }
        .paper-card .translation {
            background: #fff9e6;
            padding: 12px;
            border-radius: 8px;
            margin-bottom: 12px;
            font-size: 14px;
            line-height: 1.6;
            color: #555;
            border-left: 4px solid #f39c12;
        }
        .paper-card .translation b {
            color: #e67e22;
        }
        .paper-card .explanation {
            background: #e8f8f5;
            padding: 12px;
            border-radius: 8px;
            font-size: 14px;
            line-height: 1.6;
            color: #555;
            border-left: 4px solid #27ae60;
        }
        .paper-card .explanation b {
            color: #27ae60;
        }
        .tags {
            margin-top: 12px;
        }
        .tags span {
            display: inline-block;
            background: #f0f2f5;
            color: #7f8c8d;
            padding: 3px 10px;
            border-radius: 4px;
            font-size: 12px;
            margin-right: 6px;
            margin-bottom: 6px;
        }
        .links {
            margin-top: 12px;
        }
        .links a {
            display: inline-block;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 8px 16px;
            border-radius: 6px;
            text-decoration: none;
            font-size: 12px;
            margin-right: 8px;
            margin-bottom: 8px;
        }
        .links a:hover {
            opacity: 0.9;
        }
        .footer {
            background: #f8f9fa;
            padding: 20px;
            text-align: center;
            color: #7f8c8d;
            font-size: 12px;
        }
        .timestamp {
            font-size: 12px;
            color: #95a5a6;
            margin-top: 10px;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🤖 每日AI前沿速递</h1>
            <div class="date">__REPORT_DATE__</div>
        </div>'''.replace('__REPORT_DATE__', date_text)

    page_foot = f'''
        <div class="footer">
            ⏰ 每日定时推送 | 🤖 {date_text}自动生成 | 📊 使用RSS源获取最新论文
        </div>
    </div>
</body>
</html>'''

    cards = [render_card(paper) for paper in papers[:max_papers]]
    return "".join([page_head, *cards, page_foot])
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|