#!/usr/bin/env python3
# ClawLab / RobotDaily
"""
Complete pipeline: RSS fetch -> Enhanced processing -> HTML generation
"""

import datetime
import html
import json
import re
import subprocess
import sys
from pathlib import Path


def run_script(script_path, input_data=None):
    """Execute a Python script in a child interpreter and return its stdout.

    Args:
        script_path: Path of the script, passed to the current interpreter
            (``sys.executable``) as its only argument.
        input_data: Optional text fed to the child's stdin. A falsy value
            (``None`` or ``""``) means no stdin payload is supplied.

    Returns:
        The child's captured standard output, decoded as text.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            status.
    """
    # Passing input=None is the same as omitting the argument, so a single
    # call covers both the "with stdin" and "without stdin" cases.
    stdin_payload = input_data if input_data else None

    completed = subprocess.run(
        [sys.executable, script_path],
        input=stdin_payload,
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout


def main():
    """Run the full pipeline and print the resulting HTML page to stdout.

    Steps:
        1. Run ``rss_arxiv_search.py`` and parse its stdout as a JSON list of
           papers.
        2. Pipe that list (as JSON) into ``enhanced_translation.py`` and parse
           the enhanced list it prints.
        3. Render the enhanced papers with ``generate_enhanced_html``.

    Progress and error messages go to stderr. On any failure, or when no
    papers are found, the literal string ``[]`` is printed to stdout instead
    of an HTML page.

    NOTE(review): both script names are relative, and ``run_script`` passes no
    ``cwd``, so they are resolved against the current working directory.
    """
    try:
        # Step 1: fetch papers from the RSS feeds.
        print("Step 1: Fetching papers from RSS feeds...", file=sys.stderr)
        rss_output = run_script('rss_arxiv_search.py')

        try:
            papers = json.loads(rss_output)
        except json.JSONDecodeError:
            print("Error: Could not parse RSS output", file=sys.stderr)
            print("[]")
            return

        if not papers:
            print("No papers found", file=sys.stderr)
            print("[]")
            return

        print(f"Step 2: Found {len(papers)} papers, enhancing translations...", file=sys.stderr)

        # Step 2: enhance with better translations and explanations.
        enhanced_output = run_script(
            'enhanced_translation.py',
            json.dumps(papers)
        )
        enhanced_papers = json.loads(enhanced_output)

        # Step 3: render and emit the HTML page.
        print(generate_enhanced_html(enhanced_papers))

    except subprocess.CalledProcessError as e:
        print(f"Error running script: {e}", file=sys.stderr)
        # The child's stderr was captured rather than shown, so relay it here;
        # otherwise only the exit status is reported and the real cause of
        # the failure (traceback, network error, ...) is lost.
        if e.stderr:
            print(e.stderr.rstrip(), file=sys.stderr)
        print("[]")
    except Exception as e:
        # Top-level boundary: report the problem and keep the "[]" fallback.
        print(f"Error in processing: {e}", file=sys.stderr)
        print("[]")


# Token in the page templates that is replaced by the formatted report date.
_DATE_PLACEHOLDER = '@@REPORT_DATE@@'

# Rendering limits.
_MAX_PAPERS = 4
_MAX_TAGS = 6
_ABSTRACT_PREVIEW_CHARS = 500

# Maps a paper's primary category to the label shown on its card.
_CATEGORY_TAGS = {
    "embodied": "#具身智能",
    "representation": "#表征学习",
    "reinforcement": "#强化学习",
    "robotics": "#机器人",
    "general": "#综合",
}
_DEFAULT_CATEGORY_TAG = "#AI研究"

# Matches the "arXiv:<id> ... <newline>Abstract: " prefix of an abstract.
# Both a real line break and the literal two-character sequence backslash-n
# are accepted as the separator.
_ARXIV_HEADER_RE = re.compile(r'arXiv:[^\n]*?(?:\r?\n|\\n)Abstract: ?')
# Matches a real line break or the literal two-character sequence backslash-n.
_LINE_BREAK_RE = re.compile(r'\r?\n|\\n')

_PAGE_HEAD = '''<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>每日AI前沿速递 - @@REPORT_DATE@@</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            padding: 20px;
            color: #333;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
            background: #fff;
            border-radius: 12px;
            box-shadow: 0 10px 40px rgba(0,0,0,0.2);
            overflow: hidden;
        }
        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 30px 20px;
            text-align: center;
        }
        .header h1 {
            font-size: 24px;
            margin-bottom: 8px;
        }
        .header .date {
            font-size: 14px;
            opacity: 0.9;
        }
        .paper-card {
            padding: 20px;
            border-bottom: 1px solid #eee;
        }
        .paper-card:hover {
            background: #f8f9fa;
        }
        .paper-card:last-child {
            border-bottom: none;
        }
        .paper-card h2 {
            font-size: 18px;
            color: #2c3e50;
            margin-bottom: 8px;
        }
        .paper-card .author {
            font-size: 14px;
            color: #7f8c8d;
            margin-bottom: 12px;
        }
        .paper-card .label {
            display: inline-block;
            background: #e8f4fd;
            color: #3498db;
            padding: 4px 12px;
            border-radius: 16px;
            font-size: 12px;
            font-weight: bold;
            margin-bottom: 12px;
        }
        .paper-card .abstract {
            font-size: 14px;
            line-height: 1.6;
            color: #555;
            margin-bottom: 12px;
            background: #f9f9f9;
            padding: 10px;
            border-radius: 6px;
        }
        .paper-card .translation {
            background: #fff9e6;
            padding: 12px;
            border-radius: 8px;
            margin-bottom: 12px;
            font-size: 14px;
            line-height: 1.6;
            color: #555;
            border-left: 4px solid #f39c12;
        }
        .paper-card .translation b {
            color: #e67e22;
        }
        .paper-card .explanation {
            background: #e8f8f5;
            padding: 12px;
            border-radius: 8px;
            font-size: 14px;
            line-height: 1.6;
            color: #555;
            border-left: 4px solid #27ae60;
        }
        .paper-card .explanation b {
            color: #27ae60;
        }
        .tags {
            margin-top: 12px;
        }
        .tags span {
            display: inline-block;
            background: #f0f2f5;
            color: #7f8c8d;
            padding: 3px 10px;
            border-radius: 4px;
            font-size: 12px;
            margin-right: 6px;
            margin-bottom: 6px;
        }
        .links {
            margin-top: 12px;
        }
        .links a {
            display: inline-block;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 8px 16px;
            border-radius: 6px;
            text-decoration: none;
            font-size: 12px;
            margin-right: 8px;
            margin-bottom: 8px;
        }
        .links a:hover {
            opacity: 0.9;
        }
        .footer {
            background: #f8f9fa;
            padding: 20px;
            text-align: center;
            color: #7f8c8d;
            font-size: 12px;
        }
        .timestamp {
            font-size: 12px;
            color: #95a5a6;
            margin-top: 10px;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🤖 每日AI前沿速递</h1>
            <div class="date">@@REPORT_DATE@@</div>
        </div>'''

_PAGE_FOOT = '''
        <div class="footer">
            ⏰ 每日定时推送 | 🤖 @@REPORT_DATE@@自动生成 | 📊 使用RSS源获取最新论文
        </div>
    </div>
</body>
</html>'''


def _format_report_date(report_date):
    """Return the escaped date label shown in the title, header and footer."""
    if report_date is None:
        report_date = datetime.date.today()
    if isinstance(report_date, datetime.date):
        label = f"{report_date.year}年{report_date.month}月{report_date.day}日"
    else:
        # Anything else is treated as an already-formatted label.
        label = str(report_date)
    return html.escape(label)


def _render_abstract(raw_abstract):
    """Return the abstract preview as HTML: header stripped, cut, escaped."""
    text = _ARXIV_HEADER_RE.sub('', str(raw_abstract))
    truncated = len(text) > _ABSTRACT_PREVIEW_CHARS
    # Cut the plain text *before* inserting markup so a tag is never split.
    preview = html.escape(text[:_ABSTRACT_PREVIEW_CHARS])
    preview = _LINE_BREAK_RE.sub('<br>', preview)
    return preview + '...' if truncated else preview


def _render_paper_card(paper):
    """Return the HTML card for one paper dict."""
    category_tag = _CATEGORY_TAGS.get(paper['primary_category'], _DEFAULT_CATEGORY_TAG)
    abstract_html = _render_abstract(paper['abstract'])
    title = html.escape(str(paper['title']))
    authors = html.escape(", ".join(paper['authors']))
    published = html.escape(str(paper['published']))
    url = html.escape(str(paper['url']))
    tags_html = " ".join(
        f"<span>{html.escape(str(tag))}</span>"
        for tag in paper.get('tags', [])[:_MAX_TAGS]
    )
    # NOTE(review): the translation and explanation are inserted verbatim, as
    # in the original code, because the enhancement step may emit its own
    # markup. Confirm against enhanced_translation.py and escape here if it
    # only produces plain text.
    translation = paper.get('accurate_translation', '【待翻译】')
    explanation = paper.get('technical_explanation', '【待讲解】')

    return f'''
        <div class="paper-card">
            <div class="label">{category_tag}</div>
            <h2>{title}</h2>
            <div class="author">✍️ {authors} | 发布: {published}</div>
            <div class="abstract">
                <b>📝 英文摘要:</b><br>{abstract_html}
            </div>
            <div class="translation">
                <b>🇨🇳 中文翻译:</b><br>{translation}
            </div>
            <div class="explanation">
                <b>🔍 技术讲解:</b><br>{explanation}
            </div>
            <div class="tags">
                {tags_html}
            </div>
            <div class="links">
                <a href="{url}">📄 论文链接</a>
            </div>
        </div>'''


def generate_enhanced_html(papers, report_date=None):
    """Generate the daily digest HTML page from enhanced paper records.

    Only the first four papers are rendered. For each paper the abstract's
    leading ``arXiv:... / Abstract:`` header is removed, the text is cut to a
    500-character preview (``...`` is appended only when something was cut),
    and line breaks become ``<br>``. Feed-derived fields (title, authors,
    publication date, abstract, tags, URL) are HTML-escaped.

    Args:
        papers: Sequence of dicts. Each must provide ``title``, ``authors``
            (an iterable of strings), ``published``, ``abstract``,
            ``primary_category`` and ``url``; ``tags``,
            ``accurate_translation`` and ``technical_explanation`` are
            optional.
        report_date: Date shown in the page title, header and footer. Either a
            ``datetime.date`` (rendered as e.g. ``2026年1月30日``) or an
            already-formatted string. Defaults to today's local date.

    Returns:
        The complete HTML document as a string.

    Raises:
        KeyError: If a rendered paper lacks one of the required keys.
    """
    date_label = _format_report_date(report_date)

    sections = [_PAGE_HEAD.replace(_DATE_PLACEHOLDER, date_label)]
    sections.extend(_render_paper_card(paper) for paper in papers[:_MAX_PAPERS])
    sections.append(_PAGE_FOOT.replace(_DATE_PLACEHOLDER, date_label))
    return "".join(sections)


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()