|
@@ -0,0 +1,208 @@
|
|
|
|
|
+const fs = require('fs');
|
|
|
|
|
+const https = require('https');
|
|
|
|
|
+const Mustache = require('mustache');
|
|
|
|
|
+
|
|
|
|
|
// Fetch the raw body of a URL over HTTPS and resolve it as a UTF-8 string.
// Rejects on network errors and on non-2xx HTTP responses.
function fetchRSS(url) {
  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      // Fail fast on error responses instead of resolving with an error page.
      if (res.statusCode < 200 || res.statusCode >= 300) {
        res.resume(); // drain the response so the socket is released
        reject(new Error(`Request failed with status ${res.statusCode}: ${url}`));
        return;
      }
      // Decode as UTF-8 up front; concatenating raw Buffer chunks with `+=`
      // can corrupt multibyte characters split across chunk boundaries.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => {
        resolve(data);
      });
    }).on('error', (err) => {
      reject(err);
    });
  });
}
|
|
|
|
|
+
|
|
|
|
|
// Parse <item> elements out of an RSS 2.0 feed using regular expressions.
// Returns an array of { title, description, link, authors, arxivId }.
// NOTE(review): this assumes CDATA-wrapped title/description and dc:creator
// authors; the arXiv /api/query endpoint serves Atom (<entry>) instead —
// confirm which feed URL this parser is actually pointed at.
function parseRSS(rssData) {
  const items = [];

  // Regular expressions to extract data from RSS
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  const titleRegex = /<title><!\[CDATA\[(.*?)\]\]><\/title>/;
  const descRegex = /<description><!\[CDATA\[(.*?)\]\]><\/description>/;
  const linkRegex = /<guid[^>]*>(.*?)<\/guid>/;
  const authorRegex = /<dc:creator>(.*?)<\/dc:creator>/g;

  // Decode the XML entities that commonly appear in feed text.
  // Fix: the original chained no-op replacements ('&' -> '&', '<' -> '<').
  // '&amp;' must be decoded LAST so '&amp;lt;' yields '&lt;', not '<'.
  const decodeEntities = (s) =>
    s.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');

  let match;
  while ((match = itemRegex.exec(rssData)) !== null) {
    const itemData = match[1];

    const titleMatch = itemData.match(titleRegex);
    const descMatch = itemData.match(descRegex);
    const linkMatch = itemData.match(linkRegex);

    // Skip items missing any of the three required fields.
    if (titleMatch && descMatch && linkMatch) {
      const title = decodeEntities(titleMatch[1]);
      const description = decodeEntities(descMatch[1]);
      const link = linkMatch[1];

      // Collect every <dc:creator> entry for this item.
      const authors = [];
      let authorMatch;
      while ((authorMatch = authorRegex.exec(itemData)) !== null) {
        authors.push(authorMatch[1]);
      }

      // The arXiv ID is the last path segment of the guid/link URL.
      const arxivId = link.split('/').pop();

      items.push({
        title,
        description,
        link,
        authors: authors.join(', '),
        arxivId,
      });
    }
  }

  return items;
}
|
|
|
|
|
+
|
|
|
|
|
// Fetch recent papers from the arXiv API, dedupe by arXiv ID, and return
// up to the first 10. Individual query failures are logged and skipped.
// NOTE(review): export.arxiv.org/api/query returns an Atom feed (<entry>),
// while parseRSS expects RSS <item>/CDATA markup — confirm this pairing
// actually yields results.
async function getLatestPapers() {
  // Search queries for different categories. Terms are space-separated;
  // URLSearchParams encodes them correctly. (The original embedded literal
  // '+' separators and then encodeURIComponent-ed them to '%2B', which the
  // server decodes back to a literal plus, not a space.)
  const queries = [
    'cat:cs.RO OR cat:cs.AI OR cat:cs.CV OR cat:cs.LG OR cat:cs.CL OR cat:cs.MM', // General AI categories
  ];

  let allPapers = [];

  for (const query of queries) {
    // Build the query URL with the URL API rather than string concatenation.
    const endpoint = new URL('https://export.arxiv.org/api/query');
    endpoint.searchParams.set('search_query', query);
    endpoint.searchParams.set('sortBy', 'submittedDate');
    endpoint.searchParams.set('sortOrder', 'descending');
    endpoint.searchParams.set('max_results', '20');
    const url = endpoint.toString();
    console.log(`Fetching papers from: ${url}`);

    try {
      const rssData = await fetchRSS(url);
      const papers = parseRSS(rssData);
      allPapers = allPapers.concat(papers);
    } catch (error) {
      // Best-effort: a failed query should not abort the whole digest.
      console.error(`Error fetching papers for query ${query}:`, error);
    }
  }

  // Remove duplicates based on arXiv ID (first occurrence wins).
  const seenIds = new Set();
  const uniquePapers = allPapers.filter((paper) => {
    if (seenIds.has(paper.arxivId)) {
      return false;
    }
    seenIds.add(paper.arxivId);
    return true;
  });

  // Sort by some relevance criteria (for now just take first 10)
  return uniquePapers.slice(0, 10);
}
|
|
|
|
|
+
|
|
|
|
|
// Derive topic tags for a paper from keyword matches in its title/abstract.
// Returns a de-duplicated array of tag strings in a fixed, stable order.
function extractTags(title, abstract) {
  const haystack = `${title} ${abstract}`.toLowerCase();

  // Keyword -> tag rules, checked in order; a tag is emitted when any of
  // its keywords appears in the combined lowercased text.
  const rules = [
    [['embodied', 'robot'], 'embodied'],
    [['representation', 'representations', 'learning representation'], 'representation'],
    [['reinforcement learning', 'rl ', ' rl'], 'rl'],
    [['vision', 'visual'], 'vision'],
    [['language'], 'language'],
    [['multimodal'], 'multimodal'],
    [['manipulation'], 'manipulation'],
    [['navigation'], 'navigation'],
    [['world model', 'world-model'], 'world-model'],
  ];

  const found = [];
  for (const [keywords, tag] of rules) {
    if (keywords.some((keyword) => haystack.includes(keyword))) {
      found.push(tag);
    }
  }

  return [...new Set(found)]; // Remove duplicate tags
}
|
|
|
|
|
+
|
|
|
|
|
// Produce a one-sentence "insight" for a paper.
// Placeholder implementation: picks one of three canned sentences at random;
// a real implementation would derive the summary from title/abstract
// (e.g. via an LLM). The title/abstract arguments are currently unused.
function generateSummary(title, abstract) {
  const insights = [
    "This paper introduces novel approaches to the problem.",
    "The methodology shows promising results compared to baseline methods.",
    "The findings have implications for future research directions."
  ];

  const pick = Math.floor(Math.random() * insights.length);
  return insights[pick];
}
|
|
|
|
|
+
|
|
|
|
|
// Generate today's digest: fetch papers, tag and filter them, render the
// Mustache template, and write the HTML to /home/zhn/arxiv-digests.
// Returns the path of the file written.
async function generateDigest() {
  console.log("Starting ArXiv digest generation...");

  const papers = await getLatestPapers();
  console.log(`Found ${papers.length} papers`);

  // Keep only papers that match at least one topic tag, capped at 5.
  const filteredPapers = papers
    .map((paper) => ({
      ...paper,
      tags: extractTags(paper.title, paper.description),
    }))
    .filter((paper) => paper.tags.length > 0)
    .slice(0, 5); // Take top 5

  console.log(`Filtered to ${filteredPapers.length} relevant papers`);

  // Compute the timestamp once so date/time/filename are consistent
  // (the original built three separate Date objects).
  const now = new Date();
  const dateStr = now.toISOString().split('T')[0]; // YYYY-MM-DD (UTC)

  // Data handed to the Mustache template.
  const templateData = {
    date: dateStr,
    category: 'AI Research',
    time: now.toLocaleTimeString('zh-CN'),
    papers: filteredPapers.map((paper) => ({
      title: paper.title,
      authors: paper.authors,
      arxiv_id: paper.arxivId,
      arxiv_url: paper.link,
      tags: paper.tags,
      summary: generateSummary(paper.title, paper.description),
    })),
  };

  // TODO(review): template and output paths are machine-specific absolute
  // paths — consider making them configurable.
  const template = fs.readFileSync('/home/zhn/.nvm/versions/node/v22.22.0/lib/node_modules/openclaw/skills/arxiv-digest/assets/template.html', 'utf8');
  const output = Mustache.render(template, templateData);

  // Write to file with today's date.
  const filename = `/home/zhn/arxiv-digests/arxiv-digest-${dateStr}.html`;
  fs.writeFileSync(filename, output);

  // Fix: the original logged the literal "$(unknown)" (shell-style
  // interpolation) instead of the generated path.
  console.log(`Digest generated successfully: ${filename}`);
  return filename;
}
|
|
|
|
|
+
|
|
|
|
|
// Entry point: run the generator and exit with 0 on success, 1 on failure.
(async () => {
  try {
    const outputPath = await generateDigest();
    console.log('ArXiv digest generation completed:', outputPath);
    process.exit(0);
  } catch (error) {
    console.error('Error generating digest:', error);
    process.exit(1);
  }
})();
|