Ver Fonte

Initial commit: RobotDaily arxiv digest generator

Daily Deploy Bot há 1 semana
commit
2d2361287b
1 ficheiro alterado com 208 adições e 0 exclusões
  1. 208 0
      generate_arxiv_digest.js

+ 208 - 0
generate_arxiv_digest.js

@@ -0,0 +1,208 @@
+const fs = require('fs');
+const https = require('https');
+const Mustache = require('mustache');
+
// Fetch the raw body of an HTTPS resource as a UTF-8 string.
//
// Resolves with the full response body; rejects on network errors and on
// non-2xx responses. Previously a 404/503 error page was resolved
// successfully and silently handed to the RSS parser.
// NOTE(review): 3xx redirects are not followed — confirm the feed URL
// never redirects, or add redirect handling.
function fetchRSS(url) {
    return new Promise((resolve, reject) => {
        https.get(url, (res) => {
            // Fail fast on HTTP errors instead of parsing an error page.
            if (res.statusCode < 200 || res.statusCode >= 300) {
                res.resume(); // drain the stream so the socket is released
                reject(new Error(`Request failed with status ${res.statusCode}: ${url}`));
                return;
            }
            res.setEncoding('utf8'); // accumulate text, not Buffers
            let data = '';
            res.on('data', (chunk) => {
                data += chunk;
            });
            res.on('end', () => {
                resolve(data);
            });
        }).on('error', (err) => {
            reject(err);
        });
    });
}
+
// Parse an RSS document into a list of paper records.
//
// Each <item> must carry a CDATA-wrapped <title> and <description> plus a
// <guid> (used as the paper link); items missing any of these are skipped.
// Authors are collected from every <dc:creator> element in the item.
function parseRSS(rssData) {
    const items = [];

    // Decode the HTML entities expected inside CDATA payloads.
    // `&amp;` MUST be decoded last: the original decoded it first, which
    // double-unescaped literal text like `&amp;lt;` into `<`.
    const decodeEntities = (s) => s
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&amp;/g, '&');

    const itemRegex = /<item>([\s\S]*?)<\/item>/g;
    const titleRegex = /<title><!\[CDATA\[(.*?)\]\]><\/title>/;
    const descRegex = /<description><!\[CDATA\[(.*?)\]\]><\/description>/;
    const linkRegex = /<guid[^>]*>(.*?)<\/guid>/;

    let match;
    while ((match = itemRegex.exec(rssData)) !== null) {
        const itemData = match[1];

        const titleMatch = itemData.match(titleRegex);
        const descMatch = itemData.match(descRegex);
        const linkMatch = itemData.match(linkRegex);

        if (titleMatch && descMatch && linkMatch) {
            const title = decodeEntities(titleMatch[1]);
            const description = decodeEntities(descMatch[1]);
            const link = linkMatch[1];

            // Fresh /g regex per item: a shared global regex carries
            // `lastIndex` state between exec() calls and is easy to break.
            const authorRegex = /<dc:creator>(.*?)<\/dc:creator>/g;
            const authors = [];
            let authorMatch;
            while ((authorMatch = authorRegex.exec(itemData)) !== null) {
                authors.push(authorMatch[1]);
            }

            // The arXiv ID is the last path segment of the link,
            // e.g. https://arxiv.org/abs/2501.01234 -> "2501.01234".
            const arxivId = link.split('/').pop();

            items.push({
                title,
                description,
                link,
                authors: authors.join(', '),
                arxivId
            });
        }
    }

    return items;
}
+
// Fetch recent papers for each configured arXiv search query, then
// de-duplicate them by arXiv ID, keeping the first occurrence.
// Returns at most 10 papers.
async function getLatestPapers() {
    // Search queries for different categories.
    // NOTE(review): export.arxiv.org/api/query serves Atom (<entry>) rather
    // than RSS (<item>) — verify parseRSS actually matches this payload.
    const queries = [
        'cat:cs.RO+OR+cat:cs.AI+OR+cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.MM', // General AI categories
    ];

    // Keyed by arXiv ID so later duplicates are dropped automatically;
    // Map preserves insertion order, matching the original filter pass.
    const papersById = new Map();

    for (const query of queries) {
        const url = `https://export.arxiv.org/api/query?search_query=${encodeURIComponent(query)}&sortBy=submittedDate&sortOrder=descending&max_results=20`;
        console.log(`Fetching papers from: ${url}`);

        try {
            const rssData = await fetchRSS(url);
            for (const paper of parseRSS(rssData)) {
                if (!papersById.has(paper.arxivId)) {
                    papersById.set(paper.arxivId, paper);
                }
            }
        } catch (error) {
            console.error(`Error fetching papers for query ${query}:`, error);
        }
    }

    // No relevance ranking yet — just cap the result at 10 papers.
    return [...papersById.values()].slice(0, 10);
}
+
// Derive topic tags from a paper's title + abstract via keyword matching.
// Returns tags in a fixed priority order, each at most once.
function extractTags(title, abstract) {
    const text = `${title} ${abstract}`.toLowerCase();

    // Ordered rules: [tag, predicate]. Order matches the original so the
    // returned tag sequence is unchanged for existing matches.
    const rules = [
        ['embodied', () => text.includes('embodied') || text.includes('robot')],
        // 'representation' is a prefix of 'representations' and of
        // 'learning representation', so one check covers all three.
        ['representation', () => text.includes('representation')],
        // Word-boundary match fixes false positives like "curl", which the
        // old `'rl '` / `' rl'` substring tests hit.
        ['rl', () => text.includes('reinforcement learning') || /\brl\b/.test(text)],
        ['vision', () => text.includes('vision') || text.includes('visual')],
        ['language', () => text.includes('language')],
        ['multimodal', () => text.includes('multimodal')],
        ['manipulation', () => text.includes('manipulation')],
        ['navigation', () => text.includes('navigation')],
        ['world-model', () => text.includes('world model') || text.includes('world-model')],
    ];

    // Each tag appears once in `rules`, so no dedupe pass is needed.
    return rules.filter(([, matches]) => matches()).map(([tag]) => tag);
}
+
// Placeholder summary generator: returns one canned insight chosen at
// random. A real implementation could ask an LLM to summarize the
// title/abstract instead.
function generateSummary(title, abstract) {
    const insights = [
        "This paper introduces novel approaches to the problem.",
        "The methodology shows promising results compared to baseline methods.",
        "The findings have implications for future research directions."
    ];

    const pick = Math.floor(Math.random() * insights.length);
    return insights[pick];
}
+
// Build today's HTML digest: fetch recent papers, tag and filter them,
// render the Mustache template, and write the result to disk.
// Returns the path of the generated HTML file.
async function generateDigest() {
    console.log("Starting ArXiv digest generation...");

    const papers = await getLatestPapers();
    console.log(`Found ${papers.length} papers`);

    // Keep only papers matching at least one topic tag, capped at 5.
    const filteredPapers = papers
        .map(paper => {
            const tags = extractTags(paper.title, paper.description);
            return { ...paper, tags };
        })
        .filter(paper => paper.tags.length > 0)
        .slice(0, 5);

    console.log(`Filtered to ${filteredPapers.length} relevant papers`);

    // Compute the date ONCE so the template date and the output filename
    // cannot disagree if this runs across midnight (the original called
    // `new Date()` twice). The old `.replace(/-/g, '-')` was a no-op and
    // has been dropped.
    const now = new Date();
    const dateStr = now.toISOString().split('T')[0]; // YYYY-MM-DD (UTC)

    // Prepare data for the Mustache template.
    const templateData = {
        date: dateStr,
        category: 'AI Research',
        time: now.toLocaleTimeString('zh-CN'),
        papers: filteredPapers.map(paper => ({
            title: paper.title,
            authors: paper.authors,
            arxiv_id: paper.arxivId,
            arxiv_url: paper.link,
            tags: paper.tags,
            summary: generateSummary(paper.title, paper.description)
        }))
    };

    // NOTE(review): absolute machine-specific paths — consider making the
    // template and output locations configurable.
    const template = fs.readFileSync('/home/zhn/.nvm/versions/node/v22.22.0/lib/node_modules/openclaw/skills/arxiv-digest/assets/template.html', 'utf8');

    const output = Mustache.render(template, templateData);

    const filename = `/home/zhn/arxiv-digests/arxiv-digest-${dateStr}.html`;
    fs.writeFileSync(filename, output);

    // Bug fix: the original logged the literal shell-style placeholder
    // `$(unknown)` instead of interpolating the filename.
    console.log(`Digest generated successfully: ${filename}`);
    return filename;
}
+
// Entry point: run the generator and exit with an explicit status code.
(async () => {
    try {
        const filename = await generateDigest();
        console.log('ArXiv digest generation completed:', filename);
        process.exit(0);
    } catch (error) {
        console.error('Error generating digest:', error);
        process.exit(1);
    }
})();