const fs = require('fs');
const https = require('https');
const Mustache = require('mustache');

/**
 * Fetch the raw response body of an HTTPS URL.
 *
 * @param {string} url - Fully-formed HTTPS URL.
 * @returns {Promise<string>} Resolves with the response body as text.
 *   Rejects on network errors or non-2xx status codes.
 */
function fetchRSS(url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        // Fail fast on HTTP errors instead of parsing an error page as a feed.
        if (res.statusCode < 200 || res.statusCode >= 300) {
          res.resume(); // drain so the socket is released
          reject(new Error(`Request to ${url} failed with status ${res.statusCode}`));
          return;
        }
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => resolve(data));
      })
      .on('error', reject);
  });
}

/**
 * Decode the basic XML entities that appear inside feed text.
 * `&amp;` is decoded last so that e.g. "&amp;lt;" becomes "&lt;", not "<".
 *
 * @param {string} s - Raw text extracted from the feed.
 * @returns {string} Decoded text.
 */
function decodeEntities(s) {
  return s
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&');
}

/**
 * Extract paper entries from an RSS feed body using lightweight regexes.
 *
 * NOTE(review): export.arxiv.org/api/query actually returns an Atom feed
 * (<entry>/<id> elements, no CDATA) — these regexes assume the RSS shape
 * used by rss.arxiv.org. Confirm which endpoint this is meant to consume.
 *
 * @param {string} rssData - Raw XML feed text.
 * @returns {Array<{title: string, description: string, link: string,
 *   authors: string, arxivId: string}>} Parsed items; entries missing a
 *   title, description, or guid are skipped.
 */
function parseRSS(rssData) {
  // Fixed: the opening <item> / <title> tags were missing from these
  // patterns, so the first "item" swallowed the feed header.
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  const titleRegex = /<title><!\[CDATA\[([\s\S]*?)\]\]><\/title>/;
  const descRegex = /<description><!\[CDATA\[([\s\S]*?)\]\]><\/description>/;
  const linkRegex = /<guid[^>]*>(.*?)<\/guid>/;

  const items = [];
  for (const itemMatch of rssData.matchAll(itemRegex)) {
    const itemData = itemMatch[1];
    const titleMatch = itemData.match(titleRegex);
    const descMatch = itemData.match(descRegex);
    const linkMatch = itemData.match(linkRegex);
    if (!titleMatch || !descMatch || !linkMatch) {
      continue;
    }

    const title = decodeEntities(titleMatch[1]);
    const description = decodeEntities(descMatch[1]);
    const link = linkMatch[1];

    // matchAll uses a fresh iteration per item, avoiding the stateful
    // lastIndex pitfall of reusing one /g regex with exec() across strings.
    const authors = [...itemData.matchAll(/<dc:creator>(.*?)<\/dc:creator>/g)]
      .map((m) => m[1]);

    items.push({
      title,
      description,
      link,
      authors: authors.join(', '),
      // The guid/link ends with the paper identifier, e.g. ".../abs/2401.01234".
      arxivId: link.split('/').pop(),
    });
  }
  return items;
}

/**
 * Fetch the most recently submitted papers across the configured categories,
 * de-duplicated by arXiv ID.
 *
 * @returns {Promise<Array<object>>} Up to 10 unique paper records.
 */
async function getLatestPapers() {
  // arXiv search queries. '+' acts as the separator the API expects, so the
  // query must NOT be passed through encodeURIComponent (that would turn
  // '+' into '%2B' and break the search).
  const queries = [
    'cat:cs.RO+OR+cat:cs.AI+OR+cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.MM', // General AI categories
  ];

  let allPapers = [];
  for (const query of queries) {
    const url = `https://export.arxiv.org/api/query?search_query=${query}&sortBy=submittedDate&sortOrder=descending&max_results=20`;
    console.log(`Fetching papers from: ${url}`);
    try {
      const rssData = await fetchRSS(url);
      allPapers = allPapers.concat(parseRSS(rssData));
    } catch (error) {
      // Best effort: a failing query should not abort the whole digest.
      console.error(`Error fetching papers for query ${query}:`, error);
    }
  }

  // Remove duplicates based on arXiv ID (papers can match several categories).
  const seenIds = new Set();
  const uniquePapers = allPapers.filter((paper) => {
    if (seenIds.has(paper.arxivId)) {
      return false;
    }
    seenIds.add(paper.arxivId);
    return true;
  });

  // TODO: rank by relevance; for now just take the first 10.
  return uniquePapers.slice(0, 10);
}

/**
 * Derive coarse topic tags from a paper's title and abstract via keyword
 * matching on the lowercased text.
 *
 * @param {string} title - Paper title.
 * @param {string} abstract - Paper abstract/description.
 * @returns {string[]} Unique tags, possibly empty.
 */
function extractTags(title, abstract) {
  const text = `${title} ${abstract}`.toLowerCase();
  const tags = [];
  if (text.includes('embodied') || text.includes('robot')) {
    tags.push('embodied');
  }
  if (
    text.includes('representation') ||
    text.includes('representations') ||
    text.includes('learning representation')
  ) {
    tags.push('representation');
  }
  // Padded ' rl' / 'rl ' checks avoid matching substrings like "world".
  if (text.includes('reinforcement learning') || text.includes('rl ') || text.includes(' rl')) {
    tags.push('rl');
  }
  if (text.includes('vision') || text.includes('visual')) {
    tags.push('vision');
  }
  if (text.includes('language')) {
    tags.push('language');
  }
  if (text.includes('multimodal')) {
    tags.push('multimodal');
  }
  if (text.includes('manipulation')) {
    tags.push('manipulation');
  }
  if (text.includes('navigation')) {
    tags.push('navigation');
  }
  if (text.includes('world model') || text.includes('world-model')) {
    tags.push('world-model');
  }
  return [...new Set(tags)]; // Remove duplicate tags
}

/**
 * Produce a one-line summary for a paper.
 *
 * Placeholder implementation: returns a random canned insight. A real
 * implementation could use an LLM on the title/abstract.
 *
 * @param {string} title - Paper title (currently unused).
 * @param {string} abstract - Paper abstract (currently unused).
 * @returns {string} A summary sentence.
 */
function generateSummary(title, abstract) {
  const insights = [
    'This paper introduces novel approaches to the problem.',
    'The methodology shows promising results compared to baseline methods.',
    'The findings have implications for future research directions.',
  ];
  return insights[Math.floor(Math.random() * insights.length)];
}

/**
 * End-to-end digest generation: fetch papers, filter to relevant ones,
 * render the Mustache template, and write the HTML file.
 *
 * @returns {Promise<string>} Absolute path of the generated HTML file.
 * @throws If the template cannot be read or the output cannot be written.
 */
async function generateDigest() {
  console.log('Starting ArXiv digest generation...');

  const papers = await getLatestPapers();
  console.log(`Found ${papers.length} papers`);

  // Keep only papers with at least one recognized tag; cap at 5.
  const filteredPapers = papers
    .map((paper) => ({ ...paper, tags: extractTags(paper.title, paper.description) }))
    .filter((paper) => paper.tags.length > 0)
    .slice(0, 5);
  console.log(`Filtered to ${filteredPapers.length} relevant papers`);

  const dateStr = new Date().toISOString().split('T')[0]; // YYYY-MM-DD
  const templateData = {
    date: dateStr,
    category: 'AI Research',
    time: new Date().toLocaleTimeString('zh-CN'),
    papers: filteredPapers.map((paper) => ({
      title: paper.title,
      authors: paper.authors,
      arxiv_id: paper.arxivId,
      arxiv_url: paper.link,
      tags: paper.tags,
      summary: generateSummary(paper.title, paper.description),
    })),
  };

  // TODO: make these paths configurable instead of hard-coding a home dir.
  const template = fs.readFileSync(
    '/home/zhn/.nvm/versions/node/v22.22.0/lib/node_modules/openclaw/skills/arxiv-digest/assets/template.html',
    'utf8'
  );
  const output = Mustache.render(template, templateData);

  const filename = `/home/zhn/arxiv-digests/arxiv-digest-${dateStr}.html`;
  fs.writeFileSync(filename, output);
  // Fixed: the log used "$(unknown)" instead of interpolating the filename.
  console.log(`Digest generated successfully: ${filename}`);
  return filename;
}

// Run the generator
generateDigest()
  .then((filename) => {
    console.log('ArXiv digest generation completed:', filename);
    process.exit(0);
  })
  .catch((error) => {
    console.error('Error generating digest:', error);
    process.exit(1);
  });