const fs = require('fs');
const path = require('path');
const arxiv = require('arxiv-api');
const Mustache = require('mustache');

// File locations default to the original hard-coded paths. The environment
// variables let the script run on another machine without editing the source;
// `||` (not `??`) is used so an empty variable also falls back to the default.
const TEMPLATE_PATH =
  process.env.ARXIV_DIGEST_TEMPLATE ||
  '/home/zhn/.nvm/versions/node/v22.22.0/lib/node_modules/openclaw/skills/arxiv-digest/assets/template.html';
const OUTPUT_DIR = process.env.ARXIV_DIGEST_OUTPUT_DIR || '/home/zhn/arxiv-digests';

// Number of results requested per query, and number of papers kept in the digest.
const RESULTS_PER_QUERY = 15;
const MAX_DIGEST_PAPERS = 5;

// Maximum number of abstract characters quoted in a paper summary.
const EXCERPT_LENGTH = 150;

// Tag rules, evaluated in order against lower-cased, whitespace-normalised text.
// None of the patterns use the `g` flag, so `.test()` is stateless and the
// regex objects can safely be shared between calls.
const TAG_RULES = [
  ['embodied', /embodied|robot|physical interaction/],
  ['representation', /representation/],
  // `\brl\b` matches "rl" only as a whole word, so "girl " or "rlhf" no longer count.
  ['rl', /reinforcement learning|\brl\b/],
  // `\bvision` avoids false hits inside "supervision", "revision", "division".
  ['vision', /\bvision|visual/],
  ['language', /language/],
  ['multimodal', /multi-?modal/],
  ['manipulation', /manipulation/],
  ['navigation', /navigation/],
  ['world-model', /world[- ]model/],
  ['transformers', /transformer|attention/],
];

/**
 * Collapse every run of whitespace (including line breaks) into one space.
 *
 * @param {*} value - Text to normalise; null/undefined become ''.
 * @returns {string} Trimmed single-line text.
 */
function normalizeText(value) {
  return String(value ?? '').replace(/\s+/g, ' ').trim();
}

/**
 * Search arXiv, newest submissions first.
 *
 * Best-effort: any failure is logged and an empty array is returned, so the
 * caller still produces a (possibly empty) digest instead of crashing.
 *
 * NOTE(review): confirm that the installed `arxiv-api` version really exposes
 * a `Search` class with `fetch(onSuccess, onError)`. If it does not, the
 * constructor call throws, the catch below swallows it, and every run
 * silently yields an empty digest.
 *
 * @param {string} query - arXiv query string, e.g. 'cat:cs.RO OR cat:cs.AI'.
 * @param {number} [maxResults=20] - Maximum number of entries to request.
 * @returns {Promise<Array<object>>} The `entries` of the response, or [].
 */
async function searchPapers(query, maxResults = 20) {
  try {
    const search = new arxiv.Search({
      query,
      maxResults,
      sortBy: 'submittedDate',
      sortOrder: 'descending',
    });

    // Adapt the callback-style fetch to a promise.
    const results = await new Promise((resolve, reject) => {
      search.fetch(resolve, reject);
    });

    return Array.isArray(results?.entries) ? results.entries : [];
  } catch (error) {
    console.error('Error searching arXiv:', error);
    return [];
  }
}

/**
 * Derive topic tags from a paper's title and abstract by keyword matching.
 *
 * Matching is case-insensitive and runs on whitespace-normalised text, so a
 * phrase that is wrapped across lines (e.g. "reinforcement\nlearning") is
 * still recognised.
 *
 * @param {string} title - Paper title.
 * @param {string} summary - Paper abstract.
 * @returns {string[]} Matching tags in rule order, without duplicates.
 */
function extractTags(title, summary) {
  const text = normalizeText(`${title ?? ''} ${summary ?? ''}`).toLowerCase();
  return TAG_RULES.filter(([, pattern]) => pattern.test(text)).map(([tag]) => tag);
}

/**
 * Build the short blurb shown for a paper: one canned sentence chosen at
 * random, followed by the beginning of the abstract.
 *
 * The canned sentence does not depend on the paper's content; it is a
 * placeholder heuristic that could later be replaced by an LLM-generated
 * insight.
 *
 * @param {string} title - Paper title (currently unused; kept for the interface).
 * @param {string} summary - Paper abstract.
 * @returns {string} "<insight> Abstract: <excerpt>", where the excerpt ends in
 *   "..." only when the abstract was actually truncated.
 */
function generateSummary(title, summary) {
  const insights = [
    "This paper introduces novel approaches to the problem with promising experimental results.",
    "An interesting contribution to the field with potential applications in real-world scenarios.",
    "Methodologically sound approach with comprehensive evaluation against baseline methods.",
    "Theoretical contributions with practical implications for future research directions.",
    "Innovative combination of existing techniques showing improved performance.",
  ];
  const insight = insights[Math.floor(Math.random() * insights.length)];

  const abstract = normalizeText(summary);
  const excerpt =
    abstract.length > EXCERPT_LENGTH
      ? `${abstract.slice(0, EXCERPT_LENGTH)}...`
      : abstract;

  return `${insight} Abstract: ${excerpt}`;
}

/**
 * Join author names into one comma-separated string.
 *
 * Accepts entries that are either plain strings or objects with a `name`
 * property; anything else (and a missing list) is ignored rather than thrown on.
 *
 * @param {Array<string|{name: string}>} authors - Author list of a paper.
 * @returns {string} e.g. "A. Author, B. Author", or '' when none are usable.
 */
function formatAuthors(authors) {
  if (!Array.isArray(authors)) {
    return '';
  }
  return authors
    .map((author) => (typeof author === 'string' ? author : author?.name))
    .filter(Boolean)
    .join(', ');
}

/**
 * Fetch recent papers, keep the first few that match at least one tag, render
 * them through the Mustache template and write the HTML digest to disk.
 *
 * When no paper matches, the template is still rendered with an empty
 * `papers` list so a digest file exists for every run.
 *
 * @returns {Promise<string>} Path of the written HTML file.
 * @throws If the template cannot be read or the output cannot be written.
 */
async function generateDigest() {
  console.log("Starting ArXiv digest generation...");

  // Search queries for different categories
  const queries = [
    'cat:cs.RO OR cat:cs.AI OR cat:cs.CV OR cat:cs.LG OR cat:cs.CL OR cat:cs.MM', // General AI categories
  ];

  // Queries run one after another on purpose, so the API is not hit in parallel.
  const allPapers = [];
  for (const query of queries) {
    console.log(`Searching for papers with query: ${query}`);
    const papers = await searchPapers(query, RESULTS_PER_QUERY);
    console.log(`Found ${papers.length} papers for query: ${query}`);
    allPapers.push(...papers);
  }

  // Remove duplicates based on ID (the same paper can be returned by several queries).
  const seenIds = new Set();
  const uniquePapers = allPapers.filter((paper) => {
    if (seenIds.has(paper.id)) {
      return false;
    }
    seenIds.add(paper.id);
    return true;
  });
  console.log(`Total unique papers found: ${uniquePapers.length}`);

  // Keep the first MAX_DIGEST_PAPERS papers that carry at least one tag.
  // Results arrive newest-first, so this is "most recent relevant", not a
  // relevance ranking.
  // NOTE(review): assumes `paper.id` is the paper's URL string
  // (e.g. ".../abs/2401.01234v1"); verify against the API response.
  const filteredPapers = uniquePapers
    .map((paper) => ({
      ...paper,
      tags: extractTags(paper.title, paper.summary),
      arxivId: paper.id.split('/').pop().replace('arXiv:', ''),
      arxivUrl: paper.id,
    }))
    .filter((paper) => paper.tags.length > 0)
    .slice(0, MAX_DIGEST_PAPERS);
  console.log(`Filtered to ${filteredPapers.length} relevant papers`);

  const hasPapers = filteredPapers.length > 0;
  if (!hasPapers) {
    console.log("No relevant papers found, creating digest with placeholders");
  }

  // Take one timestamp so the date on the page and in the filename always agree.
  // Note: the date is the UTC date, while the time is formatted in local time.
  const now = new Date();
  const dateStr = now.toISOString().split('T')[0];

  const templateData = {
    date: dateStr,
    category: 'AI Research',
    time: now.toLocaleTimeString('zh-CN'),
    // Empty when nothing matched - the template should handle this gracefully.
    papers: filteredPapers.map((paper) => ({
      title: paper.title,
      authors: formatAuthors(paper.authors),
      arxiv_id: paper.arxivId,
      arxiv_url: paper.arxivUrl,
      tags: paper.tags,
      summary: generateSummary(paper.title, paper.summary),
    })),
  };

  const template = fs.readFileSync(TEMPLATE_PATH, 'utf8');
  const output = Mustache.render(template, templateData);

  // Create the output directory on first use instead of failing with ENOENT.
  const filename = path.join(OUTPUT_DIR, `arxiv-digest-${dateStr}.html`);
  fs.mkdirSync(path.dirname(filename), { recursive: true });
  fs.writeFileSync(filename, output);

  if (hasPapers) {
    console.log(`Digest generated successfully: ${filename}`);
  } else {
    console.log(`Digest generated with placeholders: ${filename}`);
  }
  return filename;
}

// Run the generator
generateDigest()
  .then((filename) => {
    console.log('ArXiv digest generation completed:', filename);
    process.exit(0);
  })
  .catch((error) => {
    console.error('Error generating digest:', error);
    process.exit(1);
  });