|
@@ -0,0 +1,208 @@
|
|
|
|
|
+const fs = require('fs');
|
|
|
|
|
+const https = require('https');
|
|
|
|
|
+const Mustache = require('mustache');
|
|
|
|
|
+
|
|
|
|
|
// Fetch the raw body of a URL over HTTPS and resolve it as a UTF-8 string.
// Rejects on network errors and on non-2xx HTTP responses.
function fetchRSS(url) {
  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      // Fail fast on error responses instead of resolving with an error page.
      if (res.statusCode < 200 || res.statusCode >= 300) {
        res.resume(); // drain the response so the socket is released
        reject(new Error(`Request failed with status ${res.statusCode}: ${url}`));
        return;
      }
      // Decode as UTF-8 up front; concatenating raw Buffer chunks with `+=`
      // can corrupt multibyte characters split across chunk boundaries.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => {
        resolve(data);
      });
    }).on('error', (err) => {
      reject(err);
    });
  });
}
|
|
|
|
|
+
|
|
|
|
|
// Parse <item> elements out of an RSS 2.0 feed using regular expressions.
// Returns an array of { title, description, link, authors, arxivId }.
// NOTE(review): this assumes CDATA-wrapped title/description and dc:creator
// authors; the arXiv /api/query endpoint serves Atom (<entry>) instead —
// confirm which feed URL this parser is actually pointed at.
function parseRSS(rssData) {
  const items = [];

  // Regular expressions to extract data from RSS
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  const titleRegex = /<title><!\[CDATA\[(.*?)\]\]><\/title>/;
  const descRegex = /<description><!\[CDATA\[(.*?)\]\]><\/description>/;
  const linkRegex = /<guid[^>]*>(.*?)<\/guid>/;
  const authorRegex = /<dc:creator>(.*?)<\/dc:creator>/g;

  // Decode the XML entities that commonly appear in feed text.
  // Fix: the original chained no-op replacements ('&' -> '&', '<' -> '<').
  // '&amp;' must be decoded LAST so '&amp;lt;' yields '&lt;', not '<'.
  const decodeEntities = (s) =>
    s.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');

  let match;
  while ((match = itemRegex.exec(rssData)) !== null) {
    const itemData = match[1];

    const titleMatch = itemData.match(titleRegex);
    const descMatch = itemData.match(descRegex);
    const linkMatch = itemData.match(linkRegex);

    // Skip items missing any of the three required fields.
    if (titleMatch && descMatch && linkMatch) {
      const title = decodeEntities(titleMatch[1]);
      const description = decodeEntities(descMatch[1]);
      const link = linkMatch[1];

      // Collect every <dc:creator> entry for this item.
      const authors = [];
      let authorMatch;
      while ((authorMatch = authorRegex.exec(itemData)) !== null) {
        authors.push(authorMatch[1]);
      }

      // The arXiv ID is the last path segment of the guid/link URL.
      const arxivId = link.split('/').pop();

      items.push({
        title,
        description,
        link,
        authors: authors.join(', '),
        arxivId,
      });
    }
  }

  return items;
}
|
|
|
|
|
+
|
|
|
|
|
// Fetch recent papers from the arXiv API, dedupe by arXiv ID, and return
// up to the first 10. Individual query failures are logged and skipped.
// NOTE(review): export.arxiv.org/api/query returns an Atom feed (<entry>),
// while parseRSS expects RSS <item>/CDATA markup — confirm this pairing
// actually yields results.
async function getLatestPapers() {
  // Search queries for different categories. Terms are space-separated;
  // URLSearchParams encodes them correctly. (The original embedded literal
  // '+' separators and then encodeURIComponent-ed them to '%2B', which the
  // server decodes back to a literal plus, not a space.)
  const queries = [
    'cat:cs.RO OR cat:cs.AI OR cat:cs.CV OR cat:cs.LG OR cat:cs.CL OR cat:cs.MM', // General AI categories
  ];

  let allPapers = [];

  for (const query of queries) {
    // Build the query URL with the URL API rather than string concatenation.
    const endpoint = new URL('https://export.arxiv.org/api/query');
    endpoint.searchParams.set('search_query', query);
    endpoint.searchParams.set('sortBy', 'submittedDate');
    endpoint.searchParams.set('sortOrder', 'descending');
    endpoint.searchParams.set('max_results', '20');
    const url = endpoint.toString();
    console.log(`Fetching papers from: ${url}`);

    try {
      const rssData = await fetchRSS(url);
      const papers = parseRSS(rssData);
      allPapers = allPapers.concat(papers);
    } catch (error) {
      // Best-effort: a failed query should not abort the whole digest.
      console.error(`Error fetching papers for query ${query}:`, error);
    }
  }

  // Remove duplicates based on arXiv ID (first occurrence wins).
  const seenIds = new Set();
  const uniquePapers = allPapers.filter((paper) => {
    if (seenIds.has(paper.arxivId)) {
      return false;
    }
    seenIds.add(paper.arxivId);
    return true;
  });

  // Sort by some relevance criteria (for now just take first 10)
  return uniquePapers.slice(0, 10);
}
|
|
|
|
|
+
|
|
|
|
|
// Derive topic tags for a paper from keyword matches in its title/abstract.
// Returns a de-duplicated array of tag strings in a fixed, stable order.
function extractTags(title, abstract) {
  const haystack = `${title} ${abstract}`.toLowerCase();

  // Keyword -> tag rules, checked in order; a tag is emitted when any of
  // its keywords appears in the combined lowercased text.
  const rules = [
    [['embodied', 'robot'], 'embodied'],
    [['representation', 'representations', 'learning representation'], 'representation'],
    [['reinforcement learning', 'rl ', ' rl'], 'rl'],
    [['vision', 'visual'], 'vision'],
    [['language'], 'language'],
    [['multimodal'], 'multimodal'],
    [['manipulation'], 'manipulation'],
    [['navigation'], 'navigation'],
    [['world model', 'world-model'], 'world-model'],
  ];

  const found = [];
  for (const [keywords, tag] of rules) {
    if (keywords.some((keyword) => haystack.includes(keyword))) {
      found.push(tag);
    }
  }

  return [...new Set(found)]; // Remove duplicate tags
}
|
|
|
|
|
+
|
|
|
|
|
// Produce a one-sentence "insight" for a paper.
// Placeholder implementation: picks one of three canned sentences at random;
// a real implementation would derive the summary from title/abstract
// (e.g. via an LLM). The title/abstract arguments are currently unused.
function generateSummary(title, abstract) {
  const insights = [
    "This paper introduces novel approaches to the problem.",
    "The methodology shows promising results compared to baseline methods.",
    "The findings have implications for future research directions."
  ];

  const pick = Math.floor(Math.random() * insights.length);
  return insights[pick];
}
|
|
|
|
|
+
|
|
|
|
|
// Generate today's digest: fetch papers, tag and filter them, render the
// Mustache template, and write the HTML to /home/zhn/arxiv-digests.
// Returns the path of the file written.
async function generateDigest() {
  console.log("Starting ArXiv digest generation...");

  const papers = await getLatestPapers();
  console.log(`Found ${papers.length} papers`);

  // Keep only papers that match at least one topic tag, capped at 5.
  const filteredPapers = papers
    .map((paper) => ({
      ...paper,
      tags: extractTags(paper.title, paper.description),
    }))
    .filter((paper) => paper.tags.length > 0)
    .slice(0, 5); // Take top 5

  console.log(`Filtered to ${filteredPapers.length} relevant papers`);

  // Compute the timestamp once so date/time/filename are consistent
  // (the original built three separate Date objects).
  const now = new Date();
  const dateStr = now.toISOString().split('T')[0]; // YYYY-MM-DD (UTC)

  // Data handed to the Mustache template.
  const templateData = {
    date: dateStr,
    category: 'AI Research',
    time: now.toLocaleTimeString('zh-CN'),
    papers: filteredPapers.map((paper) => ({
      title: paper.title,
      authors: paper.authors,
      arxiv_id: paper.arxivId,
      arxiv_url: paper.link,
      tags: paper.tags,
      summary: generateSummary(paper.title, paper.description),
    })),
  };

  // TODO(review): template and output paths are machine-specific absolute
  // paths — consider making them configurable.
  const template = fs.readFileSync('/home/zhn/.nvm/versions/node/v22.22.0/lib/node_modules/openclaw/skills/arxiv-digest/assets/template.html', 'utf8');
  const output = Mustache.render(template, templateData);

  // Write to file with today's date.
  const filename = `/home/zhn/arxiv-digests/arxiv-digest-${dateStr}.html`;
  fs.writeFileSync(filename, output);

  // Fix: the original logged the literal "$(unknown)" (shell-style
  // interpolation) instead of the generated path.
  console.log(`Digest generated successfully: ${filename}`);
  return filename;
}
|
|
|
|
|
+
|
|
|
|
|
// Entry point: run the generator and exit with 0 on success, 1 on failure.
(async () => {
  try {
    const outputPath = await generateDigest();
    console.log('ArXiv digest generation completed:', outputPath);
    process.exit(0);
  } catch (error) {
    console.error('Error generating digest:', error);
    process.exit(1);
  }
})();
|