generate_arxiv_digest_v2.js 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. const fs = require('fs');
  2. const arxiv = require('arxiv-api');
  3. const Mustache = require('mustache');
  /**
   * Fetch the most recently submitted arXiv entries matching a search expression.
   *
   * The request is best-effort: any failure while building or running the
   * search is logged to stderr and reported to the caller as an empty array.
   *
   * @param {string} query - Search expression passed to the arXiv client as-is.
   * @param {number} [maxResults=20] - Maximum number of entries to request.
   * @returns {Promise<Array>} The `entries` of the response, or `[]` when the
   *   response carries none or the request fails.
   */
  async function searchPapers(query, maxResults = 20) {
    try {
      // Newest submissions first.
      const request = new arxiv.Search({
        query,
        maxResults,
        sortBy: 'submittedDate',
        sortOrder: 'descending',
      });

      // The client reports through success/failure callbacks; adapt it to a Promise.
      const response = await new Promise((onSuccess, onFailure) => {
        request.fetch(onSuccess, onFailure);
      });

      return response.entries || [];
    } catch (error) {
      console.error('Error searching arXiv:', error);
      return [];
    }
  }
  /**
   * Derive topic tags for a paper from keywords in its title and abstract.
   *
   * Matching is case-insensitive. Whitespace runs (including line breaks from
   * hard-wrapped abstracts) are collapsed to a single space first, so
   * multi-word phrases such as "reinforcement learning" still match when they
   * are split across lines.
   *
   * @param {string} title - Paper title.
   * @param {string} summary - Paper abstract.
   * @returns {string[]} Matching tags, each at most once, in a fixed order
   *   (embodied, representation, rl, vision, language, multimodal,
   *   manipulation, navigation, world-model, transformers).
   */
  function extractTags(title, summary) {
    const text = `${title} ${summary}`.toLowerCase().replace(/\s+/g, ' ');

    // One [tag, pattern] pair per topic. Patterns have no `g` flag, so
    // `test()` keeps no state between calls.
    const rules = [
      ['embodied', /embodied|robot|physical interaction/],
      ['representation', /representation/],
      // `\brl` only accepts "rl" at the start of a word ("rl", "rl-based",
      // "(rl)", "rlhf"), so words that merely end in "rl" ("url", "curl")
      // are no longer tagged.
      ['rl', /reinforcement learning|\brl/],
      // `\bvision` keeps "vision" / "vision-language" but rejects
      // "supervision", "revision", "division", ...
      ['vision', /\bvision|visual/],
      ['language', /language/],
      ['multimodal', /multimodal/],
      ['manipulation', /manipulation/],
      ['navigation', /navigation/],
      ['world-model', /world[ -]model/],
      ['transformers', /transformer|attention/],
    ];

    return rules
      .filter(([, pattern]) => pattern.test(text))
      .map(([tag]) => tag);
  }
  /**
   * Build the short blurb shown under a paper in the digest.
   *
   * The blurb is a canned "insight" sentence followed by an excerpt of the
   * abstract. The insight is picked at random from a fixed list and is NOT
   * derived from the paper; it is a placeholder until a real summarizer
   * (e.g. an LLM) is plugged in.
   *
   * @param {string} title - Paper title (currently unused; kept for the
   *   existing call signature).
   * @param {string} summary - Paper abstract. A missing or non-string value is
   *   treated as an empty abstract instead of throwing.
   * @returns {string} The insight sentence, followed by ` Abstract: <excerpt>`
   *   when an abstract is available. The excerpt is at most 150 characters and
   *   ends in `...` only when the abstract was actually cut.
   */
  function generateSummary(title, summary) {
    const MAX_ABSTRACT_CHARS = 150;

    // Simple heuristic to generate insights - in practice, this could be enhanced with LLM
    const insights = [
      "This paper introduces novel approaches to the problem with promising experimental results.",
      "An interesting contribution to the field with potential applications in real-world scenarios.",
      "Methodologically sound approach with comprehensive evaluation against baseline methods.",
      "Theoretical contributions with practical implications for future research directions.",
      "Innovative combination of existing techniques showing improved performance."
    ];
    const insight = insights[Math.floor(Math.random() * insights.length)];

    // Trim first so leading whitespace does not eat into the excerpt budget.
    const abstract = typeof summary === 'string' ? summary.trim() : '';
    if (abstract === '') {
      return insight;
    }

    const excerpt = abstract.length > MAX_ABSTRACT_CHARS
      ? `${abstract.substring(0, MAX_ABSTRACT_CHARS)}...`
      : abstract;
    return `${insight} Abstract: ${excerpt}`;
  }
  /**
   * Build today's arXiv digest page and write it to disk.
   *
   * Steps: run every search query, drop duplicate entries by id, keep the
   * first few entries that receive at least one topic tag, render them
   * through the Mustache HTML template and write the result to
   * `<output dir>/arxiv-digest-YYYY-MM-DD.html`. When no entry is relevant the
   * page is still rendered, with an empty `papers` list.
   *
   * The date (page label and filename) and the time label are both taken from
   * one timestamp in the machine's local time zone, so they always agree.
   *
   * @returns {Promise<string>} Path of the HTML file that was written.
   * @throws {Error} File-system errors from reading the template or writing
   *   the digest propagate to the caller.
   */
  async function generateDigest() {
    const TEMPLATE_PATH = '/home/zhn/.nvm/versions/node/v22.22.0/lib/node_modules/openclaw/skills/arxiv-digest/assets/template.html';
    const OUTPUT_DIR = '/home/zhn/arxiv-digests';
    const RESULTS_PER_QUERY = 15;
    const MAX_DIGEST_PAPERS = 5;

    // YYYY-MM-DD in local time (toISOString() would give the UTC date).
    const formatLocalDate = (date) => {
      const pad = (value) => String(value).padStart(2, '0');
      return `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;
    };

    console.log("Starting ArXiv digest generation...");

    // Search queries for different categories
    const queries = [
      'cat:cs.RO OR cat:cs.AI OR cat:cs.CV OR cat:cs.LG OR cat:cs.CL OR cat:cs.MM' // General AI categories
    ];

    // Queries run one after another to avoid hitting the API in parallel.
    let allPapers = [];
    for (const query of queries) {
      console.log(`Searching for papers with query: ${query}`);
      const papers = await searchPapers(query, RESULTS_PER_QUERY);
      console.log(`Found ${papers.length} papers for query: ${query}`);
      allPapers = allPapers.concat(papers);
    }

    // Remove duplicates based on ID (the first occurrence wins).
    const seenIds = new Set();
    const uniquePapers = allPapers.filter((paper) => {
      if (seenIds.has(paper.id)) {
        return false;
      }
      seenIds.add(paper.id);
      return true;
    });
    console.log(`Total unique papers found: ${uniquePapers.length}`);

    // Keep the first papers (in search order) that carry at least one tag.
    const filteredPapers = uniquePapers
      .map((paper) => ({
        ...paper,
        tags: extractTags(paper.title, paper.summary),
        // Last path segment of the entry id, without an "arXiv:" prefix.
        arxivId: paper.id.split('/').pop().replace('arXiv:', ''),
        arxivUrl: paper.id
      }))
      .filter((paper) => paper.tags.length > 0)
      .slice(0, MAX_DIGEST_PAPERS);
    console.log(`Filtered to ${filteredPapers.length} relevant papers`);

    const hasPapers = filteredPapers.length > 0;
    if (!hasPapers) {
      console.log("No relevant papers found, creating digest with placeholders");
    }

    // One timestamp for every date/time field so they cannot straddle midnight.
    const now = new Date();
    const today = formatLocalDate(now);

    // Prepare data for template (an empty `papers` list is valid input).
    const templateData = {
      date: today,
      category: 'AI Research',
      time: now.toLocaleTimeString('zh-CN'),
      papers: filteredPapers.map((paper) => ({
        title: paper.title,
        // Tolerate entries without an author list instead of aborting the run.
        authors: (Array.isArray(paper.authors) ? paper.authors : []).map((a) => a.name).join(', '),
        arxiv_id: paper.arxivId,
        arxiv_url: paper.arxivUrl,
        tags: paper.tags,
        summary: generateSummary(paper.title, paper.summary)
      }))
    };

    // Read and render the template
    const template = fs.readFileSync(TEMPLATE_PATH, 'utf8');
    const output = Mustache.render(template, templateData);

    // Write to file with today's date; create the output directory on first run.
    fs.mkdirSync(OUTPUT_DIR, { recursive: true });
    const filename = `${OUTPUT_DIR}/arxiv-digest-${today}.html`;
    fs.writeFileSync(filename, output);

    console.log(hasPapers
      ? `Digest generated successfully: ${filename}`
      : `Digest generated with placeholders: ${filename}`);
    return filename;
  }
  // Script entry point: generate the digest, report the outcome, and exit
  // with status 0 on success or 1 on any failure.
  (async () => {
    try {
      const filename = await generateDigest();
      console.log('ArXiv digest generation completed:', filename);
      process.exit(0);
    } catch (error) {
      console.error('Error generating digest:', error);
      process.exit(1);
    }
  })();