| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157 |
- #!/usr/bin/env python3
- """
- Search ArXiv for papers related to embodied learning, representation learning, and reinforcement learning.
- This version uses web search functionality instead of the arxiv library.
- """
- import json
- import sys
- import re
- from datetime import datetime
def search_recent_papers_web(max_results=10):
    """Build the web-search request a caller must execute to find papers.

    No search is performed here: the function only describes the work.
    The returned dict holds a ``"web_search_needed"`` status marker, one
    query string per research topic, and ``max_results`` passed through
    unchanged.
    """
    # One query per topic; the caller is expected to run each of them
    # through its own web-search tool.
    topic_queries = [
        "recent arxiv papers embodied learning",
        "recent arxiv papers representation learning",
        "recent arxiv papers reinforcement learning",
    ]
    return dict(
        status="web_search_needed",
        queries=topic_queries,
        max_results=max_results,
    )
# Non-greedy match of anything between angle brackets (does not span newlines).
_HTML_TAG_RE = re.compile(r'<.*?>')

# Keywords checked in priority order when assigning a category.
_PARSE_CATEGORY_KEYWORDS = ('embodied', 'representation', 'reinforcement')


def _as_text(value):
    """Return *value* as a string, mapping ``None`` to the empty string."""
    if value is None:
        return ''
    return value if isinstance(value, str) else str(value)


def _first_keyword_category(*texts):
    """Return the first category keyword found in *texts*, or ``''``.

    Texts are examined in the order given, so any keyword hit in an earlier
    text takes precedence over hits in later texts.
    """
    for text in texts:
        lowered = text.lower()
        for keyword in _PARSE_CATEGORY_KEYWORDS:
            if keyword in lowered:
                return keyword
    return ''


def parse_search_results(search_results):
    """Convert web-search output into a list of paper dicts.

    Args:
        search_results: Expected to be a dict with a ``'results'`` list whose
            entries are dicts carrying ``'title'``, ``'url'``,
            ``'description'`` and ``'published'`` keys. Anything else
            (non-dict input, missing or empty ``'results'``) yields ``[]``.

    Returns:
        A list with one dict per usable result, containing the keys
        ``title``, ``authors``, ``abstract``, ``doi``, ``url``,
        ``published``, ``categories`` and ``primary_category``. The category
        is the first of 'embodied', 'representation', 'reinforcement' found
        in the title, falling back to the tag-stripped description; it is
        ``''`` when neither contains a keyword (``categories`` is then
        ``['']``). ``authors`` and ``doi`` are placeholders.
    """
    if not isinstance(search_results, dict):
        return []

    papers = []
    # `or []` tolerates an explicit None under 'results'.
    for result in search_results.get('results') or []:
        if not isinstance(result, dict):
            # Malformed entry: skip it rather than abort the whole parse.
            continue

        # Fields may be present but None; normalise before string operations.
        title = _as_text(result.get('title'))
        description = _as_text(result.get('description'))

        # The description doubles as the abstract once HTML tags are removed.
        clean_description = _HTML_TAG_RE.sub('', description)

        # Title keywords win; the description is only a fallback.
        category = _first_keyword_category(title, clean_description)

        papers.append({
            "title": title,
            "authors": ["Multiple Authors"],  # Placeholder - would be extracted from full paper
            "abstract": clean_description,
            "doi": "",  # Would be extracted from full paper
            "url": result.get('url', ''),
            "published": result.get('published', ''),
            "categories": [category],
            "primary_category": category,
        })

    return papers
def _paper_field_text(paper, key):
    """Return ``paper[key]`` as a string, treating missing/None as ``''``."""
    value = paper.get(key)
    if value is None:
        return ''
    return value if isinstance(value, str) else str(value)


def _infer_paper_category(paper, category_names):
    """Pick a category for *paper* by keyword search in title and abstract.

    Categories are tried in the order given; the first one whose name occurs
    in either field wins. When nothing matches, the first category is used
    as the default bucket.
    """
    title_lower = _paper_field_text(paper, 'title').lower()
    abstract_lower = _paper_field_text(paper, 'abstract').lower()
    for name in category_names:
        if name in title_lower or name in abstract_lower:
            return name
    return category_names[0]  # Default fallback


def select_top_papers(papers, per_category=2):
    """Select up to ``per_category`` papers from each known category.

    Papers are grouped by ``primary_category`` ('embodied',
    'representation', 'reinforcement'); papers with a missing or unknown
    category are classified by keyword in title/abstract, defaulting to
    'embodied'. Within a category, papers are ranked by a simple heuristic:
    combined length of title and abstract, longest first. Duplicate titles
    are dropped. If fewer than ``per_category * 3`` papers were selected,
    the remainder is filled from ``papers`` in their original order.

    Args:
        papers: Iterable of paper dicts (``title``, ``abstract`` and
            ``primary_category`` keys are read; missing values are treated
            as empty).
        per_category: Maximum number of papers taken per category.
            Negative values are treated as 0.

    Returns:
        A list of at most ``per_category * 3`` paper dicts (6 with the
        default), or ``[]`` when ``papers`` is empty.
    """
    if not papers:
        return []

    category_names = ('embodied', 'representation', 'reinforcement')
    per_category = max(per_category, 0)
    # The overall cap follows per_category instead of a fixed 6, so a
    # non-default per_category is honoured.
    limit = per_category * len(category_names)

    # Group papers by category.
    buckets = {name: [] for name in category_names}
    for paper in papers:
        category = paper.get('primary_category')
        if category not in buckets:
            category = _infer_paper_category(paper, category_names)
        buckets[category].append(paper)

    def relevance(paper):
        # Simple heuristic: longer title + abstract ranks higher.
        return (len(_paper_field_text(paper, 'title'))
                + len(_paper_field_text(paper, 'abstract')))

    # Take the top papers of each category, skipping repeated titles.
    selected = []
    seen_titles = set()
    for name in category_names:
        ranked = sorted(buckets[name], key=relevance, reverse=True)
        for paper in ranked[:per_category]:
            title = paper.get('title')
            if title not in seen_titles:
                seen_titles.add(title)
                selected.append(paper)

    # Top up from the remaining papers when some category came up short.
    for paper in papers:
        if len(selected) >= limit:
            break
        title = paper.get('title')
        if title not in seen_titles:
            seen_titles.add(title)
            selected.append(paper)

    return selected[:limit]
def main():
    """Print JSON instructions telling the caller which steps to perform.

    This script cannot invoke a web-search tool itself, so it only emits a
    description of the required action and the follow-up processing.
    """
    instructions = {
        "action_required": "web_search",
        "instructions": "Use web_search tool with queries for recent arxiv papers in embodied learning, representation learning, and reinforcement learning",
        "post_processing": "Call parse_search_results with the web_search output, then select_top_papers",
    }
    rendered = json.dumps(instructions, ensure_ascii=False, indent=2)
    print(rendered)


if __name__ == "__main__":
    main()
|