#!/usr/bin/env python3
"""
Search ArXiv for papers related to embodied learning, representation learning,
and reinforcement learning.

This version uses web search functionality instead of the arxiv library.
"""

import json
import sys
import re
from datetime import datetime

# Topic keywords in priority order; the first keyword found in a text wins.
_CATEGORIES = ('embodied', 'representation', 'reinforcement')

# Non-greedy match for anything that looks like an HTML tag.
_HTML_TAG_RE = re.compile(r'<.*?>')


def _first_keyword(text):
    """Return the first category keyword contained in *text*, or ''."""
    lowered = text.lower()
    for keyword in _CATEGORIES:
        if keyword in lowered:
            return keyword
    return ''


def _text_field(mapping, key):
    """Return mapping[key], or '' when the key is missing or falsy (e.g. None)."""
    return mapping.get(key) or ''


def search_recent_papers_web(max_results=10):
    """
    Describe the web searches needed to find recent papers.

    No search is performed here: the returned dict is a template telling the
    caller which queries to run with its own web search tool.

    Args:
        max_results: Result count, echoed back under "max_results".

    Returns:
        A dict with keys "status", "queries" and "max_results".
    """
    return {
        "status": "web_search_needed",
        "queries": [
            "recent arxiv papers embodied learning",
            "recent arxiv papers representation learning",
            "recent arxiv papers reinforcement learning"
        ],
        "max_results": max_results
    }


def parse_search_results(search_results):
    """
    Parse web search results into paper format.

    Args:
        search_results: Expected to be a dict with a 'results' list whose
            items are dicts carrying 'title', 'url', 'description' and
            'published'. Any other shape yields an empty list; non-dict
            items are skipped.

    Returns:
        A list of paper dicts with keys "title", "authors", "abstract",
        "doi", "url", "published", "categories" and "primary_category".
        The category is '' when no keyword matches.
    """
    if not isinstance(search_results, dict):
        return []

    papers = []
    for result in search_results.get('results') or []:
        if not isinstance(result, dict):
            continue

        # Missing or None fields become '' so the string handling below is safe.
        title = _text_field(result, 'title')
        description = _text_field(result, 'description')

        # The tag-stripped description stands in for the abstract.
        clean_description = _HTML_TAG_RE.sub('', description)

        # A keyword in the title takes precedence over one in the description.
        category = _first_keyword(title) or _first_keyword(clean_description)

        papers.append({
            "title": title,
            "authors": ["Multiple Authors"],  # Placeholder - would be extracted from full paper
            "abstract": clean_description,
            "doi": "",  # Would be extracted from full paper
            "url": result.get('url', ''),
            "published": result.get('published', ''),
            "categories": [category],
            "primary_category": category
        })

    return papers


def select_top_papers(papers, per_category=2):
    """
    Select top papers from each category based on a simple relevance heuristic.

    Papers are grouped by 'primary_category'. A paper whose category is not
    one of the known ones is classified by keywords in its title/abstract and
    falls back to the first category. Within a category, papers are ranked by
    combined title + abstract length, longest first. Titles are unique in the
    result.

    Args:
        papers: List of paper dicts as produced by parse_search_results.
        per_category: Maximum number of papers to take from each category.

    Returns:
        At most per_category * 3 papers. If the per-category picks do not
        reach that number, the remaining papers are appended in input order.
    """
    if not papers:
        return []

    # Overall cap scales with per_category (6 for the default of 2).
    limit = max(per_category, 0) * len(_CATEGORIES)

    # Group papers by category, preserving category priority order.
    grouped = {name: [] for name in _CATEGORIES}
    for paper in papers:
        category = paper.get('primary_category')
        if category not in grouped:
            # Unknown category: classify on content, defaulting to the first
            # category. The newline keeps a keyword from matching across the
            # title/abstract boundary.
            combined = _text_field(paper, 'title') + '\n' + _text_field(paper, 'abstract')
            category = _first_keyword(combined) or _CATEGORIES[0]
        grouped[category].append(paper)

    def relevance(paper):
        # Simple heuristic: longer title + abstract ranks higher.
        return len(_text_field(paper, 'title')) + len(_text_field(paper, 'abstract'))

    selected = []
    seen_titles = set()

    # Skip duplicate titles while filling each quota so a duplicate never
    # consumes one of a category's slots.
    for candidates in grouped.values():
        taken = 0
        for paper in sorted(candidates, key=relevance, reverse=True):
            if taken >= per_category:
                break
            title = _text_field(paper, 'title')
            if title in seen_titles:
                continue
            selected.append(paper)
            seen_titles.add(title)
            taken += 1

    # If we don't have enough papers, add more from the remaining results.
    for paper in papers:
        if len(selected) >= limit:
            break
        title = _text_field(paper, 'title')
        if title not in seen_titles:
            selected.append(paper)
            seen_titles.add(title)

    return selected[:limit]


def main():
    """Print a JSON description of the steps the calling agent must perform."""
    # Since we can't directly import web search tools in Python,
    # we return a structure indicating what needs to be done.
    print(json.dumps({
        "action_required": "web_search",
        "instructions": "Use web_search tool with queries for recent arxiv papers in embodied learning, representation learning, and reinforcement learning",
        "post_processing": "Call parse_search_results with the web_search output, then select_top_papers"
    }, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()