#!/usr/bin/env python3
"""
Translate paper abstracts to Chinese and provide brief explanations
"""

import json
import re
import sys

# Common technical terms in ML/RL/embodied learning, one pattern per theme.
# Compiled once at import time; matched against the lower-cased abstract.
_TECH_TERM_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'\b(?:neural|deep|convolutional|recurrent|transformer|attention)\b',
        r'\b(?:learning|reinforcement|policy|value|q-learning|actor-critic)\b',
        r'\b(?:embodied|robot|agent|environment|simulation|real-world)\b',
        r'\b(?:representation|encoding|latent|feature|embedding)\b',
        r'\b(?:algorithm|method|approach|framework|architecture)\b',
        r'\b(?:experiment|evaluation|performance|accuracy|result)\b',
        r'\b(?:training|dataset|model|network|parameter)\b',
        r'\b(?:state-of-the-art|sota|baseline|comparison)\b',
    )
)

# Capitalized words of three or more letters (potential proper nouns).
_CAPITALIZED_WORD = re.compile(r'\b[A-Z][a-z]{2,}\b')

# Maximum number of keywords returned by extract_keywords().
_MAX_KEYWORDS = 10

# Number of keywords appended after the primary category in a paper's tags.
_MAX_TAG_KEYWORDS = 5


def translate_to_chinese(text: str) -> dict:
    """
    Package an abstract together with a Chinese translation prompt.

    This function would normally call a translation API, but since we're
    using MoltBot's LLM capabilities, it only formats the text so the LLM
    can translate it. No translation happens here.

    Args:
        text: The English abstract.

    Returns:
        A dict with two keys: "original" (the text unchanged) and
        "translation_prompt" (a Chinese instruction followed by the text).
    """
    return {
        "original": text,
        "translation_prompt": f"请将以下英文摘要翻译成中文,并提供简要解释:\n\n{text}",
    }


def extract_keywords(text: str) -> list:
    """
    Extract keywords from the abstract using simple heuristics.

    Two sources are combined: matches of the technical-term patterns against
    the lower-cased text, and capitalized words from the original text
    (lower-cased before being added).

    Args:
        text: The abstract to scan.

    Returns:
        Up to 10 unique lower-case keywords. The order is deterministic:
        technical terms first (pattern by pattern, each in order of
        appearance), then capitalized words in order of appearance.
    """
    text_lower = text.lower()

    candidates = []
    for pattern in _TECH_TERM_PATTERNS:
        candidates.extend(pattern.findall(text_lower))
    candidates.extend(word.lower() for word in _CAPITALIZED_WORD.findall(text))

    # dict.fromkeys de-duplicates while keeping first-seen order. A plain set
    # would make the truncated result depend on per-process string hashing.
    return list(dict.fromkeys(candidates))[:_MAX_KEYWORDS]


def process_paper(paper: dict) -> dict:
    """
    Process a single paper: build the translation prompt and extract keywords.

    Args:
        paper: Mapping that must contain "abstract" and "primary_category".
            All other entries are copied through unchanged.

    Returns:
        A new dict with every entry of ``paper`` plus "chinese_abstract"
        (see translate_to_chinese), "keywords" (see extract_keywords) and
        "tags" (the primary category followed by the first 5 keywords).
        The input mapping is not modified.

    Raises:
        KeyError: If "abstract" or "primary_category" is missing.
    """
    abstract = paper['abstract']
    # Extract once and reuse for both "keywords" and "tags".
    keywords = extract_keywords(abstract)

    return {
        **paper,
        "chinese_abstract": translate_to_chinese(abstract),
        "keywords": keywords,
        "tags": [paper['primary_category']] + keywords[:_MAX_TAG_KEYWORDS],
    }


def main() -> None:
    """
    Read a JSON array of papers from stdin and print the processed array.

    The result is written to stdout as indented JSON with non-ASCII
    characters preserved. On empty input, invalid JSON, a top-level value
    that is not an array, an element that is not an object, or a paper
    missing a required field, a message is written to stderr and the
    process exits with status 1.
    """
    # Read JSON input from stdin
    input_text = sys.stdin.read().strip()

    if not input_text:
        print("No input provided", file=sys.stderr)
        sys.exit(1)

    try:
        papers = json.loads(input_text)
    except json.JSONDecodeError as e:
        print(f"Invalid JSON input: {e}", file=sys.stderr)
        sys.exit(1)

    # Iterating a dict or string would yield keys/characters, not papers.
    if not isinstance(papers, list):
        print(
            "Invalid input: expected a JSON array of paper objects",
            file=sys.stderr,
        )
        sys.exit(1)

    processed_papers = []
    for index, paper in enumerate(papers):
        if not isinstance(paper, dict):
            print(
                f"Invalid paper at index {index}: expected a JSON object",
                file=sys.stderr,
            )
            sys.exit(1)
        try:
            processed = process_paper(paper)
        except KeyError as e:
            print(
                f"Invalid paper at index {index}: missing required field {e}",
                file=sys.stderr,
            )
            sys.exit(1)
        processed_papers.append(processed)

    # Output as JSON
    print(json.dumps(processed_papers, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()