|
|
@@ -43,20 +43,57 @@ def build_prompt(paper: Dict[str, Any]) -> str:
|
|
|
|
|
|
|
|
|
def fallback_enrichment(paper: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
- tags = FALLBACK_TAGS.get(paper["domain"], ["AI 论文", "机器学习", "应用研究"])
|
|
|
title = paper.get("title", "")
|
|
|
summary = paper.get("summary", "")
|
|
|
+ summary_lower = summary.lower()
|
|
|
|
|
|
- # 从标题提取核心方法
|
|
|
- brief = title.split(':')[0].strip()
|
|
|
- if '.' in brief:
|
|
|
- brief = brief.split('.')[0].strip()
|
|
|
+ # 从标题提取核心方法名,优先使用冒号前的部分
|
|
|
+ if ':' in title:
|
|
|
+ core_method = title.split(':')[0].strip()
|
|
|
+ elif '.' in title:
|
|
|
+ core_method = title.split('.')[0].strip()
|
|
|
+ else:
|
|
|
+ core_method = title.strip()
|
|
|
|
|
|
- # 判断方法类型
|
|
|
- summary_lower = summary.lower()
|
|
|
- if "diffusion" in summary_lower:
|
|
|
+ # 如果方法名太长(超过 20 字符),使用摘要中的关键词
|
|
|
+ if len(core_method) > 20:
|
|
|
+ # 从摘要中提取关键词
|
|
|
+ if "diffusion" in summary_lower:
|
|
|
+ core_method = "扩散模型框架"
|
|
|
+ elif "reinforcement learning" in summary_lower:
|
|
|
+ core_method = "强化学习框架"
|
|
|
+ elif "imitation learning" in summary_lower:
|
|
|
+ core_method = "模仿学习框架"
|
|
|
+ elif "contrastive" in summary_lower:
|
|
|
+ core_method = "对比学习框架"
|
|
|
+ elif "transformer" in summary_lower:
|
|
|
+ core_method = "Transformer 框架"
|
|
|
+ elif "self-supervised" in summary_lower:
|
|
|
+ core_method = "自监督学习框架"
|
|
|
+ elif "representation" in summary_lower:
|
|
|
+ core_method = "表征学习框架"
|
|
|
+ elif "adaptation" in summary_lower or "adaptive" in summary_lower:
|
|
|
+ core_method = "自适应框架"
|
|
|
+ elif "multi-agent" in summary_lower or "marl" in summary_lower:
|
|
|
+ core_method = "多智能体框架"
|
|
|
+ elif "world model" in summary_lower:
|
|
|
+ core_method = "世界模型框架"
|
|
|
+ elif "residual policy" in summary_lower:
|
|
|
+ core_method = "残差策略优化"
|
|
|
+ elif "preference optimization" in summary_lower:
|
|
|
+ core_method = "偏好优化"
|
|
|
+ else:
|
|
|
+ core_method = "创新框架"
|
|
|
+
|
|
|
+ # 判断方法类型(优先级从高到低)
|
|
|
+ method = "多种技术"
|
|
|
+ if "residual policy" in summary_lower:
|
|
|
+ method = "残差策略优化"
|
|
|
+ elif "preference optimization" in summary_lower:
|
|
|
+ method = "偏好优化"
|
|
|
+ elif "diffusion" in summary_lower:
|
|
|
method = "扩散模型"
|
|
|
- elif "reinforcement learning" in summary_lower:
|
|
|
+ elif "reinforcement learning" in summary_lower or "rl" in summary_lower:
|
|
|
method = "强化学习"
|
|
|
elif "imitation learning" in summary_lower:
|
|
|
method = "模仿学习"
|
|
|
@@ -64,24 +101,58 @@ def fallback_enrichment(paper: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
method = "对比学习"
|
|
|
elif "transformer" in summary_lower:
|
|
|
method = "Transformer"
|
|
|
- elif "self-supervised" in summary_lower:
|
|
|
+ elif "self-supervised" in summary_lower or "self supervised" in summary_lower:
|
|
|
method = "自监督学习"
|
|
|
- elif "representation" in summary_lower:
|
|
|
+ elif "representation learning" in summary_lower:
|
|
|
method = "表征学习"
|
|
|
- else:
|
|
|
- method = "多种技术"
|
|
|
+ elif "adaptation" in summary_lower or "adaptive" in summary_lower:
|
|
|
+ method = "自适应方法"
|
|
|
+ elif "multi-agent" in summary_lower or "marl" in summary_lower:
|
|
|
+ method = "多智能体强化学习"
|
|
|
+ elif "world model" in summary_lower:
|
|
|
+ method = "世界模型"
|
|
|
|
|
|
- # 判断应用领域
|
|
|
- if "robot" in summary_lower or "manipulation" in summary_lower:
|
|
|
- field = "机器人操作"
|
|
|
- elif "navigation" in summary_lower or "driving" in summary_lower:
|
|
|
+ # 判断应用领域(优先级从高到低)
|
|
|
+ field = "相关任务"
|
|
|
+ if "cloth" in summary_lower or "布料" in summary_lower:
|
|
|
+ field = "布料操作"
|
|
|
+ elif "piano" in summary_lower or "music" in summary_lower:
|
|
|
+ field = "音乐演奏"
|
|
|
+ elif "racing" in summary_lower or ("autonomous" in summary_lower and ("driving" in summary_lower or "racing" in summary_lower)):
|
|
|
+ field = "自动驾驶"
|
|
|
+ elif "medical" in summary_lower or "delivery" in summary_lower or "logistics" in summary_lower:
|
|
|
+ field = "医疗物流"
|
|
|
+ elif "motion" in summary_lower or "humanoid" in summary_lower:
|
|
|
+ field = "人类动作生成"
|
|
|
+ elif "navigation" in summary_lower and ("robot" in summary_lower or "policy" in summary_lower):
|
|
|
+ field = "机器人导航"
|
|
|
+ elif "navigation" in summary_lower:
|
|
|
field = "导航控制"
|
|
|
- elif "translation" in summary_lower or "generation" in summary_lower:
|
|
|
- field = "生成任务"
|
|
|
+ elif "traffic" in summary_lower or "scene understanding" in summary_lower:
|
|
|
+ field = "交通场景理解"
|
|
|
+ elif "map" in summary_lower or "localization" in summary_lower or "pose estimation" in summary_lower:
|
|
|
+ field = "定位建图"
|
|
|
+ elif "physical systems" in summary_lower or "emulator" in summary_lower:
|
|
|
+ field = "物理系统模拟"
|
|
|
+ elif "robot" in summary_lower or "manipulation" in summary_lower or "dexterous" in summary_lower:
|
|
|
+ field = "机器人操作"
|
|
|
else:
|
|
|
- field = "相关任务"
|
|
|
+ # 从标题推断领域
|
|
|
+ title_lower = title.lower()
|
|
|
+ if "robot" in title_lower or "manipulation" in title_lower:
|
|
|
+ field = "机器人操作"
|
|
|
+ elif "navigation" in title_lower or "driving" in title_lower:
|
|
|
+ field = "导航控制"
|
|
|
+ elif "piano" in title_lower or "music" in title_lower:
|
|
|
+ field = "音乐演奏"
|
|
|
+ elif "cloth" in title_lower:
|
|
|
+ field = "布料操作"
|
|
|
+ elif "motion" in title_lower or "humanoid" in title_lower:
|
|
|
+ field = "人类动作生成"
|
|
|
+ elif "racing" in title_lower or "autonomous" in title_lower:
|
|
|
+ field = "自动驾驶"
|
|
|
|
|
|
- # 判断结果
|
|
|
+ # 判断结果/创新点
|
|
|
if "real-world" in summary_lower or "deployment" in summary_lower:
|
|
|
result = "真实部署"
|
|
|
elif "zero-shot" in summary_lower:
|
|
|
@@ -90,15 +161,112 @@ def fallback_enrichment(paper: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
result = "首次提出"
|
|
|
elif "improve" in summary_lower or "better" in summary_lower:
|
|
|
result = "性能提升"
|
|
|
+ elif "efficient" in summary_lower or "efficiently" in summary_lower:
|
|
|
+ result = "高效"
|
|
|
+ elif "robust" in summary_lower or "robustly" in summary_lower:
|
|
|
+ result = "鲁棒性强"
|
|
|
+ elif "generalize" in summary_lower or "generalization" in summary_lower:
|
|
|
+ result = "泛化能力强"
|
|
|
+ elif "few-shot" in summary_lower or "few shot" in summary_lower:
|
|
|
+ result = "少样本学习"
|
|
|
+ elif "sim-to-real" in summary_lower or "sim2real" in summary_lower:
|
|
|
+ result = "仿真到现实迁移"
|
|
|
else:
|
|
|
result = "性能优化"
|
|
|
|
|
|
- brief = f"{brief},采用{method}解决{field},实现{result}"
|
|
|
+ # 格式:提出 XXX 框架,采用 XXX 技术,解决 XXX 问题,实现 XXX 效果
|
|
|
+ brief = f"提出{core_method},采用{method}解决{field},实现{result}"
|
|
|
+
|
|
|
+ # 从摘要提取具体标签(4-6 个),优先提取论文具体技术标签
|
|
|
+ tags = []
|
|
|
+
|
|
|
+ # 核心方法标签
|
|
|
+ if "diffusion" in summary_lower:
|
|
|
+ tags.append("扩散模型")
|
|
|
+ if "reinforcement learning" in summary_lower:
|
|
|
+ tags.append("强化学习")
|
|
|
+ if "imitation learning" in summary_lower:
|
|
|
+ tags.append("模仿学习")
|
|
|
+ if "contrastive" in summary_lower:
|
|
|
+ tags.append("对比学习")
|
|
|
+ if "transformer" in summary_lower:
|
|
|
+ tags.append("Transformer")
|
|
|
+ if "self-supervised" in summary_lower:
|
|
|
+ tags.append("自监督学习")
|
|
|
+ if "multi-agent" in summary_lower or "marl" in summary_lower:
|
|
|
+ tags.append("多智能体强化学习")
|
|
|
+ if "world model" in summary_lower:
|
|
|
+ tags.append("世界模型")
|
|
|
+ if "residual policy" in summary_lower:
|
|
|
+ tags.append("残差策略优化")
|
|
|
+ if "preference optimization" in summary_lower:
|
|
|
+ tags.append("偏好优化")
|
|
|
+ if "representation learning" in summary_lower:
|
|
|
+ tags.append("表征学习")
|
|
|
+ if "adaptation" in summary_lower or "adaptive" in summary_lower:
|
|
|
+ tags.append("自适应")
|
|
|
+
|
|
|
+ # 具体任务标签
|
|
|
+ if "robot" in summary_lower and "manipulation" in summary_lower:
|
|
|
+ tags.append("机器人操作")
|
|
|
+ if "dexterous" in summary_lower:
|
|
|
+ tags.append("灵巧操作")
|
|
|
+ if "navigation" in summary_lower:
|
|
|
+ tags.append("导航")
|
|
|
+ if "driving" in summary_lower or "racing" in summary_lower:
|
|
|
+ tags.append("自动驾驶")
|
|
|
+ if "cloth" in summary_lower:
|
|
|
+ tags.append("布料操作")
|
|
|
+ if "piano" in summary_lower:
|
|
|
+ tags.append("音乐演奏")
|
|
|
+ if "humanoid" in summary_lower or "motion" in summary_lower:
|
|
|
+ tags.append("动作生成")
|
|
|
+ if "localization" in summary_lower or "pose estimation" in summary_lower:
|
|
|
+ tags.append("定位")
|
|
|
+ if "traffic" in summary_lower:
|
|
|
+ tags.append("交通场景")
|
|
|
+ if "map" in summary_lower:
|
|
|
+ tags.append("建图")
|
|
|
+
|
|
|
+ # 结果标签
|
|
|
+ if "zero-shot" in summary_lower:
|
|
|
+ tags.append("零样本")
|
|
|
+ if "real-world" in summary_lower:
|
|
|
+ tags.append("真实部署")
|
|
|
+ if "deployment" in summary_lower:
|
|
|
+ tags.append("部署")
|
|
|
+ if "sim-to-real" in summary_lower or "sim2real" in summary_lower:
|
|
|
+ tags.append("仿真到现实")
|
|
|
+ if "generalization" in summary_lower:
|
|
|
+ tags.append("泛化能力")
|
|
|
+ if "few-shot" in summary_lower:
|
|
|
+ tags.append("少样本")
|
|
|
+ if "efficient" in summary_lower:
|
|
|
+ tags.append("高效")
|
|
|
+ if "robust" in summary_lower:
|
|
|
+ tags.append("鲁棒性")
|
|
|
+
|
|
|
+ # 如果标签数量不足 4 个,添加领域特定标签
|
|
|
+ if len(tags) < 4:
|
|
|
+ domain_tags = {
|
|
|
+ "embodied": ["具身智能", "机器人", "真实部署", "操控", "灵巧操作"],
|
|
|
+ "representation": ["表征学习", "潜在空间", "世界模型", "预训练", "自监督"],
|
|
|
+ "reinforcement": ["强化学习", "策略优化", "奖励设计", "离线 RL", "模仿学习"],
|
|
|
+ }
|
|
|
+ fallback = domain_tags.get(paper["domain"], ["AI 论文", "机器学习", "应用研究", "深度学习"])
|
|
|
+ for tag in fallback:
|
|
|
+ if tag not in tags:
|
|
|
+ tags.append(tag)
|
|
|
+ if len(tags) >= 6:
|
|
|
+ break
|
|
|
+
|
|
|
+ # 去重并限制数量
|
|
|
+ tags = list(dict.fromkeys(tags))[:6]
|
|
|
|
|
|
return {
|
|
|
"translated_abstract_zh": f"【LLM 暂不可用,先保留英文摘要要点】{truncate(summary, 220)}",
|
|
|
"brief_explanation_zh": truncate(brief, 86),
|
|
|
- "tags": tags[:5],
|
|
|
+ "tags": tags,
|
|
|
}
|
|
|
|
|
|
|