import asyncio
import logging
import os
from typing import List, Dict, Any

import requests
import google.generativeai as genai
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# --- Configuration ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API key and search backend URL are read from the environment.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
SEARCH_API_BASE_URL = os.getenv("SEARCH_API_BASE_URL", "").strip()

# Configure Google Gemini. The key may be None here; every AI helper below
# checks GEMINI_API_KEY before calling the model.
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel('gemini-2.5-flash')

# --- FastAPI application setup ---
app = FastAPI(
    title="AI Search Agent",
    description="一个使用 Gemini-2.5-Flash 进行查询优化和结果摘要的智能中间层。",
    version="2.0.0",  # Version bump!
)

# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# --- Data models ---
# Request body of POST /search. Documented with comments rather than a class
# docstring so the generated OpenAPI schema description stays unchanged.
class SearchRequest(BaseModel):
    platform: str          # forwarded unchanged to the search backend
    query: str             # natural-language query typed by the user
    max_results: int = 10  # forwarded unchanged to the search backend


# --- Core AI features ---
async def get_ai_keywords(natural_language_query: str) -> str:
    """Turn a natural-language query into a boolean keyword string via Gemini.

    Returns the original query unchanged when GEMINI_API_KEY is not set, when
    Gemini returns an empty string, or when the Gemini call raises.
    """
    if not GEMINI_API_KEY:
        logger.warning("GEMINI_API_KEY 未设置,将使用原始查询。")
        return natural_language_query

    prompt = f"""
    You are an expert academic researcher. Your task is to convert a user's natural language query into a highly effective, concise, boolean-logic keyword string for searching academic databases like PubMed.
    - Use boolean operators like AND, OR.
    - Use parentheses for grouping.
    - Focus on core concepts.
    - Keep the string concise and in English.
    - Do not add any explanation, markdown, or quotation marks. Just return the pure keyword string.

    User Query: "{natural_language_query}"
    Keyword String:
    """
    try:
        logger.info(f"向 Gemini 发送请求 [关键词提炼],查询: '{natural_language_query}'")
        response = await gemini_model.generate_content_async(prompt)
        optimized_query = response.text.strip()
        logger.info(f"原始查询: '{natural_language_query}' -> Gemini 优化关键词: '{optimized_query}'")
        if not optimized_query:
            logger.warning("Gemini 返回空关键词,回退到原始查询。")
            return natural_language_query
        return optimized_query
    except Exception as e:
        # Best effort: any Gemini failure falls back to the raw user query.
        logger.error(f"调用 Gemini API [关键词提炼] 失败: {e}")
        return natural_language_query


# =================================================================
# BEGIN: AI summary feature
# =================================================================
def _format_paper_for_prompt(index: int, paper: Dict[str, Any]) -> str:
    """Render one paper as a titled abstract section for the summary prompt."""
    # Chain with `or` so that keys present with None/"" still get a placeholder.
    title = paper.get('title') or 'No Title'
    abstract = (
        paper.get('abstract')
        or paper.get('summary')
        or 'No Abstract Available.'
    )
    return f"### Paper {index}: {title}\nAbstract: {abstract}\n\n"


async def summarize_results_with_ai(papers: List[Dict[str, Any]], original_query: str) -> str | None:
    """Generate an overall summary of the search results with Gemini.

    Only the first five entries of ``papers`` are considered, and entries that
    are not dicts are skipped. Returns None when GEMINI_API_KEY is not set,
    when there is nothing to summarise, or when the Gemini call raises.
    """
    if not GEMINI_API_KEY or not papers:
        return None

    # Only the first 5 papers (or fewer) are used, for efficiency and relevance.
    papers_for_summary = [paper for paper in papers[:5] if isinstance(paper, dict)]
    if not papers_for_summary:
        return None

    # Build the context block that is embedded in the prompt.
    context = "".join(
        _format_paper_for_prompt(index, paper)
        for index, paper in enumerate(papers_for_summary, start=1)
    )

    prompt = f"""
    You are a helpful medical research assistant. Based on the abstracts of the scientific papers provided below, write a concise and easy-to-understand summary that directly answers the user's original research question.
    - Start with a direct introductory sentence.
    - Use bullet points to list the key findings.
    - Base your summary STRICTLY on the information given in the abstracts. Do not add any outside knowledge.
    - The summary should be in clear, accessible language.

    USER'S ORIGINAL QUESTION: "{original_query}"
    PROVIDED ABSTRACTS:
    {context}
    CONCISE SUMMARY:
    """
    try:
        logger.info(f"向 Gemini 发送请求 [结果摘要],基于 {len(papers_for_summary)} 篇论文。")
        response = await gemini_model.generate_content_async(prompt)
        summary = response.text.strip()
        logger.info("Gemini 摘要生成成功。")
        return summary
    except Exception as e:
        logger.error(f"调用 Gemini API [结果摘要] 失败: {e}")
        return None  # A failed summary must not break the main search flow.
# =================================================================
# END: AI summary feature
# =================================================================


# --- API endpoints ---
# Health check. Documented with a comment rather than a docstring so the
# generated OpenAPI description stays unchanged.
@app.get("/")
def read_root():
    return {"status": "AI Search Agent is running"}


# Main endpoint: optimise the query with Gemini, forward it to the search
# backend, then attach an AI summary of the returned results.
# Responds 500 when SEARCH_API_BASE_URL is not configured and 503 when the
# backend call fails or its body cannot be decoded as JSON.
@app.post("/search")
async def intelligent_search(request: SearchRequest):
    if not SEARCH_API_BASE_URL:
        raise HTTPException(status_code=500, detail="SEARCH_API_BASE_URL 未配置")

    # 1. Keyword extraction
    optimized_query = await get_ai_keywords(request.query)
    search_payload = {
        "platform": request.platform,
        "query": optimized_query,
        "max_results": request.max_results,
    }

    # 2. Call the search backend
    try:
        logger.info(f"向搜索后端发送请求: {search_payload}")
        # Strip a trailing slash so a base URL like "http://host/" does not
        # produce "http://host//search".
        search_url = f"{SEARCH_API_BASE_URL.rstrip('/')}/search"
        # requests is synchronous; run it in a worker thread so the event
        # loop is not blocked while waiting for the backend.
        response = await asyncio.to_thread(
            requests.post, search_url, json=search_payload, timeout=30
        )
        response.raise_for_status()
        search_results_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Depending on the requests version, a non-JSON body can surface as a
        # plain ValueError rather than a RequestException; handle both alike.
        logger.error(f"调用搜索后端失败: {e}")
        raise HTTPException(status_code=503, detail=f"无法连接到搜索服务: {str(e)}") from e

    # A JSON body that is not an object has no "results" key to read; treat it
    # as an empty result set instead of failing on `.get`.
    if isinstance(search_results_data, dict):
        results = search_results_data.get("results", [])
    else:
        results = []

    # 3. Generate the AI summary (only for a non-empty list of results)
    ai_summary = None
    if results and isinstance(results, list):
        ai_summary = await summarize_results_with_ai(results, request.query)

    # 4. Assemble the final response
    final_response = {
        "original_query": request.query,
        "optimized_query": optimized_query,
        "ai_summary": ai_summary,
        "results": results,
    }
    return final_response