# ai-search-agent / app_bak2.py
# leonsimon23's picture
# Rename app.py to app_bak2.py
# d25187e verified
import os
import requests
import google.generativeai as genai
import logging
import asyncio
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Any
# --- Configuration ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read the API key and the backend URL from environment variables.
# GEMINI_API_KEY is None when the variable is unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Base URL of the search backend ("" when unset). Surrounding whitespace and
# trailing slashes are removed so that f"{SEARCH_API_BASE_URL}/search" never
# produces a double slash such as "https://host//search".
SEARCH_API_BASE_URL = os.getenv("SEARCH_API_BASE_URL", "").strip().rstrip("/")
# Configure the Google Gemini client with the key read from the environment,
# then create the single model handle shared by all requests.
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel(model_name="gemini-2.5-flash")
# --- FastAPI application setup ---
app = FastAPI(
    title="AI Search Agent",
    description="一个使用 Gemini-2.5-Flash 进行查询优化和结果摘要的智能中间层。",
    version="2.0.0",  # Version bump!
)

# CORS: the API is callable from any origin, with any method and any header.
# Credentialed cross-origin requests are deliberately NOT enabled: the
# FastAPI/Starlette CORS documentation states that allow_origins,
# allow_methods and allow_headers cannot be ["*"] when allow_credentials is
# True. The endpoints visible in this file do not read cookies or
# Authorization headers, so nothing here depends on credentials.
# NOTE(review): if a front end really needs credentialed requests, list its
# origin(s) explicitly in allow_origins and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- Data models ---
# Request body accepted by the POST /search endpoint.
class SearchRequest(BaseModel):
    # Search platform identifier; forwarded unchanged to the search backend.
    platform: str
    # The user's query in natural language; it is passed to the keyword
    # optimisation step before searching.
    query: str
    # Maximum number of results requested from the backend (defaults to 10).
    max_results: int = 10
# --- Core AI functions ---
async def get_ai_keywords(natural_language_query: str) -> str:
    """Turn a natural-language question into a boolean keyword string via Gemini.

    This step is best-effort: whenever the optimisation cannot be performed
    (blank query, missing API key, empty model answer, or any error raised
    while calling Gemini) the original query is returned unchanged so the
    search can still proceed.

    Args:
        natural_language_query: The query exactly as supplied by the user.

    Returns:
        The keyword string produced by Gemini with surrounding whitespace
        removed, or ``natural_language_query`` itself on any fallback.
    """
    # A blank query has nothing to optimise; skip the API round-trip entirely.
    if not natural_language_query or not natural_language_query.strip():
        return natural_language_query

    if not GEMINI_API_KEY:
        logger.warning("GEMINI_API_KEY 未设置,将使用原始查询。")
        return natural_language_query

    # The prompt text is sent verbatim to the model; its lines are kept at
    # column 0 so no indentation leaks into the prompt.
    prompt = f"""
You are an expert academic researcher. Your task is to convert a user's natural language query into a highly effective, concise, boolean-logic keyword string for searching academic databases like PubMed.
- Use boolean operators like AND, OR.
- Use parentheses for grouping.
- Focus on core concepts.
- Keep the string concise and in English.
- Do not add any explanation, markdown, or quotation marks. Just return the pure keyword string.
User Query: "{natural_language_query}"
Keyword String:
"""

    try:
        logger.info(f"向 Gemini 发送请求 [关键词提炼],查询: '{natural_language_query}'")
        response = await gemini_model.generate_content_async(prompt)
        optimized_query = response.text.strip()
    except Exception as e:
        # Deliberately broad: any failure of the optional AI step must fall
        # back to the raw query. logger.exception keeps the traceback.
        logger.exception(f"调用 Gemini API [关键词提炼] 失败: {e}")
        return natural_language_query

    logger.info(f"原始查询: '{natural_language_query}' -> Gemini 优化关键词: '{optimized_query}'")
    if not optimized_query:
        logger.warning("Gemini 返回空关键词,回退到原始查询。")
        return natural_language_query
    return optimized_query
# =================================================================
# BEGIN: 新增功能 - AI 摘要
# =================================================================
async def summarize_results_with_ai(papers: List[Dict[str, Any]], original_query: str) -> str | None:
"""
使用 Gemini 根据搜索结果的摘要生成一个综合性总结。
"""
if not GEMINI_API_KEY or not papers:
return None
# 只选择前 5 篇或更少的论文进行摘要,以提高效率和相关性
papers_for_summary = papers[:5]
# 构建用于摘要的上下文
context = ""
for i, paper in enumerate(papers_for_summary):
title = paper.get('title', 'No Title')
abstract = paper.get('abstract') or paper.get('summary', 'No Abstract Available.')
context += f"### Paper {i+1}: {title}\nAbstract: {abstract}\n\n"
# 精心设计的摘要 Prompt
prompt = f"""
You are a helpful medical research assistant. Based on the abstracts of the scientific papers provided below, write a concise and easy-to-understand summary that directly answers the user's original research question.
- Start with a direct introductory sentence.
- Use bullet points to list the key findings.
- Base your summary STRICTLY on the information given in the abstracts. Do not add any outside knowledge.
- The summary should be in clear, accessible language.
USER'S ORIGINAL QUESTION: "{original_query}"
PROVIDED ABSTRACTS:
{context}
CONCISE SUMMARY:
"""
try:
logger.info(f"向 Gemini 发送请求 [结果摘要],基于 {len(papers_for_summary)} 篇论文。")
response = await gemini_model.generate_content_async(prompt)
summary = response.text.strip()
logger.info("Gemini 摘要生成成功。")
return summary
except Exception as e:
logger.error(f"调用 Gemini API [结果摘要] 失败: {e}")
return None # 如果摘要失败,不影响主流程
# =================================================================
# END: 新增功能 - AI 摘要
# =================================================================
# --- API endpoints ---
@app.get("/")
def read_root():
    # Fixed payload; lets a caller confirm that the service is reachable.
    status_payload = {"status": "AI Search Agent is running"}
    return status_payload
@app.post("/search")
async def intelligent_search(request: SearchRequest):
    """Run an AI-assisted search: optimise the query, search, then summarise.

    Steps:
      1. Convert ``request.query`` into a keyword string with ``get_ai_keywords``.
      2. POST platform, keywords and max_results to ``{SEARCH_API_BASE_URL}/search``.
      3. When the backend returned results, ask ``summarize_results_with_ai``
         for a summary (``None`` when unavailable).
      4. Return a dict with ``original_query``, ``optimized_query``,
         ``ai_summary`` and ``results`` (always a list-like value, ``[]`` when
         the backend supplied none).

    Raises:
        HTTPException: 500 when ``SEARCH_API_BASE_URL`` is not configured;
            503 when the backend call fails (connection error, timeout,
            non-2xx status, or a body that is not valid JSON).
    """
    if not SEARCH_API_BASE_URL:
        raise HTTPException(status_code=500, detail="SEARCH_API_BASE_URL 未配置")

    # 1. Keyword extraction
    optimized_query = await get_ai_keywords(request.query)
    search_payload = {
        "platform": request.platform,
        "query": optimized_query,
        "max_results": request.max_results,
    }

    # 2. Call the search backend
    try:
        logger.info(f"向搜索后端发送请求: {search_payload}")
        search_url = f"{SEARCH_API_BASE_URL}/search"
        # requests is synchronous: run it in a worker thread via
        # asyncio.to_thread so the event loop is not blocked.
        response = await asyncio.to_thread(
            requests.post, search_url, json=search_payload, timeout=30
        )
        response.raise_for_status()
        search_results_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError is included because response.json() raises a plain
        # ValueError on requests versions that predate
        # requests.exceptions.JSONDecodeError.
        logger.exception(f"调用搜索后端失败: {e}")
        raise HTTPException(status_code=503, detail=f"无法连接到搜索服务: {str(e)}") from e

    # The backend body is untrusted JSON: it may be null, a list, or an object
    # without (or with a null) "results" key. Normalise once so that both the
    # summary step and the final response use the same safe value.
    results = None
    if isinstance(search_results_data, dict):
        results = search_results_data.get("results")
    if not results:
        results = []

    # 3. (new) Generate the AI summary
    ai_summary = None
    if results:
        ai_summary = await summarize_results_with_ai(results, request.query)

    # 4. Assemble the final response
    return {
        "original_query": request.query,
        "optimized_query": optimized_query,
        "ai_summary": ai_summary,  # summary added to the response
        "results": results,
    }