Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import langextract as lx | |
| import json | |
| import os | |
| import tempfile | |
| import textwrap | |
# --- Default template and example (clinical imaging-report scenario) ---
# 1. Default extraction instruction (prompt).
# One tuple entry per prompt line; the lines are joined with "\n" and the
# resulting string has no trailing newline.
DEFAULT_PROMPT = "\n".join(
    (
        "请从影像检查报告中,按顺序提取关键的影像学发现、涉及的解剖部位、尺寸测量、影像学特征以及阴性发现。",
        "- 提取时必须使用报告中的确切文本。",
        "- 不要转述或概括。",
        "- 为每个提取的实体提供详细的属性,以增加结构化信息。",
    )
)
# 2. Default extraction example: one annotated CT report.
_EXAMPLE_REPORT_TEXT = "腹部CT平扫增强检查显示:肝脏右叶可见一大小约3.2 x 2.8 cm的低密度占位灶,边缘清晰,增强扫描后呈轻度环形强化。胰腺及双肾未见明确异常。"

# (extraction_class, extraction_text, attributes), listed in the order the
# spans occur in the report text above.
_EXAMPLE_EXTRACTIONS = (
    ("anatomy", "肝脏右叶", {"organ": "肝脏", "lobe": "右叶"}),
    ("size_measurement", "3.2 x 2.8 cm", {"value": "3.2 x 2.8", "unit": "cm"}),
    ("finding", "低密度占位灶", {"density": "低密度", "type": "占位灶"}),
    ("radiologic_feature", "边缘清晰", {"feature_type": "边缘", "description": "清晰"}),
    (
        "radiologic_feature",
        "轻度环形强化",
        {"feature_type": "增强扫描", "degree": "轻度", "pattern": "环形强化"},
    ),
    ("normal_finding", "胰腺及双肾未见明确异常", {"organs": ["胰腺", "双肾"]}),
)

DEFAULT_EXAMPLES_DICT = [
    {
        "text": _EXAMPLE_REPORT_TEXT,
        "extractions": [
            {
                "extraction_class": label,
                "extraction_text": span,
                "attributes": attrs,
            }
            for label, span, attrs in _EXAMPLE_EXTRACTIONS
        ],
    }
]

# Pretty-printed JSON form of the examples, used as the editor's initial
# content in the UI; non-ASCII characters are kept readable.
DEFAULT_EXAMPLES_JSON = json.dumps(DEFAULT_EXAMPLES_DICT, ensure_ascii=False, indent=2)
# --- Backend handler ---
def _parse_examples(examples_json):
    """Convert the user's JSON text into a list of ``lx.data.ExampleData``.

    A single example object (a JSON dict) is accepted and treated as a
    one-element list.

    Raises:
        json.JSONDecodeError: The text is not valid JSON.
        KeyError: An example lacks the ``text`` or ``extractions`` key.
        TypeError: The JSON has the wrong shape (e.g. an example or an
            extraction entry is not a JSON object).
    """
    examples_data = json.loads(examples_json)
    if isinstance(examples_data, dict):
        examples_data = [examples_data]
    return [
        lx.data.ExampleData(
            text=ex["text"],
            extractions=[lx.data.Extraction(**extr) for extr in ex["extractions"]],
        )
        for ex in examples_data
    ]


def extract_information(api_key, prompt, examples_json, input_text):
    """Run LangExtract on ``input_text`` and prepare the results for the UI.

    Args:
        api_key: API key typed into the UI; surrounding whitespace is ignored.
        prompt: Extraction instruction passed as ``prompt_description``.
        examples_json: JSON text holding the few-shot examples. Each example
            needs a ``text`` key and an ``extractions`` list.
        input_text: The text to extract information from.

    Returns:
        A ``(display_dict, download_path)`` tuple: ``display_dict`` has the
        keys ``source_text`` and ``extractions``; ``download_path`` is the
        path of a temporary ``.jsonl`` file holding the saved result.

    Raises:
        gr.Error: On missing inputs, on malformed example JSON, or on any
            failure while calling LangExtract or saving the result.
    """
    # 1. Input validation.
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("请输入您的 Google AI Studio API 密钥。")
    if not prompt or not examples_json or not input_text:
        raise gr.Error("提取指令、示例和源文本均不能为空。")

    # 2. Parse the user-supplied examples. TypeError is caught as well so a
    #    wrongly shaped document yields a readable message, not a traceback.
    try:
        examples = _parse_examples(examples_json)
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        raise gr.Error(f"提取示例的 JSON 格式无效,请检查。错误: {e}") from e

    # 3. Call LangExtract.
    try:
        # The key is passed per call instead of being written to os.environ:
        # the environment is process-wide, so in a shared server process one
        # user's key would remain set for every later request.
        result = lx.extract(
            text_or_documents=input_text,
            prompt_description=prompt,
            examples=examples,
            model_id="gemini-1.5-flash",
            api_key=api_key,
        )

        # NOTE(review): confirm that the installed langextract exposes
        # `AnnotatedDocument.source_text` and `Extraction.to_dict()`; the
        # public README refers to `result.text` and plain dataclass fields.
        output_for_display = {
            "source_text": result.source_text,
            "extractions": [ext.to_dict() for ext in result.extractions],
        }

        # 4. Create the downloadable file. The temporary file is only used to
        #    reserve a unique path; its handle is closed before the library
        #    writes to that path, so two handles are never open on one file.
        with tempfile.NamedTemporaryFile(
            mode='w+', delete=False, suffix='.jsonl', encoding='utf-8'
        ) as tmp_file:
            download_path = tmp_file.name
        # NOTE(review): confirm the `file_path` keyword; the public README
        # calls save_annotated_documents with `output_name` / `output_dir`.
        lx.io.save_annotated_documents([result], file_path=download_path)

        return output_for_display, download_path
    except Exception as e:
        # UI boundary: surface any LangExtract / API failure as a Gradio error.
        raise gr.Error(f"提取过程中发生错误: {e}") from e
# --- Gradio UI ---
# Intro text shown under the page title (two lines of Markdown).
_INTRO_MARKDOWN = (
    "在左侧定义您的提取规则和输入文本,然后点击“开始提取”在右侧查看结果。\n"
    "您需要一个 [Google AI Studio API Key](https://aistudio.google.com/app/apikey) 才能使用此工具。"
)

with gr.Blocks(title="LangExtract 交互式信息提取工具", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# LangExtract 交互式信息提取工具")
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: everything the user fills in.
        with gr.Column(scale=1):
            gr.Markdown("## 1. 输入配置")
            api_key_input = gr.Textbox(
                type="password",
                label="🔑 Google AI Studio API Key",
                placeholder="在此处粘贴您的 API 密钥...",
            )

            gr.Markdown("## 2. 定义提取模板")
            prompt_input = gr.Textbox(label="提取指令 (Prompt)", value=DEFAULT_PROMPT, lines=5)
            gr.Markdown("告诉模型您想提取什么,以及遵循什么规则。")
            # Tall editor so the nested example JSON is readable.
            examples_input = gr.Code(
                value=DEFAULT_EXAMPLES_JSON,
                language="json",
                label="提取示例 (JSON 格式)",
                lines=20,
            )
            gr.Markdown("提供一两个高质量的示例,指导模型的输出格式。")

            gr.Markdown("## 3. 输入待提取的文本")
            text_input = gr.Textbox(
                label="源文本",
                placeholder="在此处粘贴您要从中提取信息的临床病历或影像报告...",
                lines=10,
            )

            submit_btn = gr.Button("🚀 开始提取", variant="primary")

        # Right column: extraction results.
        with gr.Column(scale=1):
            gr.Markdown("## 4. 提取结果")
            json_output = gr.JSON(label="结构化输出 (JSON)")
            file_output = gr.File(label="⬇️ 下载结果文件")

    # --- Event wiring ---
    # The four inputs are passed positionally to extract_information; its
    # two return values fill the JSON viewer and the download widget.
    submit_btn.click(
        fn=extract_information,
        inputs=[api_key_input, prompt_input, examples_input, text_input],
        outputs=[json_output, file_output],
    )

if __name__ == "__main__":
    demo.launch()