Spaces:

leonsimon23
/

exactEMR

Sleeping

App Files Files Community

exactEMR / app_bk.py

leonsimon23

Rename app.py to app_bk.py

e50f65d verified about 2 months ago

raw

history blame contribute delete

7.04 kB

	import json
	import os
	import pathlib
	import tempfile
	import textwrap

	import gradio as gr
	import langextract as lx

	# --- 默认模板和示例 (已更新为临床影像报告场景) ---

	# 1. Default extraction instruction (prompt) pre-filled in the UI.
	# The prompt is assembled from its four lines and joined with newlines;
	# there is no leading or trailing newline.
	DEFAULT_PROMPT = "\n".join((
	    "请从影像检查报告中，按顺序提取关键的影像学发现、涉及的解剖部位、尺寸测量、影像学特征以及阴性发现。",
	    "- 提取时必须使用报告中的确切文本。",
	    "- 不要转述或概括。",
	    "- 为每个提取的实体提供详细的属性，以增加结构化信息。",
	))

	# 2. Default extraction examples.
	# A single worked example based on a CT report. Each row below is
	# (extraction_class, extraction_text, attributes); rows are expanded into the
	# dictionary layout that the JSON editor displays.
	_EXAMPLE_REPORT_TEXT = (
	    "腹部CT平扫增强检查显示：肝脏右叶可见一大小约3.2 x 2.8 cm的低密度占位灶，"
	    "边缘清晰，增强扫描后呈轻度环形强化。胰腺及双肾未见明确异常。"
	)

	_EXAMPLE_ROWS = (
	    ("anatomy", "肝脏右叶", {"organ": "肝脏", "lobe": "右叶"}),
	    ("size_measurement", "3.2 x 2.8 cm", {"value": "3.2 x 2.8", "unit": "cm"}),
	    ("finding", "低密度占位灶", {"density": "低密度", "type": "占位灶"}),
	    ("radiologic_feature", "边缘清晰", {"feature_type": "边缘", "description": "清晰"}),
	    (
	        "radiologic_feature",
	        "轻度环形强化",
	        {"feature_type": "增强扫描", "degree": "轻度", "pattern": "环形强化"},
	    ),
	    ("normal_finding", "胰腺及双肾未见明确异常", {"organs": ["胰腺", "双肾"]}),
	)

	DEFAULT_EXAMPLES_DICT = [
	    {
	        "text": _EXAMPLE_REPORT_TEXT,
	        "extractions": [
	            {
	                "extraction_class": row_class,
	                "extraction_text": row_text,
	                "attributes": row_attributes,
	            }
	            for row_class, row_text, row_attributes in _EXAMPLE_ROWS
	        ],
	    },
	]

	# Pretty-printed JSON text of the examples, used as the editor's initial value.
	DEFAULT_EXAMPLES_JSON = json.dumps(
	    DEFAULT_EXAMPLES_DICT,
	    indent=2,
	    ensure_ascii=False,
	)


	# --- 后端处理函数 (无需修改) ---

	def _parse_examples(examples_json):
	    """Convert the user's JSON text into a list of LangExtract ExampleData.

	    Raises:
	        gr.Error: if the text is not valid JSON, is not a non-empty list, or an
	            entry lacks the keys / shapes needed to build the example objects.
	    """
	    try:
	        examples_data = json.loads(examples_json)
	        if not isinstance(examples_data, list) or not examples_data:
	            raise TypeError("示例必须是由示例对象组成的非空 JSON 列表")
	        return [
	            lx.data.ExampleData(
	                text=ex['text'],
	                extractions=[
	                    lx.data.Extraction(**extr) for extr in ex['extractions']
	                ],
	            )
	            for ex in examples_data
	        ]
	    # TypeError covers structurally wrong input (non-dict entries, unknown
	    # Extraction fields) that would otherwise surface as a raw traceback.
	    except (json.JSONDecodeError, KeyError, TypeError) as e:
	        raise gr.Error(f"提取示例的 JSON 格式无效，请检查。错误: {e}") from e


	def _document_text(result, fallback):
	    """Return the text stored on the extraction result, or *fallback*.

	    NOTE(review): the published AnnotatedDocument appears to expose ``text``;
	    ``source_text`` is still tried first to keep the earlier behavior wherever
	    that attribute exists. Confirm against the installed langextract version.
	    """
	    for attr in ("source_text", "text"):
	        value = getattr(result, attr, None)
	        if value is not None:
	            return value
	    return fallback


	def _extraction_to_dict(extraction):
	    """Return a JSON-serializable dict describing one extraction.

	    Uses the object's own ``to_dict()`` when it has one; otherwise reads the
	    fields directly. ``char_interval`` / ``alignment_status`` are read
	    defensively -- presumably present on langextract's Extraction; verify.
	    """
	    to_dict = getattr(extraction, "to_dict", None)
	    if callable(to_dict):
	        return to_dict()

	    interval = getattr(extraction, "char_interval", None)
	    status = getattr(extraction, "alignment_status", None)
	    return {
	        "extraction_class": extraction.extraction_class,
	        "extraction_text": extraction.extraction_text,
	        "attributes": getattr(extraction, "attributes", None),
	        "char_interval": None if interval is None else {
	            "start_pos": getattr(interval, "start_pos", None),
	            "end_pos": getattr(interval, "end_pos", None),
	        },
	        # Enum members are reduced to their value so the dict stays serializable.
	        "alignment_status": getattr(status, "value", status),
	    }


	def extract_information(api_key, prompt, examples_json, input_text):
	    """Run LangExtract on the user's text and return results for the UI.

	    Args:
	        api_key: Google AI Studio API key typed in by the user.
	        prompt: Extraction instruction passed as the prompt description.
	        examples_json: JSON text holding a list of example objects, each with
	            a ``text`` and a list of ``extractions``.
	        input_text: Source text to extract information from.

	    Returns:
	        A ``(display_dict, download_path)`` tuple: the dict has the keys
	        ``source_text`` and ``extractions``; the path points at a JSONL file
	        written by LangExtract.

	    Raises:
	        gr.Error: on missing input, invalid examples, or any failure during
	            extraction or while saving the result.
	    """
	    # 1. Input validation (whitespace-only values count as empty).
	    api_key = (api_key or "").strip()
	    if not api_key:
	        raise gr.Error("请输入您的 Google AI Studio API 密钥。")
	    if not all(value and value.strip() for value in (prompt, examples_json, input_text)):
	        raise gr.Error("提取指令、示例和源文本均不能为空。")

	    # 2. Parse the user-provided JSON examples.
	    examples = _parse_examples(examples_json)

	    # 3. Call LangExtract.
	    try:
	        # The key is passed per call rather than written to os.environ, which
	        # is process-wide and would be shared between concurrent users.
	        result = lx.extract(
	            text_or_documents=input_text,
	            prompt_description=prompt,
	            examples=examples,
	            # Fast, cost-efficient model.
	            # NOTE(review): confirm this model id is still served by the API.
	            model_id="gemini-1.5-flash",
	            api_key=api_key,
	        )

	        # Convert the result into a plain dict so Gradio can render it.
	        output_for_display = {
	            "source_text": _document_text(result, input_text),
	            "extractions": [
	                _extraction_to_dict(ext) for ext in (result.extractions or [])
	            ],
	        }

	        # 4. Write the downloadable file into a fresh private directory; no
	        # handle of our own is held open on the file while the library writes.
	        output_dir = pathlib.Path(tempfile.mkdtemp(prefix="langextract_"))
	        output_name = "extraction_results.jsonl"
	        lx.io.save_annotated_documents(
	            [result],
	            output_name=output_name,
	            output_dir=output_dir,
	        )
	        download_path = str(output_dir / output_name)

	        return output_for_display, download_path

	    except Exception as e:
	        # Top-level boundary: report any LangExtract / API failure in the UI,
	        # keeping the original exception chained for the server log.
	        raise gr.Error(f"提取过程中发生错误: {e}") from e


	# --- Gradio UI 界面 (无需修改) ---

	# Page layout: inputs in the left column, results in the right column.
	with gr.Blocks(title="LangExtract 交互式信息提取工具", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# LangExtract 交互式信息提取工具")
	    gr.Markdown(
	        "\n".join([
	            "在左侧定义您的提取规则和输入文本，然后点击“开始提取”在右侧查看结果。",
	            "您需要一个 [Google AI Studio API Key](https://aistudio.google.com/app/apikey) 才能使用此工具。",
	        ])
	    )

	    with gr.Row():
	        # Left column: everything the user supplies.
	        with gr.Column(scale=1):
	            gr.Markdown("## 1. 输入配置")
	            api_key_input = gr.Textbox(
	                type="password",
	                label="🔑 Google AI Studio API Key",
	                placeholder="在此处粘贴您的 API 密钥...",
	            )

	            gr.Markdown("## 2. 定义提取模板")
	            prompt_input = gr.Textbox(label="提取指令 (Prompt)", lines=5, value=DEFAULT_PROMPT)
	            gr.Markdown("告诉模型您想提取什么，以及遵循什么规则。")

	            # A tall editor so the nested example JSON is readable.
	            examples_input = gr.Code(
	                language="json",
	                label="提取示例 (JSON 格式)",
	                lines=20,
	                value=DEFAULT_EXAMPLES_JSON,
	            )
	            gr.Markdown("提供一两个高质量的示例，指导模型的输出格式。")

	            gr.Markdown("## 3. 输入待提取的文本")
	            text_input = gr.Textbox(
	                label="源文本",
	                placeholder="在此处粘贴您要从中提取信息的临床病历或影像报告...",
	                lines=10,
	            )

	            submit_btn = gr.Button("🚀 开始提取", variant="primary")

	        # Right column: structured output and the downloadable file.
	        with gr.Column(scale=1):
	            gr.Markdown("## 4. 提取结果")
	            json_output = gr.JSON(label="结构化输出 (JSON)")
	            file_output = gr.File(label="⬇️ 下载结果文件")

	    # Event wiring: the button feeds the four inputs to the backend function
	    # and routes its two return values to the JSON view and the file widget.
	    extraction_inputs = [api_key_input, prompt_input, examples_input, text_input]
	    extraction_outputs = [json_output, file_output]
	    submit_btn.click(
	        fn=extract_information,
	        inputs=extraction_inputs,
	        outputs=extraction_outputs,
	    )

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()