Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
import os
import tempfile
import textwrap
from collections import Counter
from datetime import datetime
from pathlib import Path

import gradio as gr
import langextract as lx
|
| 8 |
+
|
| 9 |
+
# --- Preset template library ---
# Maps a report-type label (shown in the UI radio selector) to:
#   "prompt":  the extraction instructions given to the model, and
#   "example": a JSON string holding a list of few-shot examples, each with a
#              "text" and the "extractions" expected from that text.
# The example is stored as a JSON *string* because it is shown in an editable
# code box and parsed back with json.loads() before extraction.
REPORT_TEMPLATES = {
    "影像报告": {
        "prompt": textwrap.dedent("""\
            请从影像检查报告中提取以下关键信息:
            - 检查部位和器官
            - 病灶的位置、大小、密度/信号特征
            - 增强扫描表现
            - 阴性发现(未见异常的部位)

            提取规则:
            1. 必须使用报告中的原文,不要改写
            2. 保留数值的精确度
            3. 完整提取解剖学描述"""),
        "example": json.dumps([{
            "text": "胸部CT平扫显示:右肺上叶尖段见结节影,大小约1.2 x 0.9 cm,边缘毛糙,密度不均。纵隔淋巴结未见明显肿大。",
            "extractions": [
                {"extraction_class": "anatomy", "extraction_text": "右肺上叶尖段", "attributes": {"organ": "肺", "side": "右", "lobe": "上叶", "segment": "尖段"}},
                {"extraction_class": "finding", "extraction_text": "结节影", "attributes": {"type": "结节"}},
                {"extraction_class": "size", "extraction_text": "1.2 x 0.9 cm", "attributes": {"length": "1.2", "width": "0.9", "unit": "cm"}},
                {"extraction_class": "feature", "extraction_text": "边缘毛糙", "attributes": {"feature_type": "边缘", "description": "毛糙"}},
                {"extraction_class": "feature", "extraction_text": "密度不均", "attributes": {"feature_type": "密度", "description": "不均"}},
                {"extraction_class": "negative_finding", "extraction_text": "纵隔淋巴结未见明显肿大", "attributes": {"location": "纵隔", "structure": "淋巴结"}}
            ]
        }], ensure_ascii=False, indent=2)
    },
    "病理报告": {
        "prompt": textwrap.dedent("""\
            请从病理报告中提取以下关键信息:
            - 标本来源和类型
            - 组织学分型
            - 肿瘤分级
            - 免疫组化结果
            - 切缘情况

            提取规则:
            1. 完整保留病理诊断术语
            2. 提取所有阳性和阴性的免疫组化标记
            3. 保留分级和分期信息"""),
        "example": json.dumps([{
            "text": "(右乳腺)浸润性导管癌,组织学II级,肿瘤大小2.5cm。免疫组化:ER(+,90%),PR(+,70%),HER-2(-),Ki-67(30%)。切缘未见癌累及。",
            "extractions": [
                {"extraction_class": "specimen", "extraction_text": "右乳腺", "attributes": {"location": "右乳腺"}},
                {"extraction_class": "diagnosis", "extraction_text": "浸润性导管癌", "attributes": {"type": "癌", "subtype": "浸润性导管癌"}},
                {"extraction_class": "grade", "extraction_text": "组织学II级", "attributes": {"grading_system": "组织学", "grade": "II"}},
                {"extraction_class": "size", "extraction_text": "2.5cm", "attributes": {"value": "2.5", "unit": "cm"}},
                {"extraction_class": "ihc_marker", "extraction_text": "ER(+,90%)", "attributes": {"marker": "ER", "result": "阳性", "percentage": "90"}},
                {"extraction_class": "ihc_marker", "extraction_text": "PR(+,70%)", "attributes": {"marker": "PR", "result": "阳性", "percentage": "70"}},
                {"extraction_class": "ihc_marker", "extraction_text": "HER-2(-)", "attributes": {"marker": "HER-2", "result": "阴性"}},
                {"extraction_class": "ihc_marker", "extraction_text": "Ki-67(30%)", "attributes": {"marker": "Ki-67", "percentage": "30"}},
                {"extraction_class": "margin", "extraction_text": "切缘未见癌累及", "attributes": {"status": "阴性"}}
            ]
        }], ensure_ascii=False, indent=2)
    },
    "病历摘要": {
        "prompt": textwrap.dedent("""\
            请从病历中提取以下关键信息:
            - 主诉和现病史要点
            - 既往史(重要疾病和手术史)
            - 体格检查阳性体征
            - 辅助检查异常结果
            - 诊断和治疗方案

            提取规则:
            1. 提取关键时间节点
            2. 保留症状的完整描述
            3. 提取数值型指标"""),
        "example": json.dumps([{
            # Single-quoted on purpose: the text itself contains double quotes,
            # which would otherwise terminate the literal early (SyntaxError).
            "text": '患者主因"反复胸痛3月,加重1周"入院。既往高血压病史10年,2型糖尿病5年。入院查体:BP 150/95mmHg,心率92次/分。心电图示:II、III、aVF导联ST段压低0.1mV。',
            "extractions": [
                {"extraction_class": "chief_complaint", "extraction_text": "反复胸痛3月,加重1周", "attributes": {"symptom": "胸痛", "duration": "3月", "change": "加重1周"}},
                {"extraction_class": "past_history", "extraction_text": "高血压病史10年", "attributes": {"disease": "高血压", "duration": "10年"}},
                {"extraction_class": "past_history", "extraction_text": "2型糖尿病5年", "attributes": {"disease": "2型糖尿病", "duration": "5年"}},
                {"extraction_class": "vital_sign", "extraction_text": "BP 150/95mmHg", "attributes": {"type": "血压", "systolic": "150", "diastolic": "95", "unit": "mmHg"}},
                {"extraction_class": "vital_sign", "extraction_text": "心率92次/分", "attributes": {"type": "心率", "value": "92", "unit": "次/分"}},
                {"extraction_class": "exam_finding", "extraction_text": "II、III、aVF导联ST段压低0.1mV", "attributes": {"exam_type": "心电图", "leads": ["II", "III", "aVF"], "finding": "ST段压低", "value": "0.1", "unit": "mV"}}
            ]
        }], ensure_ascii=False, indent=2)
    }
}
|
| 89 |
+
|
| 90 |
+
# --- Sample text library ---
# Maps a label shown in the "quick load" dropdown to a ready-made report text
# that is copied into the input box when selected.
SAMPLE_TEXTS = {
    "影像报告示例": "头颅MRI平扫增强:左侧基底节区见片状异常信号影,T1WI呈低信号,T2WI及FLAIR呈高信号,大小约4.5 x 3.2 x 3.8 cm,周围见大片水肿带。增强扫描后病灶呈不均匀明显强化,周围水肿带未见强化。中线结构轻度右移约0.5cm。双侧侧脑室对称,未见明显扩大。",
    "病理报告示例": "(胃窦)腺癌,中分化,浸润至肌层。肿瘤大小3.0 x 2.5 cm。免疫组化:CK(+),CK7(-),CK20(+),CDX-2(+),Her-2(1+),Ki-67阳性指数约40%。送检淋巴结12枚,见癌转移3枚(3/12)。",
    # Single-quoted on purpose: the text itself contains double quotes, which
    # would otherwise terminate the literal early (SyntaxError).
    "病历示例": '患者,女性,58岁,因"发现左乳腺肿物2月"入院。患者2月前无意中发现左乳腺外上象限肿物,约2cm大小,无疼痛,无乳头溢液。既往体健。查体:左乳外上象限可触及约2.5 x 2.0cm肿物,质硬,边界欠清,活动度差,无压痛。左侧腋窝可触及1枚肿大淋巴结,约1.5cm。辅助检查:乳腺超声示左乳外上象限低回声结节,BI-RADS 4C类。'
}
|
| 96 |
+
|
| 97 |
+
# --- Extraction history ---
# Most-recent-first list of history entries (dicts built by save_to_history).
# NOTE(review): this is a module-level list, so every caller in this process
# reads and writes the same history; confirm that is acceptable if the app
# serves more than one user, since entries contain previews of the input text.
extraction_history = []

# Maximum number of entries kept in extraction_history.
_MAX_HISTORY_ENTRIES = 10
# Inputs longer than this many characters are truncated in the preview.
_PREVIEW_LENGTH = 100


def save_to_history(input_text, result, template_name):
    """Record one extraction run and return the refreshed history Markdown.

    Args:
        input_text: The text that was submitted for extraction. Only the first
            ``_PREVIEW_LENGTH`` characters (plus ``"..."``) are kept as preview.
        result: Dict holding the extraction output; its ``"extractions"`` list
            is counted. A missing or ``None`` value counts as zero items.
        template_name: Label of the template in use when the run was made.

    Returns:
        The Markdown produced by ``format_history_display()`` after the new
        entry has been inserted.
    """
    if len(input_text) > _PREVIEW_LENGTH:
        preview = input_text[:_PREVIEW_LENGTH] + "..."
    else:
        preview = input_text

    history_entry = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "template": template_name,
        "input_preview": preview,
        # `or []` also covers an explicit None, which .get()'s default does not.
        "extraction_count": len(result.get("extractions") or []),
        "full_result": result,
    }
    extraction_history.insert(0, history_entry)  # newest first
    # Trim in place so other references to the list stay valid.
    del extraction_history[_MAX_HISTORY_ENTRIES:]
    return format_history_display()


def format_history_display():
    """Render ``extraction_history`` as Markdown, newest entry first.

    Returns:
        A placeholder message when the history is empty, otherwise one
        Markdown section per entry (heading with index and timestamp, then the
        template / item count line, then the text preview).
    """
    if not extraction_history:
        return "暂无提取历史"

    return "".join(
        f"### 记录 {i} - {entry['timestamp']}\n"
        f"**模板**: {entry['template']} | **提取项**: {entry['extraction_count']}项\n"
        f"**文本预览**: {entry['input_preview']}\n\n"
        for i, entry in enumerate(extraction_history, 1)
    )
|
| 125 |
+
|
| 126 |
+
# --- Statistics ---
def generate_statistics(result):
    """Build a Markdown summary of how many extractions fall in each class.

    Args:
        result: Dict with an ``"extractions"`` list of dicts. Each item's
            ``"extraction_class"`` is used as its category; items without that
            key are counted under ``"未分类"``.

    Returns:
        A placeholder message when there are no extractions, otherwise a
        Markdown block with the total count followed by one bullet per class,
        ordered by descending count (ties keep first-seen order).
    """
    extractions = result.get("extractions", [])
    if not extractions:
        return "暂无统计信息"

    class_counts = Counter(
        ext.get("extraction_class", "未分类") for ext in extractions
    )

    lines = [
        "### 📊 提取统计",
        f"**总提取项数**: {len(extractions)}",
        "",
        "**分类统计**:",
    ]
    # most_common() sorts by count, keeping first-seen order among equal counts.
    lines.extend(f"- {cls}: {count}项" for cls, count in class_counts.most_common())
    return "\n".join(lines) + "\n"
|
| 146 |
+
|
| 147 |
+
# --- Backend handlers ---
def _extraction_to_dict(extraction):
    """Return a JSON-friendly dict for one langextract extraction object."""
    # NOTE(review): the Extraction dataclass in published langextract releases
    # does not appear to define to_dict(); it is still preferred when present
    # so behavior is unchanged on any version that provides it.
    to_dict = getattr(extraction, "to_dict", None)
    if callable(to_dict):
        return to_dict()

    item = {
        "extraction_class": extraction.extraction_class,
        "extraction_text": extraction.extraction_text,
        "attributes": getattr(extraction, "attributes", None) or {},
    }
    interval = getattr(extraction, "char_interval", None)
    if interval is not None:
        item["char_interval"] = {
            "start_pos": getattr(interval, "start_pos", None),
            "end_pos": getattr(interval, "end_pos", None),
        }
    return item


def extract_information(api_key, prompt, examples_json, input_text, template_name,
                        model_id="gemini-1.5-flash"):
    """Run a langextract extraction and prepare every value the UI displays.

    Args:
        api_key: API key supplied by the user; surrounding whitespace is ignored.
        prompt: Extraction instructions passed as ``prompt_description``.
        examples_json: JSON string holding a list of ``{"text", "extractions"}``
            few-shot examples.
        input_text: The text to extract from.
        template_name: Template label recorded alongside the history entry.
        model_id: Model identifier forwarded to ``lx.extract``.
            NOTE(review): the default is the value the app originally
            hard-coded; confirm the provider still serves this model.

    Returns:
        A 5-tuple ``(output_dict, download_path, stats, history, update)``:
        the structured result, the path of the saved ``.jsonl`` file, the
        statistics Markdown, the history Markdown, and a ``gr.update`` that
        makes a component visible (the caller wires it to the file component).

    Raises:
        gr.Error: If the key or text is empty, the examples cannot be parsed,
            or the extraction / post-processing fails.
    """
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("⚠️ 请输入您的 Google AI Studio API 密钥")
    if not input_text or not input_text.strip():
        raise gr.Error("⚠️ 请输入待提取的文本内容")

    try:
        examples_data = json.loads(examples_json)
        examples = [
            lx.data.ExampleData(
                text=ex['text'],
                extractions=[lx.data.Extraction(**extr) for extr in ex['extractions']]
            ) for ex in examples_data
        ]
    # TypeError covers non-list/non-dict JSON and unexpected Extraction fields.
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        raise gr.Error(f"❌ 示例JSON格式错误: {e}") from e

    try:
        # Pass the key per call instead of writing it to os.environ: the
        # environment is shared by the whole process, so one user's key would
        # otherwise persist and be visible to every later request.
        result = lx.extract(
            text_or_documents=input_text,
            prompt_description=prompt,
            examples=examples,
            model_id=model_id,
            api_key=api_key,
        )

        output_dict = {
            # The annotated document exposes its source as `text`; fall back to
            # the submitted input, which is the same content for a single string.
            "source_text": getattr(result, "text", None) or input_text,
            "extractions": [
                _extraction_to_dict(ext) for ext in (result.extractions or [])
            ],
        }

        # save_annotated_documents takes an output directory and file name
        # (not a file path), so write into a private temp directory.
        output_dir = Path(tempfile.mkdtemp(prefix="langextract_"))
        output_name = "extraction_results.jsonl"
        lx.io.save_annotated_documents(
            [result], output_name=output_name, output_dir=output_dir
        )
        download_path = str(output_dir / output_name)

        stats = generate_statistics(output_dict)
        history = save_to_history(input_text, output_dict, template_name)

        return output_dict, download_path, stats, history, gr.update(visible=True)

    # Top-level UI boundary: surface any failure to the user as a gr.Error,
    # keeping the original exception chained for the server log.
    except Exception as e:
        raise gr.Error(f"❌ 提取失败: {str(e)}") from e
|
| 196 |
+
|
| 197 |
+
def load_template(template_name):
    """Look up a preset template and return its ``(prompt, example)`` pair.

    Returns a pair of empty strings when ``template_name`` is not a key of
    ``REPORT_TEMPLATES``.
    """
    template = REPORT_TEMPLATES.get(template_name)
    if template is None:
        return "", ""
    return template["prompt"], template["example"]
|
| 203 |
+
|
| 204 |
+
def load_sample_text(sample_name):
    """Return the sample text stored under ``sample_name``, or ``""`` if none."""
    try:
        return SAMPLE_TEXTS[sample_name]
    except KeyError:
        return ""
|
| 207 |
+
|
| 208 |
+
# --- Gradio interface ---
# Stylesheet passed to gr.Blocks(css=...).
#   #header     styles the row created with elem_id="header" (gradient banner).
#   #stats-box  styles the Markdown created with elem_id="stats-box".
#   .template-btn: no component in this file appears to use this class.
custom_css = """
#header {
text-align: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 2rem;
border-radius: 10px;
color: white;
margin-bottom: 2rem;
}
#header h1 {
margin: 0;
font-size: 2.5rem;
font-weight: 700;
}
#header p {
margin: 0.5rem 0 0 0;
font-size: 1.1rem;
opacity: 0.95;
}
.template-btn {
margin: 0.25rem !important;
}
#stats-box {
background: #f8f9fa;
padding: 1rem;
border-radius: 8px;
border-left: 4px solid #667eea;
}
"""
|
| 238 |
+
|
| 239 |
+
# Page layout: a header banner, then one row of three equal-width columns
# (configuration, text input, results), a collapsible help section, and the
# event wiring. Components are laid out in the order they are created.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="医学信息智能提取系统") as demo:

    # Page header
    with gr.Row(elem_id="header"):
        gr.Markdown("""
# 🏥 医学信息智能提取系统
### 基于 AI 的结构化医学文本分析工具
""")

    with gr.Row():
        # Left column: configuration
        with gr.Column(scale=1):
            gr.Markdown("### 🔐 API 配置")
            api_key_input = gr.Textbox(
                label="Google AI Studio API Key",
                type="password",
                placeholder="请输入您的 API 密钥...",
                info="获取密钥: https://aistudio.google.com/app/apikey"
            )

            gr.Markdown("### 📋 选择报告类型")
            template_selector = gr.Radio(
                choices=list(REPORT_TEMPLATES.keys()),
                value="影像报告",
                label="预设模板",
                info="选择适合您文本类型的模板"
            )

            # Selecting a template does not apply it; this button does.
            load_template_btn = gr.Button("📥 加载模板", variant="secondary", size="sm")

            gr.Markdown("### ✏️ 自定义提取规则")
            # Pre-filled with the same template the radio selects by default.
            prompt_input = gr.Textbox(
                label="提取指令",
                value=REPORT_TEMPLATES["影像报告"]["prompt"],
                lines=8,
                placeholder="描述您想提取的信息类型和规则..."
            )

            with gr.Accordion("🎯 提取示例 (JSON格式)", open=False):
                examples_input = gr.Code(
                    value=REPORT_TEMPLATES["影像报告"]["example"],
                    language="json",
                    lines=15,
                    label="示例数据"
                )

        # Middle column: text input
        with gr.Column(scale=1):
            gr.Markdown("### 📄 输入医学文本")

            sample_selector = gr.Dropdown(
                choices=list(SAMPLE_TEXTS.keys()),
                label="快速加载示例",
                value=None
            )

            text_input = gr.Textbox(
                label="待提取文本",
                lines=18,
                placeholder="请粘贴或输入医学报告、病历等文本...\n\n支持:\n• 影像报告 (CT/MRI/X线等)\n• 病理报告\n• 病历记录\n• 检验报告",
                max_lines=25
            )

            with gr.Row():
                clear_btn = gr.Button("🗑️ 清空", size="sm")
                submit_btn = gr.Button("🚀 开始提取", variant="primary", size="lg", scale=2)

        # Right column: results
        with gr.Column(scale=1):
            gr.Markdown("### ✨ 提取结果")

            result_tabs = gr.Tabs()
            with result_tabs:
                with gr.Tab("📊 结构化数据"):
                    json_output = gr.JSON(label="提取结果", show_label=False)

                with gr.Tab("📈 统计分析"):
                    stats_output = gr.Markdown("点击'开始提取'后显示统计信息", elem_id="stats-box")

                with gr.Tab("📜 历史记录"):
                    history_output = gr.Markdown("暂无提取历史")

            # Hidden until an extraction returns an update that shows it.
            file_output = gr.File(label="💾 下载结果文件 (.jsonl)", visible=False)

    # Help section at the bottom
    with gr.Accordion("ℹ️ 使用说明", open=False):
        gr.Markdown("""
### 使用步骤
1. **输入 API 密钥**: 从 Google AI Studio 获取免费 API 密钥
2. **选择模板**: 根据文本类型选择预设模板,或自定义提取规则
3. **输入文本**: 粘贴您的医学报告或病历文本
4. **开始提取**: 点击提取按钮,AI 将自动识别并结构化关键信息
5. **查看结果**: 在右侧查看结构化数据、统计分析和历史记录

### 支持的报告类型
- **影像报告**: CT、MRI、X线、超声等各类影像学检查
- **病理报告**: 组织病理、细胞病理、免疫组化等
- **病历记录**: 入院记录、病程记录、出院小结等

### 提示
- 提供高质量的示例可显著提升提取准确度
- 可同时处理多份报告(用空行分隔)
- 结果可导出为 JSONL 格式供后续分析使用
""")

    # Event wiring
    # Copy the selected template's prompt and example into the editors.
    load_template_btn.click(
        fn=load_template,
        inputs=[template_selector],
        outputs=[prompt_input, examples_input]
    )

    # Picking a sample replaces the contents of the input box.
    sample_selector.change(
        fn=load_sample_text,
        inputs=[sample_selector],
        outputs=[text_input]
    )

    clear_btn.click(
        fn=lambda: "",
        outputs=[text_input]
    )

    # extract_information returns five values; file_output is listed twice so
    # it receives both the file path (2nd value) and the visibility update
    # (5th value).
    submit_btn.click(
        fn=extract_information,
        inputs=[api_key_input, prompt_input, examples_input, text_input, template_selector],
        outputs=[json_output, file_output, stats_output, history_output, file_output]
    )

if __name__ == "__main__":
    demo.launch()
|