Spaces:
Sleeping
Sleeping
File size: 5,632 Bytes
985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 f2f4366 985f660 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import re
from typing import Dict, Any, List
def clean_ocr_artifacts(text: str) -> str:
    """Collapse whitespace and repair a fixed set of known OCR misreads.

    The following rewrites are applied, in order:

    1. Any run of two or more whitespace characters becomes one space.
    2. A stray ``e``/``E``/``o``/``O`` glued to the front of a capitalised
       word that directly follows sentence-ending punctuation and a single
       whitespace character is dropped (``". eFlood"`` -> ``". Flood"``).
    3. ``Alove`` -> ``Aloe`` and ``alove`` -> ``aloe`` (case preserved).
    4. ``Relevanci`` -> ``Relevance``.
    5. ``Advice`` -> ``advice``.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    text = re.sub(r'\s{2,}', ' ', text)
    # eFlood -> Flood, oSeek -> Seek (only right after ". ", "? " or "! ").
    text = re.sub(r'(?<=[\.\?!]\s)([eEoO])([A-Z][a-z]+)', r'\2', text)
    # The capitalised form must be handled before the lowercase one;
    # otherwise "Alove" would be lowercased to "aloe" and the "Aloe" rule
    # could never apply.
    text = re.sub(r'\bAlove\b', 'Aloe', text)
    text = re.sub(r'\balove\b', 'aloe', text)
    text = re.sub(r'\bRelevanci\b', 'Relevance', text)
    # Rewrites capitalised "Advice" to lowercase; "advice" is left as-is.
    text = re.sub(r'\b[aA]dvice\b', 'advice', text)
    return text.strip()
class MedicalFactChecker:
    """Heuristic safety checker for generated medical guidance.

    All lookup tables and regular expressions are built once in
    ``__init__`` so that ``check_medical_accuracy`` only performs substring
    and pre-compiled regex searches.
    """

    # Phrases that trigger the "resource limitations" warning when they
    # appear anywhere in the lower-cased response.
    _RESOURCE_KEYWORDS = (
        "clean water", "sterile", "hospital", "ambulance", "electricity",
    )

    def __init__(self):
        """Build the fact table, contraindication table and regex lists."""
        self.medical_facts = self._load_medical_facts()
        self.contraindications = self._load_contraindications()
        self.dosage_patterns = self._compile_dosage_patterns()
        # Phrasings treated as unsupported absolute claims.
        self.definitive_patterns = [
            re.compile(r, re.IGNORECASE) for r in [
                r'always\s+(?:use|take|apply)',
                r'never\s+(?:use|take|apply)',
                r'will\s+(?:cure|heal|fix)',
                r'guaranteed\s+to',
                r'completely\s+(?:safe|effective)'
            ]
        ]

    def _load_medical_facts(self) -> Dict[str, Any]:
        """Return the built-in first-aid fact table.

        The table maps a topic to a dict of short guidance strings. It is
        stored on the instance; ``check_medical_accuracy`` does not read it.
        """
        return {
            "burn_treatment": {
                "cool_water": "Use clean, cool (not ice-cold) water for 10-20 minutes",
                "no_ice": "Never apply ice directly to burns",
                "clean_cloth": "Cover with clean, dry cloth if available"
            },
            "wound_care": {
                "pressure": "Apply direct pressure to control bleeding",
                "elevation": "Elevate injured limb if possible",
                "clean_hands": "Clean hands before treating wounds when possible"
            },
            "infection_signs": {
                "redness": "Increasing redness around wound",
                "warmth": "Increased warmth at wound site",
                "pus": "Yellow or green discharge",
                "fever": "Fever may indicate systemic infection"
            }
        }

    def _load_contraindications(self) -> Dict[str, List[str]]:
        """Return the built-in map of treatment name to contraindications.

        Multi-word treatment names use ``_`` as the word separator.
        """
        return {
            "aspirin": ["children under 16", "bleeding disorders", "stomach ulcers"],
            "ibuprofen": ["kidney disease", "heart failure", "stomach bleeding"],
            "hydrogen_peroxide": ["deep wounds", "closed wounds", "eyes"],
            "tourniquets": ["non-life-threatening bleeding", "without proper training"]
        }

    def _compile_dosage_patterns(self) -> List[re.Pattern]:
        """Return case-insensitive regexes that recognise dosage mentions."""
        patterns = [
            r'\d+\s*mg\b',  # milligrams
            r'\d+\s*g\b',  # grams
            r'\d+\s*ml\b',  # milliliters
            r'\d+\s*tablets?\b',  # tablets
            r'\d+\s*times?\s+(?:per\s+)?day\b',  # frequency
            r'every\s+\d+\s+hours?\b'  # intervals
        ]
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
        """Score *response* with keyword and regex heuristics.

        Scoring starts from a base of 0.8 and subtracts a penalty for each
        triggered check; the result is clamped to ``[0.0, 1.0]``.

        Args:
            response: Generated answer to evaluate. ``None`` is treated as
                an empty string.
            context: Reference text the answer should align with. A falsy
                value skips the similarity check.

        Returns:
            A dict with keys ``confidence_score`` (float in ``[0, 1]``),
            ``issues`` (list of str), ``warnings`` (list of str),
            ``context_similarity`` (Jaccard similarity of the whitespace-
            separated word sets, ``0.0`` when no context is given) and
            ``is_safe`` (True only when there are no issues and the
            confidence is above 0.5).
        """
        if response is None:
            response = ""

        issues = []
        warnings = []
        accuracy_score = 0.0

        # Check for contraindications (plain substring matching).
        response_lower = response.lower()
        for medication, contra_list in self.contraindications.items():
            # Keys such as "hydrogen_peroxide" use underscores, which never
            # occur in ordinary prose; also match the space-separated form.
            spoken_name = medication.replace("_", " ")
            if medication not in response_lower and spoken_name not in response_lower:
                continue
            for contra in contra_list:
                # A single word of the contraindication phrase appearing
                # anywhere in the response is enough to raise the issue.
                if any(word in response_lower for word in contra.split()):
                    issues.append(f"Potential contraindication: {medication} with {contra}")
                    accuracy_score -= 0.3
                    break  # at most one issue per medication

        # Context alignment using Jaccard similarity.
        if context:
            resp_words = set(response_lower.split())
            ctx_words = set(context.lower().split())
            # A whitespace-only context yields an empty word set; report 0.0
            # instead of dividing by a possibly empty union.
            if ctx_words:
                context_similarity = len(resp_words & ctx_words) / len(resp_words | ctx_words)
            else:
                context_similarity = 0.0
            if context_similarity < 0.5:
                warnings.append(f"Low context similarity: {context_similarity:.2f}")
                accuracy_score -= 0.1
        else:
            context_similarity = 0.0

        # Resource-availability check.
        if any(resource in response_lower for resource in self._RESOURCE_KEYWORDS):
            warnings.append("Consider resource limitations in Gaza context")
            accuracy_score -= 0.05

        # Unsupported claims check: penalised once, however many match.
        if any(pattern.search(response) for pattern in self.definitive_patterns):
            issues.append("Unsupported definitive claim detected")
            accuracy_score -= 0.4

        # Dosage validation: penalised once, however many match.
        if any(pattern.search(response) for pattern in self.dosage_patterns):
            warnings.append("Dosage detected - verify with professional")
            accuracy_score -= 0.1

        confidence_score = max(0.0, min(1.0, 0.8 + accuracy_score))
        return {
            "confidence_score": confidence_score,
            "issues": issues,
            "warnings": warnings,
            "context_similarity": context_similarity,
            "is_safe": len(issues) == 0 and confidence_score > 0.5
        }
|