Spaces:
Sleeping
Sleeping
import re | |
from typing import Dict, Any, List | |
def clean_ocr_artifacts(text: str) -> str:
    """Normalize whitespace and repair a fixed set of OCR misreadings.

    The following substitutions are applied, in order:

    1. Runs of two or more whitespace characters collapse to one space.
    2. A stray ``e``/``E``/``o``/``O`` glued to the front of a capitalized
       word is dropped when it directly follows sentence punctuation and
       one whitespace character (``". eFlood"`` -> ``". Flood"``).
    3. ``Alove`` -> ``Aloe`` and ``alove`` -> ``aloe`` (case-preserving).
    4. ``Relevanci`` -> ``Relevance``.
    5. ``Advice``/``advice`` -> ``advice``.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    # Collapse whitespace first so the lookbehind below sees exactly one
    # whitespace character after the sentence punctuation.
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r'(?<=[\.\?!]\s)([eEoO])([A-Z][a-z]+)', r'\2', text)  # eFlood → Flood, oSeek → Seek
    # Handle each capitalization separately so "Alove" keeps its capital;
    # a single case-insensitive rule would lowercase it.
    text = re.sub(r'\bAlove\b', 'Aloe', text)
    text = re.sub(r'\balove\b', 'aloe', text)
    text = re.sub(r'\bRelevanci\b', 'Relevance', text)
    # NOTE(review): this lowercases "Advice" everywhere, including at the
    # start of a sentence - confirm that is intended.
    text = re.sub(r'\b[aA]dvice\b', 'advice', text)
    return text.strip()
class MedicalFactChecker:
    """Enhanced medical fact checker with faster validation.

    Scores a generated response with keyword and regex heuristics:
    contraindication keywords, word overlap with the supplied context,
    mentions of scarce resources, definitive claims, and dosage mentions.

    Attributes:
        medical_facts: Static reference facts, grouped by topic. They are
            loaded at construction but not consulted by
            ``check_medical_accuracy``.
        contraindications: Treatment name mapped to contraindication phrases.
        dosage_patterns: Compiled regexes that detect dosage mentions.
        definitive_patterns: Compiled regexes that detect absolute claims.
    """

    # Phrases whose presence triggers the resource-limitation warning.
    _GAZA_RESOURCES = ("clean water", "sterile", "hospital", "ambulance", "electricity")

    def __init__(self):
        """Load the static tables and pre-compile every regex once."""
        self.medical_facts = self._load_medical_facts()
        self.contraindications = self._load_contraindications()
        self.dosage_patterns = self._compile_dosage_patterns()
        self.definitive_patterns = [
            re.compile(r, re.IGNORECASE) for r in [
                r'always\s+(?:use|take|apply)',
                r'never\s+(?:use|take|apply)',
                r'will\s+(?:cure|heal|fix)',
                r'guaranteed\s+to',
                r'completely\s+(?:safe|effective)'
            ]
        ]

    def _load_medical_facts(self) -> Dict[str, Any]:
        """Pre-loaded medical facts for Gaza context"""
        return {
            "burn_treatment": {
                "cool_water": "Use clean, cool (not ice-cold) water for 10-20 minutes",
                "no_ice": "Never apply ice directly to burns",
                "clean_cloth": "Cover with clean, dry cloth if available"
            },
            "wound_care": {
                "pressure": "Apply direct pressure to control bleeding",
                "elevation": "Elevate injured limb if possible",
                "clean_hands": "Clean hands before treating wounds when possible"
            },
            "infection_signs": {
                "redness": "Increasing redness around wound",
                "warmth": "Increased warmth at wound site",
                "pus": "Yellow or green discharge",
                "fever": "Fever may indicate systemic infection"
            }
        }

    def _load_contraindications(self) -> Dict[str, List[str]]:
        """Pre-loaded contraindications for common treatments"""
        return {
            "aspirin": ["children under 16", "bleeding disorders", "stomach ulcers"],
            "ibuprofen": ["kidney disease", "heart failure", "stomach bleeding"],
            "hydrogen_peroxide": ["deep wounds", "closed wounds", "eyes"],
            "tourniquets": ["non-life-threatening bleeding", "without proper training"]
        }

    def _compile_dosage_patterns(self) -> List[re.Pattern]:
        """Pre-compiled dosage patterns"""
        patterns = [
            r'\d+\s*mg\b',  # milligrams
            r'\d+\s*g\b',  # grams
            r'\d+\s*ml\b',  # milliliters
            r'\d+\s*tablets?\b',  # tablets
            r'\d+\s*times?\s+(?:per\s+)?day\b',  # frequency
            r'every\s+\d+\s+hours?\b'  # intervals
        ]
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
        """Enhanced medical accuracy check with Gaza-specific considerations.

        Starts from a base confidence of 0.8 and subtracts a penalty for
        each heuristic that fires. Each of the resource, definitive-claim
        and dosage checks is applied at most once; the contraindication
        penalty is applied at most once per treatment.

        Args:
            response: Generated answer to evaluate. ``None`` is treated as
                an empty string.
            context: Reference text the answer should be grounded in. A
                falsy value skips the similarity check.

        Returns:
            A dict with these keys:
              - ``confidence_score``: float clamped to ``[0.0, 1.0]``.
              - ``issues``: list of blocking problems.
              - ``warnings``: list of non-blocking notes.
              - ``context_similarity``: Jaccard similarity of the
                whitespace-split, lowercased word sets (0.0 when skipped).
              - ``is_safe``: True when there are no issues and the
                confidence score is above 0.5.
        """
        if response is None:
            response = ""
        issues: List[str] = []
        warnings: List[str] = []
        accuracy_score = 0.0

        # Check for contraindications (faster keyword matching)
        response_lower = response.lower()
        for medication, contra_list in self.contraindications.items():
            # Table keys use underscores ("hydrogen_peroxide") while prose
            # uses spaces, so accept either spelling.
            spoken_name = medication.replace("_", " ")
            if medication not in response_lower and spoken_name not in response_lower:
                continue
            for contra in contra_list:
                # NOTE(review): this is a substring match on any single word
                # of the phrase, so common words such as "under" or
                # "without" can trigger it - confirm this recall-over-
                # precision trade-off is intended.
                if any(word in response_lower for word in contra.split()):
                    issues.append(f"Potential contraindication: {medication} with {contra}")
                    accuracy_score -= 0.3
                    break

        # Context alignment using Jaccard similarity
        if context:
            resp_words = set(response_lower.split())
            ctx_words = set(context.lower().split())
            # An empty ctx_words (whitespace-only context) would make the
            # union possibly empty, so it is short-circuited to 0.0.
            context_similarity = (
                len(resp_words & ctx_words) / len(resp_words | ctx_words)
                if ctx_words else 0.0
            )
            if context_similarity < 0.5:  # Lowered threshold for Gaza context
                warnings.append(f"Low context similarity: {context_similarity:.2f}")
                accuracy_score -= 0.1
        else:
            context_similarity = 0.0

        # Gaza-specific resource checks
        if any(resource in response_lower for resource in self._GAZA_RESOURCES):
            warnings.append("Consider resource limitations in Gaza context")
            accuracy_score -= 0.05

        # Unsupported claims check
        if any(pattern.search(response) for pattern in self.definitive_patterns):
            issues.append("Unsupported definitive claim detected")
            accuracy_score -= 0.4

        # Dosage validation
        if any(pattern.search(response) for pattern in self.dosage_patterns):
            warnings.append("Dosage detected - verify with professional")
            accuracy_score -= 0.1

        confidence_score = max(0.0, min(1.0, 0.8 + accuracy_score))
        return {
            "confidence_score": confidence_score,
            "issues": issues,
            "warnings": warnings,
            "context_similarity": context_similarity,
            "is_safe": not issues and confidence_score > 0.5
        }