File size: 5,632 Bytes
985f660
f2f4366
985f660
 
 
f2f4366
985f660
 
 
 
 
 
f2f4366
985f660
f2f4366
 
985f660
f2f4366
985f660
 
f2f4366
 
 
 
 
 
 
 
 
985f660
f2f4366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
985f660
f2f4366
985f660
 
 
 
 
 
f2f4366
985f660
f2f4366
985f660
f2f4366
 
 
 
 
 
985f660
f2f4366
 
985f660
f2f4366
 
 
985f660
 
 
f2f4366
 
985f660
f2f4366
 
 
 
 
985f660
 
f2f4366
 
985f660
 
 
 
f2f4366
 
985f660
f2f4366
 
 
 
 
 
 
 
 
 
985f660
 
f2f4366
985f660
 
f2f4366
 
985f660
 
f2f4366
985f660
 
f2f4366
 
 
985f660
f2f4366
985f660
 
f2f4366
 
985f660
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import re
from typing import Dict, Any, List

def clean_ocr_artifacts(text: str) -> str:
    """Collapse whitespace runs and repair a few known OCR misreadings.

    The substitutions are applied in order:

    1. Any run of two or more whitespace characters becomes a single space.
    2. A stray ``e``/``E``/``o``/``O`` glued to the front of a capitalised
       word that directly follows sentence punctuation and one whitespace
       character is dropped (``". eFlood"`` -> ``". Flood"``).
    3. ``Alove`` -> ``Aloe`` and ``alove`` -> ``aloe`` (case preserved).
    4. ``Relevanci`` -> ``Relevance``.
    5. ``Advice`` / ``advice`` -> ``advice`` (always lower-case).

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r'(?<=[\.\?!]\s)([eEoO])([A-Z][a-z]+)', r'\2', text)  # eFlood → Flood, oSeek → Seek
    # Handle the capitalised form first: a case-insensitive rewrite to
    # lower-case 'aloe' would otherwise consume it and make this rule dead.
    text = re.sub(r'\bAlove\b', 'Aloe', text)
    text = re.sub(r'\balove\b', 'aloe', text)
    text = re.sub(r'\bRelevanci\b', 'Relevance', text)
    text = re.sub(r'\b[aA]dvice\b', 'advice', text)
    return text.strip()


class MedicalFactChecker:
    """Heuristic checker that scores a medical response with keyword and regex rules.

    All lookup tables and regular expressions are built once in ``__init__``
    so that ``check_medical_accuracy`` only performs substring tests and
    pre-compiled pattern searches.
    """

    def __init__(self):
        # Reference table of first-aid facts; no method of this class reads it.
        self.medical_facts = self._load_medical_facts()
        # Treatment name -> situations in which that treatment is contraindicated.
        self.contraindications = self._load_contraindications()
        # Compiled patterns that recognise dosage / frequency statements.
        self.dosage_patterns = self._compile_dosage_patterns()
        # Compiled patterns for absolute wording ("always use", "will cure", ...).
        self.definitive_patterns = [
            re.compile(r, re.IGNORECASE) for r in [
                r'always\s+(?:use|take|apply)',
                r'never\s+(?:use|take|apply)',
                r'will\s+(?:cure|heal|fix)',
                r'guaranteed\s+to',
                r'completely\s+(?:safe|effective)'
            ]
        ]

    def _load_medical_facts(self) -> Dict[str, Any]:
        """Return the built-in first-aid fact table.

        Returns:
            A mapping of topic name to a mapping of short key to guidance text.
        """
        return {
            "burn_treatment": {
                "cool_water": "Use clean, cool (not ice-cold) water for 10-20 minutes",
                "no_ice": "Never apply ice directly to burns",
                "clean_cloth": "Cover with clean, dry cloth if available"
            },
            "wound_care": {
                "pressure": "Apply direct pressure to control bleeding",
                "elevation": "Elevate injured limb if possible",
                "clean_hands": "Clean hands before treating wounds when possible"
            },
            "infection_signs": {
                "redness": "Increasing redness around wound",
                "warmth": "Increased warmth at wound site",
                "pus": "Yellow or green discharge",
                "fever": "Fever may indicate systemic infection"
            }
        }

    def _load_contraindications(self) -> Dict[str, List[str]]:
        """Return the built-in contraindication table.

        Returns:
            A mapping of treatment name (lower-case, underscores for spaces)
            to a list of contraindication phrases.
        """
        return {
            "aspirin": ["children under 16", "bleeding disorders", "stomach ulcers"],
            "ibuprofen": ["kidney disease", "heart failure", "stomach bleeding"],
            "hydrogen_peroxide": ["deep wounds", "closed wounds", "eyes"],
            "tourniquets": ["non-life-threatening bleeding", "without proper training"]
        }

    def _compile_dosage_patterns(self) -> List[re.Pattern]:
        """Compile the case-insensitive patterns used to spot dosage statements.

        Returns:
            A list of compiled regular expressions.
        """
        patterns = [
            r'\d+\s*mg\b',  # milligrams
            r'\d+\s*g\b',   # grams
            r'\d+\s*ml\b',  # milliliters
            r'\d+\s*tablets?\b',  # tablets
            r'\d+\s*times?\s+(?:per\s+)?day\b',  # frequency
            r'every\s+\d+\s+hours?\b'  # intervals
        ]
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
        """Score *response* against the built-in heuristics.

        The score starts at 0.8 and the following penalties are applied:

        * 0.3 for each treatment in the contraindication table that is
          mentioned together with any word of one of its contraindication
          phrases (recorded as an issue).
        * 0.1 when *context* is non-empty and the Jaccard similarity of the
          whitespace-split, lower-cased word sets is below 0.5 (warning).
        * 0.05 when a resource from the Gaza resource list is mentioned
          (warning).
        * 0.4 when any definitive-claim pattern matches (issue, counted once).
        * 0.1 when any dosage pattern matches (warning, counted once).

        Args:
            response: Text to evaluate. ``None`` is treated as an empty string.
            context: Reference text to compare against. When falsy, the
                similarity step is skipped and reported as 0.0.

        Returns:
            A dict with keys ``confidence_score`` (float clamped to [0, 1]),
            ``issues`` (list of str), ``warnings`` (list of str),
            ``context_similarity`` (float) and ``is_safe`` (True only when
            there are no issues and the confidence is above 0.5).
        """
        if response is None:
            response = ""
        issues = []
        warnings = []
        accuracy_score = 0.0

        # Check for contraindications (faster keyword matching)
        response_lower = response.lower()
        for medication, contra_list in self.contraindications.items():
            # Table keys use underscores ("hydrogen_peroxide") while free text
            # uses spaces, so accept either spelling of the treatment name.
            spoken_name = medication.replace("_", " ")
            if medication not in response_lower and spoken_name not in response_lower:
                continue
            for contra in contra_list:
                # Deliberately coarse: one matching word of the phrase,
                # tested as a substring, is enough to flag it.
                if any(word in response_lower for word in contra.split()):
                    issues.append(f"Potential contraindication: {medication} with {contra}")
                    accuracy_score -= 0.3
                    break  # at most one issue per treatment

        # Context alignment using Jaccard similarity
        if context:
            resp_words = set(response_lower.split())
            ctx_words = set(context.lower().split())
            # A whitespace-only context yields no words; report 0.0 instead of
            # dividing by an empty union.
            context_similarity = len(resp_words & ctx_words) / len(resp_words | ctx_words) if ctx_words else 0.0
            if context_similarity < 0.5:  # Lowered threshold for Gaza context
                warnings.append(f"Low context similarity: {context_similarity:.2f}")
                accuracy_score -= 0.1
        else:
            context_similarity = 0.0

        # Gaza-specific resource checks
        gaza_resources = ["clean water", "sterile", "hospital", "ambulance", "electricity"]
        if any(resource in response_lower for resource in gaza_resources):
            warnings.append("Consider resource limitations in Gaza context")
            accuracy_score -= 0.05

        # Unsupported claims check (penalised once, however many patterns match)
        if any(pattern.search(response) for pattern in self.definitive_patterns):
            issues.append("Unsupported definitive claim detected")
            accuracy_score -= 0.4

        # Dosage validation (penalised once, however many patterns match)
        if any(pattern.search(response) for pattern in self.dosage_patterns):
            warnings.append("Dosage detected - verify with professional")
            accuracy_score -= 0.1

        # accuracy_score only accumulates penalties; 0.8 is the base confidence.
        confidence_score = max(0.0, min(1.0, 0.8 + accuracy_score))

        return {
            "confidence_score": confidence_score,
            "issues": issues,
            "warnings": warnings,
            "context_similarity": context_similarity,
            "is_safe": len(issues) == 0 and confidence_score > 0.5
        }