import re
from typing import Dict, Any, List

def clean_ocr_artifacts(text: str) -> str:
    """Normalise whitespace and repair a fixed set of known OCR mistakes.

    The following rewrites are applied in order:

    1. Every run of two or more whitespace characters (including newlines)
       becomes a single space.
    2. A stray ``e``/``E``/``o``/``O`` glued to the front of a capitalised
       word that directly follows sentence punctuation plus one whitespace
       character is dropped (``". eFlood"`` -> ``". Flood"``).
    3. ``Alove`` -> ``Aloe`` and ``alove`` -> ``aloe`` (case is preserved).
    4. ``Relevanci`` -> ``Relevance``.
    5. ``Advice`` -> ``advice``.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    text = re.sub(r'\s{2,}', ' ', text)

    # Only fires right after ". ", "? " or "! ", so a stray letter at the very
    # start of the text is left alone.  Legitimate words of the same shape
    # (lowercase e/o + capitalised word, e.g. "eBay") are affected as well.
    text = re.sub(r'(?<=[\.\?!]\s)([eEoO])([A-Z][a-z]+)', r'\2', text)

    # Handle the capitalised form first and keep the lowercase rule
    # case-sensitive; a single case-insensitive rule would turn a
    # sentence-initial "Alove" into lowercase "aloe".
    text = re.sub(r'\bAlove\b', 'Aloe', text)
    text = re.sub(r'\balove\b', 'aloe', text)

    text = re.sub(r'\bRelevanci\b', 'Relevance', text)

    # NOTE(review): this lowercases every whole-word "Advice", including at the
    # start of a sentence - confirm that is intended.
    text = re.sub(r'\bAdvice\b', 'advice', text)
    return text.strip()


class MedicalFactChecker:
    """Heuristic safety checker for generated medical responses.

    The checker relies on keyword and regular-expression matching only.
    All lookup tables and patterns are built once in ``__init__`` so that
    ``check_medical_accuracy`` does no compilation work per call.

    Attributes:
        medical_facts: Reference statements grouped by topic.  They are
            stored for callers; ``check_medical_accuracy`` does not read them.
        contraindications: Maps a medication/treatment key to the conditions
            it should not be combined with.
        dosage_patterns: Compiled patterns that recognise dosage mentions.
        definitive_patterns: Compiled patterns that recognise absolute
            claims such as "always use" or "guaranteed to".
    """

    # Function words that may appear inside a contraindication phrase but say
    # nothing about the condition itself.  Matching on them would flag, e.g.,
    # "aspirin under supervision" as "children under 16".
    _CONTRA_STOPWORDS = frozenset({
        "a", "an", "and", "for", "in", "of", "or", "the", "to",
        "under", "with", "without",
    })

    # Resources whose mention triggers a reminder about local availability.
    _RESOURCE_TERMS = ("clean water", "sterile", "hospital", "ambulance", "electricity")

    def __init__(self):
        self.medical_facts = self._load_medical_facts()
        self.contraindications = self._load_contraindications()
        self.dosage_patterns = self._compile_dosage_patterns()
        self.definitive_patterns = [
            re.compile(r, re.IGNORECASE) for r in [
                r'always\s+(?:use|take|apply)',
                r'never\s+(?:use|take|apply)',
                r'will\s+(?:cure|heal|fix)',
                r'guaranteed\s+to',
                r'completely\s+(?:safe|effective)'
            ]
        ]

    def _load_medical_facts(self) -> Dict[str, Any]:
        """Return the built-in first-aid reference statements, keyed by topic."""
        return {
            "burn_treatment": {
                "cool_water": "Use clean, cool (not ice-cold) water for 10-20 minutes",
                "no_ice": "Never apply ice directly to burns",
                "clean_cloth": "Cover with clean, dry cloth if available"
            },
            "wound_care": {
                "pressure": "Apply direct pressure to control bleeding",
                "elevation": "Elevate injured limb if possible",
                "clean_hands": "Clean hands before treating wounds when possible"
            },
            "infection_signs": {
                "redness": "Increasing redness around wound",
                "warmth": "Increased warmth at wound site",
                "pus": "Yellow or green discharge",
                "fever": "Fever may indicate systemic infection"
            }
        }

    def _load_contraindications(self) -> Dict[str, List[str]]:
        """Return the built-in map of treatment key -> contraindicated conditions."""
        return {
            "aspirin": ["children under 16", "bleeding disorders", "stomach ulcers"],
            "ibuprofen": ["kidney disease", "heart failure", "stomach bleeding"],
            "hydrogen_peroxide": ["deep wounds", "closed wounds", "eyes"],
            "tourniquets": ["non-life-threatening bleeding", "without proper training"]
        }

    def _compile_dosage_patterns(self) -> List[re.Pattern]:
        """Return case-insensitive compiled patterns that match dosage mentions."""
        patterns = [
            r'\d+\s*mg\b',  # milligrams
            r'\d+\s*g\b',   # grams
            r'\d+\s*ml\b',  # milliliters
            r'\d+\s*tablets?\b',  # tablets
            r'\d+\s*times?\s+(?:per\s+)?day\b',  # frequency
            r'every\s+\d+\s+hours?\b'  # intervals
        ]
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
        """Score a response with keyword heuristics and report safety concerns.

        Starting from a base confidence of 0.8, penalties are subtracted for:

        * each medication that is mentioned together with a keyword from one
          of its contraindications (-0.3 per medication, recorded as an issue);
        * a Jaccard word-set similarity between response and context below
          0.5 (-0.1, warning) - only when ``context`` is non-empty;
        * any mention of a resource listed in ``_RESOURCE_TERMS``
          (-0.05, warning);
        * any absolute claim matched by ``definitive_patterns``
          (-0.4, recorded as an issue, counted once);
        * any dosage matched by ``dosage_patterns`` (-0.1, warning, once).

        Args:
            response: Text to evaluate.  ``None`` is treated as ``""``.
            context: Reference text the response is expected to draw on.
                A falsy value skips the similarity check.

        Returns:
            A dict with ``confidence_score`` (float clamped to [0, 1]),
            ``issues`` and ``warnings`` (lists of messages),
            ``context_similarity`` (float, 0.0 when no context was given) and
            ``is_safe`` (True only if there are no issues and the confidence
            is above 0.5).
        """
        if response is None:
            response = ""
        issues: List[str] = []
        warnings: List[str] = []
        accuracy_score = 0.0

        response_lower = response.lower()

        # Contraindication check (plain substring matching).
        for medication, contra_list in self.contraindications.items():
            # Keys such as "hydrogen_peroxide" are written with a space in
            # ordinary prose, so accept both spellings.
            spoken_name = medication.replace("_", " ")
            if medication not in response_lower and spoken_name not in response_lower:
                continue
            for contra in contra_list:
                keywords = [
                    word for word in contra.split()
                    if word not in self._CONTRA_STOPWORDS
                ]
                if any(word in response_lower for word in keywords):
                    issues.append(f"Potential contraindication: {medication} with {contra}")
                    accuracy_score -= 0.3
                    break  # at most one issue per medication

        # Context alignment using Jaccard similarity over whitespace tokens.
        if context:
            resp_words = set(response_lower.split())
            ctx_words = set(context.lower().split())
            # A whitespace-only context yields no tokens; report 0.0 rather
            # than dividing by a possibly empty union.
            context_similarity = (
                len(resp_words & ctx_words) / len(resp_words | ctx_words)
                if ctx_words else 0.0
            )
            if context_similarity < 0.5:
                warnings.append(f"Low context similarity: {context_similarity:.2f}")
                accuracy_score -= 0.1
        else:
            context_similarity = 0.0

        # Resource-availability reminder.
        if any(resource in response_lower for resource in self._RESOURCE_TERMS):
            warnings.append("Consider resource limitations in Gaza context")
            accuracy_score -= 0.05

        # Absolute / unsupported claims: penalised once however many match.
        if any(pattern.search(response) for pattern in self.definitive_patterns):
            issues.append("Unsupported definitive claim detected")
            accuracy_score -= 0.4

        # Dosage mentions: warned about once however many match.
        if any(pattern.search(response) for pattern in self.dosage_patterns):
            warnings.append("Dosage detected - verify with professional")
            accuracy_score -= 0.1

        confidence_score = max(0.0, min(1.0, 0.8 + accuracy_score))

        return {
            "confidence_score": confidence_score,
            "issues": issues,
            "warnings": warnings,
            "context_similarity": context_similarity,
            "is_safe": len(issues) == 0 and confidence_score > 0.5
        }