Spaces:

rivapereira123
/

firstaid

Sleeping

App Files Files Community

firstaid / core /fact_checker.py

rivapereira123

Update core/fact_checker.py

f2f4366 verified about 1 month ago

raw

history blame contribute delete

5.63 kB

	import re
	from typing import Dict, Any, List

	def clean_ocr_artifacts(text: str) -> str:
	    """Normalize whitespace and repair a few known OCR misreads in ``text``.

	    Steps, applied in order:

	    1. Collapse every run of two or more whitespace characters into a
	       single space.
	    2. Drop a stray ``e``/``E``/``o``/``O`` glued to the front of a
	       capitalized word when it directly follows ``.``, ``?`` or ``!`` plus
	       one whitespace character (``". eFlood"`` -> ``". Flood"``,
	       ``"! oSeek"`` -> ``"! Seek"``).
	    3. Rewrite the misreads ``Alove`` -> ``Aloe``, ``alove`` -> ``aloe`` and
	       ``Relevanci`` -> ``Relevance`` (whole words only).
	    4. Lowercase the standalone word ``Advice``.

	    Args:
	        text: Raw text to clean.

	    Returns:
	        The cleaned text with leading and trailing whitespace removed.
	    """
	    text = re.sub(r'\s{2,}', ' ', text)
	    # eFlood -> Flood, oSeek -> Seek (only right after sentence punctuation).
	    text = re.sub(r'(?<=[.?!]\s)[eEoO]([A-Z][a-z]+)', r'\1', text)
	    # Handle the capitalized misread first so "Alove" keeps its capital;
	    # replacing both cases with "aloe" up front would make this rule dead.
	    text = re.sub(r'\bAlove\b', 'Aloe', text)
	    text = re.sub(r'\balove\b', 'aloe', text)
	    text = re.sub(r'\bRelevanci\b', 'Relevance', text)
	    # NOTE(review): this lowercases every standalone "Advice", including at
	    # the start of a sentence -- kept as-is; confirm that is intended.
	    text = re.sub(r'\bAdvice\b', 'advice', text)
	    return text.strip()


	class MedicalFactChecker:
	    """Heuristic safety screen for generated first-aid responses.

	    The checker holds small built-in tables (reference facts,
	    contraindications) and pre-compiled regular expressions, and uses them in
	    :meth:`check_medical_accuracy` to derive a confidence score plus lists of
	    issues and warnings for a response.

	    Attributes:
	        medical_facts: Reference first-aid statements grouped by topic. Stored
	            on the instance; ``check_medical_accuracy`` does not read it.
	        contraindications: Maps a treatment key to the conditions or
	            situations in which it should not be used.
	        dosage_patterns: Compiled, case-insensitive patterns that recognize
	            dosage amounts and dosing frequency.
	        definitive_patterns: Compiled, case-insensitive patterns that
	            recognize absolute claims ("always use", "will cure", ...).
	    """

	    # Phrases whose presence triggers the resource-limitation warning.
	    _RESOURCE_TERMS = ("clean water", "sterile", "hospital", "ambulance", "electricity")

	    def __init__(self):
	        """Load the built-in tables and compile all patterns once."""
	        self.medical_facts = self._load_medical_facts()
	        self.contraindications = self._load_contraindications()
	        self.dosage_patterns = self._compile_dosage_patterns()
	        # The alternations must use a bare "|": an escaped "\|" would only
	        # match a literal pipe character and the patterns would never fire.
	        self.definitive_patterns = [
	            re.compile(pattern, re.IGNORECASE)
	            for pattern in (
	                r'always\s+(?:use|take|apply)',
	                r'never\s+(?:use|take|apply)',
	                r'will\s+(?:cure|heal|fix)',
	                r'guaranteed\s+to',
	                r'completely\s+(?:safe|effective)',
	            )
	        ]

	    def _load_medical_facts(self) -> Dict[str, Any]:
	        """Return the pre-loaded medical facts for the Gaza context.

	        The result maps a topic name to a dict of short fact keys and their
	        guidance sentences.
	        """
	        return {
	            "burn_treatment": {
	                "cool_water": "Use clean, cool (not ice-cold) water for 10-20 minutes",
	                "no_ice": "Never apply ice directly to burns",
	                "clean_cloth": "Cover with clean, dry cloth if available"
	            },
	            "wound_care": {
	                "pressure": "Apply direct pressure to control bleeding",
	                "elevation": "Elevate injured limb if possible",
	                "clean_hands": "Clean hands before treating wounds when possible"
	            },
	            "infection_signs": {
	                "redness": "Increasing redness around wound",
	                "warmth": "Increased warmth at wound site",
	                "pus": "Yellow or green discharge",
	                "fever": "Fever may indicate systemic infection"
	            }
	        }

	    def _load_contraindications(self) -> Dict[str, List[str]]:
	        """Return the pre-loaded contraindications for common treatments.

	        Keys are treatment identifiers (underscores stand for spaces); values
	        list the conditions or situations in which the treatment is
	        contraindicated.
	        """
	        return {
	            "aspirin": ["children under 16", "bleeding disorders", "stomach ulcers"],
	            "ibuprofen": ["kidney disease", "heart failure", "stomach bleeding"],
	            "hydrogen_peroxide": ["deep wounds", "closed wounds", "eyes"],
	            "tourniquets": ["non-life-threatening bleeding", "without proper training"]
	        }

	    def _compile_dosage_patterns(self) -> List[re.Pattern]:
	        """Return the pre-compiled, case-insensitive dosage patterns."""
	        patterns = [
	            r'\d+\s*mg\b',                        # milligrams
	            r'\d+\s*g\b',                         # grams
	            r'\d+\s*ml\b',                        # milliliters
	            r'\d+\s*tablets?\b',                  # tablets
	            r'\d+\s*times?\s+(?:per\s+)?day\b',   # frequency
	            r'every\s+\d+\s+hours?\b'             # intervals
	        ]
	        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

	    def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
	        """Score a response for medical-safety red flags.

	        Starting from a base confidence of 0.8, each triggered check subtracts
	        a fixed penalty; the result is clamped to the range [0.0, 1.0].

	        Checks, in order:

	        * contraindications (-0.3 per medication, recorded as an issue)
	        * Jaccard word overlap with ``context`` below 0.5 (-0.1, warning)
	        * mention of a possibly unavailable resource (-0.05, warning)
	        * a definitive claim such as "will cure" (-0.4, issue)
	        * a dosage amount or frequency (-0.1, warning)

	        Args:
	            response: Generated answer to screen. ``None`` is treated as an
	                empty string.
	            context: Source text the answer should be grounded in. When empty
	                or ``None`` the similarity check is skipped.

	        Returns:
	            A dict with the keys ``confidence_score`` (float in [0, 1]),
	            ``issues`` (list of str), ``warnings`` (list of str),
	            ``context_similarity`` (float, 0.0 when no context was given) and
	            ``is_safe`` (True only when there are no issues and the confidence
	            is above 0.5).
	        """
	        if response is None:
	            response = ""
	        issues = []
	        warnings = []
	        accuracy_score = 0.0

	        # Check for contraindications (fast keyword matching).
	        response_lower = response.lower()
	        for medication, contra_list in self.contraindications.items():
	            # Keys such as "hydrogen_peroxide" are written with a space in
	            # prose, so search for the spaced form.
	            # NOTE(review): the plural key "tourniquets" does not match the
	            # singular "tourniquet"; left unchanged pending a product decision.
	            if medication.replace("_", " ") not in response_lower:
	                continue
	            for contra in contra_list:
	                # NOTE(review): deliberately loose -- any single word of the
	                # contraindication phrase (even "under" or "without") found
	                # as a substring counts as a hit.
	                if any(word in response_lower for word in contra.split()):
	                    issues.append(f"Potential contraindication: {medication} with {contra}")
	                    accuracy_score -= 0.3
	                    break  # report at most one contraindication per medication

	        # Context alignment using Jaccard similarity over whitespace tokens.
	        if context:
	            resp_words = set(response_lower.split())
	            ctx_words = set(context.lower().split())
	            # A whitespace-only context yields no tokens; report 0.0 rather
	            # than risking a division by zero.
	            if ctx_words:
	                context_similarity = len(resp_words & ctx_words) / len(resp_words | ctx_words)
	            else:
	                context_similarity = 0.0
	            if context_similarity < 0.5:  # lowered threshold for Gaza context
	                warnings.append(f"Low context similarity: {context_similarity:.2f}")
	                accuracy_score -= 0.1
	        else:
	            context_similarity = 0.0

	        # Gaza-specific resource checks.
	        if any(resource in response_lower for resource in self._RESOURCE_TERMS):
	            warnings.append("Consider resource limitations in Gaza context")
	            accuracy_score -= 0.05

	        # Unsupported claims check (penalized once, however many patterns hit).
	        if any(pattern.search(response) for pattern in self.definitive_patterns):
	            issues.append("Unsupported definitive claim detected")
	            accuracy_score -= 0.4

	        # Dosage validation (penalized once, however many patterns hit).
	        if any(pattern.search(response) for pattern in self.dosage_patterns):
	            warnings.append("Dosage detected - verify with professional")
	            accuracy_score -= 0.1

	        confidence_score = max(0.0, min(1.0, 0.8 + accuracy_score))

	        return {
	            "confidence_score": confidence_score,
	            "issues": issues,
	            "warnings": warnings,
	            "context_similarity": context_similarity,
	            "is_safe": len(issues) == 0 and confidence_score > 0.5
	        }