# med-verify-mvp / app.py
# Uploaded by sahildeshmukh123 - commit d97526e (verified): "Update app.py"
"""
Health Misinformation Combat Web App - MVP
IMPROVEMENTS:
- New verdict categories (NO EVIDENCE FOUND, INSUFFICIENT EVIDENCE)
- Updated display for new verdicts
- Better emoji indicators for each verdict type
"""
import hmac
import html
import json
import os
import re
from datetime import datetime, timezone

import streamlit as st
from sqlalchemy import func

from models import get_db, Submission
from utils import scrape_post_text, extract_medical_claim, search_medical_literature, generate_verdict_with_llm
# Page configuration
st.set_page_config(
    page_title="Health Claim Checker",
    # Hospital emoji; the previous value was a mis-decoded (mojibake) byte
    # sequence that would show up as garbage in the browser tab.
    page_icon="🏥",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS with new verdict categories.
# The verdict-* class names are the ones the rendering code builds from a
# verdict string by replacing spaces with hyphens.
_CUSTOM_CSS = """
<style>
.disclaimer {
background-color: #fff3cd;
border-left: 5px solid #ffc107;
padding: 15px;
margin: 20px 0;
border-radius: 5px;
}
.verdict-true { color: #28a745; font-weight: bold; }
.verdict-false { color: #dc3545; font-weight: bold; }
.verdict-insufficient-evidence { color: #ff9800; font-weight: bold; }
.verdict-no-evidence-found { color: #9e9e9e; font-weight: bold; }
.verdict-misleading { color: #ff6b6b; font-weight: bold; }
.verdict-uncertain { color: #ffc107; font-weight: bold; } /* Legacy support */
.help-box {
background-color: #e7f3ff;
border-left: 5px solid #2196F3;
padding: 15px;
margin: 10px 0;
border-radius: 5px;
}
</style>
"""
# Inject the stylesheet once at import time; raw HTML must be explicitly allowed.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Medical specialty categories.
# Used as select-box options on the submission form and the review dashboard;
# list order is display order, and the first entry is the dashboard's fallback
# selection when a stored specialty is not in this list.
MEDICAL_SPECIALTIES = [
    "General Medicine",
    "Cardiology", "Neurology", "Oncology",
    "Endocrinology", "Immunology",
    "Nutrition & Diet", "Mental Health",
    "Infectious Disease",
    "Pediatrics", "Geriatrics",
    "Other",
]
def main_page():
    """Render the home page: claim submission on the left, verified claims on the right.

    The page shows a disclaimer, a collapsible tips section and a keyword
    search box, followed by a two-column layout. The left column collects a
    claim (free text or a post URL) together with the model and specialty
    choices, and calls ``analyze_claim`` when the button is pressed and the
    input has at least 10 non-blank characters. The right column shows, in
    order of precedence: search results (query of 3+ characters), claims
    related to the current input, or the most recently verified claims.
    """
    st.title("🏥 Health Claim Checker")
    st.subheader("Combat Health Misinformation with AI-Powered Fact Checking")
    # Prominent disclaimer
    st.markdown("""
    <div class="disclaimer">
    ⚠️ <strong>IMPORTANT DISCLAIMER:</strong> This tool is for informational purposes only and is NOT medical advice.
    All results are preliminary and require professional medical verification. Always consult qualified healthcare
    professionals for medical decisions. This service does not replace professional medical consultation.
    </div>
    """, unsafe_allow_html=True)
    # Help section
    with st.expander("💡 How to Get Better Results - Click for Tips"):
        st.markdown("""
        <div class="help-box">
        <h4>📝 Tips for Writing Strong Claims:</h4>
        <ul>
        <li><strong>Use action verbs:</strong> "treats", "prevents", "reduces", "improves", "causes", "increases", "cures"</li>
        <li><strong>Be specific:</strong> Instead of "Vitamin C is good", try "Vitamin C prevents common cold infections"</li>
        <li><strong>Include the condition:</strong> Mention the specific health outcome or disease</li>
        <li><strong>Avoid vague statements:</strong> "X is healthy" → "X reduces risk of heart disease"</li>
        </ul>
        <h4>✅ Good Examples:</h4>
        <ul>
        <li>"Aspirin prevents heart attacks in high-risk patients"</li>
        <li>"Green tea reduces cholesterol levels"</li>
        <li>"Meditation helps reduce anxiety symptoms"</li>
        </ul>
        <h4>❌ Weak Examples:</h4>
        <ul>
        <li>"Vitamins are good" (too vague)</li>
        <li>"I think coffee is healthy" (not a clear claim)</li>
        <li>"This works!" (no specific claim)</li>
        </ul>
        </div>
        """, unsafe_allow_html=True)
    # Search bar
    st.markdown("---")
    search_query = st.text_input(
        "🔍 Search verified claims by keyword:",
        placeholder="e.g., vitamin C, cancer, vaccine, diabetes...",
        help="Search through previously verified medical claims",
        key="search_bar"
    )
    # Two-column layout
    col_left, col_right = st.columns([1, 1])
    # LEFT COLUMN: Submit new claim
    with col_left:
        st.header("📝 Check New Claim")
        # Model selection. The mapping is built once and doubles as the option
        # list (dict order) and the label lookup.
        model_labels = {
            "qwen": "Qwen 2.5 7B (Best Reasoning)",
            "mistral": "Mistral 7B (Fastest)",
            "llama": "Llama 3.1 8B (Most Popular)",
            "gemma": "Gemma 2 9B (Google)",
        }
        model_choice = st.selectbox(
            "🤖 Select AI Model:",
            options=list(model_labels),
            index=0,
            format_func=model_labels.get,
            help="Different models may give different quality results."
        )
        # Specialty selection
        specialty = st.selectbox(
            "🏥 Medical Specialty (Optional):",
            options=["Auto-detect"] + MEDICAL_SPECIALTIES,
            help="Select the medical specialty this claim relates to."
        )
        input_type = st.radio("Input Type:", ["Text/Caption", "Social Media Link"], horizontal=True)
        if input_type == "Text/Caption":
            user_input = st.text_area(
                "Paste the social media post caption or health claim:",
                height=150,
                placeholder="e.g., 'Drinking lemon water cures cancer' or 'Aspirin prevents heart attacks'"
            )
        else:
            user_input = st.text_input(
                "Paste the social media post URL:",
                placeholder="https://twitter.com/username/status/..."
            )
        if st.button("🔍 Analyze Claim", type="primary", use_container_width=True):
            if not user_input or len(user_input.strip()) < 10:
                st.error("Please provide valid input (at least 10 characters)")
            else:
                analyze_claim(user_input, input_type, model_choice, specialty)
    # RIGHT COLUMN: Show related claims or search results
    with col_right:
        if search_query and len(search_query.strip()) >= 3:
            show_search_results(search_query)
        elif user_input and len(user_input.strip()) >= 10:
            st.header("🔗 Related Verified Claims")
            show_related_claims(user_input)
        else:
            st.header("📋 Recently Verified Claims")
            show_recent_verified_claims()
def auto_detect_specialty(claim_text):
    """Guess the medical specialty of a claim from keyword matches.

    Each specialty is scored by how many of its keywords occur in the claim;
    the highest-scoring specialty wins and ties go to the specialty listed
    first. Keywords are matched case-insensitively at the start of a word, so
    plurals and derived forms still count ("vitamins", "dietary") while
    keywords buried inside unrelated words do not ("eating" in "treating",
    "aging" in "imaging", "therapy" in "radiotherapy").

    Args:
        claim_text: The extracted claim. ``None`` or an empty string is
            treated as containing no keywords.

    Returns:
        The best-matching specialty name, or "General Medicine" when no
        keyword matches.
    """
    if not claim_text:
        return "General Medicine"
    claim_lower = claim_text.lower()
    specialty_keywords = {
        "Cardiology": ["heart", "cardiac", "cardiovascular", "blood pressure", "cholesterol", "stroke", "artery"],
        "Neurology": ["brain", "neurological", "alzheimer", "parkinson", "migraine", "seizure", "nerve"],
        "Oncology": ["cancer", "tumor", "chemotherapy", "radiation", "malignant", "carcinoma"],
        "Endocrinology": ["diabetes", "insulin", "thyroid", "hormone", "metabolic", "glucose"],
        "Immunology": ["immune", "immunity", "vaccine", "antibody", "autoimmune", "allergy"],
        "Nutrition & Diet": ["vitamin", "nutrition", "diet", "supplement", "food", "eating", "weight"],
        "Mental Health": ["depression", "anxiety", "mental", "psychological", "stress", "mood", "therapy"],
        "Infectious Disease": ["virus", "bacterial", "infection", "covid", "flu", "contagious", "outbreak"],
        "Pediatrics": ["children", "pediatric", "infant", "baby", "child"],
        "Geriatrics": ["elderly", "aging", "senior", "geriatric"]
    }
    specialty_scores = {}
    for specialty, keywords in specialty_keywords.items():
        # \b anchors the keyword to a word start; no trailing anchor so that
        # suffixes such as plurals are still accepted.
        score = sum(
            1 for keyword in keywords
            if re.search(r"\b" + re.escape(keyword), claim_lower)
        )
        if score:
            specialty_scores[specialty] = score
    if not specialty_scores:
        return "General Medicine"
    # max() returns the first key with the top score, i.e. dict order breaks ties.
    return max(specialty_scores, key=specialty_scores.get)
def analyze_claim(user_input, input_type, model_choice, specialty):
    """Run the fact-check pipeline for one submission and render the result.

    Steps: fetch the post text when a URL was submitted, extract the medical
    claim, resolve the specialty, search the literature, ask the selected
    model for a verdict, store the submission with ``review_status='pending'``
    and display the preliminary results.

    Args:
        user_input: Raw text or URL entered by the user.
        input_type: "Text/Caption" or "Social Media Link".
        model_choice: Model key forwarded to ``generate_verdict_with_llm``.
        specialty: A specialty name, or "Auto-detect" to infer it from the
            extracted claim.

    Returns:
        None. All output goes to the Streamlit page. Exceptions are caught,
        the database transaction is rolled back and the error is shown on the
        page instead of being raised.
    """
    verdict_emoji = {
        'true': '✅',
        'false': '❌',
        'insufficient evidence': '⚠️',
        'no evidence found': '🚫',
        'misleading': '⚠️',
        'uncertain': '❓'  # Legacy
    }
    source_emoji = {
        'PubMed': '📄',
        'EuropePMC': '🇪🇺',
        'ClinicalTrials.gov': '🏥',
        'Semantic Scholar': '🤖'
    }
    with st.spinner("🔄 Processing your submission..."):
        db = get_db()
        try:
            # Extract text. Strip first so a URL pasted with leading
            # whitespace is still recognised as a link.
            submitted = user_input.strip()
            if input_type == "Social Media Link" and submitted.startswith('http'):
                with st.status("Fetching post content...", expanded=True) as status:
                    text_content = scrape_post_text(submitted)
                    st.write(f"✓ Extracted {len(text_content)} characters")
                    status.update(label="Content fetched!", state="complete")
            else:
                text_content = user_input
            # A result containing this marker is treated as a scraping failure.
            if "Error scraping" in text_content:
                st.error(text_content)
                return
            # Extract claim
            with st.status("Extracting medical claim...", expanded=True) as status:
                claim = extract_medical_claim(text_content)
                st.write(f"**Claim identified:** {claim}")
                status.update(label="Claim extracted!", state="complete")
            if "No clear medical claim" in claim:
                st.warning(claim)
                st.info("💡 Try submitting text with clear action verbs like 'treats', 'prevents', 'cures', or 'reduces'.")
                return
            # Auto-detect specialty
            if specialty == "Auto-detect":
                detected_specialty = auto_detect_specialty(claim)
                st.info(f"🏥 Auto-detected specialty: **{detected_specialty}**")
            else:
                detected_specialty = specialty
            # Search literature
            with st.status("Searching medical literature...", expanded=True) as status:
                articles = search_medical_literature(claim, max_results=10)
                st.write(f"✓ Found {len(articles)} relevant articles")
                if articles:
                    sources = {}
                    for article in articles:
                        source = article.get('source', 'Unknown')
                        sources[source] = sources.get(source, 0) + 1
                    source_summary = ", ".join([f"{src}: {cnt}" for src, cnt in sources.items()])
                    st.write(f"📊 Sources: {source_summary}")
                status.update(label="Literature search complete!", state="complete")
            # Generate verdict
            with st.status("Analyzing claim...", expanded=True) as status:
                result = generate_verdict_with_llm(claim, articles, model_choice)
                verdict = result['verdict']
                explanation = result['explanation']
                st.write(f"**Verdict:** {verdict.upper()}")
                status.update(label="Analysis complete!", state="complete")
            # Save to database. Keys are read defensively: the display code
            # below reads the identifier as 'id', so an article may not carry
            # a 'pmid' key.
            stored_sources = [
                {
                    'title': a.get('title', 'Untitled'),
                    'pmid': a.get('pmid') or a.get('id') or 'N/A',
                    'url': a.get('url', ''),
                }
                for a in articles
            ]
            submission = Submission(
                input_text=user_input[:1000],
                extracted_claim=claim,
                verdict=verdict,
                explanation=explanation,
                sources=json.dumps(stored_sources),
                review_status='pending',
                reviewer_notes=f"[SPECIALTY: {detected_specialty}]"
            )
            db.add(submission)
            db.commit()
            submission_id = submission.id
            # Display results
            st.success("✅ Analysis Complete!")
            st.markdown("---")
            st.subheader("📊 Preliminary Results")
            # Normalised form used only for the emoji and CSS-class lookups.
            verdict_key = verdict.strip().lower()
            col1, col2 = st.columns([1, 2])
            with col1:
                st.metric(
                    "Verdict",
                    f"{verdict_emoji.get(verdict_key, '?')} {verdict.upper()}",
                    delta="Pending Review",
                    delta_color="off"
                )
                st.caption(f"Specialty: {detected_specialty}")
            with col2:
                st.info(f"**Claim:** {claim}")
                # Verdict styling (handle spaces in CSS class). The verdict is
                # model output, so it is escaped before going into raw HTML.
                verdict_class = html.escape(f"verdict-{verdict_key.replace(' ', '-')}", quote=True)
                st.markdown(
                    f"<p class='{verdict_class}'>Assessment: {html.escape(verdict.upper())}</p>",
                    unsafe_allow_html=True
                )
            # Explanation
            st.subheader("🧠 AI Analysis")
            st.write(explanation)
            # Sources
            st.subheader("📚 Scientific Sources")
            if articles:
                for i, article in enumerate(articles, 1):
                    source_name = article.get('source', 'Unknown')
                    emoji = source_emoji.get(article.get('source', ''), '📄')
                    with st.expander(f"{emoji} [{source_name}] {i}. {article.get('title', 'Untitled')}"):
                        st.write(f"**Source:** {source_name}")
                        st.write(f"**ID:** {article.get('id', 'N/A')}")
                        st.write(f"**Year:** {article.get('year', 'N/A')}")
                        st.write(f"**Type:** {article.get('type', 'Research Article')}")
                        abstract = article.get('abstract') or ''
                        if abstract:
                            st.write(f"**Abstract:** {abstract[:400]}...")
                        else:
                            st.write("**Abstract:** N/A")
                        if article.get('url'):
                            st.write(f"[View Full Article]({article['url']})")
            else:
                st.warning("No scientific articles found for this claim.")
            # Review status
            st.markdown("---")
            st.info(f"🔬 **Review Status:** Pending professional verification (ID: {submission_id})")
            st.caption("This claim will be reviewed by medical professionals.")
        except Exception as e:
            # Leave the session clean if the failure happened mid-transaction.
            db.rollback()
            st.error(f"An error occurred: {str(e)}")
            st.exception(e)
        finally:
            db.close()
def show_search_results(query):
    """Display approved claims whose claim, notes or explanation contain the query.

    The match is case-insensitive and literal: ``%``, ``_`` and ``\\`` in the
    query are escaped so they are not interpreted as LIKE wildcards. At most
    10 results are shown, most recently reviewed first.

    Args:
        query: Search text entered by the user; surrounding whitespace is
            ignored when matching.
    """
    st.header(f"🔍 Search Results for: '{query}'")
    escaped = (
        query.strip()
        .replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    search_pattern = f"%{escaped}%"
    db = get_db()
    try:
        results = db.query(Submission).filter(
            Submission.review_status == 'approve',
            (Submission.extracted_claim.ilike(search_pattern, escape="\\") |
             Submission.reviewer_notes.ilike(search_pattern, escape="\\") |
             Submission.explanation.ilike(search_pattern, escape="\\"))
        ).order_by(Submission.reviewed_at.desc()).limit(10).all()
        if not results:
            st.info(f"No verified claims found for '{query}'. Try different keywords!")
        else:
            st.write(f"Found **{len(results)}** verified claim(s):")
            display_claim_list(results)
    finally:
        # Close the session even when the query or the rendering fails.
        db.close()
def _extract_medical_keywords(claim_text):
    """Return the distinct medical keywords in claim_text, in order of first appearance."""
    medical_keywords = {
        'health', 'medical', 'disease', 'cure', 'treatment', 'vaccine',
        'drug', 'symptom', 'cancer', 'diabetes', 'covid', 'virus',
        'bacteria', 'infection', 'vitamin', 'supplement', 'blood',
        'heart', 'brain', 'immune', 'protein', 'diet', 'exercise',
        'pain', 'therapy', 'medicine', 'doctor', 'patient'
    }
    found = []
    # Tokenise on letters only so trailing punctuation ("cancer.") does not
    # prevent a match. There is no minimum word length: a length filter would
    # make the four-letter keywords (cure, drug, diet, pain) unreachable.
    for word in re.findall(r"[a-z]+", (claim_text or "").lower()):
        if word in medical_keywords and word not in found:
            found.append(word)
    return found


def show_related_claims(claim_text):
    """Show up to five approved claims related to the given text.

    Up to three medical keywords are taken from the text. Claims containing
    all of them are preferred; if none exist, claims containing the first
    keyword are shown instead. When the text has no known keyword, the most
    recently reviewed approved claims are shown.

    Args:
        claim_text: The text currently entered by the user.
    """
    keywords = _extract_medical_keywords(claim_text)
    db = get_db()
    try:
        approved = db.query(Submission).filter(Submission.review_status == 'approve')
        if not keywords:
            related = approved.order_by(Submission.reviewed_at.desc()).limit(5).all()
        else:
            # Strict pass: every selected keyword must appear in the claim.
            strict = approved
            for kw in keywords[:3]:
                strict = strict.filter(Submission.extracted_claim.ilike(f"%{kw}%"))
            related = strict.order_by(Submission.reviewed_at.desc()).limit(5).all()
            if not related:
                # Relaxed pass: the first keyword alone is enough.
                related = approved.filter(
                    Submission.extracted_claim.ilike(f"%{keywords[0]}%")
                ).order_by(Submission.reviewed_at.desc()).limit(5).all()
        if not related:
            st.info("No related verified claims found yet.")
        else:
            st.write(f"Found **{len(related)}** related claim(s):")
            display_claim_list(related)
    finally:
        # Close the session even when the query or the rendering fails.
        db.close()
def show_recent_verified_claims():
    """Show the ten most recently reviewed claims with status 'approve'."""
    st.caption("Recently verified by medical professionals")
    db = get_db()
    try:
        recent = db.query(Submission).filter_by(review_status='approve').order_by(
            Submission.reviewed_at.desc()
        ).limit(10).all()
        if not recent:
            st.info("No verified claims yet. Be the first!")
        else:
            st.write(f"Showing **{len(recent)}** recent verification(s):")
            display_claim_list(recent)
    finally:
        # Close the session even when the query or the rendering fails.
        db.close()
def extract_specialty_from_notes(notes):
    """Return the specialty named in a ``[SPECIALTY: ...]`` tag inside notes.

    Falls back to "General Medicine" when notes is empty/None or carries no
    such tag. Only the first tag is considered.
    """
    fallback = "General Medicine"
    tag = re.search(r'\[SPECIALTY: ([^\]]+)\]', notes) if notes else None
    return tag.group(1) if tag else fallback
def display_claim_list(submissions):
    """Render each submission as a collapsible card.

    A card shows the claim, its specialty, the colour-coded verdict, the
    reviewer's notes (with the ``[SPECIALTY: ...]`` tag removed), the AI
    explanation, the stored sources and the review timestamp.

    Args:
        submissions: Iterable of ``Submission`` rows. Each is read for
            ``extracted_claim``, ``verdict``, ``reviewer_notes``,
            ``explanation``, ``sources`` (JSON text) and ``reviewed_at``.
    """
    verdict_emoji = {
        'true': '✅',
        'false': '❌',
        'insufficient evidence': '⚠️',
        'no evidence found': '🚫',
        'misleading': '⚠️',
        'uncertain': '❓'
    }
    for submission in submissions:
        specialty = extract_specialty_from_notes(submission.reviewer_notes)
        # Tolerate rows without a verdict or claim instead of crashing the list.
        verdict = submission.verdict or 'unknown'
        claim = submission.extracted_claim or ''
        emoji = verdict_emoji.get(verdict, '?')
        # Only add an ellipsis when the claim was actually truncated.
        title = claim if len(claim) <= 80 else f"{claim[:80]}..."
        with st.expander(f"{emoji} {title} [{specialty}]", expanded=False):
            st.write(f"**Full Claim:** {claim}")
            st.caption(f"🏥 Specialty: **{specialty}**")
            # Verdict with color. Escaped because it is injected as raw HTML.
            verdict_class = html.escape(f"verdict-{verdict.replace(' ', '-')}", quote=True)
            st.markdown(
                f"<p class='{verdict_class}' style='font-size: 20px;'>Verdict: {html.escape(verdict.upper())}</p>",
                unsafe_allow_html=True
            )
            # Doctor's notes
            if submission.reviewer_notes:
                cleaned_notes = re.sub(r'\[SPECIALTY: [^\]]+\]', '', submission.reviewer_notes).strip()
                if cleaned_notes:
                    st.write("**Medical Professional's Review:**")
                    st.info(cleaned_notes)
            # AI explanation
            st.write("**AI Analysis:**")
            with st.container():
                st.text(submission.explanation)
            # Sources
            if submission.sources:
                try:
                    sources = json.loads(submission.sources)
                except (TypeError, ValueError):
                    # Malformed JSON in one row must not break the whole list.
                    sources = []
                if sources:
                    st.write("**Scientific Sources:**")
                    for i, source in enumerate(sources, 1):
                        st.caption(
                            f"{i}. [{source.get('title', 'Untitled')}]({source.get('url', '')}) "
                            f"(ID: {source.get('pmid', 'N/A')})"
                        )
            st.caption(f"Verified: {submission.reviewed_at.strftime('%Y-%m-%d %H:%M') if submission.reviewed_at else 'N/A'}")
def admin_page():
    """Render the password-protected review dashboard.

    After login the page lists pending submissions (optionally filtered by
    specialty) with a review form each, and the 20 most recently reviewed
    submissions. Submitting a valid review sets the row to 'approve', stores
    the reviewer's verdict (lower-cased), rewrites ``reviewer_notes`` with the
    specialty tag, confidence, AI agreement and the notes, stamps
    ``reviewed_at`` with the current UTC time and reruns the page.

    The expected password is read from the ``ADMIN_PASSWORD`` environment
    variable and falls back to the built-in legacy value when it is unset.
    """
    st.title("👨‍⚕️ Medical Review Dashboard")
    st.caption("For authorized medical professionals only")
    if 'authenticated' not in st.session_state:
        st.session_state.authenticated = False
    if not st.session_state.authenticated:
        password = st.text_input("Enter admin password:", type="password")
        if st.button("Login"):
            # SECURITY NOTE: the fallback keeps existing deployments working,
            # but a password committed to source control is not a secret;
            # set ADMIN_PASSWORD in the environment for any real deployment.
            expected = os.environ.get("ADMIN_PASSWORD", "doctor123")
            # Constant-time comparison; bytes so non-ASCII input is accepted.
            if hmac.compare_digest(password.encode("utf-8"), expected.encode("utf-8")):
                st.session_state.authenticated = True
                st.rerun()
            else:
                st.error("Invalid password")
        st.stop()
    # Specialty filter
    st.markdown("### 🏥 Filter by Specialty")
    filter_specialty = st.selectbox(
        "Show claims for:",
        options=["All Specialties"] + MEDICAL_SPECIALTIES,
        help="Filter pending claims by medical specialty"
    )
    db = get_db()
    # st.rerun() interrupts the script, so the session has to be closed in a
    # finally block or a submitted review would leave it open.
    try:
        pending_query = db.query(Submission).filter_by(review_status='pending')
        if filter_specialty != "All Specialties":
            pending_query = pending_query.filter(Submission.reviewer_notes.like(f"%[SPECIALTY: {filter_specialty}]%"))
        pending = pending_query.order_by(Submission.created_at.desc()).all()
        reviewed = db.query(Submission).filter(Submission.review_status != 'pending').order_by(Submission.reviewed_at.desc()).limit(20).all()
        tab1, tab2 = st.tabs([f"🔍 Pending Review ({len(pending)})", "✅ Recently Reviewed"])
        with tab1:
            if not pending:
                st.info(f"No pending submissions{' for ' + filter_specialty if filter_specialty != 'All Specialties' else ''}!")
            for submission in pending:
                specialty = extract_specialty_from_notes(submission.reviewer_notes)
                created = submission.created_at.strftime('%Y-%m-%d %H:%M') if submission.created_at else 'N/A'
                with st.expander(f"#{submission.id} - {specialty} - {created}"):
                    st.write(f"**🏥 Specialty:** {specialty}")
                    st.write(f"**Original Input:** {(submission.input_text or '')[:200]}...")
                    st.write(f"**Extracted Claim:** {submission.extracted_claim}")
                    st.write(f"**AI Verdict:** {(submission.verdict or 'unknown').upper()}")
                    st.write(f"**AI Explanation:** {submission.explanation}")
                    if submission.sources:
                        try:
                            sources = json.loads(submission.sources)
                        except (TypeError, ValueError):
                            # Malformed JSON must not block the review form.
                            sources = []
                        st.write(f"**Sources:** {len(sources)} articles")
                        for source in sources[:3]:
                            st.caption(f"- {source.get('title', 'Untitled')} (ID: {source.get('pmid', 'N/A')})")
                    st.markdown("---")
                    st.subheader("Medical Review")
                    col1, col2 = st.columns(2)
                    with col1:
                        # NEW: Updated verdict options
                        final_verdict = st.selectbox(
                            "Medical Verdict:",
                            ["Select", "True", "False", "Insufficient Evidence", "No Evidence Found", "Misleading"],
                            key=f"verdict_{submission.id}",
                            help="Choose based on medical evidence"
                        )
                    with col2:
                        confidence = st.selectbox(
                            "Confidence Level:",
                            ["Select", "High", "Moderate", "Low"],
                            key=f"confidence_{submission.id}"
                        )
                    reviewed_specialty = st.selectbox(
                        "Confirm/Update Specialty:",
                        options=MEDICAL_SPECIALTIES,
                        index=MEDICAL_SPECIALTIES.index(specialty) if specialty in MEDICAL_SPECIALTIES else 0,
                        key=f"specialty_{submission.id}"
                    )
                    reviewer_notes = st.text_area(
                        "Professional Review Notes (Required):",
                        placeholder="Explain verdict, cite evidence, add context...",
                        key=f"notes_{submission.id}"
                    )
                    ai_agreement = st.radio(
                        "Agreement with AI verdict?",
                        ["Yes - AI correct", "No - AI incorrect", "Partially - Needs nuance"],
                        key=f"agreement_{submission.id}",
                        horizontal=True
                    )
                    if st.button("Submit Review", key=f"submit_{submission.id}", type="primary"):
                        if final_verdict == "Select" or confidence == "Select":
                            st.error("Please select verdict and confidence")
                        elif not reviewer_notes or len(reviewer_notes.strip()) < 20:
                            st.error("Please provide review notes (min 20 chars)")
                        else:
                            submission.review_status = 'approve'
                            submission.verdict = final_verdict.lower()
                            # The specialty tag stays first; it is parsed back
                            # out of the notes when claims are displayed.
                            review_text = (
                                f"[SPECIALTY: {reviewed_specialty}]\n\n"
                                f"**Confidence:** {confidence}\n\n"
                                f"**AI Agreement:** {ai_agreement}\n\n"
                                f"**Medical Professional's Analysis:**\n{reviewer_notes}"
                            )
                            submission.reviewer_notes = review_text
                            submission.reviewed_at = datetime.now(timezone.utc)
                            db.commit()
                            st.success(f"✅ Review submitted for #{submission.id}")
                            st.balloons()
                            st.rerun()
        with tab2:
            for submission in reviewed:
                specialty = extract_specialty_from_notes(submission.reviewer_notes)
                status_emoji = "✅" if submission.review_status == "approve" else "❌"
                reviewed_on = submission.reviewed_at.strftime('%Y-%m-%d') if submission.reviewed_at else 'N/A'
                with st.expander(f"{status_emoji} #{submission.id} - {specialty} - {submission.review_status.upper()} - {reviewed_on}"):
                    st.write(f"**🏥 Specialty:** {specialty}")
                    st.write(f"**Claim:** {submission.extracted_claim}")
                    st.write(f"**Final Verdict:** {(submission.verdict or 'unknown').upper()}")
                    cleaned_notes = re.sub(r'\[SPECIALTY: [^\]]+\]', '', submission.reviewer_notes or '').strip()
                    st.write(f"**Reviewer Notes:** {cleaned_notes or 'None'}")
    finally:
        db.close()
def stats_page():
    """Render platform statistics.

    Shows total / pending / approved submission counts, the number of
    submissions per verdict (all review states), and the number of approved
    claims per specialty, most frequent first.
    """
    st.title("📈 Platform Statistics")
    db = get_db()
    try:
        total = db.query(Submission).count()
        pending = db.query(Submission).filter_by(review_status='pending').count()
        approved = db.query(Submission).filter_by(review_status='approve').count()
        col1, col2, col3 = st.columns(3)
        col1.metric("Total Submissions", total)
        col2.metric("Pending Review", pending)
        col3.metric("Approved", approved)
        # Verdict breakdown
        st.subheader("Verdict Distribution")
        verdicts = db.query(Submission.verdict, func.count(Submission.id)).group_by(Submission.verdict).all()
        # NEW: Better display of new verdict categories
        verdict_display = {
            'true': '✅ TRUE',
            'false': '❌ FALSE',
            'insufficient evidence': '⚠️ INSUFFICIENT EVIDENCE',
            'no evidence found': '🚫 NO EVIDENCE FOUND',
            'misleading': '⚠️ MISLEADING',
            'uncertain': '❓ UNCERTAIN (legacy)'
        }
        for verdict, count in verdicts:
            if verdict:
                display_name = verdict_display.get(verdict, verdict.upper())
                st.write(f"**{display_name}:** {count} submissions")
        # Specialty breakdown
        st.subheader("📊 Claims by Medical Specialty")
        # Only the notes column is needed to recover the specialty tag, so
        # avoid loading every full row.
        approved_notes = db.query(Submission.reviewer_notes).filter(
            Submission.review_status == 'approve'
        ).all()
        specialty_counts = {}
        for (notes,) in approved_notes:
            specialty = extract_specialty_from_notes(notes)
            specialty_counts[specialty] = specialty_counts.get(specialty, 0) + 1
        if specialty_counts:
            for specialty, count in sorted(specialty_counts.items(), key=lambda x: x[1], reverse=True):
                st.write(f"**{specialty}:** {count} claims")
        else:
            st.info("No specialty data available yet")
    finally:
        # Close the session even when a query or the rendering fails.
        db.close()
def main():
    """Application entry point: draw the sidebar and dispatch to the chosen page.

    "Home" renders ``main_page``, "Admin Review" renders ``admin_page`` and
    any other selection (i.e. "Statistics") renders ``stats_page``.
    """
    st.sidebar.title("🏥 Navigation")
    page = st.sidebar.radio("Go to:", ["Home", "Admin Review", "Statistics"])
    st.sidebar.markdown("---")
    st.sidebar.info("""
    **About This Tool**
    Combat health misinformation with:
    - Multi-source medical literature search
    - AI-powered preliminary analysis
    - Professional medical review
    - Stricter relevance scoring
    **New Verdict Categories:**
    - ✅ TRUE: Strong evidence
    - ❌ FALSE: Contradicted/risky
    - ⚠️ INSUFFICIENT EVIDENCE: Limited data
    - 🚫 NO EVIDENCE FOUND: No relevant research
    - ⚠️ MISLEADING: Lacks context
    """)
    if page == "Home":
        main_page()
    elif page == "Admin Review":
        admin_page()
    else:
        stats_page()


if __name__ == "__main__":
    main()