imdb-movie-review-sentiment-analysis / streamlit_deployment.py
MON3EMPASHA's picture
Upload streamlit_deployment.py
747e79c verified
import streamlit as st
import joblib
import json
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import os
# Fetch the NLTK data that the preprocessing pipeline relies on: tokenizer
# models, the English stop-word list and the WordNet corpus.
# 'punkt_tab' is the tokenizer package loaded by word_tokenize on recent NLTK
# releases, while older releases only ship 'punkt', so both are requested.
# Each resource is fetched on its own so one failure does not skip the rest.
for _nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    try:
        nltk.download(_nltk_resource)
    except Exception as exc:
        # Best effort: the data may already be installed locally, so a failed
        # download (e.g. no network) must not stop the app from starting.
        # Only ordinary errors are caught; Ctrl-C / SystemExit still propagate.
        print(f"Could not download NLTK resource '{_nltk_resource}': {exc}")
class SentimentAnalyzer:
    """Classify review text with a saved logistic-regression and naive-Bayes model.

    The constructor loads a vectorizer, both models and a JSON metadata file
    from ``model_dir``. ``models_loaded`` tells whether that succeeded;
    ``predict`` returns ``None`` while it is ``False``.
    """

    # Matches every character that is neither an ASCII letter nor whitespace.
    _NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')

    # Preprocessing resources, built on first use and then shared, so they are
    # not rebuilt for every review.
    _stop_words = None
    _lemmatizer = None

    def __init__(self, model_dir="saved_models"):
        """Load the vectorizer, both models and the metadata from ``model_dir``.

        Loading errors are reported through ``st.error`` instead of being
        raised; ``models_loaded`` stays ``False`` in that case.
        """
        # Defaults so every attribute exists even when loading fails part-way.
        self.vectorizer = None
        self.lr_model = None
        self.nb_model = None
        self.metadata = {}
        self.models_loaded = False
        try:
            # NOTE: joblib.load unpickles its input, which can execute
            # arbitrary code; only point model_dir at trusted files.
            self.vectorizer = joblib.load(f"{model_dir}/tfidf_vectorizer.pkl")
            self.lr_model = joblib.load(f"{model_dir}/logistic_regression_model.pkl")
            self.nb_model = joblib.load(f"{model_dir}/naive_bayes_model.pkl")
            # Explicit encoding: the platform default is not always UTF-8.
            with open(f"{model_dir}/model_metadata.json", 'r', encoding='utf-8') as f:
                self.metadata = json.load(f)
            self.models_loaded = True
        except Exception as e:
            # UI boundary: show the problem to the user rather than crash.
            st.error(f"Error loading models: {e}")

    def preprocess_text(self, text):
        """Return ``text`` cleaned for vectorization.

        Steps, in order: lowercase, drop every character that is not an ASCII
        letter or whitespace, tokenize, remove English stop words, lemmatize,
        and join the remaining tokens with single spaces.
        """
        cls = type(self)
        if cls._stop_words is None:
            cls._stop_words = frozenset(stopwords.words('english'))
        if cls._lemmatizer is None:
            cls._lemmatizer = WordNetLemmatizer()

        letters_only = self._NON_LETTER_RE.sub('', text.lower())
        return ' '.join(
            cls._lemmatizer.lemmatize(token)
            for token in word_tokenize(letters_only)
            if token not in cls._stop_words
        )

    @staticmethod
    def _classify(model, text_vector):
        """Run one model on ``text_vector`` and package its verdict as a dict."""
        label = model.predict(text_vector)[0]
        probs = model.predict_proba(text_vector)[0]
        # NOTE(review): assumes the models were trained with label 1 meaning
        # positive and that predict_proba columns are ordered
        # [negative, positive] - confirm against the training script.
        return {
            'prediction': 'positive' if label == 1 else 'negative',
            'confidence': float(max(probs)),
            'probabilities': {
                'negative': float(probs[0]),
                'positive': float(probs[1])
            }
        }

    def predict(self, text, model_type='both'):
        """Predict the sentiment of ``text``.

        Args:
            text: Raw review text.
            model_type: ``'lr'``, ``'nb'`` or ``'both'``.

        Returns:
            ``None`` when the models are not loaded. Otherwise a dict with a
            ``'logistic_regression'`` and/or ``'naive_bayes'`` entry, each
            holding ``'prediction'``, ``'confidence'`` and ``'probabilities'``.
            Any other ``model_type`` yields an empty dict.
        """
        if not self.models_loaded:
            return None

        cleaned_text = self.preprocess_text(text)
        text_vector = self.vectorizer.transform([cleaned_text])

        results = {}
        if model_type in ('lr', 'both'):
            results['logistic_regression'] = self._classify(self.lr_model, text_vector)
        if model_type in ('nb', 'both'):
            results['naive_bayes'] = self._classify(self.nb_model, text_vector)
        return results
# Maps each option shown in the model selector to the ``model_type`` value
# passed to ``SentimentAnalyzer.predict``. Insertion order is display order.
_MODEL_CHOICES = {
    "Both Models": 'both',
    "Logistic Regression Only": 'lr',
    "Naive Bayes Only": 'nb',
}

# Example reviews offered as one-click buttons in the sidebar.
_SAMPLE_REVIEWS = (
    "This movie was absolutely fantastic! I loved every minute of it.",
    "Terrible film, waste of time. Don't watch it.",
    "It was okay, nothing special but not bad either.",
    "Amazing performance by the actors, great storyline!",
    "Boring and predictable plot, poor acting.",
)


def _use_sample_review(review):
    """Button callback: place ``review`` into the review text area.

    Runs as an ``on_click`` callback, i.e. before the script reruns, because
    Streamlit forbids assigning to a widget's session-state key once that
    widget has been instantiated during the current run.
    """
    st.session_state.user_input = review


def _render_model_result(heading, result):
    """Show one model's verdict, confidence and class probabilities."""
    st.subheader(heading)
    if result['prediction'] == 'positive':
        st.success("✅ Positive Sentiment")
    else:
        st.error("❌ Negative Sentiment")
    st.metric("Confidence", f"{result['confidence']:.2%}")

    # One progress bar per class probability.
    st.write("**Probabilities:**")
    st.progress(result['probabilities']['positive'])
    st.write(f"Positive: {result['probabilities']['positive']:.2%}")
    st.progress(result['probabilities']['negative'])
    st.write(f"Negative: {result['probabilities']['negative']:.2%}")


def _render_confidence_chart(results):
    """Draw a bar chart comparing the confidence of every model in ``results``."""
    # Imported here so plotly is only needed when a comparison is displayed.
    import plotly.graph_objects as go

    models = list(results.keys())
    confidences = [results[model]['confidence'] for model in models]
    predictions = [results[model]['prediction'] for model in models]
    fig = go.Figure(data=[
        go.Bar(
            x=models,
            y=confidences,
            text=[f"{conf:.2%}" for conf in confidences],
            textposition='auto',
            # Green bar for a positive verdict, red for a negative one.
            marker_color=['green' if pred == 'positive' else 'red' for pred in predictions]
        )
    ])
    fig.update_layout(
        title="Model Confidence Comparison",
        xaxis_title="Model",
        yaxis_title="Confidence",
        yaxis_range=[0, 1]
    )
    st.plotly_chart(fig, use_container_width=True)


def _render_sidebar(metadata):
    """Fill the sidebar with app info, model metadata and sample-review buttons."""
    with st.sidebar:
        st.header("ℹ️ About")
        st.write("This app uses machine learning models to analyze the sentiment of movie reviews.")
        st.write("**Models:**")
        st.write("- Logistic Regression")
        st.write("- Naive Bayes")

        st.header("📋 Model Details")
        st.write(f"**Training Samples:** {metadata['training_samples']:,}")
        st.write(f"**Test Samples:** {metadata['test_samples']:,}")
        st.write(f"**Features:** {metadata['max_features']:,}")

        st.header("🔧 Preprocessing Steps")
        for step in metadata['preprocessing_steps']:
            st.write(f"- {step.replace('_', ' ').title()}")

        st.header("📊 Sample Reviews")
        for i, review in enumerate(_SAMPLE_REVIEWS, 1):
            # The callback fills the text area (key "user_input"); Streamlit
            # reruns the script automatically after a button callback.
            st.button(
                f"Sample {i}",
                key=f"sample_{i}",
                on_click=_use_sample_review,
                args=(review,),
            )


def main():
    """Run the Streamlit page: load the models, read a review, show predictions.

    Stops early with an error message when the ``saved_models`` directory is
    missing or the models cannot be loaded.
    """
    st.set_page_config(
        page_title="IMDb Sentiment Analysis",
        page_icon="🎬",
        layout="wide"
    )
    st.title("🎬 IMDb Review Sentiment Analysis")
    st.markdown("---")

    # Check if models exist
    if not os.path.exists("saved_models"):
        st.error("❌ Models not found! Please run `python train_and_save_model.py` first to train and save the models.")
        st.info("This will create the 'saved_models' directory with your trained models.")
        return

    # Initialize analyzer
    with st.spinner("Loading models..."):
        analyzer = SentimentAnalyzer()
    if not analyzer.models_loaded:
        st.error("Failed to load models. Please check if the model files exist in the 'saved_models' directory.")
        return

    st.success("✅ Models loaded successfully!")

    # Model performance metrics
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Logistic Regression Accuracy", f"{analyzer.metadata['lr_accuracy']:.2%}")
    with col2:
        st.metric("Naive Bayes Accuracy", f"{analyzer.metadata['nb_accuracy']:.2%}")
    st.markdown("---")

    # Input section. The key ties the text area to
    # st.session_state.user_input so the sidebar sample buttons can fill it.
    st.subheader("📝 Enter a Movie Review")
    user_input = st.text_area(
        "Write your movie review here:",
        height=150,
        placeholder="Example: This movie was absolutely fantastic! The acting was superb and the plot was engaging...",
        key="user_input"
    )

    # Model selection
    model_choice = st.selectbox(
        "Choose model for prediction:",
        list(_MODEL_CHOICES),
        help="Select which model(s) to use for prediction"
    )

    # Prediction button
    if st.button("🔍 Analyze Sentiment", type="primary"):
        if user_input.strip():
            model_type = _MODEL_CHOICES.get(model_choice, 'both')
            with st.spinner("Analyzing sentiment..."):
                results = analyzer.predict(user_input, model_type)

            if results:
                st.markdown("---")
                st.subheader("📊 Analysis Results")
                if model_type == 'both':
                    # Side-by-side results followed by a comparison chart.
                    col1, col2 = st.columns(2)
                    with col1:
                        _render_model_result("🤖 Logistic Regression", results['logistic_regression'])
                    with col2:
                        _render_model_result("🧠 Naive Bayes", results['naive_bayes'])
                    st.markdown("---")
                    st.subheader("📈 Model Comparison")
                    _render_confidence_chart(results)
                else:
                    # Single model result
                    model_name = "Logistic Regression" if model_type == 'lr' else "Naive Bayes"
                    result = results['logistic_regression'] if model_type == 'lr' else results['naive_bayes']
                    _render_model_result(f"🤖 {model_name}", result)
            else:
                st.error("Failed to get predictions. Please try again.")
        else:
            st.warning("⚠️ Please enter a review to analyze.")

    # Sidebar with additional info
    _render_sidebar(analyzer.metadata)
if __name__ == "__main__":
    # Build the page only when this file is executed as a script, not when it
    # is imported as a module.
    main()