imdb-movie-review-sentiment-analysis / streamlit_deployment.py

Upload streamlit_deployment.py

747e79c verified 4 months ago

12.2 kB

	import streamlit as st
	import joblib
	import json
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	from nltk.stem import WordNetLemmatizer
	import os

	# Download NLTK resources
	def _download_nltk_resources():
	    """Fetch the NLTK data used by the text preprocessing, best-effort.

	    A failed download must not stop the app from starting (the data may
	    already be installed locally), so problems are reported on stdout and
	    otherwise ignored.
	    """
	    # 'punkt_tab' is requested alongside 'punkt' because recent NLTK releases
	    # load the word tokenizer from 'punkt_tab' instead of 'punkt'.
	    for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
	        try:
	            # quiet=True keeps the log clean: Streamlit re-executes this
	            # script on every interaction.
	            fetched = nltk.download(resource, quiet=True)
	        except Exception as exc:
	            print(f"Warning: NLTK resource '{resource}' could not be downloaded: {exc}")
	            continue
	        if not fetched:
	            # nltk.download signals most failures by returning False.
	            print(f"Warning: NLTK resource '{resource}' could not be downloaded")


	_download_nltk_resources()

	class SentimentAnalyzer:
	    """Score review text with the saved Logistic Regression and Naive Bayes models.

	    On construction the TF-IDF vectorizer, both classifiers and the JSON
	    metadata are read from ``model_dir``.  ``models_loaded`` records whether
	    that succeeded; when it is False the other attributes may be missing and
	    ``predict`` returns None.
	    """

	    # Public ``model_type`` code -> (key in the result dict, attribute holding
	    # the classifier).  Insertion order is the order of the returned results.
	    _MODEL_ATTRS = {
	        'lr': ('logistic_regression', 'lr_model'),
	        'nb': ('naive_bayes', 'nb_model'),
	    }

	    # Built on the first preprocess_text() call and then reused, so the
	    # stop-word list and lemmatizer are not recreated for every review.
	    _stop_words = None
	    _lemmatizer = None

	    def __init__(self, model_dir="saved_models"):
	        """Load the vectorizer, classifiers and metadata from ``model_dir``.

	        Any failure is shown with ``st.error`` and leaves
	        ``models_loaded`` set to False instead of raising.

	        Args:
	            model_dir: Directory containing the ``.pkl`` model files and
	                ``model_metadata.json``.
	        """
	        try:
	            # NOTE(security): joblib.load unpickles its input, which can run
	            # arbitrary code -- only point model_dir at trusted files.
	            self.vectorizer = joblib.load(f"{model_dir}/tfidf_vectorizer.pkl")
	            self.lr_model = joblib.load(f"{model_dir}/logistic_regression_model.pkl")
	            self.nb_model = joblib.load(f"{model_dir}/naive_bayes_model.pkl")

	            # Explicit encoding so the read does not depend on the platform default.
	            with open(f"{model_dir}/model_metadata.json", 'r', encoding='utf-8') as f:
	                self.metadata = json.load(f)

	            self.models_loaded = True
	        except Exception as e:
	            st.error(f"Error loading models: {e}")
	            self.models_loaded = False

	    def preprocess_text(self, text):
	        """Normalise raw review text into a space-joined string of lemmas.

	        Steps: lowercase, drop every character that is not a letter or
	        whitespace, tokenize, remove English stop words, lemmatize.

	        Args:
	            text: Raw review text.

	        Returns:
	            The cleaned tokens joined by single spaces ('' if none remain).
	        """
	        if SentimentAnalyzer._stop_words is None:
	            SentimentAnalyzer._stop_words = frozenset(stopwords.words('english'))
	        if SentimentAnalyzer._lemmatizer is None:
	            SentimentAnalyzer._lemmatizer = WordNetLemmatizer()
	        stop_words = SentimentAnalyzer._stop_words
	        lemmatize = SentimentAnalyzer._lemmatizer.lemmatize

	        text = re.sub(r'[^a-zA-Z\s]', '', text.lower())
	        tokens = word_tokenize(text)
	        return ' '.join(lemmatize(word) for word in tokens if word not in stop_words)

	    @staticmethod
	    def _score(model, text_vector):
	        """Run one classifier on a vectorized review and package its output."""
	        label = model.predict(text_vector)[0]
	        probs = model.predict_proba(text_vector)[0]
	        # Assumes the classifiers were trained with labels 0 = negative and
	        # 1 = positive, so column 0/1 of predict_proba line up the same way
	        # -- TODO confirm against the training script.
	        return {
	            'prediction': 'positive' if label == 1 else 'negative',
	            'confidence': float(max(probs)),
	            'probabilities': {
	                'negative': float(probs[0]),
	                'positive': float(probs[1]),
	            },
	        }

	    def predict(self, text, model_type='both'):
	        """Predict the sentiment of ``text`` with the selected model(s).

	        Args:
	            text: Raw review text.
	            model_type: 'lr', 'nb' or 'both'.

	        Returns:
	            None if the models failed to load.  Otherwise a dict with a
	            'logistic_regression' and/or 'naive_bayes' entry, each holding
	            'prediction' ('positive'/'negative'), 'confidence' and
	            'probabilities'.  An unrecognised ``model_type`` yields an
	            empty dict.
	        """
	        if not self.models_loaded:
	            return None

	        cleaned_text = self.preprocess_text(text)
	        text_vector = self.vectorizer.transform([cleaned_text])

	        results = {}
	        for code, (result_key, attr_name) in self._MODEL_ATTRS.items():
	            if model_type in (code, 'both'):
	                results[result_key] = self._score(getattr(self, attr_name), text_vector)
	        return results

	def _render_result(heading, result):
	    """Show one model's verdict, confidence and class probabilities.

	    Args:
	        heading: Subheader text displayed above the result.
	        result: One entry of the dict returned by
	            ``SentimentAnalyzer.predict`` (keys 'prediction',
	            'confidence', 'probabilities').
	    """
	    st.subheader(heading)
	    if result['prediction'] == 'positive':
	        st.success("✅ Positive Sentiment")
	    else:
	        st.error("❌ Negative Sentiment")
	    st.metric("Confidence", f"{result['confidence']:.2%}")

	    # Progress bar for probabilities
	    st.write("Probabilities:")
	    probabilities = result['probabilities']
	    for label in ('positive', 'negative'):
	        st.progress(probabilities[label])
	        st.write(f"{label.title()}: {probabilities[label]:.2%}")


	def _render_comparison_chart(results):
	    """Plot each model's confidence as a bar, green for positive, red for negative."""
	    # Imported here so plotly is only needed when the chart is actually drawn.
	    import plotly.graph_objects as go

	    models = list(results.keys())
	    confidences = [results[model]['confidence'] for model in models]
	    predictions = [results[model]['prediction'] for model in models]

	    fig = go.Figure(data=[
	        go.Bar(
	            x=models,
	            y=confidences,
	            text=[f"{conf:.2%}" for conf in confidences],
	            textposition='auto',
	            marker_color=['green' if pred == 'positive' else 'red' for pred in predictions]
	        )
	    ])

	    fig.update_layout(
	        title="Model Confidence Comparison",
	        xaxis_title="Model",
	        yaxis_title="Confidence",
	        yaxis_range=[0, 1]
	    )

	    st.plotly_chart(fig, use_container_width=True)


	def _use_sample_review(review):
	    """Button callback: put ``review`` into the review text area."""
	    st.session_state.user_input = review


	def main():
	    """Build the Streamlit page: model status, review input, results, sidebar.

	    Returns early (after showing an error) when the 'saved_models'
	    directory is missing or the models cannot be loaded.
	    """
	    st.set_page_config(
	        page_title="IMDb Sentiment Analysis",
	        page_icon="🎬",
	        layout="wide"
	    )

	    st.title("🎬 IMDb Review Sentiment Analysis")
	    st.markdown("---")

	    # Check if models exist
	    if not os.path.exists("saved_models"):
	        st.error("❌ Models not found! Please run `python train_and_save_model.py` first to train and save the models.")
	        st.info("This will create the 'saved_models' directory with your trained models.")
	        return

	    # Initialize analyzer
	    with st.spinner("Loading models..."):
	        analyzer = SentimentAnalyzer()

	    if not analyzer.models_loaded:
	        st.error("Failed to load models. Please check if the model files exist in the 'saved_models' directory.")
	        return

	    # Display model info
	    st.success("✅ Models loaded successfully!")

	    # Model performance metrics
	    col1, col2 = st.columns(2)
	    with col1:
	        st.metric("Logistic Regression Accuracy", f"{analyzer.metadata['lr_accuracy']:.2%}")
	    with col2:
	        st.metric("Naive Bayes Accuracy", f"{analyzer.metadata['nb_accuracy']:.2%}")

	    st.markdown("---")

	    # Input section
	    st.subheader("📝 Enter a Movie Review")

	    # The key ties the widget to st.session_state.user_input, which is what
	    # the sample-review buttons in the sidebar write to.
	    user_input = st.text_area(
	        "Write your movie review here:",
	        height=150,
	        placeholder="Example: This movie was absolutely fantastic! The acting was superb and the plot was engaging...",
	        key="user_input"
	    )

	    # Model selection: label shown to the user -> code understood by predict()
	    model_codes = {
	        "Both Models": 'both',
	        "Logistic Regression Only": 'lr',
	        "Naive Bayes Only": 'nb',
	    }
	    model_choice = st.selectbox(
	        "Choose model for prediction:",
	        list(model_codes),
	        help="Select which model(s) to use for prediction"
	    )

	    # Prediction button
	    if st.button("🔍 Analyze Sentiment", type="primary"):
	        if user_input.strip():
	            model_type = model_codes.get(model_choice, 'both')

	            with st.spinner("Analyzing sentiment..."):
	                results = analyzer.predict(user_input, model_type)

	            if results:
	                st.markdown("---")
	                st.subheader("📊 Analysis Results")

	                if model_type == 'both':
	                    col1, col2 = st.columns(2)
	                    with col1:
	                        _render_result("🤖 Logistic Regression", results['logistic_regression'])
	                    with col2:
	                        _render_result("🧠 Naive Bayes", results['naive_bayes'])

	                    # Model comparison
	                    st.markdown("---")
	                    st.subheader("📈 Model Comparison")
	                    _render_comparison_chart(results)
	                elif model_type == 'lr':
	                    _render_result("🤖 Logistic Regression", results['logistic_regression'])
	                else:
	                    _render_result("🤖 Naive Bayes", results['naive_bayes'])
	            else:
	                st.error("Failed to get predictions. Please try again.")
	        else:
	            st.warning("⚠️ Please enter a review to analyze.")

	    # Sidebar with additional info
	    with st.sidebar:
	        st.header("ℹ️ About")
	        st.write("This app uses machine learning models to analyze the sentiment of movie reviews.")
	        st.write("Models:")
	        st.write("- Logistic Regression")
	        st.write("- Naive Bayes")

	        st.header("📋 Model Details")
	        st.write(f"Training Samples: {analyzer.metadata['training_samples']:,}")
	        st.write(f"Test Samples: {analyzer.metadata['test_samples']:,}")
	        st.write(f"Features: {analyzer.metadata['max_features']:,}")

	        st.header("🔧 Preprocessing Steps")
	        for step in analyzer.metadata['preprocessing_steps']:
	            st.write(f"- {step.replace('_', ' ').title()}")

	        st.header("📊 Sample Reviews")
	        sample_reviews = [
	            "This movie was absolutely fantastic! I loved every minute of it.",
	            "Terrible film, waste of time. Don't watch it.",
	            "It was okay, nothing special but not bad either.",
	            "Amazing performance by the actors, great storyline!",
	            "Boring and predictable plot, poor acting."
	        ]

	        for i, review in enumerate(sample_reviews, 1):
	            # The value is set from an on_click callback because Streamlit
	            # does not allow assigning to a widget's session-state key once
	            # that widget has been created in the current run; callbacks
	            # execute before the rerun that the click triggers.
	            st.button(
	                f"Sample {i}",
	                key=f"sample_{i}",
	                on_click=_use_sample_review,
	                args=(review,)
	            )

	# Entry point: build the page only when this file runs as the main module
	# (not when it is imported).
	if __name__ == "__main__":
	main()