import streamlit as st
import joblib
import json
import re
import os

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import plotly.graph_objects as go

# Download NLTK resources (no-op if they are already present)
try:
    nltk.download('punkt', quiet=True)
    nltk.download('punkt_tab', quiet=True)  # needed by word_tokenize on newer NLTK releases
    nltk.download('stopwords', quiet=True)
    nltk.download('wordnet', quiet=True)
except Exception:
    pass


class SentimentAnalyzer:
    def __init__(self, model_dir="saved_models"):
        try:
            # Load vectorizer and models
            self.vectorizer = joblib.load(f"{model_dir}/tfidf_vectorizer.pkl")
            self.lr_model = joblib.load(f"{model_dir}/logistic_regression_model.pkl")
            self.nb_model = joblib.load(f"{model_dir}/naive_bayes_model.pkl")

            # Load metadata (accuracies, sample counts, preprocessing steps, ...)
            with open(f"{model_dir}/model_metadata.json", 'r') as f:
                self.metadata = json.load(f)

            self.models_loaded = True
        except Exception as e:
            st.error(f"Error loading models: {e}")
            self.models_loaded = False

    def preprocess_text(self, text):
        """Apply the same cleaning pipeline the models were trained with."""
        # Lowercase
        text = text.lower()

        # Remove special characters and digits
        text = re.sub(r'[^a-zA-Z\s]', '', text)

        # Tokenize
        tokens = word_tokenize(text)

        # Remove stopwords
        stop_words = set(stopwords.words('english'))
        tokens = [word for word in tokens if word not in stop_words]

        # Lemmatize
        lemmatizer = WordNetLemmatizer()
        tokens = [lemmatizer.lemmatize(word) for word in tokens]

        # Join tokens back into a single string
        return ' '.join(tokens)

    def predict(self, text, model_type='both'):
        """Return predictions for 'lr', 'nb', or 'both' models."""
        if not self.models_loaded:
            return None

        # Preprocess and vectorize the input text
        cleaned_text = self.preprocess_text(text)
        text_vector = self.vectorizer.transform([cleaned_text])

        results = {}

        if model_type in ('lr', 'both'):
            lr_pred = self.lr_model.predict(text_vector)[0]
            lr_prob = self.lr_model.predict_proba(text_vector)[0]
            results['logistic_regression'] = {
                'prediction': 'positive' if lr_pred == 1 else 'negative',
                'confidence': float(max(lr_prob)),
                'probabilities': {
                    'negative': float(lr_prob[0]),
                    'positive': float(lr_prob[1])
                }
            }

        if model_type in ('nb', 'both'):
            nb_pred = self.nb_model.predict(text_vector)[0]
            nb_prob = self.nb_model.predict_proba(text_vector)[0]
            results['naive_bayes'] = {
                'prediction': 'positive' if nb_pred == 1 else 'negative',
                'confidence': float(max(nb_prob)),
                'probabilities': {
                    'negative': float(nb_prob[0]),
                    'positive': float(nb_prob[1])
                }
            }

        return results


def display_result(title, result):
    """Render one model's prediction, confidence, and class probabilities."""
    st.subheader(title)

    if result['prediction'] == 'positive':
        st.success("✅ Positive Sentiment")
    else:
        st.error("❌ Negative Sentiment")

    st.metric("Confidence", f"{result['confidence']:.2%}")

    # Progress bars for the class probabilities
    st.write("**Probabilities:**")
    st.progress(result['probabilities']['positive'])
    st.write(f"Positive: {result['probabilities']['positive']:.2%}")
    st.progress(result['probabilities']['negative'])
    st.write(f"Negative: {result['probabilities']['negative']:.2%}")


def main():
    st.set_page_config(
        page_title="IMDb Sentiment Analysis",
        page_icon="đŸŽŦ",
        layout="wide"
    )

    st.title("đŸŽŦ IMDb Review Sentiment Analysis")
    st.markdown("---")

    # Check if models exist
    if not os.path.exists("saved_models"):
        st.error("❌ Models not found! Please run `python train_and_save_model.py` first to train and save the models.")
        st.info("This will create the 'saved_models' directory with your trained models.")
        return

    # Initialize analyzer
    with st.spinner("Loading models..."):
        analyzer = SentimentAnalyzer()

    if not analyzer.models_loaded:
        st.error("Failed to load models. Please check that the model files exist in the 'saved_models' directory.")
        return

    # Display model info
    st.success("✅ Models loaded successfully!")

    # Model performance metrics
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Logistic Regression Accuracy", f"{analyzer.metadata['lr_accuracy']:.2%}")
    with col2:
        st.metric("Naive Bayes Accuracy", f"{analyzer.metadata['nb_accuracy']:.2%}")

    st.markdown("---")

    # Input section
    st.subheader("📝 Enter a Movie Review")

    # Text input; pre-filled when a sample review is picked in the sidebar
    user_input = st.text_area(
        "Write your movie review here:",
        value=st.session_state.get("user_input", ""),
        height=150,
        placeholder="Example: This movie was absolutely fantastic! The acting was superb and the plot was engaging..."
    )

    # Model selection
    model_choice = st.selectbox(
        "Choose model for prediction:",
        ["Both Models", "Logistic Regression Only", "Naive Bayes Only"],
        help="Select which model(s) to use for prediction"
    )

    # Prediction button
    if st.button("🔍 Analyze Sentiment", type="primary"):
        if user_input.strip():
            with st.spinner("Analyzing sentiment..."):
                # Map the selectbox label to the predict() parameter
                model_type = 'both'
                if model_choice == "Logistic Regression Only":
                    model_type = 'lr'
                elif model_choice == "Naive Bayes Only":
                    model_type = 'nb'

                # Get predictions
                results = analyzer.predict(user_input, model_type)

            if results:
                st.markdown("---")
                st.subheader("📊 Analysis Results")

                # Display results
                if model_type == 'both':
                    col1, col2 = st.columns(2)
                    with col1:
                        display_result("🤖 Logistic Regression", results['logistic_regression'])
                    with col2:
                        display_result("🧠 Naive Bayes", results['naive_bayes'])
                else:
                    # Single-model result
                    if model_type == 'lr':
                        display_result("🤖 Logistic Regression", results['logistic_regression'])
                    else:
                        display_result("🤖 Naive Bayes", results['naive_bayes'])

                # Model comparison chart (only when both models ran)
                if model_type == 'both':
                    st.markdown("---")
                    st.subheader("📈 Model Comparison")

                    models = list(results.keys())
                    confidences = [results[model]['confidence'] for model in models]
                    predictions = [results[model]['prediction'] for model in models]

                    fig = go.Figure(data=[
                        go.Bar(
                            x=models,
                            y=confidences,
                            text=[f"{conf:.2%}" for conf in confidences],
                            textposition='auto',
                            marker_color=['green' if pred == 'positive' else 'red' for pred in predictions]
                        )
                    ])
                    fig.update_layout(
                        title="Model Confidence Comparison",
                        xaxis_title="Model",
                        yaxis_title="Confidence",
                        yaxis_range=[0, 1]
                    )
                    st.plotly_chart(fig, use_container_width=True)
            else:
                st.error("Failed to get predictions. Please try again.")
        else:
            st.warning("âš ī¸ Please enter a review to analyze.")

    # Sidebar with additional info
    with st.sidebar:
        st.header("â„šī¸ About")
        st.write("This app uses machine learning models to analyze the sentiment of movie reviews.")
        st.write("**Models:**")
        st.write("- Logistic Regression")
        st.write("- Naive Bayes")

        st.header("📋 Model Details")
        st.write(f"**Training Samples:** {analyzer.metadata['training_samples']:,}")
        st.write(f"**Test Samples:** {analyzer.metadata['test_samples']:,}")
        st.write(f"**Features:** {analyzer.metadata['max_features']:,}")

        st.header("🔧 Preprocessing Steps")
        for step in analyzer.metadata['preprocessing_steps']:
            st.write(f"- {step.replace('_', ' ').title()}")

        st.header("📊 Sample Reviews")
        sample_reviews = [
            "This movie was absolutely fantastic! I loved every minute of it.",
            "Terrible film, waste of time. Don't watch it.",
            "It was okay, nothing special but not bad either.",
            "Amazing performance by the actors, great storyline!",
            "Boring and predictable plot, poor acting."
        ]

        for i, review in enumerate(sample_reviews, 1):
            if st.button(f"Sample {i}", key=f"sample_{i}"):
                # Stash the sample text so the text area picks it up on rerun
                st.session_state.user_input = review
                st.rerun()


if __name__ == "__main__":
    main()
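
# ---------------------------------------------------------------------------
# Usage sketch (comments only; nothing here executes). The file and key names
# below are taken from the loading code above; the concrete values and the
# filename app.py are illustrative assumptions, not outputs of the real
# training run.
#
# Train the models, then launch the app:
#
#   python train_and_save_model.py
#   streamlit run app.py
#
# SentimentAnalyzer expects a saved_models/ directory laid out like this:
#
#   saved_models/
#       tfidf_vectorizer.pkl
#       logistic_regression_model.pkl
#       naive_bayes_model.pkl
#       model_metadata.json
#
# model_metadata.json must contain at least the keys this script reads, e.g.:
#
#   {
#       "lr_accuracy": 0.88,
#       "nb_accuracy": 0.85,
#       "training_samples": 40000,
#       "test_samples": 10000,
#       "max_features": 5000,
#       "preprocessing_steps": ["lowercase", "remove_special_characters",
#                               "tokenize", "remove_stopwords", "lemmatize"]
#   }
# ---------------------------------------------------------------------------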