MON3EMPASHA
/

imdb-movie-review-sentiment-analysis

+import streamlit as st
+import joblib
+import json
+import re
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.stem import WordNetLemmatizer
+import os
+# Download NLTK resources
+try:
+    nltk.download('punkt')
+    nltk.download('stopwords')
+    nltk.download('wordnet')
+except:
+    pass
+class SentimentAnalyzer:
+    def __init__(self, model_dir="saved_models"):
+        try:
+            # Load models
+            self.vectorizer = joblib.load(f"{model_dir}/tfidf_vectorizer.pkl")
+            self.lr_model = joblib.load(f"{model_dir}/logistic_regression_model.pkl")
+            self.nb_model = joblib.load(f"{model_dir}/naive_bayes_model.pkl")
+            # Load metadata
+            with open(f"{model_dir}/model_metadata.json", 'r') as f:
+                self.metadata = json.load(f)
+            self.models_loaded = True
+        except Exception as e:
+            st.error(f"Error loading models: {e}")
+            self.models_loaded = False
+    def preprocess_text(self, text):
+        # Lowercase
+        text = text.lower()
+        # Remove special characters and digits
+        text = re.sub(r'[^a-zA-Z\s]', '', text)
+        # Tokenize
+        tokens = word_tokenize(text)
+        # Remove stopwords
+        stop_words = set(stopwords.words('english'))
+        tokens = [word for word in tokens if word not in stop_words]
+        # Lemmatize
+        lemmatizer = WordNetLemmatizer()
+        tokens = [lemmatizer.lemmatize(word) for word in tokens]
+        # Join tokens back to string
+        return ' '.join(tokens)
+    def predict(self, text, model_type='both'):
+        if not self.models_loaded:
+            return None
+        # Preprocess text
+        cleaned_text = self.preprocess_text(text)
+        # Vectorize
+        text_vector = self.vectorizer.transform([cleaned_text])
+        results = {}
+        if model_type in ['lr', 'both']:
+            lr_pred = self.lr_model.predict(text_vector)[0]
+            lr_prob = self.lr_model.predict_proba(text_vector)[0]
+            results['logistic_regression'] = {
+                'prediction': 'positive' if lr_pred == 1 else 'negative',
+                'confidence': float(max(lr_prob)),
+                'probabilities': {
+                    'negative': float(lr_prob[0]),
+                    'positive': float(lr_prob[1])
+                }
+            }
+        if model_type in ['nb', 'both']:
+            nb_pred = self.nb_model.predict(text_vector)[0]
+            nb_prob = self.nb_model.predict_proba(text_vector)[0]
+            results['naive_bayes'] = {
+                'prediction': 'positive' if nb_pred == 1 else 'negative',
+                'confidence': float(max(nb_prob)),
+                'probabilities': {
+                    'negative': float(nb_prob[0]),
+                    'positive': float(nb_prob[1])
+                }
+            }
+        return results
+def main():
+    st.set_page_config(
+        page_title="IMDb Sentiment Analysis",
+        page_icon="🎬",
+        layout="wide"
+    )
+    st.title("🎬 IMDb Review Sentiment Analysis")
+    st.markdown("---")
+    # Check if models exist
+    if not os.path.exists("saved_models"):
+        st.error("❌ Models not found! Please run `python train_and_save_model.py` first to train and save the models.")
+        st.info("This will create the 'saved_models' directory with your trained models.")
+        return
+    # Initialize analyzer
+    with st.spinner("Loading models..."):
+        analyzer = SentimentAnalyzer()
+    if not analyzer.models_loaded:
+        st.error("Failed to load models. Please check if the model files exist in the 'saved_models' directory.")
+        return
+    # Display model info
+    st.success("✅ Models loaded successfully!")
+    # Model performance metrics
+    col1, col2 = st.columns(2)
+    with col1:
+        st.metric("Logistic Regression Accuracy", f"{analyzer.metadata['lr_accuracy']:.2%}")
+    with col2:
+        st.metric("Naive Bayes Accuracy", f"{analyzer.metadata['nb_accuracy']:.2%}")
+    st.markdown("---")
+    # Input section
+    st.subheader("📝 Enter a Movie Review")
+    # Text input
+    user_input = st.text_area(
+        "Write your movie review here:",
+        height=150,
+        placeholder="Example: This movie was absolutely fantastic! The acting was superb and the plot was engaging..."
+    )
+    # Model selection
+    model_choice = st.selectbox(
+        "Choose model for prediction:",
+        ["Both Models", "Logistic Regression Only", "Naive Bayes Only"],
+        help="Select which model(s) to use for prediction"
+    )
+    # Prediction button
+    if st.button("🔍 Analyze Sentiment", type="primary"):
+        if user_input.strip():
+            with st.spinner("Analyzing sentiment..."):
+                # Map model choice to parameter
+                model_type = 'both'
+                if model_choice == "Logistic Regression Only":
+                    model_type = 'lr'
+                elif model_choice == "Naive Bayes Only":
+                    model_type = 'nb'
+                # Get predictions
+                results = analyzer.predict(user_input, model_type)
+                if results:
+                    st.markdown("---")
+                    st.subheader("📊 Analysis Results")
+                    # Display results
+                    if model_type == 'both' or model_choice == "Both Models":
+                        col1, col2 = st.columns(2)
+                        with col1:
+                            st.subheader("🤖 Logistic Regression")
+                            lr_result = results['logistic_regression']
+                            if lr_result['prediction'] == 'positive':
+                                st.success(f"✅ Positive Sentiment")
+                            else:
+                                st.error(f"❌ Negative Sentiment")
+                            st.metric("Confidence", f"{lr_result['confidence']:.2%}")
+                            # Progress bar for probabilities
+                            st.write("**Probabilities:**")
+                            st.progress(lr_result['probabilities']['positive'])
+                            st.write(f"Positive: {lr_result['probabilities']['positive']:.2%}")
+                            st.progress(lr_result['probabilities']['negative'])
+                            st.write(f"Negative: {lr_result['probabilities']['negative']:.2%}")
+                        with col2:
+                            st.subheader("🧠 Naive Bayes")
+                            nb_result = results['naive_bayes']
+                            if nb_result['prediction'] == 'positive':
+                                st.success(f"✅ Positive Sentiment")
+                            else:
+                                st.error(f"❌ Negative Sentiment")
+                            st.metric("Confidence", f"{nb_result['confidence']:.2%}")
+                            # Progress bar for probabilities
+                            st.write("**Probabilities:**")
+                            st.progress(nb_result['probabilities']['positive'])
+                            st.write(f"Positive: {nb_result['probabilities']['positive']:.2%}")
+                            st.progress(nb_result['probabilities']['negative'])
+                            st.write(f"Negative: {nb_result['probabilities']['negative']:.2%}")
+                    else:
+                        # Single model result
+                        model_name = "Logistic Regression" if model_type == 'lr' else "Naive Bayes"
+                        result = results['logistic_regression'] if model_type == 'lr' else results['naive_bayes']
+                        st.subheader(f"🤖 {model_name}")
+                        if result['prediction'] == 'positive':
+                            st.success(f"✅ Positive Sentiment")
+                        else:
+                            st.error(f"❌ Negative Sentiment")
+                        st.metric("Confidence", f"{result['confidence']:.2%}")
+                        # Progress bar for probabilities
+                        st.write("**Probabilities:**")
+                        st.progress(result['probabilities']['positive'])
+                        st.write(f"Positive: {result['probabilities']['positive']:.2%}")
+                        st.progress(result['probabilities']['negative'])
+                        st.write(f"Negative: {result['probabilities']['negative']:.2%}")
+                    # Model comparison
+                    if model_type == 'both':
+                        st.markdown("---")
+                        st.subheader("📈 Model Comparison")
+                        # Create comparison chart
+                        import plotly.graph_objects as go
+                        models = list(results.keys())
+                        confidences = [results[model]['confidence'] for model in models]
+                        predictions = [results[model]['prediction'] for model in models]
+                        fig = go.Figure(data=[
+                            go.Bar(
+                                x=models,
+                                y=confidences,
+                                text=[f"{conf:.2%}" for conf in confidences],
+                                textposition='auto',
+                                marker_color=['green' if pred == 'positive' else 'red' for pred in predictions]
+                            )
+                        ])
+                        fig.update_layout(
+                            title="Model Confidence Comparison",
+                            xaxis_title="Model",
+                            yaxis_title="Confidence",
+                            yaxis_range=[0, 1]
+                        )
+                        st.plotly_chart(fig, use_container_width=True)
+                else:
+                    st.error("Failed to get predictions. Please try again.")
+        else:
+            st.warning("⚠️ Please enter a review to analyze.")
+    # Sidebar with additional info
+    with st.sidebar:
+        st.header("ℹ️ About")
+        st.write("This app uses machine learning models to analyze the sentiment of movie reviews.")
+        st.write("**Models:**")
+        st.write("- Logistic Regression")
+        st.write("- Naive Bayes")
+        st.header("📋 Model Details")
+        st.write(f"**Training Samples:** {analyzer.metadata['training_samples']:,}")
+        st.write(f"**Test Samples:** {analyzer.metadata['test_samples']:,}")
+        st.write(f"**Features:** {analyzer.metadata['max_features']:,}")
+        st.header("🔧 Preprocessing Steps")
+        for step in analyzer.metadata['preprocessing_steps']:
+            st.write(f"- {step.replace('_', ' ').title()}")
+        st.header("📊 Sample Reviews")
+        sample_reviews = [
+            "This movie was absolutely fantastic! I loved every minute of it.",
+            "Terrible film, waste of time. Don't watch it.",
+            "It was okay, nothing special but not bad either.",
+            "Amazing performance by the actors, great storyline!",
+            "Boring and predictable plot, poor acting."
+        ]
+        for i, review in enumerate(sample_reviews, 1):
+            if st.button(f"Sample {i}", key=f"sample_{i}"):
+                st.session_state.user_input = review
+                st.rerun()
+if __name__ == "__main__":
+    main()