MON3EMPASHA committed
Commit 57927a6 · Parent: 557abda

Refactor project structure and update dependencies for improved compatibility and performance.
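
No dependency manifest is included in this commit; only the Streamlit front end below is added. Judging from the imports in streamlit_app.py (and the joblib-pickled scikit-learn models it loads), a requirements.txt for it would plausibly list the following packages. This list is an inference from the code, not a file from the repository, and versions are intentionally left unpinned:

streamlit
joblib
scikit-learn
nltk
plotly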

Files changed (1)
streamlit_app.py +284 -0

streamlit_app.py ADDED
@@ -0,0 +1,284 @@
import streamlit as st
import joblib
import json
import re
import os

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import plotly.graph_objects as go

# Download NLTK resources (no-op if they are already present)
try:
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
    nltk.download('wordnet', quiet=True)
except Exception:
    pass


class SentimentAnalyzer:
    def __init__(self, model_dir="saved_models"):
        try:
            # Load models
            self.vectorizer = joblib.load(f"{model_dir}/tfidf_vectorizer.pkl")
            self.lr_model = joblib.load(f"{model_dir}/logistic_regression_model.pkl")
            self.nb_model = joblib.load(f"{model_dir}/naive_bayes_model.pkl")

            # Load metadata
            with open(f"{model_dir}/model_metadata.json", 'r') as f:
                self.metadata = json.load(f)

            self.models_loaded = True
        except Exception as e:
            st.error(f"Error loading models: {e}")
            self.models_loaded = False

    def preprocess_text(self, text):
        # Lowercase
        text = text.lower()
        # Remove special characters and digits
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        # Tokenize
        tokens = word_tokenize(text)
        # Remove stopwords
        stop_words = set(stopwords.words('english'))
        tokens = [word for word in tokens if word not in stop_words]
        # Lemmatize
        lemmatizer = WordNetLemmatizer()
        tokens = [lemmatizer.lemmatize(word) for word in tokens]
        # Join tokens back to string
        return ' '.join(tokens)

    def predict(self, text, model_type='both'):
        if not self.models_loaded:
            return None

        # Preprocess text
        cleaned_text = self.preprocess_text(text)

        # Vectorize
        text_vector = self.vectorizer.transform([cleaned_text])

        results = {}

        if model_type in ['lr', 'both']:
            lr_pred = self.lr_model.predict(text_vector)[0]
            lr_prob = self.lr_model.predict_proba(text_vector)[0]
            results['logistic_regression'] = {
                'prediction': 'positive' if lr_pred == 1 else 'negative',
                'confidence': float(max(lr_prob)),
                'probabilities': {
                    'negative': float(lr_prob[0]),
                    'positive': float(lr_prob[1])
                }
            }

        if model_type in ['nb', 'both']:
            nb_pred = self.nb_model.predict(text_vector)[0]
            nb_prob = self.nb_model.predict_proba(text_vector)[0]
            results['naive_bayes'] = {
                'prediction': 'positive' if nb_pred == 1 else 'negative',
                'confidence': float(max(nb_prob)),
                'probabilities': {
                    'negative': float(nb_prob[0]),
                    'positive': float(nb_prob[1])
                }
            }

        return results


def main():
    st.set_page_config(
        page_title="IMDb Sentiment Analysis",
        page_icon="🎬",
        layout="wide"
    )

    st.title("🎬 IMDb Review Sentiment Analysis")
    st.markdown("---")

    # Check if models exist
    if not os.path.exists("saved_models"):
        st.error("❌ Models not found! Please run `python train_and_save_model.py` first to train and save the models.")
        st.info("This will create the 'saved_models' directory with your trained models.")
        return

    # Initialize analyzer
    with st.spinner("Loading models..."):
        analyzer = SentimentAnalyzer()

    if not analyzer.models_loaded:
        st.error("Failed to load models. Please check if the model files exist in the 'saved_models' directory.")
        return

    # Display model info
    st.success("✅ Models loaded successfully!")

    # Model performance metrics
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Logistic Regression Accuracy", f"{analyzer.metadata['lr_accuracy']:.2%}")
    with col2:
        st.metric("Naive Bayes Accuracy", f"{analyzer.metadata['nb_accuracy']:.2%}")

    st.markdown("---")

    # Input section
    st.subheader("📝 Enter a Movie Review")

    # Text input; the default comes from session state so the sidebar sample
    # buttons can prefill it.
    user_input = st.text_area(
        "Write your movie review here:",
        value=st.session_state.get("user_input", ""),
        height=150,
        placeholder="Example: This movie was absolutely fantastic! The acting was superb and the plot was engaging..."
    )

    # Model selection
    model_choice = st.selectbox(
        "Choose model for prediction:",
        ["Both Models", "Logistic Regression Only", "Naive Bayes Only"],
        help="Select which model(s) to use for prediction"
    )

    # Prediction button
    if st.button("🔍 Analyze Sentiment", type="primary"):
        if user_input.strip():
            with st.spinner("Analyzing sentiment..."):
                # Map model choice to parameter
                model_type = 'both'
                if model_choice == "Logistic Regression Only":
                    model_type = 'lr'
                elif model_choice == "Naive Bayes Only":
                    model_type = 'nb'

                # Get predictions
                results = analyzer.predict(user_input, model_type)

                if results:
                    st.markdown("---")
                    st.subheader("📊 Analysis Results")

                    # Display results
                    if model_type == 'both':
                        col1, col2 = st.columns(2)

                        with col1:
                            st.subheader("🤖 Logistic Regression")
                            lr_result = results['logistic_regression']
                            if lr_result['prediction'] == 'positive':
                                st.success("✅ Positive Sentiment")
                            else:
                                st.error("❌ Negative Sentiment")
                            st.metric("Confidence", f"{lr_result['confidence']:.2%}")

                            # Progress bars for probabilities
                            st.write("**Probabilities:**")
                            st.progress(lr_result['probabilities']['positive'])
                            st.write(f"Positive: {lr_result['probabilities']['positive']:.2%}")
                            st.progress(lr_result['probabilities']['negative'])
                            st.write(f"Negative: {lr_result['probabilities']['negative']:.2%}")

                        with col2:
                            st.subheader("🧠 Naive Bayes")
                            nb_result = results['naive_bayes']
                            if nb_result['prediction'] == 'positive':
                                st.success("✅ Positive Sentiment")
                            else:
                                st.error("❌ Negative Sentiment")
                            st.metric("Confidence", f"{nb_result['confidence']:.2%}")

                            # Progress bars for probabilities
                            st.write("**Probabilities:**")
                            st.progress(nb_result['probabilities']['positive'])
                            st.write(f"Positive: {nb_result['probabilities']['positive']:.2%}")
                            st.progress(nb_result['probabilities']['negative'])
                            st.write(f"Negative: {nb_result['probabilities']['negative']:.2%}")

                    else:
                        # Single model result
                        model_name = "Logistic Regression" if model_type == 'lr' else "Naive Bayes"
                        result = results['logistic_regression'] if model_type == 'lr' else results['naive_bayes']

                        st.subheader(f"🤖 {model_name}")
                        if result['prediction'] == 'positive':
                            st.success("✅ Positive Sentiment")
                        else:
                            st.error("❌ Negative Sentiment")
                        st.metric("Confidence", f"{result['confidence']:.2%}")

                        # Progress bars for probabilities
                        st.write("**Probabilities:**")
                        st.progress(result['probabilities']['positive'])
                        st.write(f"Positive: {result['probabilities']['positive']:.2%}")
                        st.progress(result['probabilities']['negative'])
                        st.write(f"Negative: {result['probabilities']['negative']:.2%}")

                    # Model comparison
                    if model_type == 'both':
                        st.markdown("---")
                        st.subheader("📈 Model Comparison")

                        # Create comparison chart
                        models = list(results.keys())
                        confidences = [results[model]['confidence'] for model in models]
                        predictions = [results[model]['prediction'] for model in models]

                        fig = go.Figure(data=[
                            go.Bar(
                                x=models,
                                y=confidences,
                                text=[f"{conf:.2%}" for conf in confidences],
                                textposition='auto',
                                marker_color=['green' if pred == 'positive' else 'red' for pred in predictions]
                            )
                        ])

                        fig.update_layout(
                            title="Model Confidence Comparison",
                            xaxis_title="Model",
                            yaxis_title="Confidence",
                            yaxis_range=[0, 1]
                        )

                        st.plotly_chart(fig, use_container_width=True)

                else:
                    st.error("Failed to get predictions. Please try again.")
        else:
            st.warning("⚠️ Please enter a review to analyze.")

    # Sidebar with additional info
    with st.sidebar:
        st.header("ℹ️ About")
        st.write("This app uses machine learning models to analyze the sentiment of movie reviews.")
        st.write("**Models:**")
        st.write("- Logistic Regression")
        st.write("- Naive Bayes")

        st.header("📋 Model Details")
        st.write(f"**Training Samples:** {analyzer.metadata['training_samples']:,}")
        st.write(f"**Test Samples:** {analyzer.metadata['test_samples']:,}")
        st.write(f"**Features:** {analyzer.metadata['max_features']:,}")

        st.header("🔧 Preprocessing Steps")
        for step in analyzer.metadata['preprocessing_steps']:
            st.write(f"- {step.replace('_', ' ').title()}")

        st.header("📊 Sample Reviews")
        sample_reviews = [
            "This movie was absolutely fantastic! I loved every minute of it.",
            "Terrible film, waste of time. Don't watch it.",
            "It was okay, nothing special but not bad either.",
            "Amazing performance by the actors, great storyline!",
            "Boring and predictable plot, poor acting."
        ]

        for i, review in enumerate(sample_reviews, 1):
            if st.button(f"Sample {i}", key=f"sample_{i}"):
                # Store the sample in session state and rerun; the text area
                # above picks it up as its default value.
                st.session_state.user_input = review
                st.rerun()


if __name__ == "__main__":
    main()
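
For reference, streamlit_app.py only consumes pre-built artifacts: a saved_models/ directory holding tfidf_vectorizer.pkl, logistic_regression_model.pkl, naive_bayes_model.pkl, and model_metadata.json with the keys the app reads (lr_accuracy, nb_accuracy, training_samples, test_samples, max_features, preprocessing_steps). The train_and_save_model.py mentioned in the app's error message is not part of this commit, so the sketch below is only a hypothetical, minimal stand-in that writes those artifacts from toy data; the toy corpus, the MAX_FEATURES value, and the preprocessing step names are assumptions, not the project's real training pipeline.

# train_and_save_model.py -- hypothetical sketch, NOT part of this commit.
# It only illustrates the artifacts streamlit_app.py expects in saved_models/;
# toy data stands in for the real IMDb reviews.
import json
import os

import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Toy corpus (label 1 = positive); the real script would load preprocessed IMDb reviews.
texts = [
    "fantastic movie loved every minute",
    "great acting wonderful story",
    "terrible film waste of time",
    "boring plot poor acting",
] * 25
labels = [1, 1, 0, 0] * 25

X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42, stratify=labels
)

MAX_FEATURES = 5000  # assumption; the real value comes from the training configuration
vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

lr_model = LogisticRegression(max_iter=1000).fit(X_train_vec, y_train)
nb_model = MultinomialNB().fit(X_train_vec, y_train)

os.makedirs("saved_models", exist_ok=True)
joblib.dump(vectorizer, "saved_models/tfidf_vectorizer.pkl")
joblib.dump(lr_model, "saved_models/logistic_regression_model.pkl")
joblib.dump(nb_model, "saved_models/naive_bayes_model.pkl")

# The keys mirror exactly what SentimentAnalyzer and the sidebar read.
metadata = {
    "lr_accuracy": accuracy_score(y_test, lr_model.predict(X_test_vec)),
    "nb_accuracy": accuracy_score(y_test, nb_model.predict(X_test_vec)),
    "training_samples": len(X_train),
    "test_samples": len(X_test),
    "max_features": MAX_FEATURES,
    "preprocessing_steps": [
        "lowercase", "remove_special_characters", "tokenize",
        "remove_stopwords", "lemmatize",  # assumed names, matching preprocess_text()
    ],
}
with open("saved_models/model_metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)

With those files in place, the app starts with the standard Streamlit CLI: streamlit run streamlit_app.py.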