"""Sentiment inference for movie reviews using persisted TF-IDF + linear models."""

import json
import re

import joblib
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Best-effort download of required NLTK corpora. Failures (e.g. offline) are
# tolerated because the data may already be cached locally; each resource is
# attempted independently so one failure does not skip the others.
for _resource in ('punkt', 'stopwords', 'wordnet'):
    try:
        nltk.download(_resource, quiet=True)
    except Exception:
        pass


class SentimentAnalyzer:
    """Load pre-trained sentiment models and score raw review text.

    Expects the artifacts produced by the companion training script in
    ``model_dir``: a TF-IDF vectorizer, a logistic-regression model, a
    naive-Bayes model, and a JSON metadata file.
    """

    def __init__(self, model_dir="saved_models"):
        """Load vectorizer, both classifiers, and metadata from ``model_dir``.

        Args:
            model_dir: Directory containing the pickled model artifacts.

        Raises:
            FileNotFoundError: If any expected artifact is missing.
        """
        self.vectorizer = joblib.load(f"{model_dir}/tfidf_vectorizer.pkl")
        self.lr_model = joblib.load(f"{model_dir}/logistic_regression_model.pkl")
        self.nb_model = joblib.load(f"{model_dir}/naive_bayes_model.pkl")
        with open(f"{model_dir}/model_metadata.json", 'r') as f:
            self.metadata = json.load(f)
        # Hoisted out of preprocess_text: building the stopword set and the
        # lemmatizer once per analyzer avoids redundant work on every call.
        self._stop_words = set(stopwords.words('english'))
        self._lemmatizer = WordNetLemmatizer()

    def preprocess_text(self, text):
        """Normalize raw text for vectorization.

        Lowercases, strips everything but letters and whitespace, tokenizes,
        removes English stopwords, and lemmatizes each token.

        Args:
            text: Raw review string.

        Returns:
            The cleaned tokens re-joined into a single space-separated string.
        """
        text = text.lower()
        # Keep only letters and whitespace; digits and punctuation are dropped.
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        tokens = word_tokenize(text)
        tokens = [word for word in tokens if word not in self._stop_words]
        tokens = [self._lemmatizer.lemmatize(word) for word in tokens]
        return ' '.join(tokens)

    def _score(self, model, text_vector):
        """Run one classifier on a vectorized input and package the result.

        Assumes a binary classifier whose probability columns are ordered
        [negative, positive] and whose positive label is 1 — TODO confirm
        against the training script's label encoding.
        """
        pred = model.predict(text_vector)[0]
        prob = model.predict_proba(text_vector)[0]
        return {
            'prediction': 'positive' if pred == 1 else 'negative',
            'confidence': float(max(prob)),
            'probabilities': {
                'negative': float(prob[0]),
                'positive': float(prob[1]),
            },
        }

    def predict(self, text, model_type='both'):
        """Predict sentiment for ``text`` with one or both models.

        Args:
            text: Raw review string.
            model_type: One of 'lr', 'nb', or 'both' (default).

        Returns:
            Dict keyed by model name ('logistic_regression' and/or
            'naive_bayes'), each value holding 'prediction', 'confidence',
            and per-class 'probabilities'.

        Raises:
            ValueError: If ``model_type`` is not an accepted value. (The
                original silently returned an empty dict for bad values.)
        """
        if model_type not in ('lr', 'nb', 'both'):
            raise ValueError(
                f"model_type must be 'lr', 'nb', or 'both', got {model_type!r}"
            )
        cleaned_text = self.preprocess_text(text)
        text_vector = self.vectorizer.transform([cleaned_text])
        results = {}
        if model_type in ('lr', 'both'):
            results['logistic_regression'] = self._score(self.lr_model, text_vector)
        if model_type in ('nb', 'both'):
            results['naive_bayes'] = self._score(self.nb_model, text_vector)
        return results


# Example usage
if __name__ == "__main__":
    analyzer = SentimentAnalyzer()

    # Test with sample reviews
    test_reviews = [
        "This movie was absolutely fantastic! I loved every minute of it.",
        "Terrible film, waste of time. Don't watch it.",
        "It was okay, nothing special but not bad either.",
    ]

    for review in test_reviews:
        print(f"\nReview: {review}")
        results = analyzer.predict(review)
        for model, result in results.items():
            print(f"{model}: {result['prediction']} (confidence: {result['confidence']:.2f})")