DishaKushwah committed on
Commit
ddafacb
·
1 Parent(s): 4e8e93e

Update mcq_generator.py

Browse files
Files changed (1) hide show
  1. mcq_generator.py +95 -147
mcq_generator.py CHANGED
@@ -1,171 +1,119 @@
1
- import torch
2
  import random
3
  import nltk
4
- import re
5
- from transformers import (pipeline, AutoModelForQuestionAnswering, AutoTokenizer)
6
- from sentence_transformers import SentenceTransformer
7
- from sklearn.feature_extraction.text import TfidfVectorizer
8
- from typing import List, Dict, Any
9
- nltk.download('punkt')
10
- nltk.download('stopwords')
11
 
12
  class AdvancedMCQGenerator:
13
  def __init__(self):
14
- """Advanced Multiple Choice Question Generator with Intelligent Distractor Strategy"""
15
- # Question Answering Model
16
- qa_model_name = "deepset/roberta-base-squad2"
17
- self.qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
18
- self.qa_pipeline = pipeline("question-answering", model=qa_model_name,device=0 if torch.cuda.is_available() else -1)
19
- self.sentence_embedder = SentenceTransformer('all-mpnet-base-v2')
20
- self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
21
- self.sentence_tokenizer = nltk.sent_tokenize
22
- self.generated_questions = set()
23
 
24
- def _extract_context_features(self, context: str) -> Dict[str, Any]:
25
- """Advanced context feature extraction"""
26
- sentences = self.sentence_tokenizer(context)
27
- try:
28
- tfidf_matrix = self.tfidf_vectorizer.fit_transform(sentences)
29
- feature_names = self.tfidf_vectorizer.get_feature_names_out()
30
- top_keywords = []
31
- for i, sentence in enumerate(sentences):
32
- feature_indices = tfidf_matrix[i].nonzero()[1] # Get top TF-IDF scores for each sentence
33
- top_sentence_keywords = [feature_names[idx] for idx in feature_indices][:3]
34
- top_keywords.extend(top_sentence_keywords)
35
- return {'sentences': sentences,'keywords': list(set(top_keywords)),'total_sentences': len(sentences)}
36
- except Exception as e:
37
- print(f"Context feature extraction error: {e}")
38
- return {'sentences': sentences,'keywords': context.split()[:10],'total_sentences': len(sentences)}
39
 
40
- def _generate_smart_distractors(self, correct_answer: str, context_features: Dict[str, Any], num_distractors: int = 3) -> List[str]:
41
- """Intelligent Distractor Generation Strategy"""
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  distractors = []
43
- used_options = set([correct_answer.lower()])
44
- sentences = context_features['sentences']
45
- keywords = context_features['keywords']
46
- # Semantic similarity-based distractor generation
47
- for _ in range(num_distractors):
48
- try:
49
- semantic_candidates = [sent for sent in sentences if sent.lower() not in used_options and len(sent.split()) > 3]
50
- if semantic_candidates:
51
- candidate_similarities = [(sent, self._calculate_semantic_similarity(correct_answer, sent)) for sent in semantic_candidates]
52
- candidate_similarities.sort(key=lambda x: abs(0.5 - x[1]))
53
- if candidate_similarities:
54
- best_distractor = candidate_similarities[0][0]
55
- distractors.append(best_distractor)
56
- used_options.add(best_distractor.lower())
57
- continue
58
-
59
- if keywords:
60
- keyword_distractor = f"A key aspect related to {random.choice(keywords)}"
61
- distractors.append(keyword_distractor)
62
- used_options.add(keyword_distractor.lower())
63
- continue
64
- fallback_distractors = ["A related contextual detail","An alternative interpretation","A supplementary concept"]
65
  distractor = random.choice(fallback_distractors)
66
  distractors.append(distractor)
67
- used_options.add(distractor.lower())
68
-
69
- except Exception as e:
70
- print(f"Distractor generation error: {e}")
71
- distractors.append("A contextual detail")
72
- return distractors[:num_distractors]
73
 
74
- def _calculate_semantic_similarity(self, text1: str, text2: str) -> float:
75
- """Calculate semantic similarity between two texts"""
76
- try:
77
- # Embed texts
78
- embedding1 = self.sentence_embedder.encode(text1)
79
- embedding2 = self.sentence_embedder.encode(text2)
80
-
81
- # Calculate cosine similarity
82
- similarity = torch.nn.functional.cosine_similarity(torch.tensor(embedding1), torch.tensor(embedding2)).item()
83
- return abs(similarity)
84
- except Exception:
85
- return 0.0
86
-
87
- def generate_mcq(self, context: str, num_questions: int = 5, difficulty: str = "medium") -> List[Dict[str, Any]]:
88
- """
89
- Generate Multiple Choice Questions
90
- """
91
- context = self._preprocess_context(context) # Preprocess context
92
- context_features = self._extract_context_features(context) # Extract context features
93
- self.generated_questions.clear() # Reset generated questions
94
  mcq_questions = []
 
95
 
96
- for _ in range(num_questions):
97
  try:
98
- keywords = context_features['keywords']
99
- subject = random.choice(keywords)
100
- # Question templates
101
- templates = [f"What is the significance of {subject} in this context?",f"Explain the role of {subject}.",f"How does {subject} contribute to the overall understanding?"]
102
- question = random.choice(templates)
103
- answer_result = self.qa_pipeline(question=question, context=context) # Extract answer using QA pipeline
104
- correct_answer = answer_result['answer'] # Get correct answer
105
- distractors = self._generate_smart_distractors(correct_answer, context_features) # Generate contextually relevant distractors
106
- all_options = [correct_answer] + distractors # Combine options
107
  random.shuffle(all_options)
108
- correct_index = all_options.index(correct_answer) # Determine correct option index
109
- mcq_questions.append({"question": question,"options": all_options,"correct_answer": correct_index,"explanation": f"Correct answer based on the context: {correct_answer}"})
110
  except Exception as e:
111
- print(f"MCQ generation error: {e}")
112
  return mcq_questions
113
-
114
- def _preprocess_context(self, context: str) -> str:
115
- """Advanced context preprocessing"""
116
- context = re.sub(r'\s+', ' ', context).strip() # Remove extra whitespaces and special characters
117
- context = ''.join(char for char in context if char.isprintable()) # Remove non-printable characters
118
- if len(context.split()) < 20: # Append context if too short
119
- context += " Additional context to enhance question generation."
120
- return context
121
-
122
  def main():
 
123
  generator = AdvancedMCQGenerator()
124
- print(" -------------Multiple Choice Question Generator-------------")
125
- context = input("\n>> Enter context text: ")
126
- while True:
127
- try:
128
- num_questions = int(input("\n>> How many questions do you want to generate? "))
129
- break
130
- except ValueError:
131
- print("Please enter a valid number.")
132
 
133
- while True:
134
- difficulty = input("\n>> Enter difficulty level (easy/medium/hard): ").lower()
135
- if difficulty in ['easy', 'medium', 'hard']:
136
- break
137
- print("Invalid difficulty level. Please choose easy, medium, or hard.")
138
- questions = generator.generate_mcq(context, num_questions, difficulty)
139
- if questions:
140
- print("\n--- Multiple Choice Quiz ---")
141
- correct_answers = 0 # Simple score tracking
142
- total_questions = len(questions)
143
-
144
- for i, q in enumerate(questions, 1):
145
- print(f"\nQuestion {i}: {q['question']}")
146
- print("Options:")
147
- for j, option in enumerate(q['options']):
148
- print(f"{chr(65+j)}. {option}")
149
-
150
- while True:
151
- user_input = input("\nYour Answer (A/B/C/D): ").upper()
152
- if user_input in ['A', 'B', 'C', 'D']:
153
- break
154
- print("Invalid input. Please enter A, B, C, or D.")
155
-
156
- user_answer_index = ord(user_input) - 65
157
- if user_answer_index == q['correct_answer']:
158
- print("✅ Correct!")
159
- correct_answers += 1
160
- else:
161
- print(f"❌ Incorrect. Correct Answer: {chr(65+q['correct_answer'])}")
162
 
163
- # Simple score display
164
- print(f"\n-----Score: {correct_answers}/{total_questions}-----")
165
-
166
- else:
167
- print("\nNo multiple choice questions were generated.")
 
 
 
 
 
 
 
 
168
 
169
  if __name__ == "__main__":
170
  main()
171
- # SAMPLE CONTEXT- The French Revolution began in 1789 and marked a significant turning point in European history. It was fueled by widespread social inequality, financial crisis, and the rise of Enlightenment ideas. The French monarchy was overthrown, and King Louis XVI was executed. The revolution introduced the ideals of liberty, equality, and fraternity. It led to the rise of Napoleon Bonaparte and had a lasting impact on modern democracy and human rights movements around the world.
 
 
1
  import random
2
  import nltk
3
+ from nltk.corpus import stopwords
4
+ from nltk.tokenize import sent_tokenize, word_tokenize
5
+ from transformers import pipeline
 
 
 
 
6
 
7
  class AdvancedMCQGenerator:
8
  def __init__(self):
9
+ nltk.download('punkt', quiet=True)
10
+ nltk.download('stopwords', quiet=True)
11
+
12
+ # Initialize NLP models
13
+ self.qa_pipeline = pipeline("question-answering")
14
+ self.stop_words = set(stopwords.words('english'))
 
 
 
15
 
16
+ def extract_key_concepts(self, context):
17
+ """Extract key concepts and important phrases"""
18
+ # Tokenize sentences
19
+ sentences = sent_tokenize(context)
20
+
21
+ # Extract potential key concepts
22
+ key_concepts = []
23
+ for sentence in sentences:
24
+ # Look for sentences with unique, meaningful content
25
+ words = word_tokenize(sentence)
26
+ filtered_words = [word.lower() for word in words if word.isalnum() and word.lower() not in self.stop_words and len(word) > 2]
27
+ # Prioritize sentences with named entities or specific concepts
28
+ if len(filtered_words) > 3:
29
+ key_concepts.append(sentence)
30
+ return key_concepts[:5] # Return top 5 key concepts
31
 
32
+ def generate_intelligent_question(self, concept, context):
33
+ """Generate a more nuanced and contextually relevant question"""
34
+ question_templates = [
35
+ f"What is the primary significance of {concept}?",
36
+ f"How does {concept} impact the broader context?",
37
+ f"What key role does {concept} play in the narrative?",
38
+ f"Explain the importance of {concept} in this context.",
39
+ f"What makes {concept} crucial to understanding the situation?"
40
+ ]
41
+
42
+ return random.choice(question_templates)
43
+
44
+ def generate_contextual_distractors(self, correct_answer, context):
45
+ """Create semantically related but incorrect distractors"""
46
+ sentences = sent_tokenize(context)
47
  distractors = []
48
+
49
+ potential_distractors = [sent for sent in sentences if correct_answer.lower() not in sent.lower() and len(sent.split()) > 3]
50
+
51
+ # Generating diverse distractors
52
+ while len(distractors) < 3:
53
+ if potential_distractors:
54
+ # Choose a unique distractor
55
+ distractor = random.choice(potential_distractors)
56
+ potential_distractors.remove(distractor)
57
+ words = word_tokenize(distractor)
58
+ key_phrase = ' '.join([word for word in words if word.lower() not in self.stop_words and len(word) > 2][:3])
59
+ distractors.append(key_phrase)
60
+ else:
61
+ fallback_distractors = ["A partially related historical context","An alternative interpretation","A peripheral aspect of the main theme"]
 
 
 
 
 
 
 
 
62
  distractor = random.choice(fallback_distractors)
63
  distractors.append(distractor)
64
+ return distractors
 
 
 
 
 
65
 
66
+ def generate_mcq(self, context, num_questions=3):
67
+ """Generate Multiple Choice Questions"""
68
+ # Validate context
69
+ if not context or len(context.split()) < 30:
70
+ raise ValueError("Context is too short. Provide more detailed text.")
71
+
72
+ # Generate questions
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  mcq_questions = []
74
+ key_concepts = self.extract_key_concepts(context)
75
 
76
+ for concept in key_concepts[:num_questions]:
77
  try:
78
+ question = self.generate_intelligent_question(concept, context) # Generate question
79
+ answer_result = self.qa_pipeline(question=question, context=context) # Use QA pipeline to find the most relevant answer
80
+ correct_answer = answer_result['answer'] # Get correct answer
81
+ distractors = self.generate_contextual_distractors(correct_answer, context) # Generate distractors
82
+ all_options = [correct_answer] + distractors # Combine options
 
 
 
 
83
  random.shuffle(all_options)
84
+ correct_index = all_options.index(correct_answer) # Determine correct option index
85
+ mcq_questions.append({"question": question,"options": all_options,"correct_answer": correct_index}) # Create MCQ
86
  except Exception as e:
87
+ print(f"Error generating question: {e}")
88
  return mcq_questions
 
 
 
 
 
 
 
 
 
89
  def main():
90
+ # Create generator instance
91
  generator = AdvancedMCQGenerator()
92
+ context = input("Enter context text: ")
93
+ num_questions = int(input("How many questions do you want? "))
94
+ questions = generator.generate_mcq(context, num_questions)
 
 
 
 
 
95
 
96
+ # Display and solve quiz
97
+ print("\n--- Quiz Started ---")
98
+ score = 0
99
+ for i, q in enumerate(questions, 1):
100
+ print(f"\nQuestion {i}: {q['question']}")
101
+ for j, option in enumerate(q['options']):
102
+ print(f"{chr(65+j)}. {option}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ while True:
105
+ user_answer = input("\nYour Answer (A/B/C/D): ").upper()
106
+ if user_answer in ['A', 'B', 'C', 'D']:
107
+ break
108
+ print("Invalid input. Please enter A, B, C, or D.")
109
+
110
+ user_index = ord(user_answer) - 65
111
+ if user_index == q['correct_answer']:
112
+ print("Correct!")
113
+ score += 1
114
+ else:
115
+ print(f"Incorrect. Correct answer was: {chr(65 + q['correct_answer'])}")
116
+ print(f"\nFinal Score: {score}/{len(questions)}")
117
 
118
  if __name__ == "__main__":
119
  main()