Spaces:
Sleeping
Sleeping
DishaKushwah
committed on
Commit
·
ddafacb
1
Parent(s):
4e8e93e
Update mcq_generator.py
Browse files- mcq_generator.py +95 -147
mcq_generator.py
CHANGED
@@ -1,171 +1,119 @@
|
|
1 |
-
import torch
|
2 |
import random
|
3 |
import nltk
|
4 |
-
import
|
5 |
-
from
|
6 |
-
from
|
7 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
-
from typing import List, Dict, Any
|
9 |
-
nltk.download('punkt')
|
10 |
-
nltk.download('stopwords')
|
11 |
|
12 |
class AdvancedMCQGenerator:
|
13 |
def __init__(self):
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
self.qa_pipeline = pipeline("question-answering"
|
19 |
-
self.
|
20 |
-
self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
|
21 |
-
self.sentence_tokenizer = nltk.sent_tokenize
|
22 |
-
self.generated_questions = set()
|
23 |
|
24 |
-
def
|
25 |
-
"""
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
|
40 |
-
def
|
41 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
distractors = []
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
#
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
continue
|
58 |
-
|
59 |
-
if keywords:
|
60 |
-
keyword_distractor = f"A key aspect related to {random.choice(keywords)}"
|
61 |
-
distractors.append(keyword_distractor)
|
62 |
-
used_options.add(keyword_distractor.lower())
|
63 |
-
continue
|
64 |
-
fallback_distractors = ["A related contextual detail","An alternative interpretation","A supplementary concept"]
|
65 |
distractor = random.choice(fallback_distractors)
|
66 |
distractors.append(distractor)
|
67 |
-
|
68 |
-
|
69 |
-
except Exception as e:
|
70 |
-
print(f"Distractor generation error: {e}")
|
71 |
-
distractors.append("A contextual detail")
|
72 |
-
return distractors[:num_distractors]
|
73 |
|
74 |
-
def
|
75 |
-
"""
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
# Calculate cosine similarity
|
82 |
-
similarity = torch.nn.functional.cosine_similarity(torch.tensor(embedding1), torch.tensor(embedding2)).item()
|
83 |
-
return abs(similarity)
|
84 |
-
except Exception:
|
85 |
-
return 0.0
|
86 |
-
|
87 |
-
def generate_mcq(self, context: str, num_questions: int = 5, difficulty: str = "medium") -> List[Dict[str, Any]]:
|
88 |
-
"""
|
89 |
-
Generate Multiple Choice Questions
|
90 |
-
"""
|
91 |
-
context = self._preprocess_context(context) # Preprocess context
|
92 |
-
context_features = self._extract_context_features(context) # Extract context features
|
93 |
-
self.generated_questions.clear() # Reset generated questions
|
94 |
mcq_questions = []
|
|
|
95 |
|
96 |
-
for
|
97 |
try:
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
answer_result = self.qa_pipeline(question=question, context=context) # Extract answer using QA pipeline
|
104 |
-
correct_answer = answer_result['answer'] # Get correct answer
|
105 |
-
distractors = self._generate_smart_distractors(correct_answer, context_features) # Generate contextually relevant distractors
|
106 |
-
all_options = [correct_answer] + distractors # Combine options
|
107 |
random.shuffle(all_options)
|
108 |
-
correct_index = all_options.index(correct_answer)
|
109 |
-
mcq_questions.append({"question": question,"options": all_options,"correct_answer": correct_index
|
110 |
except Exception as e:
|
111 |
-
print(f"
|
112 |
return mcq_questions
|
113 |
-
|
114 |
-
def _preprocess_context(self, context: str) -> str:
|
115 |
-
"""Advanced context preprocessing"""
|
116 |
-
context = re.sub(r'\s+', ' ', context).strip() # Remove extra whitespaces and special characters
|
117 |
-
context = ''.join(char for char in context if char.isprintable()) # Remove non-printable characters
|
118 |
-
if len(context.split()) < 20: # Append context if too short
|
119 |
-
context += " Additional context to enhance question generation."
|
120 |
-
return context
|
121 |
-
|
122 |
def main():
|
|
|
123 |
generator = AdvancedMCQGenerator()
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
try:
|
128 |
-
num_questions = int(input("\n>> How many questions do you want to generate? "))
|
129 |
-
break
|
130 |
-
except ValueError:
|
131 |
-
print("Please enter a valid number.")
|
132 |
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
print("
|
138 |
-
|
139 |
-
|
140 |
-
print("\n--- Multiple Choice Quiz ---")
|
141 |
-
correct_answers = 0 # Simple score tracking
|
142 |
-
total_questions = len(questions)
|
143 |
-
|
144 |
-
for i, q in enumerate(questions, 1):
|
145 |
-
print(f"\nQuestion {i}: {q['question']}")
|
146 |
-
print("Options:")
|
147 |
-
for j, option in enumerate(q['options']):
|
148 |
-
print(f"{chr(65+j)}. {option}")
|
149 |
-
|
150 |
-
while True:
|
151 |
-
user_input = input("\nYour Answer (A/B/C/D): ").upper()
|
152 |
-
if user_input in ['A', 'B', 'C', 'D']:
|
153 |
-
break
|
154 |
-
print("Invalid input. Please enter A, B, C, or D.")
|
155 |
-
|
156 |
-
user_answer_index = ord(user_input) - 65
|
157 |
-
if user_answer_index == q['correct_answer']:
|
158 |
-
print("✅ Correct!")
|
159 |
-
correct_answers += 1
|
160 |
-
else:
|
161 |
-
print(f"❌ Incorrect. Correct Answer: {chr(65+q['correct_answer'])}")
|
162 |
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
if __name__ == "__main__":
|
170 |
main()
|
171 |
-
# SAMPLE CONTEXT- The French Revolution began in 1789 and marked a significant turning point in European history. It was fueled by widespread social inequality, financial crisis, and the rise of Enlightenment ideas. The French monarchy was overthrown, and King Louis XVI was executed. The revolution introduced the ideals of liberty, equality, and fraternity. It led to the rise of Napoleon Bonaparte and had a lasting impact on modern democracy and human rights movements around the world.
|
|
|
|
|
1 |
import random
|
2 |
import nltk
|
3 |
+
from nltk.corpus import stopwords
|
4 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
5 |
+
from transformers import pipeline
|
|
|
|
|
|
|
|
|
6 |
|
7 |
class AdvancedMCQGenerator:
    """Build multiple-choice questions from a passage of text.

    Content-bearing sentences are selected from the passage, a templated
    question is formed for each one, a question-answering pipeline extracts
    the answer from the passage, and distractors are derived from the other
    sentences of the same passage.
    """

    # Generic options used once the passage has no more usable sentences.
    FALLBACK_DISTRACTORS = (
        "A partially related historical context",
        "An alternative interpretation",
        "A peripheral aspect of the main theme",
    )

    def __init__(self):
        """Fetch the NLTK resources and load the QA pipeline and stop words."""
        # Recent NLTK releases load the sentence/word tokenizers from
        # 'punkt_tab' while older ones use 'punkt', so request both.
        for resource in ('punkt', 'punkt_tab', 'stopwords'):
            nltk.download(resource, quiet=True)

        # Initialize NLP models
        self.qa_pipeline = pipeline("question-answering")
        self.stop_words = set(stopwords.words('english'))

    def extract_key_concepts(self, context, max_concepts=5):
        """Return the first sentences of *context* with enough content words.

        A sentence qualifies when more than three of its tokens are
        alphanumeric, longer than two characters and not stop words.

        Args:
            context: Passage to scan.
            max_concepts: Maximum number of sentences to return. Defaults to
                5, the limit that used to be hard-coded.

        Returns:
            Qualifying sentences in document order, at most *max_concepts*.
        """
        if max_concepts <= 0 or not context or not context.strip():
            return []

        key_concepts = []
        for sentence in sent_tokenize(context):
            content_words = sum(
                1
                for word in word_tokenize(sentence)
                if word.isalnum()
                and len(word) > 2
                and word.lower() not in self.stop_words
            )
            if content_words > 3:
                key_concepts.append(sentence)
                if len(key_concepts) == max_concepts:
                    break
        return key_concepts

    def generate_intelligent_question(self, concept, context):
        """Return a randomly chosen templated question about *concept*.

        Args:
            concept: Phrase or sentence the question is about. Surrounding
                whitespace and trailing sentence punctuation are removed so
                the question does not end in ".?".
            context: Unused; kept so existing callers keep working.

        Returns:
            The question text.
        """
        subject = concept.strip().rstrip('.!?').rstrip()
        question_templates = (
            "What is the primary significance of {}?",
            "How does {} impact the broader context?",
            "What key role does {} play in the narrative?",
            "Explain the importance of {} in this context.",
            "What makes {} crucial to understanding the situation?",
        )
        return random.choice(question_templates).format(subject)

    @staticmethod
    def _add_unique(option, chosen, seen):
        """Append *option* to *chosen* unless it is blank or already in *seen*."""
        key = option.strip().lower()
        if key and key not in seen:
            seen.add(key)
            chosen.append(option)

    def generate_contextual_distractors(self, correct_answer, context, num_distractors=3):
        """Create incorrect options drawn from *context*.

        Each distractor is built from the first three non-stop-word tokens of
        a sentence that does not contain the correct answer. When the passage
        runs out of usable sentences, generic fallback options are used.
        Distractors are unique (case-insensitively), never blank and never
        equal to the correct answer.

        Args:
            correct_answer: The right answer; sentences containing it are
                not used as distractor sources.
            context: Passage the distractors are taken from.
            num_distractors: How many distractors to produce (default 3).

        Returns:
            A list of up to *num_distractors* strings. It is shorter only
            when the passage and the fallback list together cannot supply
            enough distinct options.
        """
        if num_distractors <= 0:
            return []

        # Nothing to tokenize in an empty passage; go straight to fallbacks.
        sentences = sent_tokenize(context) if context and context.strip() else []
        answer_lower = correct_answer.lower()
        candidates = [
            sentence
            for sentence in sentences
            if answer_lower not in sentence.lower() and len(sentence.split()) > 3
        ]
        random.shuffle(candidates)

        distractors = []
        seen = {correct_answer.strip().lower()}

        for sentence in candidates:
            if len(distractors) >= num_distractors:
                break
            # Require at least one alphanumeric character so punctuation
            # tokens such as "..." never become part of an option.
            key_words = [
                word
                for word in word_tokenize(sentence)
                if len(word) > 2
                and word.lower() not in self.stop_words
                and any(ch.isalnum() for ch in word)
            ]
            self._add_unique(' '.join(key_words[:3]), distractors, seen)

        # Sample without replacement so the same fallback is never offered twice.
        fallbacks = random.sample(self.FALLBACK_DISTRACTORS, len(self.FALLBACK_DISTRACTORS))
        for option in fallbacks:
            if len(distractors) >= num_distractors:
                break
            self._add_unique(option, distractors, seen)

        return distractors

    def generate_mcq(self, context, num_questions=3):
        """Generate multiple-choice questions for *context*.

        Args:
            context: Source passage; must contain at least 30 words.
            num_questions: Maximum number of questions to produce. Zero or a
                negative value yields an empty list.

        Returns:
            A list of dicts with the keys "question" (str), "options" (list
            of str, shuffled) and "correct_answer" (index of the right option
            in "options"). Fewer than *num_questions* items are returned when
            the passage has too few suitable sentences or a question fails.

        Raises:
            ValueError: If *context* is empty or shorter than 30 words.
        """
        # Validate context
        if not context or len(context.split()) < 30:
            raise ValueError("Context is too short. Provide more detailed text.")

        # A negative count must not be used as a slice bound.
        if num_questions <= 0:
            return []

        mcq_questions = []
        key_concepts = self.extract_key_concepts(context, max_concepts=num_questions)

        for concept in key_concepts:
            # Best effort: a failure on one concept is reported and skipped
            # so the remaining questions are still produced.
            try:
                question = self.generate_intelligent_question(concept, context)
                answer_result = self.qa_pipeline(question=question, context=context)
                correct_answer = answer_result['answer'].strip()
                if not correct_answer:
                    continue  # No usable answer for this question

                distractors = self.generate_contextual_distractors(correct_answer, context)
                all_options = [correct_answer] + distractors
                random.shuffle(all_options)
                mcq_questions.append({
                    "question": question,
                    "options": all_options,
                    "correct_answer": all_options.index(correct_answer),
                })
            except Exception as e:
                print(f"Error generating question: {e}")
        return mcq_questions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
def main():
    """Run an interactive multiple-choice quiz in the terminal.

    Reads a passage and a question count from standard input, generates the
    questions, asks each one in turn and prints the final score. Invalid
    numeric input is re-prompted, and a passage that the generator rejects is
    reported instead of ending the program with a traceback.
    """
    # Create generator instance
    generator = AdvancedMCQGenerator()
    context = input("Enter context text: ")

    # Keep asking until a positive whole number is entered.
    while True:
        try:
            num_questions = int(input("How many questions do you want? "))
        except ValueError:
            print("Please enter a valid number.")
            continue
        if num_questions > 0:
            break
        print("Please enter a number greater than zero.")

    # generate_mcq raises ValueError when the passage is too short.
    try:
        questions = generator.generate_mcq(context, num_questions)
    except ValueError as err:
        print(f"Could not generate questions: {err}")
        return

    if not questions:
        print("No questions could be generated from this text.")
        return

    # Display and solve quiz
    print("\n--- Quiz Started ---")
    score = 0
    for i, q in enumerate(questions, 1):
        options = q['options']
        # Derive the valid letters from the options actually present so a
        # letter without a matching option is never accepted.
        letters = [chr(ord('A') + j) for j in range(len(options))]

        print(f"\nQuestion {i}: {q['question']}")
        for letter, option in zip(letters, options):
            print(f"{letter}. {option}")

        prompt = f"\nYour Answer ({'/'.join(letters)}): "
        while True:
            user_answer = input(prompt).strip().upper()
            if user_answer in letters:
                break
            print(f"Invalid input. Please enter one of: {', '.join(letters)}.")

        if letters.index(user_answer) == q['correct_answer']:
            print("Correct!")
            score += 1
        else:
            print(f"Incorrect. Correct answer was: {letters[q['correct_answer']]}")

    print(f"\nFinal Score: {score}/{len(questions)}")
|
117 |
|
118 |
if __name__ == "__main__":
|
119 |
main()
|
|