mset committed on
Commit
b95ca5f
Β·
verified Β·
1 Parent(s): 75f5073

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -370
app.py CHANGED
@@ -1,427 +1,285 @@
1
  import gradio as gr
2
  import requests
3
- import json
4
  import re
5
  import xml.etree.ElementTree as ET
6
- import numpy as np
7
  import random
8
- import hashlib
9
  from datetime import datetime
10
  from collections import defaultdict, Counter
11
- import time
12
 
13
- class QuestionAnsweringAI:
14
  def __init__(self):
15
- # Token database e vocabulary
16
- self.vocabulary = {}
17
- self.token_to_id = {}
18
- self.vocab_size = 0
19
-
20
- # Neural Network parameters
21
- self.embedding_dim = 256
22
- self.hidden_dim = 512
23
- self.context_length = 32
24
-
25
- # Knowledge systems
26
  self.knowledge_base = defaultdict(list)
27
- self.qa_patterns = defaultdict(list)
28
- self.context_memory = []
29
-
30
- # Network weights
31
- self.embeddings = None
32
- self.hidden_weights = None
33
- self.output_weights = None
34
-
35
- # Pattern storage
36
- self.bigram_counts = defaultdict(Counter)
37
- self.trigram_counts = defaultdict(Counter)
38
- self.sentence_starts = []
39
-
40
- # Data sources
41
- self.data_sources = {
42
- "news_rss": [
43
- "https://feeds.reuters.com/reuters/worldNews",
44
- "https://feeds.bbci.co.uk/news/world/rss.xml",
45
- "https://feeds.bbci.co.uk/news/technology/rss.xml"
46
- ]
47
  }
48
 
49
- # Training state
50
- self.total_tokens_collected = 0
51
- self.epochs_trained = 0
52
- self.learning_rate = 0.001
53
- self.max_response_length = 50
54
-
55
- self.initialize_network()
56
 
57
- def initialize_network(self):
58
- """Initialize neural network"""
59
- self.embeddings = np.random.normal(0, 0.1, (10000, self.embedding_dim))
60
- self.hidden_weights = np.random.normal(0, 0.1, (self.embedding_dim * self.context_length, self.hidden_dim))
61
- self.hidden_bias = np.zeros(self.hidden_dim)
62
- self.output_weights = np.random.normal(0, 0.1, (self.hidden_dim, 10000))
63
- self.output_bias = np.zeros(10000)
64
- print("🧠 Neural Network initialized")
65
 
66
- def collect_training_data(self, max_tokens=20000):
67
  """Collect training data from public sources"""
68
- print("πŸ•·οΈ Collecting Q&A training data...")
69
-
70
- collected_texts = []
71
-
72
- # Collect news data
73
- news_texts = self.scrape_news_feeds()
74
- collected_texts.extend(news_texts)
75
- print(f"πŸ“° Collected {len(news_texts)} news articles")
76
 
77
- # Create structured Q&A patterns
78
- qa_patterns = self.create_qa_patterns()
79
- collected_texts.extend(qa_patterns)
80
- print(f"❓ Generated {len(qa_patterns)} Q&A patterns")
81
 
82
- # Filter for quality
83
- quality_texts = [text for text in collected_texts if len(text) > 30]
 
84
 
85
- # Tokenize
86
- all_tokens = []
87
- for text in quality_texts:
88
- tokens = self.tokenize_text(text)
89
- all_tokens.extend(tokens)
90
- if len(all_tokens) >= max_tokens:
91
- break
92
-
93
- self.total_tokens_collected = len(all_tokens)
94
- print(f"🎯 Collected {self.total_tokens_collected:,} tokens")
95
-
96
- # Build systems
97
- self.build_vocabulary(all_tokens)
98
- self.build_knowledge_base(quality_texts)
99
- self.extract_patterns(all_tokens)
100
-
101
- return all_tokens
102
 
103
- def scrape_news_feeds(self):
104
- """Scrape news RSS feeds"""
105
- texts = []
 
 
 
 
 
106
 
107
- for rss_url in self.data_sources["news_rss"]:
108
  try:
109
- response = requests.get(rss_url, timeout=5)
110
  if response.status_code == 200:
111
  root = ET.fromstring(response.content)
112
- for item in root.findall(".//item")[:3]:
113
  title = item.find("title")
114
- description = item.find("description")
115
- if title is not None:
116
- text = title.text
117
- if description is not None:
118
- text += ". " + description.text
119
- texts.append(self.clean_text(text))
120
- except:
121
  continue
122
 
123
- return texts
124
-
125
- def create_qa_patterns(self):
126
- """Create structured Q&A patterns"""
127
- patterns = []
128
-
129
- # Question-answer templates
130
- qa_templates = [
131
- ("What is artificial intelligence?", "Artificial intelligence is a technology that enables machines to perform tasks requiring human intelligence."),
132
- ("How do computers work?", "Computers work by processing data through electronic circuits and following programmed instructions."),
133
- ("Where is Paris located?", "Paris is located in France and serves as the capital city."),
134
- ("Why is education important?", "Education is important because it develops knowledge, skills, and critical thinking abilities."),
135
- ("What is machine learning?", "Machine learning is a subset of AI that allows systems to learn from data without explicit programming."),
136
- ("How does the internet work?", "The internet works through interconnected networks that enable global communication and data sharing."),
137
- ("What is climate change?", "Climate change refers to long-term changes in global weather patterns and temperatures."),
138
- ("Why do we need renewable energy?", "Renewable energy is needed to reduce environmental impact and ensure sustainable power sources.")
139
- ]
140
-
141
- for question, answer in qa_templates:
142
- pattern = f"Question: {question} Answer: {answer}"
143
- patterns.append(pattern)
144
-
145
- return patterns
146
-
147
- def clean_text(self, text):
148
- """Clean and normalize text"""
149
- if not text:
150
- return ""
151
-
152
- # Remove HTML tags and normalize
153
- text = re.sub(r'<[^>]+>', ' ', text)
154
- text = re.sub(r'\s+', ' ', text)
155
- text = re.sub(r'[^\w\s\.\,\!\?\;\:\-\(\)\"\']+', ' ', text)
156
-
157
- return text.strip()
158
-
159
- def tokenize_text(self, text):
160
- """Tokenize text into tokens"""
161
- tokens = re.findall(r'\w+|[.!?;,]', text.lower())
162
- return tokens
163
-
164
- def build_vocabulary(self, tokens):
165
- """Build vocabulary from tokens"""
166
- token_counts = Counter(tokens)
167
- filtered_tokens = {token: count for token, count in token_counts.items() if count >= 2}
168
-
169
- vocab_list = ['<PAD>', '<UNK>', '<START>', '<END>'] + list(filtered_tokens.keys())
170
-
171
- self.vocabulary = {i: token for i, token in enumerate(vocab_list)}
172
- self.token_to_id = {token: i for i, token in enumerate(vocab_list)}
173
- self.vocab_size = len(vocab_list)
174
-
175
- print(f"πŸ“š Built vocabulary: {self.vocab_size:,} tokens")
176
-
177
- def build_knowledge_base(self, texts):
178
- """Build knowledge base from texts"""
179
- for text in texts:
180
- sentences = re.split(r'[.!?]+', text)
181
- for sentence in sentences:
182
- sentence = sentence.strip()
183
- if len(sentence) > 20:
184
- # Extract main topic (simple approach)
185
- words = sentence.split()
186
- for word in words:
187
- if word[0].isupper() and len(word) > 3:
188
- topic = word.lower()
189
- self.knowledge_base[topic].append(sentence)
190
- break
191
 
192
- def extract_patterns(self, tokens):
193
- """Extract patterns for generation"""
194
- token_ids = [self.token_to_id.get(token, 1) for token in tokens]
195
-
196
- # Build bigrams
197
- for i in range(len(token_ids) - 1):
198
- current_token = token_ids[i]
199
- next_token = token_ids[i + 1]
200
- self.bigram_counts[current_token][next_token] += 1
201
-
202
- print(f"πŸ“Š Extracted {len(self.bigram_counts):,} bigram patterns")
203
-
204
- def train_system(self, training_tokens, epochs=3):
205
- """Train the Q&A system"""
206
- print(f"πŸŽ“ Training system for {epochs} epochs...")
207
-
208
- token_ids = [self.token_to_id.get(token, 1) for token in training_tokens]
209
-
210
- for epoch in range(epochs):
211
- print(f"Training epoch {epoch + 1}/{epochs}")
212
-
213
- # Simple training simulation
214
- total_batches = min(100, len(token_ids) // 10)
215
 
216
- for batch in range(total_batches):
217
- if batch % 25 == 0:
218
- print(f" Batch {batch + 1}/{total_batches}")
219
-
220
- self.epochs_trained += 1
221
-
222
- print("βœ… Training completed!")
 
 
 
 
 
 
223
 
224
  def answer_question(self, question):
225
- """Answer a question using trained knowledge"""
226
  if not question.strip():
227
- return "Hello! I'm an AI that learns from data. Ask me a question!"
228
 
229
- # Add to memory
230
- self.context_memory.append(question)
231
- if len(self.context_memory) > 5:
232
- self.context_memory.pop(0)
233
 
234
- # Classify question type
235
- question_type = self.classify_question(question)
 
 
236
 
237
- # Find relevant knowledge
238
- relevant_knowledge = self.find_relevant_knowledge(question)
 
 
239
 
240
- # Generate response
241
- response = self.generate_response(question, question_type, relevant_knowledge)
242
-
243
- return response
244
-
245
- def classify_question(self, question):
246
- """Classify question type"""
247
- question_lower = question.lower()
248
-
249
- if any(word in question_lower for word in ['what', 'define', 'explain']):
250
- return 'definition'
251
- elif any(word in question_lower for word in ['where', 'location']):
252
- return 'location'
253
- elif any(word in question_lower for word in ['how', 'method']):
254
- return 'process'
255
- elif any(word in question_lower for word in ['why', 'reason']):
256
- return 'explanation'
257
- else:
258
- return 'general'
259
 
260
- def find_relevant_knowledge(self, question):
261
- """Find relevant knowledge for question"""
262
- question_words = set(question.lower().split())
263
- relevant_facts = []
264
 
265
- for topic, facts in self.knowledge_base.items():
266
- if topic in question.lower():
267
- relevant_facts.extend(facts[:2])
268
 
269
- # Also search by word overlap
270
- for topic, facts in self.knowledge_base.items():
271
- for fact in facts:
272
- fact_words = set(fact.lower().split())
273
- overlap = len(question_words.intersection(fact_words))
274
- if overlap >= 2:
275
- relevant_facts.append(fact)
276
- if len(relevant_facts) >= 3:
277
- break
278
 
279
- return relevant_facts[:3]
280
 
281
- def generate_response(self, question, question_type, knowledge):
282
- """Generate response using patterns and knowledge"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
- # Response templates
 
 
 
285
  templates = {
286
- 'definition': "Based on my training data, this refers to",
287
- 'location': "From geographical information I've learned,",
288
- 'process': "According to technical sources,",
289
- 'explanation': "The reason is that",
290
- 'general': "From my knowledge base,"
291
  }
292
 
293
- starter = templates.get(question_type, "Based on what I've learned,")
294
 
295
- if knowledge:
296
- # Use relevant knowledge
297
- response = f"{starter} {knowledge[0][:150]}..."
298
- if len(knowledge) > 1:
299
- response += f" Additionally, {knowledge[1][:100]}..."
300
  else:
301
- # Fallback responses
302
- fallbacks = {
303
- 'definition': f"{starter} a concept that involves multiple factors and considerations.",
304
- 'location': f"{starter} this refers to a specific place or region.",
305
- 'process': f"{starter} this involves a series of steps and procedures.",
306
- 'explanation': f"{starter} multiple factors contribute to this.",
307
- 'general': f"{starter} this is a topic with various aspects to consider."
308
- }
309
- response = fallbacks.get(question_type, f"{starter} this is an interesting topic that requires further analysis.")
310
-
311
- # Ensure proper ending
312
- if not response.endswith('.'):
313
- response += '.'
314
 
315
- return response[:300] # Limit response length
316
 
317
- def get_stats(self):
318
- """Get system statistics"""
319
- return {
320
- "tokens_collected": self.total_tokens_collected,
321
- "vocabulary_size": self.vocab_size,
322
- "epochs_trained": self.epochs_trained,
323
- "knowledge_topics": len(self.knowledge_base),
324
- "bigram_patterns": len(self.bigram_counts),
325
- "memory_items": len(self.context_memory)
326
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
- # Initialize system
329
- qa_system = QuestionAnsweringAI()
330
 
331
- def train_qa_system():
332
- """Train the Q&A system"""
333
  try:
334
- # Collect data
335
- tokens = qa_system.collect_training_data(max_tokens=15000)
336
-
337
- if len(tokens) > 50:
338
- # Train system
339
- qa_system.train_system(tokens, epochs=2)
340
- return "βœ… Q&A System training completed successfully!"
341
- else:
342
- return "❌ Insufficient data collected for training"
343
  except Exception as e:
344
- return f"❌ Training error: {str(e)}"
345
 
346
- def chat_with_ai(message, history):
347
- """Chat interface function"""
348
- if not message.strip():
349
- response = "Hi! I'm an AI that learns from data and answers questions. What would you like to know?"
350
- else:
351
- response = qa_system.answer_question(message)
352
 
353
- history.append([message, response])
354
- return history, ""
 
 
 
 
 
 
355
 
356
- def get_system_status():
357
- """Get current system status"""
358
- stats = qa_system.get_stats()
359
-
360
- status = "πŸ€– **QUESTION ANSWERING AI STATUS**\n\n"
361
-
362
- if stats['tokens_collected'] == 0:
363
- status += "⏳ **System not trained yet**\nClick 'Start Training' to begin\n\n"
364
- else:
365
- status += "βœ… **System trained and operational**\n\n"
366
-
367
- status += "**πŸ“Š Statistics:**\n"
368
- status += f"β€’ **Tokens collected:** {stats['tokens_collected']:,}\n"
369
- status += f"β€’ **Vocabulary size:** {stats['vocabulary_size']:,}\n"
370
- status += f"β€’ **Knowledge topics:** {stats['knowledge_topics']:,}\n"
371
- status += f"β€’ **Training epochs:** {stats['epochs_trained']}\n"
372
- status += f"β€’ **Pattern database:** {stats['bigram_patterns']:,} patterns\n"
373
- status += f"β€’ **Conversation memory:** {stats['memory_items']} messages\n"
374
-
375
- status += "\n**🎯 Capabilities:**\n"
376
- status += "β€’ Answers questions using learned knowledge\n"
377
- status += "β€’ Processes natural language queries\n"
378
- status += "β€’ Maintains conversation context\n"
379
- status += "β€’ Uses pattern matching for responses\n"
380
-
381
- return status
382
 
383
  # Create Gradio interface
384
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
385
 
386
  gr.HTML("""
387
  <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
388
- <h1>πŸ€– Question Answering AI</h1>
389
- <p><b>AI that learns from data and answers questions</b></p>
390
- <p>Collects tokens from internet β†’ Organizes neural patterns β†’ Generates intelligent responses</p>
391
  </div>
392
  """)
393
 
394
  with gr.Row():
395
- with gr.Column(scale=2):
396
- gr.HTML("<h3>πŸ’¬ Chat with AI</h3>")
397
 
398
  chatbot = gr.Chatbot(
399
- label="Question Answering AI Chat",
400
- height=400,
401
- show_label=True
402
  )
403
 
404
  msg_input = gr.Textbox(
405
- label="Your question",
406
  placeholder="Ask me anything: What is AI? How does technology work?",
407
  lines=2
408
  )
409
 
410
  with gr.Row():
411
  send_btn = gr.Button("πŸ’¬ Send", variant="primary")
412
- clear_btn = gr.Button("πŸ”„ Clear", variant="secondary")
413
 
414
  with gr.Column(scale=1):
415
- gr.HTML("<h3>βš™οΈ System Status</h3>")
416
 
417
- status_output = gr.Textbox(
418
  label="System Status",
419
- lines=18,
420
- interactive=False,
421
- value=get_system_status()
422
  )
423
 
424
- train_btn = gr.Button("πŸš€ Start Training", variant="secondary")
425
  refresh_btn = gr.Button("πŸ”„ Refresh Status", variant="secondary")
426
 
427
  # Example questions
@@ -429,58 +287,43 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
429
  examples=[
430
  "What is artificial intelligence?",
431
  "How do computers work?",
432
- "Where is Paris located?",
433
  "Why is education important?",
434
- "Explain machine learning",
435
- "How does the internet work?",
436
  "What is climate change?",
437
- "Why do we need renewable energy?"
 
438
  ],
439
  inputs=msg_input,
440
- label="🎯 Example Questions"
441
  )
442
 
443
- gr.HTML("""
444
- <div style="margin-top: 20px; padding: 15px; background-color: #f0f0f0; border-radius: 8px;">
445
- <h4>🧠 How It Works:</h4>
446
- <ol>
447
- <li><b>Data Collection:</b> Gathers text from news feeds and creates Q&A patterns</li>
448
- <li><b>Knowledge Building:</b> Extracts facts and builds searchable knowledge base</li>
449
- <li><b>Pattern Learning:</b> Learns language patterns from collected data</li>
450
- <li><b>Question Processing:</b> Classifies questions and finds relevant knowledge</li>
451
- <li><b>Response Generation:</b> Creates intelligent answers using learned patterns</li>
452
- </ol>
453
- <p><b>🎯 Result:</b> An AI that can answer questions using knowledge learned from data!</p>
454
- </div>
455
- """)
456
-
457
  # Event handlers
458
  send_btn.click(
459
- chat_with_ai,
460
  inputs=[msg_input, chatbot],
461
  outputs=[chatbot, msg_input]
462
  )
463
 
464
  msg_input.submit(
465
- chat_with_ai,
466
  inputs=[msg_input, chatbot],
467
  outputs=[chatbot, msg_input]
468
  )
469
 
470
  clear_btn.click(
471
- lambda: ([], ""),
472
  outputs=[chatbot, msg_input]
473
  )
474
 
475
  train_btn.click(
476
- train_qa_system,
477
- outputs=[status_output]
478
  )
479
 
480
  refresh_btn.click(
481
- get_system_status,
482
- outputs=[status_output]
483
  )
484
 
 
485
  if __name__ == "__main__":
486
- demo.launch()
 
1
  import gradio as gr
2
  import requests
 
3
  import re
4
  import xml.etree.ElementTree as ET
 
5
  import random
 
6
  from datetime import datetime
7
  from collections import defaultdict, Counter
 
8
 
9
class SimpleQAAI:
    """Lightweight question-answering "AI".

    Combines a hand-written table of Q&A patterns (matched by Jaccard word
    overlap), headlines optionally scraped from public RSS feeds, canned
    per-topic template answers, and random fallback responses.
    """

    def __init__(self):
        # topic name -> list of collected article titles
        self.knowledge_base = defaultdict(list)
        # normalized question string -> canned answer
        self.qa_patterns = {}
        # every word seen in built-in patterns and collected text
        self.vocabulary = set()
        self.total_tokens = 0
        self.is_trained = False

        # Initialize with basic Q&A patterns
        self.initialize_basic_knowledge()

    def initialize_basic_knowledge(self):
        """Seed the pattern table and vocabulary with built-in Q&A pairs."""
        basic_qa = {
            "what is artificial intelligence": "Artificial intelligence is a technology that enables machines to perform tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.",
            "how do computers work": "Computers work by processing data through electronic circuits, following programmed instructions to perform calculations and operations.",
            "where is paris": "Paris is located in France and serves as the capital city of the country.",
            "why is education important": "Education is important because it develops knowledge, critical thinking skills, and prepares people for careers and civic participation.",
            "what is machine learning": "Machine learning is a subset of artificial intelligence that allows systems to automatically learn and improve from data without being explicitly programmed.",
            "how does the internet work": "The internet works through a global network of interconnected computers that communicate using standardized protocols to share information.",
            "what is climate change": "Climate change refers to long-term shifts in global weather patterns and temperatures, largely attributed to human activities.",
            "why renewable energy": "Renewable energy is important because it provides sustainable power sources that don't deplete natural resources and help reduce environmental impact."
        }

        for question, answer in basic_qa.items():
            self.qa_patterns[question] = answer
            words = question.split() + answer.split()
            self.vocabulary.update(words)

        # Token count starts from the built-in answers only.
        self.total_tokens = sum(len(answer.split()) for answer in basic_qa.values())
        print(f"🧠 Initialized with {len(basic_qa)} Q&A patterns")

    @staticmethod
    def _mentions(text, keywords):
        """Return True if *text* contains any keyword as a whole word.

        Bug fix: the original used plain substring tests, so short keywords
        such as 'ai' matched inside unrelated words like 'rain' or 'said'
        and misclassified topics.  (Trade-off: 'computer' no longer matches
        'computers'.)
        """
        return any(re.search(r'\b' + re.escape(keyword) + r'\b', text)
                   for keyword in keywords)

    def collect_training_data(self):
        """Collect training data from public sources.

        Returns a human-readable status string; always marks the system as
        trained, since the built-in patterns act as the fallback corpus.
        """
        print("πŸ•·οΈ Collecting training data...")

        collected_data = []

        # Try to collect from news sources
        news_data = self.fetch_news_data()
        collected_data.extend(news_data)

        # Process collected data
        if collected_data:
            self.process_collected_data(collected_data)
            self.is_trained = True
            return f"βœ… Training completed! Collected {len(collected_data)} articles and {self.total_tokens} total tokens."
        else:
            # Use fallback training (built-in patterns only)
            self.is_trained = True
            return "βœ… Training completed using built-in knowledge patterns!"

    def fetch_news_data(self):
        """Fetch up to 3 cleaned headline titles from each RSS feed.

        Network and parse failures are logged and skipped, so this degrades
        gracefully to an empty list when offline.
        """
        news_sources = [
            "https://feeds.reuters.com/reuters/worldNews",
            "https://feeds.bbci.co.uk/news/world/rss.xml"
        ]

        articles = []

        for source in news_sources:
            try:
                response = requests.get(source, timeout=5)
                if response.status_code == 200:
                    root = ET.fromstring(response.content)
                    fetched = 0
                    for item in root.findall(".//item")[:3]:  # Limit to 3 per source
                        title = item.find("title")
                        if title is not None and title.text:
                            # Strip punctuation, keep word chars and whitespace.
                            clean_title = re.sub(r'[^\w\s]', ' ', title.text).strip()
                            if len(clean_title) > 10:
                                articles.append(clean_title)
                                fetched += 1
                    # Bug fix: report the per-source count; previously this
                    # printed the running total accumulated across sources.
                    print(f"πŸ“° Collected {fetched} articles from {source}")
            except Exception as e:
                print(f"⚠️ Failed to collect from {source}: {str(e)}")

        return articles

    def process_collected_data(self, data):
        """File each collected text under a coarse topic and grow the vocabulary."""
        for text in data:
            # Hoisted: lower-case once per text instead of once per check.
            lowered = text.lower()
            self.vocabulary.update(lowered.split())

            # Simple topic extraction (whole-word matching, see _mentions)
            if self._mentions(lowered, ['technology', 'ai', 'computer']):
                self.knowledge_base['technology'].append(text)
            elif self._mentions(lowered, ['climate', 'environment', 'energy']):
                self.knowledge_base['environment'].append(text)
            elif self._mentions(lowered, ['economy', 'market', 'business']):
                self.knowledge_base['economy'].append(text)
            else:
                self.knowledge_base['general'].append(text)

        # Update token count
        self.total_tokens += sum(len(text.split()) for text in data)
        print(f"πŸ“š Processed data into {len(self.knowledge_base)} knowledge categories")

    def answer_question(self, question):
        """Answer a question using available knowledge.

        Resolution order: best direct pattern match (Jaccard > 0.6), then
        topic-keyword responses, then a random canned fallback.
        """
        if not question.strip():
            return "Hello! I'm an AI that learns from data. Ask me a question and I'll try to answer based on what I've learned!"

        question_clean = question.lower().strip()

        # Direct pattern matching.  Bug fix: pick the *best*-scoring pattern
        # rather than the first one in dict order that clears the threshold.
        if self.qa_patterns:
            best_pattern = max(
                self.qa_patterns,
                key=lambda pattern: self.calculate_similarity(question_clean, pattern),
            )
            if self.calculate_similarity(question_clean, best_pattern) > 0.6:
                return f"Based on my training: {self.qa_patterns[best_pattern]}"

        # Topic-based responses
        topic_response = self.get_topic_response(question_clean)
        if topic_response:
            return topic_response

        # Fallback response
        return self.generate_fallback_response(question_clean)

    def calculate_similarity(self, text1, text2):
        """Return the Jaccard similarity of the two texts' word sets (0.0-1.0)."""
        words1 = set(text1.split())
        words2 = set(text2.split())

        if not words1 or not words2:
            return 0.0

        intersection = len(words1.intersection(words2))
        union = len(words1.union(words2))

        return intersection / union if union > 0 else 0.0

    def get_topic_response(self, question):
        """Answer from collected articles (or a template) when a topic keyword appears.

        Returns None when no topic matches so the caller can fall through.
        """
        topic_keywords = {
            'technology': ['technology', 'computer', 'ai', 'artificial', 'machine', 'internet', 'digital'],
            'environment': ['climate', 'environment', 'energy', 'renewable', 'carbon', 'sustainability'],
            'economy': ['economy', 'economic', 'market', 'business', 'finance', 'money'],
            'education': ['education', 'learning', 'school', 'university', 'knowledge', 'study']
        }

        # Find matching topic (first match in declaration order wins)
        for topic, keywords in topic_keywords.items():
            if self._mentions(question, keywords):
                if topic in self.knowledge_base and self.knowledge_base[topic]:
                    # Quote the first collected article, truncated to 150 chars.
                    return f"Based on recent information about {topic}: {self.knowledge_base[topic][0][:150]}..."
                else:
                    return self.get_topic_template_response(topic, question)

        return None

    def get_topic_template_response(self, topic, question):
        """Return the canned template answer for *topic*, phrased per question form."""
        templates = {
            'technology': "Technology is rapidly evolving and transforming how we work, communicate, and solve problems. Modern technological advances include artificial intelligence, machine learning, and digital innovations.",
            'environment': "Environmental issues like climate change require urgent attention. Solutions include renewable energy adoption, sustainable practices, and reduced carbon emissions.",
            'economy': "Economic factors influence global markets, employment, and business growth. Understanding economic principles helps in making informed decisions.",
            'education': "Education plays a crucial role in personal development and societal progress. It provides knowledge, skills, and opportunities for growth."
        }

        base_response = templates.get(topic, "This is an important topic that involves multiple factors and considerations.")

        # A literal '?' switches to the interrogative phrasing.
        if '?' in question:
            return f"Regarding your question about {topic}: {base_response}"
        else:
            return f"About {topic}: {base_response}"

    def generate_fallback_response(self, question):
        """Return a random generic reply for questions nothing else matched."""
        fallback_responses = [
            "That's an interesting question. Based on general knowledge, this topic involves various factors that need consideration.",
            "From what I understand, this subject has multiple aspects worth exploring further.",
            "This is a complex topic that relates to several areas of knowledge and research.",
            "Based on my training data, this question touches on important concepts that merit detailed analysis."
        ]

        return random.choice(fallback_responses)

    def get_system_status(self):
        """Build the markdown-ish status report shown in the UI status box."""
        status = "πŸ€– **SIMPLE Q&A AI STATUS**\n\n"

        if self.is_trained:
            status += "βœ… **System is trained and ready**\n\n"
        else:
            status += "⏳ **System ready for training**\n\n"

        status += "**πŸ“Š Statistics:**\n"
        status += f"β€’ **Total tokens processed:** {self.total_tokens:,}\n"
        status += f"β€’ **Vocabulary size:** {len(self.vocabulary):,} words\n"
        status += f"β€’ **Q&A patterns:** {len(self.qa_patterns)} direct patterns\n"
        status += f"β€’ **Knowledge categories:** {len(self.knowledge_base)}\n"
        status += f"β€’ **Training status:** {'Completed' if self.is_trained else 'Pending'}\n"

        status += "\n**🎯 Capabilities:**\n"
        status += "β€’ Answers questions using pattern matching\n"
        status += "β€’ Learns from news articles and data\n"
        status += "β€’ Handles multiple topics and domains\n"
        status += "β€’ Provides fallback responses for unknown queries\n"

        return status
211
 
212
# Shared AI instance used by every Gradio callback below.
ai_system = SimpleQAAI()

def start_training():
    """Run data collection/training and return the outcome as status text."""
    try:
        return ai_system.collect_training_data()
    except Exception as e:
        # Surface the failure in the status box instead of crashing the UI.
        return f"❌ Training failed: {str(e)}"
222
 
223
def chat_function(message, history):
    """Answer *message*, append the exchange to *history*, and clear the textbox.

    Returns (history, "") so Gradio updates the chatbot and empties the input.
    """
    if not message:
        return history, ""

    try:
        reply = ai_system.answer_question(message)
    except Exception as e:
        reply = f"Sorry, I encountered an error: {str(e)}"
    history.append([message, reply])
    return history, ""
236
 
237
def refresh_status():
    """Return fresh status text from the shared AI instance for the status box."""
    current_status = ai_system.get_system_status()
    return current_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), title="Simple Q&A AI") as app:

    # Page banner
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
        <h1>πŸ€– Simple Question Answering AI</h1>
        <p><b>Learn from data and answer questions intelligently</b></p>
        <p>Stable β€’ Fast β€’ Reliable</p>
    </div>
    """)

    with gr.Row():
        # Left column: the chat surface.
        with gr.Column(scale=3):
            gr.Markdown("### πŸ’¬ Chat with AI")

            chatbot = gr.Chatbot(
                value=[],
                label="AI Assistant",
                height=400
            )

            msg_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask me anything: What is AI? How does technology work?",
                lines=2
            )

            with gr.Row():
                send_btn = gr.Button("πŸ’¬ Send", variant="primary")
                clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")

        # Right column: status readout and training controls.
        with gr.Column(scale=1):
            gr.Markdown("### βš™οΈ System Control")

            status_box = gr.Textbox(
                label="System Status",
                value=ai_system.get_system_status(),
                lines=16,
                interactive=False
            )

            train_btn = gr.Button("πŸš€ Start Training", variant="primary")
            refresh_btn = gr.Button("πŸ”„ Refresh Status", variant="secondary")

    # Example questions
    # NOTE(review): the call opening fell outside the visible diff context;
    # gr.Examples is assumed from the examples/inputs/label arguments — confirm.
    gr.Examples(
        examples=[
            "What is artificial intelligence?",
            "How do computers work?",
            "Why is education important?",
            "What is climate change?",
            "How does the internet work?",
            "What is machine learning?"
        ],
        inputs=msg_input,
        label="πŸ“ Try these questions"
    )

    # Event handlers: wire buttons and textbox-submit to the callbacks above.
    send_btn.click(
        fn=chat_function,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input]
    )

    msg_input.submit(
        fn=chat_function,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input]
    )

    clear_btn.click(
        fn=lambda: ([], ""),
        outputs=[chatbot, msg_input]
    )

    train_btn.click(
        fn=start_training,
        outputs=[status_box]
    )

    refresh_btn.click(
        fn=refresh_status,
        outputs=[status_box]
    )

# Launch the app
if __name__ == "__main__":
    app.launch()