mohbay committed · Commit dd5222e · verified · 1 Parent(s): 82e94a1

Upload 7 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ mub_chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,241 @@
+ """
+ MUB Chatbot - Hugging Face Spaces
+ Simple deployment using pre-built ChromaDB database
+ """
+
+ import gradio as gr
+ import os
+ import logging
+ import chromadb
+ from sentence_transformers import SentenceTransformer
+ import google.generativeai as genai
+ from typing import List, Dict
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Configure Gemini API
+ GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', 'AIzaSyDCDNMzNnPJC49Y4LNspi1a04OrVwGYqVQ')
+ genai.configure(api_key=GEMINI_API_KEY)
+
+ # Global RAG instance
+ rag_instance = None
+
+
+ class SimpleRAG:
+     """Simple RAG using pre-built ChromaDB."""
+
+     def __init__(self, db_path="./mub_chroma_db"):
+         self.db_path = db_path
+         self.embedding_dim = 512
+
+         # Load embedding model
+         logger.info("Loading embedding model...")
+         self.embedding_model = SentenceTransformer(
+             "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka"
+         )
+
+         # Load existing ChromaDB
+         logger.info(f"Loading ChromaDB from {db_path}...")
+         self.chroma_client = chromadb.PersistentClient(path=db_path)
+         self.collection = self.chroma_client.get_collection(name="mub_info")
+         logger.info(f"Loaded {self.collection.count()} documents")
+
+     def embed_text(self, text: str) -> List[float]:
+         """Generate embedding for text."""
+         embedding = self.embedding_model.encode(text, convert_to_numpy=True).tolist()
+         return embedding[:self.embedding_dim]  # Truncate to 512
+
+     def search(self, query: str, n_results: int = 5) -> List[Dict]:
+         """Search for relevant documents."""
+         query_embedding = self.embed_text(query)
+
+         results = self.collection.query(
+             query_embeddings=[query_embedding],
+             n_results=n_results
+         )
+
+         formatted_results = []
+         if results['metadatas'] and len(results['metadatas']) > 0:
+             for idx, metadata in enumerate(results['metadatas'][0]):
+                 formatted_results.append({
+                     'text': results['documents'][0][idx],
+                     'section': metadata['section'],
+                     'type': metadata['type'],
+                     'distance': results['distances'][0][idx] if results['distances'] else None
+                 })
+
+         return formatted_results
+
+
+ def get_rag():
+     """Get or create RAG instance."""
+     global rag_instance
+     if rag_instance is None:
+         logger.info("Initializing RAG system...")
+         rag_instance = SimpleRAG()
+         logger.info("RAG system ready!")
+     return rag_instance
+
+
+ def format_sources(search_results):
+     """Format search results as markdown."""
+     if not search_results:
+         return ""
+
+     sources_md = "\n\n---\n### 📚 Sources:\n\n"
+     for idx, result in enumerate(search_results[:3], 1):
+         relevance = round(100 - result['distance'] / 3, 1)
+         sources_md += f"**{idx}. {result['section']}** (Relevance: {relevance}%)\n"
+         sources_md += f"- Type: {result['type']}\n"
+         preview = result['text'][:150] + "..." if len(result['text']) > 150 else result['text']
+         sources_md += f"- Preview: {preview}\n\n"
+
+     return sources_md
+
+
+ def generate_response(user_message, context, language='en'):
+     """Generate response using Gemini."""
+
+     if language == 'ar':
+         system_prompt = """أنت مساعد ذكي لجامعة المقاصد في بيروت (MUB).
+
+ مهمتك: الإجابة على أسئلة الطلاب والزوار حول الجامعة بناءً على المعلومات المتوفرة.
+
+ قواعد:
+ - استخدم المعلومات من السياق أدناه
+ - إذا لم تجد إجابة دقيقة، قل ذلك بوضوح
+ - كن مهذباً ومفيداً
+ - أجب باللغة العربية بوضوح ودقة
+ """
+     else:
+         system_prompt = """You are an intelligent assistant for Makassed University of Beirut (MUB).
+
+ Your task: Answer questions from students and visitors about the university based on available information.
+
+ Rules:
+ - Use information from the context below
+ - If you don't find an exact answer, say so clearly
+ - Be polite and helpful
+ - Answer in English clearly and accurately
+ """
+
+     full_prompt = f"""{system_prompt}
+
+ CONTEXT:
+ {context}
+
+ USER QUESTION: {user_message}
+
+ Provide a helpful, accurate answer based on the context above."""
+
+     try:
+         model = genai.GenerativeModel('gemini-2.0-flash-exp')
+         response = model.generate_content(full_prompt)
+
+         if response and response.text:
+             return response.text.strip()
+         else:
+             return "عذراً، لم أتمكن من إنشاء إجابة." if language == 'ar' else "Sorry, couldn't generate a response."
+
+     except Exception as e:
+         logger.error(f"Gemini error: {e}")
+         return "عذراً، حدث خطأ." if language == 'ar' else "Sorry, an error occurred."
+
+
+ def chat(message, history, language):
+     """Main chat function."""
+     try:
+         rag = get_rag()
+
+         # Search
+         search_results = rag.search(message, n_results=5)
+
+         # Format context
+         context_parts = []
+         lang_code = 'ar' if language == 'العربية' else 'en'
+
+         if lang_code == 'ar':
+             context_parts.append("المعلومات من جامعة المقاصد:")
+         else:
+             context_parts.append("Information from Makassed University:")
+
+         for idx, result in enumerate(search_results, 1):
+             context_parts.append(f"\n--- Source {idx} ---")
+             context_parts.append(result['text'])
+
+         context = "\n".join(context_parts)
+
+         # Generate response
+         bot_response = generate_response(message, context, lang_code)
+
+         # Add sources
+         sources = format_sources(search_results)
+         full_response = bot_response + sources
+
+         return full_response
+
+     except Exception as e:
+         logger.error(f"Chat error: {e}")
+         return "عذراً، حدث خطأ." if language == 'العربية' else "Sorry, an error occurred."
+
+
+ # Create interface
+ with gr.Blocks(title="MUB Chatbot", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("""
+ # 🎓 Makassed University of Beirut (MUB) Chatbot
+
+ Ask me anything about Makassed University!
+ أسألني عن جامعة المقاصد!
+ """)
+
+     language = gr.Radio(
+         choices=["English", "العربية"],
+         value="English",
+         label="Language / اللغة"
+     )
+
+     chatbot = gr.Chatbot(
+         height=500,
+         show_label=False,
+     )
+
+     with gr.Row():
+         msg = gr.Textbox(
+             placeholder="Type your question... / اكتب سؤالك...",
+             show_label=False,
+             scale=9
+         )
+         submit = gr.Button("Send", scale=1, variant="primary")
+
+     gr.Examples(
+         examples=[
+             ["What is Makassed University?"],
+             ["What are the tuition fees?"],
+             ["Tell me about nursing programs"],
+             ["ما هي جامعة المقاصد؟"],
+             ["ما هي رسوم الدراسة؟"],
+         ],
+         inputs=msg,
+     )
+
+     gr.Markdown("""
+ ---
+ **About:** AI chatbot powered by RAG + Google Gemini
+ **Data:** Official MUB documentation
+ **Note:** For official information, contact the university directly.
+ """)
+
+     def respond(message, chat_history, lang):
+         bot_response = chat(message, chat_history, lang)
+         chat_history.append((message, bot_response))
+         return "", chat_history
+
+     msg.submit(respond, [msg, chatbot, language], [msg, chatbot])
+     submit.click(respond, [msg, chatbot, language], [msg, chatbot])
+
+ # Launch
+ if __name__ == "__main__":
+     demo.queue()
+     demo.launch()
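
For quick local verification, here is a minimal sketch (not part of the commit) of how the SimpleRAG class and generate_response from app.py above could be exercised outside the Gradio UI. It assumes the LFS-tracked mub_chroma_db/ directory has been pulled alongside app.py and that a valid GEMINI_API_KEY is set in the environment; the sample question is purely illustrative.

# Sketch: local smoke test for the RAG pipeline above.
# Importing app also builds the Gradio Blocks UI, but launch() only runs under __main__.
from app import SimpleRAG, generate_response

rag = SimpleRAG(db_path="./mub_chroma_db")      # loads the embedding model and the "mub_info" collection
hits = rag.search("What are the tuition fees?", n_results=3)
context = "\n".join(h['text'] for h in hits)    # same context shape that chat() builds
print(generate_response("What are the tuition fees?", context, language='en'))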
mub_chroma_db/a5d21a48-3163-487e-b370-3b741d2ed362/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:979645b77d03863c3e0d13b438fd14402cd3461b77f01909ef009c350fef3e15
+ size 218800
mub_chroma_db/a5d21a48-3163-487e-b370-3b741d2ed362/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c8a407226e15554f8aa5e2dc70831bc8e464bd1433ac370e1dc9bef7e839d5a
+ size 100
mub_chroma_db/a5d21a48-3163-487e-b370-3b741d2ed362/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6580663486b6b4f8d8c8dc9abbaa995dc3aec03c7d2d8e6250c834988239797
+ size 400
mub_chroma_db/a5d21a48-3163-487e-b370-3b741d2ed362/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+ size 0
mub_chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a60c8c692121bbd6ede35370d1e76322b8fe9a7206769e5e577d03790ca44cd
+ size 1073152
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ # Hugging Face Spaces - MUB Chatbot Requirements
+
+ gradio>=4.0.0
+ chromadb>=0.4.22
+ sentence-transformers>=2.2.2
+ google-generativeai>=0.3.0