Denis202 committed on
Commit
345b0e0
·
1 Parent(s): efcbacd

Include training code in main repo

Browse files
.github/workflows/sync-to-huggingface.yml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Mirrors this GitHub repository into the Hugging Face Space on every push
# to main (or on manual dispatch).
name: Sync to Hugging Face Space

on:
  push:
    branches: [ main ]
  workflow_dispatch:

jobs:
  sync-to-space:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Git
        run: |
          git config --global user.name "GitHub Actions"
          # NOTE(review): the address was redacted ("[email protected]") in the
          # source this was recovered from; confirm the intended value.
          git config --global user.email "actions@github.com"

      - name: Sync to Hugging Face
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          # Clone Hugging Face space, authenticating with the token so the
          # later push is authorised.
          # NOTE(review): credentials in the URL were redacted in the source;
          # the user:token form below follows the Hugging Face Spaces docs.
          git clone "https://Denis202:${HF_TOKEN}@huggingface.co/spaces/Denis202/KiswahiliChetu_space" hf-space

          # Copy all files except git metadata, workflows, and the clone
          # itself (hf-space lives inside the source tree, so without the
          # exclude it would be copied into hf-space/hf-space).
          rsync -av --progress ./ hf-space/ --exclude .git --exclude .github --exclude /hf-space

          # Commit and push to Hugging Face
          cd hf-space
          git add .
          # `git commit` exits non-zero when nothing is staged; skip cleanly
          # instead of failing the job on a no-op sync.
          if git diff --cached --quiet; then
            echo "No changes to sync"
          else
            git commit -m "Auto-sync from GitHub: ${{ github.sha }}"
            git push origin main
          fi
.gitignore ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python bytecode, compiled extensions, and build artifacts
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ *.so
7
+ *.egg-info/
8
+ .eggs/
9
+ dist/
10
+ build/
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ env/
16
+
17
+ # IDE files
18
+ .vscode/
19
+ .idea/
20
+ *.swp
21
+ *.swo
22
+
23
+ # OS files
24
+ .DS_Store
25
+ Thumbs.db
26
+
27
+ # Logs
28
+ *.log
29
+ logs/
30
+
31
+ # Hugging Face cache
32
+ .cache/
33
+ .huggingface/
app.py.backup ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from chat import bot_instance
3
+ import logging
4
+ import time
5
+
6
# Set up logging: configure the root logger at INFO level and create the
# module-level logger used by the handlers in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
9
+
10
# Language texts: UI strings keyed first by language name, then by string id.
# Both languages define the same set of ids.
LANGUAGE_TEXTS: dict[str, dict[str, str]] = {
    "English": {
        "title": "Kiswahili AI Chat",
        "description": "Chat with AI in Swahili and English",
        "placeholder": "Type your message here...",
        "clear": "Clear Chat",
        "submit": "Send",
        "error": "An error occurred",
        "thinking": "Thinking...",
        "examples": "Example Questions",
        "settings": "Settings",
        "response_length": "Response Length",
        "creativity": "Creativity Level",
        "word_selection": "Word Selection",
    },
    "Kiswahili": {
        "title": "Mazungumzo ya AI ya Kiswahili",
        "description": "Wasiliana na AI kwa Kiswahili na Kiingereza",
        "placeholder": "Andika ujumbe wako hapa...",
        "clear": "Futa Mazungumzo",
        "submit": "Tuma",
        "error": "Hitilafu imetokea",
        "thinking": "Inakokotoa...",
        "examples": "Mifano ya Maswali",
        "settings": "Mipangilio",
        "response_length": "Urefu wa Majibu",
        "creativity": "Kiashiria cha Ubunifu",
        "word_selection": "Uchaguzi wa Maneno",
    },
}


def get_localized_text(language: str, key: str) -> str:
    """Return the UI string for *key* in *language*.

    Unknown languages fall back to the English table; a key missing from the
    selected table is returned unchanged so callers always get a string.
    """
    texts = LANGUAGE_TEXTS.get(language, LANGUAGE_TEXTS["English"])
    return texts.get(key, key)
44
+
45
def process_message(message, chat_history, max_tokens, temperature, top_p, language):
    """Stream chat updates for one user message.

    This is a generator: every result must be *yielded*. A ``return value``
    inside a generator is never delivered to the consumer, so the early exits
    below yield their result and then return.

    Args:
        message: Text typed by the user; ``None`` or blank input is ignored.
        chat_history: Existing list of ``(user, bot)`` pairs, or ``None``.
        max_tokens: Forwarded to the bot as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to the bot.
        top_p: Nucleus-sampling value forwarded to the bot.
        language: Language name used to localise error messages.

    Yields:
        ``(updated_history, "")`` tuples; the empty string is the new value
        for the message box.
    """
    # Normalise so concatenation below cannot fail on a None history.
    history = list(chat_history or [])

    if not message or not message.strip():
        yield history, ""
        return

    try:
        if bot_instance is None:
            error_msg = get_localized_text(language, "error") + ": Chatbot not initialized"
            yield history + [(message, error_msg)], ""
            return

        # Show the user's message immediately with a pending bot reply.
        yield history + [(message, None)], ""
        time.sleep(0.1)

        # Get response from chatbot
        response = bot_instance.chat(
            message=message,
            history=history,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )

        # Replace the pending entry with the final response.
        yield history + [(message, response)], ""

    except Exception as e:
        # Boundary handler: surface the failure in the chat instead of
        # crashing the UI callback, and keep the traceback in the log.
        error_msg = f"{get_localized_text(language, 'error')}: {str(e)}"
        logger.exception("Error in process_message: %s", e)
        yield history + [(message, error_msg)], ""
76
+
77
def clear_chat() -> list:
    """Return a fresh empty history, used to reset the chat display."""
    return []
80
+
81
# Create the Gradio interface.
# Layout, top to bottom: header, chat window, input row (textbox + buttons),
# collapsible settings, example prompts, system info, footer.
with gr.Blocks(
    title="Kiswahili AI Chat",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="green"),
    css="""
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .chatbot {
        min-height: 400px;
        border-radius: 12px;
        border: 1px solid #e0e0e0;
        margin-bottom: 20px;
    }
    .gradio-button {
        border-radius: 8px;
    }
    .settings-section {
        background: #f8f9fa;
        padding: 20px;
        border-radius: 12px;
        border: 1px solid #e0e0e0;
        margin-top: 20px;
    }
    """
) as demo:

    gr.Markdown("""
    # 🇹🇿 Kiswahili AI Chat
    ### Wasiliana na msaidizi wa AI kwa Kiswahili na Kiingereza
    """)

    # Chat interface - FULL WIDTH
    chatbot = gr.Chatbot(
        label="Mazungumzo",
        show_copy_button=True,
        height=400,
        show_label=False,
        avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=swahili")
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Andika ujumbe wako hapa...",
            show_label=False,
            lines=2,
            max_lines=5,
            container=False,
            scale=8
        )

        with gr.Column(scale=2):
            submit_btn = gr.Button("📤 Tuma", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Futa", variant="secondary")

    # Settings section - BELOW the chat (not on the side)
    with gr.Accordion("⚙️ Mipangilio", open=False):
        with gr.Row():
            with gr.Column():
                max_tokens = gr.Slider(
                    minimum=50, maximum=300, value=150, step=10,
                    label="Urefu wa Majibu",
                    info="Idadi ya herufi za jibu"
                )

            with gr.Column():
                temperature = gr.Slider(
                    minimum=0.1, maximum=1.5, value=0.7, step=0.1,
                    label="Kiashiria cha Ubunifu",
                    info="Kiwango cha mabadiliko ya majibu"
                )

            with gr.Column():
                top_p = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                    label="Uchaguzi wa Top-p",
                    info="Kiwango cha uchaguzi wa maneno"
                )

        language = gr.Dropdown(
            choices=["Kiswahili", "English"],
            value="Kiswahili",
            label="Lugha ya Interface",
            info="Badilisha lugha ya kiolesura"
        )

    # Examples section: clicking an example fills the message box.
    with gr.Accordion("📚 Mifano ya Maswali", open=False):
        gr.Examples(
            examples=[
                ["Habari yako? Unaweza kuniambia kuhusu Tanzania?"],
                ["Tafadhali nipe mapendekezo ya vitabu bora vya Kiswahili"],
                ["Unaweza kunisaidia kutafsiri hii kwa Kiingereza?"],
                ["Eleza kuhusu utamaduni wa Waswahili"],
                ["Nini maana ya 'Hakuna matata' na 'Asante sana'?"],
                ["Toa mfano wa sentensi kwa Kiswahili"]
            ],
            inputs=msg,
            label="Bonyeza mfano wa swali kujaribu:",
            examples_per_page=3
        )

    # System info (static text; not read from the running environment)
    with gr.Accordion("📊 Taarifa ya Mfumo", open=False):
        gr.Markdown("""
        **Modeli:** distilgpt2
        **Gradio:** 5.43.1
        **Transformer:** 4.45.1
        **PyTorch:** 2.4.1
        **Kifaa:** CPU
        """)

    # Event handlers: Enter in the textbox and the send button share the
    # same callback; its second output clears the textbox.
    msg.submit(
        fn=process_message,
        inputs=[msg, chatbot, max_tokens, temperature, top_p, language],
        outputs=[chatbot, msg]
    )

    submit_btn.click(
        fn=process_message,
        inputs=[msg, chatbot, max_tokens, temperature, top_p, language],
        outputs=[chatbot, msg]
    )

    clear_btn.click(
        fn=clear_chat,
        inputs=[],
        outputs=chatbot
    )

    # Footer
    gr.Markdown("---")
    gr.Markdown("""
    <div style='text-align: center; color: #666;'>
    <strong>Teknolojia:</strong> Gradio 5.43.1 • Transformers • PyTorch<br>
    <em>Imetengenezwa kwa upendo wa lugha ya Kiswahili</em> 💚
    </div>
    """)
222
if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    demo.launch(
        server_name="0.0.0.0",
        share=False,
        show_error=True,
        debug=False,
        favicon_path=None
    )
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  gradio==5.43.1
2
- torch==2.0.1
3
  transformers==4.30.2
 
1
  gradio==5.43.1
2
+ torch>=2.5.1
3
  transformers==4.30.2
swahili-llm-training ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 7589f160a55900696273d52134f92560cdf622ef