ameerazam08 committed on
Commit
efe4f91
·
verified ·
1 Parent(s): f85722b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +281 -0
app.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import time
4
+ import uuid
5
+ import tempfile
6
+ from PIL import Image, ImageDraw, ImageFont
7
+ import base64
8
+ import mimetypes
9
+ from io import BytesIO
10
+ from google import genai
11
+ from google.genai import types
12
+
13
def generate(text, images, api_key, model="gemini-2.5-flash-image-preview"):
    """Send images plus a text prompt to Gemini and collect its reply.

    Args:
        text: Prompt describing what to do with the images.
        images: List of input images (``bot`` passes PIL images). They are
            placed ahead of the prompt in the request contents.
        api_key: Gemini API key. When it is None, empty or only whitespace,
            the ``GEMINI_API_KEY`` environment variable is used instead.
        model: Name of the Gemini model to call.

    Returns:
        A ``(image_path, text_response)`` tuple. ``image_path`` is the path of
        a temporary ``.png`` file holding the last image part of the reply, or
        None when the reply has no image part. ``text_response`` is every text
        part of the reply, each followed by a newline.

    Raises:
        RuntimeError: If the reply carries no candidate, content or parts.
    """
    # Prefer the key typed into the UI; fall back to the environment.
    key = api_key.strip() if api_key else ""
    client = genai.Client(api_key=key or os.environ.get("GEMINI_API_KEY"))

    # Images first, then the text prompt.
    contents = [*images, text]

    response = client.models.generate_content(
        model=model,
        contents=contents,
    )

    # Any link of candidates[0].content.parts may be missing; fail with a
    # readable message instead of an IndexError/TypeError/AttributeError.
    candidates = response.candidates
    content = candidates[0].content if candidates else None
    parts = content.parts if content is not None else None
    if parts is None:
        raise RuntimeError(
            "Gemini returned no content for this request "
            "(the prompt or the result may have been blocked)."
        )

    text_chunks = []
    image_path = None

    for part in parts:
        if part.text is not None:
            text_chunks.append(part.text + "\n")
        elif part.inline_data is not None:
            # Reserve a file name that outlives this call (delete=False) so
            # the caller can hand the path to the chat UI.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                temp_path = tmp.name
            # The handle is closed before saving so the path can be reopened
            # for writing on every platform.
            generated_image = Image.open(BytesIO(part.inline_data.data))
            generated_image.save(temp_path)
            image_path = temp_path
            print(f"Generated image saved to: {temp_path} with prompt: {text}")

    return image_path, "".join(text_chunks)
43
+
44
def print_like_dislike(x: gr.LikeData):
    """Print the index, value and liked flag of a like/dislike event."""
    index, value, liked = x.index, x.value, x.liked
    print(index, value, liked)
47
+
48
def add_message(history, message, api_key):
    """Append the submitted files and text to the chat history.

    Each uploaded file becomes its own ``((path,), None)`` row; non-blank
    text becomes a ``(text, None)`` row after the files. ``api_key`` is
    accepted but not used here.

    Returns the updated history and a cleared, non-interactive
    ``gr.MultimodalTextbox`` restricted to image files.
    """
    for upload in message["files"] or ():
        # An upload is either a plain path string or a dict with a "path" key;
        # anything else is skipped.
        if isinstance(upload, str):
            path = upload
        elif isinstance(upload, dict) and "path" in upload:
            path = upload["path"]
        else:
            continue
        history.append(((path,), None))

    prompt = message["text"]
    if prompt is not None and prompt.strip():
        history.append((prompt, None))

    locked_input = gr.MultimodalTextbox(value=None, interactive=False, file_types=["image"])
    return history, locked_input
64
+
65
def _stream_reply(history, response):
    """Type ``response`` into the last row's bot slot, yielding after each character."""
    user_part = history[-1][0]
    typed = ""
    # Replace the row instead of assigning into it: rows appended by
    # add_message are tuples, which do not support item assignment.
    history[-1] = [user_part, typed]
    for character in response:
        typed += character
        history[-1] = [user_part, typed]
        time.sleep(0.02)
        yield history


def _load_uploaded_images(history):
    """Open every image referenced by a file row (tuple user part) of ``history``."""
    images = []
    for msg in history:
        user_part = msg[0]
        if not (isinstance(user_part, tuple) and len(user_part) > 0):
            continue
        img_path = user_part[0]
        if not os.path.exists(img_path):
            continue
        try:
            # Images are forwarded in their native mode; no mode conversion
            # is applied.
            images.append(Image.open(img_path))
        except Exception as e:
            # Best effort: an unreadable upload is reported and skipped.
            print(f"Error loading image {img_path}: {e}")
    return images


def _latest_text_prompt(history):
    """Return the most recent user text in ``history``, or None if there is none."""
    return next(
        (msg[0] for msg in reversed(history) if isinstance(msg[0], str)),
        None,
    )


def bot(history, api_key):
    """Produce the bot's reply for the current chat history.

    This is a generator: it yields ``history`` every time the chat display
    should be refreshed.

    Args:
        history: Chat rows of ``(user_part, bot_part)``. A user part that is
            a tuple is treated as an uploaded image path; a string is treated
            as a text prompt.
        api_key: Gemini API key forwarded to ``generate``.

    Yields:
        The same ``history`` list, updated in place. If no image is found, or
        no text prompt is found, or ``generate`` raises, an explanatory message
        is typed into the last row. Otherwise the generated image and a
        confirmation message are appended as new rows, or Gemini's text reply
        is typed into the last row when no image came back.
    """
    if not history:
        # Nothing to respond to. A generator's return value is not part of
        # the yielded stream, so a bare return is equivalent here.
        return

    images = _load_uploaded_images(history)
    text_prompt = _latest_text_prompt(history)

    if not images:
        response = "**Please upload an image first!** I can help you edit images using Gemini AI."
        yield from _stream_reply(history, response)
        return

    if not text_prompt:
        response = f"**I see {len(images)} image(s) uploaded!** Please provide a text prompt telling me what you'd like to do with the image(s)."
        yield from _stream_reply(history, response)
        return

    try:
        # Generate image using Gemini
        image_path, text_response = generate(text=text_prompt, images=images, api_key=api_key)
    except Exception as e:
        # UI boundary: surface any failure in the chat instead of crashing.
        response = f"**Error:** {str(e)}"
        yield from _stream_reply(history, response)
        return

    if image_path:
        # Add the generated image to chat
        history.append((None, (image_path,)))
        yield history

        # Add text response
        response = f"**Image generated successfully!** Here's your edited image based on the prompt: '{text_prompt}'"
        history.append((None, response))
        yield history
    else:
        # Only text response
        response = f"**Gemini Response:** {text_response}"
        yield from _stream_reply(history, response)
141
+
142
# Create the Gradio interface.
# NOTE(review): block nesting below was reconstructed from a listing that had
# lost its indentation — confirm which components sit inside each Row/Column,
# in particular that chat_input is outside the chat-container Row.
# The css argument styles the header, the API-key box and the chat container
# via the class names used further down.
with gr.Blocks(css="""
.header-container {
text-align: center;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border-radius: 10px;
margin-bottom: 20px;
}
.header-container img {
width: 50px;
height: 50px;
margin-bottom: 10px;
}
.header-container h1 {
margin: 10px 0;
font-size: 2.5em;
}
.header-container p {
margin: 5px 0;
font-size: 1.1em;
}
.header-container a {
color: #ffd700;
text-decoration: none;
}
.header-container a:hover {
text-decoration: underline;
}
.api-key-section {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
margin-bottom: 20px;
border: 1px solid #dee2e6;
}
.chat-container {
border: 1px solid #dee2e6;
border-radius: 10px;
padding: 20px;
background: white;
}
""") as demo:

    # Custom HTML header: logo, title and links to the API-key page and Gradio
    gr.HTML(
        """
<div class="header-container">
<div>
<img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
</div>
<div>
<h1>Chat with Gemini Image Editor</h1>
<p>Upload images and chat with AI to edit them! |
<a href="https://aistudio.google.com/apikey">Get API Key</a> |
Powered by <a href="https://gradio.app/">Gradio</a>⚡️</p>
</div>
</div>
"""
    )

    # API Key configuration: password-style textbox next to usage instructions
    with gr.Row():
        with gr.Column(scale=1):
            # The value is passed to add_message and bot as their api_key argument.
            api_key_input = gr.Textbox(
                lines=1,
                placeholder="Enter your Gemini API Key",
                label="Gemini API Key",
                type="password",
                elem_classes="api-key-section"
            )
        with gr.Column(scale=2):
            gr.Markdown("""
**Instructions:**
- Upload one or more images using the file upload button
- Type your editing instructions in the chat
- The AI will process your images based on your text prompt
- You can upload multiple images at once and chat about them
- Supported formats: PNG, JPG, JPEG, WEBP
""")

    # Chat interface: starts with an empty history
    with gr.Row(elem_classes="chat-container"):
        chatbot = gr.Chatbot(
            [],
            elem_id="chatbot",
            bubble_full_width=False,
            height=500,
            show_label=False
        )

    # Chat input with file upload (image files only, several at once)
    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["image"],
        file_count="multiple",
        placeholder="Upload one or more images and type your editing instructions...",
        show_label=False
    )

    # Set up chat interactions:
    #   1. on submit, add_message appends the input to the history and returns
    #      a cleared, non-interactive textbox;
    #   2. then bot produces the reply into the chatbot.
    chat_msg = chat_input.submit(
        add_message,
        [chatbot, chat_input, api_key_input],
        [chatbot, chat_input],
        queue=False
    ).then(
        bot,
        [chatbot, api_key_input],
        chatbot,
        api_name="bot_response"
    )

    # Re-enable input after response (add_message made it non-interactive)
    chat_msg.then(
        lambda: gr.MultimodalTextbox(interactive=True),
        None,
        [chat_input],
        queue=False
    )

    # Like/dislike functionality: events are only printed, nothing is stored
    chatbot.like(print_like_dislike, None, None)

    # Examples section: static text suggestions, not clickable examples
    gr.Markdown("## Try these examples")
    gr.Markdown("""
1. Upload an image and type: "change text to 'HELLO WORLD'"
2. Upload an image and type: "remove the background"
3. Upload an image and type: "add sunglasses to the person"
4. Upload an image and type: "make it look like a painting"
5. Upload multiple images and type: "apply the same style to all images"
6. Upload multiple images and type: "create a collage of these images"
""")

# Launch the demo
# queue() is called at import time; launch() only runs when executed as a script.
demo.queue()
if __name__ == "__main__":
    demo.launch(share=True)