Spaces:

rahul7star
/

QwenChat

Paused

App Files Files Community

rahul7star committed on Aug 15, 2025

Commit

d20382c

verified ·

1 Parent(s): a905a0e

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -32

app.py CHANGED Viewed

@@ -1,16 +1,25 @@
 import gradio as gr
 import torch
-import asyncio
 from transformers import AutoTokenizer, AutoModelForCausalLM
 # ---------------- CONFIG ----------------
 REPO_ID = "goonsai-com/civitaiprompts"
 SUBFOLDER = "gemma3-1B-goonsai-nsfw-100k"
 MODEL_NAME = "Qwen3-1.7B-CivitAI"
-# ---------------- LOAD TOKENIZER & MODEL ----------------
 tokenizer = AutoTokenizer.from_pretrained(REPO_ID, subfolder=SUBFOLDER, trust_remote_code=True)
 dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
 model = AutoModelForCausalLM.from_pretrained(
     REPO_ID,
     subfolder=SUBFOLDER,
@@ -18,21 +27,23 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",
     trust_remote_code=True
 )
 # ---------------- CHAT FUNCTION ----------------
-def chat_fn_sync(message, history=None):
-    if history is None:
-        history = []
-    # Convert history tuples to lists for Gradio Chatbot
-    history = [list(turn) for turn in history]
-    chat_history = ""
-    for turn in history:
-        chat_history += f"User: {turn[0]}\nAssistant: {turn[1]}\n"
-    full_text = f"{chat_history}User: {message}\nAssistant:"
     inputs = tokenizer([full_text], return_tensors="pt", truncation=True, max_length=1024).to(model.device)
     reply_ids = model.generate(
         **inputs,
         max_new_tokens=512,
@@ -42,35 +53,25 @@ def chat_fn_sync(message, history=None):
     )
     response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
     assistant_reply = response.split("Assistant:")[-1].strip()
-    # Append as a list (not tuple)
-    history.append([message, assistant_reply])
-    return assistant_reply, history
-# Async wrapper for Gradio 5.x
-async def chat_fn(message, history=None):
-    return await asyncio.to_thread(chat_fn_sync, message, history)
 # ---------------- GRADIO BLOCKS UI ----------------
 with gr.Blocks() as demo:
-    gr.Markdown(f"# 🤖 {MODEL_NAME}")
     with gr.Row():
         with gr.Column():
             message = gr.Textbox(label="Type your message...", placeholder="Hello!")
             send_btn = gr.Button("Send")
-            clear_btn = gr.Button("Clear Chat")
         with gr.Column():
-            # Use 'tuples' for legacy list-of-lists format
-            chatbot = gr.Chatbot(type="tuples")
-    state = gr.State([])
-    # Send button / Enter key triggers async chat
-    send_btn.click(chat_fn, inputs=[message, state], outputs=[chatbot, message])
-    message.submit(chat_fn, inputs=[message, state], outputs=[chatbot, message])
-    # Clear chat
-    clear_btn.click(lambda: ([], ""), None, [state, chatbot])
 demo.launch()

 import gradio as gr
 import torch
+import logging
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import time
 # ---------------- CONFIG ----------------
 REPO_ID = "goonsai-com/civitaiprompts"
 SUBFOLDER = "gemma3-1B-goonsai-nsfw-100k"
 MODEL_NAME = "Qwen3-1.7B-CivitAI"
+# ---------------- LOGGING ----------------
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+logger = logging.getLogger(__name__)
+logger.info("Starting Gradio chatbot...")
+# ---------------- LOAD MODEL ----------------
+logger.info(f"Loading tokenizer from {REPO_ID}/{SUBFOLDER}")
 tokenizer = AutoTokenizer.from_pretrained(REPO_ID, subfolder=SUBFOLDER, trust_remote_code=True)
 dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
+logger.info(f"Loading model with dtype {dtype}")
 model = AutoModelForCausalLM.from_pretrained(
     REPO_ID,
     subfolder=SUBFOLDER,
     device_map="auto",
     trust_remote_code=True
 )
+logger.info("Model loaded successfully.")
 # ---------------- CHAT FUNCTION ----------------
+def chat_fn(message):
+    logger.info(f"Received message: {message}")
+    # Build prompt directly from user input
+    full_text = f"User: {message}\nAssistant:"
+    logger.info(f"Full prompt for generation:\n{full_text}")
+    start_time = time.time()
+    # Tokenize input
     inputs = tokenizer([full_text], return_tensors="pt", truncation=True, max_length=1024).to(model.device)
+    logger.info("Tokenized input.")
+    # Generate response
+    logger.info("Generating response...")
     reply_ids = model.generate(
         **inputs,
         max_new_tokens=512,
     )
     response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
     assistant_reply = response.split("Assistant:")[-1].strip()
+    logger.info(f"Assistant reply: {assistant_reply}")
+    logger.info(f"Generation time: {time.time() - start_time:.2f}s")
+    return assistant_reply
 # ---------------- GRADIO BLOCKS UI ----------------
 with gr.Blocks() as demo:
+    gr.Markdown(f"# 🤖 {MODEL_NAME} (Stateless)")
     with gr.Row():
         with gr.Column():
             message = gr.Textbox(label="Type your message...", placeholder="Hello!")
             send_btn = gr.Button("Send")
         with gr.Column():
+            output = gr.Textbox(label="Assistant Response", lines=10)
+    # Connect button
+    send_btn.click(chat_fn, inputs=[message], outputs=[output])
+    message.submit(chat_fn, inputs=[message], outputs=[output])
+logger.info("Launching Gradio app...")
 demo.launch()