Spaces:

Futuresony
/

Mr.Events

Running

App Files Community

Futuresony committed on Jul 15

Commit

79d6d35

verified ·

1 Parent(s): 2b7ab90

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -17

app.py CHANGED Viewed

@@ -1,27 +1,29 @@
 import os
 import gradio as gr
 import spaces
 from huggingface_hub import InferenceClient
-# Read the HF access token from repository secrets (Settings → Secrets)
-HF_TOKEN = os.getenv("HF_TOKEN")          # None if not provided
-# Inference client for Gemma‑2‑9B‑IT (accept the model license on HF first)
-client = InferenceClient(
-    repo_id="google/gemma-2-9b-it",
-    token=HF_TOKEN                       # pass token explicitly
-)
-@spaces.GPU                              # needed only if you pick GPU/Zero‑GPU hardware
 def respond(
-    message,
     history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
 ):
-    # Build the ChatML message list
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
@@ -30,7 +32,7 @@ def respond(
             messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
-    # Stream tokens from the model
     response = ""
     for chunk in client.chat_completion(
         messages=messages,
@@ -43,6 +45,9 @@ def respond(
         response += token
         yield response
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
@@ -55,6 +60,8 @@ demo = gr.ChatInterface(
     description="Chat with Google Gemma‑2‑9B‑IT via Hugging Face Inference API.",
 )
-demo.queue()                # omit arguments; works on Gradio ≥ 4.0
 if __name__ == "__main__":
-    demo.launch()

+# app.py
 import os
 import gradio as gr
 import spaces
 from huggingface_hub import InferenceClient
+# ──────────────────────────
+# 1  Authentication & client
+# ──────────────────────────
+HF_TOKEN = os.getenv("HF_TOKEN")  # Add this secret in Settings → Secrets
+client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
+# ──────────────────────────
+# 2  Chat handler
+# ──────────────────────────
+@spaces.GPU  # Only required if your Space uses GPU / Zero‑GPU hardware
 def respond(
+    message: str,
     history: list[tuple[str, str]],
+    system_message: str,
+    max_tokens: int,
+    temperature: float,
+    top_p: float,
 ):
+    # Build ChatML conversation
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
             messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
+    # Stream tokens
     response = ""
     for chunk in client.chat_completion(
         messages=messages,
         response += token
         yield response
+# ──────────────────────────
+# 3  Gradio interface
+# ──────────────────────────
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
     description="Chat with Google Gemma‑2‑9B‑IT via Hugging Face Inference API.",
 )
+# Enable request queueing (concurrency handled automatically on Gradio ≥ 4)
+demo.queue()
 if __name__ == "__main__":
+    demo.launch()