Futuresony committed on
Commit 79d6d35 · verified · 1 Parent(s): 2b7ab90

Update app.py

Files changed (1)
  1. app.py +24 -17
app.py CHANGED
@@ -1,27 +1,29 @@
+# app.py
 import os
 import gradio as gr
 import spaces
 from huggingface_hub import InferenceClient
 
-# Read the HF access token from repository secrets (Settings → Secrets)
-HF_TOKEN = os.getenv("HF_TOKEN")  # None if not provided
+# ──────────────────────────
+# 1  Authentication & client
+# ──────────────────────────
+HF_TOKEN = os.getenv("HF_TOKEN")  # Add this secret in Settings → Secrets
 
-# Inference client for Gemma-2-9B-IT (accept the model license on HF first)
-client = InferenceClient(
-    repo_id="google/gemma-2-9b-it",
-    token=HF_TOKEN  # pass token explicitly
-)
+client = InferenceClient("google/gemma-2-9b-it", token=HF_TOKEN)
 
-@spaces.GPU  # needed only if you pick GPU/Zero-GPU hardware
+# ──────────────────────────
+# 2  Chat handler
+# ──────────────────────────
+@spaces.GPU  # Only required if your Space uses GPU / Zero-GPU hardware
 def respond(
-    message,
+    message: str,
     history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
+    system_message: str,
+    max_tokens: int,
+    temperature: float,
+    top_p: float,
 ):
-    # Build the ChatML message list
+    # Build ChatML conversation
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
@@ -30,7 +32,7 @@ def respond(
             messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
 
-    # Stream tokens from the model
+    # Stream tokens
     response = ""
     for chunk in client.chat_completion(
         messages=messages,
@@ -43,6 +45,9 @@
         response += token
         yield response
 
+# ──────────────────────────
+# 3  Gradio interface
+# ──────────────────────────
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
@@ -55,6 +60,8 @@ demo = gr.ChatInterface(
     description="Chat with Google Gemma-2-9B-IT via Hugging Face Inference API.",
 )
 
-demo.queue()  # omit arguments; works on Gradio ≥ 4.0
+# Enable request queueing (concurrency handled automatically on Gradio ≥ 4)
+demo.queue()
+
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
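Two hunks are elided from the diff above: the remainder of the client.chat_completion(...) call inside respond(), and the additional_inputs widget list of gr.ChatInterface. This commit leaves both unchanged. For reference, here is a minimal sketch of those parts, assuming the file follows the stock Hugging Face chat-Space template; the widget labels and default values are illustrative assumptions, not the file's verified contents.

# Sketch only: the elided, unchanged hunks, assuming the stock Hugging Face
# chat-Space template. Labels and default values below are assumptions.

# ...continuation of the client.chat_completion(...) call inside respond():
        max_tokens=max_tokens,
        stream=True,  # stream partial output instead of waiting for the full reply
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content  # incremental text delta per chunk

# ...the widgets in gr.ChatInterface that feed respond()'s extra parameters, in order:
    additional_inputs=[
        gr.Textbox(value="You are a friendly chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],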