lsb committed on
Commit
c1481f3
·
verified ·
1 Parent(s): b564341

Update requirements.txt (#1)

Browse files

- Update requirements.txt (875b34459a00833337d20f2f0ecb3e989577c51f)
- Update app.py (529ae2345094455908687d930c423cebb8abecb5)
- Create test_app.py (66b202656aa3f28fe24d857908069ff4e94f6bc7)

Files changed (3) hide show
  1. app.py +31 -35
  2. requirements.txt +4 -3
  3. test_app.py +43 -0
app.py CHANGED
@@ -1,29 +1,28 @@
1
  import gradio as gr
2
- import outlines
 
3
  import transformers
4
  import torch
5
- from threading import Thread
6
 
7
- pipe = transformers.pipeline("text-generation", "HuggingFaceTB/SmolLM-1.7B-Instruct", torch_dtype=torch.float32)
8
- outlines_tokenizer = outlines.models.TransformerTokenizer(pipe.tokenizer)
 
 
 
9
 
10
- def string_to_acrostic_grammar(s, dash_initial=True):
11
  # this will convert a string to a CFG grammar
12
  chars = filter(str.isalpha, s.upper())
13
- grammar_rules = [('"- " ' if dash_initial else '') + f'"{char}" /[^-\\r\\n]+/ "\\n"' for char in chars]
14
  return "?start: " + " ".join(grammar_rules)
15
 
16
  def is_this_prompt_a_list(prompt):
17
- # this will check if the prompt is a list
18
- # ask the model if the prompt is a list, by constraining the generation to yes or no about a question whether the prompt is a list
19
- question = f'You are trying to understand the desired format of output for a prompt, whether it will be a list or a story. The prompt:\n```{prompt}```\n\nIs this prompt asking for short phrases in a list, or long sentences in a story?'
20
- grammar = '?start: ("list" | "story")'
21
- cfg_logits_processor = outlines.processors.CFGLogitsProcessor(grammar, outlines_tokenizer)
22
- output = pipe([{"role": "user", "content": question}, {"role": "assistant", "content": "The output to this prompt is a "}], logits_processor=transformers.LogitsProcessorList([cfg_logits_processor]), max_new_tokens=10,)
23
- response = output[0]['generated_text'][-1]['content'].split()[-1]
24
- # the last word is the answer
25
- print("is this prompt a list?", response)
26
- return response == "list"
27
 
28
  def respond(
29
  message,
@@ -35,30 +34,27 @@ def respond(
35
  top_p,
36
  ):
37
  print({"message": message, "history": history, "system_message": system_message, "acrostic": acrostic, "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p})
38
- # this will generate a response to the message
39
- prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
40
- grammar = string_to_acrostic_grammar(acrostic, dash_initial=is_this_prompt_a_list(prompt))
41
- acrostic_logits_processor = outlines.processors.CFGLogitsProcessor(grammar, outlines_tokenizer)
42
- streamer = transformers.TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, decode_kwargs={"skip_special_tokens": True})
43
  current_inputs = []
44
- # take the current inputs, and for every item in the history (which is a list of [x,y], add it to the current inputs like so: {"role": "user", "content": x), {"role": "assistant", "content": y}
45
  for x, y in history:
46
  current_inputs.append({"role": "user", "content": x})
47
  current_inputs.append({"role": "assistant", "content": y})
48
- # add the current inputs to the inputs
49
- inputs = current_inputs + [{"role": "user", "content": prompt}]
 
 
 
 
 
 
50
 
51
- generation_kwargs = dict(text_inputs=inputs, logits_processor=transformers.LogitsProcessorList([acrostic_logits_processor]), streamer=streamer, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)
52
- thread = Thread(target=pipe, kwargs=generation_kwargs)
53
- thread.start()
54
- # this will generate a response to the message
55
- # TODO: figure out why skip special tokens doesn't skip special tokens
56
- special_tokens = set([str(v) for v in pipe.tokenizer.special_tokens_map.values()])
57
- response = ""
58
- for new_text in streamer:
59
- if new_text not in special_tokens:
60
- response += new_text
61
- yield response
62
 
63
 
64
  """
 
1
  import gradio as gr
2
+ from outlines import Transformers
3
+ from outlines.types import CFG
4
  import transformers
5
  import torch
6
+ from sentence_transformers import SentenceTransformer
7
 
8
+ pipe = transformers.pipeline("text-generation", "HuggingFaceTB/SmolLM-360M-Instruct", dtype=torch.float32)
9
+ # Create outlines model wrapper
10
+ outlines_model = Transformers(pipe.model, pipe.tokenizer)
11
+ minilm = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
12
+ write_a_list, write_a_story = minilm.encode(["write a list", "write a story"])
13
 
14
+ def string_to_acrostic_grammar(s, bullet_initial=True):
15
  # this will convert a string to a CFG grammar
16
  chars = filter(str.isalpha, s.upper())
17
+ grammar_rules = [('"* " ' if bullet_initial else '') + f'"{char}" /[^-\\r\\n]+/ "\\n"' for char in chars]
18
  return "?start: " + " ".join(grammar_rules)
19
 
20
  def is_this_prompt_a_list(prompt):
21
+ # embed the prompt in minilm and find the euclidean distance to write_a_list and write_a_story and see which is closer
22
+ prompt_embedding = minilm.encode([prompt])[0]
23
+ distance_to_list = ((prompt_embedding - write_a_list) ** 2).sum()
24
+ distance_to_story = ((prompt_embedding - write_a_story) ** 2).sum()
25
+ return distance_to_list < distance_to_story
 
 
 
 
 
26
 
27
  def respond(
28
  message,
 
34
  top_p,
35
  ):
36
  print({"message": message, "history": history, "system_message": system_message, "acrostic": acrostic, "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p})
37
+
38
+ # Build the prompt with conversation history
 
 
 
39
  current_inputs = []
 
40
  for x, y in history:
41
  current_inputs.append({"role": "user", "content": x})
42
  current_inputs.append({"role": "assistant", "content": y})
43
+ current_inputs.append({"role": "user", "content": message})
44
+
45
+ # Apply chat template
46
+ prompt = pipe.tokenizer.apply_chat_template(current_inputs, tokenize=False, add_generation_prompt=True)
47
+
48
+ # Create CFG grammar for acrostic format
49
+ grammar_str = string_to_acrostic_grammar(acrostic, bullet_initial=is_this_prompt_a_list(message))
50
+ cfg_type = CFG(grammar_str)
51
 
52
+ # Use outlines model to generate with CFG constraints
53
+ # Note: streaming is not yet supported for Transformers models with CFG
54
+ # so we generate the full response and yield it
55
+ # Call the model directly (not .generate()) to use CFG
56
+ response = outlines_model(prompt, cfg_type, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
57
+ yield response
 
 
 
 
 
58
 
59
 
60
  """
requirements.txt CHANGED
@@ -2,8 +2,9 @@ huggingface_hub
2
  transformers
3
  accelerate
4
  optimum-quanto
5
- outlines==0.1.14
 
6
  sentencepiece
7
  datasets
8
- # --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
9
- # llama-cpp-python==0.3.4
 
2
  transformers
3
  accelerate
4
  optimum-quanto
5
+ outlines
6
+ llguidance
7
  sentencepiece
8
  datasets
9
+ sentence-transformers
10
+ gradio
test_app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from app import is_this_prompt_a_list, respond
3
+
4
+
5
+ def test_is_this_prompt_a_list_with_list_request():
6
+ """Test that 'tell me ten css class names' is correctly identified as a list"""
7
+ result = is_this_prompt_a_list("tell me ten css class names")
8
+ assert result == True, "Expected 'tell me ten css class names' to be identified as a list"
9
+
10
+
11
+ def test_is_this_prompt_a_list_with_story_request():
12
+ """Test that 'tell me a tall tale' is correctly identified as NOT a list"""
13
+ result = is_this_prompt_a_list("tell me a tall tale")
14
+ assert result == False, "Expected 'tell me a tall tale' to be identified as a story (not a list)"
15
+
16
+
17
+ def test_respond_generates_acrostic_with_dash():
18
+ """Test that respond generates output starting with '* G' for acrostic 'good dog'"""
19
+ # Get the generator and consume it to get the final response
20
+ response_gen = respond(
21
+ "give me seven great names for a dog",
22
+ [],
23
+ "You are a friendly Chatbot.",
24
+ "good dog",
25
+ 512,
26
+ 0.2,
27
+ 0.95
28
+ )
29
+
30
+ # Get the final response by consuming the generator
31
+ final_response = None
32
+ for response in response_gen:
33
+ final_response = response
34
+
35
+ # Check that we got some output
36
+ assert final_response is not None, "Expected some response output"
37
+ assert len(final_response) > 0, "Expected non-empty response"
38
+
39
+ # Check that the last non-empty line starts with "* G" (acrostic format with bullet)
40
+ lines = [line for line in final_response.split("\n") if line.strip()]
41
+ assert len(lines) > 0, "Expected at least one non-empty line"
42
+ last_line = lines[-1]
43
+ assert last_line[0:3] == "* G", f"Expected last line to start with '* G', but got: '{last_line[0:3]}'"