schroneko committed on
Commit
01d0d2d
·
verified ·
1 Parent(s): b19f17e

add 4o tokenizer

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -4,19 +4,24 @@ import tiktoken
4
 
5
  anthropic = Anthropic()
6
  gpt_35_encoder = tiktoken.get_encoding("cl100k_base")
 
7
 
8
  def count_anthropic_tokens(text: str) -> int:
9
  return anthropic.count_tokens(text)
10
 
11
- def count_gpt_tokens(text: str) -> int:
12
  return len(gpt_35_encoder.encode(text))
13
 
 
 
 
14
  def tokenize(text: str) -> tuple:
15
  anthropic_count = count_anthropic_tokens(text)
16
- gpt_tokens_count = count_gpt_tokens(text)
 
17
  char_count = len(text)
18
 
19
- return anthropic_count, gpt_tokens_count, char_count
20
 
21
  counter = gr.Interface(
22
  title="Token and Character Counter",
@@ -25,6 +30,7 @@ counter = gr.Interface(
25
  outputs=[
26
  gr.Number(label="Anthropic API (Claude) Token Count", interactive=False),
27
  gr.Number(label="OpenAI API (GPT-3.5 / GPT-4) Token Count", interactive=False),
 
28
  gr.Number(label="Character Count", interactive=False)
29
  ],
30
  live=True,
 
4
 
5
  anthropic = Anthropic()
6
  gpt_35_encoder = tiktoken.get_encoding("cl100k_base")
7
+ gpt_4o_encoder = tiktoken.get_encoding("o200k_base")
8
 
9
  def count_anthropic_tokens(text: str) -> int:
10
  return anthropic.count_tokens(text)
11
 
12
+ def count_gpt_tokens_35(text: str) -> int:
13
  return len(gpt_35_encoder.encode(text))
14
 
15
+ def count_gpt_tokens_4o(text: str) -> int:
16
+ return len(gpt_4o_encoder.encode(text))
17
+
18
  def tokenize(text: str) -> tuple:
19
  anthropic_count = count_anthropic_tokens(text)
20
+ gpt_tokens_count_35 = count_gpt_tokens_35(text)
21
+ gpt_tokens_count_4o = count_gpt_tokens_4o(text)
22
  char_count = len(text)
23
 
24
+ return anthropic_count, gpt_tokens_count_35, gpt_tokens_count_4o, char_count
25
 
26
  counter = gr.Interface(
27
  title="Token and Character Counter",
 
30
  outputs=[
31
  gr.Number(label="Anthropic API (Claude) Token Count", interactive=False),
32
  gr.Number(label="OpenAI API (GPT-3.5 / GPT-4) Token Count", interactive=False),
33
+ gr.Number(label="OpenAI API (GPT-4o) Token Count", interactive=False),
34
  gr.Number(label="Character Count", interactive=False)
35
  ],
36
  live=True,