Spaces:
Runtime error
Runtime error
add 4o tokenizer
Browse files
app.py
CHANGED
@@ -4,19 +4,24 @@ import tiktoken
|
|
4 |
|
5 |
anthropic = Anthropic()
|
6 |
gpt_35_encoder = tiktoken.get_encoding("cl100k_base")
|
|
|
7 |
|
8 |
def count_anthropic_tokens(text: str) -> int:
    """Return the token count of *text* as reported by the Anthropic client.

    Delegates to ``count_tokens`` on the module-level ``anthropic`` client.
    """
    token_total = anthropic.count_tokens(text)
    return token_total
|
10 |
|
11 |
-
def
|
12 |
return len(gpt_35_encoder.encode(text))
|
13 |
|
|
|
|
|
|
|
14 |
def tokenize(text: str) -> tuple:
|
15 |
anthropic_count = count_anthropic_tokens(text)
|
16 |
-
|
|
|
17 |
char_count = len(text)
|
18 |
|
19 |
-
return anthropic_count,
|
20 |
|
21 |
counter = gr.Interface(
|
22 |
title="Token and Character Counter",
|
@@ -25,6 +30,7 @@ counter = gr.Interface(
|
|
25 |
outputs=[
|
26 |
gr.Number(label="Anthropic API (Claude) Token Count", interactive=False),
|
27 |
gr.Number(label="OpenAI API (GPT-3.5 / GPT-4) Token Count", interactive=False),
|
|
|
28 |
gr.Number(label="Character Count", interactive=False)
|
29 |
],
|
30 |
live=True,
|
|
|
4 |
|
5 |
anthropic = Anthropic()
|
6 |
gpt_35_encoder = tiktoken.get_encoding("cl100k_base")
|
7 |
+
gpt_4o_encoder = tiktoken.get_encoding("o200k_base")
|
8 |
|
9 |
def count_anthropic_tokens(text: str) -> int:
    """Return the token count of *text* as reported by the Anthropic client.

    Delegates to ``count_tokens`` on the module-level ``anthropic`` client.
    """
    token_total = anthropic.count_tokens(text)
    return token_total
|
11 |
|
12 |
+
def count_gpt_tokens_35(text: str) -> int:
    """Return the number of tokens in *text* under the ``gpt_35_encoder``.

    Args:
        text: Arbitrary user-supplied text; may be empty.

    Returns:
        The length of the token sequence produced by the module-level
        ``gpt_35_encoder``.
    """
    # tiktoken rejects text containing special-token strings such as
    # "<|endoftext|>" by default (ValueError). This is a counter fed with
    # free-form input, so treat such strings as ordinary text instead of
    # failing the whole request.
    token_ids = gpt_35_encoder.encode(text, disallowed_special=())
    return len(token_ids)
|
14 |
|
15 |
+
def count_gpt_tokens_4o(text: str) -> int:
    """Return the number of tokens in *text* under the ``gpt_4o_encoder``.

    Args:
        text: Arbitrary user-supplied text; may be empty.

    Returns:
        The length of the token sequence produced by the module-level
        ``gpt_4o_encoder``.
    """
    # tiktoken rejects text containing special-token strings such as
    # "<|endoftext|>" by default (ValueError). This is a counter fed with
    # free-form input, so treat such strings as ordinary text instead of
    # failing the whole request.
    token_ids = gpt_4o_encoder.encode(text, disallowed_special=())
    return len(token_ids)
|
17 |
+
|
18 |
def tokenize(text: str) -> tuple:
    """Compute all counts for *text* in a single call.

    Returns:
        A 4-tuple ``(anthropic_tokens, gpt_35_tokens, gpt_4o_tokens, chars)``.
        The character count is ``len(text)``.
    """
    # Tuple elements are evaluated left to right, so the counters run in the
    # same order as before: Anthropic, GPT-3.5, GPT-4o, then the length.
    return (
        count_anthropic_tokens(text),
        count_gpt_tokens_35(text),
        count_gpt_tokens_4o(text),
        len(text),
    )
|
25 |
|
26 |
counter = gr.Interface(
|
27 |
title="Token and Character Counter",
|
|
|
30 |
outputs=[
|
31 |
gr.Number(label="Anthropic API (Claude) Token Count", interactive=False),
|
32 |
gr.Number(label="OpenAI API (GPT-3.5 / GPT-4) Token Count", interactive=False),
|
33 |
+
gr.Number(label="OpenAI API (GPT-4o) Token Count", interactive=False),
|
34 |
gr.Number(label="Character Count", interactive=False)
|
35 |
],
|
36 |
live=True,
|