Spaces:
Running
Running
[email protected]
committed on
Commit
·
c40ac63
1
Parent(s):
d4d8b2d
update
Browse files- app.py +10 -0
- src/about.py +5 -1
app.py
CHANGED
@@ -99,6 +99,16 @@ with demo:
|
|
99 |
with gr.TabItem("π
LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
100 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
with gr.TabItem("π About", elem_id="llm-benchmark-tab-table", id=2):
|
103 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
104 |
|
|
|
99 |
with gr.TabItem("π
LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
100 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
101 |
|
102 |
+
with gr.TabItem("π Performance Plot", elem_id="llm-benchmark-tab-table", id=1):
|
103 |
+
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
104 |
+
print(LEADERBOARD_DF)
|
105 |
+
# with gr.Row():
|
106 |
+
# bs_1_plot = gr.components.Plot(
|
107 |
+
# value=plot_throughput(LEADERBOARD_DF, bs=1),
|
108 |
+
# elem_id="bs1-plot",
|
109 |
+
# show_label=False,
|
110 |
+
# )
|
111 |
+
|
112 |
with gr.TabItem("π About", elem_id="llm-benchmark-tab-table", id=2):
|
113 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
114 |
|
src/about.py
CHANGED
@@ -33,7 +33,7 @@ Intro text
|
|
33 |
|
34 |
# Which evaluations are you running? how can people reproduce what you have?
|
35 |
LLM_BENCHMARKS_TEXT = '''
|
36 |
-
##
|
37 |
The prompt will follow the following style. Models' output are expected to follow this format.
|
38 |
```
|
39 |
Select the correct option(s) from the following options given the question. To solve the problem, follow the Let's think Step by Step reasoning strategy.
|
@@ -47,6 +47,10 @@ E voltage
|
|
47 |
{"step_1": "<Step 1 of your reasoning>", "step_2": "<Step 2 of your reasoning>", "step_n": "<Step n of your reasoning>", "answer": <the list of selected option, e.g., ["A", "B", "C", "D", "E"]>}
|
48 |
Your output in a single line:
|
49 |
```
|
|
|
|
|
|
|
|
|
50 |
## Reproducibility
|
51 |
To reproduce our results, here is the commands you can run:
|
52 |
|
|
|
33 |
|
34 |
# Which evaluations are you running? how can people reproduce what you have?
|
35 |
LLM_BENCHMARKS_TEXT = '''
|
36 |
+
## Prompt Format
|
37 |
The prompt will follow the following style. Models' output are expected to follow this format.
|
38 |
```
|
39 |
Select the correct option(s) from the following options given the question. To solve the problem, follow the Let's think Step by Step reasoning strategy.
|
|
|
47 |
{"step_1": "<Step 1 of your reasoning>", "step_2": "<Step 2 of your reasoning>", "step_n": "<Step n of your reasoning>", "answer": <the list of selected option, e.g., ["A", "B", "C", "D", "E"]>}
|
48 |
Your output in a single line:
|
49 |
```
|
50 |
+
## Expected Output Format
|
51 |
+
```
|
52 |
+
{"step_1": "<Step 1 of your reasoning>", "step_2": "<Step 2 of your reasoning>", "step_n": "<Step n of your reasoning>", "answer": <the list of selected option, e.g., ["A", "B", "C", "D", "E"]>}
|
53 |
+
```
|
54 |
## Reproducibility
|
55 |
To reproduce our results, here is the commands you can run:
|
56 |
|