Spaces:
Running
Running
[New] Add certification column
Browse files
New Certification column:
🏅 Gold Certification:
Awarded when you submit high-quality, pre-generated video samples from your model.
π Platinum Certification:
Our highest honor. Achieved by submitting your open-source model or a sampling-capable API for full verification by our team.
app.py
CHANGED
@@ -534,6 +534,7 @@ def get_baseline_df():
|
|
534 |
submission_repo.git_pull()
|
535 |
df = pd.read_csv(CSV_DIR)
|
536 |
df = get_final_score(df, checkbox_group.value)
|
|
|
537 |
df = df.sort_values(by="Selected Score", ascending=False)
|
538 |
present_columns = MODEL_INFO + checkbox_group.value
|
539 |
# print(present_columns)
|
@@ -559,6 +560,7 @@ def get_baseline_df_i2v():
|
|
559 |
submission_repo.git_pull()
|
560 |
df = pd.read_csv(I2V_DIR)
|
561 |
df = get_final_score_i2v(df, checkbox_group_i2v.value)
|
|
|
562 |
df = df.sort_values(by="Selected Score", ascending=False)
|
563 |
present_columns = MODEL_INFO_TAB_I2V + checkbox_group_i2v.value
|
564 |
# df = df[df["Sampled by"] == 'VBench Team']
|
@@ -571,6 +573,7 @@ def get_baseline_df_long():
|
|
571 |
submission_repo.git_pull()
|
572 |
df = pd.read_csv(LONG_DIR)
|
573 |
df = get_final_score(df, checkbox_group.value)
|
|
|
574 |
df = df.sort_values(by="Selected Score", ascending=False)
|
575 |
present_columns = MODEL_INFO + checkbox_group.value
|
576 |
# df = df[df["Sampled by"] == 'VBench Team']
|
@@ -582,6 +585,7 @@ def get_baseline_df_2():
|
|
582 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
583 |
submission_repo.git_pull()
|
584 |
df = pd.read_csv(VBENCH2_DIR)
|
|
|
585 |
df = get_final_score2(df, TASK_INFO_2)
|
586 |
df = df.sort_values(by="Total Score", ascending=False)
|
587 |
df = df[COLUMN_NAMES_2]
|
@@ -643,6 +647,20 @@ def convert_scores_to_percentage(df):
|
|
643 |
df[column] = df[column].apply(lambda x: f"{x:05.2f}%" if pd.notna(pd.to_numeric(x, errors='coerce')) else x)
|
644 |
return df
|
645 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
646 |
def choose_all_quailty():
|
647 |
return gr.update(value=QUALITY_LIST)
|
648 |
|
@@ -655,23 +673,42 @@ def disable_all():
|
|
655 |
def enable_all():
|
656 |
return gr.update(value=TASK_INFO)
|
657 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
658 |
# select function
|
659 |
-
def on_filter_model_size_method_change(selected_columns, vbench_team_sample, vbench_team_eval=False):
|
660 |
updated_data = get_all_df(selected_columns, CSV_DIR)
|
|
|
|
|
|
|
661 |
if vbench_team_sample:
|
662 |
-
updated_data = updated_data[updated_data["Sampled by"].str.contains('VBench Team')]
|
663 |
if vbench_team_eval:
|
664 |
-
updated_data =
|
665 |
-
|
666 |
-
|
|
|
667 |
selected_columns = [item for item in TASK_INFO if item in selected_columns]
|
668 |
present_columns = MODEL_INFO + selected_columns
|
669 |
-
|
|
|
|
|
670 |
updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
|
671 |
updated_data = convert_scores_to_percentage(updated_data)
|
672 |
-
updated_headers =
|
673 |
-
|
674 |
-
|
|
|
675 |
# print(updated_data,present_columns,update_datatype)
|
676 |
filter_component = gr.components.Dataframe(
|
677 |
value=updated_data,
|
@@ -681,7 +718,7 @@ def on_filter_model_size_method_change(selected_columns, vbench_team_sample, vbe
|
|
681 |
interactive=False,
|
682 |
visible=True,
|
683 |
)
|
684 |
-
return filter_component
|
685 |
|
686 |
def on_filter_model_size_method_change_quality(selected_columns):
|
687 |
updated_data = get_all_df_quality(selected_columns, QUALITY_DIR)
|
@@ -702,12 +739,13 @@ def on_filter_model_size_method_change_quality(selected_columns):
|
|
702 |
)
|
703 |
return filter_component#.value
|
704 |
|
705 |
-
def on_filter_model_size_method_change_i2v(selected_columns,vbench_team_sample, vbench_team_eval=False):
|
706 |
updated_data = get_all_df_i2v(selected_columns, I2V_DIR)
|
|
|
707 |
if vbench_team_sample:
|
708 |
-
updated_data = updated_data[updated_data["Sampled by"]
|
709 |
if vbench_team_eval:
|
710 |
-
updated_data = updated_data[updated_data['Evaluated by']
|
711 |
selected_columns = [item for item in I2V_TAB if item in selected_columns]
|
712 |
present_columns = MODEL_INFO_TAB_I2V + selected_columns
|
713 |
updated_data = updated_data[present_columns]
|
@@ -725,12 +763,15 @@ def on_filter_model_size_method_change_i2v(selected_columns,vbench_team_sample,
|
|
725 |
)
|
726 |
return filter_component#.value
|
727 |
|
728 |
-
def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample, vbench_team_eval=False):
|
729 |
updated_data = get_all_df_long(selected_columns, LONG_DIR)
|
|
|
730 |
if vbench_team_sample:
|
731 |
-
updated_data = updated_data[updated_data["Sampled by"]
|
732 |
if vbench_team_eval:
|
733 |
-
updated_data = updated_data[updated_data['Evaluated by']
|
|
|
|
|
734 |
selected_columns = [item for item in TASK_INFO if item in selected_columns]
|
735 |
present_columns = MODEL_INFO + selected_columns
|
736 |
updated_data = updated_data[present_columns]
|
@@ -749,14 +790,17 @@ def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample
|
|
749 |
return filter_component#.value
|
750 |
|
751 |
|
752 |
-
def on_filter_model_size_method_change_2(selected_columns=TASK_INFO_2, vbench_team_sample=False, vbench_team_eval=False):
|
753 |
updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
|
754 |
-
print(updated_data)
|
755 |
-
print(vbench_team_sample, vbench_team_eval)
|
|
|
756 |
if vbench_team_sample:
|
757 |
-
updated_data = updated_data[updated_data["Sampled by"].str.contains('VBench Team')]
|
758 |
if vbench_team_eval:
|
759 |
-
updated_data = updated_data[updated_data["Evaluated by"].str.contains('VBench Team')]
|
|
|
|
|
760 |
|
761 |
present_columns = VBENCH2_MODEL_INFO_DEFAULT + selected_columns
|
762 |
updated_headers = present_columns
|
@@ -776,16 +820,18 @@ def on_filter_model_size_method_change_2(selected_columns=TASK_INFO_2, vbench_te
|
|
776 |
)
|
777 |
return filter_component
|
778 |
|
779 |
-
def on_filter_model_size_method_score_change_2(select_score, vbench_team_sample=False, vbench_team_eval=False):
|
780 |
selected_columns = category_to_dimension[select_score]
|
781 |
-
print(select_score,"===>",selected_columns)
|
782 |
updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
|
783 |
-
|
|
|
784 |
if vbench_team_sample:
|
785 |
-
updated_data = updated_data[updated_data["Sampled by"].str.contains('VBench Team')]
|
786 |
if vbench_team_eval:
|
787 |
-
updated_data = updated_data[updated_data["Evaluated by"].str.contains('VBench Team')]
|
788 |
-
|
|
|
789 |
present_columns = VBENCH2_MODEL_INFO_DEFAULT + [f"{select_score} Score"] + selected_columns
|
790 |
updated_headers = present_columns
|
791 |
updated_data = updated_data[present_columns]
|
@@ -843,6 +889,11 @@ with block:
|
|
843 |
value=True,
|
844 |
interactive=True
|
845 |
)
|
|
|
|
|
|
|
|
|
|
|
846 |
# selection for column part:
|
847 |
checkbox_group = gr.CheckboxGroup(
|
848 |
choices=TASK_INFO,
|
@@ -850,6 +901,7 @@ with block:
|
|
850 |
label="Evaluation Dimension",
|
851 |
interactive=True,
|
852 |
)
|
|
|
853 |
|
854 |
data_component = gr.components.Dataframe(
|
855 |
value=get_baseline_df,
|
@@ -860,14 +912,17 @@ with block:
|
|
860 |
visible=True,
|
861 |
# height=700,
|
862 |
)
|
|
|
863 |
|
864 |
choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
|
865 |
choosen_s.click(choose_all_semantic, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
|
866 |
# enable_b.click(enable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter], outputs=data_component)
|
867 |
disable_b.click(disable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
|
868 |
-
checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
|
869 |
-
vbench_team_filter.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
|
870 |
-
vbench_validate_filter.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
|
|
|
|
|
871 |
# VBench 2.0
|
872 |
with gr.TabItem("β VBench 2.0", elem_id="vbench-tab-table", id=2):
|
873 |
with gr.Row():
|
@@ -893,6 +948,11 @@ with block:
|
|
893 |
value=True,
|
894 |
interactive=True
|
895 |
)
|
|
|
|
|
|
|
|
|
|
|
896 |
with gr.Row():
|
897 |
vbench2_creativity_button = gr.Button("Show Creativity Score")
|
898 |
vbench2_commonsense_button = gr.Button("Show Commonsense Score")
|
@@ -916,14 +976,15 @@ with block:
|
|
916 |
visible=True,
|
917 |
# height=700,
|
918 |
)
|
919 |
-
vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter_2], outputs=data_component_2)
|
920 |
-
vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter_2], outputs=data_component_2)
|
921 |
# vbench2_checkgroup.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
922 |
-
|
923 |
-
|
924 |
-
|
925 |
-
|
926 |
-
|
|
|
927 |
|
928 |
with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
|
929 |
with gr.Accordion("INSTRUCTION", open=False):
|
@@ -978,6 +1039,11 @@ with block:
|
|
978 |
value=False,
|
979 |
interactive=True
|
980 |
)
|
|
|
|
|
|
|
|
|
|
|
981 |
checkbox_group_i2v = gr.CheckboxGroup(
|
982 |
choices=I2V_TAB,
|
983 |
value=I2V_TAB,
|
@@ -994,10 +1060,10 @@ with block:
|
|
994 |
visible=True,
|
995 |
)
|
996 |
|
997 |
-
checkbox_group_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v,vbench_validate_filter_i2v], outputs=data_component_i2v)
|
998 |
-
vbench_team_filter_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v,vbench_validate_filter_i2v], outputs=data_component_i2v)
|
999 |
-
vbench_validate_filter_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v,vbench_validate_filter_i2v], outputs=data_component_i2v)
|
1000 |
-
|
1001 |
with gr.TabItem("π VBench-Long", elem_id="vbench-tab-table", id=5):
|
1002 |
with gr.Row():
|
1003 |
with gr.Accordion("INSTRUCTION", open=False):
|
@@ -1030,6 +1096,11 @@ with block:
|
|
1030 |
value=False,
|
1031 |
interactive=True
|
1032 |
)
|
|
|
|
|
|
|
|
|
|
|
1033 |
checkbox_group_long = gr.CheckboxGroup(
|
1034 |
choices=TASK_INFO,
|
1035 |
value=DEFAULT_INFO,
|
@@ -1037,7 +1108,7 @@ with block:
|
|
1037 |
interactive=True,
|
1038 |
)
|
1039 |
|
1040 |
-
|
1041 |
value=get_baseline_df_long,
|
1042 |
headers=COLUMN_NAMES,
|
1043 |
type="pandas",
|
@@ -1047,14 +1118,14 @@ with block:
|
|
1047 |
# height=700,
|
1048 |
)
|
1049 |
|
1050 |
-
choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=
|
1051 |
-
choosen_s_long.click(choose_all_semantic, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=
|
1052 |
-
enable_b_long.click(enable_all, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=
|
1053 |
-
disable_b_long.click(disable_all, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=
|
1054 |
-
checkbox_group_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long,vbench_validate_filter_long], outputs=
|
1055 |
-
vbench_team_filter_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long,vbench_validate_filter_long], outputs=
|
1056 |
-
vbench_validate_filter_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long,vbench_validate_filter_long], outputs=
|
1057 |
-
|
1058 |
# table info
|
1059 |
with gr.TabItem("π About", elem_id="mvbench-tab-table", id=6):
|
1060 |
gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
|
|
|
534 |
submission_repo.git_pull()
|
535 |
df = pd.read_csv(CSV_DIR)
|
536 |
df = get_final_score(df, checkbox_group.value)
|
537 |
+
df['Certification'] = df.apply(get_certification, axis=1)
|
538 |
df = df.sort_values(by="Selected Score", ascending=False)
|
539 |
present_columns = MODEL_INFO + checkbox_group.value
|
540 |
# print(present_columns)
|
|
|
560 |
submission_repo.git_pull()
|
561 |
df = pd.read_csv(I2V_DIR)
|
562 |
df = get_final_score_i2v(df, checkbox_group_i2v.value)
|
563 |
+
df['Certification'] = df.apply(get_certification, axis=1)
|
564 |
df = df.sort_values(by="Selected Score", ascending=False)
|
565 |
present_columns = MODEL_INFO_TAB_I2V + checkbox_group_i2v.value
|
566 |
# df = df[df["Sampled by"] == 'VBench Team']
|
|
|
573 |
submission_repo.git_pull()
|
574 |
df = pd.read_csv(LONG_DIR)
|
575 |
df = get_final_score(df, checkbox_group.value)
|
576 |
+
df['Certification'] = df.apply(get_certification, axis=1)
|
577 |
df = df.sort_values(by="Selected Score", ascending=False)
|
578 |
present_columns = MODEL_INFO + checkbox_group.value
|
579 |
# df = df[df["Sampled by"] == 'VBench Team']
|
|
|
585 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
586 |
submission_repo.git_pull()
|
587 |
df = pd.read_csv(VBENCH2_DIR)
|
588 |
+
df['Certification'] = df.apply(get_certification, axis=1)
|
589 |
df = get_final_score2(df, TASK_INFO_2)
|
590 |
df = df.sort_values(by="Total Score", ascending=False)
|
591 |
df = df[COLUMN_NAMES_2]
|
|
|
647 |
df[column] = df[column].apply(lambda x: f"{x:05.2f}%" if pd.notna(pd.to_numeric(x, errors='coerce')) else x)
|
648 |
return df
|
649 |
|
650 |
+
def convert_scores_to_percentage(df):
|
651 |
+
df_copy = df.copy()
|
652 |
+
NON_SCORE_COLS = ['Model Name (clickable)', 'Certification', 'Sampled by', 'Evaluated by', 'Accessibility', 'Date', 'Mail']
|
653 |
+
|
654 |
+
for col in df_copy.columns:
|
655 |
+
if col not in NON_SCORE_COLS:
|
656 |
+
numeric_series = pd.to_numeric(df_copy[col], errors='coerce')
|
657 |
+
|
658 |
+
if numeric_series.notna().any():
|
659 |
+
processed_series = round(numeric_series * 100, 2)
|
660 |
+
formatted_series = processed_series.apply(lambda x: f"{x:05.2f}%" if pd.notna(x) else x)
|
661 |
+
df_copy[col] = df_copy[col].astype(str).where(formatted_series.isna(), formatted_series)
|
662 |
+
return df_copy
|
663 |
+
|
664 |
def choose_all_quailty():
|
665 |
return gr.update(value=QUALITY_LIST)
|
666 |
|
|
|
673 |
def enable_all():
|
674 |
return gr.update(value=TASK_INFO)
|
675 |
|
676 |
+
def get_certification(row):
|
677 |
+
is_sampled_by_vbench = 'VBench Team' in str(row.get('Sampled by', ''))
|
678 |
+
is_evaluated_by_vbench = 'VBench Team' in str(row.get('Evaluated by', ''))
|
679 |
+
if is_sampled_by_vbench and is_evaluated_by_vbench:
|
680 |
+
return 'π₯ Platinum'
|
681 |
+
elif is_sampled_by_vbench or is_evaluated_by_vbench:
|
682 |
+
return '🏅 Gold'
|
683 |
+
else:
|
684 |
+
return ''
|
685 |
+
|
686 |
+
|
687 |
+
|
688 |
# select function
|
689 |
+
def on_filter_model_size_method_change(selected_columns, vbench_team_sample, vbench_team_eval=False, show_platinum_only=False):
|
690 |
updated_data = get_all_df(selected_columns, CSV_DIR)
|
691 |
+
|
692 |
+
updated_data['Certification'] = updated_data.apply(get_certification, axis=1)
|
693 |
+
|
694 |
if vbench_team_sample:
|
695 |
+
updated_data = updated_data[updated_data["Sampled by"].str.contains('VBench Team', na=False)]
|
696 |
if vbench_team_eval:
|
697 |
+
updated_data = updated_data[updated_data["Evaluated by"].str.contains('VBench Team', na=False)]
|
698 |
+
|
699 |
+
if show_platinum_only:
|
700 |
+
updated_data = updated_data[updated_data['Certification'] == 'π₯ Platinum']
|
701 |
selected_columns = [item for item in TASK_INFO if item in selected_columns]
|
702 |
present_columns = MODEL_INFO + selected_columns
|
703 |
+
final_present_columns = [col for col in present_columns if col in updated_data.columns]
|
704 |
+
updated_data = updated_data[final_present_columns]
|
705 |
+
|
706 |
updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
|
707 |
updated_data = convert_scores_to_percentage(updated_data)
|
708 |
+
updated_headers = final_present_columns
|
709 |
+
type_map = {name: dtype for name, dtype in zip(COLUMN_NAMES, DATA_TITILE_TYPE)}
|
710 |
+
type_map['Certification'] = 'str'
|
711 |
+
update_datatype = [type_map.get(h, 'str') for h in updated_headers]
|
712 |
# print(updated_data,present_columns,update_datatype)
|
713 |
filter_component = gr.components.Dataframe(
|
714 |
value=updated_data,
|
|
|
718 |
interactive=False,
|
719 |
visible=True,
|
720 |
)
|
721 |
+
return filter_component
|
722 |
|
723 |
def on_filter_model_size_method_change_quality(selected_columns):
|
724 |
updated_data = get_all_df_quality(selected_columns, QUALITY_DIR)
|
|
|
739 |
)
|
740 |
return filter_component#.value
|
741 |
|
742 |
+
def on_filter_model_size_method_change_i2v(selected_columns,vbench_team_sample, vbench_team_eval=False, show_platinum_only=False):
|
743 |
updated_data = get_all_df_i2v(selected_columns, I2V_DIR)
|
744 |
+
updated_data['Certification'] = updated_data.apply(get_certification, axis=1)
|
745 |
if vbench_team_sample:
|
746 |
+
updated_data = updated_data[updated_data["Sampled by"].str.contains('VBench Team', na=False)]
|
747 |
if vbench_team_eval:
|
748 |
+
updated_data = updated_data[updated_data['Evaluated by'].str.contains('VBench Team', na=False)]
|
749 |
selected_columns = [item for item in I2V_TAB if item in selected_columns]
|
750 |
present_columns = MODEL_INFO_TAB_I2V + selected_columns
|
751 |
updated_data = updated_data[present_columns]
|
|
|
763 |
)
|
764 |
return filter_component#.value
|
765 |
|
766 |
+
def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample, vbench_team_eval=False, show_platinum_only=False):
|
767 |
updated_data = get_all_df_long(selected_columns, LONG_DIR)
|
768 |
+
updated_data['Certification'] = updated_data.apply(get_certification, axis=1)
|
769 |
if vbench_team_sample:
|
770 |
+
updated_data = updated_data[updated_data["Sampled by"].str.contains('VBench Team', na=False)]
|
771 |
if vbench_team_eval:
|
772 |
+
updated_data = updated_data[updated_data['Evaluated by'].str.contains('VBench Team', na=False)]
|
773 |
+
if show_platinum_only:
|
774 |
+
updated_data = updated_data[updated_data['Certification'] == 'π₯ Platinum']
|
775 |
selected_columns = [item for item in TASK_INFO if item in selected_columns]
|
776 |
present_columns = MODEL_INFO + selected_columns
|
777 |
updated_data = updated_data[present_columns]
|
|
|
790 |
return filter_component#.value
|
791 |
|
792 |
|
793 |
+
def on_filter_model_size_method_change_2(selected_columns=TASK_INFO_2, vbench_team_sample=False, vbench_team_eval=False, show_platinum_only=False):
|
794 |
updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
|
795 |
+
# print(updated_data)
|
796 |
+
# print(vbench_team_sample, vbench_team_eval)
|
797 |
+
updated_data['Certification'] = updated_data.apply(get_certification, axis=1)
|
798 |
if vbench_team_sample:
|
799 |
+
updated_data = updated_data[updated_data["Sampled by"].str.contains('VBench Team', na=False)]
|
800 |
if vbench_team_eval:
|
801 |
+
updated_data = updated_data[updated_data["Evaluated by"].str.contains('VBench Team', na=False)]
|
802 |
+
if show_platinum_only:
|
803 |
+
updated_data = updated_data[updated_data['Certification'] == 'π₯ Platinum']
|
804 |
|
805 |
present_columns = VBENCH2_MODEL_INFO_DEFAULT + selected_columns
|
806 |
updated_headers = present_columns
|
|
|
820 |
)
|
821 |
return filter_component
|
822 |
|
823 |
+
def on_filter_model_size_method_score_change_2(select_score, vbench_team_sample=False, vbench_team_eval=False, show_platinum_only=False):
|
824 |
selected_columns = category_to_dimension[select_score]
|
825 |
+
# print(select_score,"===>",selected_columns)
|
826 |
updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
|
827 |
+
updated_data['Certification'] = updated_data.apply(get_certification, axis=1)
|
828 |
+
# print(updated_data)
|
829 |
if vbench_team_sample:
|
830 |
+
updated_data = updated_data[updated_data["Sampled by"].str.contains('VBench Team', na=False)]
|
831 |
if vbench_team_eval:
|
832 |
+
updated_data = updated_data[updated_data["Evaluated by"].str.contains('VBench Team', na=False)]
|
833 |
+
if show_platinum_only:
|
834 |
+
updated_data = updated_data[updated_data['Certification'] == 'π₯ Platinum']
|
835 |
present_columns = VBENCH2_MODEL_INFO_DEFAULT + [f"{select_score} Score"] + selected_columns
|
836 |
updated_headers = present_columns
|
837 |
updated_data = updated_data[present_columns]
|
|
|
889 |
value=True,
|
890 |
interactive=True
|
891 |
)
|
892 |
+
show_platinum_button = gr.Checkbox(
|
893 |
+
label="Only show π₯ Platinum",
|
894 |
+
value=False,
|
895 |
+
interactive=True
|
896 |
+
)
|
897 |
# selection for column part:
|
898 |
checkbox_group = gr.CheckboxGroup(
|
899 |
choices=TASK_INFO,
|
|
|
901 |
label="Evaluation Dimension",
|
902 |
interactive=True,
|
903 |
)
|
904 |
+
|
905 |
|
906 |
data_component = gr.components.Dataframe(
|
907 |
value=get_baseline_df,
|
|
|
912 |
visible=True,
|
913 |
# height=700,
|
914 |
)
|
915 |
+
|
916 |
|
917 |
choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
|
918 |
choosen_s.click(choose_all_semantic, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
|
919 |
# enable_b.click(enable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter], outputs=data_component)
|
920 |
disable_b.click(disable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
|
921 |
+
checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter, vbench_validate_filter, show_platinum_button], outputs=data_component)
|
922 |
+
vbench_team_filter.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter, show_platinum_button], outputs=data_component)
|
923 |
+
vbench_validate_filter.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter, show_platinum_button], outputs=data_component)
|
924 |
+
show_platinum_button.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter, show_platinum_button], outputs=data_component)
|
925 |
+
|
926 |
# VBench 2.0
|
927 |
with gr.TabItem("β VBench 2.0", elem_id="vbench-tab-table", id=2):
|
928 |
with gr.Row():
|
|
|
948 |
value=True,
|
949 |
interactive=True
|
950 |
)
|
951 |
+
show_platinum_button_2 = gr.Checkbox(
|
952 |
+
label="Only show π₯ Platinum",
|
953 |
+
value=False,
|
954 |
+
interactive=True
|
955 |
+
)
|
956 |
with gr.Row():
|
957 |
vbench2_creativity_button = gr.Button("Show Creativity Score")
|
958 |
vbench2_commonsense_button = gr.Button("Show Commonsense Score")
|
|
|
976 |
visible=True,
|
977 |
# height=700,
|
978 |
)
|
979 |
+
vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter_2, show_platinum_button_2], outputs=data_component_2)
|
980 |
+
vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter_2, show_platinum_button_2], outputs=data_component_2)
|
981 |
# vbench2_checkgroup.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
982 |
+
|
983 |
+
vbench2_creativity_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Creativity"), vbench_team_filter_2, vbench_validate_filter_2, show_platinum_button_2], outputs=[data_component_2, vbench2_checkgroup])
|
984 |
+
vbench2_commonsense_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Commonsense"), vbench_team_filter_2, vbench_validate_filter_2, show_platinum_button_2], outputs=[data_component_2, vbench2_checkgroup])
|
985 |
+
vbench2_control_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Controllability"), vbench_team_filter_2, vbench_validate_filter_2, show_platinum_button_2], outputs=[data_component_2, vbench2_checkgroup])
|
986 |
+
vbench2_human_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Human Fidelity"), vbench_team_filter_2, vbench_validate_filter_2, show_platinum_button_2], outputs=[data_component_2, vbench2_checkgroup])
|
987 |
+
vbench2_physics_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Physics"), vbench_team_filter_2, vbench_validate_filter_2, show_platinum_button_2], outputs=[data_component_2, vbench2_checkgroup])
|
988 |
|
989 |
with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
|
990 |
with gr.Accordion("INSTRUCTION", open=False):
|
|
|
1039 |
value=False,
|
1040 |
interactive=True
|
1041 |
)
|
1042 |
+
show_platinum_button_i2v = gr.Checkbox(
|
1043 |
+
label="Only show π₯ Platinum",
|
1044 |
+
value=False,
|
1045 |
+
interactive=True
|
1046 |
+
)
|
1047 |
checkbox_group_i2v = gr.CheckboxGroup(
|
1048 |
choices=I2V_TAB,
|
1049 |
value=I2V_TAB,
|
|
|
1060 |
visible=True,
|
1061 |
)
|
1062 |
|
1063 |
+
checkbox_group_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v, vbench_validate_filter_i2v, show_platinum_button_i2v], outputs=data_component_i2v)
|
1064 |
+
vbench_team_filter_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v, vbench_validate_filter_i2v, show_platinum_button_i2v], outputs=data_component_i2v)
|
1065 |
+
vbench_validate_filter_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v, vbench_validate_filter_i2v, show_platinum_button_i2v], outputs=data_component_i2v)
|
1066 |
+
show_platinum_button_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v, vbench_validate_filter_i2v, show_platinum_button_i2v], outputs=data_component_i2v)
|
1067 |
with gr.TabItem("π VBench-Long", elem_id="vbench-tab-table", id=5):
|
1068 |
with gr.Row():
|
1069 |
with gr.Accordion("INSTRUCTION", open=False):
|
|
|
1096 |
value=False,
|
1097 |
interactive=True
|
1098 |
)
|
1099 |
+
show_platinum_button_long = gr.Checkbox(
|
1100 |
+
label="Only show π₯ Platinum",
|
1101 |
+
value=False,
|
1102 |
+
interactive=True
|
1103 |
+
)
|
1104 |
checkbox_group_long = gr.CheckboxGroup(
|
1105 |
choices=TASK_INFO,
|
1106 |
value=DEFAULT_INFO,
|
|
|
1108 |
interactive=True,
|
1109 |
)
|
1110 |
|
1111 |
+
data_component_long = gr.components.Dataframe(
|
1112 |
value=get_baseline_df_long,
|
1113 |
headers=COLUMN_NAMES,
|
1114 |
type="pandas",
|
|
|
1118 |
# height=700,
|
1119 |
)
|
1120 |
|
1121 |
+
choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component_long)
|
1122 |
+
choosen_s_long.click(choose_all_semantic, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component_long)
|
1123 |
+
enable_b_long.click(enable_all, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component_long)
|
1124 |
+
disable_b_long.click(disable_all, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component_long)
|
1125 |
+
checkbox_group_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long, show_platinum_button_long], outputs=data_component_long)
|
1126 |
+
vbench_team_filter_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long, show_platinum_button_long], outputs=data_component_long)
|
1127 |
+
vbench_validate_filter_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long, show_platinum_button_long], outputs=data_component_long)
|
1128 |
+
show_platinum_button_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long, show_platinum_button_long], outputs=data_component_long)
|
1129 |
# table info
|
1130 |
with gr.TabItem("π About", elem_id="mvbench-tab-table", id=6):
|
1131 |
gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
|