giskard-evaluator

Running

App Files Community

200

Inoki at Giskard

ZeroCommand committed on Jan 25, 2024

Commit

7487fdb

verified ·

1 Parent(s): 8217e92

bug-fix-label-mapping-align-with-correct-idx (#80)

Browse files

- fix the label mapping order; fix out of scope error (bc6f52dabdda8f4e5aa9c9d980ffd3d2c8a55c49)

Co-authored-by: zcy <[email protected]>

Files changed (3) hide show

app.py +1 -3
app_leaderboard.py +6 -1
text_classification_ui_helpers.py +17 -9

app.py CHANGED Viewed

@@ -12,12 +12,10 @@ try:
         with gr.Tab("Text Classification"):
             get_demo_text_classification()
         with gr.Tab("Leaderboard") as leaderboard_tab:
-            get_demo_leaderboard()
         with gr.Tab("Logs(Debug)"):
             get_demo_debug()
-        leaderboard_tab.select(fn=get_demo_leaderboard)
     start_process_run_job()
     demo.queue(max_size=1000)

         with gr.Tab("Text Classification"):
             get_demo_text_classification()
         with gr.Tab("Leaderboard") as leaderboard_tab:
+            get_demo_leaderboard(leaderboard_tab)
         with gr.Tab("Logs(Debug)"):
             get_demo_debug()
     start_process_run_job()
     demo.queue(max_size=1000)

app_leaderboard.py CHANGED Viewed

@@ -73,8 +73,11 @@ def get_display_df(df):
         )
     return display_df
-def get_demo():
     logger.info("Loading leaderboard records")
     leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
     records = leaderboard.records
@@ -116,6 +119,8 @@ def get_demo():
     with gr.Row():
         leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
     @gr.on(
         triggers=[
             model_select.change,

         )
     return display_df
+def update_leaderboard_records():
+    logger.info("Updating leaderboard records")
+    leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
+def get_demo(leaderboard_tab):
     logger.info("Loading leaderboard records")
     leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
     records = leaderboard.records
     with gr.Row():
         leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
+    leaderboard_tab.select(fn=update_leaderboard_records)
     @gr.on(
         triggers=[
             model_select.change,

text_classification_ui_helpers.py CHANGED Viewed

@@ -30,7 +30,6 @@ MAX_FEATURES = 20
 ds_dict = None
 ds_config = None
 def get_related_datasets_from_leaderboard(model_id):
     records = leaderboard.records
     model_records = records[records["model_id"] == model_id]
@@ -100,7 +99,7 @@ def export_mappings(all_mappings, key, subkeys, values):
     if subkeys is None:
         subkeys = list(all_mappings[key].keys())
-    if not subkeys:
         logging.debug(f"subkeys is empty for {key}")
         return all_mappings
@@ -121,6 +120,8 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
         ds_labels = ds_labels[:MAX_LABELS]
         gr.Warning(f"The number of labels is truncated to length {MAX_LABELS}")
     ds_labels.sort()
     model_labels.sort()
@@ -293,17 +294,20 @@ def check_column_mapping_keys_validity(all_mappings):
         return (gr.update(interactive=True), gr.update(visible=False))
-def construct_label_and_feature_mapping(all_mappings):
     label_mapping = {}
-    for i, label in zip(
-        range(len(all_mappings["labels"].keys())), all_mappings["labels"].keys()
-    ):
-        # FIXME: What's the order during the save
         label_mapping.update({str(i): all_mappings["labels"][label]})
     if "features" not in all_mappings.keys():
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-        return (gr.update(interactive=True), gr.update(visible=False))
     feature_mapping = all_mappings["features"]
     return label_mapping, feature_mapping
@@ -311,7 +315,11 @@ def construct_label_and_feature_mapping(all_mappings):
 def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
     all_mappings = read_column_mapping(uid)
     check_column_mapping_keys_validity(all_mappings)
-    label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings)
     eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
     save_job_to_pipe(

 ds_dict = None
 ds_config = None
 def get_related_datasets_from_leaderboard(model_id):
     records = leaderboard.records
     model_records = records[records["model_id"] == model_id]
     if subkeys is None:
         subkeys = list(all_mappings[key].keys())
+    if not subkeys:
         logging.debug(f"subkeys is empty for {key}")
         return all_mappings
         ds_labels = ds_labels[:MAX_LABELS]
         gr.Warning(f"The number of labels is truncated to length {MAX_LABELS}")
+    # sort labels to make sure the order is consistent
+    # prediction gives the order based on probability
     ds_labels.sort()
     model_labels.sort()
         return (gr.update(interactive=True), gr.update(visible=False))
+def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
     label_mapping = {}
+    if len(all_mappings["labels"].keys()) != len(ds_labels):
+        gr.Warning("Label mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+    if len(all_mappings["features"].keys()) != len(ds_features):
+        gr.Warning("Feature mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+    for i, label in zip(range(len(ds_labels)),  ds_labels):
+        # align the saved labels with dataset labels order
         label_mapping.update({str(i): all_mappings["labels"][label]})
     if "features" not in all_mappings.keys():
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
     feature_mapping = all_mappings["features"]
     return label_mapping, feature_mapping
 def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
     all_mappings = read_column_mapping(uid)
     check_column_mapping_keys_validity(all_mappings)
+    # get ds labels and features again for alignment
+    ds = datasets.load_dataset(d_id, config)[split]
+    ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
+    label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
     eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
     save_job_to_pipe(