Spaces:

Frederick
/

Clause_Segmentation_and_Classification

Build error

App Files Files Community

Frederick committed on Feb 24, 2023

Commit

202ae33

1 Parent(s): 95345e7

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -4

app.py CHANGED Viewed

@@ -127,10 +127,34 @@ def get_pred_classification_labels(clauses, batch_size=32):
         logits = classification_model(**model_inputs)[0]
         pred_labels = logits.argmax(-1).numpy()
         pred_labels = [index2label[l] for l in pred_labels]
-        clause2labels.extend([(s, str(labels2attrs[l]),) for s,l in zip(batch_examples, pred_labels)])
-        print(clause2labels)
     return clause2labels
 def run_pipeline(text):
@@ -143,7 +167,10 @@ def run_pipeline(text):
     clause2labels = get_pred_classification_labels(all_clauses)
     output_clauses = [(c, str(i + 1))  for i, c in enumerate(all_clauses)]
-    return output_clauses, clause2labels
 # with open("pipeline_outputs.jsonl", "w") as fw:
 #     with open("all_text.txt", "r") as f:
@@ -165,7 +192,7 @@ color_panel_1 =  ["red", "green",  "yellow", "DodgerBlue", "orange", "DarkSalmon
 index_colormap = {str(i) : color_panel_1[i % len(color_panel_1)] for i in np.arange(1, 100000)}
 color_panel_2 = ["Gray", "DodgerBlue", "Wheat", "OliveDrab", "DarkKhaki", "DarkSalmon", "Orange", "Gold", "Aqua", "Tomato", "Violet"]
 str_attrs = sorted([str(v) for v in set(labels2attrs.values())])
-print(str_attrs, len(str_attrs), len(color_panel_2))
 assert len(str_attrs) == len(color_panel_2)
 attr_colormap = {a:c for a, c in zip(str_attrs, color_panel_2)}
 # attr_colormap = {
@@ -203,6 +230,8 @@ demo = gr.Interface(
             show_legend=True,
             combine_adjacent=False,
         ).style(color_map=attr_colormap),
     ]
 )

         logits = classification_model(**model_inputs)[0]
         pred_labels = logits.argmax(-1).numpy()
         pred_labels = [index2label[l] for l in pred_labels]
+        clause2labels.extend([(s, labels2attrs[l],) for s,l in zip(batch_examples, pred_labels)])
     return clause2labels
+def label_visualization(clause2labels):
+    """Draw three pie charts summarising the predicted clause labels.
+
+    Args:
+        clause2labels: iterable of ``(clause, labels)`` pairs. ``labels`` must
+            be convertible to a tuple with at least three entries, read as
+            (genericity, aspect, boundedness) in that order.
+
+    Returns:
+        The matplotlib figure holding the "Aspect", "Genericity" and
+        "Boundedness" pies, laid out side by side.
+    """
+    genericity_labels, aspect_labels, boundedness_labels = [], [], []
+    for _, labels in clause2labels:
+        labels = tuple(labels)
+        genericity_labels.append(labels[0])
+        aspect_labels.append(labels[1])
+        boundedness_labels.append(labels[2])
+
+    aspect_dict = {
+        "Dynamic": aspect_labels.count("dynamic"),
+        "Stative": aspect_labels.count("stative"),
+    }
+    genericity_dict = {
+        "Generic": genericity_labels.count("generic"),
+        "Specific": genericity_labels.count("specific"),
+    }
+    # "Habitual" belongs to the boundedness dimension, so it has to be counted
+    # in boundedness_labels like its two siblings (not in aspect_labels).
+    boundedness_dict = {
+        "Static": boundedness_labels.count("static"),
+        "Episodic": boundedness_labels.count("episodic"),
+        "Habitual": boundedness_labels.count("habitual"),
+    }
+
+    palette = sns.color_palette('pastel')
+    # NOTE(review): the last slice yields only two colours for three wedges,
+    # so matplotlib will reuse the first colour for the third wedge.
+    panels = [
+        ("Aspect", aspect_dict, palette[0:3]),
+        ("Genericity", genericity_dict, palette[3:6]),
+        ("Boundedness", boundedness_dict, palette[8:10]),
+    ]
+
+    fig, axs = plt.subplots(1, 3, figsize=(10, 6,))
+    for ax, (title, counts, colors) in zip(axs, panels):
+        ax.set_title(title)
+        if not any(counts.values()):
+            # Nothing to plot (no clauses, or no clause carries one of the
+            # counted labels): avoid handing an all-zero series to pie().
+            ax.text(0.5, 0.5, "No data", ha="center", va="center", transform=ax.transAxes)
+            ax.set_axis_off()
+            continue
+        # Raw counts are enough: normalize=True rescales them to fractions,
+        # so no division by the clause total (and no ZeroDivisionError).
+        ax.pie(list(counts.values()), colors=colors, labels=list(counts.keys()), autopct='%.0f%%', normalize=True)
+    return fig
 def run_pipeline(text):
     clause2labels = get_pred_classification_labels(all_clauses)
     output_clauses = [(c, str(i + 1))  for i, c in enumerate(all_clauses)]
+    figure = label_visualization(clause2labels)
+    clause2labels = [(k,str(v),) for k, v in clause2labels]
+    return output_clauses, clause2labels, figure
 # with open("pipeline_outputs.jsonl", "w") as fw:
 #     with open("all_text.txt", "r") as f:
 index_colormap = {str(i) : color_panel_1[i % len(color_panel_1)] for i in np.arange(1, 100000)}
 color_panel_2 = ["Gray", "DodgerBlue", "Wheat", "OliveDrab", "DarkKhaki", "DarkSalmon", "Orange", "Gold", "Aqua", "Tomato", "Violet"]
 str_attrs = sorted([str(v) for v in set(labels2attrs.values())])
+# print(str_attrs, len(str_attrs), len(color_panel_2))
 assert len(str_attrs) == len(color_panel_2)
 attr_colormap = {a:c for a, c in zip(str_attrs, color_panel_2)}
 # attr_colormap = {
             show_legend=True,
             combine_adjacent=False,
         ).style(color_map=attr_colormap),
+        gr.Plot(),
     ]
 )