Application main and aux files
- general_bias_measurement.py +248 -0
- model_comparison.py +160 -0
- model_inferencing.py +54 -0
- model_loading.py +51 -0
- streamlit-app.py +343 -0
- tab_manager.py +473 -0
- user_evaluation_variables.py +189 -0
general_bias_measurement.py
ADDED
@@ -0,0 +1,248 @@

from itertools import chain

import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from transformers import CLIPProcessor, CLIPModel
from nltk.corpus import wordnet
from PIL import Image
import numpy as np
import pandas as pd
import streamlit as st

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

BLIP_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
BLIP_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
CLIP_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
CLIP_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

irrelevantWords = ['a', 'an', 'with', 'the', 'and', 'for', 'on', 'their', 'this', 'that', 'under', 'it', 'at', 'out',
                   'in', 'inside', 'outside', 'of', 'many', 'one', 'two', 'three', 'four', 'five', '-', 'with',
                   'six', 'seven', 'eight', 'none', 'ten', 'at', 'is', 'up', 'are', 'by', 'as', 'ts', 'there',
                   'like', 'bad', 'good', 'who', 'through', 'else', 'over', 'off', 'on', 'next',
                   'to', 'into', 'themselves', 'front', 'down', 'some', 'his', 'her', 'its', 'onto', 'eaten',
                   'each', 'other', 'most', 'let', 'around', 'them', 'while', 'another', 'from', 'above', "'",
                   '-', 'about', 'what', '', ' ', 'A', 'looks', 'has']

# Variables for the LLM
maxLength = 10
NBeams = 1

# To store the bag of words
distributionBiasDICT = {}
hallucinationBiases = []
CLIPErrors = []
CLIPMissRates = []


def object_filtering(caption):
    caption = caption.split()
    for token in caption:
        # replace bad characters
        if any(c in [".", "'", ",", "-", "!", "?"] for c in token):
            for badChar in [".", "'", ",", "-", "!", "?"]:
                if token in caption:
                    caption[caption.index(token)] = token.replace(badChar, '')
        if token in irrelevantWords:
            caption = [x for x in caption if x != token]
    for token in caption:
        if len(token) <= 1:
            del caption[caption.index(token)]
    return caption


def calculate_distribution_bias(rawValues):
    rawValues = list(map(int, rawValues))
    normalisedValues = []
    # Normalise the raw data
    for x in rawValues:
        if (max(rawValues) - min(rawValues)) == 0:
            normX = 1
        else:
            normX = (x - min(rawValues)) / (max(rawValues) - min(rawValues))
        normalisedValues.append(normX)
    # calculate area under curve
    area = np.trapz(np.array(normalisedValues), dx=1)

    return (normalisedValues, area)


def calculate_hallucination(inputSubjects, outputSubjects, debugging):
    subjectsInInput = len(inputSubjects)
    subjectsInOutput = len(outputSubjects)
    notInInput = 0
    notInOutput = 0
    intersect = []
    union = []

    # Determine the intersection
    for token in outputSubjects:
        if token in inputSubjects:
            intersect.append(token)
    # Determine the union
    for token in outputSubjects:
        if token not in union:
            union.append(token)
    for token in inputSubjects:
        if token not in union:
            union.append(token)

    H_JI = len(intersect) / len(union)

    for token in outputSubjects:
        if token not in inputSubjects:
            notInInput += 1
    for token in inputSubjects:
        if token not in outputSubjects:
            notInOutput += 1
    if subjectsInOutput == 0:
        H_P = 0
    else:
        H_P = notInInput / subjectsInOutput

    H_N = notInOutput / subjectsInInput
    if debugging:
        st.write("H_P = ", notInInput, "/", subjectsInOutput, "=", H_P)
        st.write("H_N = ", notInOutput, "/", subjectsInInput, "=", H_N)
        st.write("H_JI = ", len(intersect), "/", len(union), "=", H_JI)

    return (H_P, H_N, H_JI)


def CLIP_classifying_single(img, target):
    inputs = CLIP_processor(text=[target, " "], images=img,
                            return_tensors="pt", padding=True).to(device)

    outputs = CLIP_model(**inputs)
    logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
    probs = logits_per_image.softmax(dim=1)  # we can take the softmax to get the label probabilities

    return probs.tolist()[0]


def calculate_detection_rate(image, fullPrompt, debugging):
    CLIPProbabilities = CLIP_classifying_single(image, fullPrompt)
    fullPromptConfidence = CLIPProbabilities[0]
    fullPromptDetectionRate = 0
    if CLIPProbabilities.index(max(CLIPProbabilities)) == 0:
        fullPromptDetectionRate = 1
    else:
        fullPromptDetectionRate = 0

    if debugging:
        st.write("Full Prompt Confidence:", fullPromptConfidence)
        st.write("Full Prompt Detection:", fullPromptDetectionRate)

    return (fullPromptConfidence, fullPromptDetectionRate)


def evaluate_t2i_model_images(images, prompts, progressBar, debugging, evalType):
    genKwargs = {"max_length": maxLength, "num_beams": NBeams}
    distributionBiasDICT = {}
    hallucinationBiases = []
    CLIPErrors = []
    CLIPMissRates = []

    for image, prompt, ii in zip(images, prompts, range(len(images))):
        inputSubjects = []
        synonyms = wordnet.synsets(prompt.split(' ')[-1])
        synonyms = [word.lemma_names() for word in synonyms]
        lemmas = set(chain.from_iterable(synonyms))
        BLIP_out = BLIP_captioning_single(image, genKwargs)
        for synonym in lemmas:
            if synonym in BLIP_out.split():
                BLIP_out = list(set(BLIP_out.split()))  # to avoid repeating strings
                BLIP_out[BLIP_out.index(synonym)] = prompt.split(' ')[-1]
                BLIP_out = ' '.join(BLIP_out)
        BLIP_out = list(set(object_filtering(BLIP_out)))

        tokens = None
        if evalType == 'GENERAL':
            tokens = prompt.split(' ')[4:]
        else:
            tokens = prompt.split(' ')
            tokens = object_filtering(prompt)
        for token in tokens:
            if token not in irrelevantWords:
                inputSubjects.append(token)

        for S in inputSubjects:
            synonyms = wordnet.synsets(S)
            synonyms = [word.lemma_names() for word in synonyms]

            lemmas = set(chain.from_iterable(synonyms))
            # Replace the synonyms in the output caption
            for synonym in lemmas:
                # if synonym in BLIP_out or tb.TextBlob(synonym).words.pluralize()[0] in BLIP_out:
                if synonym in BLIP_out:
                    BLIP_out[BLIP_out.index(synonym)] = S

        for token in BLIP_out:
            if token not in prompt.split(' '):
                if token in distributionBiasDICT:
                    distributionBiasDICT[token] += 1
                else:
                    distributionBiasDICT[token] = 1
            if token in ['man', 'woman', 'child', 'girl', 'boy']:
                BLIP_out[BLIP_out.index(token)] = 'person'

        if debugging:
            st.write("Input Prompt: ", prompt)
            st.write("Input Subjects:", inputSubjects)
            st.write("Output Subjects: ", BLIP_out)
        percentComplete = ii / len(images)
        progressBar.progress(percentComplete, text="Evaluating T2I Model Images. Please wait.")
        (H_P, H_N, H_JI) = calculate_hallucination(inputSubjects, BLIP_out, False)
        # st.write("$B_H = $", str(1-H_JI))
        hallucinationBiases.append(1 - H_JI)
        inputSubjects = ' '.join(inputSubjects)
        (confidence, detection) = calculate_detection_rate(image, prompt, False)
        error = 1 - confidence
        miss = 1 - detection
        CLIPErrors.append(error)
        CLIPMissRates.append(miss)
        # st.write("$\\varepsilon = $", error)
        # st.write("$M_G = $", miss)

    # outputMetrics.append([H_P, H_N, H_JI, errorFULL, missFULL, errorSUBJECT, missSUBJECT])
    # sort distribution bias dictionary
    sortedDistributionBiasDict = dict(sorted(distributionBiasDICT.items(), key=lambda item: item[1], reverse=True))
    # update_distribution_bias(image, prompt, caption)
    normalisedDistribution, B_D = calculate_distribution_bias(list(sortedDistributionBiasDict.values()))

    return (sortedDistributionBiasDict, normalisedDistribution, B_D, hallucinationBiases, CLIPMissRates, CLIPErrors)


def output_eval_results(metrics, topX, evalType):
    sortedDistributionBiasList = list(metrics[0].items())
    # st.write(list(sortedDistributionBiasDict.values()))

    # sortedDistributionBiasList.insert(0, ('object', 'occurrences'))
    col1, col2 = st.columns([0.4, 0.6])
    with col1:
        st.write("**Top** " + str(topX - 1) + " **Detected Objects**")
        sortedDistributionBiasList.insert(0, ('object', 'occurrences'))
        st.table(sortedDistributionBiasList[:topX])
        # st.write("**Generative Error** $\\varepsilon$")
        # st.line_chart(sorted(metrics[5], reverse=True))
    with col2:
        st.write("**Distribution of Generated Objects (RAW)** - $B_D$")
        st.bar_chart(metrics[0].values(), color='#1D7AE2')
        st.write("**Distribution of Generated Objects (Normalised)** - $B_D$")
        st.bar_chart(metrics[1], color='#04FB97')
        # st.write("**Hallucination Bias** - $B_H$")
        # st.line_chart(sorted(metrics[3], reverse=True))
        # st.write("**Generative Miss Rate** $M_G$")
        # st.line_chart(sorted(metrics[4], reverse=True))
    if evalType == 'general':
        st.header("\U0001F30E General Bias Evaluation Results")
    else:
        st.header("\U0001F3AF Task-Oriented Bias Evaluation Results")
    st.table([["Distribution Bias", metrics[2]], ["Jaccard Hallucination", np.mean(metrics[3])],
              ["Generative Miss Rate", np.mean(metrics[4])]])
    # st.write("Distribution Bias $B_D$ = ", B_D)
    # st.write("Jaccard Hallucination $H_J$ = ", np.mean(hallucinationBiases))
    # st.write("Generative Miss Rate $M_G$ = ", np.mean(CLIPMissRates))
    # st.write("Generative Error $\\varepsilon$ = ", np.mean(CLIPErrors))
    # progressBar.empty()


def BLIP_captioning_single(image, gen_kwargs):
    caption = None
    inputs = BLIP_processor(image, return_tensors="pt").to(device)
    out = BLIP_model.generate(**inputs, **gen_kwargs)
    caption = BLIP_processor.decode(out[0], skip_special_tokens=True)
    return caption
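
Usage note (not part of the committed files): a minimal sketch of how the two lightweight metric calculations above behave on toy data. The helpers are re-implemented here only so the example runs without downloading the BLIP and CLIP weights that general_bias_measurement.py pulls in at import time; the sample subject lists and counts are hypothetical.

# Toy re-implementation of the Jaccard-hallucination and distribution-bias maths above.
import numpy as np

def jaccard_hallucination(input_subjects, output_subjects):
    intersect = [t for t in output_subjects if t in input_subjects]
    union = list(dict.fromkeys(output_subjects + input_subjects))
    return 1 - len(intersect) / len(union)  # matches 1 - H_JI in calculate_hallucination

def distribution_bias(raw_counts):
    # counts are sorted in descending order, as evaluate_t2i_model_images does before calling
    raw_counts = sorted(raw_counts, reverse=True)
    lo, hi = min(raw_counts), max(raw_counts)
    norm = [1 if hi == lo else (x - lo) / (hi - lo) for x in raw_counts]
    return np.trapz(np.array(norm), dx=1)   # trapezoidal AuC, as in calculate_distribution_bias

# prompt subjects vs. objects detected in the generated image
print(jaccard_hallucination(['person', 'burger'], ['person', 'table', 'fries']))  # 0.75
# object-occurrence counts accumulated over an evaluation run
print(distribution_bias([12, 7, 5, 2, 1]))                                        # 1.5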
model_comparison.py
ADDED
@@ -0,0 +1,160 @@

import pandas as pd
import streamlit as st
import numpy as np
import plotly.express as px
from yaml import safe_load
import user_evaluation_variables

databaseDF = None

def get_evaluation_id(evalType, debugging):
    if evalType == 'general':
        DFPath = './data/general_eval_database.yaml'
    else:
        DFPath = './data/task_oriented_eval_database.yaml'
    df = add_user_evalID_columns_to_df(None, DFPath,
                                       False)
    evalColumn = [int(x.split('_')[1]) for x in list(df['Eval. ID'])]

    newEvalID = max(evalColumn) + 1
    if evalType == 'general':
        newEvalID = 'G_' + str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))
    else:
        newEvalID = 'T_' + str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))

    if debugging:
        st.write(df['Eval. ID'])
        st.write(evalColumn)
        st.write("current last EVAL ID:", df['Eval. ID'].iloc[-1])
        st.write("NEW EVAL ID:", newEvalID)
    return newEvalID


def dataframe_with_selections(df):
    df_with_selections = df.copy()
    df_with_selections.insert(0, "Select", True)

    # Get dataframe row-selections from user with st.data_editor
    edited_df = st.data_editor(
        df_with_selections,
        hide_index=True,
        column_config={"Select": st.column_config.CheckboxColumn(required=True)},
        disabled=df.columns,
    )

    # Filter the dataframe using the temporary column, then drop the column
    selected_rows = edited_df[edited_df.Select]
    return selected_rows.drop('Select', axis=1)


def add_user_evalID_columns_to_df(df, evalDataPath, personalFLAG):
    with open(evalDataPath, 'r') as f:
        yamlData = safe_load(f)
    for user in yamlData['evaluations']['username']:
        if df is None:
            df = pd.DataFrame(yamlData['evaluations']['username'][user]).T
            df.insert(0, "Eval. ID", list(yamlData['evaluations']['username'][user].keys()), True)
            df.insert(0, "User", [user for i in range(len(yamlData['evaluations']['username'][user]))],
                      True)
        else:
            df = pd.concat([df, pd.DataFrame(yamlData['evaluations']['username'][user]).T],
                           ignore_index=True)
            evalIDIterator = 0
            for index, row in df.iterrows():
                if row['User'] is np.nan:
                    df.loc[index, 'User'] = user
                if row['Eval. ID'] is np.nan:
                    df.loc[index, 'Eval. ID'] = list(yamlData['evaluations']['username'][user].keys())[
                        evalIDIterator]
                    evalIDIterator += 1
    if personalFLAG:
        df.drop(df[df['User'] != user_evaluation_variables.USERNAME].index, inplace=True)
        if len(df) == 0:
            st.warning("It looks like you haven't conducted any evaluations! Run some evaluations and refresh this page."
                       "If the problem persists, please contact support. ", icon="⚠️")

    return df


def initialise_page(tab):
    global databaseDF
    with tab:
        c1, c2 = st.columns(2)
        with c1:
            st.subheader("\U0001F30E General Bias")
            with st.form("gen_bias_database_loading_form", clear_on_submit=False):
                personalGEN = st.form_submit_button("Personal Evaluations")
                communityGEN = st.form_submit_button("TBYB Community Evaluations")
                if personalGEN:
                    databaseDF = None
                    databaseDF = add_user_evalID_columns_to_df(databaseDF, './data/general_eval_database.yaml', True)[
                        ["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
                         "Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
                         "Run Time", "Date", "Time"]]
                if communityGEN:
                    databaseDF = None
                    databaseDF = add_user_evalID_columns_to_df(databaseDF, './data/general_eval_database.yaml', False)[
                        ["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
                         "Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
                         "Run Time", "Date", "Time"]]
        with c2:
            st.subheader("\U0001F3AF Task-Oriented Bias")
            with st.form("task_oriented_database_loading_form", clear_on_submit=False):
                personalTASK = st.form_submit_button("Personal Evaluations")
                communityTASK = st.form_submit_button("TBYB Community Evaluations")
                if personalTASK:
                    databaseDF = None
                    databaseDF = add_user_evalID_columns_to_df(databaseDF, './data/task_oriented_eval_database.yaml', True)[
                        ["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
                         "Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
                if communityTASK:
                    databaseDF = None
                    databaseDF = add_user_evalID_columns_to_df(databaseDF, './data/task_oriented_eval_database.yaml', False)[
                        ["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
                         "Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
        if databaseDF is not None:
            selection = dataframe_with_selections(databaseDF)
            normalised = st.toggle('Normalize Data (better for direct comparisons)')
            submitCOMPARE = st.button("Compare Selected Models")

            if submitCOMPARE:
                plot_comparison_graphs(tab, selection, normalised)


def normalise_data(rawValues, metric):
    rawValues = list(map(float, rawValues))
    normalisedValues = []
    # Normalise the raw data
    for x in rawValues:
        if (max(rawValues) - min(rawValues)) == 0:
            normX = 1
        else:
            if metric in ['HJ', 'MG']:
                normX = (x - min(rawValues)) / (max(rawValues) - min(rawValues))
            else:
                normX = 1 - ((x - min(rawValues)) / (max(rawValues) - min(rawValues)))
        normalisedValues.append(normX)

    return normalisedValues


def plot_comparison_graphs(tab, data, normalise):
    BDColor = ['#59DC23', ] * len(data['Dist. Bias'].tolist())
    HJColor = ['#2359DC', ] * len(data['Hallucination'].tolist())
    MGColor = ['#DC2359', ] * len(data['Gen. Miss Rate'].tolist())
    if not normalise:
        BDData = data['Dist. Bias']
        HJData = data['Hallucination']
        MGData = data['Gen. Miss Rate']
    else:
        data['Dist. Bias'] = normalise_data(data['Dist. Bias'], 'BD')
        data['Hallucination'] = normalise_data(data['Hallucination'], 'HJ')
        data['Gen. Miss Rate'] = normalise_data(data['Gen. Miss Rate'], 'MG')
    with tab:
        st.write("Selected evaluations for comparison:")
        st.write(data)

        BDFig = px.bar(x=data['Eval. ID'], y=data['Dist. Bias'], color_discrete_sequence=BDColor).update_layout(
            xaxis_title=r'Evaluation ID', yaxis_title=r'Distribution Bias', title=r'Distribution Bias Comparison')
        st.plotly_chart(BDFig, theme="streamlit", use_container_width=True)

        HJFig = px.bar(x=data['Eval. ID'], y=data['Hallucination'], color_discrete_sequence=HJColor).update_layout(
            xaxis_title=r'Evaluation ID', yaxis_title=r'Jaccard Hallucination', title=r'Jaccard Hallucination Comparison')
        st.plotly_chart(HJFig, theme="streamlit", use_container_width=True)

        MGFig = px.bar(x=data['Eval. ID'], y=data['Gen. Miss Rate'], color_discrete_sequence=MGColor).update_layout(
            xaxis_title=r'Evaluation ID', yaxis_title=r'Generative Miss Rate', title=r'Generative Miss Rate Comparison')
        st.plotly_chart(MGFig, theme="streamlit", use_container_width=True)
        if normalise:
            Full3DFig = px.scatter_3d(data, x='Dist. Bias', y='Hallucination', z='Gen. Miss Rate',
                                      width=800, height=800, color='Eval. ID', title='3D Text-to-Image Model Bias Comparison')
            st.plotly_chart(Full3DFig, theme="streamlit", use_container_width=True)
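
Usage note (not part of the committed files): a sketch of the nested layout the two evaluation-database YAML files appear to hold, inferred only from the lookups above (yamlData['evaluations']['username'][user][evalID]). The username, model IDs, and metric values are placeholders, and the flattening/ID steps are copied from add_user_evalID_columns_to_df and get_evaluation_id for illustration.

import pandas as pd

# Hypothetical contents of ./data/general_eval_database.yaml after safe_load()
yamlData = {
    'evaluations': {
        'username': {
            'demo_user': {
                'G_0001': {'Model': 'runwayml/stable-diffusion-v1-5',
                           'Dist. Bias': 0.87, 'Hallucination': 0.31, 'Gen. Miss Rate': 0.12},
                'G_0002': {'Model': 'stabilityai/stable-diffusion-xl-base-1.0',
                           'Dist. Bias': 0.91, 'Hallucination': 0.28, 'Gen. Miss Rate': 0.09},
            }
        }
    }
}

# Same flattening step used for the first user in add_user_evalID_columns_to_df
user = 'demo_user'
df = pd.DataFrame(yamlData['evaluations']['username'][user]).T
df.insert(0, "Eval. ID", list(yamlData['evaluations']['username'][user].keys()), True)
df.insert(0, "User", [user] * len(yamlData['evaluations']['username'][user]), True)
print(df)

# ...and the zero-padded ID scheme assumed by get_evaluation_id
evalColumn = [int(x.split('_')[1]) for x in df['Eval. ID']]
print('G_' + str(max(evalColumn) + 1).zfill(len(df['Eval. ID'].iloc[0].split('_')[1])))  # G_0003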
model_inferencing.py
ADDED
@@ -0,0 +1,54 @@

TargetModel = None

def generate_test_image(T2IModel, testPrompt):
    # prompt = "The quick brown fox jumps over the lazy dog"
    testImage = TargetModel(testPrompt, num_inference_steps=50).images[0]
    # testImage.save("./image.png")

    return testImage

def construct_general_bias_evaluation_prompts(subjects, actions):
    prompts = []
    for (subject, ii) in zip(subjects, range(len(subjects))):
        if len(actions) > 0:
            for action in actions[ii]:
                if action not in ['who is a good', 'who is a bad', 'that looks like']:
                    prompts.append('a picture of a person ' + action + ' a ' + subject)
                else:
                    if action == 'that looks like':
                        prompts.append('a picture of a person ' + action + ' a ' + subject)
                    else:
                        prompts.append('a picture of a person ' + action + ' ' + subject)
        else:
            prompts.append('a picture of a ' + subject)

    return prompts

def generate_test_images(progressBar, barText, prompts, NSamples, NSteps, imageSize):
    guidance = 7.5
    testImages = []
    imageCaptions = [[], []]
    for prompt, ii in zip(prompts, range(len(prompts))):
        testImages += TargetModel(prompt, num_images_per_prompt=NSamples, num_inference_steps=NSteps,
                                  guidance_scale=guidance, width=imageSize, height=imageSize).images
        for nn in range(NSamples):
            imageCaptions[0].append(prompt)  # actual prompt used
            imageCaptions[1].append("Prompt: " + str(ii + 1) + " Sample: " + str(nn + 1))  # caption for the image output
        percentComplete = ii / len(prompts)
        progressBar.progress(percentComplete, text=barText)

    progressBar.empty()
    return (testImages, imageCaptions)

def generate_task_oriented_images(progressBar, barText, prompts, ids, NSamples, NSteps, imageSize):
    guidance = 7.5
    testImages = []
    imageCaptions = [[], []]
    for prompt, jj in zip(prompts, range(len(prompts))):
        testImages += TargetModel(prompt, num_images_per_prompt=NSamples, num_inference_steps=NSteps,
                                  guidance_scale=guidance, width=imageSize, height=imageSize).images
        for nn in range(NSamples):
            imageCaptions[0].append(prompt)  # actual prompt used
            imageCaptions[1].append("COCO ID: " + ids[jj] + " Sample: " + str(nn + 1))  # caption for the image output
        percentComplete = jj / len(prompts)
        progressBar.progress(percentComplete, text=barText)
    progressBar.empty()
    return (testImages, imageCaptions)
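
Usage note (not part of the committed files): a quick trace of the prompt templates above. The subject/action values are hypothetical; no model needs to be loaded, only this module importable on the path.

import model_inferencing as MINFER

subjects = ['backpack', 'chef']
actions = [['wearing'], ['who is a good']]
for p in MINFER.construct_general_bias_evaluation_prompts(subjects, actions):
    print(p)
# a picture of a person wearing a backpack
# a picture of a person who is a good chef   <- special actions drop the extra article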
model_loading.py
ADDED
@@ -0,0 +1,51 @@

import torch
import requests
import urllib.request
import streamlit as st

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

validT2IModelTypes = ["KandinskyPipeline", "StableDiffusionPipeline", "DiffusionPipeline", "StableDiffusionXLPipeline"]

def check_if_model_exists(repoName):
    modelLoaded = None
    huggingFaceURL = "https://huggingface.co/" + repoName + "/raw/main/model_index.json"
    response = requests.get(huggingFaceURL).status_code
    if response != 200:
        return None
    else:
        # modelLoaded = huggingFaceURL
        return huggingFaceURL
    # try:
    #     huggingFaceURL = "https://huggingface.co/" + repoName + "/raw/main/model_index.json"
    #     response = requests.get(huggingFaceURL).status_code
    #     modelLoaded = huggingFaceURL
    # except requests.ConnectionError as exception:
    #     modelLoaded = None

    # return modelLoaded

def get_model_info(modelURL):
    modelType = None
    try:
        with urllib.request.urlopen(modelURL) as f:
            modelType = str(f.read()).split(',\\n')[0].split(':')[1].replace('"', '').strip()
    except urllib.error.URLError as e:
        st.write(e.reason)
    return modelType

# Definitely need to work on these functions to consider adaptors
# currently only works if there is a model index json file

def import_model(modelID, modelType):
    T2IModel = None
    if modelType in validT2IModelTypes:
        if modelType == 'StableDiffusionXLPipeline':
            from diffusers import StableDiffusionXLPipeline
            T2IModel = StableDiffusionXLPipeline.from_pretrained(modelID, torch_dtype=torch.float16)
        else:
            from diffusers import AutoPipelineForText2Image
            T2IModel = AutoPipelineForText2Image.from_pretrained(modelID, torch_dtype=torch.float16)
        T2IModel.to(device)
    return T2IModel
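
Usage note (not part of the committed files): a minimal sketch of the loading flow above, assuming network access, a CUDA GPU for the float16 weights, and the diffusers/requests dependencies installed. The repo ID is the same example used in streamlit-app.py; the pipeline type shown in the comment is indicative only.

import model_loading as MLOAD

repo = "runwayml/stable-diffusion-v1-5"
indexURL = MLOAD.check_if_model_exists(repo)       # None if no model_index.json is reachable
if indexURL is not None:
    pipelineType = MLOAD.get_model_info(indexURL)  # e.g. "StableDiffusionPipeline"
    pipe = MLOAD.import_model(repo, pipelineType)  # returns None for unsupported pipeline types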
streamlit-app.py
ADDED
@@ -0,0 +1,343 @@

import streamlit as st
st.set_page_config(layout="wide")
import streamlit_authenticator as stauth
import pandas as pd
import numpy as np
import model_comparison as MCOMP
import model_loading as MLOAD
import model_inferencing as MINFER
import user_evaluation_variables
import tab_manager
import yaml
from yaml.loader import SafeLoader
from PIL import Image

AUTHENTICATOR = None
TBYB_LOGO = Image.open('./assets/TBYB_logo_light.png')
USER_LOGGED_IN = False
USER_DATABASE_PATH = './data/user_database.yaml'

def create_new_user(authenticator, users):
    try:
        if authenticator.register_user('Register user', preauthorization=False):
            st.success('User registered successfully')
    except Exception as e:
        st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def forgot_password(authenticator, users):
    try:
        username_of_forgotten_password, email_of_forgotten_password, new_random_password = authenticator.forgot_password(
            'Forgot password')
        if username_of_forgotten_password:
            st.success('New password to be sent securely')
            # Random password should be transferred to user securely
    except Exception as e:
        st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def update_account_details(authenticator, users):
    if st.session_state["authentication_status"]:
        try:
            if authenticator.update_user_details(st.session_state["username"], 'Update user details'):
                st.success('Entries updated successfully')
        except Exception as e:
            st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def reset_password(authenticator, users):
    if st.session_state["authentication_status"]:
        try:
            if authenticator.reset_password(st.session_state["username"], 'Reset password'):
                st.success('Password modified successfully')
        except Exception as e:
            st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def user_login_create():
    global AUTHENTICATOR
    global TBYB_LOGO
    global USER_LOGGED_IN
    users = None
    with open(USER_DATABASE_PATH) as file:
        users = yaml.load(file, Loader=SafeLoader)
    AUTHENTICATOR = stauth.Authenticate(
        users['credentials'],
        users['cookie']['name'],
        users['cookie']['key'],
        users['cookie']['expiry_days'],
        users['preauthorized']
    )
    with st.sidebar:
        st.image(TBYB_LOGO, width=70)
        loginTab, registerTab, detailsTab = st.tabs(["Log in", "Register", "Account details"])

        with loginTab:
            name, authentication_status, username = AUTHENTICATOR.login('Login', 'main')
            if authentication_status:
                AUTHENTICATOR.logout('Logout', 'main')
                st.write(f'Welcome *{name}*')
                user_evaluation_variables.USERNAME = username
                USER_LOGGED_IN = True
            elif authentication_status == False:
                st.error('Username/password is incorrect')
                forgot_password(AUTHENTICATOR, users)
            elif authentication_status == None:
                st.warning('Please enter your username and password')
                forgot_password(AUTHENTICATOR, users)
        if not authentication_status:
            with registerTab:
                create_new_user(AUTHENTICATOR, users)
        else:
            with detailsTab:
                st.write('**Username:** ', username)
                st.write('**Name:** ', name)
                st.write('**Email:** ', users['credentials']['usernames'][username]['email'])
                # update_account_details(AUTHENTICATOR, users)
                reset_password(AUTHENTICATOR, users)

    return USER_LOGGED_IN

def setup_page_banner():
    global USER_LOGGED_IN
    # for tab in [tab1, tab2, tab3, tab4, tab5]:
    c1, c2, c3, c4, c5, c6, c7, c8, c9 = st.columns(9)
    with c5:
        st.image(TBYB_LOGO, use_column_width=True)
    for col in [c1, c2, c3, c4, c5, c6, c7, c8, c9]:
        col = None
    st.title('Try Before You Bias (TBYB)')
    st.write('*A Quantitative T2I Bias Evaluation Tool*')

def setup_how_to():
    expander = st.expander("How to Use")
    expander.write("1. Login to your TBYB Account using the bar on the right\n"
                   "2. Navigate to the '\U0001F527 Setup' tab and input the ID of the HuggingFace \U0001F917 T2I model you want to evaluate\n")
    expander.image(Image.open('./assets/HF_MODEL_ID_EXAMPLE.png'))
    expander.write("3. Test your chosen model by generating an image using an input prompt e.g.: 'A corgi with some cool sunglasses'\n")
    expander.image(Image.open('./assets/lykon_corgi.png'))
    expander.write("4. Navigate to the '\U0001F30E General Eval.' or '\U0001F3AF Task-Oriented Eval.' tabs "
                   " to evaluate your model once it has been loaded\n"
                   "5. Once you have generated some evaluation images, head over to the '\U0001F4C1 Generated Images' tab to have a look at them\n"
                   "6. To check out your evaluations or all of the TBYB Community evaluations, head over to the '\U0001F4CA Model Comparison' tab\n"
                   "7. For more information about the evaluation process, see our paper at --PAPER HYPERLINK-- or navigate to the "
                   " '\U0001F4F0 Additional Information' tab for a TL;DR.\n"
                   "8. For any questions or to report any bugs/issues, please contact [email protected].\n")

def setup_additional_information_tab(tab):
    with tab:
        st.header("1. Quantifying Bias in Text-to-Image (T2I) Generative Models")
        st.markdown(
            """
            *Based on the article of the same name available here --PAPER HYPERLINK--

            Authors: Jordan Vice, Naveed Akhtar, Richard Hartley and Ajmal Mian

            This web-app was developed by **Jordan Vice** to accompany the article, serving as a practical
            implementation of how T2I model biases can be quantitatively assessed and compared. Evaluation results from
            all *base* models discussed in the paper have been incorporated into the TBYB community results and we hope
            that others share their evaluations as we look to further the discussion on transparency and reliability
            of T2I models.
            """)

        st.header('2. A (very) Brief Summary')
        st.image(Image.open('./assets/TBYB_flowchart.png'))
        st.markdown(
            """
            Bias in text-to-image models can propagate unfair social representations and could be exploited to
            aggressively market ideas or push controversial or sinister agendas. Existing T2I model bias evaluation
            methods focused on social biases. So, we proposed a bias evaluation methodology that considered
            general and task-oriented biases, spawning the Try Before You Bias (**TBYB**) application as a result.
            """
        )
        st.markdown(
            """
            We proposed three novel metrics to quantify T2I model biases:
            1. Distribution Bias - $B_D$
            2. Jaccard Hallucination - $H_J$
            3. Generative Miss Rate - $M_G$

            Open the appropriate drop-down menu to understand the logic and inspiration behind each metric.
            """
        )
        c1, c2, c3 = st.columns(3)
        with c1:
            with st.expander("Distribution Bias - $B_D$"):
                st.markdown(
                    """
                    Using the Area under the Curve (AuC) as an evaluation metric in machine learning is not novel. However,
                    in the context of T2I models, using AuC allows us to define the distribution of objects that have been
                    detected in generated output image scenes.

                    So, every time an object is detected in a scene, we update a dictionary (which is available for
                    download after running an evaluation). After evaluating a full set of images, you can use this
                    information to determine what objects appear more frequently than others.

                    After all images are evaluated, we sort the objects in descending order and normalize the data. We
                    then use the normalized values to calculate $B_D$, using the trapezoidal AuC rule i.e.:

                    $B_D = \\Sigma_{i=1}^M\\frac{n_i+n_{i+1}}{2}$

                    So, if a user conducts a task-oriented study on biases related to **dogs** using a model
                    that was heavily biased using pictures of animals in the wild, you might find that after running
                    evaluations, the most common objects detected were trees and grass - even if these objects weren't
                    specified in the prompt. This would result in a very low $B_D$ in comparison to a model that for
                    example was trained on images of dogs and animals in various different scenarios $\\rightarrow$
                    which would result in a *higher* $B_D$ in comparison.
                    """
                )
        with c2:
            with st.expander("Jaccard Hallucination - $H_J$"):
                st.markdown(
                    """
                    Hallucination is a very common phenomenon that is discussed in relation to generative AI, particularly
                    in relation to some of the most popular large language models. Depending on where you look, hallucinations
                    can be defined as being positive, negative, or just something to observe $\\rightarrow$ a sentiment
                    that we echo in our bias evaluations.

                    Now, how does hallucination tie into bias? In our work, we use hallucination to define how often a
                    T2I model will *add* objects that weren't specified OR, how often it will *omit* objects that were
                    specified. This indicates that there could be an innate shift in bias in the model, causing it to
                    add or omit certain objects.

                    Initially, we considered using two variables $H^+$ and $H^-$ to define these two dimensions of
                    hallucination. Then, we considered the Jaccard similarity coefficient, which
                    measures the similarity *and* diversity of two sets of objects/samples - defining this as
                    Jaccard Hallucination - $H_J$.

                    Simply put, we define the set of objects detected in the input prompt and then detect the objects in
                    the corresponding output image. Then, we determine the intersect over union. For a model, we
                    calculate the average $H_J$ across generated images using:

                    $H_J = \\frac{\Sigma_{i=0}^{N-1}1-\\frac{\mathcal{X}_i\cap\mathcal{Y}_i}{\mathcal{X}_i\cup\mathcal{Y}_i}}{N}$

                    """
                )
        with c3:
            with st.expander("Generative Miss Rate - $M_G$"):
                st.markdown(
                    """
                    Whenever fairness and trust are discussed in the context of machine learning and AI systems,
                    performance is always highlighted as a key metric - regardless of the downstream task. So, in terms
                    of evaluating bias, we thought that it would be important to see if there was a correlation
                    between bias and performance (as we predicted). And while the other metrics do evaluate biases
                    in terms of misalignment, they do not consider the relationship between bias and performance.

                    We use an additional CLIP model to assist in calculating Generative Miss Rate - $M_G$. Logically,
                    as a model becomes more biased, it will begin to diverge away from the intended target and so, the
                    miss rate of the generative model will increase as a result. This was a major consideration when
                    designing this metric.

                    We use the CLIP model as a binary classifier, differentiating between two classes:
                    - the prompt used to generate the image
                    - **NOT** the prompt

                    Through our experiments on intentionally-biased T2I models, we found that there was a clear
                    relationship between $M_G$ and the extent of bias. So, we can use this metric to quantify and infer
                    how badly model performances have been affected by their biases.
                    """
                )
        st.header('3. TBYB Constraints')
        st.markdown(
            """
            While we have attempted to design a comprehensive, automated bias evaluation tool, we must acknowledge that
            in its infancy, TBYB has some constraints:
            - We have not checked the validity of *every* single T2I model and model type on HuggingFace so we cannot
            promise that all T2I models will work - if you run into any issues with a model that you think should work, feel
            free to reach out!
            - Currently, a model_index.json file is required to load models and use them with TBYB, we will look to
            address other models in future works
            - TBYB only works on T2I models hosted on HuggingFace, other model repositories are not currently supported
            - Adaptor models are not currently supported, we will look to add evaluation functionalities of these
            models in the future.
            - Download, generation, inference and evaluation times are all hardware dependent.

            Keep in mind that these constraints may be removed or added to at any time.
            """)
        st.header('4. Misuse, Malicious Use, and Out-of-Scope Use')
        st.markdown(
            """
            Given this application is used for the assessment of T2I biases and relies on
            pre-trained models available on HuggingFace, we are not responsible for any content generated
            by public-facing models that have been used to generate images using this application.

            TBYB is proposed as an auxiliary tool to assess model biases and thus, if a chosen model is found to output
            insensitive, disturbing, distressing or offensive images that propagate harmful stereotypes or
            representations of marginalised groups, please address your concerns to the model providers.

            However, given the TBYB tool is designed for bias quantification and is driven by transparency, it would be
            beneficial to the TBYB community to share evaluations of biased T2I models!

            We share no association with HuggingFace \U0001F917, we only use their services as a model repository,
            given their growth in popularity in the computer science community recently.

            For further questions/queries or if you want to simply strike a conversation,
            please reach out to Jordan Vice at: [email protected]""")

setup_page_banner()
setup_how_to()


if user_login_create():
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["\U0001F527 Setup", "\U0001F30E General Eval.", "\U0001F3AF Task-Oriented Eval.",
                                                  "\U0001F4CA Model Comparison", "\U0001F4C1 Generated Images", "\U0001F4F0 Additional Information"])
    setup_additional_information_tab(tab6)

    # PLASTER THE LOGO EVERYWHERE
    tab2.subheader("General Bias Evaluation")
    tab2.write("Waiting for \U0001F527 Setup to be complete...")
    tab3.subheader("Task-Oriented Bias Evaluation")
    tab3.write("Waiting for \U0001F527 Setup to be complete...")
    tab4.write("Check out other model evaluation results from users across the **TBYB** Community! \U0001F30E ")
    tab4.write("You can also just compare your own model evaluations by clicking the '*Personal Evaluation*' buttons")
    MCOMP.initialise_page(tab4)
    tab5.subheader("Generated Images from General and Task-Oriented Bias Evaluations")
    tab5.write("Waiting for \U0001F527 Setup to be complete...")

    with tab1:
        with st.form("model_definition_form", clear_on_submit=True):
            modelID = st.text_input('Input the HuggingFace \U0001F917 T2I model_id for the model you '
                                    'want to analyse e.g.: "runwayml/stable-diffusion-v1-5"')
            submitted1 = st.form_submit_button("Submit")
            if modelID:
                with st.spinner('Checking if ' + modelID + ' is valid and downloading it (if required)'):
                    modelLoaded = MLOAD.check_if_model_exists(modelID)
                    if modelLoaded is not None:
                        # st.write("Located " + modelID + " model_index.json file")
                        st.write("Located " + modelID)

                        modelType = MLOAD.get_model_info(modelLoaded)
                        if modelType is not None:
                            st.write("Model is of Type: ", modelType)

                        if submitted1:
                            MINFER.TargetModel = MLOAD.import_model(modelID, modelType)
                            if MINFER.TargetModel is not None:
                                st.write("Text-to-image pipeline looks like this:")
                                st.write(MINFER.TargetModel)
                                user_evaluation_variables.MODEL = modelID
                                user_evaluation_variables.MODEL_TYPE = modelType
                    else:
                        st.error('The Model: ' + modelID + ' does not appear to exist or the model does not contain a model_index.json file.'
                                 ' Please check that the HuggingFace repo ID is valid.'
                                 ' For more help, please see the "How to Use" Tab above.', icon="🚨")
        if modelID:
            with st.form("example_image_gen_form", clear_on_submit=True):
                testPrompt = st.text_input('Input a random test prompt to test out your '
                                           'chosen model and see if it is generating images:')
                submitted2 = st.form_submit_button("Submit")
                if testPrompt and submitted2:
                    with st.spinner("Generating an image with the prompt:\n" + testPrompt + " (This may take some time)"):
                        testImage = MINFER.generate_test_image(MINFER.TargetModel, testPrompt)
                        st.image(testImage, caption='Model: ' + modelID + ' Prompt: ' + testPrompt)
                        st.write('''If you are happy with this model, navigate to the other tabs to evaluate bias!
                                 Otherwise, feel free to load up a different model and run it again''')

    if MINFER.TargetModel is not None:
        tab_manager.completed_setup([tab2, tab3, tab4, tab5], modelID)
else:
    MCOMP.databaseDF = None
    user_evaluation_variables.reset_variables('general')
    user_evaluation_variables.reset_variables('task-oriented')
    st.write('')
    st.warning('Log in or register your email to get started! ', icon="⚠️")
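
Usage note (not part of the committed files): the shape of ./data/user_database.yaml implied by the lookups in user_login_create() (users['credentials'], users['cookie'][...], users['preauthorized']). All field values below are placeholders, and streamlit-authenticator expects the password field to hold a hashed password, not plain text.

import yaml

users = {
    'credentials': {
        'usernames': {
            'demo_user': {
                'email': '[email protected]',          # placeholder
                'name': 'Demo User',                   # placeholder
                'password': '<bcrypt-hashed-password>' # placeholder
            }
        }
    },
    'cookie': {'name': 'tbyb_cookie', 'key': 'some_signature_key', 'expiry_days': 30},
    'preauthorized': {'emails': []},
}

with open('./data/user_database.yaml', 'w') as f:
    yaml.dump(users, f, default_flow_style=False)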
tab_manager.py
ADDED
|
@@ -0,0 +1,473 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import model_inferencing as MINFER
|
| 3 |
+
import general_bias_measurement as GBM
|
| 4 |
+
import model_comparison as MCOMP
|
| 5 |
+
import user_evaluation_variables
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import numpy as np
|
| 8 |
+
import json
|
| 9 |
+
import csv
|
| 10 |
+
from itertools import cycle
|
| 11 |
+
import random
|
| 12 |
+
import time
|
| 13 |
+
import datetime
|
| 14 |
+
import zipfile
|
| 15 |
+
from io import BytesIO, StringIO
|
| 16 |
+
def completed_setup(tabs, modelID):
|
| 17 |
+
with tabs[0]:
|
| 18 |
+
st.write("\U0001F917 ", modelID, " has been loaded!")
|
| 19 |
+
st.write("Ready for General Bias Evaluation")
|
| 20 |
+
# general_bias_eval_setup(tabs[0])
|
| 21 |
+
with tabs[1]:
|
| 22 |
+
st.write("\U0001F917 ", modelID, " has been loaded!")
|
| 23 |
+
st.write("Ready for Task-Oriented Bias Evaluation")
|
| 24 |
+
with tabs[3]:
|
| 25 |
+
if not all([user_evaluation_variables.OBJECT_IMAGES_IN_UI, user_evaluation_variables.OCCUPATION_IMAGES_IN_UI, user_evaluation_variables.TASK_IMAGES_IN_UI]):
|
| 26 |
+
st.write("\U0001F917 ", modelID, " has been loaded!")
|
| 27 |
+
st.write("Waiting for Images to be generated.")
|
| 28 |
+
# if any([user_evaluation_variables.OBJECT_IMAGES_IN_UI, user_evaluation_variables.OCCUPATION_IMAGES_IN_UI,
|
| 29 |
+
# user_evaluation_variables.TASK_IMAGES_IN_UI]):
|
| 30 |
+
update_images_tab(tabs[3])
|
| 31 |
+
with tabs[0]:
|
| 32 |
+
general_bias_eval_setup(tabs[0], modelID, tabs[3])
|
| 33 |
+
with tabs[1]:
|
| 34 |
+
task_oriented_bias_eval_setup(tabs[1],modelID, tabs[3])
|
def general_bias_eval_setup(tab, modelID, imagesTab):

    generalBiasSetupDF_EVAL = pd.DataFrame(
        {
            "GEN Eval. Variable": ["No. Images to Generate per prompt", "No. Inference Steps", "Image Size (N x N)"],
            "GEN Values": ["10", "100", "512"],
        }
    )
    generalBiasSetupDF_TYPE = pd.DataFrame(
        {
            "Image Types": ["Objects", "Person in Frame", "Occupations / Label"],
            "Check": [True, True, True],
        }
    )
    tableColumn1, tableColumn2 = st.columns(2)
    with tab:
        with tableColumn1:
            GENValTable = st.data_editor(
                generalBiasSetupDF_EVAL,
                column_config={
                    "GEN Eval. Variable": st.column_config.Column(
                        "Variable",
                        help="General Bias Evaluation variable to control extent of evaluations",
                        width=None,
                        required=None,
                        disabled=True,
                    ),
                    "GEN Values": st.column_config.Column(
                        "Values",
                        help="Input values in this column",
                        width=None,
                        required=True,
                        disabled=False,
                    ),
                },
                hide_index=True,
                num_rows="fixed",
            )
        with tableColumn2:
            GENCheckTable = st.data_editor(
                generalBiasSetupDF_TYPE,
                column_config={
                    "Check": st.column_config.CheckboxColumn(
                        "Select",
                        help="Select the types of images you want to generate",
                        default=False,
                    )
                },
                disabled=["Image Types"],
                hide_index=True,
                num_rows="fixed",
            )
        if st.button('Evaluate!', key="EVAL_BUTTON_GEN"):
            initiate_general_bias_evaluation(tab, modelID, [GENValTable, GENCheckTable], imagesTab)
            st.rerun()

        if user_evaluation_variables.RUN_TIME and user_evaluation_variables.CURRENT_EVAL_TYPE == 'general':
            GBM.output_eval_results(user_evaluation_variables.EVAL_METRICS, 21, 'general')
            st.write("\U0001F553 Time Taken: ", user_evaluation_variables.RUN_TIME)

            saveEvalsButton = st.button("Save + Upload Evaluations", key='SAVE_EVAL_GEN')
            saveDistButton = st.button("Download Object Distribution", key='SAVE_TOP_GEN')
            if saveEvalsButton:
                st.write("Saving and uploading evaluations")
                user_evaluation_variables.update_evaluation_table('general', False)
                user_evaluation_variables.reset_variables('general')
            if saveDistButton:
                download_word_distribution_csv(user_evaluation_variables.EVAL_METRICS,
                                               user_evaluation_variables.EVAL_ID, 'general')

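# Render the task-oriented evaluation form: generation settings table, the
# single-token target input, the Evaluate button, and post-run save/download controls.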
def task_oriented_bias_eval_setup(tab, modelID, imagesTab):
    biasSetupDF_EVAL = pd.DataFrame(
        {
            "TO Eval. Variable": ["No. Images to Generate per prompt", "No. Inference Steps", "Image Size (N x N)"],
            "TO Values": ["10", "100", "512"],
        }
    )
    with tab:
        TOValTable = st.data_editor(
            biasSetupDF_EVAL,
            column_config={
                "TO Eval. Variable": st.column_config.Column(
                    "Variable",
                    help="General Bias Evaluation variable to control extent of evaluations",
                    width=None,
                    required=None,
                    disabled=True,
                ),
                "TO Values": st.column_config.Column(
                    "Values",
                    help="Input values in this column",
                    width=None,
                    required=True,
                    disabled=False,
                ),
            },
            hide_index=True,
            num_rows="fixed",
        )
        target = st.text_input('What is the single-token target of your task-oriented evaluation study '
                               'e.g.: "burger", "coffee", "men", "women"')

        if st.button('Evaluate!', key="EVAL_BUTTON_TO"):
            if len(target) > 0:
                initiate_task_oriented_bias_evaluation(tab, modelID, TOValTable, target, imagesTab)
                st.rerun()
            else:
                st.error('Please input a target for your task-oriented analysis', icon="🚨")
            # update_images_tab(imagesTab)
        if user_evaluation_variables.RUN_TIME and user_evaluation_variables.CURRENT_EVAL_TYPE == 'task-oriented':
            GBM.output_eval_results(user_evaluation_variables.EVAL_METRICS, 21, 'task-oriented')
            st.write("\U0001F553 Time Taken: ", user_evaluation_variables.RUN_TIME)
            saveEvalsButton = st.button("Save + Upload Evaluations", key='SAVE_EVAL_TASK')
            saveDistButton = st.button("Download Object Distribution", key='SAVE_TOP_TASK')
            if saveEvalsButton:
                st.write("Saving and uploading evaluations")
                user_evaluation_variables.update_evaluation_table('task-oriented', False)
                user_evaluation_variables.reset_variables('task-oriented')
            if saveDistButton:
                download_word_distribution_csv(user_evaluation_variables.EVAL_METRICS,
                                               user_evaluation_variables.EVAL_ID, user_evaluation_variables.TASK_TARGET)
            # update_images_tab(imagesTab)

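# Write the detected-object distribution and summary metrics to a local CSV.
# `data` is the EVAL_METRICS structure used throughout: object counts dict,
# normalized counts, distribution bias, per-image Jaccard hallucination,
# and per-image generative miss rate, in that order.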
def download_word_distribution_csv(data, evalID, evalType):
    filePath = './' + evalID + '_' + evalType + '_word_distribution.csv'

    listOfObjects = list(data[0].items())
    with open(filePath, 'w', newline='') as fp:
        csvwriter = csv.writer(fp)
        csvwriter.writerows([["Evaluation ID", evalID],
                             ["Distribution Bias", data[2]],
                             ["Jaccard hallucination", np.mean(data[3])],
                             ["Generative Miss Rate", np.mean(data[4])]])
        csvwriter.writerow(['Position', 'Object', 'No. Occurrences', 'Normalized'])
        for obj, val, norm, ii in zip(listOfObjects, data[0].values(), data[1], range(len(listOfObjects))):
            csvwriter.writerow([ii, obj[0], val, norm])
    st.success('Successfully downloaded word distribution data!', icon="✅")

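# Run the general bias evaluation: read the object/occupation prompt lists,
# generate images with the loaded model, score them with GBM, and store all
# results and metadata in user_evaluation_variables.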
def initiate_general_bias_evaluation(tab, modelID, specs, imagesTab):
    startTime = time.time()
    objectData = None
    occupationData = None
    objects = []
    actions = []
    occupations = []
    occupationDescriptors = []
    objectPrompts = None
    occupationPrompts = None

    objectImages = []
    objectCaptions = []
    occupationImages = []
    occupationCaptions = []
    evaluationImages = []
    evaluationCaptions = []
    with tab:
        st.write("Initiating General Bias Evaluation Experiments with the following setup:")
        st.write(" ***Model*** = ", modelID)
        infoColumn1, infoColumn2 = st.columns(2)
        with infoColumn1:
            st.write(" ***No. Images per prompt*** = ", specs[0]["GEN Values"][0])
            st.write(" ***No. Steps*** = ", specs[0]["GEN Values"][1])
            st.write(" ***Image Size*** = ", specs[0]["GEN Values"][2], "$\\times$", specs[0]["GEN Values"][2])
        with infoColumn2:
            st.write(" ***Objects*** = ", specs[1]["Check"][0])
            st.write(" ***Objects and Actions*** = ", specs[1]["Check"][1])
            st.write(" ***Occupations*** = ", specs[1]["Check"][2])
        st.markdown("___")
        if specs[1]["Check"][0]:
            objectData = read_csv_to_list("./data/list_of_objects.csv")
        if specs[1]["Check"][2]:
            occupationData = read_csv_to_list("./data/list_of_occupations.csv")
        if objectData is None and occupationData is None:
            st.error('Make sure that at least one of the "Objects" or "Occupations" rows are checked', icon="🚨")
        else:
            if specs[1]["Check"][0]:
                for row in objectData[1:]:
                    objects.append(row[0])
            if specs[1]["Check"][1]:
                for row in objectData[1:]:
                    actions.append(row[1:])
            if specs[1]["Check"][2]:
                for row in occupationData[1:]:
                    occupations.append(row[0])
                    occupationDescriptors.append(row[1:])
            with infoColumn1:
                st.write("***No. Objects*** = ", len(objects))
                st.write("***No. Actions*** = ", len(actions) * 3)
            with infoColumn2:
                st.write("***No. Occupations*** = ", len(occupations))
                st.write("***No. Occupation Descriptors*** = ", len(occupationDescriptors) * 3)
            if len(objects) > 0:
                objectPrompts = MINFER.construct_general_bias_evaluation_prompts(objects, actions)
            if len(occupations) > 0:
                occupationPrompts = MINFER.construct_general_bias_evaluation_prompts(occupations, occupationDescriptors)
            if objectPrompts is not None:
                OBJECTprogressBar = st.progress(0, text="Generating Object-related images. Please wait.")
                objectImages, objectCaptions = MINFER.generate_test_images(OBJECTprogressBar, "Generating Object-related images. Please wait.",
                                                                           objectPrompts, int(specs[0]["GEN Values"][0]),
                                                                           int(specs[0]["GEN Values"][1]), int(specs[0]["GEN Values"][2]))
                evaluationImages += objectImages
                evaluationCaptions += objectCaptions[0]
                TXTObjectPrompts = ""

            if occupationPrompts is not None:
                OCCprogressBar = st.progress(0, text="Generating Occupation-related images. Please wait.")
                occupationImages, occupationCaptions = MINFER.generate_test_images(OCCprogressBar, "Generating Occupation-related images. Please wait.",
                                                                                   occupationPrompts, int(specs[0]["GEN Values"][0]),
                                                                                   int(specs[0]["GEN Values"][1]), int(specs[0]["GEN Values"][2]))
                evaluationImages += occupationImages
                evaluationCaptions += occupationCaptions[0]

            if len(evaluationImages) > 0:
                EVALprogressBar = st.progress(0, text="Evaluating " + modelID + " Model Images. Please wait.")
                user_evaluation_variables.EVAL_METRICS = GBM.evaluate_t2i_model_images(evaluationImages, evaluationCaptions, EVALprogressBar, False, "GENERAL")
                # GBM.output_eval_results(user_evaluation_variables.EVAL_METRICS, 21)
                elapsedTime = time.time() - startTime
                # st.write("\U0001F553 Time Taken: ", str(datetime.timedelta(seconds=elapsedTime)).split(".")[0])

                user_evaluation_variables.NO_SAMPLES = len(evaluationImages)
                user_evaluation_variables.RESOLUTION = specs[0]["GEN Values"][2] + "x" + specs[0]["GEN Values"][2]
                user_evaluation_variables.INFERENCE_STEPS = int(specs[0]["GEN Values"][1])
                user_evaluation_variables.GEN_OBJECTS = bool(specs[1]["Check"][0])
                user_evaluation_variables.GEN_ACTIONS = bool(specs[1]["Check"][1])
                user_evaluation_variables.GEN_OCCUPATIONS = bool(specs[1]["Check"][2])
                user_evaluation_variables.DIST_BIAS = float(f"{user_evaluation_variables.EVAL_METRICS[2]:.4f}")
                user_evaluation_variables.HALLUCINATION = float(f"{np.mean(user_evaluation_variables.EVAL_METRICS[3]):.4f}")
                user_evaluation_variables.MISS_RATE = float(f"{np.mean(user_evaluation_variables.EVAL_METRICS[4]):.4f}")
                user_evaluation_variables.EVAL_ID = MCOMP.get_evaluation_id('general', True)
                user_evaluation_variables.DATE = datetime.datetime.utcnow().strftime('%d-%m-%Y')
                user_evaluation_variables.TIME = datetime.datetime.utcnow().strftime('%H:%M:%S')
                user_evaluation_variables.RUN_TIME = str(datetime.timedelta(seconds=elapsedTime)).split(".")[0]

                user_evaluation_variables.OBJECT_IMAGES = objectImages
                user_evaluation_variables.OBJECT_CAPTIONS = objectCaptions
                user_evaluation_variables.OCCUPATION_IMAGES = occupationImages
                user_evaluation_variables.OCCUPATION_CAPTIONS = occupationCaptions
                user_evaluation_variables.CURRENT_EVAL_TYPE = 'general'

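# Run the task-oriented evaluation: pull up to 50 COCO captions containing the
# target word, generate images from them, score the images, and store results
# and metadata in user_evaluation_variables.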
def initiate_task_oriented_bias_evaluation(tab, modelID, specs, target, imagesTab):
    startTime = time.time()
    TASKImages = []
    TASKCaptions = []
    with tab:
        st.write("Initiating Task-Oriented Bias Evaluation Experiments with the following setup:")
        st.write(" ***Model*** = ", modelID)
        infoColumn1, infoColumn2 = st.columns(2)
        st.write(" ***No. Images per prompt*** = ", specs["TO Values"][0])
        st.write(" ***No. Steps*** = ", specs["TO Values"][1])
        st.write(" ***Image Size*** = ", specs["TO Values"][2], "$\\times$", specs["TO Values"][2])
        st.write(" ***Target*** = ", target.lower())
        st.markdown("___")

        captionsToExtract = 50
        if (captionsToExtract * int(specs['TO Values'][0])) < 30:
            st.error('There should be at least 30 images generated. You are attempting to generate:\t'
                     + str(captionsToExtract * int(specs['TO Values'][0])) + '.\nPlease readjust your No. Images per prompt',
                     icon="🚨")
        else:
            COCOLoadingBar = st.progress(0, text="Scanning through COCO Dataset for relevant prompts. Please wait")
            prompts, cocoIDs = get_COCO_captions('./data/COCO_captions.json', target.lower(), COCOLoadingBar, captionsToExtract)
            if len(prompts) == 0:
                st.error('Whoops! Could not find **ANY** relevant COCO prompts for the target: ' + target.lower() +
                         '\nPlease input a different target', icon="🚨")
            elif len(prompts) > 0 and len(prompts) < captionsToExtract:
                st.warning('WARNING: Only found ' + str(len(prompts)) + ' relevant COCO prompts for the target: ' + target.lower() +
                           '\nWill work with these. Nothing to worry about!', icon="⚠️")
            else:
                st.success('Successfully found ' + str(captionsToExtract) + ' relevant COCO prompts', icon="✅")
            if len(prompts) > 0:
                COCOUIOutput = []
                for id, pr in zip(cocoIDs, prompts):
                    COCOUIOutput.append([id, pr])
                st.write('**Here are some of the randomised ' + '"' + target.lower() + '"' + ' captions extracted from the COCO dataset**')
                COCOUIOutput.insert(0, ('ID', 'Caption'))
                st.table(COCOUIOutput[:11])
                TASKprogressBar = st.progress(0, text="Generating Task-oriented images. Please wait.")
                TASKImages, TASKCaptions = MINFER.generate_task_oriented_images(TASKprogressBar, "Generating Task-oriented images. Please wait.",
                                                                                prompts, cocoIDs, int(specs["TO Values"][0]),
                                                                                int(specs["TO Values"][1]), int(specs["TO Values"][2]))

                EVALprogressBar = st.progress(0, text="Evaluating " + modelID + " Model Images. Please wait.")
                user_evaluation_variables.EVAL_METRICS = GBM.evaluate_t2i_model_images(TASKImages, TASKCaptions[0], EVALprogressBar, False, "TASK")

                # GBM.output_eval_results(user_evaluation_variables.EVAL_METRICS, 21)
                elapsedTime = time.time() - startTime
                # st.write("\U0001F553 Time Taken: ", str(datetime.timedelta(seconds=elapsedTime)).split(".")[0])

                user_evaluation_variables.NO_SAMPLES = len(TASKImages)
                user_evaluation_variables.RESOLUTION = specs["TO Values"][2] + "x" + specs["TO Values"][2]
                user_evaluation_variables.INFERENCE_STEPS = int(specs["TO Values"][1])
                user_evaluation_variables.DIST_BIAS = float(f"{user_evaluation_variables.EVAL_METRICS[2]:.4f}")
                user_evaluation_variables.HALLUCINATION = float(f"{np.mean(user_evaluation_variables.EVAL_METRICS[3]):.4f}")
                user_evaluation_variables.MISS_RATE = float(f"{np.mean(user_evaluation_variables.EVAL_METRICS[4]):.4f}")
                user_evaluation_variables.TASK_TARGET = target.lower()
                user_evaluation_variables.EVAL_ID = MCOMP.get_evaluation_id('task-oriented', True)
                user_evaluation_variables.DATE = datetime.datetime.utcnow().strftime('%d-%m-%Y')
                user_evaluation_variables.TIME = datetime.datetime.utcnow().strftime('%H:%M:%S')
                user_evaluation_variables.RUN_TIME = str(datetime.timedelta(seconds=elapsedTime)).split(".")[0]

                user_evaluation_variables.TASK_IMAGES = TASKImages
                user_evaluation_variables.TASK_CAPTIONS = TASKCaptions
                user_evaluation_variables.TASK_COCOIDs = cocoIDs

                user_evaluation_variables.CURRENT_EVAL_TYPE = 'task-oriented'

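# Zip the generated images (JPEG) together with a CSV listing the prompts
# (numbered for general evaluations, keyed by COCO ID for task-oriented ones).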
def download_and_zip_images(zipImagePath, images, captions, imageType):
    csvFileName = None
    if imageType == 'object':
        csvFileName = 'object_prompts.csv'
    elif imageType == 'occupation':
        csvFileName = 'occupation_prompts.csv'
    else:
        csvFileName = 'task-oriented_prompts.csv'
    with st.spinner("Zipping images..."):
        with zipfile.ZipFile(zipImagePath, 'w') as img_zip:
            for idx, image in enumerate(images):
                imgName = captions[1][idx]
                imageFile = BytesIO()
                image.save(imageFile, 'JPEG')
                img_zip.writestr(imgName, imageFile.getvalue())

            # Saving prompt data as accompanying csv file
            string_buffer = StringIO()
            csvwriter = csv.writer(string_buffer)

            if imageType in ['object', 'occupation']:
                csvwriter.writerow(['No.', 'Prompt'])
                for prompt, ii in zip(captions[0], range(len(captions[0]))):
                    csvwriter.writerow([ii + 1, prompt])
            else:
                csvwriter.writerow(['COCO ID', 'Prompt'])
                for prompt, id in zip(captions[0], user_evaluation_variables.TASK_COCOIDs):
                    csvwriter.writerow([id, prompt])

            img_zip.writestr(csvFileName, string_buffer.getvalue())
    st.success('Successfully zipped and downloaded images!', icon="✅")

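# Fill the images tab: one expander per image group (objects, occupations,
# task-oriented), each with its prompt list, a 3-column image grid, and a
# button that zips the images for download.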
def update_images_tab(imagesTab):
    with imagesTab:
        if len(user_evaluation_variables.OBJECT_IMAGES) > 0:
            with st.expander('Object-related Images'):
                user_evaluation_variables.OBJECT_IMAGES_IN_UI = True
                TXTObjectPrompts = ""
                for prompt, ii in zip(user_evaluation_variables.OBJECT_CAPTIONS[0], range(len(user_evaluation_variables.OBJECT_CAPTIONS[0]))):
                    TXTObjectPrompts += str(1 + ii) + '. ' + prompt + '\n'
                st.write("**Object-related General Bias Evaluation Images**")
                st.write("Number of Generated Images = ", len(user_evaluation_variables.OBJECT_IMAGES))
                st.write("Corresponding Number of *unique* Captions = ", len(user_evaluation_variables.OBJECT_CAPTIONS[0]))
                st.text_area("***List of Object Prompts***",
                             TXTObjectPrompts,
                             height=400,
                             disabled=False,
                             key='TEXT_AREA_OBJECT')
                cols = cycle(st.columns(3))
                for idx, image in enumerate(user_evaluation_variables.OBJECT_IMAGES):
                    next(cols).image(image, width=225, caption=user_evaluation_variables.OBJECT_CAPTIONS[1][idx])

                saveObjectImages = st.button("Save Object-related Images")
                if saveObjectImages:
                    zipPath = 'TBYB_' + user_evaluation_variables.USERNAME + '_' + user_evaluation_variables.EVAL_ID + '_object_related_images.zip'
                    download_and_zip_images(zipPath, user_evaluation_variables.OBJECT_IMAGES,
                                            user_evaluation_variables.OBJECT_CAPTIONS, 'object')

        if len(user_evaluation_variables.OCCUPATION_IMAGES) > 0:
            user_evaluation_variables.OCCUPATION_IMAGES_IN_UI = True
            with st.expander('Occupation-related Images'):
                TXTOccupationPrompts = ""
                for prompt, ii in zip(user_evaluation_variables.OCCUPATION_CAPTIONS[0], range(len(user_evaluation_variables.OCCUPATION_CAPTIONS[0]))):
                    TXTOccupationPrompts += str(1 + ii) + '. ' + prompt + '\n'
                st.write("**Occupation-related General Bias Evaluation Images**")
                st.write("Number of Generated Images = ", len(user_evaluation_variables.OCCUPATION_IMAGES))
                st.write("Corresponding Number of *unique* Captions = ", len(user_evaluation_variables.OCCUPATION_CAPTIONS[0]))
                st.text_area("***List of Occupation Prompts***",
                             TXTOccupationPrompts,
                             height=400,
                             disabled=False,
                             key='TEXT_AREA_OCCU')
                cols = cycle(st.columns(3))
                for idx, image in enumerate(user_evaluation_variables.OCCUPATION_IMAGES):
                    next(cols).image(image, width=225, caption=user_evaluation_variables.OCCUPATION_CAPTIONS[1][idx])

                saveOccupationImages = st.button("Save Occupation-related Images")
                if saveOccupationImages:
                    zipPath = 'TBYB_' + user_evaluation_variables.USERNAME + '_' + user_evaluation_variables.EVAL_ID + '_occupation_related_images.zip'
                    download_and_zip_images(zipPath, user_evaluation_variables.OCCUPATION_IMAGES,
                                            user_evaluation_variables.OCCUPATION_CAPTIONS, 'occupation')
        if len(user_evaluation_variables.TASK_IMAGES) > 0:
            with st.expander(user_evaluation_variables.TASK_TARGET + '-related Images'):
                user_evaluation_variables.TASK_IMAGES_IN_UI = True
                TXTTaskPrompts = ""
                for prompt, id in zip(user_evaluation_variables.TASK_CAPTIONS[0], user_evaluation_variables.TASK_COCOIDs):
                    TXTTaskPrompts += "ID_" + str(id) + '. ' + prompt + '\n'

                st.write("**Task-oriented Bias Evaluation Images. Target** = ", user_evaluation_variables.TASK_TARGET)
                st.write("Number of Generated Images = ", len(user_evaluation_variables.TASK_IMAGES))
                st.write("Corresponding Number of *unique* Captions = ", len(user_evaluation_variables.TASK_CAPTIONS[0]))
                st.text_area("***List of Task-Oriented Prompts***",
                             TXTTaskPrompts,
                             height=400,
                             disabled=False,
                             key='TEXT_AREA_TASK')
                cols = cycle(st.columns(3))
                for idx, image in enumerate(user_evaluation_variables.TASK_IMAGES):
                    next(cols).image(image, width=225, caption=user_evaluation_variables.TASK_CAPTIONS[1][idx])

                saveTaskImages = st.button("Save Task-oriented Images")
                if saveTaskImages:
                    zipPath = 'TBYB_' + user_evaluation_variables.USERNAME + '_' + user_evaluation_variables.EVAL_ID + '_' + user_evaluation_variables.TASK_TARGET + '-oriented_images.zip'
                    download_and_zip_images(zipPath, user_evaluation_variables.TASK_IMAGES,
                                            user_evaluation_variables.TASK_CAPTIONS, 'task-oriented')

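# Helpers: scan the COCO caption file for captions containing the target word
# (seeded shuffle, capped at NPrompts) and read a CSV file into a list of rows.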
def get_COCO_captions(filePath, target, progressBar, NPrompts=50):
    captionData = json.load(open(filePath))
    COCOCaptions = []
    COCOIDs = []
    random.seed(42)
    random.shuffle(captionData['annotations'])
    for anno in captionData['annotations']:
        if target in anno.get('caption').lower().split(' '):
            if len(COCOCaptions) < NPrompts:
                COCOCaptions.append(anno.get('caption').lower())
                COCOIDs.append(str(anno.get('id')))
                percentComplete = len(COCOCaptions) / NPrompts
                progressBar.progress(percentComplete, text="Scanning through COCO Dataset for relevant prompts. Please wait")
    return (COCOCaptions, COCOIDs)

def read_csv_to_list(filePath):
    data = []
    with open(filePath, 'r', newline='') as csvfile:
        csvReader = csv.reader(csvfile)
        for row in csvReader:
            data.append(row)
    return data
user_evaluation_variables.py
ADDED
@@ -0,0 +1,189 @@
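# user_evaluation_variables.py
# Module-level state shared across the Streamlit app: the current evaluation's
# settings, metrics, and generated images, plus helpers to persist results to
# the YAML evaluation databases and reset state between runs.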
import yaml
from yaml import safe_load
import streamlit as st

USERNAME = None
EVAL_ID = None
MODEL = None
MODEL_TYPE = None
NO_SAMPLES = None
RESOLUTION = None
INFERENCE_STEPS = None
GEN_OBJECTS = None
GEN_ACTIONS = None
GEN_OCCUPATIONS = None
TASK_TARGET = None
DIST_BIAS = None
HALLUCINATION = None
MISS_RATE = None
DATE = None
TIME = None
RUN_TIME = None

EVAL_METRICS = None
OBJECT_IMAGES = []
OCCUPATION_IMAGES = []
TASK_IMAGES = []
OBJECT_CAPTIONS = None
OCCUPATION_CAPTIONS = None
TASK_CAPTIONS = None
TASK_COCOIDs = None

OBJECT_IMAGES_IN_UI = False
OCCUPATION_IMAGES_IN_UI = False
TASK_IMAGES_IN_UI = False
CURRENT_EVAL_TYPE = None
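# Append the current evaluation's summary to the general or task-oriented YAML
# database under the logged-in username, creating the user's entry on first use.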
def update_evaluation_table(evalType, debugging):
    global USERNAME
    global EVAL_ID
    global MODEL
    global MODEL_TYPE
    global NO_SAMPLES
    global RESOLUTION
    global INFERENCE_STEPS
    global GEN_OBJECTS
    global GEN_ACTIONS
    global GEN_OCCUPATIONS
    global TASK_TARGET
    global DIST_BIAS
    global HALLUCINATION
    global MISS_RATE
    global DATE
    global TIME
    global RUN_TIME
    global CURRENT_EVAL_TYPE

    if debugging:
        st.write("Username: ", USERNAME)
        st.write("EVAL_ID: ", EVAL_ID)
        st.write("MODEL: ", MODEL)
        st.write("MODEL_TYPE: ", MODEL_TYPE)
        st.write("NO_SAMPLES: ", NO_SAMPLES)
        st.write("RESOLUTION: ", RESOLUTION)
        st.write("INFERENCE_STEPS: ", INFERENCE_STEPS)
        st.write("GEN_OBJECTS: ", GEN_OBJECTS)
        st.write("GEN_ACTIONS: ", GEN_ACTIONS)
        st.write("GEN_OCCUPATIONS: ", GEN_OCCUPATIONS)
        st.write("TASK_TARGET: ", TASK_TARGET)
        st.write("DIST_BIAS: ", DIST_BIAS)
        st.write("HALLUCINATION: ", HALLUCINATION)
        st.write("MISS_RATE: ", MISS_RATE)
        st.write("DATE: ", DATE)
        st.write("TIME: ", TIME)
        st.write("RUN_TIME: ", RUN_TIME)

    newEvaluationData = None
    if evalType == 'general':
        evalDataPath = './data/general_eval_database.yaml'
        newEvaluationData = {
            "Model": MODEL,
            "Model Type": MODEL_TYPE,
            "No. Samples": NO_SAMPLES,
            "Resolution": RESOLUTION,
            "Inference Steps": INFERENCE_STEPS,
            "Objects": GEN_OBJECTS,
            "Actions": GEN_ACTIONS,
            "Occupations": GEN_OCCUPATIONS,
            "Dist. Bias": DIST_BIAS,
            "Hallucination": HALLUCINATION,
            "Gen. Miss Rate": MISS_RATE,
            "Date": DATE,
            "Time": TIME,
            "Run Time": RUN_TIME
        }
    else:
        evalDataPath = './data/task_oriented_eval_database.yaml'
        newEvaluationData = {
            "Model": MODEL,
            "Model Type": MODEL_TYPE,
            "No. Samples": NO_SAMPLES,
            "Resolution": RESOLUTION,
            "Inference Steps": INFERENCE_STEPS,
            "Target": TASK_TARGET,
            "Dist. Bias": DIST_BIAS,
            "Hallucination": HALLUCINATION,
            "Gen. Miss Rate": MISS_RATE,
            "Date": DATE,
            "Time": TIME,
            "Run Time": RUN_TIME
        }
    with open(evalDataPath, 'r') as f:
        yamlData = safe_load(f)

    # st.write("OLD DATABASE ", yamlData['evaluations']['username'][USERNAME])
    if USERNAME not in yamlData['evaluations']['username']:
        if TASK_TARGET is not None:
            st.success('Congrats on your first Task-Oriented Bias evaluation!', icon='\U0001F388')
        else:
            st.success('Congrats on your first General Bias evaluation!', icon='\U0001F388')
        yamlData['evaluations']['username'][USERNAME] = {}

    yamlData['evaluations']['username'][USERNAME][EVAL_ID] = newEvaluationData

    st.write("NEW DATABASE ", yamlData['evaluations']['username'][USERNAME])
    with open(evalDataPath, 'w') as yaml_file:
        yaml_file.write(yaml.dump(yamlData, default_flow_style=False))

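# Clear per-evaluation state (keeping the loaded model) so a new general or
# task-oriented evaluation can be run cleanly.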
def reset_variables(evalType):
    global USERNAME
    global EVAL_ID
    global MODEL
    global MODEL_TYPE
    global NO_SAMPLES
    global RESOLUTION
    global INFERENCE_STEPS
    global GEN_OBJECTS
    global GEN_ACTIONS
    global GEN_OCCUPATIONS
    global TASK_TARGET
    global DIST_BIAS
    global HALLUCINATION
    global MISS_RATE
    global DATE
    global TIME
    global RUN_TIME
    global EVAL_METRICS
    global OBJECT_IMAGES
    global OCCUPATION_IMAGES
    global TASK_IMAGES
    global OBJECT_CAPTIONS
    global OCCUPATION_CAPTIONS
    global TASK_CAPTIONS
    global TASK_COCOIDs
    global OBJECT_IMAGES_IN_UI
    global OCCUPATION_IMAGES_IN_UI
    global TASK_IMAGES_IN_UI
    global CURRENT_EVAL_TYPE
    EVAL_ID = None
    # MODEL = None
    # MODEL_TYPE = None
    NO_SAMPLES = None
    RESOLUTION = None
    INFERENCE_STEPS = None
    GEN_OBJECTS = None
    GEN_ACTIONS = None
    GEN_OCCUPATIONS = None
    TASK_TARGET = None
    DIST_BIAS = None
    HALLUCINATION = None
    MISS_RATE = None
    DATE = None
    TIME = None
    RUN_TIME = None

    EVAL_METRICS = None
    CURRENT_EVAL_TYPE = None

    if evalType == 'general':
        OBJECT_IMAGES = []
        OCCUPATION_IMAGES = []
        OBJECT_CAPTIONS = None
        OCCUPATION_CAPTIONS = None
        OBJECT_IMAGES_IN_UI = False
        OCCUPATION_IMAGES_IN_UI = False
    else:
        TASK_IMAGES = []
        TASK_CAPTIONS = None
        TASK_COCOIDs = None
        TASK_IMAGES_IN_UI = False