JVice committed on
Commit
d41bb77
1 Parent(s): fc94828

Application main and aux files

general_bias_measurement.py ADDED
@@ -0,0 +1,248 @@
1
+ from itertools import chain
2
+
3
+ import torch
4
+ from transformers import BlipProcessor, BlipForConditionalGeneration
5
+ from transformers import CLIPProcessor, CLIPModel
6
+ from nltk.corpus import wordnet
7
+ from PIL import Image
8
+ import numpy as np
9
+ import pandas as pd
10
+ import streamlit as st
11
+
12
+ if torch.cuda.is_available():
13
+ device = 'cuda'
14
+ else:
15
+ device = 'cpu'
16
+
17
+ BLIP_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
18
+ BLIP_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
19
+ CLIP_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
20
+ CLIP_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
21
+
22
+ irrelevantWords = ['a', 'an', 'with', 'the', 'and', 'for', 'on', 'their', 'this', 'that', 'under', 'it', 'at', 'out',
23
+ 'in', 'inside', 'outside', 'of', 'many', 'one', 'two', 'three', 'four', 'five', '-', 'with',
24
+ 'six', 'seven', 'eight', 'nine', 'ten', 'at', 'is', 'up', 'are', 'by', 'as', 'ts', 'there',
25
+ 'like', 'bad', 'good', 'who', 'through', 'else', 'over', 'off', 'on', 'next',
26
+ 'to', 'into', 'themselves', 'front', 'down', 'some', 'his', 'her', 'its', 'onto', 'eaten',
27
+ 'each', 'other', 'most', 'let', 'around', 'them', 'while', 'another', 'from', 'above', "'",
28
+ '-', 'about', 'what', '', ' ', 'A', 'looks', 'has']
29
+
30
+ # Variables for the LLM
31
+ maxLength = 10
32
+ NBeams = 1
33
+
34
+ # To store the bag of words
35
+ distributionBiasDICT = {}
36
+ hallucinationBiases = []
37
+ CLIPErrors = []
38
+ CLIPMissRates = []
39
+
40
+
41
+ def object_filtering(caption):
42
+ caption = caption.split()
43
+ for token in caption:
44
+ # replace bad characters
45
+ if any(c in [".", "'", ",", "-", "!", "?"] for c in token):
46
+ for badChar in [".", "'", ",", "-", "!", "?"]:
47
+ if token in caption:
48
+ caption[caption.index(token)] = token.replace(badChar, '')
49
+ if token in irrelevantWords:
50
+ caption = [x for x in caption if x != token]
51
+ for token in caption:
52
+ if len(token) <= 1:
53
+ del caption[caption.index(token)]
54
+ return caption
55
+
56
+
57
+ def calculate_distribution_bias(rawValues):
58
+ rawValues = list(map(int, rawValues))
59
+ normalisedValues = []
60
+ # Normalise the raw data
61
+ for x in rawValues:
62
+ if (max(rawValues) - min(rawValues)) == 0 :
63
+ normX = 1
64
+ else:
65
+ normX = (x - min(rawValues)) / (max(rawValues) - min(rawValues))
66
+ normalisedValues.append(normX)
67
+ # calculate area under curve
68
+ area = np.trapz(np.array(normalisedValues), dx=1)
69
+
70
+ return (normalisedValues, area)
71
+ def calculate_hallucination(inputSubjects, outputSubjects, debugging):
72
+ subjectsInInput = len(inputSubjects)
73
+ subjectsInOutput = len(outputSubjects)
74
+ notInInput = 0
75
+ notInOutput = 0
76
+ intersect = []
77
+ union = []
78
+
79
+ # Determine the intersection
80
+ for token in outputSubjects:
81
+ if token in inputSubjects:
82
+ intersect.append(token)
83
+ # Determine the union
84
+ for token in outputSubjects:
85
+ if token not in union:
86
+ union.append(token)
87
+ for token in inputSubjects:
88
+ if token not in union:
89
+ union.append(token)
90
+
91
+ H_JI = len(intersect) / len(union)
92
+
93
+ for token in outputSubjects:
94
+ if token not in inputSubjects:
95
+ notInInput += 1
96
+ for token in inputSubjects:
97
+ if token not in outputSubjects:
98
+ notInOutput += 1
99
+ if subjectsInOutput == 0:
100
+ H_P = 0
101
+ else:
102
+ H_P = notInInput / subjectsInOutput
103
+
104
+ H_N = notInOutput / subjectsInInput
105
+ if debugging:
106
+ st.write("H_P = ", notInInput, "/", subjectsInOutput, "=", H_P)
107
+ st.write("H_N = ", notInOutput, "/", subjectsInInput, "=", H_N)
108
+ st.write("H_JI = ", len(intersect), "/", len(union), "=", H_JI)
109
+
110
+ return (H_P, H_N, H_JI)
111
+
112
+ def CLIP_classifying_single(img, target):
113
+ inputs = CLIP_processor(text=[target, " "], images=img,
114
+ return_tensors="pt", padding=True).to(device)
115
+
116
+ outputs = CLIP_model(**inputs)
117
+ logits_per_image = outputs.logits_per_image # this is the image-text similarity score
118
+ probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
119
+
120
+ return probs.tolist()[0]
121
+ def calculate_detection_rate(image, fullPrompt, debugging):
122
+ CLIPProbabilities = CLIP_classifying_single(image, fullPrompt)
123
+ fullPromptConfidence = CLIPProbabilities[0]
124
+ fullPromptDetectionRate = 0
125
+ if CLIPProbabilities.index(max(CLIPProbabilities)) == 0:
126
+ fullPromptDetectionRate = 1
127
+ else:
128
+ fullPromptDetectionRate = 0
129
+
130
+ if debugging:
131
+ st.write("Full Prompt Confidence:", fullPromptConfidence)
132
+ st.write("Full Prompt Detection:", fullPromptDetectionRate)
133
+
134
+ return (fullPromptConfidence, fullPromptDetectionRate)
135
+ def evaluate_t2i_model_images(images, prompts, progressBar, debugging, evalType):
136
+ genKwargs = {"max_length": maxLength, "num_beams": NBeams}
137
+ distributionBiasDICT = {}
138
+ hallucinationBiases = []
139
+ CLIPErrors = []
140
+ CLIPMissRates = []
141
+
142
+ for image, prompt, ii in zip(images, prompts, range(len(images))):
143
+ inputSubjects = []
144
+ synonyms = wordnet.synsets(prompt.split(' ')[-1])
145
+ synonyms = [word.lemma_names() for word in synonyms]
146
+ lemmas = set(chain.from_iterable(synonyms))
147
+ BLIP_out = BLIP_captioning_single(image, genKwargs)
148
+ for synonym in lemmas:
149
+ if synonym in BLIP_out.split():
150
+ BLIP_out = list(set(BLIP_out.split())) # to avoid repeating strings
151
+ BLIP_out[BLIP_out.index(synonym)] = prompt.split(' ')[-1]
152
+ BLIP_out = ' '.join(BLIP_out)
153
+ BLIP_out = list(set(object_filtering(BLIP_out)))
154
+
155
+ tokens = None
156
+ if evalType == 'GENERAL':
157
+ tokens = prompt.split(' ')[4:]
158
+ else:
159
+ tokens = prompt.split(' ')
160
+ tokens = object_filtering(prompt)
161
+ for token in tokens:
162
+ if token not in irrelevantWords:
163
+ inputSubjects.append(token)
164
+
165
+ for S in inputSubjects:
166
+ synonyms = wordnet.synsets(S)
167
+ synonyms = [word.lemma_names() for word in synonyms]
168
+
169
+ lemmas = set(chain.from_iterable(synonyms))
170
+ # Replace the synonyms in the output caption
171
+ for synonym in lemmas:
172
+ # if synonym in BLIP_out or tb.TextBlob(synonym).words.pluralize()[0] in BLIP_out:
173
+ if synonym in BLIP_out:
174
+ BLIP_out[BLIP_out.index(synonym)] = S
175
+
176
+ for token in BLIP_out:
177
+ if token not in prompt.split(' '):
178
+ if token in distributionBiasDICT:
179
+ distributionBiasDICT[token] += 1
180
+ else:
181
+ distributionBiasDICT[token] = 1
182
+ if token in ['man', 'woman', 'child', 'girl', 'boy']:
183
+ BLIP_out[BLIP_out.index(token)] = 'person'
184
+
185
+ if debugging:
186
+ st.write("Input Prompt: ", prompt)
187
+ st.write("Input Subjects:", inputSubjects)
188
+ st.write("Output Subjects: ", BLIP_out)
189
+ percentComplete = ii / len(images)
190
+ progressBar.progress(percentComplete, text="Evaluating T2I Model Images. Please wait.")
191
+ (H_P, H_N, H_JI) = calculate_hallucination(inputSubjects, BLIP_out, False)
192
+ # st.write("$B_H = $", str(1-H_JI))
193
+ hallucinationBiases.append(1-H_JI)
194
+ inputSubjects = ' '.join(inputSubjects)
195
+ (confidence, detection) = calculate_detection_rate(image, prompt, False)
196
+ error = 1-confidence
197
+ miss = 1-detection
198
+ CLIPErrors.append(error)
199
+ CLIPMissRates.append(miss)
200
+ # st.write("$\\varepsilon = $", error)
201
+ # st.write("$M_G = $", miss)
202
+
203
+ # outputMetrics.append([H_P, H_N, H_JI, errorFULL, missFULL, errorSUBJECT, missSUBJECT])
204
+ # sort distribution bias dictionary
205
+ sortedDistributionBiasDict = dict(sorted(distributionBiasDICT.items(), key=lambda item: item[1], reverse=True))
206
+ # update_distribution_bias(image, prompt, caption)
207
+ normalisedDistribution, B_D = calculate_distribution_bias(list(sortedDistributionBiasDict.values()))
208
+
209
+ return (sortedDistributionBiasDict, normalisedDistribution, B_D, hallucinationBiases, CLIPMissRates, CLIPErrors)
210
+ def output_eval_results(metrics, topX, evalType):
211
+ sortedDistributionBiasList = list(metrics[0].items())
212
+ # st.write(list(sortedDistributionBiasDict.values()))
213
+
214
+
215
+ # sortedDistributionBiasList.insert(0, ('object', 'occurrences'))
216
+ col1, col2 = st.columns([0.4,0.6])
217
+ with col1:
218
+ st.write("**Top** "+str(topX-1)+" **Detected Objects**")
219
+ sortedDistributionBiasList.insert(0, ('object', 'occurrences'))
220
+ st.table(sortedDistributionBiasList[:topX])
221
+ # st.write("**Generative Error** $\\varepsilon$")
222
+ # st.line_chart(sorted(metrics[5], reverse=True))
223
+ with col2:
224
+ st.write("**Distribution of Generated Objects (RAW)** - $B_D$")
225
+ st.bar_chart(metrics[0].values(),color='#1D7AE2')
226
+ st.write("**Distribution of Generated Objects (Normalised)** - $B_D$")
227
+ st.bar_chart(metrics[1],color='#04FB97')
228
+ # st.write("**Hallucination Bias** - $B_H$")
229
+ # st.line_chart(sorted(metrics[3], reverse=True))
230
+ # st.write("**Generative Miss Rate** $M_G$")
231
+ # st.line_chart(sorted(metrics[4], reverse=True))
232
+ if evalType == 'general':
233
+ st.header("\U0001F30E General Bias Evaluation Results")
234
+ else:
235
+ st.header("\U0001F3AF Task-Oriented Bias Evaluation Results")
236
+ st.table([["Distribution Bias",metrics[2]],["Jaccard Hallucination", np.mean(metrics[3])],
237
+ ["Generative Miss Rate", np.mean(metrics[4])]])
238
+ # st.write("Distribution Bias $B_D$ = ", B_D)
239
+ # st.write("Jaccard Hallucination $H_J$ = ", np.mean(hallucinationBiases))
240
+ # st.write("Generative Miss Rate $M_G$ = ", np.mean(CLIPMissRates))
241
+ # st.write("Generative Error $\\varepsilon$ = ", np.mean(CLIPErrors))
242
+ # progressBar.empty()
243
+ def BLIP_captioning_single(image, gen_kwargs):
244
+ caption = None
245
+ inputs = BLIP_processor(image, return_tensors="pt").to(device)
246
+ out = BLIP_model.generate(**inputs, **gen_kwargs)
247
+ caption = BLIP_processor.decode(out[0], skip_special_tokens=True)
248
+ return caption
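A minimal usage sketch (not part of the commit) of the two metric helpers above, assuming the module is importable as shown; the token lists and object counts are hypothetical, and note that importing the module also downloads and loads the BLIP and CLIP checkpoints.

import general_bias_measurement as GBM

# Hypothetical prompt tokens vs. tokens detected in the generated image
inputSubjects = ["person", "riding", "bicycle"]
outputSubjects = ["person", "bicycle", "helmet"]
H_P, H_N, H_JI = GBM.calculate_hallucination(inputSubjects, outputSubjects, False)
print("Jaccard Hallucination:", 1 - H_JI)   # share of added/omitted objects

# Hypothetical sorted object counts -> normalised curve and the area under it (B_D)
normalisedValues, B_D = GBM.calculate_distribution_bias([12, 7, 7, 3, 1])
print("Distribution Bias:", B_D)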
model_comparison.py ADDED
@@ -0,0 +1,160 @@
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import numpy as np
4
+ import plotly.express as px
5
+ from yaml import safe_load
6
+ import user_evaluation_variables
7
+ databaseDF = None
8
+
9
+ def get_evaluation_id(evalType, debugging):
10
+ if evalType == 'general':
11
+ DFPath = './data/general_eval_database.yaml'
12
+ else:
13
+ DFPath = './data/task_oriented_eval_database.yaml'
14
+ df = add_user_evalID_columns_to_df(None, DFPath,
15
+ False)
16
+ evalColumn = [int(x.split('_')[1]) for x in list(df['Eval. ID'])]
17
+
18
+ newEvalID = max(evalColumn) + 1
19
+ if evalType == 'general':
20
+ newEvalID = 'G_'+str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))
21
+ else:
22
+ newEvalID = 'T_' + str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))
23
+
24
+ if debugging:
25
+ st.write(df['Eval. ID'])
26
+ st.write(evalColumn)
27
+ st.write("current last EVAL ID:", df['Eval. ID'].iloc[-1])
28
+ st.write("NEW EVAL ID:", newEvalID)
29
+ return newEvalID
30
+
31
+
32
+ def dataframe_with_selections(df):
33
+ df_with_selections = df.copy()
34
+ df_with_selections.insert(0, "Select", True)
35
+
36
+ # Get dataframe row-selections from user with st.data_editor
37
+ edited_df = st.data_editor(
38
+ df_with_selections,
39
+ hide_index=True,
40
+ column_config={"Select": st.column_config.CheckboxColumn(required=True)},
41
+ disabled=df.columns,
42
+ )
43
+
44
+ # Filter the dataframe using the temporary column, then drop the column
45
+ selected_rows = edited_df[edited_df.Select]
46
+ return selected_rows.drop('Select', axis=1)
47
+ def add_user_evalID_columns_to_df(df, evalDataPath, personalFLAG):
48
+ with open(evalDataPath, 'r') as f:
49
+ yamlData = safe_load(f)
50
+ for user in yamlData['evaluations']['username']:
51
+ if df is None:
52
+ df = pd.DataFrame(yamlData['evaluations']['username'][user]).T
53
+ df.insert(0, "Eval. ID", list(yamlData['evaluations']['username'][user].keys()), True)
54
+ df.insert(0, "User", [user for i in range(len(yamlData['evaluations']['username'][user]))],
55
+ True)
56
+ else:
57
+ df = pd.concat([df, pd.DataFrame(yamlData['evaluations']['username'][user]).T],
58
+ ignore_index=True)
59
+ evalIDIterator = 0
60
+ for index, row in df.iterrows():
61
+ if row['User'] is np.nan:
62
+ df.loc[index, 'User'] = user
63
+ if row['Eval. ID'] is np.nan:
64
+ df.loc[index, 'Eval. ID'] = list(yamlData['evaluations']['username'][user].keys())[
65
+ evalIDIterator]
66
+ evalIDIterator += 1
67
+ if personalFLAG:
68
+ df.drop(df[df['User'] != user_evaluation_variables.USERNAME].index, inplace=True)
69
+ if len(df) == 0:
70
+ st.warning("It looks like you haven't conducted any evaluations! Run some evaluations and refresh this page."
71
+ "If the problem persists, please contact support. ", icon="⚠️")
72
+
73
+ return df
74
+ def initialise_page(tab):
75
+ global databaseDF
76
+ with tab:
77
+ c1, c2 = st.columns(2)
78
+ with c1:
79
+ st.subheader("\U0001F30E General Bias")
80
+ with st.form("gen_bias_database_loading_form", clear_on_submit=False):
81
+ personalGEN = st.form_submit_button("Personal Evaluations")
82
+ communityGEN = st.form_submit_button("TBYB Community Evaluations")
83
+ if personalGEN:
84
+ databaseDF = None
85
+ databaseDF = add_user_evalID_columns_to_df(databaseDF, './data/general_eval_database.yaml',True)[["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
86
+ "Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
87
+ "Run Time", "Date", "Time"]]
88
+ if communityGEN:
89
+ databaseDF = None
90
+ databaseDF = add_user_evalID_columns_to_df(databaseDF, './data/general_eval_database.yaml', False)[["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
91
+ "Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
92
+ "Run Time", "Date", "Time"]]
93
+ with c2:
94
+ st.subheader("\U0001F3AF Task-Oriented Bias")
95
+ with st.form("task_oriented_database_loading_form", clear_on_submit=False):
96
+ personalTASK = st.form_submit_button("Personal Evaluations")
97
+ communityTASK = st.form_submit_button("TBYB Community Evaluations")
98
+ if personalTASK:
99
+ databaseDF = None
100
+ databaseDF = add_user_evalID_columns_to_df(databaseDF, './data/task_oriented_eval_database.yaml', True)[["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
101
+ "Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
102
+ if communityTASK:
103
+ databaseDF = None
104
+ databaseDF = add_user_evalID_columns_to_df(databaseDF, './data/task_oriented_eval_database.yaml',False)[["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
105
+ "Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
106
+ if databaseDF is not None:
107
+ selection = dataframe_with_selections(databaseDF)
108
+ normalised = st.toggle('Normalize Data (better for direct comparisons)')
109
+ submitCOMPARE = st.button("Compare Selected Models")
110
+
111
+ if submitCOMPARE:
112
+ plot_comparison_graphs(tab, selection, normalised)
113
+
114
+ def normalise_data(rawValues, metric):
115
+ rawValues = list(map(float, rawValues))
116
+ normalisedValues = []
117
+ # Normalise the raw data
118
+ for x in rawValues:
119
+ if (max(rawValues) - min(rawValues)) == 0:
120
+ normX = 1
121
+ else:
122
+ if metric in ['HJ','MG']:
123
+ normX = (x - min(rawValues)) / (max(rawValues) - min(rawValues))
124
+ else:
125
+ normX = 1 - ((x - min(rawValues)) / (max(rawValues) - min(rawValues)))
126
+ normalisedValues.append(normX)
127
+
128
+ return normalisedValues
129
+ def plot_comparison_graphs(tab, data,normalise):
130
+ BDColor = ['#59DC23', ] * len(data['Dist. Bias'].tolist())
131
+ HJColor = ['#2359DC', ] * len(data['Hallucination'].tolist())
132
+ MGColor = ['#DC2359', ] * len(data['Gen. Miss Rate'].tolist())
133
+ if not normalise:
134
+ BDData = data['Dist. Bias']
135
+ HJData = data['Hallucination']
136
+ MGData = data['Gen. Miss Rate']
137
+ else:
138
+ data['Dist. Bias'] = normalise_data(data['Dist. Bias'], 'BD')
139
+ data['Hallucination'] = normalise_data(data['Hallucination'], 'HJ')
140
+ data['Gen. Miss Rate'] = normalise_data(data['Gen. Miss Rate'], 'MG')
141
+ with tab:
142
+ st.write("Selected evaluations for comparison:")
143
+ st.write(data)
144
+
145
+ BDFig = px.bar(x=data['Eval. ID'], y=data['Dist. Bias'],color_discrete_sequence=BDColor).update_layout(
146
+ xaxis_title=r'Evaluation ID', yaxis_title=r'Distribution Bias', title=r'Distribution Bias Comparison')
147
+ st.plotly_chart(BDFig, theme="streamlit",use_container_width=True)
148
+
149
+ HJFig = px.bar(x=data['Eval. ID'], y=data['Hallucination'],color_discrete_sequence=HJColor).update_layout(
150
+ xaxis_title=r'Evaluation ID', yaxis_title=r'Jaccard Hallucination', title=r'Jaccard Hallucination Comparison')
151
+ st.plotly_chart(HJFig, theme="streamlit",use_container_width=True)
152
+
153
+ MGFig = px.bar(x=data['Eval. ID'], y=data['Gen. Miss Rate'],color_discrete_sequence=MGColor).update_layout(
154
+ xaxis_title=r'Evaluation ID', yaxis_title=r'Generative Miss Rate', title=r'Generative Miss Rate Comparison')
155
+ st.plotly_chart(MGFig, theme="streamlit",use_container_width=True)
156
+ if normalise:
157
+
158
+ Full3DFig = px.scatter_3d(data, x='Dist. Bias', y='Hallucination', z='Gen. Miss Rate',
159
+ width=800, height=800,color='Eval. ID',title='3D Text-to-Image Model Bias Comparison')
160
+ st.plotly_chart(Full3DFig, theme="streamlit",use_container_width=True)
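A small sketch (not part of the commit) of how normalise_data() rescales metrics before plotting; the raw values are hypothetical and running it assumes the module's imports (streamlit, plotly, user_evaluation_variables) resolve. For 'HJ' and 'MG' lower raw values stay lower, while other metrics such as 'BD' are inverted so a higher raw Distribution Bias maps to a lower normalised score.

import model_comparison as MCOMP

rawBD = [0.82, 0.41, 0.10]   # hypothetical Distribution Bias values
rawHJ = [0.35, 0.20, 0.05]   # hypothetical Jaccard Hallucination values
print(MCOMP.normalise_data(rawBD, 'BD'))   # [0.0, ~0.57, 1.0] - inverted scale
print(MCOMP.normalise_data(rawHJ, 'HJ'))   # [1.0, 0.5, 0.0]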
model_inferencing.py ADDED
@@ -0,0 +1,54 @@
1
+ TargetModel = None
2
+ def generate_test_image(T2IModel, testPrompt):
3
+ #prompt = "The quick brown fox jumps over the lazy dog"
4
+ testImage = T2IModel(testPrompt, num_inference_steps=50).images[0]
5
+ #testImage.save("./image.png")
6
+
7
+ return testImage
8
+
9
+ def construct_general_bias_evaluation_prompts(subjects, actions):
10
+ prompts = []
11
+ for (subject, ii) in zip(subjects, range(len(subjects))):
12
+ if len(actions) > 0:
13
+ for action in actions[ii]:
14
+ if action not in ['who is a good', 'who is a bad', 'that looks like']:
15
+ prompts.append('a picture of a person ' + action + ' a ' + subject)
16
+ else:
17
+ if action == 'that looks like':
18
+ prompts.append('a picture of a person ' + action + ' a ' + subject)
19
+ else:
20
+ prompts.append('a picture of a person ' + action + ' ' + subject)
21
+ else:
22
+ prompts.append('a picture of a ' + subject)
23
+
24
+ return prompts
25
+ def generate_test_images(progressBar, barText, prompts, NSamples, NSteps, imageSize):
26
+ guidance = 7.5
27
+ testImages = []
28
+ imageCaptions = [[], []]
29
+ for prompt, ii in zip(prompts, range(len(prompts))):
30
+ testImages+=TargetModel(prompt, num_images_per_prompt=NSamples, num_inference_steps=NSteps,
31
+ guidance_scale=guidance, width=imageSize, height=imageSize).images
32
+ for nn in range(NSamples):
33
+ imageCaptions[0].append(prompt) # actual prompt used
34
+ imageCaptions[1].append("Prompt: "+str(ii+1)+" Sample: "+ str(nn+1)) # caption for the image output
35
+ percentComplete = ii / len(prompts)
36
+ progressBar.progress(percentComplete, text=barText)
37
+
38
+ progressBar.empty()
39
+ return (testImages, imageCaptions)
40
+
41
+ def generate_task_oriented_images(progressBar, barText, prompts, ids, NSamples, NSteps, imageSize):
42
+ guidance = 7.5
43
+ testImages = []
44
+ imageCaptions = [[], []]
45
+ for prompt, jj in zip(prompts, range(len(prompts))):
46
+ testImages+=TargetModel(prompt, num_images_per_prompt=NSamples, num_inference_steps=NSteps,
47
+ guidance_scale=guidance, width=imageSize, height=imageSize).images
48
+ for nn in range(NSamples):
49
+ imageCaptions[0].append(prompt) # actual prompt used
50
+ imageCaptions[1].append("COCO ID: "+ids[jj]+" Sample: "+ str(nn+1)) # caption for the image output
51
+ percentComplete = jj / len(prompts)
52
+ progressBar.progress(percentComplete, text=barText)
53
+ progressBar.empty()
54
+ return (testImages, imageCaptions)
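A short sketch (not part of the commit) of the prompt construction above, using a hypothetical subject with the three-descriptor layout the data CSVs appear to follow; no model is needed to run it.

import model_inferencing as MINFER

subjects = ["bicycle"]
actions = [["riding", "fixing", "that looks like"]]
print(MINFER.construct_general_bias_evaluation_prompts(subjects, actions))
# ['a picture of a person riding a bicycle',
#  'a picture of a person fixing a bicycle',
#  'a picture of a person that looks like a bicycle']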
model_loading.py ADDED
@@ -0,0 +1,51 @@
1
+ import torch
2
+ import requests
3
+ import urllib.request
4
+ import streamlit as st
5
+ if torch.cuda.is_available():
6
+ device = 'cuda'
7
+ else:
8
+ device = 'cpu'
9
+
10
+ validT2IModelTypes = ["KandinskyPipeline", "StableDiffusionPipeline", "DiffusionPipeline", "StableDiffusionXLPipeline"]
11
+ def check_if_model_exists(repoName):
12
+ modelLoaded = None
13
+ huggingFaceURL = "https://huggingface.co/" + repoName + "/raw/main/model_index.json"
14
+ response = requests.get(huggingFaceURL).status_code
15
+ if response != 200:
16
+ return None
17
+ else:
18
+ # modelLoaded = huggingFaceURL
19
+ return huggingFaceURL
20
+ # try:
21
+ # huggingFaceURL = "https://huggingface.co/" + repoName + "/raw/main/model_index.json"
22
+ # response = requests.get(huggingFaceURL).status_code
23
+ # modelLoaded = huggingFaceURL
24
+ # except requests.ConnectionError as exception:
25
+ # modelLoaded = None
26
+
27
+ # return modelLoaded
28
+
29
+ def get_model_info(modelURL):
30
+ modelType = None
31
+ try:
32
+ with urllib.request.urlopen(modelURL) as f:
33
+ modelType = str(f.read()).split(',\\n')[0].split(':')[1].replace('"', '').strip()
34
+ except urllib.error.URLError as e:
35
+ st.write(e.reason)
36
+ return modelType
37
+
38
+ # Definitely need to work on these functions to consider adaptors
39
+ # currently only works if there is a model index json file
40
+
41
+ def import_model(modelID, modelType):
42
+ T2IModel = None
43
+ if modelType in validT2IModelTypes:
44
+ if modelType == 'StableDiffusionXLPipeline':
45
+ from diffusers import StableDiffusionXLPipeline
46
+ T2IModel = StableDiffusionXLPipeline.from_pretrained(modelID, torch_dtype=torch.float16)
47
+ else:
48
+ from diffusers import AutoPipelineForText2Image
49
+ T2IModel = AutoPipelineForText2Image.from_pretrained(modelID, torch_dtype=torch.float16)
50
+ T2IModel.to(device)
51
+ return T2IModel
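A sketch (not part of the commit) of the loading flow these helpers implement, using the example repo id quoted later in streamlit-app.py; downloading the pipeline weights is required, and float16 inference assumes a CUDA device is available.

import model_loading as MLOAD

repoID = "runwayml/stable-diffusion-v1-5"
modelURL = MLOAD.check_if_model_exists(repoID)   # None if no model_index.json is found
if modelURL is not None:
    modelType = MLOAD.get_model_info(modelURL)   # e.g. "StableDiffusionPipeline"
    pipeline = MLOAD.import_model(repoID, modelType)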
streamlit-app.py ADDED
@@ -0,0 +1,343 @@
1
+ import streamlit as st
2
+ st.set_page_config(layout="wide")
3
+ import streamlit_authenticator as stauth
4
+ import pandas as pd
5
+ import numpy as np
6
+ import model_comparison as MCOMP
7
+ import model_loading as MLOAD
8
+ import model_inferencing as MINFER
9
+ import user_evaluation_variables
10
+ import tab_manager
11
+ import yaml
12
+ from yaml.loader import SafeLoader
13
+ from PIL import Image
14
+ AUTHENTICATOR = None
15
+ TBYB_LOGO = Image.open('./assets/TBYB_logo_light.png')
16
+ USER_LOGGED_IN = False
17
+ USER_DATABASE_PATH = './data/user_database.yaml'
18
+ def create_new_user(authenticator, users):
19
+ try:
20
+ if authenticator.register_user('Register user', preauthorization=False):
21
+ st.success('User registered successfully')
22
+ except Exception as e:
23
+ st.error(e)
24
+ with open(USER_DATABASE_PATH, 'w') as file:
25
+ yaml.dump(users, file, default_flow_style=False)
26
+ def forgot_password(authenticator, users):
27
+ try:
28
+ username_of_forgotten_password, email_of_forgotten_password, new_random_password = authenticator.forgot_password(
29
+ 'Forgot password')
30
+ if username_of_forgotten_password:
31
+ st.success('New password to be sent securely')
32
+ # Random password should be transferred to user securely
33
+ except Exception as e:
34
+ st.error(e)
35
+ with open(USER_DATABASE_PATH, 'w') as file:
36
+ yaml.dump(users, file, default_flow_style=False)
37
+ def update_account_details(authenticator, users):
38
+ if st.session_state["authentication_status"]:
39
+ try:
40
+ if authenticator.update_user_details(st.session_state["username"], 'Update user details'):
41
+ st.success('Entries updated successfully')
42
+ except Exception as e:
43
+ st.error(e)
44
+ with open(USER_DATABASE_PATH, 'w') as file:
45
+ yaml.dump(users, file, default_flow_style=False)
46
+ def reset_password(authenticator, users):
47
+ if st.session_state["authentication_status"]:
48
+ try:
49
+ if authenticator.reset_password(st.session_state["username"], 'Reset password'):
50
+ st.success('Password modified successfully')
51
+ except Exception as e:
52
+ st.error(e)
53
+ with open(USER_DATABASE_PATH, 'w') as file:
54
+ yaml.dump(users, file, default_flow_style=False)
55
+ def user_login_create():
56
+ global AUTHENTICATOR
57
+ global TBYB_LOGO
58
+ global USER_LOGGED_IN
59
+ users = None
60
+ with open(USER_DATABASE_PATH) as file:
61
+ users = yaml.load(file, Loader=SafeLoader)
62
+ AUTHENTICATOR = stauth.Authenticate(
63
+ users['credentials'],
64
+ users['cookie']['name'],
65
+ users['cookie']['key'],
66
+ users['cookie']['expiry_days'],
67
+ users['preauthorized']
68
+ )
69
+ with st.sidebar:
70
+ st.image(TBYB_LOGO, width=70)
71
+ loginTab, registerTab, detailsTab = st.tabs(["Log in", "Register", "Account details"])
72
+
73
+ with loginTab:
74
+ name, authentication_status, username = AUTHENTICATOR.login('Login', 'main')
75
+ if authentication_status:
76
+ AUTHENTICATOR.logout('Logout', 'main')
77
+ st.write(f'Welcome *{name}*')
78
+ user_evaluation_variables.USERNAME = username
79
+ USER_LOGGED_IN = True
80
+ elif authentication_status == False:
81
+ st.error('Username/password is incorrect')
82
+ forgot_password(AUTHENTICATOR, users)
83
+ elif authentication_status == None:
84
+ st.warning('Please enter your username and password')
85
+ forgot_password(AUTHENTICATOR, users)
86
+ if not authentication_status:
87
+ with registerTab:
88
+ create_new_user(AUTHENTICATOR, users)
89
+ else:
90
+ with detailsTab:
91
+ st.write('**Username:** ', username)
92
+ st.write('**Name:** ', name)
93
+ st.write('**Email:** ', users['credentials']['usernames'][username]['email'])
94
+ # update_account_details(AUTHENTICATOR, users)
95
+ reset_password(AUTHENTICATOR, users)
96
+
97
+
98
+ return USER_LOGGED_IN
99
+ def setup_page_banner():
100
+ global USER_LOGGED_IN
101
+ # for tab in [tab1, tab2, tab3, tab4, tab5]:
102
+ c1,c2,c3,c4,c5,c6,c7,c8,c9 = st.columns(9)
103
+ with c5:
104
+ st.image(TBYB_LOGO, use_column_width=True)
105
+ for col in [c1,c2,c3,c4,c5,c6,c7,c8,c9]:
106
+ col = None
107
+ st.title('Try Before You Bias (TBYB)')
108
+ st.write('*A Quantitative T2I Bias Evaluation Tool*')
109
+ def setup_how_to():
110
+ expander = st.expander("How to Use")
111
+ expander.write("1. Login to your TBYB Account using the bar on the right\n"
112
+ "2. Navigate to the '\U0001F527 Setup' tab and input the ID of the HuggingFace \U0001F917 T2I model you want to evaluate\n")
113
+ expander.image(Image.open('./assets/HF_MODEL_ID_EXAMPLE.png'))
114
+ expander.write("3. Test your chosen model by generating an image using an input prompt e.g.: 'A corgi with some cool sunglasses'\n")
115
+ expander.image(Image.open('./assets/lykon_corgi.png'))
116
+ expander.write("4. Navigate to the '\U0001F30E General Eval.' or '\U0001F3AF Task-Oriented Eval.' tabs "
117
+ " to evaluate your model once it has been loaded\n"
118
+ "5. Once you have generated some evaluation images, head over to the '\U0001F4C1 Generated Images' tab to have a look at them\n"
119
+ "6. To check out your evaluations or all of the TBYB Community evaluations, head over to the '\U0001F4CA Model Comparison' tab\n"
120
+ "7. For more information about the evaluation process, see our paper at --PAPER HYPERLINK-- or navigate to the "
121
+ " '\U0001F4F0 Additional Information' tab for a TL;DR.\n"
122
+ "8. For any questions or to report any bugs/issues. Please contact [email protected].\n")
123
+
124
+ def setup_additional_information_tab(tab):
125
+ with tab:
126
+ st.header("1. Quantifying Bias in Text-to-Image (T2I) Generative Models")
127
+ st.markdown(
128
+ """
129
+ *Based on the article of the same name available here --PAPER HYPERLINK--
130
+
131
+ Authors: Jordan Vice, Naveed Akhtar, Richard Hartley and Ajmal Mian
132
+
133
+ This web-app was developed by **Jordan Vice** to accompany the article, serving as a practical
134
+ implementation of how T2I model biases can be quantitatively assessed and compared. Evaluation results from
135
+ all *base* models discussed in the paper have been incorporated into the TBYB community results and we hope
136
+ that others share their evaluations as we look to further the discussion on transparency and reliability
137
+ of T2I models.
138
+
139
+ """)
140
+
141
+ st.header('2. A (very) Brief Summary')
142
+ st.image(Image.open('./assets/TBYB_flowchart.png'))
143
+ st.markdown(
144
+ """
145
+ Bias in text-to-image models can propagate unfair social representations and could be exploited to
146
+ aggressively market ideas or push controversial or sinister agendas. Existing T2I model bias evaluation
147
+ methods have focused on social biases. So, we proposed a bias evaluation methodology that considered
148
+ general and task-oriented biases, spawning the Try Before You Bias (**TBYB**) application as a result.
149
+ """
150
+ )
151
+ st.markdown(
152
+ """
153
+ We proposed three novel metrics to quantify T2I model biases:
154
+ 1. Distribution Bias - $B_D$
155
+ 2. Jaccard Hallucination - $H_J$
156
+ 3. Generative Miss Rate - $M_G$
157
+
158
+ Open the appropriate drop-down menu to understand the logic and inspiration behind each metric.
159
+ """
160
+ )
161
+ c1,c2,c3 = st.columns(3)
162
+ with c1:
163
+ with st.expander("Distribution Bias - $B_D$"):
164
+ st.markdown(
165
+ """
166
+ Using the Area under the Curve (AuC) as an evaluation metric in machine learning is not novel. However,
167
+ in the context of T2I models, using AuC allows us to define the distribution of objects that have been
168
+ detected in generated output image scenes.
169
+
170
+ So, every time an object is detected in a scene, we update a dictionary (which is available for
171
+ download after running an evaluation). After evaluating a full set of images, you can use this
172
+ information to determine what objects appear more frequently than others.
173
+
174
+ After all images are evaluated, we sort the objects in descending order and normalize the data. We
175
+ then use the normalized values to calculate $B_D$, using the trapezoidal AuC rule i.e.:
176
+
177
+ $B_D = \\Sigma_{i=1}^{M-1}\\frac{n_i+n_{i+1}}{2}$
178
+
179
+ So, if a user conducts a task-oriented study on biases related to **dogs** using a model
180
+ that was heavily biased using pictures of animals in the wild, you might find that after running
181
+ evaluations, the most common objects detected were trees and grass - even if these objects weren't
182
+ specified in the prompt. This would result in a very low $B_D$ in comparison to a model that for
183
+ example was trained on images of dogs and animals in various different scenarios $\\rightarrow$
184
+ which would result in a *higher* $B_D$ in comparison.
185
+ """
186
+ )
187
+ with c2:
188
+ with st.expander("Jaccard Hallucination - $H_J$"):
189
+ st.markdown(
190
+ """
191
+ Hallucination is a very common phenomenon that is discussed in relation to generative AI, particularly
192
+ in relation to some of the most popular large language models. Depending on where you look, hallucinations
193
+ can be defined as being positive, negative, or just something to observe $\\rightarrow$ a sentiment
194
+ that we echo in our bias evaluations.
195
+
196
+ Now, how does hallucination tie into bias? In our work, we use hallucination to define how often a
197
+ T2I model will *add* objects that weren't specified OR, how often it will *omit* objects that were
198
+ specified. This indicates that there could be an innate shift in bias in the model, causing it to
199
+ add or omit certain objects.
200
+
201
+ Initially, we considered using two variables $H^+$ and $H^-$ to define these two dimensions of
202
+ hallucination. Then, we considered the Jaccard similarity coefficient, which
203
+ measures the similarity *and* diversity of two sets of objects/samples - defining this as
204
+ Jaccard Hallucination - $H_J$.
205
+
206
+ Simply put, we define the set of objects detected in the input prompt and then detect the objects in
207
+ the corresponding output image. Then, we determine the intersect over union. For a model, we
208
+ calculate the average $H_J$ across generated images using:
209
+
210
+ $H_J = \\frac{1}{N}\\Sigma_{i=0}^{N-1}\\left(1-\\frac{\\mathcal{X}_i\\cap\\mathcal{Y}_i}{\\mathcal{X}_i\\cup\\mathcal{Y}_i}\\right)$
211
+
212
+ """
213
+ )
214
+ with c3:
215
+ with st.expander("Generative Miss Rate - $M_G$"):
216
+ st.markdown(
217
+ """
218
+ Whenever fairness and trust are discussed in the context of machine learning and AI systems,
219
+ performance is always highlighted as a key metric - regardless of the downstream task. So, in terms
220
+ of evaluating bias, we thought that it would be important to see if there was a correlation
221
+ between bias and performance (as we predicted). And while the other metrics do evaluate biases
222
+ in terms of misalignment, they do not consider the relationship between bias and performance.
223
+
224
+ We use an additional CLIP model to assist in calculating Generative Miss Rate - $M_G$. Logically,
225
+ as a model becomes more biased, it will begin to diverge away from the intended target and so, the
226
+ miss rate of the generative model will increase as a result. This was a major consideration when
227
+ designing this metric.
228
+
229
+ We use the CLIP model as a binary classifier, differentiating between two classes:
230
+ - the prompt used to generate the image
231
+ - **NOT** the prompt
232
+
233
+ Through our experiments on intentionally-biased T2I models, we found that there was a clear
234
+ relationship between $M_G$ and the extent of bias. So, we can use this metric to quantify and infer
235
+ how badly model performances have been affected by their biases.
236
+ """
237
+ )
238
+ st.header('3. TBYB Constraints')
239
+ st.markdown(
240
+ """
241
+ While we have attempted to design a comprehensive, automated bias evaluation tool, we must acknowledge that
242
+ in its infancy, TBYB has some constraints:
243
+ - We have not checked the validity of *every* single T2I model and model type on HuggingFace so we cannot
244
+ promise that all T2I models will work - if you run into issues with a model that you think should work, feel
245
+ free to reach out!
246
+ - Currently, a model_index.json file is required to load models and use them with TBYB; we will look to
247
+ address other models in future work
248
+ - TBYB only works on T2I models hosted on HuggingFace, other model repositories are not currently supported
249
+ - Adaptor models are not currently supported, we will look to add evaluation functionalities of these
250
+ models in the future.
251
+ - Download, generation, inference and evaluation times are all hardware dependent.
252
+
253
+ Keep in mind that these constraints may be removed or added to at any time.
254
+ """)
255
+ st.header('4. Misuse, Malicious Use, and Out-of-Scope Use')
256
+ st.markdown(
257
+ """
258
+ Given this application is used for the assessment of T2I biases and relies on
259
+ pre-trained models available on HuggingFace, we are not responsible for any content generated
260
+ by public-facing models that have been used to generate images using this application.
261
+
262
+ TBYB is proposed as an auxiliary tool to assess model biases and thus, if a chosen model is found to output
263
+ insensitive, disturbing, distressing or offensive images that propagate harmful stereotypes or
264
+ representations of marginalised groups, please address your concerns to the model providers.
265
+
266
+
267
+ However, given the TBYB tool is designed for bias quantification and is driven by transparency, it would be
268
+ beneficial to the TBYB community to share evaluations of biased T2I models!
269
+
270
+ We have no association with HuggingFace \U0001F917; we only use their services as a model repository,
271
+ given their growth in popularity in the computer science community recently.
272
+
273
+
274
+ For further questions/queries or if you want to simply strike a conversation,
275
+ please reach out to Jordan Vice at: [email protected]""")
276
+
277
+ setup_page_banner()
278
+ setup_how_to()
279
+
280
+
281
+ if user_login_create():
282
+ tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["\U0001F527 Setup", "\U0001F30E General Eval.", "\U0001F3AF Task-Oriented Eval.",
283
+ "\U0001F4CA Model Comparison", "\U0001F4C1 Generated Images", "\U0001F4F0 Additional Information"])
284
+ setup_additional_information_tab(tab6)
285
+
286
+ # PLASTER THE LOGO EVERYWHERE
287
+ tab2.subheader("General Bias Evaluation")
288
+ tab2.write("Waiting for \U0001F527 Setup to be complete...")
289
+ tab3.subheader("Task-Oriented Bias Evaluation")
290
+ tab3.write("Waiting for \U0001F527 Setup to be complete...")
291
+ tab4.write("Check out other model evaluation results from users across the **TBYB** Community! \U0001F30E ")
292
+ tab4.write("You can also just compare your own model evaluations by clicking the '*Personal Evaluation*' buttons")
293
+ MCOMP.initialise_page(tab4)
294
+ tab5.subheader("Generated Images from General and Task-Oriented Bias Evaluations")
295
+ tab5.write("Waiting for \U0001F527 Setup to be complete...")
296
+
297
+ with tab1:
298
+ with st.form("model_definition_form", clear_on_submit=True):
299
+ modelID = st.text_input('Input the HuggingFace \U0001F917 T2I model_id for the model you '
300
+ 'want to analyse e.g.: "runwayml/stable-diffusion-v1-5"')
301
+ submitted1 = st.form_submit_button("Submit")
302
+ if modelID:
303
+ with st.spinner('Checking if ' + modelID + ' is valid and downloading it (if required)'):
304
+ modelLoaded = MLOAD.check_if_model_exists(modelID)
305
+ if modelLoaded is not None:
306
+ # st.write("Located " + modelID + " model_index.json file")
307
+ st.write("Located " + modelID)
308
+
309
+ modelType = MLOAD.get_model_info(modelLoaded)
310
+ if modelType is not None:
311
+ st.write("Model is of Type: ", modelType)
312
+
313
+ if submitted1:
314
+ MINFER.TargetModel = MLOAD.import_model(modelID, modelType)
315
+ if MINFER.TargetModel is not None:
316
+ st.write("Text-to-image pipeline looks like this:")
317
+ st.write(MINFER.TargetModel)
318
+ user_evaluation_variables.MODEL = modelID
319
+ user_evaluation_variables.MODEL_TYPE = modelType
320
+ else:
321
+ st.error('The Model: ' + modelID + ' does not appear to exist or the model does not contain a model_index.json file.'
322
+ ' Please check that the HuggingFace repo ID is valid.'
323
+ ' For more help, please see the "How to Use" Tab above.', icon="🚨")
324
+ if modelID:
325
+ with st.form("example_image_gen_form", clear_on_submit=True):
326
+ testPrompt = st.text_input('Input a random test prompt to test out your '
327
+ 'chosen model and see if its generating images:')
328
+ submitted2 = st.form_submit_button("Submit")
329
+ if testPrompt and submitted2:
330
+ with st.spinner("Generating an image with the prompt:\n"+testPrompt+"(This may take some time)"):
331
+ testImage = MINFER.generate_test_image(MINFER.TargetModel, testPrompt)
332
+ st.image(testImage, caption='Model: ' + modelID + ' Prompt: ' + testPrompt)
333
+ st.write('''If you are happy with this model, navigate to the other tabs to evaluate bias!
334
+ Otherwise, feel free to load up a different model and run it again''')
335
+
336
+ if MINFER.TargetModel is not None:
337
+ tab_manager.completed_setup([tab2, tab3, tab4, tab5], modelID)
338
+ else:
339
+ MCOMP.databaseDF = None
340
+ user_evaluation_variables.reset_variables('general')
341
+ user_evaluation_variables.reset_variables('task-oriented')
342
+ st.write('')
343
+ st.warning('Log in or register your email to get started! ', icon="⚠️")
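For reference, a sketch (not part of the commit) of the shape ./data/user_database.yaml is assumed to take, inferred from the fields user_login_create() reads; the account entry, cookie values and hashed password below are placeholders.

import yaml

users = yaml.safe_load("""
credentials:
  usernames:
    example_user:
      email: user@example.com
      name: Example User
      password: "$2b$12$placeholder_bcrypt_hash"
cookie:
  name: tbyb_cookie
  key: some_signature_key
  expiry_days: 30
preauthorized:
  emails:
    - admin@example.com
""")
print(users['cookie']['name'], list(users['credentials']['usernames']))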
tab_manager.py ADDED
@@ -0,0 +1,473 @@
1
+ import streamlit as st
2
+ import model_inferencing as MINFER
3
+ import general_bias_measurement as GBM
4
+ import model_comparison as MCOMP
5
+ import user_evaluation_variables
6
+ import pandas as pd
7
+ import numpy as np
8
+ import json
9
+ import csv
10
+ from itertools import cycle
11
+ import random
12
+ import time
13
+ import datetime
14
+ import zipfile
15
+ from io import BytesIO, StringIO
16
+ def completed_setup(tabs, modelID):
17
+ with tabs[0]:
18
+ st.write("\U0001F917 ", modelID, " has been loaded!")
19
+ st.write("Ready for General Bias Evaluation")
20
+ # general_bias_eval_setup(tabs[0])
21
+ with tabs[1]:
22
+ st.write("\U0001F917 ", modelID, " has been loaded!")
23
+ st.write("Ready for Task-Oriented Bias Evaluation")
24
+ with tabs[3]:
25
+ if not all([user_evaluation_variables.OBJECT_IMAGES_IN_UI, user_evaluation_variables.OCCUPATION_IMAGES_IN_UI, user_evaluation_variables.TASK_IMAGES_IN_UI]):
26
+ st.write("\U0001F917 ", modelID, " has been loaded!")
27
+ st.write("Waiting for Images to be generated.")
28
+ # if any([user_evaluation_variables.OBJECT_IMAGES_IN_UI, user_evaluation_variables.OCCUPATION_IMAGES_IN_UI,
29
+ # user_evaluation_variables.TASK_IMAGES_IN_UI]):
30
+ update_images_tab(tabs[3])
31
+ with tabs[0]:
32
+ general_bias_eval_setup(tabs[0], modelID, tabs[3])
33
+ with tabs[1]:
34
+ task_oriented_bias_eval_setup(tabs[1],modelID, tabs[3])
35
+ def general_bias_eval_setup(tab, modelID, imagesTab):
36
+
37
+ generalBiasSetupDF_EVAL = pd.DataFrame(
38
+ {
39
+ "GEN Eval. Variable": ["No. Images to Generate per prompt", "No. Inference Steps", "Image Size (N x N)"],
40
+ "GEN Values": ["10", "100", "512"],
41
+ }
42
+ )
43
+ generalBiasSetupDF_TYPE = pd.DataFrame(
44
+ {
45
+ "Image Types": ["Objects", "Person in Frame", "Occupations / Label"],
46
+ "Check": [True, True, True],
47
+ }
48
+ )
49
+ tableColumn1, tableColumn2 = st.columns(2)
50
+ with tab:
51
+ with tableColumn1:
52
+ GENValTable = st.data_editor(
53
+ generalBiasSetupDF_EVAL,
54
+ column_config={
55
+ "GEN Eval. Variable": st.column_config.Column(
56
+ "Variable",
57
+ help="General Bias Evaluation variable to control extent of evaluations",
58
+ width=None,
59
+ required=None,
60
+ disabled=True,
61
+ ),
62
+ "GEN Values": st.column_config.Column(
63
+ "Values",
64
+ help="Input values in this column",
65
+ width=None,
66
+ required=True,
67
+ disabled=False,
68
+ ),
69
+ },
70
+ hide_index=True,
71
+ num_rows="fixed",
72
+ )
73
+ with tableColumn2:
74
+ GENCheckTable = st.data_editor(
75
+ generalBiasSetupDF_TYPE,
76
+ column_config={
77
+ "Check": st.column_config.CheckboxColumn(
78
+ "Select",
79
+ help="Select the types of images you want to generate",
80
+ default=False,
81
+ )
82
+ },
83
+ disabled=["Image Types"],
84
+ hide_index=True,
85
+ num_rows="fixed",
86
+ )
87
+ if st.button('Evaluate!', key="EVAL_BUTTON_GEN"):
88
+ initiate_general_bias_evaluation(tab, modelID, [GENValTable, GENCheckTable], imagesTab)
89
+ st.rerun()
90
+
91
+ if user_evaluation_variables.RUN_TIME and user_evaluation_variables.CURRENT_EVAL_TYPE == 'general':
92
+ GBM.output_eval_results(user_evaluation_variables.EVAL_METRICS, 21, 'general')
93
+ st.write("\U0001F553 Time Taken: ", user_evaluation_variables.RUN_TIME)
94
+
95
+ saveEvalsButton = st.button("Save + Upload Evaluations", key='SAVE_EVAL_GEN')
96
+ saveDistButton = st.button("Download Object Distribution", key='SAVE_TOP_GEN')
97
+ if saveEvalsButton:
98
+ st.write("Saving and uploading evaluations")
99
+ user_evaluation_variables.update_evaluation_table('general',False)
100
+ user_evaluation_variables.reset_variables('general')
101
+ if saveDistButton:
102
+ download_word_distribution_csv(user_evaluation_variables.EVAL_METRICS,
103
+ user_evaluation_variables.EVAL_ID, 'general')
104
+
105
+
106
+ def task_oriented_bias_eval_setup(tab,modelID,imagesTab):
107
+ biasSetupDF_EVAL = pd.DataFrame(
108
+ {
109
+ "TO Eval. Variable": ["No. Images to Generate per prompt", "No. Inference Steps", "Image Size (N x N)"],
110
+ "TO Values": ["10", "100", "512"],
111
+ }
112
+ )
113
+ with tab:
114
+ TOValTable = st.data_editor(
115
+ biasSetupDF_EVAL,
116
+ column_config={
117
+ "TO Eval. Variable": st.column_config.Column(
118
+ "Variable",
119
+ help="General Bias Evaluation variable to control extent of evaluations",
120
+ width=None,
121
+ required=None,
122
+ disabled=True,
123
+ ),
124
+ "TO Values": st.column_config.Column(
125
+ "Values",
126
+ help="Input values in this column",
127
+ width=None,
128
+ required=True,
129
+ disabled=False,
130
+ ),
131
+ },
132
+ hide_index=True,
133
+ num_rows="fixed",
134
+ )
135
+ target = st.text_input('What is the single-token target of your task-oriented evaluation study '
136
+ 'e.g.: "burger", "coffee", "men", "women"')
137
+
138
+ if st.button('Evaluate!', key="EVAL_BUTTON_TO"):
139
+ if len(target) > 0:
140
+ initiate_task_oriented_bias_evaluation(tab, modelID, TOValTable, target, imagesTab)
141
+ st.rerun()
142
+ else:
143
+ st.error('Please input a target for your task-oriented analysis', icon="🚨")
144
+ # update_images_tab(imagesTab)
145
+ if user_evaluation_variables.RUN_TIME and user_evaluation_variables.CURRENT_EVAL_TYPE == 'task-oriented':
146
+ GBM.output_eval_results(user_evaluation_variables.EVAL_METRICS, 21, 'task-oriented')
147
+ st.write("\U0001F553 Time Taken: ", user_evaluation_variables.RUN_TIME)
148
+ saveEvalsButton = st.button("Save + Upload Evaluations", key='SAVE_EVAL_TASK')
149
+ saveDistButton = st.button("Download Object Distribution", key='SAVE_TOP_TASK')
150
+ if saveEvalsButton:
151
+ st.write("Saving and uploading evaluations")
152
+ user_evaluation_variables.update_evaluation_table('task-oriented',False)
153
+ user_evaluation_variables.reset_variables('task-oriented')
154
+ if saveDistButton:
155
+ download_word_distribution_csv(user_evaluation_variables.EVAL_METRICS,
156
+ user_evaluation_variables.EVAL_ID, user_evaluation_variables.TASK_TARGET)
157
+ # update_images_tab(imagesTab)
158
+
159
+ def download_word_distribution_csv(data, evalID, evalType):
160
+ filePath = './'+evalID+'_'+evalType+'_word_distribution.csv'
161
+
162
+ listOfObjects = list(data[0].items())
163
+ with open(filePath, 'w', newline='') as fp:
164
+ csvwriter = csv.writer(fp)
165
+ csvwriter.writerows([["Evaluation ID", evalID],
166
+ ["Distribution Bias", data[2]],
167
+ ["Jaccard hallucination", np.mean(data[3])],
168
+ ["Generative Miss Rate", np.mean(data[4])]])
169
+ csvwriter.writerow(['Position', 'Object', 'No. Occurrences', 'Normalized'])
170
+ for obj, val, norm, ii in zip(listOfObjects, data[0].values(), data[1], range(len(listOfObjects))):
171
+ csvwriter.writerow([ii, obj[0], val, norm])
172
+ st.success('Successfully downloaded word distribution data!', icon="✅")
173
+
174
+ def initiate_general_bias_evaluation(tab, modelID, specs, imagesTab):
175
+ startTime = time.time()
176
+ objectData = None
177
+ occupationData = None
178
+ objects = []
179
+ actions = []
180
+ occupations = []
181
+ occupationDescriptors = []
182
+ objectPrompts = None
183
+ occupationPrompts = None
184
+
185
+ objectImages = []
186
+ objectCaptions = []
187
+ occupationImages = []
188
+ occupationCaptions = []
189
+ evaluationImages = []
190
+ evaluationCaptions = []
191
+ with tab:
192
+ st.write("Initiating General Bias Evaluation Experiments with the following setup:")
193
+ st.write(" ***Model*** = ", modelID)
194
+ infoColumn1, infoColumn2 = st.columns(2)
195
+ with infoColumn1:
196
+ st.write(" ***No. Images per prompt*** = ", specs[0]["GEN Values"][0])
197
+ st.write(" ***No. Steps*** = ", specs[0]["GEN Values"][1])
198
+ st.write(" ***Image Size*** = ", specs[0]["GEN Values"][2], "$\\times$", specs[0]["GEN Values"][2])
199
+ with infoColumn2:
200
+ st.write(" ***Objects*** = ", specs[1]["Check"][0])
201
+ st.write(" ***Objects and Actions*** = ", specs[1]["Check"][1])
202
+ st.write(" ***Occupations*** = ", specs[1]["Check"][2])
203
+ st.markdown("___")
204
+ if specs[1]["Check"][0]:
205
+ objectData = read_csv_to_list("./data/list_of_objects.csv")
206
+ if specs[1]["Check"][2]:
207
+ occupationData = read_csv_to_list("./data/list_of_occupations.csv")
208
+ if objectData == None and occupationData == None:
209
+ st.error('Make sure that at least one of the "Objects" or "Occupations" rows are checked', icon="🚨")
210
+ else:
211
+ if specs[1]["Check"][0]:
212
+ for row in objectData[1:]:
213
+ objects.append(row[0])
214
+ if specs[1]["Check"][1]:
215
+ for row in objectData[1:]:
216
+ actions.append(row[1:])
217
+ if specs[1]["Check"][2]:
218
+ for row in occupationData[1:]:
219
+ occupations.append(row[0])
220
+ occupationDescriptors.append(row[1:])
221
+ with infoColumn1:
222
+ st.write("***No. Objects*** = ", len(objects))
223
+ st.write("***No. Actions*** = ", len(actions)*3)
224
+ with infoColumn2:
225
+ st.write("***No. Occupations*** = ", len(occupations))
226
+ st.write("***No. Occupation Descriptors*** = ", len(occupationDescriptors)*3)
227
+ if len(objects) > 0:
228
+ objectPrompts = MINFER.construct_general_bias_evaluation_prompts(objects, actions)
229
+ if len(occupations) > 0:
230
+ occupationPrompts = MINFER.construct_general_bias_evaluation_prompts(occupations, occupationDescriptors)
231
+ if objectPrompts is not None:
232
+ OBJECTprogressBar = st.progress(0, text="Generating Object-related images. Please wait.")
233
+ objectImages, objectCaptions = MINFER.generate_test_images(OBJECTprogressBar, "Generating Object-related images. Please wait.",
234
+ objectPrompts, int(specs[0]["GEN Values"][0]),
235
+ int(specs[0]["GEN Values"][1]), int(specs[0]["GEN Values"][2]))
236
+ evaluationImages+=objectImages
237
+ evaluationCaptions+=objectCaptions[0]
238
+ TXTObjectPrompts = ""
239
+
240
+ if occupationPrompts is not None:
241
+ OCCprogressBar = st.progress(0, text="Generating Occupation-related images. Please wait.")
242
+ occupationImages, occupationCaptions = MINFER.generate_test_images(OCCprogressBar, "Generating Occupation-related images. Please wait.",
243
+ occupationPrompts, int(specs[0]["GEN Values"][0]),
244
+ int(specs[0]["GEN Values"][1]), int(specs[0]["GEN Values"][2]))
245
+ evaluationImages += occupationImages
246
+ evaluationCaptions += occupationCaptions[0]
247
+
248
+ if len(evaluationImages) > 0:
249
+ EVALprogressBar = st.progress(0, text="Evaluating "+modelID+" Model Images. Please wait.")
250
+ user_evaluation_variables.EVAL_METRICS = GBM.evaluate_t2i_model_images(evaluationImages, evaluationCaptions, EVALprogressBar, False, "GENERAL")
251
+ # GBM.output_eval_results(user_evaluation_variables.EVAL_METRICS, 21)
252
+ elapsedTime = time.time() - startTime
253
+ # st.write("\U0001F553 Time Taken: ", str(datetime.timedelta(seconds=elapsedTime)).split(".")[0])
254
+
255
+ user_evaluation_variables.NO_SAMPLES = len(evaluationImages)
256
+ user_evaluation_variables.RESOLUTION = specs[0]["GEN Values"][2] + "x" + specs[0]["GEN Values"][2]
257
+ user_evaluation_variables.INFERENCE_STEPS = int(specs[0]["GEN Values"][1])
258
+ user_evaluation_variables.GEN_OBJECTS = bool(specs[1]["Check"][0])
259
+ user_evaluation_variables.GEN_ACTIONS = bool(specs[1]["Check"][1])
260
+ user_evaluation_variables.GEN_OCCUPATIONS = bool(specs[1]["Check"][2])
261
+ user_evaluation_variables.DIST_BIAS = float(f"{user_evaluation_variables.EVAL_METRICS[2]:.4f}")
262
+ user_evaluation_variables.HALLUCINATION = float(f"{np.mean(user_evaluation_variables.EVAL_METRICS[3]):.4f}")
263
+ user_evaluation_variables.MISS_RATE = float(f"{np.mean(user_evaluation_variables.EVAL_METRICS[4]):.4f}")
264
+ user_evaluation_variables.EVAL_ID = MCOMP.get_evaluation_id('general', True)
265
+ user_evaluation_variables.DATE = datetime.datetime.utcnow().strftime('%d-%m-%Y')
266
+ user_evaluation_variables.TIME = datetime.datetime.utcnow().strftime('%H:%M:%S')
267
+ user_evaluation_variables.RUN_TIME = str(datetime.timedelta(seconds=elapsedTime)).split(".")[0]
268
+
269
+ user_evaluation_variables.OBJECT_IMAGES =objectImages
270
+ user_evaluation_variables.OBJECT_CAPTIONS = objectCaptions
271
+ user_evaluation_variables.OCCUPATION_IMAGES = occupationImages
272
+ user_evaluation_variables.OCCUPATION_CAPTIONS = occupationCaptions
273
+ user_evaluation_variables.CURRENT_EVAL_TYPE = 'general'
274
+
275
+
276
+ def initiate_task_oriented_bias_evaluation(tab, modelID, specs, target, imagesTab):
277
+ startTime = time.time()
278
+ TASKImages = []
279
+ TASKCaptions = []
280
+ with tab:
281
+ st.write("Initiating Task-Oriented Bias Evaluation Experiments with the following setup:")
282
+ st.write(" ***Model*** = ", modelID)
283
+ infoColumn1, infoColumn2 = st.columns(2)
284
+ st.write(" ***No. Images per prompt*** = ", specs["TO Values"][0])
285
+ st.write(" ***No. Steps*** = ", specs["TO Values"][1])
286
+ st.write(" ***Image Size*** = ", specs["TO Values"][2], "$\\times$", specs["TO Values"][2])
287
+ st.write(" ***Target*** = ", target.lower())
288
+ st.markdown("___")
289
+
290
+ captionsToExtract = 50
291
+ if (captionsToExtract * int(specs['TO Values'][0])) < 30:
292
+ st.error('There should be at least 30 images generated. You are attempting to generate:\t'
293
+ + str(captionsToExtract * int(specs['TO Values'][0]))+'.\nPlease readjust your No. Images per prompt',
294
+ icon="🚨")
295
+ else:
296
+ COCOLoadingBar = st.progress(0, text="Scanning through COCO Dataset for relevant prompts. Please wait")
297
+ prompts, cocoIDs = get_COCO_captions('./data/COCO_captions.json', target.lower(), COCOLoadingBar, captionsToExtract)
298
+ if len(prompts) == 0:
299
+ st.error('Woops! Could not find **ANY** relevant COCO prompts for the target: '+target.lower()+
300
+ '\nPlease input a different target', icon="🚨")
301
+ elif len(prompts) > 0 and len(prompts) < captionsToExtract:
302
+ st.warning('WARNING: Only found '+str(len(prompts))+ ' relevant COCO prompts for the target: '+target.lower()+
303
+ '\nWill work with these. Nothing to worry about!', icon="⚠️")
304
+ else:
305
+ st.success('Successfully found '+str(captionsToExtract)+' relevant COCO prompts', icon="✅")
306
+ if len(prompts) > 0:
307
+ COCOUIOutput = []
308
+ for id, pr in zip(cocoIDs, prompts):
309
+ COCOUIOutput.append([id, pr])
310
+ st.write('**Here are some of the randomised '+'"'+target.lower()+'"'+' captions extracted from the COCO dataset**')
311
+ COCOUIOutput.insert(0, ('ID', 'Caption'))
312
+ st.table(COCOUIOutput[:11])
313
+ TASKprogressBar = st.progress(0, text="Generating Task-oriented images. Please wait.")
314
+ TASKImages, TASKCaptions = MINFER.generate_task_oriented_images(TASKprogressBar,"Generating Task-oriented images. Please wait.",
315
+ prompts, cocoIDs, int(specs["TO Values"][0]),
316
+ int(specs["TO Values"][1]), int(specs["TO Values"][2]))
317
+
318
+ EVALprogressBar = st.progress(0, text="Evaluating " + modelID + " Model Images. Please wait.")
319
+ user_evaluation_variables.EVAL_METRICS = GBM.evaluate_t2i_model_images(TASKImages, TASKCaptions[0], EVALprogressBar, False, "TASK")
320
+
321
+
322
+ # GBM.output_eval_results(user_evaluation_variables.EVAL_METRICS, 21)
323
+ elapsedTime = time.time() - startTime
324
+ # st.write("\U0001F553 Time Taken: ", str(datetime.timedelta(seconds=elapsedTime)).split(".")[0])
325
+
326
+ user_evaluation_variables.NO_SAMPLES = len(TASKImages)
327
+ user_evaluation_variables.RESOLUTION = specs["TO Values"][2]+"x"+specs["TO Values"][2]
328
+ user_evaluation_variables.INFERENCE_STEPS = int(specs["TO Values"][1])
329
+ user_evaluation_variables.DIST_BIAS = float(f"{user_evaluation_variables.EVAL_METRICS[2]:.4f}")
330
+ user_evaluation_variables.HALLUCINATION = float(f"{np.mean(user_evaluation_variables.EVAL_METRICS[3]):.4f}")
331
+ user_evaluation_variables.MISS_RATE = float(f"{np.mean(user_evaluation_variables.EVAL_METRICS[4]):.4f}")
332
+ user_evaluation_variables.TASK_TARGET = target.lower()
333
+ user_evaluation_variables.EVAL_ID = MCOMP.get_evaluation_id('task-oriented', True)
334
+ user_evaluation_variables.DATE = datetime.datetime.utcnow().strftime('%d-%m-%Y')
335
+ user_evaluation_variables.TIME = datetime.datetime.utcnow().strftime('%H:%M:%S')
336
+ user_evaluation_variables.RUN_TIME = str(datetime.timedelta(seconds=elapsedTime)).split(".")[0]
337
+
338
+ user_evaluation_variables.TASK_IMAGES = TASKImages
339
+ user_evaluation_variables.TASK_CAPTIONS = TASKCaptions
340
+ user_evaluation_variables.TASK_COCOIDs = cocoIDs
341
+
342
+ user_evaluation_variables.CURRENT_EVAL_TYPE = 'task-oriented'
343
+
344
+
345
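+ # Bundles the generated images into a zip archive together with a CSV listing the prompts
+ # (numbered for object/occupation runs, keyed by COCO ID for task-oriented runs).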
+ def download_and_zip_images(zipImagePath, images, captions, imageType):
+ csvFileName = None
+ if imageType == 'object':
+ csvFileName = 'object_prompts.csv'
+ elif imageType == 'occupation':
+ csvFileName = 'occupation_prompts.csv'
+ else:
+ csvFileName = 'task-oriented_prompts.csv'
+ with st.spinner("Zipping images..."):
+ with zipfile.ZipFile(zipImagePath, 'w') as img_zip:
+ for idx, image in enumerate(images):
+ imgName = captions[1][idx]
+ imageFile = BytesIO()
+ image.save(imageFile, 'JPEG')
+ img_zip.writestr(imgName, imageFile.getvalue())
+
+ # Saving prompt data as an accompanying CSV file
+ string_buffer = StringIO()
+ csvwriter = csv.writer(string_buffer)
+
+ if imageType in ['object', 'occupation']:
+ csvwriter.writerow(['No.', 'Prompt'])
+ for prompt, ii in zip(captions[0], range(len(captions[0]))):
+ csvwriter.writerow([ii + 1, prompt])
+ else:
+ csvwriter.writerow(['COCO ID', 'Prompt'])
+ for prompt, id in zip(captions[0], user_evaluation_variables.TASK_COCOIDs):
+ csvwriter.writerow([id, prompt])
+
+ img_zip.writestr(csvFileName, string_buffer.getvalue())
+ st.success('Successfully zipped and downloaded images!', icon="✅")
+
+
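+ # Refreshes the images tab: one expander per image group (object, occupation, task-oriented),
+ # each showing the prompt list, a three-column image grid and a save button.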
+ def update_images_tab(imagesTab):
+ with imagesTab:
+ if len(user_evaluation_variables.OBJECT_IMAGES) > 0:
+ with st.expander('Object-related Images'):
+ user_evaluation_variables.OBJECT_IMAGES_IN_UI = True
+ TXTObjectPrompts = ""
+ for prompt, ii in zip(user_evaluation_variables.OBJECT_CAPTIONS[0], range(len(user_evaluation_variables.OBJECT_CAPTIONS[0]))):
+ TXTObjectPrompts += str(1 + ii) + '. ' + prompt + '\n'
+ st.write("**Object-related General Bias Evaluation Images**")
+ st.write("Number of Generated Images = ", len(user_evaluation_variables.OBJECT_IMAGES))
+ st.write("Corresponding Number of *unique* Captions = ", len(user_evaluation_variables.OBJECT_CAPTIONS[0]))
+ st.text_area("***List of Object Prompts***",
+ TXTObjectPrompts,
+ height=400,
+ disabled=False,
+ key='TEXT_AREA_OBJECT')
+ cols = cycle(st.columns(3))
+ for idx, image in enumerate(user_evaluation_variables.OBJECT_IMAGES):
+ next(cols).image(image, width=225, caption=user_evaluation_variables.OBJECT_CAPTIONS[1][idx])
+
+ saveObjectImages = st.button("Save Object-related Images")
+ if saveObjectImages:
+ zipPath = 'TBYB_' + user_evaluation_variables.USERNAME + '_' + user_evaluation_variables.EVAL_ID + '_object_related_images.zip'
+ download_and_zip_images(zipPath, user_evaluation_variables.OBJECT_IMAGES,
+ user_evaluation_variables.OBJECT_CAPTIONS, 'object')
+
+ if len(user_evaluation_variables.OCCUPATION_IMAGES) > 0:
+ user_evaluation_variables.OCCUPATION_IMAGES_IN_UI = True
+ with st.expander('Occupation-related Images'):
+ TXTOccupationPrompts = ""
+ for prompt, ii in zip(user_evaluation_variables.OCCUPATION_CAPTIONS[0], range(len(user_evaluation_variables.OCCUPATION_CAPTIONS[0]))):
+ TXTOccupationPrompts += str(1 + ii) + '. ' + prompt + '\n'
+ st.write("**Occupation-related General Bias Evaluation Images**")
+ st.write("Number of Generated Images = ", len(user_evaluation_variables.OCCUPATION_IMAGES))
+ st.write("Corresponding Number of *unique* Captions = ", len(user_evaluation_variables.OCCUPATION_CAPTIONS[0]))
+ st.text_area("***List of Occupation Prompts***",
+ TXTOccupationPrompts,
+ height=400,
+ disabled=False,
+ key='TEXT_AREA_OCCU')
+ cols = cycle(st.columns(3))
+ for idx, image in enumerate(user_evaluation_variables.OCCUPATION_IMAGES):
+ next(cols).image(image, width=225, caption=user_evaluation_variables.OCCUPATION_CAPTIONS[1][idx])
+
+ saveOccupationImages = st.button("Save Occupation-related Images")
+ if saveOccupationImages:
+ zipPath = 'TBYB_' + user_evaluation_variables.USERNAME + '_' + user_evaluation_variables.EVAL_ID + '_occupation_related_images.zip'
+ download_and_zip_images(zipPath, user_evaluation_variables.OCCUPATION_IMAGES,
+ user_evaluation_variables.OCCUPATION_CAPTIONS, 'occupation')
+
+ if len(user_evaluation_variables.TASK_IMAGES) > 0:
+ with st.expander(user_evaluation_variables.TASK_TARGET + '-related Images'):
+ user_evaluation_variables.TASK_IMAGES_IN_UI = True
+ TXTTaskPrompts = ""
+ for prompt, id in zip(user_evaluation_variables.TASK_CAPTIONS[0], user_evaluation_variables.TASK_COCOIDs):
+ TXTTaskPrompts += "ID_" + str(id) + '. ' + prompt + '\n'
+
+ st.write("**Task-oriented Bias Evaluation Images. Target** = ", user_evaluation_variables.TASK_TARGET)
+ st.write("Number of Generated Images = ", len(user_evaluation_variables.TASK_IMAGES))
+ st.write("Corresponding Number of *unique* Captions = ", len(user_evaluation_variables.TASK_CAPTIONS[0]))
+ st.text_area("***List of Task-Oriented Prompts***",
+ TXTTaskPrompts,
+ height=400,
+ disabled=False,
+ key='TEXT_AREA_TASK')
+ cols = cycle(st.columns(3))
+ for idx, image in enumerate(user_evaluation_variables.TASK_IMAGES):
+ next(cols).image(image, width=225, caption=user_evaluation_variables.TASK_CAPTIONS[1][idx])
+
+ saveTaskImages = st.button("Save Task-oriented Images")
+ if saveTaskImages:
+ zipPath = 'TBYB_' + user_evaluation_variables.USERNAME + '_' + user_evaluation_variables.EVAL_ID + '_' + user_evaluation_variables.TASK_TARGET + '-oriented_images.zip'
+ download_and_zip_images(zipPath, user_evaluation_variables.TASK_IMAGES,
+ user_evaluation_variables.TASK_CAPTIONS, 'task-oriented')
+
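+ # Scans the COCO caption annotations (shuffled with a fixed seed for reproducibility) and
+ # returns up to NPrompts captions containing the target word, together with their COCO IDs.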
+ def get_COCO_captions(filePath, target, progressBar, NPrompts=50):
+ with open(filePath, 'r') as f:
+ captionData = json.load(f)
+ COCOCaptions = []
+ COCOIDs = []
+ random.seed(42)
+ random.shuffle(captionData['annotations'])
+ for anno in captionData['annotations']:
+ if target in anno.get('caption').lower().split(' '):
+ if len(COCOCaptions) < NPrompts:
+ COCOCaptions.append(anno.get('caption').lower())
+ COCOIDs.append(str(anno.get('id')))
+ percentComplete = len(COCOCaptions) / NPrompts
+ progressBar.progress(percentComplete, text="Scanning through COCO Dataset for relevant prompts. Please wait")
+ return (COCOCaptions, COCOIDs)
+
+ def read_csv_to_list(filePath):
+ data = []
+ with open(filePath, 'r', newline='') as csvfile:
+ csvReader = csv.reader(csvfile)
+ for row in csvReader:
+ data.append(row)
+ return data
+
user_evaluation_variables.py ADDED
@@ -0,0 +1,189 @@
+ import yaml
+ from yaml import safe_load
+ import streamlit as st
+
+ USERNAME = None
+ EVAL_ID = None
+ MODEL = None
+ MODEL_TYPE = None
+ NO_SAMPLES = None
+ RESOLUTION = None
+ INFERENCE_STEPS = None
+ GEN_OBJECTS = None
+ GEN_ACTIONS = None
+ GEN_OCCUPATIONS = None
+ TASK_TARGET = None
+ DIST_BIAS = None
+ HALLUCINATION = None
+ MISS_RATE = None
+ DATE = None
+ TIME = None
+ RUN_TIME = None
+
+ EVAL_METRICS = None
+ OBJECT_IMAGES = []
+ OCCUPATION_IMAGES = []
+ TASK_IMAGES = []
+ OBJECT_CAPTIONS = None
+ OCCUPATION_CAPTIONS = None
+ TASK_CAPTIONS = None
+ TASK_COCOIDs = None
+
+ OBJECT_IMAGES_IN_UI = False
+ OCCUPATION_IMAGES_IN_UI = False
+ TASK_IMAGES_IN_UI = False
+ CURRENT_EVAL_TYPE = None
+
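+ # Appends the metrics of the current run to the per-user YAML evaluation database
+ # (general or task-oriented, depending on evalType), keyed by EVAL_ID.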
+ def update_evaluation_table(evalType, debugging):
+ global USERNAME
+ global EVAL_ID
+ global MODEL
+ global MODEL_TYPE
+ global NO_SAMPLES
+ global RESOLUTION
+ global INFERENCE_STEPS
+ global GEN_OBJECTS
+ global GEN_ACTIONS
+ global GEN_OCCUPATIONS
+ global TASK_TARGET
+ global DIST_BIAS
+ global HALLUCINATION
+ global MISS_RATE
+ global DATE
+ global TIME
+ global RUN_TIME
+ global CURRENT_EVAL_TYPE
+
+ if debugging:
+ st.write("Username: ", USERNAME)
+ st.write("EVAL_ID: ", EVAL_ID)
+ st.write("MODEL: ", MODEL)
+ st.write("MODEL_TYPE: ", MODEL_TYPE)
+ st.write("NO_SAMPLES: ", NO_SAMPLES)
+ st.write("RESOLUTION: ", RESOLUTION)
+ st.write("INFERENCE_STEPS: ", INFERENCE_STEPS)
+ st.write("GEN_OBJECTS: ", GEN_OBJECTS)
+ st.write("GEN_ACTIONS: ", GEN_ACTIONS)
+ st.write("GEN_OCCUPATIONS: ", GEN_OCCUPATIONS)
+ st.write("TASK_TARGET: ", TASK_TARGET)
+ st.write("DIST_BIAS: ", DIST_BIAS)
+ st.write("HALLUCINATION: ", HALLUCINATION)
+ st.write("MISS_RATE: ", MISS_RATE)
+ st.write("DATE: ", DATE)
+ st.write("TIME: ", TIME)
+ st.write("RUN_TIME: ", RUN_TIME)
+
+ newEvaluationData = None
+ if evalType == 'general':
+ evalDataPath = './data/general_eval_database.yaml'
+ newEvaluationData = {
+ "Model": MODEL,
+ "Model Type": MODEL_TYPE,
+ "No. Samples": NO_SAMPLES,
+ "Resolution": RESOLUTION,
+ "Inference Steps": INFERENCE_STEPS,
+ "Objects": GEN_OBJECTS,
+ "Actions": GEN_ACTIONS,
+ "Occupations": GEN_OCCUPATIONS,
+ "Dist. Bias": DIST_BIAS,
+ "Hallucination": HALLUCINATION,
+ "Gen. Miss Rate": MISS_RATE,
+ "Date": DATE,
+ "Time": TIME,
+ "Run Time": RUN_TIME
+ }
+ else:
+ evalDataPath = './data/task_oriented_eval_database.yaml'
+ newEvaluationData = {
+ "Model": MODEL,
+ "Model Type": MODEL_TYPE,
+ "No. Samples": NO_SAMPLES,
+ "Resolution": RESOLUTION,
+ "Inference Steps": INFERENCE_STEPS,
+ "Target": TASK_TARGET,
+ "Dist. Bias": DIST_BIAS,
+ "Hallucination": HALLUCINATION,
+ "Gen. Miss Rate": MISS_RATE,
+ "Date": DATE,
+ "Time": TIME,
+ "Run Time": RUN_TIME
+ }
+ with open(evalDataPath, 'r') as f:
+ yamlData = safe_load(f)
+
+ # st.write("OLD DATABASE ", yamlData['evaluations']['username'][USERNAME])
+ if USERNAME not in yamlData['evaluations']['username']:
+ if evalType == 'general':
+ st.success('Congrats on your first General Bias evaluation!', icon='\U0001F388')
+ else:
+ st.success('Congrats on your first Task-Oriented Bias evaluation!', icon='\U0001F388')
+ yamlData['evaluations']['username'][USERNAME] = {}
+
+ yamlData['evaluations']['username'][USERNAME][EVAL_ID] = newEvaluationData
+
+ st.write("NEW DATABASE ", yamlData['evaluations']['username'][USERNAME])
+ with open(evalDataPath, 'w') as yaml_file:
+ yaml_file.write(yaml.dump(yamlData, default_flow_style=False))
+
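+ # Clears the per-run state after an evaluation is saved; the image/caption buffers that are
+ # reset depend on whether the last run was 'general' or task-oriented.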
+ def reset_variables(evalType):
+ global USERNAME
+ global EVAL_ID
+ global MODEL
+ global MODEL_TYPE
+ global NO_SAMPLES
+ global RESOLUTION
+ global INFERENCE_STEPS
+ global GEN_OBJECTS
+ global GEN_ACTIONS
+ global GEN_OCCUPATIONS
+ global TASK_TARGET
+ global DIST_BIAS
+ global HALLUCINATION
+ global MISS_RATE
+ global DATE
+ global TIME
+ global RUN_TIME
+ global EVAL_METRICS
+ global OBJECT_IMAGES
+ global OCCUPATION_IMAGES
+ global TASK_IMAGES
+ global OBJECT_CAPTIONS
+ global OCCUPATION_CAPTIONS
+ global TASK_CAPTIONS
+ global TASK_COCOIDs
+ global OBJECT_IMAGES_IN_UI
+ global OCCUPATION_IMAGES_IN_UI
+ global TASK_IMAGES_IN_UI
+ global CURRENT_EVAL_TYPE
+
+ EVAL_ID = None
+ # MODEL = None
+ # MODEL_TYPE = None
+ NO_SAMPLES = None
+ RESOLUTION = None
+ INFERENCE_STEPS = None
+ GEN_OBJECTS = None
+ GEN_ACTIONS = None
+ GEN_OCCUPATIONS = None
+ TASK_TARGET = None
+ DIST_BIAS = None
+ HALLUCINATION = None
+ MISS_RATE = None
+ DATE = None
+ TIME = None
+ RUN_TIME = None
+
+ EVAL_METRICS = None
+ CURRENT_EVAL_TYPE = None
+
+ if evalType == 'general':
+ OBJECT_IMAGES = []
+ OCCUPATION_IMAGES = []
+ OBJECT_CAPTIONS = None
+ OCCUPATION_CAPTIONS = None
+ OBJECT_IMAGES_IN_UI = False
+ OCCUPATION_IMAGES_IN_UI = False
+ else:
+ TASK_IMAGES = []
+ TASK_CAPTIONS = None
+ TASK_COCOIDs = None
+ TASK_IMAGES_IN_UI = False
+