File size: 1,136 Bytes
8a1d0f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44df589
 
 
 
 
 
 
 
 
8a1d0f1
44df589
 
 
8a1d0f1
 
44df589
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import html

from datasets import load_dataset
import streamlit as st

# Hugging Face access token read from Streamlit's secrets; it is passed to
# load_dataset() as use_auth_token further down in this script.
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
# Font color used by generation_markup_format(). NOTE: despite the name, this
# color is applied to the generation text, not to the prompt.
PROMPT_COLOR = "#CA437E"

def safe_text(text: str) -> str:
    """Return *text* as an HTML ``<pre>`` fragment that is safe to render.

    The result is meant to be passed to ``st.markdown`` with
    ``unsafe_allow_html=True``. The characters ``&``, ``<`` and ``>`` are
    escaped so the text is displayed literally instead of being interpreted
    as markup, and every newline is turned into a ``<br>`` tag so the whole
    fragment stays on a single line.

    Args:
        text: Raw text to display.

    Returns:
        The escaped text wrapped in ``<pre>...</pre>``.
    """
    # Escape first: doing it after the newline substitution would also
    # escape the "<br>" tags inserted below.
    escaped = html.escape(text, quote=False)
    with_breaks = escaped.replace("\n", "<br>")
    return f"<pre>{with_breaks}</pre>"


def prompt_markup_format(text: str) -> str:
    """Wrap *text* in a black ``<font>`` element.

    Args:
        text: HTML fragment to wrap; it is inserted as-is.

    Returns:
        ``text`` enclosed in ``<font color="black">...</font>``.
    """
    # The tag name must be exactly "font"; a "<*font>" tag is not valid HTML
    # and would show up as literal text instead of styling the content.
    return f'<font color="black">{text}</font>'


def generation_markup_format(text: str) -> str:
    """Wrap *text* in a ``<font>`` element colored with ``PROMPT_COLOR``.

    Args:
        text: HTML fragment to wrap; it is inserted as-is, so any ``<pre>``
            element it contains must already be balanced.

    Returns:
        ``text`` enclosed in ``<font color="...">...</font>``.
    """
    # No closing </pre> is emitted here: this function never opens a <pre>,
    # so adding one would leave the resulting markup unbalanced.
    return f'<font color="{PROMPT_COLOR}">{text}</font>'

# Load the generations dataset and keep its "train" split.
ds = load_dataset("SaulLu/bloom-generations", use_auth_token=HF_API_TOKEN)
ds = ds["train"]

# Let the user pick one of the distinct prompts and display it.
possible_prompts = ds.unique("prompt")
chosen_prompt = st.selectbox("Choose a prompt", possible_prompts)
st.markdown(safe_text(chosen_prompt), unsafe_allow_html=True)

# Keep only the rows whose prompt matches the selection. With batched=True
# the callable receives a batch of columns and returns one boolean per row.
sub_ds = ds.filter(
    lambda exs: [prompt == chosen_prompt for prompt in exs["prompt"]],
    batched=True,
)

# Select one example, by position, among the rows for the chosen prompt.
index_sample = st.number_input(
    "Index of the chosen example",
    min_value=0,
    max_value=len(sub_ds) - 1,
    value=0,
    step=1,
)
sample = sub_ds[index_sample]
markdown_text = generation_markup_format(safe_text(sample["generation"]))
st.markdown(markdown_text, unsafe_allow_html=True)

# Show every remaining field of the sample. st.write is called explicitly
# rather than relying on a bare expression being displayed implicitly.
config = {
    key: value
    for key, value in sample.items()
    if key not in ("prompt", "generation")
}
st.write(config)