in-the-stack / app.py
lvwerra's picture
lvwerra HF staff
Update app.py
a6c2f56
raw
history blame
711 Bytes
from datasets import load_dataset
import streamlit as st
@st.cache(allow_output_mutation=True)
def load_all_usernames():
    """Return every username in ``bigcode/the-stack-usernames`` as a set.

    The ``train`` split of the dataset is loaded and its ``usernames``
    column is converted to a ``set`` so the caller can do fast membership
    checks. The function is wrapped in ``st.cache`` so Streamlit reuses the
    result across script reruns instead of reloading the dataset each time.
    """
    train_split = load_dataset("bigcode/the-stack-usernames", split="train")
    return set(train_split["usernames"])
# --- Page header -----------------------------------------------------------
st.title("Am I in The Stack?")
st.markdown(
    "This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack)."
)

# The loader is decorated with st.cache, so reruns triggered by widget
# interaction reuse the already-built set.
usernames = load_all_usernames()

# Strip surrounding whitespace: a username pasted with a stray space or
# newline would otherwise never match an entry in the set.
username = st.text_input("GitHub Username:").strip()

if st.button("Check!"):
    if not username:
        # An empty lookup is not a meaningful "No"; ask for input instead of
        # telling the user their data is absent.
        st.warning("Please enter a GitHub username.")
    elif username in usernames:
        st.markdown("**Yes**, your data is in The Stack.")
    else:
        st.markdown("**No**, your data is not in The Stack.")