File size: 711 Bytes
a431d31
 
 
 
a6c2f56
b0c1665
 
1f7c61a
a431d31
 
abfea56
b0c1665
a431d31
 
 
b0c1665
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from datasets import load_dataset
import streamlit as st


@st.cache(allow_output_mutation=True)
def load_all_usernames():
    """Return the usernames listed in the ``bigcode/the-stack-usernames`` dataset.

    Loads the ``train`` split of the dataset, reads its ``usernames`` column
    and converts it to a ``set`` so callers can do fast membership tests.
    The result is memoized by ``st.cache``; ``allow_output_mutation=True``
    tells Streamlit not to check the returned object for mutation.

    Returns:
        set: the distinct values of the ``usernames`` column.
    """
    train_split = load_dataset("bigcode/the-stack-usernames", split="train")
    username_column = train_split["usernames"]
    return set(username_column)

# Page header and short description of what the tool checks.
st.title("Am I in The Stack?")
st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")
# Set of known usernames; the loader is wrapped in st.cache, so the dataset
# is not re-read on every call.
usernames = load_all_usernames()

# Strip surrounding whitespace: a copy-pasted name such as "octocat " would
# otherwise never match an entry in the set and wrongly report "No".
username = st.text_input("GitHub Username:").strip()

# Only report a result once the user explicitly asks for it.
if st.button("Check!"):
    if username in usernames:
        st.markdown("**Yes**, your data is in The Stack.")
    else:
        st.markdown("**No**, your data is not in The Stack.")