# data-labeler / app.py
# osbm's picture
# use radio button
# 78e3e3b
import os
import streamlit as st
import pandas as pd
import huggingface_hub as hfh
import requests
import time
import uuid
def get_uuid():
    """Return a random six-character lowercase hexadecimal identifier.

    The value is the first six characters of a freshly generated UUID4.
    """
    random_id = uuid.uuid4()
    # The first eight characters of a UUID's string form are plain hex digits,
    # so slicing ``.hex`` gives the same result as slicing ``str(random_id)``.
    return random_id.hex[:6]
# Local working directory for the downloaded per-voter CSV files; created on
# start-up if it does not exist yet.
os.makedirs("labels", exist_ok=True)

# Names offered in the login select box. Each voter's labels live in a CSV
# file named after the voter.
voters = ["osman", "eren", "robin", "mira", "bilal"]
voters.extend(f"volunteer-{slot}" for slot in range(1, 6))
# Shared Hugging Face Hub client used for listing images and uploading votes.
# The access token is read from the "hf_token" environment variable and is
# ``None`` when that variable is not set.
_hf_token = os.environ.get("hf_token")
api = hfh.HfApi(token=_hf_token)
def get_list_of_images():
    """Return the paths of all ``.png``/``.jpg`` files in the image dataset.

    The ``aifred-smart-life-coach/capstone-images`` dataset repository is
    listed recursively through the module-level ``api`` client, and only the
    entries whose path ends in ``.png`` or ``.jpg`` are kept.
    """
    image_suffixes = (".png", ".jpg")
    entries = api.list_repo_tree(
        repo_id="aifred-smart-life-coach/capstone-images",
        repo_type="dataset",
        recursive=True,
    )
    all_paths = (entry.path for entry in entries)
    return [path for path in all_paths if path.endswith(image_suffixes)]
# Columns of a per-voter label file, in the order update_vote writes them.
# Used to build an empty table when a voter has no (or an empty) file yet.
_VOTE_COLUMNS = [
    "image_path",
    "gender",
    "health",
    "fat_level",
    "muscle_level",
    "vote_date",
    "voter_name",
    "uuid",
]


def _fetch_votes(voter: str):
    """Download ``<voter>.csv`` from the labels dataset and load it with pandas.

    A successful download is saved to ``labels/<voter>.csv`` before parsing.

    Args:
        voter: Voter name; also the stem of the CSV file in the dataset.

    Returns:
        A ``(votes, file_path)`` tuple: the votes as a DataFrame and the local
        path of the voter's CSV file. ``votes`` is an empty table with the
        expected columns when the remote file is missing (HTTP 404) or empty.

    Raises:
        requests.HTTPError: if the server answers with an error other than 404.
        requests.Timeout: if the server does not answer within 30 seconds.
        ValueError: if the downloaded file has no ``image_path`` column.
    """
    url = f"https://huggingface.co/datasets/aifred-smart-life-coach/labels/raw/main/{voter}.csv"
    file_path = f"labels/{voter}.csv"
    response = requests.get(url, timeout=30)

    if response.status_code == 404:
        # No label file for this voter yet: treat it as "no votes so far"
        # instead of parsing the error page as CSV.
        return pd.DataFrame(columns=_VOTE_COLUMNS), file_path

    # Any other HTTP error must not be written to disk and parsed as votes.
    response.raise_for_status()

    # Opening with "wb" truncates an existing file, so no explicit delete is needed.
    with open(file_path, "wb") as file:
        file.write(response.content)

    try:
        votes = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header row for pandas to parse.
        votes = pd.DataFrame(columns=_VOTE_COLUMNS)

    if "image_path" not in votes.columns:
        raise ValueError(f"{url} does not look like a label file: no 'image_path' column")
    return votes, file_path


def get_one_from_queue(voter: str):
    """Return the next image ``voter`` has not labelled yet, or ``False``.

    Downloads the voter's label file, reports the voting progress on the page
    with ``st.write`` and picks the first not-yet-voted image in sorted order.

    Args:
        voter: Voter name; selects ``<voter>.csv`` in the labels dataset.

    Returns:
        The path of an image without a vote from ``voter``, or ``False`` when
        every image already has one.
    """
    votes, _ = _fetch_votes(voter)
    print(votes)
    num_past_votes = votes.shape[0]
    print("num_past_votes", num_past_votes)
    list_of_images = get_list_of_images()
    print("list_of_images", len(list_of_images))
    st.write(f"You have voted for {num_past_votes} images out of {len(list_of_images)} images")

    # Sort the remaining images: set iteration order is not guaranteed, and
    # this function is called again on the rerun that processes a submitted
    # vote, so it has to return the same image that was displayed.
    already_voted = set(votes["image_path"].tolist())
    images_not_voted = sorted(set(list_of_images) - already_voted)
    print("images_not_voted", len(images_not_voted))
    return images_not_voted[0] if images_not_voted else False


def update_vote(
    voter: str,
    image: str,
    gender: str,
    healthiness: int,
    fat_level: int,
    muscle_level: int,
    uuid_num: str,
) -> None:
    """Append one vote to the voter's label file and upload it to the dataset.

    The current ``<voter>.csv`` is downloaded first so the new row is added to
    the latest remote state. If the file already contains ``image``, an error
    is shown with ``st.error`` and the script run is halted with ``st.stop``.

    Args:
        voter: Voter name; selects ``<voter>.csv`` in the labels dataset.
        image: Path of the image being voted on.
        gender: Selected gender label.
        healthiness: Healthiness score, stored in the ``health`` column.
        fat_level: Fat level score.
        muscle_level: Muscle level score.
        uuid_num: Short identifier stored with the vote.
    """
    votes, file_path = _fetch_votes(voter)
    print(votes)

    # Refuse a second vote by the same voter for the same image.
    if image in votes["image_path"].tolist():
        st.error("You have already voted for this image")
        st.stop()

    new_row = pd.DataFrame(
        {
            "image_path": [image],
            "gender": [gender],
            "health": [healthiness],
            "fat_level": [fat_level],
            "muscle_level": [muscle_level],
            "vote_date": [int(time.time())],  # Unix timestamp in whole seconds
            "voter_name": [voter],
            "uuid": [uuid_num],
        }
    )
    votes = pd.concat([votes, new_row], ignore_index=True)
    votes.to_csv(file_path, index=False)

    # Push the updated file back to the labels dataset.
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=f"{voter}.csv",
        repo_id="aifred-smart-life-coach/labels",
        repo_type="dataset",
        commit_message=f"{voter} voted for {image}",
    )
# Page flow: show the login form until a voter name is stored in the session
# state under "loggedin"; afterwards show one unlabelled image per run and
# record the submitted vote.
if "loggedin" not in st.session_state:  # user is not logged in
    with st.form("login"):
        username = st.selectbox("Select voter", voters)
        password = st.text_input("Password (get password from [email protected])", type="password")
        submitted = st.form_submit_button("Login")
        if submitted:  # submitted password
            # When "app_password" is unset the lookup yields None, which never
            # equals the entered string, so every login is rejected.
            if password != os.environ.get("app_password"):  # password is incorrect
                st.error("The password you entered is incorrect")
                st.stop()
            # password is correct
            st.success("Welcome, " + username)
            st.write("You are now logged in")
            st.session_state["loggedin"] = username
            st.rerun()
else:  # logged in
    username = st.session_state["loggedin"]
    st.success(f"Welcome, {username}")
    image_path = get_one_from_queue(username)

    # Checked before the form is opened: stopping the script inside st.form
    # would leave a form without a submit button on the page.
    if not image_path:
        st.write("You have voted for all the images")
        st.stop()

    with st.form("images", clear_on_submit=True):
        col1, col2 = st.columns(2)
        with col1:
            # Fetch the image file from the image dataset and display it.
            path = hfh.hf_hub_download(
                repo_id="aifred-smart-life-coach/capstone-images",
                repo_type="dataset",
                filename=image_path,
                token=os.environ.get("hf_token"),
            )
            st.image(path, width=300)
        with col2:
            st.write(image_path)
            gender = st.radio(
                "Gender",
                [
                    "Male",
                    "Female",
                    "Non-defining",
                ],
            )
            healthiness = st.slider("How healthy is this picture?", 0, 100, 50)
            fat_level = st.slider("How fat is this picture?", 0, 100, 50)
            muscle_level = st.slider("How muscular is this picture?", 0, 100, 50)
        # A fresh identifier is generated on every run; the one generated in
        # the run that handles the submit is stored with the vote and shown below.
        uuid_num = get_uuid()
        submitted_second = st.form_submit_button("Submit")
        if submitted_second:
            update_vote(
                voter=username,
                image=image_path,
                gender=gender,
                healthiness=healthiness,
                fat_level=fat_level,
                muscle_level=muscle_level,
                uuid_num=uuid_num,
            )
            st.success(f"Vote submitted uuid: {uuid_num} for the image: {image_path} (in case you want to undo the vote)")
            # Keep the confirmation visible briefly before loading the next image.
            with st.spinner("Wait for the upload of the vote", show_time=True):
                time.sleep(1)
            st.rerun()