Tokentesting / app.py
frankai98's picture
Update app.py
a364940 verified
raw
history blame
5.66 kB
import os
import nest_asyncio
nest_asyncio.apply()
import streamlit as st
from transformers import pipeline
from huggingface_hub import login
from streamlit.components.v1 import html
import pandas as pd
import torch
# Retrieve the Hugging Face access token from the environment. The app needs
# it to download gated models (e.g. Gemma), so fail fast with a visible error
# rather than letting the pipeline calls fail later with an opaque message.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
    st.stop()

# Authenticate this process with the Hugging Face Hub so model downloads work.
login(token=hf_token)
def timer():
    """Build the HTML/JS for a self-updating elapsed-time widget.

    The returned snippet renders a "⏱️ Elapsed: MM:SS" line that ticks once per
    second. It clears any stale "freezeTimer" flag on start, and stops ticking
    (turning the text green) as soon as another component writes
    localStorage["freezeTimer"] = "true".

    Returns:
        str: HTML + <script> markup suitable for streamlit.components.v1.html.
    """
    display_div = (
        '<div id="timer" style="font-size:16px;color:#666;'
        'margin-bottom:10px;">⏱️ Elapsed: 00:00</div>'
    )
    tick_script = """
<script>
(function() {
    var start = Date.now();
    var timerElement = document.getElementById('timer');
    localStorage.removeItem("freezeTimer");
    var interval = setInterval(function() {
        if(localStorage.getItem("freezeTimer") === "true"){
            clearInterval(interval);
            timerElement.style.color = '#00cc00';
            return;
        }
        var elapsed = Date.now() - start;
        var minutes = Math.floor(elapsed / 60000);
        var seconds = Math.floor((elapsed % 60000) / 1000);
        timerElement.innerHTML = '⏱️ Elapsed: ' +
            (minutes < 10 ? '0' : '') + minutes + ':' +
            (seconds < 10 ? '0' : '') + seconds;
    }, 1000);
})();
</script>
"""
    return "\n" + display_div + tick_script
# Basic page chrome: title, icon, and a one-line explanation of what the app does.
# NOTE(review): the page_icon string "πŸ“" looks like a mojibake-encoded emoji
# (likely 📝 read with the wrong codec) — verify and re-save the file as UTF-8.
st.set_page_config(page_title="Review Scorer & Report Generator", page_icon="πŸ“")
st.header("Review Scorer & Report Generator")
# Concise introduction for the user.
st.write("This model will score your reviews in your CSV file and generate a report based on those results.")
# Load models with caching to avoid reloading on every run
@st.cache_resource
def load_models():
    """Load and cache the sentiment-scoring and report-generation pipelines.

    Cached with st.cache_resource so the (expensive) model downloads and
    initialization happen once per process, not on every Streamlit rerun.
    Each pipeline is loaded independently; a failure surfaces as an st.error
    and yields None for that slot so the caller can decide how to degrade.

    Returns:
        tuple: (score_pipe, gemma_pipe) — either element may be None if its
        model failed to load.
    """
    # Fix: device=0 was hard-coded, which crashes pipeline construction on
    # CPU-only machines. Prefer the first GPU when CUDA is available and
    # fall back to CPU (-1) otherwise.
    device = 0 if torch.cuda.is_available() else -1
    try:
        score_pipe = pipeline(
            "text-classification",
            model="nlptown/bert-base-multilingual-uncased-sentiment",
            device=device,
        )
    except Exception as e:
        st.error(f"Error loading score model: {e}")
        score_pipe = None
    try:
        gemma_pipe = pipeline(
            "text-generation",
            model="google/gemma-3-1b-it",
            device=device,
            # bfloat16 halves memory for the generator; assumes the target
            # device supports it — TODO confirm on the deployment hardware.
            torch_dtype=torch.bfloat16,
        )
    except Exception as e:
        st.error(f"Error loading Gemma model: {e}")
        gemma_pipe = None
    return score_pipe, gemma_pipe
# Load (or fetch from cache) both pipelines once at startup.
score_pipe, gemma_pipe = load_models()

# Inputs: a free-text query for the report, and a CSV of reviews to score.
# The query is standalone — it does not have to appear in the CSV.
query_input = st.text_area("Enter your query text for analysis (this does not need to be part of the CSV):")
uploaded_file = st.file_uploader("Upload Reviews CSV File (must contain a 'reviewText' column)", type=["csv"])
# Main flow: validate inputs, score every review, then ask Gemma to turn the
# scored list plus the user's query into a short narrative report.
if score_pipe is None or gemma_pipe is None:
    st.error("Model loading failed. Please check your model names, token permissions, and GPU configuration.")
else:
    # Extract the review texts from the uploaded CSV (if any). Rows with a
    # missing 'reviewText' are dropped; everything else is coerced to str.
    candidate_docs = []
    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
            if 'reviewText' not in df.columns:
                st.error("CSV must contain a 'reviewText' column.")
            else:
                candidate_docs = df['reviewText'].dropna().astype(str).tolist()
        except Exception as e:
            st.error(f"Error reading CSV file: {e}")
    if st.button("Generate Report"):
        # Reset timer state so that the timer always shows up
        st.session_state.timer_started = False
        st.session_state.timer_frozen = False
        # Validate in order of severity: no file, unusable file, empty query.
        if uploaded_file is None:
            st.error("Please upload a CSV file.")
        elif not candidate_docs:
            st.error("CSV must contain a 'reviewText' column.")
        elif not query_input.strip():
            st.error("Please enter a query text!")
        else:
            # Start the on-page JS timer exactly once per run.
            if not st.session_state.timer_started and not st.session_state.timer_frozen:
                st.session_state.timer_started = True
                html(timer(), height=50)
            status_text = st.empty()
            progress_bar = st.progress(0)
            # Stage 1: Score candidate documents (all reviews) without including the query.
            status_text.markdown("**πŸ” Scoring candidate documents...**")
            progress_bar.progress(33)
            # Assuming score_pipe can take a list of texts directly:
            scored_results = score_pipe(candidate_docs)
            # Pair each review with its score assuming the output order matches the input order.
            # NOTE(review): result["score"] is the classifier's *confidence*, not
            # the star rating — the rating lives in result["label"] (e.g. "4 stars").
            # Verify whether the report should use the label instead of (or along
            # with) the confidence.
            scored_docs = list(zip(candidate_docs, [result["score"] for result in scored_results]))
            progress_bar.progress(67)
            # Stage 2: Generate Report using Gemma, include the query and scored results.
            status_text.markdown("**πŸ“ Generating report with Gemma...**")
            # The full scored list is interpolated into the prompt; for large
            # CSVs this may exceed the model's context window — TODO confirm.
            prompt = f"""
Generate a detailed report based on the following analysis.
Query:
"{query_input}"
Candidate Reviews with their scores:
{scored_docs}
Please provide a concise summary report explaining the insights derived from these scores.
"""
            # NOTE(review): max_new_tokens=50 is very small for a "detailed
            # report" — likely truncates the output; confirm the intended length.
            report = gemma_pipe(prompt, max_new_tokens=50)
            progress_bar.progress(100)
            status_text.success("**βœ… Generation complete!**")
            # Freeze the JS timer via the localStorage flag it polls for.
            html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
            st.session_state.timer_frozen = True
            st.write("**Scored Candidate Reviews:**", scored_docs)
            st.write("**Generated Report:**", report[0]['generated_text'])