Tokentesting / app.py
frankai98's picture
Update app.py
a364940 verified
raw
history blame
5.66 kB
import os
import nest_asyncio
nest_asyncio.apply()
import streamlit as st
from transformers import pipeline
from huggingface_hub import login
from streamlit.components.v1 import html
import pandas as pd
import torch
# Retrieve the Hugging Face access token from the environment. The app needs
# it to download gated models (e.g. Gemma), so fail fast with a visible error
# rather than letting the pipeline calls fail later with an opaque message.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
    st.stop()

# Authenticate this process with the Hugging Face Hub so model downloads work.
login(token=hf_token)
def timer():
    """Build the HTML/JS for a self-updating elapsed-time widget.

    The returned snippet renders a "⏱️ Elapsed: MM:SS" line that ticks once per
    second. It clears any stale "freezeTimer" flag on start, and stops ticking
    (turning the text green) as soon as another component writes
    localStorage["freezeTimer"] = "true".

    Returns:
        str: HTML + <script> markup suitable for streamlit.components.v1.html.
    """
    display_div = (
        '<div id="timer" style="font-size:16px;color:#666;'
        'margin-bottom:10px;">⏱️ Elapsed: 00:00</div>'
    )
    tick_script = """
<script>
(function() {
    var start = Date.now();
    var timerElement = document.getElementById('timer');
    localStorage.removeItem("freezeTimer");
    var interval = setInterval(function() {
        if(localStorage.getItem("freezeTimer") === "true"){
            clearInterval(interval);
            timerElement.style.color = '#00cc00';
            return;
        }
        var elapsed = Date.now() - start;
        var minutes = Math.floor(elapsed / 60000);
        var seconds = Math.floor((elapsed % 60000) / 1000);
        timerElement.innerHTML = '⏱️ Elapsed: ' +
            (minutes < 10 ? '0' : '') + minutes + ':' +
            (seconds < 10 ? '0' : '') + seconds;
    }, 1000);
})();
</script>
"""
    return "\n" + display_div + tick_script
# Basic page chrome: title, icon, and a one-line explanation of what the app does.
# NOTE(review): the page_icon string "πŸ“" looks like a mojibake-encoded emoji
# (likely 📝 read with the wrong codec) — verify and re-save the file as UTF-8.
st.set_page_config(page_title="Review Scorer & Report Generator", page_icon="πŸ“")
st.header("Review Scorer & Report Generator")
# Concise introduction for the user.
st.write("This model will score your reviews in your CSV file and generate a report based on those results.")
# Load models with caching to avoid reloading on every run
@st.cache_resource
def load_models():
    """Load and cache the sentiment-scoring and report-generation pipelines.

    Cached with st.cache_resource so the (expensive) model downloads and
    initialization happen once per process, not on every Streamlit rerun.
    Each pipeline is loaded independently; a failure surfaces as an st.error
    and yields None for that slot so the caller can decide how to degrade.

    Returns:
        tuple: (score_pipe, gemma_pipe) — either element may be None if its
        model failed to load.
    """
    # Fix: device=0 was hard-coded, which crashes pipeline construction on
    # CPU-only machines. Prefer the first GPU when CUDA is available and
    # fall back to CPU (-1) otherwise.
    device = 0 if torch.cuda.is_available() else -1
    try:
        score_pipe = pipeline(
            "text-classification",
            model="nlptown/bert-base-multilingual-uncased-sentiment",
            device=device,
        )
    except Exception as e:
        st.error(f"Error loading score model: {e}")
        score_pipe = None
    try:
        gemma_pipe = pipeline(
            "text-generation",
            model="google/gemma-3-1b-it",
            device=device,
            # bfloat16 halves memory for the generator; assumes the target
            # device supports it — TODO confirm on the deployment hardware.
            torch_dtype=torch.bfloat16,
        )
    except Exception as e:
        st.error(f"Error loading Gemma model: {e}")
        gemma_pipe = None
    return score_pipe, gemma_pipe
# Load (or fetch from cache) both pipelines once at startup.
score_pipe, gemma_pipe = load_models()

# Inputs: a free-text query for the report, and a CSV of reviews to score.
# The query is standalone — it does not have to appear in the CSV.
query_input = st.text_area("Enter your query text for analysis (this does not need to be part of the CSV):")
uploaded_file = st.file_uploader("Upload Reviews CSV File (must contain a 'reviewText' column)", type=["csv"])
# Main flow: validate inputs, score every review, then ask Gemma to turn the
# scored list plus the user's query into a short narrative report.
if score_pipe is None or gemma_pipe is None:
    st.error("Model loading failed. Please check your model names, token permissions, and GPU configuration.")
else:
    # Extract the review texts from the uploaded CSV (if any). Rows with a
    # missing 'reviewText' are dropped; everything else is coerced to str.
    candidate_docs = []
    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
            if 'reviewText' not in df.columns:
                st.error("CSV must contain a 'reviewText' column.")
            else:
                candidate_docs = df['reviewText'].dropna().astype(str).tolist()
        except Exception as e:
            st.error(f"Error reading CSV file: {e}")
    if st.button("Generate Report"):
        # Reset timer state so that the timer always shows up
        st.session_state.timer_started = False
        st.session_state.timer_frozen = False
        # Validate in order of severity: no file, unusable file, empty query.
        if uploaded_file is None:
            st.error("Please upload a CSV file.")
        elif not candidate_docs:
            st.error("CSV must contain a 'reviewText' column.")
        elif not query_input.strip():
            st.error("Please enter a query text!")
        else:
            # Start the on-page JS timer exactly once per run.
            if not st.session_state.timer_started and not st.session_state.timer_frozen:
                st.session_state.timer_started = True
                html(timer(), height=50)
            status_text = st.empty()
            progress_bar = st.progress(0)
            # Stage 1: Score candidate documents (all reviews) without including the query.
            status_text.markdown("**πŸ” Scoring candidate documents...**")
            progress_bar.progress(33)
            # Assuming score_pipe can take a list of texts directly:
            scored_results = score_pipe(candidate_docs)
            # Pair each review with its score assuming the output order matches the input order.
            # NOTE(review): result["score"] is the classifier's *confidence*, not
            # the star rating — the rating lives in result["label"] (e.g. "4 stars").
            # Verify whether the report should use the label instead of (or along
            # with) the confidence.
            scored_docs = list(zip(candidate_docs, [result["score"] for result in scored_results]))
            progress_bar.progress(67)
            # Stage 2: Generate Report using Gemma, include the query and scored results.
            status_text.markdown("**πŸ“ Generating report with Gemma...**")
            # The full scored list is interpolated into the prompt; for large
            # CSVs this may exceed the model's context window — TODO confirm.
            prompt = f"""
Generate a detailed report based on the following analysis.
Query:
"{query_input}"
Candidate Reviews with their scores:
{scored_docs}
Please provide a concise summary report explaining the insights derived from these scores.
"""
            # NOTE(review): max_new_tokens=50 is very small for a "detailed
            # report" — likely truncates the output; confirm the intended length.
            report = gemma_pipe(prompt, max_new_tokens=50)
            progress_bar.progress(100)
            status_text.success("**βœ… Generation complete!**")
            # Freeze the JS timer via the localStorage flag it polls for.
            html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
            st.session_state.timer_frozen = True
            st.write("**Scored Candidate Reviews:**", scored_docs)
            st.write("**Generated Report:**", report[0]['generated_text'])