"""Streamlit app: score reviews from a CSV with a sentiment model, then have Gemma write a report.

Flow: authenticate to the Hugging Face Hub via the HF_TOKEN environment
variable, load two cached pipelines (sentiment scoring + text generation),
let the user upload a CSV with a 'reviewText' column plus a query, score
every review, and generate a summary report.
"""

import os

import nest_asyncio
import pandas as pd
import streamlit as st
import torch
from huggingface_hub import login
from streamlit.components.v1 import html
from transformers import pipeline

# Allow nested event loops (transformers/streamlit both touch asyncio).
nest_asyncio.apply()

# Retrieve the token from environment variables; without it the app cannot
# download gated models, so stop early with a clear message.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
    st.stop()

# Login with the token
login(token=hf_token)

# Initialize session state for the timer up front so every later read is safe,
# even on reruns that never hit the "Generate Report" branch.
st.session_state.setdefault("timer_started", False)
st.session_state.setdefault("timer_frozen", False)


def timer():
    """Return the HTML snippet rendered as the elapsed-time indicator."""
    return """
⏱️ Elapsed: 00:00
"""


st.set_page_config(page_title="Review Scorer & Report Generator", page_icon="📝")
st.header("Review Scorer & Report Generator")

# Concise introduction
st.write("This model will score your reviews in your CSV file and generate a report based on those results.")


# Load models with caching to avoid reloading on every Streamlit rerun.
@st.cache_resource
def load_models():
    """Load the sentiment-scoring and Gemma text-generation pipelines.

    Returns:
        tuple: (score_pipe, gemma_pipe); either element is None if its
        model failed to load (the error is surfaced in the UI).
    """
    # Fall back to CPU (-1) when no GPU is available instead of crashing
    # with a hard-coded device=0.
    device = 0 if torch.cuda.is_available() else -1
    try:
        score_pipe = pipeline(
            "text-classification",
            model="nlptown/bert-base-multilingual-uncased-sentiment",
            device=device,
        )
    except Exception as e:
        st.error(f"Error loading score model: {e}")
        score_pipe = None
    try:
        gemma_pipe = pipeline(
            "text-generation",
            model="google/gemma-3-1b-it",
            device=device,
            torch_dtype=torch.bfloat16,
        )
    except Exception as e:
        st.error(f"Error loading Gemma model: {e}")
        gemma_pipe = None
    return score_pipe, gemma_pipe


score_pipe, gemma_pipe = load_models()

# Input: query text for scoring and CSV file upload for candidate reviews.
query_input = st.text_area("Enter your query text for analysis (this does not need to be part of the CSV):")
uploaded_file = st.file_uploader("Upload Reviews CSV File (must contain a 'reviewText' column)", type=["csv"])

if score_pipe is None or gemma_pipe is None:
    st.error("Model loading failed. Please check your model names, token permissions, and GPU configuration.")
else:
    # Parse the CSV eagerly so validation errors appear before the button press.
    candidate_docs = []
    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
            if 'reviewText' not in df.columns:
                st.error("CSV must contain a 'reviewText' column.")
            else:
                candidate_docs = df['reviewText'].dropna().astype(str).tolist()
        except Exception as e:
            st.error(f"Error reading CSV file: {e}")

    if st.button("Generate Report"):
        # Reset timer state so that the timer always shows up on a fresh run.
        st.session_state.timer_started = False
        st.session_state.timer_frozen = False

        if uploaded_file is None:
            st.error("Please upload a CSV file.")
        elif not candidate_docs:
            st.error("CSV must contain a 'reviewText' column.")
        elif not query_input.strip():
            st.error("Please enter a query text!")
        else:
            if not st.session_state.timer_started and not st.session_state.timer_frozen:
                st.session_state.timer_started = True
                html(timer(), height=50)

            status_text = st.empty()
            progress_bar = st.progress(0)

            # Stage 1: score all candidate reviews (the query is NOT scored).
            status_text.markdown("**🔍 Scoring candidate documents...**")
            progress_bar.progress(33)
            # The pipeline accepts a list of texts and preserves input order.
            scored_results = score_pipe(candidate_docs)
            # NOTE(review): result["score"] is the classifier's confidence in
            # its predicted label, not the star rating itself (that lives in
            # result["label"]) — confirm this is the intended quantity.
            scored_docs = list(zip(candidate_docs, [result["score"] for result in scored_results]))
            progress_bar.progress(67)

            # Stage 2: generate the report with Gemma from the query + scores.
            status_text.markdown("**📝 Generating report with Gemma...**")
            prompt = f"""
Generate a detailed report based on the following analysis.
Query: "{query_input}"
Candidate Reviews with their scores:
{scored_docs}
Please provide a concise summary report explaining the insights derived from these scores.
"""
            report = gemma_pipe(prompt, max_new_tokens=50)
            progress_bar.progress(100)
            status_text.success("**✅ Generation complete!**")

            # Clear the live timer component and freeze its state.
            html("", height=0)
            st.session_state.timer_frozen = True

            st.write("**Scored Candidate Reviews:**", scored_docs)
            st.write("**Generated Report:**", report[0]['generated_text'])