"""Streamlit app: score CSV reviews against a query and generate a report.

Flow of one script run:
1. Configure the page and verify that ``HF_TOKEN`` is set.
2. Load (and cache) the scoring and text-generation pipelines.
3. Read the query text and the uploaded CSV (needs a ``document`` column).
4. On "Generate Report": score every review, then ask Gemma for a report.
"""
import os
import nest_asyncio

# Applied before the remaining imports, as in the original script order.
nest_asyncio.apply()

import streamlit as st
from transformers import pipeline
from huggingface_hub import login
from streamlit.components.v1 import html
import pandas as pd

# Streamlit documents set_page_config as the first Streamlit command of a run,
# so it has to come before the st.error() used for a missing token below.
st.set_page_config(page_title="Review Scorer & Report Generator", page_icon="📝")

# Retrieve the token from environment variables
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
    st.stop()

# Initialize session state for timer
for _flag in ("timer_started", "timer_frozen"):
    if _flag not in st.session_state:
        st.session_state[_flag] = False


def timer():
    """Return the markup rendered inside the elapsed-time component.

    NOTE(review): only static text is present here; nothing in this string
    updates the displayed time. Confirm whether the original HTML/JavaScript
    was lost and restore it if so.
    """
    return """
⏱️ Elapsed: 00:00
"""


st.header("Review Scorer & Report Generator")

# Concise introduction
st.write("This model will score your reviews in your CSV file and generate a report based on those results.")


# Load models with caching to avoid reloading on every run
@st.cache_resource
def load_models():
    """Log in to the Hugging Face Hub and build both pipelines.

    Returns:
        A ``(score_pipe, gemma_pipe)`` tuple: the text-classification
        pipeline used for scoring and the text-generation pipeline used
        for the report.
    """
    # Logging in here (instead of at module level) means it happens once per
    # cached load rather than on every Streamlit rerun.
    login(token=hf_token)
    # Load the scoring model via pipeline.
    score_pipe = pipeline("text-classification", model="mixedbread-ai/mxbai-rerank-base-v1")
    # Load the Gemma text generation pipeline.
    # `token` replaces the deprecated `use_auth_token` keyword.
    gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it", token=hf_token)
    return score_pipe, gemma_pipe


score_pipe, gemma_pipe = load_models()

# Input: Query text for scoring and CSV file upload for candidate reviews
query_input = st.text_area("Enter your query text for analysis (this does not need to be part of the CSV):")
uploaded_file = st.file_uploader("Upload Reviews CSV File (must contain a 'document' column)", type=["csv"])

if uploaded_file is None:
    st.error("Please upload a CSV file.")
    st.stop()

# Only the parse itself is guarded; the st.stop() calls stay outside the try
# so the script-control exception they raise is never near a broad except.
try:
    df = pd.read_csv(uploaded_file)
except Exception as e:
    st.error(f"Error reading CSV file: {e}")
    st.stop()

if 'document' not in df.columns:
    st.error("CSV must contain a 'document' column.")
    st.stop()

candidate_docs = df['document'].dropna().astype(str).tolist()
if not candidate_docs:
    # Without this guard the report would be generated from an empty list.
    st.error("The 'document' column contains no reviews to score.")
    st.stop()

if st.button("Generate Report"):
    if not query_input.strip():
        st.error("Please enter a query text!")
    else:
        # Reset the flags on every click; otherwise a finished or failed run
        # leaves them set and the timer is never rendered again.
        st.session_state.timer_started = True
        st.session_state.timer_frozen = False
        html(timer(), height=50)

        status_text = st.empty()
        progress_bar = st.progress(0)

        try:
            # Stage 1: Score candidate documents using the provided query.
            status_text.markdown("**🔍 Scoring candidate documents...**")
            scored_docs = []
            total_docs = len(candidate_docs)
            for position, doc in enumerate(candidate_docs, start=1):
                # NOTE(review): the scorer receives one combined string; check
                # whether this reranker expects a (query, document) text pair.
                combined_text = f"Query: {query_input} Document: {doc}"
                # truncation=True clips inputs beyond the tokenizer's max length.
                result = score_pipe(combined_text, truncation=True)[0]
                scored_docs.append((doc, result["score"]))
                # Scoring owns the first half of the progress bar.
                progress_bar.progress(50 * position // total_docs)

            # Stage 2: Generate Report using Gemma, including query and scored results.
            status_text.markdown("**📝 Generating report with Gemma...**")
            prompt = (
                "Generate a detailed report based on the following analysis.\n"
                f'Query: "{query_input}"\n'
                f"Candidate Reviews with their scores: {scored_docs}\n"
                "Please provide a concise summary report explaining the insights "
                "derived from these scores."
            )
            # max_new_tokens bounds only the generated text; max_length would
            # also count the prompt, which already embeds every review.
            # return_full_text=False keeps the prompt out of the shown report.
            report = gemma_pipe(prompt, max_new_tokens=200, return_full_text=False)

            progress_bar.progress(100)
            status_text.success("**✅ Generation complete!**")
            html("", height=0)
            st.session_state.timer_frozen = True

            st.write("**Scored Candidate Reviews:**", scored_docs)
            st.write("**Generated Report:**", report[0]['generated_text'])
        except Exception as e:
            # height=0 matches the success path; the component's default
            # height would leave an empty frame on the page.
            html("", height=0)
            st.session_state.timer_frozen = True
            status_text.error(f"**❌ Error:** {str(e)}")
            progress_bar.empty()