Spaces:
Sleeping
Sleeping
File size: 5,044 Bytes
91eb9f9 8b9bf01 6c1e788 d9a35fb 8b9bf01 6c1e788 d9a35fb 5ab1c08 9c4bbfa 91eb9f9 8b9bf01 6c1e788 91eb9f9 2382fa5 9c4bbfa 91eb9f9 9c4bbfa 2382fa5 6c1e788 91eb9f9 9c4bbfa 91eb9f9 9c4bbfa 2382fa5 9c4bbfa 2382fa5 9c4bbfa 2382fa5 5782099 91eb9f9 9c4bbfa 5782099 3899916 9c4bbfa 3899916 9c4bbfa 8b9bf01 91eb9f9 3899916 91eb9f9 9c4bbfa ba598f7 91eb9f9 ba598f7 9c4bbfa ba598f7 91eb9f9 9c4bbfa 91eb9f9 3899916 9c4bbfa ba598f7 9c4bbfa 91eb9f9 9c4bbfa 91eb9f9 9c4bbfa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import os
import nest_asyncio
nest_asyncio.apply()
import streamlit as st
from transformers import pipeline
from huggingface_hub import login
from streamlit.components.v1 import html
import pandas as pd
# Retrieve the Hugging Face access token from the environment; the app
# cannot load gated models without it, so fail fast with a visible error.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
    st.stop()
# Login with the token so `pipeline(...)` can pull gated/private models.
login(token=hf_token)
# Initialize session state for the elapsed-time widget. `timer_started`
# means the JS timer has been rendered this session; `timer_frozen` means
# generation finished and the timer was stopped.
if 'timer_started' not in st.session_state:
    st.session_state.timer_started = False
if 'timer_frozen' not in st.session_state:
    st.session_state.timer_frozen = False
# Timer component using HTML and JavaScript
# Timer component using HTML and JavaScript
def timer():
    """Return an HTML/JS snippet rendering a live mm:ss elapsed-time counter.

    The script updates the ``#timer`` element once per second. It clears any
    stale ``freezeTimer`` flag on start; when another component later sets
    ``localStorage["freezeTimer"] = "true"``, the interval stops and the
    counter turns green. Intended to be embedded via
    ``streamlit.components.v1.html``.
    """
    return """
<div id="timer" style="font-size:16px;color:#666;margin-bottom:10px;">β±οΈ Elapsed: 00:00</div>
<script>
(function() {
var start = Date.now();
var timerElement = document.getElementById('timer');
localStorage.removeItem("freezeTimer");
var interval = setInterval(function() {
if(localStorage.getItem("freezeTimer") === "true"){
clearInterval(interval);
timerElement.style.color = '#00cc00';
return;
}
var elapsed = Date.now() - start;
var minutes = Math.floor(elapsed / 60000);
var seconds = Math.floor((elapsed % 60000) / 1000);
timerElement.innerHTML = 'β±οΈ Elapsed: ' +
(minutes < 10 ? '0' : '') + minutes + ':' +
(seconds < 10 ? '0' : '') + seconds;
}, 1000);
})();
</script>
"""
# Page chrome: title/icon must be set before any other Streamlit output.
st.set_page_config(page_title="Review Scorer & Report Generator", page_icon="π")
st.header("Review Scorer & Report Generator")
# Concise introduction shown above the inputs.
st.write("This model will score your reviews in your CSV file and generate a report based on those results.")
# Load models with caching to avoid reloading on every run
@st.cache_resource
def load_models():
    """Load and cache the scoring and text-generation pipelines.

    Cached with ``st.cache_resource`` so the (expensive) model downloads and
    initialization happen once per process rather than on every rerun.

    Returns:
        tuple: ``(score_pipe, gemma_pipe)`` where ``score_pipe`` is a
        text-classification pipeline used to score query/document pairs and
        ``gemma_pipe`` is a Gemma text-generation pipeline.
    """
    # Scoring model used to score each query/document pair.
    score_pipe = pipeline("text-classification", model="mixedbread-ai/mxbai-rerank-base-v1")
    # Gemma text-generation pipeline. `token=` replaces the deprecated
    # `use_auth_token=` argument in current transformers releases.
    gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it", token=hf_token)
    return score_pipe, gemma_pipe

score_pipe, gemma_pipe = load_models()
# Input: query text for scoring, plus a CSV of candidate reviews.
query_input = st.text_area("Enter your query text for analysis (this does not need to be part of the CSV):")
uploaded_file = st.file_uploader("Upload Reviews CSV File (must contain a 'document' column)", type=["csv"])
if uploaded_file is not None:
    # Keep the try body narrow: only the read can legitimately fail here.
    # Validating inside the original broad try risked `st.stop()`'s control
    # exception being caught and reported as a CSV read error.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error reading CSV file: {e}")
        st.stop()
    if 'document' not in df.columns:
        st.error("CSV must contain a 'document' column.")
        st.stop()
    candidate_docs = df['document'].dropna().astype(str).tolist()
    # Guard: an all-NaN or empty column would silently produce no scores.
    if not candidate_docs:
        st.error("The 'document' column contains no usable rows.")
        st.stop()
else:
    st.error("Please upload a CSV file.")
    st.stop()
if st.button("Generate Report"):
    if not query_input.strip():
        st.error("Please enter a query text!")
    else:
        # Render the elapsed-time widget once per session; it keeps running
        # until the freeze flag is set after successful generation.
        if not st.session_state.timer_started and not st.session_state.timer_frozen:
            st.session_state.timer_started = True
            html(timer(), height=50)
        status_text = st.empty()
        progress_bar = st.progress(0)
        try:
            # Stage 1: score every candidate document against the query by
            # feeding a combined "Query: ... Document: ..." string to the
            # classification pipeline.
            status_text.markdown("**π Scoring candidate documents...**")
            progress_bar.progress(0)
            scored_docs = []
            for doc in candidate_docs:
                combined_text = f"Query: {query_input} Document: {doc}"
                result = score_pipe(combined_text)[0]
                scored_docs.append((doc, result["score"]))
            progress_bar.progress(50)
            # Stage 2: generate the report with Gemma from the query plus
            # the scored results.
            status_text.markdown("**π Generating report with Gemma...**")
            prompt = f"""
Generate a detailed report based on the following analysis.
Query:
"{query_input}"
Candidate Reviews with their scores:
{scored_docs}
Please provide a concise summary report explaining the insights derived from these scores.
"""
            # max_new_tokens bounds only the generated continuation. The old
            # max_length=200 counted the prompt toward the budget, so long
            # prompts truncated or eliminated the report.
            report = gemma_pipe(prompt, max_new_tokens=200)
            progress_bar.progress(100)
            status_text.success("**β Generation complete!**")
            # Signal the JS timer (separate component iframe) to stop.
            html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
            st.session_state.timer_frozen = True
            st.write("**Scored Candidate Reviews:**", scored_docs)
            st.write("**Generated Report:**", report[0]['generated_text'])
        except Exception as e:
            # Best effort: remove the timer display on failure.
            html("<script>document.getElementById('timer').remove();</script>")
            status_text.error(f"**β Error:** {str(e)}")
            progress_bar.empty()
|