Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from transformers import pipeline
|
4 |
+
import re
|
5 |
+
|
6 |
+
# Model configurations
# Maps each language offered in the UI to the Hugging Face model id that is
# handed to `transformers.pipeline` for that language.
MODELS = {
    "English": "MarieAngeA13/Sentiment-Analysis-BERT",
    "Danish": "larskjeldgaard/senda",
}

# Page config
st.set_page_config(
    page_title="Multi-language Sentiment Analyzer",
    # NOTE(review): this glyph looks like a mis-encoded emoji — confirm the
    # intended icon against the original file.
    page_icon="π",
    layout="wide",
)

# Load custom CSS
# Read the stylesheet explicitly as UTF-8 so the result does not depend on the
# platform's default text encoding, then inject it as an inline <style> tag.
with open('style.css', encoding='utf-8') as css_file:
    st.markdown(f'<style>{css_file.read()}</style>', unsafe_allow_html=True)
|
22 |
+
|
23 |
+
def process_sentiment(text, pipeline):
    """Process sentiment for a single text entry.

    Args:
        text: The entry to classify. It is converted with ``str()`` before
            being passed on, so non-string values (numbers, NaN) are accepted.
        pipeline: Callable that takes a string and returns a sequence whose
            first item is a mapping with ``'label'`` and ``'score'`` keys.

    Returns:
        A ``(label, score)`` tuple with the label lower-cased. If anything
        fails, a Streamlit warning is shown and ``("unknown", 0.0)`` is
        returned instead of raising.
    """
    # Convert once up front: the same string is needed for the model call and
    # for the warning preview. Slicing the raw value in the handler would
    # itself raise for non-string input and defeat the fallback.
    text = str(text)
    try:
        top_prediction = pipeline(text)[0]
        # Convert sentiment to lowercase
        return top_prediction['label'].lower(), top_prediction['score']
    except Exception as e:
        st.warning(f"Error processing text: {text[:50]}... Error: {str(e)}")
        return "unknown", 0.0
|
32 |
+
|
33 |
+
# App layout

# Lower-cased sentiment labels the app recognises (English and Danish spellings).
POSITIVE_LABELS = ("positive", "positiv")
NEGATIVE_LABELS = ("negative", "negativ")
NEUTRAL_LABELS = ("neutral",)

# NOTE(review): several UI strings below start with "π", which looks like a
# mis-encoded emoji whose original cannot be recovered from this text. They are
# kept byte-for-byte — confirm the intended glyphs.
PRIVACY_NOTICE_HTML = """
<div class="privacy-notice">
⚠️ <b>Privacy Notice:</b> Your data is processed in memory and not stored.
</div>
"""

INSTRUCTIONS_HTML = """
<div class="instructions">
<h4>π How to use:</h4>
<ol>
<li>Select your desired language</li>
<li>Prepare a CSV file with a column named "text"</li>
<li>Upload your file using the button above</li>
<li>Wait for the analysis to complete</li>
<li>Download the results with sentiment labels</li>
</ol>
</div>
"""


def clean_transcript_text(text):
    """Strip transcript timestamp markers from *text* and collapse whitespace.

    Removes every ``Start Time: <n.n> - End Time: <n.n>`` marker, together
    with a directly preceding ``Speaker: Speaker <A-Z>, `` prefix when there
    is one, then squeezes whitespace runs to a single space and trims the ends.

    Args:
        text: A CSV cell value. Missing values (``None``/NaN) are treated as
            an empty string; other non-string values are converted with
            ``str()``.

    Returns:
        The cleaned string (possibly empty).
    """
    if not isinstance(text, str):
        # pandas represents empty CSV cells as NaN; passing that to re.sub
        # would raise TypeError and abort the whole analysis.
        text = "" if pd.isna(text) else str(text)
    # The speaker prefix is optional so that both marker forms are removed in
    # one pass, even when a single text mixes them.
    marker_pattern = r'(?:Speaker: Speaker [A-Z], )?Start Time: \d+\.\d+ - End Time: \d+\.\d+'
    cleaned_text = re.sub(marker_pattern, '', text)
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
    return cleaned_text.strip()


def highlight_sentiment(val):
    """Return the CSS background rule for a sentiment label ('' if unknown)."""
    if val in POSITIVE_LABELS:
        return 'background-color: rgba(0, 255, 0, 0.2)'
    if val in NEGATIVE_LABELS:
        return 'background-color: rgba(255, 0, 0, 0.2)'
    if val in NEUTRAL_LABELS:
        return 'background-color: rgba(128, 128, 128, 0.2)'
    return ''


@st.cache_resource
def load_sentiment_pipeline(language):
    """Build the text-classification pipeline for *language* (a MODELS key).

    Cached as a shared resource so the model is not rebuilt every time
    Streamlit reruns the script (e.g. after the download button is clicked).
    """
    return pipeline(
        "text-classification",
        model=MODELS[language],
        truncation=True,
        max_length=512,
    )


# NOTE(review): indentation was not recoverable from the reviewed text; the
# nesting below (everything inside the centre column) is assumed — confirm.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    st.title("π Multi-language Sentiment Analysis")

    selected_language = st.selectbox(
        "Select Language",
        options=list(MODELS.keys()),
        index=0,
    )

    st.markdown(PRIVACY_NOTICE_HTML, unsafe_allow_html=True)

    uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"])

    if uploaded_file:
        try:
            df = pd.read_csv(uploaded_file)
            if "text" not in df.columns:
                st.error("CSV must contain a 'text' column")
            elif df.empty:
                # A header-only file would otherwise divide by zero when the
                # percentage metrics are computed.
                st.warning("The uploaded CSV contains no rows to analyze.")
            else:
                with st.spinner(f"π Analyzing sentiments in {selected_language}..."):
                    df['cleaned_text'] = df['text'].apply(clean_transcript_text)
                    sentiment_pipeline = load_sentiment_pipeline(selected_language)
                    results = [
                        process_sentiment(text, sentiment_pipeline)
                        for text in df["cleaned_text"]
                    ]
                    df["sentiment"] = [label for label, _ in results]
                    df["confidence"] = [score for _, score in results]

                st.markdown("### π Analysis Results")

                # Count each class once, accepting either language's spelling.
                total = len(df)
                sentiments = df["sentiment"]
                pos_count = int(sentiments.isin(POSITIVE_LABELS).sum())
                neu_count = int(sentiments.isin(NEUTRAL_LABELS).sum())
                neg_count = int(sentiments.isin(NEGATIVE_LABELS).sum())

                metric_columns = st.columns(4)
                class_metrics = [
                    ("Positive Sentiments", pos_count),
                    ("Neutral Sentiments", neu_count),
                    ("Negative Sentiments", neg_count),
                ]
                for column, (title, count) in zip(metric_columns, class_metrics):
                    with column:
                        st.metric(title, f"{count} ({count/total*100:.1f}%)")
                with metric_columns[3]:
                    st.metric(
                        "Average Confidence",
                        f"{df['confidence'].mean():.1%}",
                    )

                st.markdown("#### Preview")

                # Copy the slice so that formatting the confidence column does
                # not write into (or warn about) a view of `df`.
                preview_df = df[["cleaned_text", "sentiment", "confidence"]].head().copy()
                preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}")

                # Styler.applymap is deprecated in favour of Styler.map in
                # newer pandas; use whichever this installation provides.
                styler = preview_df.style
                style_cells = styler.map if hasattr(styler, "map") else styler.applymap
                st.dataframe(
                    style_cells(highlight_sentiment, subset=['sentiment']),
                    use_container_width=True,
                )

                st.markdown("### 💾 Download Results")
                csv_data = df.to_csv(index=False)
                st.download_button(
                    label="Download Complete Analysis",
                    data=csv_data,
                    file_name=f"sentiment_results_{selected_language.lower()}.csv",
                    mime="text/csv",
                )

        except Exception as e:
            # Top-level boundary for the upload flow: report instead of crashing.
            st.error(f"Error processing file: {str(e)}")
            st.error("Full error details:")
            st.code(str(e))
    else:
        st.markdown(INSTRUCTIONS_HTML, unsafe_allow_html=True)
|