Apasalic committed on
Commit
e50c383
·
verified ·
1 Parent(s): 9dcd3cd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -0
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from transformers import pipeline
4
+ import re
5
+
6
# Model configurations: maps the language name offered in the UI to the
# model identifier that is later passed as `model=` to transformers.pipeline.
MODELS = {
    "English": "MarieAngeA13/Sentiment-Analysis-BERT",
    "Danish": "larskjeldgaard/senda",
}

# Page config
st.set_page_config(
    page_title="Multi-language Sentiment Analyzer",
    page_icon="🎭",
    layout="wide",
)

# Load custom CSS and inject it into the page.
# The encoding is pinned so the stylesheet is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('style.css', encoding='utf-8') as f:
    st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
22
+
23
def process_sentiment(text, pipeline):
    """Classify a single text entry and return ``(label, score)``.

    Args:
        text: Value to classify. It is converted with ``str()`` before being
            handed to the model, so non-string cells are accepted.
        pipeline: Callable that takes a string and returns a sequence whose
            first item is a mapping with ``"label"`` and ``"score"`` keys.
            (Inside this function the name shadows the imported
            ``transformers.pipeline`` factory; it is kept for compatibility
            with existing callers.)

    Returns:
        A tuple of the lower-cased label and its score. If classification
        fails for any reason, a Streamlit warning is shown and
        ``("unknown", 0.0)`` is returned so one bad row does not abort the
        whole batch.
    """
    try:
        top_prediction = pipeline(str(text))[0]
        # Labels are lower-cased so downstream comparisons are case-insensitive.
        return top_prediction['label'].lower(), top_prediction['score']
    except Exception as e:
        # Slice the string form: slicing the raw value would itself raise for
        # non-string inputs (e.g. NaN) and escape this best-effort handler.
        preview = str(text)[:50]
        st.warning(f"Error processing text: {preview}... Error: {str(e)}")
        return "unknown", 0.0
32
+
33
# App layout: three columns in a 1:2:1 ratio; the widgets go in the middle one.
# NOTE(review): indentation was lost in this view of the file — confirm which
# of the following statements belong inside `with col2:`.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    st.title("🎭 Multi-language Sentiment Analysis")

    # One entry per configured model; the first entry is preselected.
    language_choices = list(MODELS.keys())
    selected_language = st.selectbox(
        "Select Language",
        options=language_choices,
        index=0,
    )

    privacy_notice_html = """
<div class="privacy-notice">
⚠️ <b>Privacy Notice:</b> Your data is processed in memory and not stored.
</div>
"""
    st.markdown(privacy_notice_html, unsafe_allow_html=True)

    uploaded_file = st.file_uploader("Upload a CSV file with text", type=["csv"])
51
+
52
+ if uploaded_file:
53
+ try:
54
+ df = pd.read_csv(uploaded_file)
55
+ if "text" not in df.columns:
56
+ st.error("CSV must contain a 'text' column")
57
+ else:
58
+ with st.spinner(f"πŸ“Š Analyzing sentiments in {selected_language}..."):
59
# Matches a "Start Time: <float> - End Time: <float>" marker, together with
# the "Speaker: Speaker <letter>, " prefix when one precedes it.
_TRANSCRIPT_MARKER_RE = re.compile(
    r'(?:Speaker: Speaker [A-Z], )?Start Time: \d+\.\d+ - End Time: \d+\.\d+'
)
_WHITESPACE_RUN_RE = re.compile(r'\s+')


def clean_transcript_text(text):
    """Remove speaker/timestamp markers from a transcript cell.

    Both the speaker-prefixed marker and the bare timestamp marker are
    removed in a single pass, so a text containing a mix of the two forms is
    fully cleaned. Runs of whitespace are then collapsed to single spaces and
    the result is stripped.

    Args:
        text: Cell value. ``None`` and float NaN (missing CSV values) yield an
            empty string; any other non-string value is converted with
            ``str()``.

    Returns:
        The cleaned text as a string.
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ""
        text = str(text)

    without_markers = _TRANSCRIPT_MARKER_RE.sub('', text)
    return _WHITESPACE_RUN_RE.sub(' ', without_markers).strip()
67
+
68
# Strip transcript markers so only the cleaned text is sent to the model.
df['cleaned_text'] = df['text'].apply(clean_transcript_text)

# Build the classifier for the language chosen in the UI.
sentiment_pipeline = pipeline(
    "text-classification",
    model=MODELS[selected_language],
    truncation=True,
    max_length=512,
)

# process_sentiment returns a (label, score) pair for every row.
results = [process_sentiment(text, sentiment_pipeline) for text in df["cleaned_text"]]
df["sentiment"] = [label for label, _ in results]
df["confidence"] = [score for _, score in results]

st.markdown("### 📈 Analysis Results")

# Count labels once. English labels ("positive"/"negative") and Danish ones
# ("positiv"/"negativ") are both accepted, so no per-language branch is needed.
sentiment_counts = df["sentiment"].value_counts()
pos_count = int(sum(sentiment_counts.get(label, 0) for label in ("positive", "positiv")))
neu_count = int(sentiment_counts.get("neutral", 0))
neg_count = int(sum(sentiment_counts.get(label, 0) for label in ("negative", "negativ")))

# An uploaded CSV may contain a header but no rows; avoid dividing by zero.
total_rows = len(df)

metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)

sentiment_metrics = (
    (metric_col1, "Positive Sentiments", pos_count),
    (metric_col2, "Neutral Sentiments", neu_count),
    (metric_col3, "Negative Sentiments", neg_count),
)
for metric_column, metric_title, metric_count in sentiment_metrics:
    percentage = metric_count / total_rows * 100 if total_rows else 0.0
    with metric_column:
        st.metric(metric_title, f"{metric_count} ({percentage:.1f}%)")

with metric_col4:
    # The mean of an empty column is NaN; report 0 instead.
    average_confidence = df['confidence'].mean() if total_rows else 0.0
    st.metric("Average Confidence", f"{average_confidence:.1%}")

st.markdown("#### Preview")

# Copy the slice so formatting the confidence column neither touches `df`
# nor triggers pandas' SettingWithCopyWarning.
preview_df = df[["cleaned_text", "sentiment", "confidence"]].head().copy()
preview_df["confidence"] = preview_df["confidence"].apply(lambda x: f"{x:.1%}")
120
+
121
def highlight_sentiment(val):
    """Return the CSS background rule used to tint a sentiment cell.

    Positive labels ("positive"/"positiv") map to green, negative labels
    ("negative"/"negativ") to red and "neutral" to grey. Any other value gets
    an empty string, i.e. no styling.
    """
    palette = (
        (("positive", "positiv"), 'background-color: rgba(0, 255, 0, 0.2)'),
        (("negative", "negativ"), 'background-color: rgba(255, 0, 0, 0.2)'),
        (("neutral",), 'background-color: rgba(128, 128, 128, 0.2)'),
    )
    # The first group containing the value wins; fall back to no style.
    return next((css for labels, css in palette if val in labels), '')
129
+
130
# Tint the sentiment column cell by cell. Styler.applymap is deprecated in
# favour of Styler.map (pandas 2.1+); prefer `map` when it exists and fall
# back to `applymap` on older pandas versions.
preview_styler = preview_df.style
apply_cellwise = getattr(preview_styler, "map", None) or preview_styler.applymap
st.dataframe(
    apply_cellwise(highlight_sentiment, subset=['sentiment']),
    use_container_width=True,
)

st.markdown("### 💾 Download Results")
# The download contains every row of `df`, not just the preview.
csv_data = df.to_csv(index=False)
st.download_button(
    label="Download Complete Analysis",
    data=csv_data,
    file_name=f"sentiment_results_{selected_language.lower()}.csv",
    mime="text/csv",
)
143
+
144
+ except Exception as e:
145
# Report the failure to the user: a headline with the message, followed by
# the same message in a code box.
error_text = str(e)
st.error(f"Error processing file: {error_text}")
st.error("Full error details:")
st.code(error_text)
148
+ else:
149
# Usage instructions rendered as raw HTML (styled via the "instructions"
# CSS class). The heading emoji was garbled by a mis-decoding and is restored.
st.markdown("""
<div class="instructions">
<h4>📝 How to use:</h4>
<ol>
<li>Select your desired language</li>
<li>Prepare a CSV file with a column named "text"</li>
<li>Upload your file using the button above</li>
<li>Wait for the analysis to complete</li>
<li>Download the results with sentiment labels</li>
</ol>
</div>
""", unsafe_allow_html=True)