Create function.py
function.py ADDED (+39, -0)
@@ -0,0 +1,39 @@
import re

# ------------------ Sentiment Analysis Functions ------------------------#

def split_text_by_token_limit(text, tokenizer, max_tokens):
    # Encode once, then slice the token ids into chunks of at most
    # max_tokens so every chunk fits within the model's input limit.
    tokens = tokenizer.encode(text, add_special_tokens=False)
    chunks = []
    for i in range(0, len(tokens), max_tokens):
        chunk_tokens = tokens[i:i + max_tokens]
        chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
        chunks.append(chunk_text)
    return chunks

def safe_sentiment(sentiment_pipeline, text):
    # Run the pipeline on one chunk; return None on failure so a single
    # bad chunk does not abort the whole analysis.
    try:
        result = sentiment_pipeline(text)[0]
    except Exception:
        result = None
    return result

def analyze_detail(text, tokenizer, sentiment_pipeline, max_tokens):
    text = preprocess_text(text)
    chunks = split_text_by_token_limit(text, tokenizer, max_tokens)
    if not chunks:
        return None
    # Initialize accumulated scores for each sentiment category
    scores = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    for chunk in chunks:
        result = safe_sentiment(sentiment_pipeline, chunk)
        if result is not None:
            label = result['label'].upper()
            if label in scores:
                scores[label] += result['score']
    # Pick the category with the highest accumulated score across chunks.
    final_label = max(scores, key=lambda k: scores[k])
    final_score = scores[final_label]
    return {"label": final_label, "score": final_score}

def preprocess_text(text):
    # Replace URLs and user mentions with placeholder tokens
    text = re.sub(r'http\S+', 'http', text)
    text = re.sub(r'@\w+', '@user', text)
    return text
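
Below is a minimal usage sketch for these helpers. It is illustrative only: the model name, sample text, and max_tokens value are assumptions, not part of this commit; any Hugging Face sentiment model whose labels upper-case to POSITIVE, NEGATIVE, or NEUTRAL would work the same way.

# Example usage: a hypothetical sketch, not part of function.py.
from transformers import AutoTokenizer, pipeline

model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"  # assumed model
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_pipeline = pipeline("sentiment-analysis", model=model_name, tokenizer=tokenizer)

result = analyze_detail(
    "Loving the new release from @huggingface! Details: https://example.com",
    tokenizer,
    sentiment_pipeline,
    max_tokens=510,  # leave headroom for special tokens under the 512-token limit
)
print(result)  # e.g. {'label': 'POSITIVE', 'score': 0.98}

Note that summing per-chunk scores weights the verdict by both the confidence and the number of chunks favoring each label, so a long text is judged by its dominant sentiment rather than by its final chunk alone.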