kusa04 commited on
Commit
3534d27
·
verified ·
1 Parent(s): 53f2dab

Create function.py

Browse files
Files changed (1) hide show
  1. function.py +39 -0
function.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# ------------------ Sentiment Analysis Functions ------------------------#
def split_text_by_token_limit(text, tokenizer, max_tokens):
    """Split *text* into chunks of at most *max_tokens* tokens.

    The text is tokenized once, sliced into fixed-size windows, and each
    window is decoded back to a string so every chunk fits the model's
    input limit. Returns a (possibly empty) list of chunk strings.
    """
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    # Decode each max_tokens-sized window of ids back into text.
    return [
        tokenizer.decode(token_ids[start:start + max_tokens],
                         skip_special_tokens=True)
        for start in range(0, len(token_ids), max_tokens)
    ]

def safe_sentiment(text, pipeline=None):
    """Run sentiment analysis on *text*, returning None instead of raising.

    Best-effort wrapper: any failure (over-long input, model error, ...)
    yields None rather than an exception, so batch loops keep going.

    Args:
        text: the string to classify.
        pipeline: optional callable returning a list of result dicts
            (e.g. a transformers pipeline). Defaults to the module-level
            ``sentiment_pipeline`` for backward compatibility.

    Returns:
        The first result dict (``{"label": ..., "score": ...}``) or None.
    """
    if pipeline is None:
        # Fall back to the module-level pipeline the original relied on.
        pipeline = sentiment_pipeline
    try:
        return pipeline(text)[0]
    except Exception:
        # Deliberate best-effort: swallow the error and signal via None.
        return None

def analyze_detail(text, tokenizer, sentiment_pipeline, max_tokens):
    """Preprocess, chunk, and sentiment-score *text*, aggregating chunks.

    Fix: the original accepted ``sentiment_pipeline`` but never used it,
    silently calling through to a module-level global of the same name;
    the parameter is now applied to each chunk.

    Args:
        text: raw input text.
        tokenizer: tokenizer with ``encode``/``decode`` used for chunking.
        sentiment_pipeline: callable returning a list of result dicts.
        max_tokens: maximum tokens per chunk.

    Returns:
        ``{"label": ..., "score": ...}`` with the label whose summed
        per-chunk score is highest, or None when chunking yields nothing.
    """
    text = preprocess_text(text)
    chunks = split_text_by_token_limit(text, tokenizer, max_tokens)
    if not chunks:
        return None
    # Accumulated score per sentiment category across all chunks.
    scores = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    for chunk in chunks:
        # Best-effort per chunk: a failing chunk contributes nothing.
        try:
            result = sentiment_pipeline(chunk)[0]
        except Exception:
            continue
        label = result['label'].upper()
        if label in scores:
            scores[label] += result['score']
    # NOTE(review): if every chunk fails, all scores stay 0 and max()
    # arbitrarily reports "POSITIVE"/0 — behavior preserved from original.
    final_label = max(scores, key=scores.get)
    return {"label": final_label, "score": scores[final_label]}

def preprocess_text(text):
    """Normalize URLs and user mentions before sentiment analysis.

    Replaces every URL with the literal token ``http`` and every
    ``@mention`` with ``@user`` (the placeholders Twitter-style
    sentiment models are trained on).

    Args:
        text: raw input string.

    Returns:
        The normalized string.
    """
    # Fix: the file never imported `re`, so this raised NameError when
    # called; imported locally since the file has no import block to extend.
    import re
    text = re.sub(r'http\S+', 'http', text)
    text = re.sub(r'@\w+', '@user', text)
    return text