kusa04 committed on
Commit
61e3519
·
verified ·
1 Parent(s): e652020

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +21 -9
functions.py CHANGED
@@ -109,33 +109,45 @@ def split_text_by_token_limit(text, tokenizer, max_tokens):
109
  return chunks
110
 
111
 
112
- def safe_sentiment(sentiment_pipeline, text):
 
 
 
 
 
 
 
 
 
 
 
113
  try:
114
- result = sentiment_pipeline(text)[0]
115
  except Exception as e:
116
- result = None
117
- return result
118
-
119
 
120
  def analyze_detail(text, tokenizer, sentiment_pipeline, max_tokens):
121
  text = preprocess_text(text)
122
  chunks = split_text_by_token_limit(text, tokenizer, max_tokens)
123
  if not chunks:
124
  return None
125
- # Initialize accumulated scores for each sentiment category
 
 
 
126
  scores = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
127
- for chunk in chunks:
128
- result = safe_sentiment(sentiment_pipeline, chunk)
129
  if result is not None:
130
  label = result['label'].upper()
131
  if label in scores:
132
  scores[label] += result['score']
133
  final_label = max(scores, key=lambda k: scores[k])
134
  final_score = scores[final_label]
 
135
  return {"label": final_label, "score": final_score}
136
 
137
 
138
-
139
  def preprocess_text(text):
140
  # Replace URLs and user mentions
141
  text = re.sub(r'http\S+', 'http', text)
 
109
  return chunks
110
 
111
 
112
+ # def safe_sentiment(sentiment_pipeline, text):
113
+ # try:
114
+ # result = sentiment_pipeline(text)[0]
115
+ # except Exception as e:
116
+ # result = None
117
+ # return result
118
+
119
+
120
def safe_sentiment_batch(sentiment_pipeline, texts):
    """Run sentiment inference over a batch of texts without raising.

    All texts are first sent to the pipeline in a single call. If that call
    raises, every text is retried on its own so that one problematic input
    does not discard the results of the whole batch.

    Args:
        sentiment_pipeline: Callable that takes a list of strings and returns
            one result per string, in order.
        texts: The strings to classify.

    Returns:
        The pipeline's results for ``texts``. After a batch failure, a list
        of the same length as ``texts`` in which each entry is either that
        text's result or ``None`` if its individual inference also failed.
        An empty input yields an empty list without calling the pipeline.
    """
    # Function-scope import: the file's top-level import block is not part of
    # this definition, so the dependency is kept local to the block.
    import logging

    logger = logging.getLogger(__name__)

    if not texts:
        return []

    try:
        return sentiment_pipeline(texts)
    except Exception:
        # Best-effort by design: record the failure instead of hiding it,
        # then fall back to per-text inference below.
        logger.exception(
            "Batch sentiment inference failed for %d text(s); retrying one by one",
            len(texts),
        )

    results = []
    for text in texts:
        try:
            # Pass a one-element list so the result has the same shape as in
            # the batched call.
            results.append(sentiment_pipeline([text])[0])
        except Exception:
            logger.exception("Sentiment inference failed for a single text")
            results.append(None)
    return results
 
129
 
130
  def analyze_detail(text, tokenizer, sentiment_pipeline, max_tokens):
131
  text = preprocess_text(text)
132
  chunks = split_text_by_token_limit(text, tokenizer, max_tokens)
133
  if not chunks:
134
  return None
135
+ # ここでバッチ処理を実行(チャンク全体を一括推論)
136
+ results = safe_sentiment_batch(sentiment_pipeline, chunks)
137
+
138
+ # 各チャンクの結果を集計
139
  scores = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
140
+ for result in results:
 
141
  if result is not None:
142
  label = result['label'].upper()
143
  if label in scores:
144
  scores[label] += result['score']
145
  final_label = max(scores, key=lambda k: scores[k])
146
  final_score = scores[final_label]
147
+
148
  return {"label": final_label, "score": final_score}
149
 
150
 
 
151
  def preprocess_text(text):
152
  # Replace URLs and user mentions
153
  text = re.sub(r'http\S+', 'http', text)