Update functions.py
functions.py CHANGED (+21 -9)
@@ -109,33 +109,45 @@ def split_text_by_token_limit(text, tokenizer, max_tokens):
     return chunks
 
 
-def safe_sentiment(sentiment_pipeline, text):
+# def safe_sentiment(sentiment_pipeline, text):
+#     try:
+#         result = sentiment_pipeline(text)[0]
+#     except Exception as e:
+#         result = None
+#     return result
+
+
+def safe_sentiment_batch(sentiment_pipeline, texts):
+    """
+    Break the texts down and run them as one batch.
+    """
     try:
-        result = sentiment_pipeline(text)[0]
+        results = sentiment_pipeline(texts)
     except Exception as e:
-        result = None
-    return result
-
+        results = [None] * len(texts)
+    return results
 
 def analyze_detail(text, tokenizer, sentiment_pipeline, max_tokens):
     text = preprocess_text(text)
     chunks = split_text_by_token_limit(text, tokenizer, max_tokens)
     if not chunks:
         return None
-    #
+    # Run batch inference here (process all chunks in one call)
+    results = safe_sentiment_batch(sentiment_pipeline, chunks)
+
+    # Aggregate the results from each chunk
     scores = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
-    for chunk in chunks:
-        result = safe_sentiment(sentiment_pipeline, chunk)
+    for result in results:
         if result is not None:
             label = result['label'].upper()
             if label in scores:
                 scores[label] += result['score']
     final_label = max(scores, key=lambda k: scores[k])
     final_score = scores[final_label]
+
     return {"label": final_label, "score": final_score}
 
 
-
 def preprocess_text(text):
     # Replace URLs and user mentions
     text = re.sub(r'http\S+', 'http', text)
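
For context, a minimal usage sketch of how the changed functions might be wired together. The model name, example text, and max_tokens value below are illustrative assumptions, not part of this commit; any Hugging Face sentiment pipeline whose labels upper-case to POSITIVE / NEGATIVE / NEUTRAL would fit the scores dict above.

# Minimal usage sketch (assumed setup, not part of the commit).
from transformers import AutoTokenizer, pipeline

from functions import analyze_detail  # the module changed in this commit

# Assumed model choice: a Twitter sentiment model with
# positive / negative / neutral labels.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL, tokenizer=tokenizer)

text = "Loving the new release http://example.com but startup is still slow..."
# max_tokens=510 is an illustrative value, leaving room for special tokens.
result = analyze_detail(text, tokenizer, sentiment_pipeline, max_tokens=510)
print(result)  # e.g. {'label': 'POSITIVE', 'score': 0.93}

One trade-off worth noting: the single try/except around the batch call means one failing input nullifies the results for the whole batch, whereas the commented-out per-text safe_sentiment lost only the failing chunk. A fallback that retries chunks one by one when the batch call raises would recover that behavior.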