kusa04 commited on
Commit
1d43e3c
·
verified ·
1 Parent(s): 2a185f2

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +12 -5
functions.py CHANGED
@@ -16,8 +16,6 @@ from transformers import (
16
  )
17
  from transformers.pipelines import AggregationStrategy
18
 
19
-
20
-
21
  # Function to normalize text by replacing multiple spaces/newlines with a single space
22
  def normalize_text(text):
23
  if not isinstance(text, str):
@@ -78,15 +76,24 @@ def safe_sentiment(sentiment_pipeline, text):
78
  return result
79
 
80
 
 
 
 
 
 
 
 
81
  def analyze_detail(text, tokenizer, sentiment_pipeline, max_tokens):
82
  text = preprocess_text(text)
83
  chunks = split_text_by_token_limit(text, tokenizer, max_tokens)
84
  if not chunks:
85
  return None
86
- # Initialize accumulated scores for each sentiment category
 
 
 
87
  scores = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
88
- for chunk in chunks:
89
- result = safe_sentiment(chunk)
90
  if result is not None:
91
  label = result['label'].upper()
92
  if label in scores:
 
16
  )
17
  from transformers.pipelines import AggregationStrategy
18
 
 
 
19
  # Function to normalize text by replacing multiple spaces/newlines with a single space
20
  def normalize_text(text):
21
  if not isinstance(text, str):
 
76
  return result
77
 
78
 
79
def safe_sentiment_batch(sentiment_pipeline, texts):
    """Run the sentiment pipeline on a batch of texts without raising.

    Parameters:
        sentiment_pipeline: callable applied to the whole list of texts
            (e.g. a transformers text-classification pipeline).
        texts: list of text chunks to classify in a single batched call.

    Returns:
        The pipeline's per-text results on success, or ``[None] * len(texts)``
        when the batched call raises — callers are expected to skip ``None``
        entries, so a whole-batch failure degrades gracefully instead of
        aborting the analysis.
    """
    try:
        return sentiment_pipeline(texts)
    except Exception:
        # Deliberate best-effort: any pipeline failure (tokenization, OOM,
        # model error) yields placeholders rather than propagating.
        # Original bound the exception as `e` but never used it.
        return [None] * len(texts)
86
  def analyze_detail(text, tokenizer, sentiment_pipeline, max_tokens):
87
  text = preprocess_text(text)
88
  chunks = split_text_by_token_limit(text, tokenizer, max_tokens)
89
  if not chunks:
90
  return None
91
+ # ここでバッチ処理を実行(チャンク全体を一括推論)
92
+ results = safe_sentiment_batch(sentiment_pipeline, chunks)
93
+
94
+ # 各チャンクの結果を集計
95
  scores = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
96
+ for result in results:
 
97
  if result is not None:
98
  label = result['label'].upper()
99
  if label in scores: