def format_predictions(words, predictions):
    '''
    Pair each word with its predicted label as a (word, label) tuple.

    The two sequences are walked in lockstep; pairing stops at the end of
    the shorter one, so surplus items in the longer sequence are dropped.
    Returns a new list and leaves both inputs untouched.
    '''
    return list(zip(words, predictions))
| |
|
def process_predictions(predictions):
    '''
    Replace underscores in each word with spaces, keeping its label.

    Takes an iterable of (word, label) pairs and returns a new list with
    exactly one output pair per input pair: the word is not split into
    several entries, only its '_' characters become ' '. Words without an
    underscore pass through unchanged.
    '''
    # str.replace is a no-op when there is no underscore, so a single
    # expression covers both the "has underscore" and "plain" cases.
    return [(word.replace('_', ' '), label) for word, label in predictions]
| |
|
| |
|
def combine_entities(predictions):
    '''
    Merge each B- tagged word with the matching I- tagged words after it.

    Takes an iterable of (word, label) pairs. A label starting with 'B-'
    opens an entity; following labels starting with 'I-' whose suffix
    (the text after the two-character prefix) equals the open entity's
    suffix extend it. The merged entity is emitted as one pair whose text
    is the words joined by single spaces and whose label is the opening
    'B-...' label, prefix included.

    Any other pair (including an 'I-' label with no matching open entity)
    closes the open entity and is emitted unchanged.
    '''
    merged = []
    open_words = []      # words of the entity currently being built
    open_label = None    # its 'B-...' label; None when nothing is open

    for word, label in predictions:
        extends_open = (
            label.startswith('I-')
            and bool(open_label)
            and label[2:] == open_label[2:]
        )
        if extends_open:
            open_words.append(word)
            continue

        # Every remaining case ends whatever entity was open.
        if open_words:
            merged.append((' '.join(open_words), open_label))
            open_words = []
            open_label = None

        if label.startswith('B-'):
            open_words = [word]
            open_label = label
        else:
            merged.append((word, label))

    # Emit an entity that runs up to the end of the input.
    if open_words:
        merged.append((' '.join(open_words), open_label))

    return merged
| |
|
| |
|
| |
|
| |
|
def remove_B_prefix(entities):
    '''
    Strip a leading 'B-' from every label.

    Takes an iterable of (word, label) pairs and returns a new list in
    which labels starting with 'B-' lose those two characters; all other
    labels, and all words, are passed through unchanged.
    '''
    return [
        (word, label[2:] if label.startswith('B-') else label)
        for word, label in entities
    ]
| |
|
| |
|
def combine_i_tags(tokens_labels):
    '''
    Merge runs of consecutive I- tagged tokens that share a type.

    Takes an iterable of (token, label) pairs. Adjacent pairs whose labels
    start with 'I-' and have the same suffix are collapsed into one pair:
    the tokens joined by single spaces, labelled with the suffix alone
    (the 'I-' prefix is removed). A run ends when the type changes or a
    label without the 'I-' prefix appears; such pairs are emitted
    unchanged and are never merged into a neighbouring run.
    '''
    merged = []
    run_tokens = []    # tokens of the I- run currently being collected
    run_type = None    # label suffix of that run; None when no run is open

    for token, label in tokens_labels:
        is_inside = label.startswith('I-')
        tag_type = label[2:] if is_inside else None

        if is_inside and run_tokens and tag_type == run_type:
            run_tokens.append(token)
            continue

        # A type change or a non-I label closes the current run.
        if run_tokens:
            merged.append((' '.join(run_tokens), run_type))
            run_tokens = []
            run_type = None

        if is_inside:
            run_tokens = [token]
            run_type = tag_type
        else:
            merged.append((token, label))

    # Emit a run that reaches the end of the input.
    if run_tokens:
        merged.append((' '.join(run_tokens), run_type))

    return merged
| |
|
| | |
| |
|
| | |
| | |
| |
|
| |
|
| |
|