File size: 3,345 Bytes
8c23fe3 0830375 5ad1844 0830375 a9dc165 58ff862 a2774cf 0830375 a9dc165 0830375 5ad1844 0830375 8c23fe3 0830375 8c23fe3 0830375 5ad1844 7f9bed9 0830375 58ff862 0830375 58ff862 0830375 8c23fe3 58ff862 9758bca 0830375 5ad1844 9758bca 0830375 5ad1844 0830375 5ad1844 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
import torch
from transformers import BertTokenizer, BertModel
import torch.nn.functional as F
# Hugging Face Hub identifier of the pretrained IndoBERT checkpoint. The same
# name is used for the tokenizer below and for the BERT encoder inside
# IndoBERTMultiTaskClassifier.
model_name = "indobenchmark/indobert-base-p1"
# Tokenizer matching the checkpoint above; runs once at import time
# (presumably fetches or reads cached vocabulary files — confirm for offline use).
tokenizer = BertTokenizer.from_pretrained(model_name)
class IndoBERTMultiTaskClassifier(torch.nn.Module):
    """BERT encoder shared by two independent linear classification heads.

    Both heads read the same dropout-regularised pooled encoder output, so a
    single forward pass yields logits for two tasks at once.
    """

    def __init__(self, bert_model_name, num_labels_task1, num_labels_task2, dropout_rate=0.3):
        """Build the encoder and the two task heads.

        Args:
            bert_model_name: Name or path passed to ``BertModel.from_pretrained``.
            num_labels_task1: Number of output classes of the first head.
            num_labels_task2: Number of output classes of the second head.
            dropout_rate: Dropout probability applied to the pooled output
                before both heads.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = torch.nn.Dropout(dropout_rate)
        hidden_size = self.bert.config.hidden_size
        # The heads are plain, freshly initialised linear layers; only the
        # encoder above receives pretrained weights in this constructor.
        self.classifier_task1 = torch.nn.Linear(hidden_size, num_labels_task1)
        self.classifier_task2 = torch.nn.Linear(hidden_size, num_labels_task2)

    def forward(self, input_ids, attention_mask):
        """Return ``(logits_task1, logits_task2)`` for a tokenized batch.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.

        Returns:
            A 2-tuple with the raw (pre-softmax) outputs of the first and
            second classification head.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Element 1 of the encoder output is BERT's *pooled* output: the
        # [CLS] hidden state after the pooler's dense + tanh layer, not the
        # raw [CLS] token vector.
        pooled_output = self.dropout(outputs[1])
        logits_task1 = self.classifier_task1(pooled_output)
        logits_task2 = self.classifier_task2(pooled_output)
        return logits_task1, logits_task2
# Define label mappings. A label's position in the list is the class index it
# is looked up by after argmax, so the order must match the index order of the
# corresponding classifier head.
label_mapping_task1 = ["trusted", "fake", "non"]  # Adjust with your task1 labels
label_mapping_task2 = ["positive", "negative", "neutral"]  # Adjust with your task2 labels

# Build the multitask model around the pretrained IndoBERT encoder. Head sizes
# are derived from the label lists so the two can never drift apart.
# NOTE(review): only the encoder is loaded from pretrained weights; the two
# heads are newly created torch.nn.Linear layers and no fine-tuned state_dict
# is loaded anywhere in the visible code, so predictions are presumably not
# meaningful until trained weights are loaded — confirm.
model = IndoBERTMultiTaskClassifier(
    bert_model_name=model_name,
    num_labels_task1=len(label_mapping_task1),
    num_labels_task2=len(label_mapping_task2),
)
# Inference only: put the module (including its dropout layer) in eval mode.
model.eval()
def _format_prediction(logits, labels):
    """Render one head's logits as ``"<best> (<label>: <pct>%, ...)"``.

    Args:
        logits: Raw head output; softmax is taken over dim 1 and only the
            first batch item is reported.
        labels: Label names indexed by class position.

    Returns:
        The highest-probability label followed by every label's probability
        as a percentage with two decimals.
    """
    probs = F.softmax(logits, dim=1).cpu().numpy()[0]  # first batch item
    best_label = labels[probs.argmax()]
    breakdown = ", ".join(
        f"{label}: {prob*100:.2f}%" for label, prob in zip(labels, probs)
    )
    return f"{best_label} ({breakdown})"


def classify(text):
    """Classify one review with both heads of the multitask model.

    Args:
        text: The review text to classify.

    Returns:
        A 2-tuple of strings ``(result_task1, result_task2)``, each naming the
        predicted label followed by the per-label probabilities in percent.
    """
    # Tokenize input text; inputs longer than 128 tokens are truncated.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Prediction with model; gradients are not needed for inference.
    with torch.no_grad():
        logits_task1, logits_task2 = model(inputs["input_ids"], inputs["attention_mask"])
        result_task1 = _format_prediction(logits_task1, label_mapping_task1)
        result_task2 = _format_prediction(logits_task2, label_mapping_task2)

    return result_task1, result_task2
# Gradio Interface: one free-text input, one Label output per task. The two
# outputs receive the two strings returned by classify(), in order.
iface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs=[
        gr.Label(label="Fake Review Detection"),
        gr.Label(label="Sentiment Classification"),
    ],
    title="Multitask IndoBERT: Fake Review & Sentiment Classification",
    description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.",
)
# Start the web app.
iface.launch()