File size: 3,298 Bytes
8c23fe3 0830375 79f910e 0830375 a9dc165 58ff862 e34dedf 0830375 a9dc165 0830375 8c23fe3 0830375 8c23fe3 0830375 79f910e 7f9bed9 79f910e 7f9bed9 0830375 58ff862 0830375 58ff862 0830375 8c23fe3 58ff862 9758bca 0830375 f7be72d 9758bca 0830375 f7be72d 79f910e f7be72d 0830375 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import BertModel, BertTokenizer
# Load model and tokenizer from Hugging Face
# `model_name` is the repository id passed to `from_pretrained`; it is used
# here for the tokenizer and again below as `bert_model_name` when the
# multitask classifier is constructed.
model_name = "shobrunjb/mtl-indoBERT-product-review"
# Created once at import time; `classify` reuses this module-level tokenizer.
tokenizer = BertTokenizer.from_pretrained(model_name)
class IndoBERTMultiTaskClassifier(torch.nn.Module):
    """BERT encoder with two independent linear classification heads.

    Both heads read the same pooled encoder feature, so a single forward pass
    through BERT yields logits for both tasks.

    Args:
        bert_model_name: Name or path handed to ``BertModel.from_pretrained``.
        num_labels_task1: Number of output classes for the first head.
        num_labels_task2: Number of output classes for the second head.
        dropout_rate: Dropout probability applied to the pooled feature
            before it is fed to the heads (only active in training mode).
    """

    def __init__(self, bert_model_name, num_labels_task1, num_labels_task2, dropout_rate=0.3):
        super().__init__()
        # `BertModel` must be imported from `transformers` at the top of the
        # file; without that import this line raises NameError.
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = torch.nn.Dropout(dropout_rate)
        hidden_size = self.bert.config.hidden_size
        self.classifier_task1 = torch.nn.Linear(hidden_size, num_labels_task1)
        self.classifier_task2 = torch.nn.Linear(hidden_size, num_labels_task2)

    def forward(self, input_ids, attention_mask):
        """Return ``(logits_task1, logits_task2)`` for a batch of token ids.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Mask tensor forwarded to the encoder.

        Returns:
            A tuple of two tensors of raw (un-normalised) scores, one per head.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Index 1 of the BertModel output is the pooled output (the first-token
        # hidden state after BERT's pooler layer), not the raw [CLS] hidden state.
        pooled_output = self.dropout(outputs[1])
        logits_task1 = self.classifier_task1(pooled_output)
        logits_task2 = self.classifier_task2(pooled_output)
        return logits_task1, logits_task2
# Define label mappings
# These lists are the single source of truth for the number of classes per
# task: the classifier head sizes below are derived from them, so the labels
# and the heads cannot drift apart unnoticed.
# NOTE(review): the order must match the label ids used during training — confirm.
label_mapping_task1 = ["trusted", "fake", "non"]  # Adjust with your task1 labels
label_mapping_task2 = ["positive", "negative", "neutral"]  # Adjust with your task2 labels

# Load the model checkpoint into your multitask model class
model = IndoBERTMultiTaskClassifier(
    bert_model_name=model_name,
    num_labels_task1=len(label_mapping_task1),
    num_labels_task2=len(label_mapping_task2),
)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (consider `weights_only=True` if the installed torch supports it).
# The weights are mapped to CPU so the app also starts on machines without a GPU.
model.load_state_dict(torch.load("pytorch_model.bin", map_location=torch.device("cpu")))
# Inference only: switches dropout off.
model.eval()
def classify(text):
    """Classify one review text with both heads of the multitask model.

    Args:
        text: The review text to classify.

    Returns:
        A tuple ``(result_task1, result_task2)``. Each element is a dict that
        maps a label name to its softmax probability as a plain Python float,
        which is the form ``gr.Label`` can serialize directly.
    """
    # Tokenize input text (truncated to at most 128 tokens).
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        logits_task1, logits_task2 = model(inputs["input_ids"], inputs["attention_mask"])

    # Softmax over the class dimension, keep the first (only) batch item, and
    # convert to Python floats: numpy float32 scalars are not JSON-serializable.
    probs_task1 = F.softmax(logits_task1, dim=1)[0].tolist()
    probs_task2 = F.softmax(logits_task2, dim=1)[0].tolist()

    # Pair each label with its probability; zip stops at the shorter sequence.
    result_task1 = dict(zip(label_mapping_task1, probs_task1))
    result_task2 = dict(zip(label_mapping_task2, probs_task2))
    return result_task1, result_task2
# Gradio interface: one text input, one label widget per task output of
# `classify` (each widget receives a label -> probability dict).
fake_review_output = gr.Label(label="Fake Review Detection")
sentiment_output = gr.Label(label="Sentiment Classification")

# Clickable sample inputs shown under the form.
# NOTE(review): these sentences do not look like skincare product reviews,
# unlike what the description promises — confirm they are the intended examples.
example_inputs = [
    ["Jokowi sangat kecewa dengan POLRI atas kerusuhan yang terjadi di Malang"],
    ["Lesti marah terhadap perlakuan KDRT yang dilakukan oleh Bilar"],
    ["Ungkapan rasa bahagia diutarakan oleh Coki Pardede karena kebebasannya dari penjara"],
]

iface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs=[fake_review_output, sentiment_output],
    title="Multitask IndoBERT: Fake Review & Sentiment Classification",
    description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.",
    examples=example_inputs,
)

# Start the web server as soon as the script is executed.
iface.launch()
|