import gradio as gr
import torch
from transformers import BertTokenizer, BertModel  
import torch.nn.functional as F

# IndoBERT checkpoint name and tokenizer from Hugging Face
model_name = "indobenchmark/indobert-base-p1"
tokenizer = BertTokenizer.from_pretrained(model_name)

class IndoBERTMultiTaskClassifier(torch.nn.Module):
    def __init__(self, bert_model_name, num_labels_task1, num_labels_task2, dropout_rate=0.3):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = torch.nn.Dropout(dropout_rate)
        self.classifier_task1 = torch.nn.Linear(self.bert.config.hidden_size, num_labels_task1)
        self.classifier_task2 = torch.nn.Linear(self.bert.config.hidden_size, num_labels_task2)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs[1]  # pooler_output: the [CLS] token after BERT's pooling layer
        cls_output = self.dropout(cls_output)

        logits_task1 = self.classifier_task1(cls_output)
        logits_task2 = self.classifier_task2(cls_output)

        return logits_task1, logits_task2

# Build the multi-task model; the BERT backbone weights are pulled from Hugging Face
model = IndoBERTMultiTaskClassifier(
    bert_model_name=model_name,
    num_labels_task1=3,  # Adjust with your task1 classes
    num_labels_task2=3   # Adjust with your task2 classes
)
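# Note: the two classifier heads above start from random initialization; only the
# BERT backbone is pretrained. If fine-tuned multitask weights were saved during
# training, they could be restored here before inference, for example
# (hypothetical checkpoint path, adjust to your own file):
#
#   model.load_state_dict(torch.load("indobert_multitask.pt", map_location="cpu"))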
model.eval()

# Define label mappings
label_mapping_task1 = ["trusted", "fake", "non"]  # Adjust with your task1 labels
label_mapping_task2 = ["positive", "negative", "neutral"]  # Adjust with your task2 labels

def classify(text):
    # Tokenize input text
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Prediction with model
    with torch.no_grad():
        logits_task1, logits_task2 = model(input_ids, attention_mask)
    
    # Softmax to get probabilities
    probs_task1 = F.softmax(logits_task1, dim=1).cpu().numpy()[0]  # Extract the first batch item
    probs_task2 = F.softmax(logits_task2, dim=1).cpu().numpy()[0]  # Extract the first batch item

    # Predict label with highest probability
    pred_task1 = label_mapping_task1[probs_task1.argmax()]
    pred_task2 = label_mapping_task2[probs_task2.argmax()]

    # Format probabilities as percentages
    probs_task1_str = ", ".join([f"{label}: {prob*100:.2f}%" for label, prob in zip(label_mapping_task1, probs_task1)])
    probs_task2_str = ", ".join([f"{label}: {prob*100:.2f}%" for label, prob in zip(label_mapping_task2, probs_task2)])

    # Combine label predictions with their probabilities
    result_task1 = f"{pred_task1} ({probs_task1_str})"
    result_task2 = f"{pred_task2} ({probs_task2_str})"

    return result_task1, result_task2
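
# Example (hypothetical input): classify("Produk ini bikin kulit lembap dan tidak lengket")
# returns two formatted strings, one per task, each with per-class probabilities.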

# Gradio Interface
iface = gr.Interface(fn=classify, 
                     inputs="text", 
                     outputs=[gr.Label(label="Fake Review Detection"), 
                              gr.Label(label="Sentiment Classification")],
                     title="Multitask IndoBERT: Fake Review & Sentiment Classification",
                     description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.")

iface.launch()
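
# When running outside Hugging Face Spaces, a temporary public URL can be requested
# instead via Gradio's standard sharing option:
#   iface.launch(share=True)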