app.py
CHANGED
@@ -1,59 +1,69 @@
|
|
1 |
import gradio as gr
|
2 |
-
|
3 |
-
|
4 |
-
import
|
5 |
-
|
6 |
-
from transformers import BertForSequenceClassification
|
7 |
|
|
|
8 |
model_name = "shobrunjb/mtl-indobert-fake-review-product"
|
9 |
-
|
10 |
-
tokenizer = BertTokenizer.from_pretrained('shobrunjb/mtl-indobert-fake-review-product')
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
def
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
# Predict with the model
|
18 |
-
outputs = model(inputs)
|
19 |
-
|
20 |
-
# Get predictions for sentiment and review label
|
21 |
-
sentiment_logits = outputs.logits[0].numpy()
|
22 |
-
review_logits = outputs.logits[1].numpy()
|
23 |
-
|
24 |
-
# Convert logits to probabilities using softmax
|
25 |
-
sentiment_probs = tf.nn.softmax(sentiment_logits).numpy()
|
26 |
-
review_probs = tf.nn.softmax(review_logits).numpy()
|
27 |
-
|
28 |
-
# Convert logits to class labels
|
29 |
-
sentiment_label_map = {0: 'negative', 1: 'neutral', 2: 'positive'}
|
30 |
-
review_label_map = {0: 'fake', 1: 'trusted', 2: 'non'}
|
31 |
-
|
32 |
-
sentiment_pred = {sentiment_label_map[i]: f"{sentiment_probs[i]*100:.2f}%" for i in range(3)}
|
33 |
-
review_pred = {review_label_map[i]: f"{review_probs[i]*100:.2f}%" for i in range(3)}
|
34 |
-
|
35 |
-
return sentiment_pred, review_pred
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from transformers import BertTokenizer
|
4 |
+
import torch.nn.functional as F
|
|
|
|
|
5 |
|
6 |
+
# Hugging Face repository id from which the tokenizer is loaded.
model_name = "shobrunjb/mtl-indobert-fake-review-product"
# Tokenizer used by classify() to turn review text into model inputs.
tokenizer = BertTokenizer.from_pretrained(model_name)
|
|
|
9 |
|
10 |
+
class IndoBERTMultiTaskClassifier(torch.nn.Module):
    """BERT encoder followed by two linear classification heads.

    Both heads read the same dropout-regularised pooled encoder output, so a
    single forward pass produces the logits of both tasks.

    Args:
        bert_model_name: Model id or path passed to ``BertModel.from_pretrained``.
        num_labels_task1: Number of output classes of the first head.
        num_labels_task2: Number of output classes of the second head.
        dropout_rate: Dropout probability applied to the pooled output.
    """

    def __init__(
        self,
        bert_model_name: str,
        num_labels_task1: int,
        num_labels_task2: int,
        dropout_rate: float = 0.3,
    ) -> None:
        # The module header imports only BertTokenizer from transformers, so
        # BertModel is imported here; without it this constructor raises
        # NameError.
        from transformers import BertModel

        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = torch.nn.Dropout(dropout_rate)
        hidden_size = self.bert.config.hidden_size
        self.classifier_task1 = torch.nn.Linear(hidden_size, num_labels_task1)
        self.classifier_task2 = torch.nn.Linear(hidden_size, num_labels_task2)

    def forward(self, input_ids, attention_mask):
        """Run the encoder once and return ``(logits_task1, logits_task2)``.

        Args:
            input_ids: Token ids, forwarded to the encoder as ``input_ids``.
            attention_mask: Mask forwarded to the encoder as ``attention_mask``.

        Returns:
            A tuple with the raw (pre-softmax) outputs of the first and the
            second classification head, in that order.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Element 1 of the encoder output is the sentence-level vector. For
        # transformers' BertModel this is `pooler_output` (the [CLS] hidden
        # state passed through the pooler layer), not the raw [CLS] state.
        pooled_output = self.dropout(outputs[1])

        logits_task1 = self.classifier_task1(pooled_output)
        logits_task2 = self.classifier_task2(pooled_output)
        return logits_task1, logits_task2
|
27 |
+
|
28 |
+
# Class names per head; the list position is the class index that head emits.
# NOTE(review): the order must match the label encoding used when the
# checkpoint was trained -- confirm against the training code.
label_mapping_task1 = ["trusted", "fake", "non"]  # fake-review detection head
label_mapping_task2 = ["positive", "negative", "neutral"]  # sentiment head

# Build the network. Head sizes are derived from the label lists above so the
# two can never drift apart.
model = IndoBERTMultiTaskClassifier(
    bert_model_name="indobenchmark/indobert-base-p1",
    num_labels_task1=len(label_mapping_task1),
    num_labels_task2=len(label_mapping_task2),
)

# Load the fine-tuned weights onto the CPU.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code; only use a checkpoint from a trusted source (or pass
# weights_only=True if the installed PyTorch version supports it).
state_dict = torch.load("pytorch_model.bin", map_location=torch.device("cpu"))
model.load_state_dict(state_dict)

# Inference mode: disables dropout.
model.eval()
|
40 |
+
|
41 |
+
def classify(text):
    """Classify one review and return the predicted label for each task.

    Args:
        text: Review text to classify.

    Returns:
        A dict with the keys "Fake Review Detection" and
        "Sentiment Classification", each mapped to the label name (taken from
        ``label_mapping_task1`` / ``label_mapping_task2``) with the highest
        probability.
    """
    # Tokenize the input text, truncating to at most 128 tokens.
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Run the model without tracking gradients.
    with torch.no_grad():
        fake_logits, sentiment_logits = model(encoded['input_ids'], encoded['attention_mask'])

    heads = (
        ("Fake Review Detection", fake_logits, label_mapping_task1),
        ("Sentiment Classification", sentiment_logits, label_mapping_task2),
    )
    predictions = {}
    for key, logits, labels in heads:
        # Softmax turns the logits into probabilities; keep the most likely label.
        probabilities = F.softmax(logits, dim=1).cpu().numpy()
        predictions[key] = labels[probabilities.argmax()]
    return predictions
|
60 |
+
|
61 |
+
# Gradio interface
def _classify_for_interface(text):
    """Unpack classify()'s dict into one value per Gradio output component.

    The interface below declares two outputs, so Gradio needs two return
    values in that order; classify() returns a single dict keyed by task name.
    """
    result = classify(text)
    return result["Fake Review Detection"], result["Sentiment Classification"]


# gr.Label replaces gr.outputs.Label, which was removed from newer Gradio
# releases.
iface = gr.Interface(
    fn=_classify_for_interface,
    inputs="text",
    outputs=[
        gr.Label(label="Fake Review Detection"),
        gr.Label(label="Sentiment Classification"),
    ],
    title="Multitask IndoBERT: Fake Review & Sentiment Classification",
    description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.",
)

iface.launch()
|