# Hugging Face Space: shobrunjb/spiill-fake-review-product-v2 (status: Sleeping)
# File size: 3,298 Bytes

import gradio as gr
import torch
import torch.nn.functional as F
from transformers import BertModel, BertTokenizer

# Hugging Face Hub repository id. It is used here to fetch the tokenizer and
# is passed again further down as the source of the BERT encoder weights.
model_name = "shobrunjb/mtl-indoBERT-product-review"
tokenizer = BertTokenizer.from_pretrained(model_name)

class IndoBERTMultiTaskClassifier(torch.nn.Module):
    """BERT encoder followed by two independent linear classification heads.

    Both heads read the same pooled encoder output, so a single forward pass
    produces the logits for both tasks.

    Args:
        bert_model_name: Identifier or path handed to
            ``BertModel.from_pretrained``.
        num_labels_task1: Output width of the first head.
        num_labels_task2: Output width of the second head.
        dropout_rate: Dropout probability applied to the pooled output before
            it reaches either head.
    """

    def __init__(self, bert_model_name, num_labels_task1, num_labels_task2, dropout_rate=0.3):
        super().__init__()
        # Attribute names are part of the checkpoint's state_dict keys, so
        # they must not be renamed.
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = torch.nn.Dropout(dropout_rate)
        hidden_size = self.bert.config.hidden_size
        self.classifier_task1 = torch.nn.Linear(hidden_size, num_labels_task1)
        self.classifier_task2 = torch.nn.Linear(hidden_size, num_labels_task2)

    def forward(self, input_ids, attention_mask):
        """Return ``(logits_task1, logits_task2)`` for a batch of token ids.

        Args:
            input_ids: Token-id tensor forwarded to the encoder.
            attention_mask: Attention-mask tensor forwarded to the encoder.

        Returns:
            A 2-tuple with the raw (pre-softmax) logits of each head.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Index 1 of the encoder output is BERT's *pooled* output: per the
        # Transformers documentation, the first-token ([CLS]) hidden state
        # after the pooler's dense + tanh layer, not the raw [CLS] vector.
        pooled_output = self.dropout(outputs[1])

        logits_task1 = self.classifier_task1(pooled_output)
        logits_task2 = self.classifier_task2(pooled_output)
        return logits_task1, logits_task2

# Build the two-head network, then replace its weights with the fine-tuned
# checkpoint. Each head width is 3; the label lists used to decode predictions
# have three entries each, so keep both in step.
model = IndoBERTMultiTaskClassifier(
    model_name,
    num_labels_task1=3,
    num_labels_task2=3,
)
# "pytorch_model.bin" is a relative path; map_location="cpu" keeps every
# tensor on the CPU while loading.
# NOTE(review): torch.load unpickles the file; only load checkpoints you
# trust (consider weights_only=True if the installed torch supports it).
model.load_state_dict(
    torch.load("pytorch_model.bin", map_location="cpu")
)
# Inference mode: disables dropout.
model.eval()

# Human-readable class names for each head. Position i names logit i of the
# corresponding head, so the order must match the order used in training.
label_mapping_task1 = [  # fake-review detection head
    "trusted",
    "fake",
    "non",
]
label_mapping_task2 = [  # sentiment head
    "positive",
    "negative",
    "neutral",
]

def _label_probabilities(logits, labels):
    """Map each label to its softmax probability for the first batch row."""
    # .tolist() converts to built-in floats. The values end up in a Gradio
    # Label payload, and numpy.float32 scalars are not JSON-serializable by
    # the standard encoder.
    probabilities = F.softmax(logits, dim=1)[0].tolist()
    return dict(zip(labels, probabilities))


def classify(text):
    """Run both classification heads on one review text.

    Args:
        text: The review text to score.

    Returns:
        A 2-tuple ``(task1_result, task2_result)``. Each element is a dict
        mapping a label name to its softmax probability as a built-in
        ``float``; task 1 uses ``label_mapping_task1`` and task 2 uses
        ``label_mapping_task2``.
    """
    # Inputs longer than 128 tokens are truncated.
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        logits_task1, logits_task2 = model(encoded['input_ids'], encoded['attention_mask'])

    return (
        _label_probabilities(logits_task1, label_mapping_task1),
        _label_probabilities(logits_task2, label_mapping_task2),
    )

# One Label component per head; each renders the label -> probability dict
# returned by classify as confidence bars.
result_panels = [
    gr.Label(label="Fake Review Detection"),
    gr.Label(label="Sentiment Classification"),
]

# Clickable sample inputs shown under the text box (one inner list per row).
# NOTE(review): these read like news headlines rather than the skincare
# product reviews the description asks for -- confirm they are intended.
sample_inputs = [
    ["Jokowi sangat kecewa dengan POLRI atas kerusuhan yang terjadi di Malang"],
    ["Lesti marah terhadap perlakuan KDRT yang dilakukan oleh Bilar"],
    ["Ungkapan rasa bahagia diutarakan oleh Coki Pardede karena kebebasannya dari penjara"],
]

# Wire the classifier into a single-textbox web UI.
iface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs=result_panels,
    examples=sample_inputs,
    title="Multitask IndoBERT: Fake Review & Sentiment Classification",
    description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.",
)

# Start the web server.
iface.launch()