Spaces:

shobrunjb
/

spiill-fake-review-product-v2

Sleeping

App Files Files Community

shobrunjb committed on Aug 25, 2024

Commit

0830375

verified ·

1 Parent(s): dc55fb2

1

Browse files

Files changed (1) hide show

app.py +62 -52

app.py CHANGED Viewed

@@ -1,59 +1,69 @@
 import gradio as gr
-from transformers import BertTokenizer, TFBertForSequenceClassification
-import tensorflow as tf
-import numpy as np
-from transformers import BertForSequenceClassification
 model_name = "shobrunjb/mtl-indobert-fake-review-product"
-model = BertForSequenceClassification.from_pretrained(model_name)
-tokenizer = BertTokenizer.from_pretrained('shobrunjb/mtl-indobert-fake-review-product')
-def predict(text):
-    # Tokenize input text
-    inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=400)
-    # Predict with the model
-    outputs = model(inputs)
-    # Get predictions for sentiment and review label
-    sentiment_logits = outputs.logits[0].numpy()
-    review_logits = outputs.logits[1].numpy()
-    # Convert logits to probabilities using softmax
-    sentiment_probs = tf.nn.softmax(sentiment_logits).numpy()
-    review_probs = tf.nn.softmax(review_logits).numpy()
-    # Convert logits to class labels
-    sentiment_label_map = {0: 'negative', 1: 'neutral', 2: 'positive'}
-    review_label_map = {0: 'fake', 1: 'trusted', 2: 'non'}
-    sentiment_pred = {sentiment_label_map[i]: f"{sentiment_probs[i]*100:.2f}%" for i in range(3)}
-    review_pred = {review_label_map[i]: f"{review_probs[i]*100:.2f}%" for i in range(3)}
-    return sentiment_pred, review_pred
-# Create Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Spiill- Deteksi Fake Review Produk")
-    gr.Markdown("Model ini memprediksi dan menganalsis review produk dengan multi task learning ")
-    with gr.Row():
-        input_text = gr.Textbox(label="Input", placeholder="Masukkan teks di sini...", lines=4)
-        sentiment_output = gr.JSON(label="Sentimen (dengan Presentase)")
-        review_output = gr.JSON(label="Label Review (dengan Presentase)")
-    submit_btn = gr.Button("Submit")
-    submit_btn.click(fn=predict, inputs=input_text, outputs=[sentiment_output, review_output])
-    gr.Markdown("### Contoh Kalimat:")
-    gr.Examples(
-        examples=["Bibirku lagi iritasi parah. Kering, gatal, dan mengelupas sampai luka. Diolesin mediheal ini langsung membaik dalam 2 hari. Bibirku jadi gak patchy lagi dan lukanya sembuh.",
-                  "Lip balm dengan 100,000ppm Panthenol, shea butter, beragam plant oil yang efektif untuk melembapkan bibir sangat kering, pecah-pecah, dan menghaluskan bibir.",
-                  "AKU CINTA BANGET BANGET beneran bantuuu rambut aku pas lagi rontok rontoknya 😭😭🤞🏻 tujuanku make cuma buat ilangin botak tapi surprisingly bikin rambut makin halus juga omg HIDDEN GEM kata aku"],
-        inputs=input_text,
-    )
-# Launch the interface
-demo.launch()

 import gradio as gr
+import torch
+from transformers import BertTokenizer
+import torch.nn.functional as F
+# Load model dan tokenizer dari Hugging Face
 model_name = "shobrunjb/mtl-indobert-fake-review-product"
+tokenizer = BertTokenizer.from_pretrained(model_name)
+class IndoBERTMultiTaskClassifier(torch.nn.Module):
+    """BERT encoder shared by two linear classification heads.
+
+    Both heads read the same dropout-regularised pooled encoder output, so one
+    forward pass produces the logits of both tasks.
+
+    Args:
+        bert_model_name: Identifier passed to ``BertModel.from_pretrained``.
+        num_labels_task1: Number of output classes of the first head.
+        num_labels_task2: Number of output classes of the second head.
+        dropout_rate: Dropout probability applied to the pooled output.
+    """
+    def __init__(self, bert_model_name, num_labels_task1, num_labels_task2, dropout_rate=0.3):
+        # The file header imports only BertTokenizer, so BertModel is imported
+        # here; without it the constructor fails with NameError.
+        from transformers import BertModel
+        super().__init__()
+        # Attribute names are kept unchanged: they are the keys of the saved state dict.
+        self.bert = BertModel.from_pretrained(bert_model_name)
+        self.dropout = torch.nn.Dropout(dropout_rate)
+        hidden_size = self.bert.config.hidden_size
+        self.classifier_task1 = torch.nn.Linear(hidden_size, num_labels_task1)
+        self.classifier_task2 = torch.nn.Linear(hidden_size, num_labels_task2)
+    def forward(self, input_ids, attention_mask):
+        """Return ``(logits_task1, logits_task2)`` for a batch of token ids."""
+        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
+        # Index 1 is the encoder's pooled output (the [CLS] state after BERT's
+        # pooling layer), not the raw [CLS] hidden state.
+        pooled_output = self.dropout(outputs[1])
+        logits_task1 = self.classifier_task1(pooled_output)
+        logits_task2 = self.classifier_task2(pooled_output)
+        return logits_task1, logits_task2
+# Load the model
+# The architecture is built on the base IndoBERT checkpoint first; its weights
+# are then replaced by the state dict read from a local file.
+model = IndoBERTMultiTaskClassifier(
+    bert_model_name="indobenchmark/indobert-base-p1",
+    num_labels_task1=3,  # Adjust with your task1 classes
+    num_labels_task2=3   # Adjust with your task2 classes
+)
+# NOTE(review): torch.load unpickles the file, so only load a checkpoint from a trusted source.
+# NOTE(review): the path is relative to the working directory - confirm "pytorch_model.bin" ships with the app.
+model.load_state_dict(torch.load("pytorch_model.bin", map_location=torch.device('cpu')))
+# Inference mode: disables the dropout layer used by the classifier.
+model.eval()
+# Define label mappings
+# Position i in each list is the name reported when class index i has the highest probability.
+# NOTE(review): the previous revision of this file used the orders fake/trusted/non and
+# negative/neutral/positive - confirm these lists match the label encoding used in training.
+label_mapping_task1 = ["trusted", "fake", "non"]  # Adjust with your task1 labels
+label_mapping_task2 = ["positive", "negative", "neutral"]  # Adjust with your task2 labels
+def classify(text):
+    """Predict the fake-review label and the sentiment label for one text.
+
+    Returns a dict with the keys "Fake Review Detection" and
+    "Sentiment Classification", each mapped to the most probable label name.
+    """
+    # Tokenize the input text into PyTorch tensors, truncated to 128 tokens
+    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
+    # Run the model without tracking gradients
+    with torch.no_grad():
+        fake_logits, sentiment_logits = model(encoded['input_ids'], encoded['attention_mask'])
+    # Softmax turns each head's logits into probabilities
+    fake_probs = F.softmax(fake_logits, dim=1).cpu().numpy()
+    sentiment_probs = F.softmax(sentiment_logits, dim=1).cpu().numpy()
+    # Report the label with the highest probability for each task
+    return {
+        "Fake Review Detection": label_mapping_task1[fake_probs.argmax()],
+        "Sentiment Classification": label_mapping_task2[sentiment_probs.argmax()],
+    }
+# Gradio Interface
+def _classify_for_ui(text):
+    """Unpack classify's dict into one value per output component.
+
+    The interface declares two outputs, so the callback must return two
+    values; returning the single dict would leave the second output unfilled.
+    """
+    result = classify(text)
+    return result["Fake Review Detection"], result["Sentiment Classification"]
+# gr.Label is the top-level component; the legacy gr.outputs namespace is
+# deprecated and no longer available in current Gradio releases.
+iface = gr.Interface(fn=_classify_for_ui,
+                     inputs="text",
+                     outputs=[gr.Label(label="Fake Review Detection"),
+                              gr.Label(label="Sentiment Classification")],
+                     title="Multitask IndoBERT: Fake Review & Sentiment Classification",
+                     description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.")
+iface.launch()