# Canstralian's picture
# Update app.py
# 41f9a4d verified
import os

import gradio as gr
import torch
from datasets import load_dataset
from huggingface_hub import InferenceClient
from huggingface_hub import login
from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer
# Authenticate with Hugging Face.
# A bare `login()` asks for the token interactively, which cannot work when
# this script runs unattended (for example as a hosted app). Read the token
# from the HF_TOKEN environment variable instead, and skip the explicit login
# when it is not set: "imdb" and "bert-base-uncased" are public resources, so
# no credentials are required to download them.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Load the dataset through the `datasets` library.
# "imdb" is a placeholder text-classification dataset with a "text" column
# and "train"/"test" splits; replace it with your own dataset (for example
# one related to password classification).
dataset = load_dataset("imdb")

# Load Tokenizer and Model
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
# num_labels=2 -> binary classification head on top of the pretrained encoder.
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
# Preprocess the Dataset
def preprocess_function(examples):
    """Tokenize the ``text`` field of ``examples`` with the module-level tokenizer.

    Sequences are truncated when too long and padded out to the maximum
    length, so every encoded example ends up with the same length.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, padding="max_length")
    return encoded
# Locations of the fine-tuned artifacts.
model_dir = "./password_sniffer_model"
tokenizer_dir = "./password_sniffer_tokenizer"

# Fine-tuning is expensive, so it only runs when no saved model/tokenizer is
# found on disk; otherwise the artifacts of a previous run are reused and the
# app can start immediately. Delete both directories to force retraining.
# (The tokenizer is saved last, so its directory existing implies the model
# was saved as well.)
if not (os.path.isdir(model_dir) and os.path.isdir(tokenizer_dir)):
    # Apply preprocessing to dataset
    tokenized_datasets = dataset.map(preprocess_function, batched=True)

    # Split into training and evaluation datasets
    train_dataset = tokenized_datasets["train"]
    eval_dataset = tokenized_datasets["test"]

    # `evaluation_strategy` was renamed to `eval_strategy` in newer
    # transformers releases. Use whichever name the installed version
    # declares so the script works on both sides of the rename.
    declared_fields = getattr(TrainingArguments, "__dataclass_fields__", {})
    if "eval_strategy" in declared_fields:
        eval_strategy_key = "eval_strategy"
    else:
        eval_strategy_key = "evaluation_strategy"

    # Define Training Arguments
    training_args = TrainingArguments(
        output_dir="./results",          # output directory
        num_train_epochs=3,              # number of training epochs
        per_device_train_batch_size=8,   # batch size for training
        per_device_eval_batch_size=16,   # batch size for evaluation
        warmup_steps=500,                # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        logging_dir="./logs",            # directory for storing logs
        logging_steps=10,
        save_strategy="epoch",           # save model each epoch
        **{eval_strategy_key: "epoch"},  # evaluate each epoch
    )

    # Initialize Trainer
    trainer = Trainer(
        model=model,                  # the instantiated Transformers model to be trained
        args=training_args,           # training arguments, defined above
        train_dataset=train_dataset,  # training dataset
        eval_dataset=eval_dataset,    # evaluation dataset
    )

    # Train the Model
    trainer.train()

    # Save the Model and Tokenizer
    model.save_pretrained(model_dir)
    tokenizer.save_pretrained(tokenizer_dir)

# Load the fine-tuned model and tokenizer from disk. This rebinds the
# module-level `model` and `tokenizer` used for inference below.
model = BertForSequenceClassification.from_pretrained(model_dir)
tokenizer = BertTokenizer.from_pretrained(tokenizer_dir)

# Setup Hugging Face Inference Client
# NOTE(review): `client` is not referenced anywhere else in the visible code;
# inference below uses the locally loaded `model` instead. Confirm whether
# this client is still needed.
client = InferenceClient("password_sniffer_model")
def detect_passwords(text):
    """
    Detect potential passwords using the trained BERT model.

    Args:
        text: The message to classify.

    Returns:
        "Potential password detected." when the model predicts class 1,
        otherwise "No password detected.". Empty or whitespace-only input
        is reported as "No password detected." without running the model.
    """
    # Nothing to classify: avoid a forward pass on an empty sequence, whose
    # prediction would be meaningless.
    if not text or not text.strip():
        return "No password detected."

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Inference only: disable gradient tracking so no autograd graph is built.
    with torch.no_grad():
        outputs = model(**inputs)
    # Softmax is monotonic, so taking argmax over the raw logits selects the
    # same class as taking it over the probabilities.
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()
    if predicted_class == 1:  # Assuming '1' represents potential password
        return "Potential password detected."
    return "No password detected."
# Gradio Interface
def respond(message, history=None, system_message=None, max_tokens=None, temperature=None, top_p=None):
    """
    Classify ``message`` and return the detection result as text.

    Only ``message`` is used. The remaining parameters are accepted for
    compatibility with callers that pass chat-style arguments; they are
    optional because the classifier does not read them.
    """
    return detect_passwords(message)


def _respond_from_form(system_message, message):
    """Adapt the two form fields, in on-screen order, to ``respond``."""
    # The form lists the system message first and the user input second,
    # whereas `respond` expects the user message as its first argument.
    return respond(message, system_message=system_message)


demo = gr.Interface(
    fn=_respond_from_form,
    inputs=[
        gr.Textbox(value="You are a password detection chatbot.", label="System message"),
        gr.Textbox(value="Hello, your password might be 12345!", label="User input"),
    ],
    outputs="text",
)
def main():
    """Start the Gradio app defined by the module-level ``demo``."""
    demo.launch()


# Only launch when executed as a script, not when imported.
if __name__ == "__main__":
    main()