# Spaces status: Runtime error
import dataclasses
import os

import gradio as gr
import torch
from datasets import load_dataset
from huggingface_hub import login
from huggingface_hub import InferenceClient
from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer
# Authenticate with Hugging Face.
# A bare login() opens an interactive prompt, which cannot be answered when the
# script runs headless. Prefer a token from the HF_TOKEN environment variable;
# an unset or empty variable becomes None, which keeps the original prompt
# behaviour as the fallback.
login(token=os.environ.get("HF_TOKEN") or None)

# Load the dataset. "imdb" is a placeholder text-classification dataset;
# replace it with your own (for example one exported from Kaggle).
dataset = load_dataset("imdb")

# Load the tokenizer and a BERT classifier with a two-label head.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
# Dataset preprocessing
def preprocess_function(examples):
    """Tokenize the ``text`` field of *examples*.

    The tokenizer is called with truncation enabled and the ``"max_length"``
    padding strategy.

    Args:
        examples: Mapping that holds the raw text under the ``"text"`` key.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, padding="max_length")
    return encoded
# Run the tokenizer over the whole dataset, feeding it batches of examples.
tokenized_datasets = dataset.map(preprocess_function, batched=True)

# The "train" split is used for fitting and the "test" split for evaluation.
train_dataset, eval_dataset = tokenized_datasets["train"], tokenized_datasets["test"]
# Define Training Arguments
# The per-epoch evaluation option was renamed from `evaluation_strategy` to
# `eval_strategy` in newer transformers releases, and the old keyword is
# rejected there. Use whichever name the installed TrainingArguments declares
# so the script constructs its arguments on both old and new versions.
_training_arg_names = {f.name for f in dataclasses.fields(TrainingArguments)}
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",          # where checkpoints are written
    num_train_epochs=3,              # number of passes over the training set
    per_device_train_batch_size=8,   # training batch size per device
    per_device_eval_batch_size=16,   # evaluation batch size per device
    warmup_steps=500,                # learning-rate warmup steps
    weight_decay=0.01,               # weight-decay strength
    logging_dir="./logs",            # where logs are written
    logging_steps=10,                # log every 10 steps
    save_strategy="epoch",           # save a checkpoint each epoch
    **{_eval_strategy_key: "epoch"},  # evaluate each epoch
)
# Assemble the Trainer from the model, arguments and datasets prepared above.
_trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": eval_dataset,
}
trainer = Trainer(**_trainer_kwargs)

# Run fine-tuning.
trainer.train()
# Persist the fine-tuned model and the tokenizer in their own directories,
# then reload both from disk so inference uses exactly what was saved.
_model_dir = "./password_sniffer_model"
_tokenizer_dir = "./password_sniffer_tokenizer"

model.save_pretrained(_model_dir)
tokenizer.save_pretrained(_tokenizer_dir)

model = BertForSequenceClassification.from_pretrained(_model_dir)
tokenizer = BertTokenizer.from_pretrained(_tokenizer_dir)

# Hugging Face Inference Client.
# NOTE(review): `client` is not referenced by any other code visible in this
# file; local inference goes through `model` directly. Confirm whether it is
# still needed.
client = InferenceClient("password_sniffer_model")
def detect_passwords(text):
    """Classify *text* with the fine-tuned BERT model and report the verdict.

    Args:
        text: The string to classify.

    Returns:
        ``"Potential password detected."`` when the model predicts class 1,
        otherwise ``"No password detected."``.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Inference only: disable autograd so no computation graph is built or
    # kept alive for each request.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so argmax over the raw logits picks the same class.
    predicted_class = torch.argmax(logits, dim=-1).item()

    # NOTE(review): class 1 is assumed to mean "potential password"; confirm
    # against the label scheme of the training data.
    if predicted_class == 1:
        return "Potential password detected."
    return "No password detected."
# Gradio Interface
def respond(message, history=None, system_message="", max_tokens=None, temperature=None, top_p=None):
    """Return the password-detection verdict for *message*.

    Only ``message`` is used. The remaining parameters are accepted so existing
    six-argument callers keep working, but they now have defaults: a caller
    that supplies fewer values no longer fails with a ``TypeError`` for
    missing positional arguments.

    Args:
        message: Text to check for a potential password.
        history: Unused.
        system_message: Unused.
        max_tokens: Unused.
        temperature: Unused.
        top_p: Unused.

    Returns:
        The string produced by ``detect_passwords(message)``.
    """
    return detect_passwords(message)
def _respond_from_interface(system_message, user_input):
    """Adapt the two Interface textboxes to ``respond``'s argument order.

    The Interface passes its inputs positionally in the order they are listed
    (system message first, user input second), while ``respond`` expects the
    user message first and takes six arguments. Forward the user input as the
    message and fill the arguments the UI does not provide.
    """
    return respond(user_input, [], system_message, None, None, None)


demo = gr.Interface(
    fn=_respond_from_interface,
    inputs=[
        gr.Textbox(value="You are a password detection chatbot.", label="System message"),
        gr.Textbox(value="Hello, your password might be 12345!", label="User input"),
    ],
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()