Canstralian committed on
Commit
41f9a4d
·
verified ·
1 Parent(s): 9b9c9ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -56
app.py CHANGED
@@ -1,73 +1,95 @@
1
  import gradio as gr
 
 
 
2
  from huggingface_hub import InferenceClient
 
3
 
4
- # Initialize the Hugging Face Inference client with a more relevant model (e.g., a fine-tuned password detection model)
5
- client = InferenceClient("username/password-detection-model") # Replace with your trained model
6
 
7
- def detect_passwords(text, threshold=0.9):
8
- """
9
- Detects potential passwords in text using a model from Hugging Face.
10
- :param text: Input text containing potential passwords.
11
- :param threshold: Confidence score above which a pattern is flagged.
12
- :return: Flagged patterns and their confidence scores.
13
- """
14
- # Using a model inference to classify potential passwords
15
- response = client.query({"inputs": text}) # Query the model for classification
16
- predictions = response.get("predictions", [])
17
-
18
- flagged_items = []
19
- for pred in predictions:
20
- token, confidence_score = pred["token"], pred["score"]
21
- if confidence_score > threshold:
22
- flagged_items.append((token, confidence_score))
23
-
24
- if not flagged_items:
25
- return "No passwords detected."
26
- else:
27
- return f"Potential passwords detected: {flagged_items}"
28
 
 
 
29
 
30
- def respond(
31
- message,
32
- history: list[tuple[str, str]],
33
- system_message,
34
- max_tokens,
35
- temperature,
36
- top_p,
37
- ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  """
39
- Processes the user's message and history, integrates the password sniffer functionality.
40
  """
41
- messages = [{"role": "system", "content": system_message}]
 
 
 
42
 
43
- for val in history:
44
- if val[0]:
45
- messages.append({"role": "user", "content": val[0]})
46
- if val[1]:
47
- messages.append({"role": "assistant", "content": val[1]})
48
 
49
- # Use Hugging Face model to detect passwords in the user's message
 
50
  detected_passwords = detect_passwords(message)
51
- response = detected_passwords
52
-
53
- return response # Output the result
54
-
55
 
56
- # Gradio Interface for interaction
57
- demo = gr.ChatInterface(
58
- respond,
59
- additional_inputs=[
60
  gr.Textbox(value="You are a password detection chatbot.", label="System message"),
61
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
62
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
63
- gr.Slider(
64
- minimum=0.1,
65
- maximum=1.0,
66
- value=0.95,
67
- step=0.05,
68
- label="Top-p (nucleus sampling)",
69
- ),
70
  ],
 
71
  )
72
 
73
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer
3
+ from datasets import load_dataset
4
+ from huggingface_hub import login
5
  from huggingface_hub import InferenceClient
6
+ import torch
7
 
8
+ # Authenticate with Hugging Face
9
+ login()
10
 
11
+ # Load Dataset from Kaggle (you can change this to your specific Kaggle dataset)
12
+ # Example: Load a dataset related to password classification, or any text classification dataset
13
+ dataset = load_dataset("imdb") # Replace with your own dataset, e.g., Kaggle dataset
14
+
15
+ # Load Tokenizer and Model
16
+ model_name = "bert-base-uncased"
17
+ tokenizer = BertTokenizer.from_pretrained(model_name)
18
+ model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
19
+
20
+ # Preprocess the Dataset
21
+ def preprocess_function(examples):
22
+ return tokenizer(examples['text'], padding="max_length", truncation=True)
 
 
 
 
 
 
 
 
 
23
 
24
+ # Apply preprocessing to dataset
25
+ tokenized_datasets = dataset.map(preprocess_function, batched=True)
26
 
27
+ # Split into training and evaluation datasets
28
+ train_dataset = tokenized_datasets["train"]
29
+ eval_dataset = tokenized_datasets["test"]
30
+
31
+ # Define Training Arguments
32
+ training_args = TrainingArguments(
33
+ output_dir="./results", # output directory
34
+ num_train_epochs=3, # number of training epochs
35
+ per_device_train_batch_size=8, # batch size for training
36
+ per_device_eval_batch_size=16, # batch size for evaluation
37
+ warmup_steps=500, # number of warmup steps for learning rate scheduler
38
+ weight_decay=0.01, # strength of weight decay
39
+ logging_dir="./logs", # directory for storing logs
40
+ logging_steps=10,
41
+ evaluation_strategy="epoch", # evaluate each epoch
42
+ save_strategy="epoch", # save model each epoch
43
+ )
44
+
45
+ # Initialize Trainer
46
+ trainer = Trainer(
47
+ model=model, # the instantiated 🤗 Transformers model to be trained
48
+ args=training_args, # training arguments, defined above
49
+ train_dataset=train_dataset, # training dataset
50
+ eval_dataset=eval_dataset, # evaluation dataset
51
+ )
52
+
53
+ # Train the Model
54
+ trainer.train()
55
+
56
+ # Save the Model and Tokenizer
57
+ model.save_pretrained("./password_sniffer_model")
58
+ tokenizer.save_pretrained("./password_sniffer_tokenizer")
59
+
60
+ # Load the fine-tuned model and tokenizer
61
+ model = BertForSequenceClassification.from_pretrained("./password_sniffer_model")
62
+ tokenizer = BertTokenizer.from_pretrained("./password_sniffer_tokenizer")
63
+
64
+ # Setup Hugging Face Inference Client
65
+ client = InferenceClient("password_sniffer_model")
66
+
67
+ def detect_passwords(text):
68
  """
69
+ Detect potential passwords using the trained BERT model.
70
  """
71
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
72
+ outputs = model(**inputs)
73
+ predictions = torch.softmax(outputs.logits, dim=-1)
74
+ predicted_class = torch.argmax(predictions, dim=-1).item()
75
 
76
+ if predicted_class == 1: # Assuming '1' represents potential password
77
+ return "Potential password detected."
78
+ else:
79
+ return "No password detected."
 
80
 
81
+ # Gradio Interface
82
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
83
  detected_passwords = detect_passwords(message)
84
+ return detected_passwords
 
 
 
85
 
86
+ demo = gr.Interface(
87
+ fn=respond,
88
+ inputs=[
 
89
  gr.Textbox(value="You are a password detection chatbot.", label="System message"),
90
+ gr.Textbox(value="Hello, your password might be 12345!", label="User input"),
 
 
 
 
 
 
 
 
91
  ],
92
+ outputs="text",
93
  )
94
 
95
  if __name__ == "__main__":