rmdhirr committed on
Commit
8cd35aa
1 Parent(s): b685318

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -2,11 +2,11 @@ import gradio as gr
2
  import tensorflow as tf
3
  import numpy as np
4
  import nltk
 
5
  from nltk.corpus import stopwords
6
  from nltk.tokenize import word_tokenize
7
  from nltk.stem import WordNetLemmatizer
8
  from tensorflow.keras.preprocessing.sequence import pad_sequences
9
- from tensorflow.keras.preprocessing.text import Tokenizer
10
  import re
11
 
12
  # Load the model
@@ -51,12 +51,11 @@ max_url_length = 180
51
  max_html_length = 2000
52
  max_words = 10000
53
 
54
- url_tokenizer = Tokenizer(num_words=max_words, char_level=True)
55
- html_tokenizer = Tokenizer(num_words=max_words)
56
-
57
- # Dummy fit to initialize tokenizers
58
- url_tokenizer.fit_on_texts(["dummy"])
59
- html_tokenizer.fit_on_texts(["dummy"])
60
 
61
  def preprocess_input(input_text, tokenizer, max_length):
62
  sequences = tokenizer.texts_to_sequences([input_text])
@@ -80,9 +79,9 @@ def get_prediction(input_text, input_type):
80
  def phishing_detection(input_text, input_type):
81
  prediction = get_prediction(input_text, input_type)
82
  if prediction > 0.7:
83
- return f"Warning: This site is likely a phishing site!"
84
  else:
85
- return f"Safe: This site is not likely a phishing site."
86
 
87
  iface = gr.Interface(
88
  fn=phishing_detection,
 
2
  import tensorflow as tf
3
  import numpy as np
4
  import nltk
5
+ import pickle
6
  from nltk.corpus import stopwords
7
  from nltk.tokenize import word_tokenize
8
  from nltk.stem import WordNetLemmatizer
9
  from tensorflow.keras.preprocessing.sequence import pad_sequences
 
10
  import re
11
 
12
  # Load the model
 
51
  max_html_length = 2000
52
  max_words = 10000
53
 
54
+ # Load tokenizers
55
+ with open('url_tokenizer.pkl', 'rb') as f:
56
+ url_tokenizer = pickle.load(f)
57
+ with open('html_tokenizer.pkl', 'rb') as f:
58
+ html_tokenizer = pickle.load(f)
 
59
 
60
  def preprocess_input(input_text, tokenizer, max_length):
61
  sequences = tokenizer.texts_to_sequences([input_text])
 
79
  def phishing_detection(input_text, input_type):
80
  prediction = get_prediction(input_text, input_type)
81
  if prediction > 0.7:
82
+ return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
83
  else:
84
+ return f"Safe: This site is not likely a phishing site. ({prediction:.2f})"
85
 
86
  iface = gr.Interface(
87
  fn=phishing_detection,