kithangw committed on
Commit
0ebd7c5
·
verified ·
1 Parent(s): a39dfac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -35
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  from PIL import Image
3
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 
4
 
5
  # Initialize the image-to-text pipeline and models
6
  @st.cache(allow_output_mutation=True)
@@ -8,13 +9,11 @@ def load_models():
8
  # Make sure to use the correct model names and tokenizer
9
  image_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-printed")
10
  phishing_model = AutoModelForSequenceClassification.from_pretrained("kithangw/phishing_link_detection", num_labels=2)
11
- phishing_tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
12
  return image_pipeline, phishing_model, phishing_tokenizer
13
 
14
- image_pipeline, phishing_model, phishing_tokenizer = load_models()
15
-
16
  # Define the phishing check function
17
- def check_phishing(url_for_recognize):
18
  link_token = phishing_tokenizer(url_for_recognize, max_length=512, padding=True, truncation=True, return_tensors='pt')
19
 
20
  with torch.no_grad(): # Disable gradient calculation for inference
@@ -29,34 +28,34 @@ def check_phishing(url_for_recognize):
29
  sentence = f"The URL '{url_for_recognize}' is classified as '{prediction_label}' with a probability of {predicted_prob:.2f}."
30
  return sentence
31
 
32
- # Streamlit interface
33
- st.title("Phishing URL Detection from Image")
34
-
35
- # Text box for URL input
36
- verified_url = st.text_input("Enter or paste a URL to check for phishing:")
37
-
38
- # File uploader to scan the image
39
- uploaded_image = st.file_uploader("Alternatively, upload an image of the URL", type=["png", "jpg", "jpeg"])
40
-
41
- if uploaded_image is not None:
42
- image = Image.open(uploaded_image)
43
- st.image(image, caption='Uploaded URL Image', use_column_width=True)
44
-
45
- try:
46
- # Process the image with the OCR pipeline
47
- ocr_result = image_pipeline(image)[0]['generated_text'].replace(" ", "").lower()
48
- # Update the text input with the OCR result
49
- st.session_state['verified_url'] = ocr_result
50
- except Exception as e:
51
- st.error(f"An error occurred during image processing: {e}")
52
-
53
- if st.button('Detect Phishing'):
54
- if verified_url:
55
- result = check_phishing(verified_url)
56
- st.write(result)
57
- else:
58
- st.write("Please enter or upload a URL to check for phishing.")
59
-
60
- # Ensure the text box is updated with the OCR result (if any)
61
- if 'verified_url' in st.session_state and uploaded_image:
62
- verified_url = st.session_state['verified_url']
 
1
  import streamlit as st
2
  from PIL import Image
3
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
4
+ import torch
5
 
6
  # Initialize the image-to-text pipeline and models
7
  @st.cache(allow_output_mutation=True)
 
9
  # Make sure to use the correct model names and tokenizer
10
  image_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-printed")
11
  phishing_model = AutoModelForSequenceClassification.from_pretrained("kithangw/phishing_link_detection", num_labels=2)
12
+ phishing_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
13
  return image_pipeline, phishing_model, phishing_tokenizer
14
 
 
 
15
  # Define the phishing check function
16
+ def check_phishing(phishing_model, phishing_tokenizer, url_for_recognize):
17
  link_token = phishing_tokenizer(url_for_recognize, max_length=512, padding=True, truncation=True, return_tensors='pt')
18
 
19
  with torch.no_grad(): # Disable gradient calculation for inference
 
28
  sentence = f"The URL '{url_for_recognize}' is classified as '{prediction_label}' with a probability of {predicted_prob:.2f}."
29
  return sentence
30
 
31
def main():
    """Render the Streamlit page: upload an image, OCR the URL, classify it.

    Flow:
      1. Load the OCR pipeline, phishing classifier and tokenizer through
         ``load_models()``.
      2. Let the user upload a PNG/JPEG picture of a URL and display it.
      3. Run the OCR pipeline on the picture and show the recognized text
         in an editable text box so the user can correct it.
      4. When the 'Detect Phishing' button is pressed, pass the text box
         content to ``check_phishing`` and write the resulting sentence.

    The URL to classify is taken from the text box's return value. The
    earlier version looked it up in ``st.session_state['verified_url']``,
    but nothing ever stored that key (the widget was created without
    ``key=``), so the button could only ever show the "please upload"
    message.
    """
    # Load models
    image_pipeline, phishing_model, phishing_tokenizer = load_models()

    # Streamlit interface
    st.title("Phishing URL Detection from Image")

    # File uploader to scan the image
    uploaded_image = st.file_uploader("Upload an image of the URL", type=["png", "jpg", "jpeg"])

    # Stays empty until an image has been uploaded and recognized.
    verified_url = ""

    if uploaded_image is not None:
        image = Image.open(uploaded_image)
        st.image(image, caption='Uploaded URL Image', use_column_width=True)

        try:
            # Process the image with the OCR pipeline; spaces are removed and
            # the text lower-cased before it is offered as the URL.
            ocr_result = image_pipeline(image)[0]['generated_text'].replace(" ", "").lower()
        except Exception as e:
            # UI boundary: report the failure on the page instead of crashing.
            st.error(f"An error occurred during image processing: {e}")
        else:
            # Editable, so the user can fix OCR mistakes before classification.
            verified_url = st.text_input("Recognized URL", ocr_result).strip()

    if st.button('Detect Phishing'):
        if verified_url:
            result = check_phishing(phishing_model, phishing_tokenizer, verified_url)
            st.write(result)
        else:
            st.write("Please upload an image to detect the URL and check for phishing.")


# Run the main function
if __name__ == "__main__":
    main()