Spaces:

kithangw
/

testingphishinglink

Sleeping

App Files Files Community

kithangw committed on Mar 17, 2024

Commit

0ebd7c5

verified ·

1 Parent(s): a39dfac

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -35

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 from PIL import Image
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 # Initialize the image-to-text pipeline and models
 @st.cache(allow_output_mutation=True)
@@ -8,13 +9,11 @@ def load_models():
     # Make sure to use the correct model names and tokenizer
     image_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-printed")
     phishing_model = AutoModelForSequenceClassification.from_pretrained("kithangw/phishing_link_detection", num_labels=2)
-    phishing_tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
     return image_pipeline, phishing_model, phishing_tokenizer
-image_pipeline, phishing_model, phishing_tokenizer = load_models()
 # Define the phishing check function
-def check_phishing(url_for_recognize):
     link_token = phishing_tokenizer(url_for_recognize, max_length=512, padding=True, truncation=True, return_tensors='pt')
     with torch.no_grad():  # Disable gradient calculation for inference
@@ -29,34 +28,34 @@ def check_phishing(url_for_recognize):
     sentence = f"The URL '{url_for_recognize}' is classified as '{prediction_label}' with a probability of {predicted_prob:.2f}."
     return sentence
-# Streamlit interface
-st.title("Phishing URL Detection from Image")
-# Text box for URL input
-verified_url = st.text_input("Enter or paste a URL to check for phishing:")
-# File uploader to scan the image
-uploaded_image = st.file_uploader("Alternatively, upload an image of the URL", type=["png", "jpg", "jpeg"])
-if uploaded_image is not None:
-    image = Image.open(uploaded_image)
-    st.image(image, caption='Uploaded URL Image', use_column_width=True)
-    try:
-        # Process the image with the OCR pipeline
-        ocr_result = image_pipeline(image)[0]['generated_text'].replace(" ", "").lower()
-        # Update the text input with the OCR result
-        st.session_state['verified_url'] = ocr_result
-    except Exception as e:
-        st.error(f"An error occurred during image processing: {e}")
-if st.button('Detect Phishing'):
-    if verified_url:
-        result = check_phishing(verified_url)
-        st.write(result)
-    else:
-        st.write("Please enter or upload a URL to check for phishing.")
-# Ensure the text box is updated with the OCR result (if any)
-if 'verified_url' in st.session_state and uploaded_image:
-    verified_url = st.session_state['verified_url']

 import streamlit as st
 from PIL import Image
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+import torch
 # Initialize the image-to-text pipeline and models
 @st.cache(allow_output_mutation=True)
     # Make sure to use the correct model names and tokenizer
     image_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-printed")
     phishing_model = AutoModelForSequenceClassification.from_pretrained("kithangw/phishing_link_detection", num_labels=2)
+    phishing_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
     return image_pipeline, phishing_model, phishing_tokenizer
 # Define the phishing check function
+def check_phishing(phishing_model, phishing_tokenizer, url_for_recognize):
     link_token = phishing_tokenizer(url_for_recognize, max_length=512, padding=True, truncation=True, return_tensors='pt')
     with torch.no_grad():  # Disable gradient calculation for inference
     sentence = f"The URL '{url_for_recognize}' is classified as '{prediction_label}' with a probability of {predicted_prob:.2f}."
     return sentence
+def main():
+    """Render the Streamlit page: OCR a URL from an uploaded image and classify it.
+
+    The models are obtained from ``load_models()``. When an image is
+    uploaded, it is run through the image-to-text pipeline and the
+    recognized text (spaces removed, lower-cased) is shown in an editable
+    text box. Pressing the button passes the text box's current value to
+    ``check_phishing`` and writes the returned sentence to the page.
+    """
+    # Load models
+    image_pipeline, phishing_model, phishing_tokenizer = load_models()
+    # Streamlit interface
+    st.title("Phishing URL Detection from Image")
+    # Holds the URL to classify; stays empty until OCR succeeds.
+    verified_url = ""
+    # File uploader to scan the image
+    uploaded_image = st.file_uploader("Upload an image of the URL", type=["png", "jpg", "jpeg"])
+    if uploaded_image is not None:
+        image = Image.open(uploaded_image)
+        st.image(image, caption='Uploaded URL Image', use_column_width=True)
+        try:
+            # Process the image with the OCR pipeline
+            ocr_result = image_pipeline(image)[0]['generated_text'].replace(" ", "").lower()
+        except Exception as e:
+            st.error(f"An error occurred during image processing: {e}")
+        else:
+            # Keep the widget's return value: previously it was discarded and
+            # the button handler looked for a session-state key that nothing
+            # ever set, so the phishing check could never run.
+            verified_url = st.text_input("Recognized URL", ocr_result)
+    if st.button('Detect Phishing'):
+        if verified_url:
+            result = check_phishing(phishing_model, phishing_tokenizer, verified_url)
+            st.write(result)
+        else:
+            st.write("Please upload an image to detect the URL and check for phishing.")
+# Run the main function
+if __name__ == "__main__":
+    main()