File size: 4,983 Bytes
42046f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import streamlit as st
import concurrent.futures
from functools import partial
import numpy as np
from io import StringIO
import sys
import time
# File Imports
from embedding import get_embeddings # Ensure this file/module is available
from preprocess import filtering # Ensure this file/module is available
from search import *
# Cosine Similarity Function
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Both arguments are converted with ``np.array`` before any arithmetic.

    Args:
        vec1: First vector (any array-like accepted by ``np.array``).
        vec2: Second vector (any array-like accepted by ``np.array``).

    Returns:
        ``dot(vec1, vec2) / (norm(vec1) * norm(vec2))``, or ``0.0`` when
        either vector has a zero norm.
    """
    first = np.array(vec1)
    second = np.array(vec2)

    # The dot product is evaluated before the zero-norm guard, so any error
    # it raises surfaces regardless of the norms.
    numerator = np.dot(first, second)

    norm_first = np.linalg.norm(first)
    norm_second = np.linalg.norm(second)
    if norm_first != 0 and norm_second != 0:
        return numerator / (norm_first * norm_second)

    # At least one vector has no direction; define the similarity as zero
    # rather than dividing by zero.
    return 0.0
# Logger class to capture output
class StreamCapture:
    """Context manager that redirects ``sys.stdout`` into an in-memory buffer.

    While the ``with`` block is active, everything written to ``sys.stdout``
    (for example via ``print``) lands in ``self.output``. ``__enter__``
    returns that buffer, so callers can also write to it directly and read
    the accumulated text with ``getvalue()``.

    The stream to restore is recorded when the block is *entered*, not when
    the object is constructed, so the stream that was active immediately
    before the ``with`` statement is always the one put back. Saved streams
    are kept on a stack, which makes the same instance safe to nest.
    """

    def __init__(self):
        self.output = StringIO()
        # Stream active at construction time. Retained only so the attribute
        # keeps existing; restoration uses the stack below.
        self._stdout = sys.stdout
        # One saved stream per active ``with`` block on this instance.
        self._saved_streams = []

    def __enter__(self):
        # Capture the stream at entry time so __exit__ restores exactly what
        # this block replaced.
        self._saved_streams.append(sys.stdout)
        sys.stdout = self.output
        return self.output

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the block propagates.
        sys.stdout = self._saved_streams.pop()
# Main Function
def _search_and_filter(search_func, main_product, product):
    """Run one search engine for *product* and filter its raw results."""
    return filtering(search_func(product), main_product, product)


def _collect_filtered_links(main_product, search):
    """Return ``{similar product: filtered links}`` for the chosen engine(s)."""
    engines = {
        'google': search_google,
        'duckduckgo': search_duckduckgo,
        'archive': search_archive,
        'github': search_github,
        'wikipedia': search_wikipedia,
    }
    # Only the first similar product is examined.
    similar = extract_similar_products(main_product)[:1]
    data = {}

    if search == 'all':
        # 'all' covers these four engines only; 'archive' was never part of
        # this selection and is left out here as well.
        selected = [engines[name] for name in ('google', 'duckduckgo', 'github', 'wikipedia')]
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Submit the callable and its arguments separately so the search
            # itself runs on the worker thread, not eagerly on this one.
            pending = [
                (product, executor.submit(_search_and_filter, search_func, main_product, product))
                for product in similar
                for search_func in selected
            ]
            # Collect in submission order so the merged link list is
            # deterministic, and merge per product instead of overwriting.
            for product, future in pending:
                data.setdefault(product, []).extend(future.result())
        return data

    search_func = engines.get(search)
    if search_func is None:
        # Unknown engine name: no links are collected.
        return data
    for product in similar:
        data[product] = _search_and_filter(search_func, main_product, product)
    return data


def score(main_product, main_url, search, logger, log_area):
    """Compare the main product's document against documents of similar products.

    Steps: find similar products, search for candidate links with the chosen
    engine(s), embed the main document and up to two links per similar
    product, then compute a cosine similarity for each pair of embeddings
    that share an index.

    Args:
        main_product: Name of the product under investigation.
        main_url: URL passed to ``get_embeddings`` for the main product.
        search: Engine name: ``'google'``, ``'duckduckgo'``, ``'archive'``,
            ``'github'``, ``'wikipedia'``, or ``'all'`` (which runs google,
            duckduckgo, github and wikipedia concurrently and merges the
            filtered links). Any other value yields no links.
        logger: Text buffer providing ``write`` and ``getvalue``.
        log_area: UI element providing ``text(str)``; refreshed with the full
            log after each step.

    Returns:
        A tuple ``(cosine_sim_scores, main_result)``. ``cosine_sim_scores`` is
        a list of ``(product, link, index, similarity)`` tuples and
        ``main_result`` is the first value returned by
        ``get_embeddings(main_url)``.
    """
    def refresh_log():
        log_area.text(logger.getvalue())

    data = _collect_filtered_links(main_product, search)

    logger.write("\n\nFiltered Links ------------------>\n")
    logger.write(str(data) + "\n")
    refresh_log()

    logger.write("\n\nCreating Main product Embeddings ---------->\n")
    main_result, main_embedding = get_embeddings(main_url)
    refresh_log()

    logger.write("\n\nCreating Similar product Embeddings ---------->\n")
    refresh_log()
    # print() targets sys.stdout; the caller in this file redirects stdout
    # into `logger`, so these dumps end up in the on-screen log.
    print("main", main_embedding)

    cosine_sim_scores = []
    for product, links in data.items():
        # Only the first two filtered links per product are embedded.
        for link in links[:2]:
            _, similar_embedding = get_embeddings(link)
            refresh_log()
            print(similar_embedding)
            # Compare only indices present in both embeddings so a shorter
            # document cannot raise IndexError. Index-based access is kept
            # because the container type returned by get_embeddings is not
            # visible from this file.
            shared = min(len(main_embedding), len(similar_embedding))
            for i in range(shared):
                similarity = cosine_similarity(main_embedding[i], similar_embedding[i])
                cosine_sim_scores.append((product, link, i, similarity))
            refresh_log()

    logger.write("--------------- DONE -----------------\n")
    refresh_log()
    return cosine_sim_scores, main_result
# Streamlit Interface
st.title("Product Infringement Checker")

# Inputs: product name, manual URL and search engine, each with a default.
main_product = st.text_input('Enter Main Product Name', 'Philips led 7w bulb')
main_url = st.text_input(
    'Enter Main Product Manual URL',
    'https://www.assets.signify.com/is/content/PhilipsConsumer/PDFDownloads/Colombia/technical-sheets/ODLI20180227_001-UPD-es_CO-Ficha_Tecnica_LED_MR16_Master_7W_Dim_12V_CRI90.pdf',
)
search_method = st.selectbox(
    'Choose Search Engine',
    ['duckduckgo', 'google', 'archive', 'github', 'wikipedia', 'all'],
)

# The whole pipeline runs only when the button is pressed.
if st.button('Check for Infringement'):
    # Empty placeholder that score() fills with the running log text.
    log_output = st.empty()

    # Show a spinner and capture stdout into `logger` while scoring runs.
    with st.spinner('Processing...'), StreamCapture() as logger:
        cosine_sim_scores, main_result = score(
            main_product, main_url, search_method, logger, log_output
        )

    st.success('Processing complete!')
    st.subheader("Cosine Similarity Scores")

    # Display label for each embedding index.
    tags = [
        'Introduction',
        'Specifications',
        'Product Overview',
        'Safety Information',
        'Installation Instructions',
        'Setup and Configuration',
        'Operation Instructions',
        'Maintenance and Care',
        'Troubleshooting',
        'Warranty Information',
        'Legal Information',
    ]

    for product, link, index, value in cosine_sim_scores:
        # Index 0 marks the first score of a new (product, link) pair, so
        # print the pair's heading before it.
        if index == 0:
            st.write(f"Product: {product}, Link: {link}")
        st.write(f"{tags[index]:<20} Cosine Similarity Score: {value:.2f}")