That1BrainCell committed on
Commit
28a52ae
·
verified ·
1 Parent(s): 36b235a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -20
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import streamlit as st
2
  import concurrent.futures
 
3
  from functools import partial
4
  import numpy as np
5
  from io import StringIO
@@ -40,27 +41,43 @@ class StreamCapture:
40
  sys.stdout = self._stdout
41
 
42
  # Main Function
43
- def score(main_product, main_url, search, logger, log_area):
44
  data = {}
 
45
 
46
  if search == 'all':
47
- similar = extract_similar_products(main_product)[:1]
48
 
49
- with concurrent.futures.ThreadPoolExecutor() as executor:
50
- futures = []
51
-
52
- search_functions = [search_google, search_duckduckgo, search_github, search_wikipedia]
53
-
54
- for search_func in search_functions:
55
- futures.append(executor.submit(partial(filtering, search_func(similar), main_product, similar)))
56
-
57
- for future in concurrent.futures.as_completed(futures):
58
- data[similar] = future.result()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  else:
61
- similar = extract_similar_products(main_product)[:1]
62
 
63
- for product in similar:
64
 
65
  if search == 'google':
66
  data[product] = filtering(search_google(product), main_product, product)
@@ -78,7 +95,7 @@ def score(main_product, main_url, search, logger, log_area):
78
  log_area.text(logger.getvalue())
79
 
80
  logger.write("\n\nCreating Main product Embeddings ---------->\n")
81
- main_result, main_embedding = get_embeddings(main_url)
82
  log_area.text(logger.getvalue())
83
 
84
  print("main",main_embedding)
@@ -90,9 +107,9 @@ def score(main_product, main_url, search, logger, log_area):
90
 
91
 
92
  for product in data:
93
- for link in data[product][:2]:
94
 
95
- similar_result, similar_embedding = get_embeddings(link)
96
  log_area.text(logger.getvalue())
97
 
98
  print(similar_embedding)
@@ -113,19 +130,31 @@ main_product = st.text_input('Enter Main Product Name', 'Philips led 7w bulb')
113
  main_url = st.text_input('Enter Main Product Manual URL', 'https://www.assets.signify.com/is/content/PhilipsConsumer/PDFDownloads/Colombia/technical-sheets/ODLI20180227_001-UPD-es_CO-Ficha_Tecnica_LED_MR16_Master_7W_Dim_12V_CRI90.pdf')
114
  search_method = st.selectbox('Choose Search Engine', ['duckduckgo', 'google', 'archive', 'github', 'wikipedia', 'all'])
115
 
 
 
 
 
 
 
 
 
 
 
116
  if st.button('Check for Infringement'):
117
  log_output = st.empty() # Placeholder for log output
118
 
119
  with st.spinner('Processing...'):
120
  with StreamCapture() as logger:
121
- cosine_sim_scores, main_result = score(main_product, main_url, search_method, logger, log_output)
122
 
123
  st.success('Processing complete!')
124
 
125
  st.subheader("Cosine Similarity Scores")
126
 
127
- # = score(main_product, main_url, search, logger, log_output)
128
- tags = ['Introduction', 'Specifications', 'Product Overview', 'Safety Information', 'Installation Instructions', 'Setup and Configuration', 'Operation Instructions', 'Maintenance and Care', 'Troubleshooting', 'Warranty Information', 'Legal Information']
 
 
129
 
130
  for product, link, index, value in cosine_sim_scores:
131
  if not index:
 
1
  import streamlit as st
2
  import concurrent.futures
3
+ from concurrent.futures import ThreadPoolExecutor,as_completed
4
  from functools import partial
5
  import numpy as np
6
  from io import StringIO
 
41
  sys.stdout = self._stdout
42
 
43
  # Main Function
44
+ def score(main_product, main_url, product_count, link_count, search, logger, log_area):
45
  data = {}
46
+ similar_products = extract_similar_products(main_product)[:product_count]
47
 
48
  if search == 'all':
 
49
 
50
+ def process_product(product, search_function, main_product):
51
+ search_result = search_function(product)
52
+ return filtering(search_result, main_product, product)
53
+
54
+
55
+ search_functions = {
56
+ 'google': search_google,
57
+ 'duckduckgo': search_duckduckgo,
58
+ 'github': search_github,
59
+ 'wikipedia': search_wikipedia
60
+ }
61
+
62
+ with ThreadPoolExecutor() as executor:
63
+ future_to_product_search = {
64
+ executor.submit(process_product, product, search_function, main_product): (product, search_name)
65
+ for product in similar_products
66
+ for search_name, search_function in search_functions.items()
67
+ }
68
+
69
+ for future in as_completed(future_to_product_search):
70
+ product, search_name = future_to_product_search[future]
71
+ try:
72
+ if product not in data:
73
+ data[product] = {}
74
+ data[product] = future.result()
75
+ except Exception as e:
76
+ print(f"Error processing product {product} with {search_name}: {e}")
77
 
78
  else:
 
79
 
80
+ for product in similar_products:
81
 
82
  if search == 'google':
83
  data[product] = filtering(search_google(product), main_product, product)
 
95
  log_area.text(logger.getvalue())
96
 
97
  logger.write("\n\nCreating Main product Embeddings ---------->\n")
98
+ main_result, main_embedding = get_embeddings(main_url,tag_option)
99
  log_area.text(logger.getvalue())
100
 
101
  print("main",main_embedding)
 
107
 
108
 
109
  for product in data:
110
+ for link in data[product][:link_count]:
111
 
112
+ similar_result, similar_embedding = get_embeddings(link,tag_option)
113
  log_area.text(logger.getvalue())
114
 
115
  print(similar_embedding)
 
130
  main_url = st.text_input('Enter Main Product Manual URL', 'https://www.assets.signify.com/is/content/PhilipsConsumer/PDFDownloads/Colombia/technical-sheets/ODLI20180227_001-UPD-es_CO-Ficha_Tecnica_LED_MR16_Master_7W_Dim_12V_CRI90.pdf')
131
  search_method = st.selectbox('Choose Search Engine', ['duckduckgo', 'google', 'archive', 'github', 'wikipedia', 'all'])
132
 
133
+ col1, col2 = st.columns(2)
134
+ with col1:
135
+ product_count = st.number_input("Number of Simliar Products",min_value=1, step=1, format="%i")
136
+ with col2:
137
+ link_count = st.number_input("Number of Links per product",min_value=1, step=1, format="%i")
138
+
139
+
140
+ tag_option = st.selectbox('Choose Similarity Method', ["Single","Tag Wise"])
141
+
142
+
143
  if st.button('Check for Infringement'):
144
  log_output = st.empty() # Placeholder for log output
145
 
146
  with st.spinner('Processing...'):
147
  with StreamCapture() as logger:
148
+ cosine_sim_scores, main_result = score(main_product, main_url,product_count, link_count, search_method, logger, log_output)
149
 
150
  st.success('Processing complete!')
151
 
152
  st.subheader("Cosine Similarity Scores")
153
 
154
+ if tag_option=="Single":
155
+ tags=["Details"]
156
+ else:
157
+ tags = ['Introduction', 'Specifications', 'Product Overview', 'Safety Information', 'Installation Instructions', 'Setup and Configuration', 'Operation Instructions', 'Maintenance and Care', 'Troubleshooting', 'Warranty Information', 'Legal Information']
158
 
159
  for product, link, index, value in cosine_sim_scores:
160
  if not index: