chandanzeon committed on
Commit
7381cc5
·
1 Parent(s): fd2307e

Added threshold slider

Browse files
Files changed (3) hide show
  1. __pycache__/helper.cpython-312.pyc +0 -0
  2. app.py +4 -2
  3. helper.py +2 -1
__pycache__/helper.cpython-312.pyc CHANGED
Binary files a/__pycache__/helper.cpython-312.pyc and b/__pycache__/helper.cpython-312.pyc differ
 
app.py CHANGED
@@ -64,16 +64,18 @@ with st.sidebar:
64
  uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"])
65
  uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"])
66
 
 
 
67
  # Check if both files are uploaded
68
  if uploaded_excel and uploaded_text:
69
  master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
70
 
71
- res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df) # Generate the result DataFrame using the helper function
72
 
73
  # Create a layout with four columns to display the metrics
74
  col1, col2, col3, col4 = st.columns(4)
75
  with col1:
76
- st.metric("Direct Match", fbdm)
77
  with col2:
78
  st.metric("Employer Number", fben)
79
  with col3:
 
64
  uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"])
65
  uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"])
66
 
67
+ threshold = st.slider("Adjust Threshold", min_value=0, max_value=30, value=12)
68
+
69
  # Check if both files are uploaded
70
  if uploaded_excel and uploaded_text:
71
  master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
72
 
73
+ res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function
74
 
75
  # Create a layout with four columns to display the metrics
76
  col1, col2, col3, col4 = st.columns(4)
77
  with col1:
78
+ st.metric("Direct Search", fbdm)
79
  with col2:
80
  st.metric("Employer Number", fben)
81
  with col3:
helper.py CHANGED
@@ -178,7 +178,7 @@ def generate_df(master_data, df, employer_names):
178
 
179
  return res_df
180
 
181
- def get_res_df(master_data, df):
182
  """
183
  Retrieves the result DataFrame by matching employer names using BM25 algorithm
184
  and employee numbers.
@@ -191,6 +191,7 @@ def get_res_df(master_data, df):
191
  DataFrame: A DataFrame containing matched employer data and transaction details.
192
  """
193
  # Preprocess master data
 
194
  corpus = list(master_data['Employer Name'])
195
  lower_case_corpus = [clean_text(name) for name in corpus]
196
  corpus = corpus[1:] # Exclude the first row if it's a header
 
178
 
179
  return res_df
180
 
181
+ def get_res_df(master_data, df, thrshld):
182
  """
183
  Retrieves the result DataFrame by matching employer names using BM25 algorithm
184
  and employee numbers.
 
191
  DataFrame: A DataFrame containing matched employer data and transaction details.
192
  """
193
  # Preprocess master data
194
+ threshold = thrshld
195
  corpus = list(master_data['Employer Name'])
196
  lower_case_corpus = [clean_text(name) for name in corpus]
197
  corpus = corpus[1:] # Exclude the first row if it's a header