Spaces:
Sleeping
Sleeping
Commit
·
7381cc5
1
Parent(s):
fd2307e
Added threshold slider
Browse files
- __pycache__/helper.cpython-312.pyc +0 -0
- app.py +4 -2
- helper.py +2 -1
__pycache__/helper.cpython-312.pyc
CHANGED
Binary files a/__pycache__/helper.cpython-312.pyc and b/__pycache__/helper.cpython-312.pyc differ
|
|
app.py
CHANGED
@@ -64,16 +64,18 @@ with st.sidebar:
|
|
64 |
uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"])
|
65 |
uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"])
|
66 |
|
|
|
|
|
67 |
# Check if both files are uploaded
|
68 |
if uploaded_excel and uploaded_text:
|
69 |
master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
|
70 |
|
71 |
-
res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df) # Generate the result DataFrame using the helper function
|
72 |
|
73 |
# Create a layout with four columns to display the metrics
|
74 |
col1, col2, col3, col4 = st.columns(4)
|
75 |
with col1:
|
76 |
-
st.metric("Direct
|
77 |
with col2:
|
78 |
st.metric("Employer Number", fben)
|
79 |
with col3:
|
|
|
64 |
uploaded_excel = st.file_uploader("Upload the Master file (.xls or .csv)", type=["csv", "xls", "xlsx"])
|
65 |
uploaded_text = st.file_uploader("Upload your Text file (.txt)", type=["txt"])
|
66 |
|
67 |
+
threshold = st.slider("Adjust Threshold", min_value=0, max_value=30, value=12)
|
68 |
+
|
69 |
# Check if both files are uploaded
|
70 |
if uploaded_excel and uploaded_text:
|
71 |
master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
|
72 |
|
73 |
+
res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function
|
74 |
|
75 |
# Create a layout with four columns to display the metrics
|
76 |
col1, col2, col3, col4 = st.columns(4)
|
77 |
with col1:
|
78 |
+
st.metric("Direct Search", fbdm)
|
79 |
with col2:
|
80 |
st.metric("Employer Number", fben)
|
81 |
with col3:
|
helper.py
CHANGED
@@ -178,7 +178,7 @@ def generate_df(master_data, df, employer_names):
|
|
178 |
|
179 |
return res_df
|
180 |
|
181 |
-
def get_res_df(master_data, df):
|
182 |
"""
|
183 |
Retrieves the result DataFrame by matching employer names using BM25 algorithm
|
184 |
and employee numbers.
|
@@ -191,6 +191,7 @@ def get_res_df(master_data, df):
|
|
191 |
DataFrame: A DataFrame containing matched employer data and transaction details.
|
192 |
"""
|
193 |
# Preprocess master data
|
|
|
194 |
corpus = list(master_data['Employer Name'])
|
195 |
lower_case_corpus = [clean_text(name) for name in corpus]
|
196 |
corpus = corpus[1:] # Exclude the first row if it's a header
|
|
|
178 |
|
179 |
return res_df
|
180 |
|
181 |
+
def get_res_df(master_data, df, thrshld):
|
182 |
"""
|
183 |
Retrieves the result DataFrame by matching employer names using BM25 algorithm
|
184 |
and employee numbers.
|
|
|
191 |
DataFrame: A DataFrame containing matched employer data and transaction details.
|
192 |
"""
|
193 |
# Preprocess master data
|
194 |
+
threshold = thrshld
|
195 |
corpus = list(master_data['Employer Name'])
|
196 |
lower_case_corpus = [clean_text(name) for name in corpus]
|
197 |
corpus = corpus[1:] # Exclude the first row if it's a header
|