Spaces:
Sleeping
Sleeping
Commit
·
b7a3211
1
Parent(s):
4cdcc2f
Validation For Master file
Browse files
app.py
CHANGED
@@ -22,6 +22,19 @@ def to_excel(df):
|
|
22 |
processed_data = output.getvalue()
|
23 |
return processed_data
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def process_files(excel_file, text_file):
|
26 |
"""
|
27 |
Process the uploaded Excel/CSV and text files and return cleaned dataframes.
|
@@ -41,6 +54,11 @@ def process_files(excel_file, text_file):
|
|
41 |
df_excel = pd.read_csv(excel_file)
|
42 |
else:
|
43 |
df_excel = pd.read_excel(excel_file)
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings
|
46 |
df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']]
|
@@ -71,6 +89,8 @@ with st.sidebar:
|
|
71 |
# Check if both files are uploaded
|
72 |
if uploaded_excel and uploaded_text:
|
73 |
master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
|
|
|
|
|
74 |
|
75 |
res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function
|
76 |
|
|
|
22 |
processed_data = output.getvalue()
|
23 |
return processed_data
|
24 |
|
25 |
+
def validate_columns(df):
    """
    Check that the uploaded Master Sheet contains every required column.

    Parameters:
        df: DataFrame read from the uploaded Master Sheet file.

    Returns:
        The same DataFrame object, unchanged, when all required columns are
        present; None when one or more are missing (the missing names are
        reported to the user through st.error first).
    """
    required_columns = {
        "Employer Number", "Employer Name", "Bank Statement Reference", "Employer Province",
        "Region", "Industry", "Contributing Status", "Date Joined Plan", "Termination Date", "Email Addresses"
    }
    missing_columns = required_columns - set(df.columns)

    if missing_columns:
        # Sort before joining: set iteration order is arbitrary, so without this
        # the same bad upload could list the columns in a different order each run.
        st.error(f"Upload a valid Master Sheet. These columns are missing: {', '.join(sorted(missing_columns))}")
        return None

    return df
|
37 |
+
|
38 |
def process_files(excel_file, text_file):
|
39 |
"""
|
40 |
Process the uploaded Excel/CSV and text files and return cleaned dataframes.
|
|
|
54 |
df_excel = pd.read_csv(excel_file)
|
55 |
else:
|
56 |
df_excel = pd.read_excel(excel_file)
|
57 |
+
|
58 |
+
# Validate Master Sheet
|
59 |
+
df_excel = validate_columns(df_excel)
|
60 |
+
if df_excel is None:
|
61 |
+
return None, None
|
62 |
|
63 |
# Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings
|
64 |
df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']]
|
|
|
89 |
# Check if both files are uploaded
|
90 |
if uploaded_excel and uploaded_text:
|
91 |
master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
|
92 |
+
if master_data is None and df is None:
|
93 |
+
st.stop()
|
94 |
|
95 |
res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function
|
96 |
|