chandanzeon committed on
Commit
b7a3211
·
1 Parent(s): 4cdcc2f

Validation For Master file

Browse files
Files changed (1) hide show
  1. app.py +20 -0
app.py CHANGED
@@ -22,6 +22,19 @@ def to_excel(df):
22
  processed_data = output.getvalue()
23
  return processed_data
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def process_files(excel_file, text_file):
26
  """
27
  Process the uploaded Excel/CSV and text files and return cleaned dataframes.
@@ -41,6 +54,11 @@ def process_files(excel_file, text_file):
41
  df_excel = pd.read_csv(excel_file)
42
  else:
43
  df_excel = pd.read_excel(excel_file)
 
 
 
 
 
44
 
45
  # Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings
46
  df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']]
@@ -71,6 +89,8 @@ with st.sidebar:
71
  # Check if both files are uploaded
72
  if uploaded_excel and uploaded_text:
73
  master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
 
 
74
 
75
  res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function
76
 
 
22
  processed_data = output.getvalue()
23
  return processed_data
24
 
25
def validate_columns(df):
    """
    Check that the uploaded Master Sheet contains every required column.

    Args:
        df: DataFrame read from the uploaded Master Sheet.

    Returns:
        The same DataFrame when all required columns are present; otherwise
        None, after reporting the missing columns through ``st.error``.
    """
    required_columns = {
        "Employer Number", "Employer Name", "Bank Statement Reference", "Employer Province",
        "Region", "Industry", "Contributing Status", "Date Joined Plan", "Termination Date", "Email Addresses"
    }
    # Sort the difference so the error message lists the columns in a stable
    # order; iterating a set of strings can differ from one run to the next.
    missing_columns = sorted(required_columns - set(df.columns))

    if missing_columns:
        st.error(f"Upload a valid Master Sheet. These columns are missing: {', '.join(missing_columns)}")
        return None

    return df
37
+
38
  def process_files(excel_file, text_file):
39
  """
40
  Process the uploaded Excel/CSV and text files and return cleaned dataframes.
 
54
  df_excel = pd.read_csv(excel_file)
55
  else:
56
  df_excel = pd.read_excel(excel_file)
57
+
58
+ # Validate Master Sheet
59
+ df_excel = validate_columns(df_excel)
60
+ if df_excel is None:
61
+ return None, None
62
 
63
  # Ensure the 'Employer Number' column values are formatted as zero-padded 6-digit strings
64
  df_excel['Employer Number'] = [str(number).zfill(6) for number in df_excel['Employer Number']]
 
89
  # Check if both files are uploaded
90
  if uploaded_excel and uploaded_text:
91
  master_data, df = process_files(uploaded_excel, uploaded_text) # Process the files
92
+ if master_data is None and df is None:
93
+ st.stop()
94
 
95
  res, fbdm, fben, fbbm25, nf = get_res_df(master_data, df, threshold) # Generate the result DataFrame using the helper function
96