Spaces:
Runtime error
Runtime error
umairahmad89
committed on
Commit
·
f0a94b0
1
Parent(s):
191079a
Add lookup to previous quarter sheet and handle no previous quarter order
Browse files
app.py
CHANGED
@@ -6,6 +6,9 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
6 |
import numpy as np
|
7 |
import tempfile
|
8 |
import os
|
|
|
|
|
|
|
9 |
|
10 |
# Load the sentence transformer model
|
11 |
model = SentenceTransformer('BAAI/bge-small-en-v1.5')
|
@@ -52,7 +55,53 @@ def filter_excel2(excel_path, min_row, max_row, sheetname):
|
|
52 |
return data
|
53 |
except Exception as e:
|
54 |
raise gr.Error(f"Error processing Excel 2: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
def get_embeddings(texts):
    """Return the embeddings produced by the module-level ``model`` for ``texts``.

    ``texts`` is passed unchanged to ``model.encode`` and whatever that call
    returns is handed back to the caller.
    """
    embeddings = model.encode(texts)
    return embeddings
|
58 |
|
@@ -109,7 +158,8 @@ def update_excel(excel_path, processed_data, sheetname):
|
|
109 |
def process_files(excel1, excel2, min_row1, max_row1, min_row2, max_row2, sheetname):
|
110 |
try:
|
111 |
gr.Info("Starting processing...")
|
112 |
-
|
|
|
113 |
# Process Excel 1
|
114 |
gr.Info("Processing Excel 1...")
|
115 |
csv1_data = filter_excel1(excel1, min_row1, max_row1)
|
|
|
6 |
import numpy as np
|
7 |
import tempfile
|
8 |
import os
|
9 |
+
import pandas as pd
|
10 |
+
import re
|
11 |
+
|
12 |
|
13 |
# Load the sentence transformer model
|
14 |
model = SentenceTransformer('BAAI/bge-small-en-v1.5')
|
|
|
55 |
return data
|
56 |
except Exception as e:
|
57 |
raise gr.Error(f"Error processing Excel 2: {str(e)}")
|
58 |
+
|
59 |
+
def sheet_lookup(current_sheet_name, excel_file_path):
    """Fill the current quarter's mapping column from the previous quarter's sheet.

    The sheet name must start with ``<quarter>Q<year>`` (for example
    ``2Q2024``), where the quarter is 1-4 and the year has four digits. The
    previous quarter's sheet name is derived from it (``1Q2024`` looks back
    to ``4Q2023``). Rows are matched on the
    ``'Monitoring Tool Instance ID-AU'`` column, and the value found in the
    previous sheet's ``"<prev quarter>q CRI Profile Mapping"`` column is
    written to the current sheet's ``"<quarter>q CRI Profile Mapping"``
    column. Rows with no match, or every row when the previous sheet does
    not exist, receive ``'#N/A'``. The current sheet is then replaced in the
    workbook at ``excel_file_path``.

    Args:
        current_sheet_name: Name of the sheet to update.
        excel_file_path: Path of the workbook that is read and rewritten.

    Raises:
        ValueError: If ``current_sheet_name`` does not start with a valid
            ``<1-4>Q<yyyy>`` prefix. This is checked before the workbook is
            opened, so a bad name fails fast without any file access.
        KeyError: If a required column is missing from a sheet.
    """
    # Validate the name first; quarters outside 1-4 used to slip through and
    # produce nonsensical previous-sheet names such as "-1Q2024".
    match = re.match(r'([1-4])Q(\d{4})', current_sheet_name)
    if not match:
        raise ValueError("Invalid sheet name format")

    quarter, year = map(int, match.groups())
    if quarter == 1:
        # The first quarter looks back to the fourth quarter of the prior year.
        prev_quarter, prev_year = 4, year - 1
    else:
        prev_quarter, prev_year = quarter - 1, year
    prev_sheet_name = f"{prev_quarter}Q{prev_year}"

    lookup_col = 'Monitoring Tool Instance ID-AU'
    result_col = f"{quarter}q CRI Profile Mapping"

    # Read everything needed and close the workbook handle before it is
    # reopened for writing below.
    with pd.ExcelFile(excel_file_path) as xl:
        current_df = xl.parse(current_sheet_name)
        if prev_sheet_name in xl.sheet_names:
            prev_df = xl.parse(prev_sheet_name)
        else:
            prev_df = None

    if prev_df is not None:
        value_col = f"{prev_quarter}q CRI Profile Mapping"

        # Keep only the first row per ID in both sheets. Note that this also
        # removes duplicate-ID rows from the sheet that is written back.
        current_df.drop_duplicates(subset=[lookup_col], keep='first', inplace=True)
        prev_df.drop_duplicates(subset=[lookup_col], keep='first', inplace=True)

        # Dictionary lookup: ID -> previous quarter's mapping value.
        lookup_dict = dict(zip(prev_df[lookup_col], prev_df[value_col]))
        current_df[result_col] = current_df[lookup_col].map(lookup_dict).fillna('#N/A')
    else:
        # No previous quarter to look up from: mark every row as unmatched.
        current_df[result_col] = '#N/A'
        print(f"Warning: Previous sheet {prev_sheet_name} not found. Filling {result_col} with '#N/A'")

    # Replace only the current sheet; other sheets in the workbook are kept.
    with pd.ExcelWriter(excel_file_path, mode='a', if_sheet_exists='replace') as writer:
        current_df.to_excel(writer, sheet_name=current_sheet_name, index=False)

    print(f"Processing complete for sheet {current_sheet_name}")
|
105 |
def get_embeddings(texts):
    """Return the embeddings produced by the module-level ``model`` for ``texts``.

    ``texts`` is passed unchanged to ``model.encode`` and whatever that call
    returns is handed back to the caller.
    """
    embeddings = model.encode(texts)
    return embeddings
|
107 |
|
|
|
158 |
def process_files(excel1, excel2, min_row1, max_row1, min_row2, max_row2, sheetname):
|
159 |
try:
|
160 |
gr.Info("Starting processing...")
|
161 |
+
gr.Info("Doing lookup...")
|
162 |
+
sheet_lookup(sheetname, excel2)
|
163 |
# Process Excel 1
|
164 |
gr.Info("Processing Excel 1...")
|
165 |
csv1_data = filter_excel1(excel1, min_row1, max_row1)
|