# Streamlit app: tender-selection prediction with a pickled RandomForest model.
import streamlit as st | |
import pandas as pd | |
import pickle | |
from sklearn.impute import SimpleImputer | |
from sklearn.utils.validation import check_is_fitted | |
import numpy as np | |
# Load the trained model and fitted preprocessing objects from disk.
# NOTE: pickle deserialization executes arbitrary code — only load
# artifacts produced by the trusted training pipeline.
def _load_pickle(path):
    """Deserialize and return one pickled object from *path*."""
    with open(path, 'rb') as fh:
        return pickle.load(fh)

random_forest_model = _load_pickle('random_forest_model.pkl')
scaler = _load_pickle('scaler.pkl')
label_encoders = _load_pickle('label_encoders.pkl')
# State corrections and valid states/UTs
# Maps legacy/alternate state spellings seen in the data to their
# canonical lower-case names; unmappable values collapse to 'other'.
state_corrections = {
    'uttaranchal': 'uttarakhand',
    'orissa (odisha)': 'odisha',
    'kashmir': 'jammu and kashmir',
    'multi state': 'other',
    'not classified': 'other'
}
# Canonical lower-case names of Indian states and union territories;
# extract_state() treats anything outside this list as 'other'.
valid_states_uts = [
    'andhra pradesh', 'arunachal pradesh', 'assam', 'bihar', 'chhattisgarh', 'goa',
    'gujarat', 'haryana', 'himachal pradesh', 'jharkhand', 'karnataka', 'kerala',
    'madhya pradesh', 'maharashtra', 'manipur', 'meghalaya', 'mizoram', 'nagaland',
    'odisha', 'punjab', 'rajasthan', 'sikkim', 'tamil nadu', 'telangana', 'tripura',
    'uttar pradesh', 'uttarakhand', 'west bengal', 'andaman and nicobar islands',
    'chandigarh', 'dadra and nagar haveli and daman and diu', 'lakshadweep', 'delhi',
    'puducherry', 'jammu and kashmir', 'ladakh'
]
# Extract city, state, and country | |
def extract_city(x):
    """Return the lower-cased city from a '-'-delimited location string.

    Non-string input yields the sentinel 'other'. A four-part location
    carries a two-token city name split across the first two fields, so
    those fields are rejoined with a space.
    """
    if not isinstance(x, str):
        return "other"
    parts = x.split("-")
    if len(parts) == 4:
        # Two-word city split across the first two '-' fields.
        return f"{parts[0].strip().lower()} {parts[1].strip().lower()}"
    return parts[0].strip().lower()
def extract_state(x):
    """Return the normalised state/UT from a '-'-delimited location string.

    The state is taken as the second-to-last '-' field, lower-cased, run
    through state_corrections, and collapsed to 'other' when it is not a
    recognised state/UT. Non-strings and strings without a state field
    also yield 'other'.
    """
    if not isinstance(x, str):
        return "other"
    parts = x.split("-")
    if len(parts) < 2:
        # A bare token (e.g. just a country) has no state field; the old
        # parts[-2] lookup raised IndexError here — treat as unknown.
        return "other"
    state = parts[-2].strip().lower()
    return state_corrections.get(state, state if state in valid_states_uts else 'other')
def extract_country(x):
    """Return the lower-cased country (last '-' field), or 'other' for non-strings."""
    return x.split("-")[-1].strip().lower() if isinstance(x, str) else "other"
def preprocess_new_data(df):
    """Transform a raw tender DataFrame into the model's feature matrix.

    Mutates *df* in place (normalised text columns, parsed dates), then
    returns a new frame containing 'Ref No' plus the encoded and scaled
    model features.
    """
    # Normalise free-text categoricals. The padded column name
    # ' Type of Tender ' is intentional — it matches the training header.
    df['Ownership'] = df['Ownership'].str.lower().str.strip()
    df[' Type of Tender '] = df[' Type of Tender '].str.lower().str.strip()

    def parse_closing_date(date_str):
        # Closing dates are either a single date or a "start to end"
        # range; ranges resolve to the end date. Unparseable values → NaT.
        try:
            return pd.to_datetime(date_str)
        except Exception:
            # Guard the substring test: non-string values (e.g. NaN read
            # from a malformed cell) used to raise TypeError here.
            if isinstance(date_str, str) and " to " in date_str:
                return pd.to_datetime(date_str.split(" to ")[-1], errors='coerce')
            return pd.NaT

    df['Closing Date'] = df['Closing Date'].apply(parse_closing_date)
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df['days_left'] = (df['Closing Date'] - df['Date']).dt.days

    # Derive location features. Assign-back instead of the old chained
    # `df[col].fillna(..., inplace=True)`, which is deprecated and breaks
    # under pandas copy-on-write.
    df['city'] = df['Location'].apply(extract_city).fillna("other")
    df['state'] = df['Location'].apply(extract_state).fillna("other")
    df['country'] = df['Location'].apply(extract_country).fillna("other")

    # Strip thousands separators and coerce currency columns to floats.
    numerical_columns = ['Earnest Money', 'Estimated Cost', 'DocFees']
    for col in numerical_columns:
        df[col] = df[col].replace({',': ''}, regex=True).astype(float)

    df = df[['Ref No', 'Earnest Money', 'Estimated Cost', 'DocFees', 'Ownership',
             ' Type of Tender ', 'days_left', 'city', 'state', 'country']]

    # NOTE(review): the imputer is fit on the *inference* batch rather than
    # reusing a training-time median — confirm this matches how the model
    # was trained before changing it.
    imputer = SimpleImputer(strategy='median')
    df['days_left'] = imputer.fit_transform(df[['days_left']])

    for column in ['Ownership', ' Type of Tender ', 'city', 'state', 'country']:
        le = label_encoders[column]
        # Ensure 'other' is a known class so unseen labels can map onto it.
        if 'other' not in le.classes_:
            le.classes_ = np.append(le.classes_, 'other')
        # Replace unseen labels with 'other', then integer-encode.
        df[column] = df[column].apply(lambda x: x if x in le.classes_ else 'other')
        df[column] = le.transform(df[column])

    # Scale numeric features with the scaler fitted at training time.
    numerical_features = ['Earnest Money', 'Estimated Cost', 'DocFees', 'days_left']
    df[numerical_features] = scaler.transform(df[numerical_features])
    return df
def predict_new_data(new_data):
    """Run the tender-selection model over raw input rows.

    Returns a DataFrame pairing each tender's 'Ref No' with the model's
    prediction for it.
    """
    features = preprocess_new_data(new_data)
    ref_numbers = features['Ref No']
    predictions = random_forest_model.predict(features.drop(columns=['Ref No']))
    return pd.DataFrame({'Ref No': ref_numbers, 'predictions': predictions})
# --- Streamlit UI: upload a tender CSV, predict, and show selected rows ---
st.title("Tender Selection Prediction")
uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
if uploaded_file is not None:
    new_data = pd.read_csv(uploaded_file)
    prediction_results = predict_new_data(new_data)
    # Keep only tenders the model marked "yes"; compare Ref No as strings
    # so numeric-looking references still match after the round trip.
    selected_tenders = prediction_results[prediction_results['predictions'] == "yes"]['Ref No'].astype(str).to_list()
    new_data['Ref No'] = new_data['Ref No'].astype(str)
    st.write("Selected Tenders:")
    selected_rows = new_data[new_data['Ref No'].isin(selected_tenders)]
    # errors='ignore': CSVs without a stray 'Unnamed: 0' index column used
    # to raise KeyError here. reset_index(drop=True) replaces the old
    # reset_index() + drop(columns=['index']) round trip.
    st.write(selected_rows.drop(columns=['Unnamed: 0'], errors='ignore').reset_index(drop=True))