ombhojane committed on
Commit
d173086
1 Parent(s): fce0351

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train and evaluate a random-forest classifier on the ``ombhojane/ckv3`` dataset.

The script loads the dataset's ``train`` split, holds out 20% of the rows,
one-hot encodes the categorical columns, standardizes the numeric columns,
tunes a ``RandomForestClassifier`` with a 5-fold grid search, and prints a
classification report plus accuracy for the held-out rows.

The encoder and scaler are fitted on the training rows only and then applied
to the held-out rows, so no statistics from the evaluation data leak into
preprocessing.
"""
from datasets import load_dataset
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from category_encoders import OneHotEncoder

# Column roles, named once so the preprocessing steps below stay in sync.
TARGET_COLUMN = 'Service'
CATEGORICAL_COLUMNS = ['Biodiversity', 'Existing Infrastructure']
NUMERIC_COLUMNS = ['Land Size (hectares)', 'Budget (INR)']

# One seed for both the split and the forest so repeated runs are comparable.
RANDOM_STATE = 42

dataset = load_dataset("ombhojane/ckv3")
df = pd.DataFrame(dataset['train'])

# Splitting features and target.
# The split happens BEFORE any transformer is fitted; fitting the encoder or
# scaler on the full frame would let held-out rows influence preprocessing.
X = df.drop(TARGET_COLUMN, axis=1)
y = df[TARGET_COLUMN]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Preprocessing
# One-hot encoding for categorical features: learn the categories from the
# training rows, then apply the same mapping to the held-out rows.
encoder = OneHotEncoder(cols=CATEGORICAL_COLUMNS, use_cat_names=True)
X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)

# Standardize numeric features with the training rows' mean and variance only.
scaler = StandardScaler()
X_train[NUMERIC_COLUMNS] = scaler.fit_transform(X_train[NUMERIC_COLUMNS])
X_test[NUMERIC_COLUMNS] = scaler.transform(X_test[NUMERIC_COLUMNS])

# Seeding the forest makes the grid-search winner and the printed metrics
# repeatable across runs.
model = RandomForestClassifier(random_state=RANDOM_STATE)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}

# NOTE: the encoder and scaler above are fitted on the whole training split,
# so the inner cross-validation folds share those statistics. Wrap the
# preprocessing and the model in a Pipeline if per-fold fitting is required.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

# Model Evaluation
predictions = best_model.predict(X_test)
print(classification_report(y_test, predictions))
print("Accuracy:", accuracy_score(y_test, predictions))