vumichien committed on
Commit
5471564
·
1 Parent(s): ffd3d83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -20,15 +20,17 @@ model_card = f"""
20
  The **Out-of-bag (OOB)** method is a useful technique for estimating the optimal number of boosting iterations.
21
  This method is similar to cross-validation, but it does not require repeated model fitting and can be computed on-the-fly.
22
  **OOB** estimates are only applicable to Stochastic Gradient Boosting (i.e., subsample < 1.0). They are calculated from the improvement in loss based on examples not included in the bootstrap sample (i.e., out-of-bag examples).
23
- The **OOB** estimator provides a conservative estimate of the true test loss, but is still a reasonable approximation for a small number of trees.
24
- This demo shows the negative OOB improvements' cumulative sum as a function of the boosting iteration.
 
 
25
 
26
  ## Dataset
27
 
28
  Simulation data
29
  """
30
 
31
- def do_train(n_samples, n_splits, random_seed):
32
  # Generate data (adapted from G. Ridgeway's gbm example)
33
  random_state = np.random.RandomState(random_seed)
34
  x1 = random_state.uniform(size=n_samples)
@@ -45,7 +47,7 @@ def do_train(n_samples, n_splits, random_seed):
45
 
46
  # Fit classifier with out-of-bag estimates
47
  params = {
48
- "n_estimators": 1200,
49
  "max_depth": 3,
50
  "subsample": 0.5,
51
  "learning_rate": 0.01,
@@ -145,6 +147,7 @@ with gr.Blocks(theme=theme) as demo:
145
  n_samples = gr.Slider(minimum=500, maximum=5000, step=500, value=500, label="Number of samples")
146
  n_splits = gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of cross validation folds")
147
  random_seed = gr.Slider(minimum=0, maximum=2000, step=1, value=0, label="Random seed")
 
148
 
149
  with gr.Row():
150
  with gr.Column():
@@ -152,8 +155,9 @@ with gr.Blocks(theme=theme) as demo:
152
  with gr.Column():
153
  result = gr.Textbox(label="Resusts")
154
 
155
- n_samples.change(fn=do_train, inputs=[n_samples, n_splits, random_seed], outputs=[plot, result])
156
- n_splits.change(fn=do_train, inputs=[n_samples, n_splits, random_seed], outputs=[plot, result])
157
- random_seed.change(fn=do_train, inputs=[n_samples, n_splits, random_seed], outputs=[plot, result])
 
158
 
159
  demo.launch()
 
20
  The **Out-of-bag (OOB)** method is a useful technique for estimating the optimal number of boosting iterations.
21
  This method is similar to cross-validation, but it does not require repeated model fitting and can be computed on-the-fly.
22
  **OOB** estimates are only applicable to Stochastic Gradient Boosting (i.e., subsample < 1.0). They are calculated from the improvement in loss based on examples not included in the bootstrap sample (i.e., out-of-bag examples).
23
+ The **OOB** estimator provides a conservative estimate of the true test loss but is still a reasonable approximation for a small number of trees.
24
+ In this demonstration, a **GradientBoostingClassifier** model is trained on a simulation dataset, and the losses on the training set, test set, and OOB set are displayed in the figure.
25
+ This information allows you to determine the level of generalization of your trained model on the simulation dataset.
26
+ You can play around with ``number of samples``, ``number of cross-validation folds``, ``random seed``, and ``number of estimators (step)``.
27
 
28
  ## Dataset
29
 
30
  Simulation data
31
  """
32
 
33
+ def do_train(n_samples, n_splits, random_seed, n_estimators):
34
  # Generate data (adapted from G. Ridgeway's gbm example)
35
  random_state = np.random.RandomState(random_seed)
36
  x1 = random_state.uniform(size=n_samples)
 
47
 
48
  # Fit classifier with out-of-bag estimates
49
  params = {
50
+ "n_estimators": n_estimators,
51
  "max_depth": 3,
52
  "subsample": 0.5,
53
  "learning_rate": 0.01,
 
147
  n_samples = gr.Slider(minimum=500, maximum=5000, step=500, value=500, label="Number of samples")
148
  n_splits = gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of cross validation folds")
149
  random_seed = gr.Slider(minimum=0, maximum=2000, step=1, value=0, label="Random seed")
150
+ n_estimators = gr.Slider(minimum=500, maximum=2000, step=100, value=500, label="Number of estimator step")
151
 
152
  with gr.Row():
153
  with gr.Column():
 
155
  with gr.Column():
156
  result = gr.Textbox(label="Resusts")
157
 
158
+ n_samples.change(fn=do_train, inputs=[n_samples, n_splits, random_seed, n_estimators], outputs=[plot, result])
159
+ n_splits.change(fn=do_train, inputs=[n_samples, n_splits, random_seed, n_estimators], outputs=[plot, result])
160
+ random_seed.change(fn=do_train, inputs=[n_samples, n_splits, random_seed, n_estimators], outputs=[plot, result])
161
+ n_estimators.change(fn=do_train, inputs=[n_samples, n_splits, random_seed, n_estimators], outputs=[plot, result])
162
 
163
  demo.launch()