-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathsimulation.py
More file actions
102 lines (83 loc) · 3.09 KB
/
simulation.py
File metadata and controls
102 lines (83 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
=================================================================
Simulation Study Comparing Random Boosting with Gradient Boosting
=================================================================
Author: Tobias Krabel
Compares Friedman (2001)'s standard Gradient Boosting framework with
Random Tree Depth Injection.
"""
from random_boost.random_boost import RandomBoostingRegressor, RandomBoostingClassifier
from random_boost.utils import gen_friedman_data
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import time
import datetime
# Homegrown
def rmse(y_true, y_pred):
    """Root mean squared error between ``y_true`` and ``y_pred``."""
    err = np.asarray(y_true) - np.asarray(y_pred)
    return np.sqrt(np.mean(np.square(err)))
def mae(y_true, y_pred):
    """Mean absolute error between ``y_true`` and ``y_pred``."""
    diff = np.asarray(y_true) - np.asarray(y_pred)
    return np.abs(diff).mean()
# --- Simulation configuration -------------------------------------------
N_SAMPLES = 20000        # observations per simulated data set
N_INPUTS = 20            # number of input features
N_COMPONENTS = 20        # number of Friedman components
SIGNAL_TO_NOISE = 1.0    # signal-to-noise ratio of the target
N_ROUNDS = 100           # number of simulation repetitions

# Hyper-parameters shared by both boosting models
PARAMS = dict(learning_rate=0.1, max_depth=5, n_estimators=100)

# Fix NumPy's global RNG so repeated runs are reproducible
np.random.seed(0)

# Result container: one row per (round, model) combination
df_result = pd.DataFrame(columns=['run', 'model', 'rmse', 'mae', 'time_sec'])
# Run the simulation: each round draws fresh Friedman data, fits both
# models on an identical train split, and records test error and fit time.
# Per-round frames are collected in a list and concatenated once at the
# end — repeated pd.concat inside the loop copies the whole frame each
# iteration (quadratic growth).
results = []
for i in range(N_ROUNDS):
    print(f'Round #{i+1} of {N_ROUNDS}')

    # Data
    print('... generate Friedman data')
    X, y = gen_friedman_data(n_samples=N_SAMPLES,
                             n_inputs=N_INPUTS,
                             n_components=N_COMPONENTS,
                             stn=SIGNAL_TO_NOISE)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

    # Fit Random Boost
    print('... fit Random Boost')
    start_time = time.time()
    rb = RandomBoostingRegressor(n_estimators=PARAMS['n_estimators'],
                                 learning_rate=PARAMS['learning_rate'],
                                 max_depth=PARAMS['max_depth'])
    rb = rb.fit(X_train, y_train)
    time_rb = time.time() - start_time
    print(f'... took {time_rb} seconds')

    # Fit MART (sklearn's GradientBoostingRegressor)
    print('... fit MART')
    start_time = time.time()
    gb = GradientBoostingRegressor(n_estimators=PARAMS['n_estimators'],
                                   learning_rate=PARAMS['learning_rate'],
                                   max_depth=PARAMS['max_depth'])
    gb = gb.fit(X_train, y_train)
    time_gb = time.time() - start_time
    print(f'... took {time_gb} seconds\n')

    # Predict once per model and reuse for both metrics — predict() is the
    # expensive call, the original ran it twice per model.
    # NOTE(review): the 'xgb' label is kept for backward compatibility with
    # existing result files even though the baseline is sklearn's MART, not
    # XGBoost — confirm before renaming.
    preds = [rb.predict(X_test), gb.predict(X_test)]
    results.append(pd.DataFrame(
        data={
            'run': [i + 1] * 2,
            'model': ['rb', 'xgb'],
            'rmse': [rmse(y_test, p) for p in preds],
            'mae': [mae(y_test, p) for p in preds],
            'time_sec': [time_rb, time_gb]
        }))

# Fold all rounds into the (initially empty) result frame in a single concat.
df_result = pd.concat([df_result, *results], axis=0, ignore_index=True)

# Save to a timestamped CSV so repeated runs never overwrite each other
now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
df_result.to_csv(f'data/simulation_results/rb-vs-xgb-lr{PARAMS["learning_rate"]}-d{PARAMS["max_depth"]}-nest{PARAMS["n_estimators"]}-nocv-{now}.csv',
                 index=False)