import time
import datetime
from scipy.stats import uniform
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression, Ridge, PoissonRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from IPython.display import display
import xgboost as xgb
from traitlets.traitlets import Any, Dict
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
from pandas.core.frame import DataFrame
import pandas as pd
import numpy as np
from enum import Enum, auto
import warnings
# Suppress every Python warning for the rest of the session. This also hides
# deprecation and convergence warnings raised by the libraries used below.
warnings.filterwarnings('ignore')
# IPython magic (not plain Python): render matplotlib figures inline.
%matplotlib inline
# Pandas display settings for the wide cross-validation tables shown below.
pd.set_option("display.max_columns", None)   # no limit on displayed columns
pd.set_option("display.max_rows", 500)
pd.set_option("display.width", None)         # let pandas detect the width
pd.set_option("display.max_colwidth", 100)
pd.set_option("display.precision", 3)
# Directory holding the pickled frames loaded here.
base_path = '../../data/'

# Encoded train/test frames.
encoded_train, encoded_test = (
    pd.read_pickle(f"{base_path}encoded_train.pkl"),
    pd.read_pickle(f"{base_path}encoded_test.pkl"),
)

# Dimensionality-reduced counterparts of the frames above.
encoded_train_dim_red, encoded_test_dim_red = (
    pd.read_pickle(f"{base_path}encoded_train_dim_red.pkl"),
    pd.read_pickle(f"{base_path}encoded_test_dim_red.pkl"),
)
# Coerce every column of the encoded train/test frames to a numeric dtype.
# pd.to_numeric picks the dtype per column, so the result is not necessarily
# float. The *_dim_red frames are left as loaded.
encoded_train = encoded_train.apply(pd.to_numeric)
encoded_test = encoded_test.apply(pd.to_numeric)
'''
Models to try: Linear Regression, Generalized Linear Regression, Regularized Regression (Ridge and Lasso), SVM Regression, Tree-Based Regression, XGBoost Regression
Run each with and without PCA-reduced data
Hyperparameter tuning
Metrics plotting
Residual analysis and prediction/error plotting/result-validation plotting
'''
class DataMode(Enum):
    """Which feature representation a model is trained and evaluated on."""

    # Encoded feature frame (reported as "without PCA").
    ENCODED = 1
    # Dimensionality-reduced feature frame (reported as "with PCA").
    ENCODED_DIM_RED = 2
def split_data(X, y=None, test_fraction: float = 0.2, shuffle: bool = True, stratify=None, random_state=42):
    """Split features (and optionally targets) into train and hold-out parts.

    Thin wrapper around ``sklearn.model_selection.train_test_split``.

    Args:
        X: Feature data to split.
        y: Optional target data. When ``None`` only ``X`` is split.
        test_fraction: Forwarded as ``test_size``.
        shuffle: Forwarded as ``shuffle``.
        stratify: Forwarded as ``stratify``.
        random_state: Seed forwarded to ``train_test_split``. Defaults to 42,
            the value that used to be hard-coded, so existing callers get the
            same split as before.

    Returns:
        ``[X_train, X_test]`` when ``y`` is ``None``, otherwise
        ``[X_train, X_test, y_train, y_test]``.
    """
    # Only the positional arrays differ between the two cases, so build them
    # once and make a single call instead of duplicating every keyword.
    arrays = (X,) if y is None else (X, y)
    return train_test_split(
        *arrays,
        test_size=test_fraction,
        random_state=random_state,
        shuffle=shuffle,
        stratify=stratify,
    )
def plot_error_patterns(error_data, **kwargs) -> None:
    """Show four residual-diagnostic plots for one set of predictions.

    Args:
        error_data: Table with "Actual", "Predicted" and "Residual" columns.
        **kwargs: Forwarded unchanged to *every* plotting call below
            (``ff.create_distplot``, ``sm.qqplot`` and both ``px.scatter``
            calls), so any keyword supplied must be accepted by all of them.
    """
    residuals = error_data["Residual"]

    # 1. Distribution of the residuals.
    dist_fig = ff.create_distplot(hist_data=[residuals], group_labels=["Residual"], **kwargs)
    dist_fig.update_layout(title_text="Residuals Distribution")
    dist_fig.show()

    # 2. Q-Q plot of the residuals against a 45-degree reference line.
    sm.qqplot(residuals, line='45', **kwargs)
    plt.show()

    # 3 and 4. Scatter plots with an OLS trend line.
    scatter_specs = (
        ("Actual", "Predicted", "Actual vs Predicted values"),
        ("Predicted", "Residual", "Predicted values vs Residuals"),
    )
    for x_col, y_col, title in scatter_specs:
        scatter_fig = px.scatter(error_data, x=x_col, y=y_col, trendline="ols", **kwargs)
        scatter_fig.update_layout(title_text=title)
        scatter_fig.show()
# Short identifiers used as keys of the model registry.
linear = 'linear'
ridge = 'ridge'
lasso = 'lasso'
elasticnet = 'elasticnet'
poisson = 'poisson'
rf = 'rf'
dt = 'dt'
svr = 'svr'
adb = 'adb'
xgbr = 'xgbr'

# Keys of each registry entry: the estimator instance and its search space.
model_obj = 'model'
params_dict = 'params'

# Registry of candidate regressors. Each entry pairs an unfitted estimator
# with the hyper-parameter value lists the randomized search samples from.
models = {
    linear: {
        model_obj: LinearRegression(),
        params_dict: {"n_jobs": [-1]},
    },
    ridge: {
        model_obj: Ridge(),
        params_dict: {"alpha": [0.001, 0.01, 0.1, 1., 10.]},
    },
    lasso: {
        model_obj: Lasso(),
        params_dict: {"alpha": [0.001, 0.01, 0.1, 1., 10.]},
    },
    elasticnet: {
        model_obj: ElasticNet(),
        params_dict: {
            "alpha": [0.001, 0.01, 0.1, 0.2, 0.5, 1.],
            "l1_ratio": [0.001, 0.01, 0.1, 0.2, 0.5, 1.],
        },
    },
    # Disabled:
    # poisson: {model_obj: PoissonRegressor(), params_dict: dict(
    #     alpha=[0.001, 0.01, 0.1, 0.2, 0.5, 1.],
    #     max_iter=[10, 20, 50, 70, 100],
    #     tol=[1e-4, 1e-3, 1e-2, 1e-1, 1])},
    dt: {
        model_obj: DecisionTreeRegressor(),
        params_dict: {
            # NOTE(review): in the output recorded with this notebook the
            # sampled "poisson" candidates scored NaN on every split.
            "criterion": ["mse", "friedman_mse", "mae", "poisson"],
            "splitter": ["best", "random"],
            "max_depth": [2, 3, 5, 10, 20, 50, 100, 200],
            "min_samples_split": [2, 3, 5, 10, 20, 50, 100, 200, 500, 1000],
            "max_leaf_nodes": [10, 20, 50, 100, 200, 500, 1000],
        },
    },
    rf: {
        model_obj: RandomForestRegressor(),
        params_dict: {
            "n_estimators": [10, 20, 50, 100, 200, 500, 1000],
            "criterion": ["mse", "mae"],
            "max_depth": [2, 3, 5, 10, 20, 50, 100, 200],
            "min_samples_split": [2, 3, 5, 10, 20, 50, 100, 200, 500, 1000],
            "max_leaf_nodes": [10, 20, 50, 100, 200, 500, 1000],
        },
    },
    # Disabled:
    # svr: {model_obj: SVR(), params_dict: dict(
    #     epsilon=[1e-4, 1e-3, 1e-2, 1e-1, 1e+0, 1e+1],
    #     tol=[1e-4, 1e-3, 1e-2, 1e-1, 1e+0, 1e+1, 1e+2],
    #     C=[1e-4, 1e-3, 1e-2, 1e-1, 1e+0, 1e+1, 1e+2],
    #     loss=['epsilon_insensitive', 'squared_epsilon_insensitive']
    # )},
    adb: {
        model_obj: AdaBoostRegressor(),
        params_dict: {
            "n_estimators": [10, 20, 50, 100, 200, 500, 1000],
            "learning_rate": [1e-4, 1e-3, 1e-2, 1e-1, 1e+0, 1e+1],
            "loss": ['linear', 'square', 'exponential'],
            "random_state": [0],
        },
    },
    xgbr: {
        model_obj: xgb.XGBRegressor(),
        params_dict: {
            "n_estimators": [10, 20, 50, 100, 200, 500, 1000],
            "max_depth": [2, 3, 5, 10, 20, 50, 100, 200],
            "learning_rate": [1e-4, 1e-3, 1e-2, 1e-1, 1e+0, 1e+1],
            "gamma": [1e-4, 1e-3, 1e-2, 1e-1, 1e+0, 1e+1],
        },
    },
}
# degrees = [1, 2, 3]
# for degree in degrees:
# pipeline = Pipeline([('poly_features', PolynomialFeatures(degree=degree)),
# ('model', LinearRegression())])
# pass
# Hold-out split of the encoded features; the target column "y" is removed
# from the feature frame and passed separately.
(X_train_encoded, X_test_encoded,
 y_train_encoded, y_test_encoded) = split_data(
    encoded_train.drop(columns='y'),
    encoded_train[['y']],
)

# Hold-out split of the dimensionality-reduced features. The target is still
# taken from `encoded_train`, so both frames are expected to share row order.
(X_train_dim_red, X_test_dim_red,
 y_train_dim_red, y_test_dim_red) = split_data(
    encoded_train_dim_red,
    encoded_train[['y']],
)
def prepare_error_data_and_plot(model, X, actual_y: pd.DataFrame, pred_y, mode, model_name, plot: bool = False, data_label: str = "Train Data") -> pd.DataFrame:
    """Build an actual/predicted/residual table and optionally plot it.

    Args:
        model: Unused; kept so existing call sites keep working.
        X: Unused; kept so existing call sites keep working.
        actual_y: Frame holding the true target in a column named "y". Its
            index is carried over to the result; the frame is not modified.
        pred_y: Predictions in the same row order as ``actual_y``. Any
            array-like is accepted; an (n, 1) array is flattened to length n.
        mode: ``DataMode`` member, used only for the plot banner.
        model_name: Model label, used only for the plot banner.
        plot: When true, print a banner and call ``plot_error_patterns``.
        data_label: Data-set name shown in the banner. Defaults to
            "Train Data", the text that used to be hard-coded; pass e.g.
            "Test Data" when plotting hold-out predictions.

    Returns:
        DataFrame with columns "Actual", "Predicted" and "Residual"
        (actual minus predicted), indexed like ``actual_y``.

    Raises:
        KeyError: If ``actual_y`` has no "y" column.
        ValueError: If ``pred_y`` and ``actual_y`` differ in length.
    """
    # Flatten so column-vector predictions (estimators fitted on a
    # single-column target frame can return shape (n, 1)) become 1-D.
    predicted = np.asarray(pred_y).ravel()

    # Build the table by name rather than appending columns and renaming by
    # position, so extra columns in `actual_y` cannot break the result.
    error_df = pd.DataFrame(
        {"Actual": actual_y["y"].to_numpy(), "Predicted": predicted},
        index=actual_y.index,
    )
    error_df["Residual"] = error_df["Actual"] - error_df["Predicted"]

    if plot:
        # The mode label is only needed for the banner, so resolve it here.
        data_mode_str = "without PCA" if mode == DataMode.ENCODED else "with PCA"
        print(f"Residual Analysis graphs for \"{data_label}\" {model_name} {data_mode_str}")
        plot_error_patterns(error_df)
    return error_df
# Set to True to render the residual plots for every fitted search.
show_graphs = False
# One row per (model, data mode):
# [label, best CV score, train score, test score, train R2, test R2, seconds]
all_model_metrics = []

# Train/test split to use for each data mode, in evaluation order.
splits_by_mode = {
    DataMode.ENCODED: (X_train_encoded, X_test_encoded, y_train_encoded, y_test_encoded),
    DataMode.ENCODED_DIM_RED: (X_train_dim_red, X_test_dim_red, y_train_dim_red, y_test_dim_red),
}

for model, model_config in models.items():
    print(f"Starting for {model.title()}")
    # With and without dimensionality reduction
    for mode, (X_train, X_test, y_train, y_test) in splits_by_mode.items():
        if mode == DataMode.ENCODED:
            data_mode_str = "without PCA"
        else:
            data_mode_str = "with PCA"

        # Hyper-parameter tuning: randomized search with 3-fold CV.
        ra_s_cv = RandomizedSearchCV(
            model_config[model_obj],
            model_config[params_dict],
            random_state=0,
            n_jobs=-1,
            cv=3,
            verbose=3,
            return_train_score=True,
        )

        # Time only the search itself.
        start_time = time.perf_counter()
        ra_s_cv.fit(X_train, y_train)
        end_time = time.perf_counter()

        train_pred = ra_s_cv.predict(X_train)
        test_pred = ra_s_cv.predict(X_test)

        print("-"*50)
        print(f"Best Estimator for {model} {data_mode_str} is {ra_s_cv.best_estimator_}\n")
        print(f"Best Params for {model} {data_mode_str} are {ra_s_cv.best_params_}\n")
        print(f"Cross validation Results for {model} {data_mode_str}\n")
        display(pd.DataFrame(ra_s_cv.cv_results_))
        print("-"*50)

        # Residual tables (and plots when show_graphs is on) for both splits.
        prepare_error_data_and_plot(ra_s_cv, X_train, y_train, train_pred, mode, model, show_graphs)
        prepare_error_data_and_plot(ra_s_cv, X_test, y_test, test_pred, mode, model, show_graphs)

        # Record performance
        all_model_metrics.append([
            f"{model} {data_mode_str}",
            ra_s_cv.best_score_,
            ra_s_cv.score(X_train, y_train),
            ra_s_cv.score(X_test, y_test.values),
            r2_score(y_train, train_pred),
            r2_score(y_test, test_pred),
            end_time-start_time,
        ])
        print("="*50, "\n")
Starting for Linear Fitting 3 folds for each of 1 candidates, totalling 3 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 3 out of 3 | elapsed: 1.0s finished -------------------------------------------------- Best Estimator for linear without PCA is LinearRegression(n_jobs=-1) Best Params for linear without PCA are {'n_jobs': -1} Cross validation Results for linear without PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_n_jobs | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.125 | 0.019 | 0.006 | 7.663e-04 | -1 | {'n_jobs': -1} | -5.359e+22 | -2.441e+22 | -9.090e+22 | -5.630e+22 | 2.721e+22 | 1 | 0.679 | 0.619 | 0.612 | 0.637 | 0.03 |
-------------------------------------------------- ================================================== Fitting 3 folds for each of 1 candidates, totalling 3 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. -------------------------------------------------- Best Estimator for linear with PCA is LinearRegression(n_jobs=-1) Best Params for linear with PCA are {'n_jobs': -1} Cross validation Results for linear with PCA [Parallel(n_jobs=-1)]: Done 3 out of 3 | elapsed: 0.5s finished
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_n_jobs | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.028 | 0.009 | 0.002 | 4.018e-05 | -1 | {'n_jobs': -1} | 0.426 | 0.553 | 0.562 | 0.513 | 0.062 | 1 | 0.609 | 0.532 | 0.528 | 0.556 | 0.038 |
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. -------------------------------------------------- ================================================== Starting for Ridge Fitting 3 folds for each of 5 candidates, totalling 15 fits [Parallel(n_jobs=-1)]: Done 4 out of 15 | elapsed: 0.3s remaining: 0.7s [Parallel(n_jobs=-1)]: Done 10 out of 15 | elapsed: 0.9s remaining: 0.4s [Parallel(n_jobs=-1)]: Done 15 out of 15 | elapsed: 1.0s finished -------------------------------------------------- Best Estimator for ridge without PCA is Ridge(alpha=10.0) Best Params for ridge without PCA are {'alpha': 10.0} Cross validation Results for ridge without PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_alpha | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.090 | 0.014 | 0.011 | 3.643e-03 | 0.001 | {'alpha': 0.001} | 0.411 | 0.530 | 0.537 | 0.493 | 0.058 | 5 | 0.679 | 0.620 | 0.612 | 0.637 | 0.030 |
1 | 0.099 | 0.009 | 0.011 | 3.335e-03 | 0.01 | {'alpha': 0.01} | 0.411 | 0.531 | 0.538 | 0.493 | 0.058 | 4 | 0.679 | 0.620 | 0.612 | 0.637 | 0.030 |
2 | 0.144 | 0.033 | 0.033 | 2.770e-03 | 0.1 | {'alpha': 0.1} | 0.417 | 0.538 | 0.541 | 0.499 | 0.058 | 3 | 0.679 | 0.619 | 0.612 | 0.637 | 0.030 |
3 | 0.121 | 0.013 | 0.016 | 5.653e-04 | 1 | {'alpha': 1.0} | 0.431 | 0.556 | 0.554 | 0.514 | 0.058 | 2 | 0.676 | 0.615 | 0.609 | 0.634 | 0.030 |
4 | 0.073 | 0.003 | 0.013 | 2.840e-03 | 10 | {'alpha': 10.0} | 0.443 | 0.576 | 0.572 | 0.530 | 0.062 | 1 | 0.664 | 0.598 | 0.594 | 0.619 | 0.032 |
-------------------------------------------------- ================================================== Fitting 3 folds for each of 5 candidates, totalling 15 fits -------------------------------------------------- Best Estimator for ridge with PCA is Ridge(alpha=10.0) Best Params for ridge with PCA are {'alpha': 10.0} Cross validation Results for ridge with PCA [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 4 out of 15 | elapsed: 0.0s remaining: 0.1s [Parallel(n_jobs=-1)]: Done 10 out of 15 | elapsed: 0.1s remaining: 0.0s [Parallel(n_jobs=-1)]: Done 15 out of 15 | elapsed: 0.1s finished
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_alpha | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.007 | 5.265e-04 | 0.003 | 2.947e-04 | 0.001 | {'alpha': 0.001} | 0.426 | 0.553 | 0.562 | 0.513 | 0.062 | 5 | 0.609 | 0.532 | 0.528 | 0.556 | 0.038 |
1 | 0.011 | 4.498e-03 | 0.003 | 4.938e-04 | 0.01 | {'alpha': 0.01} | 0.426 | 0.553 | 0.562 | 0.513 | 0.062 | 4 | 0.609 | 0.532 | 0.528 | 0.556 | 0.038 |
2 | 0.008 | 4.682e-04 | 0.003 | 3.691e-04 | 0.1 | {'alpha': 0.1} | 0.426 | 0.553 | 0.562 | 0.513 | 0.062 | 3 | 0.609 | 0.532 | 0.528 | 0.556 | 0.038 |
3 | 0.007 | 2.467e-04 | 0.003 | 3.098e-05 | 1 | {'alpha': 1.0} | 0.426 | 0.554 | 0.562 | 0.514 | 0.062 | 2 | 0.609 | 0.532 | 0.528 | 0.556 | 0.038 |
4 | 0.008 | 1.233e-03 | 0.002 | 2.017e-04 | 10 | {'alpha': 10.0} | 0.426 | 0.556 | 0.564 | 0.515 | 0.063 | 1 | 0.609 | 0.532 | 0.528 | 0.556 | 0.038 |
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. -------------------------------------------------- ================================================== Starting for Lasso Fitting 3 folds for each of 5 candidates, totalling 15 fits [Parallel(n_jobs=-1)]: Done 4 out of 15 | elapsed: 0.8s remaining: 2.1s [Parallel(n_jobs=-1)]: Done 10 out of 15 | elapsed: 4.6s remaining: 2.3s [Parallel(n_jobs=-1)]: Done 15 out of 15 | elapsed: 5.1s finished -------------------------------------------------- Best Estimator for lasso without PCA is Lasso(alpha=0.1) Best Params for lasso without PCA are {'alpha': 0.1} Cross validation Results for lasso without PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_alpha | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4.661 | 0.057 | 0.013 | 0.001 | 0.001 | {'alpha': 0.001} | 0.430 | 5.522e-01 | 0.551 | 0.511 | 0.057 | 3 | 0.677 | 0.617 | 0.610 | 0.635 | 0.030 |
1 | 4.912 | 0.174 | 0.010 | 0.007 | 0.01 | {'alpha': 0.01} | 0.451 | 5.834e-01 | 0.582 | 0.539 | 0.062 | 2 | 0.659 | 0.596 | 0.591 | 0.615 | 0.031 |
2 | 3.424 | 0.656 | 0.019 | 0.004 | 0.1 | {'alpha': 0.1} | 0.449 | 5.825e-01 | 0.595 | 0.542 | 0.066 | 1 | 0.603 | 0.530 | 0.527 | 0.553 | 0.035 |
3 | 0.434 | 0.051 | 0.022 | 0.001 | 1 | {'alpha': 1.0} | 0.316 | 4.052e-01 | 0.421 | 0.380 | 0.046 | 4 | 0.421 | 0.371 | 0.350 | 0.380 | 0.030 |
4 | 0.138 | 0.024 | 0.023 | 0.001 | 10 | {'alpha': 10.0} | -0.004 | -3.756e-04 | -0.003 | -0.003 | 0.002 | 5 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
-------------------------------------------------- ================================================== Fitting 3 folds for each of 5 candidates, totalling 15 fits -------------------------------------------------- Best Estimator for lasso with PCA is Lasso(alpha=0.01) Best Params for lasso with PCA are {'alpha': 0.01} Cross validation Results for lasso with PCA [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 4 out of 15 | elapsed: 0.0s remaining: 0.1s [Parallel(n_jobs=-1)]: Done 10 out of 15 | elapsed: 0.1s remaining: 0.0s [Parallel(n_jobs=-1)]: Done 15 out of 15 | elapsed: 0.1s finished
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_alpha | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.010 | 1.528e-03 | 0.003 | 1.818e-04 | 0.001 | {'alpha': 0.001} | 0.426 | 5.537e-01 | 0.562 | 0.514 | 0.062 | 2 | 0.609 | 0.532 | 0.528 | 0.556 | 0.038 |
1 | 0.012 | 2.248e-03 | 0.002 | 4.130e-04 | 0.01 | {'alpha': 0.01} | 0.427 | 5.569e-01 | 0.564 | 0.516 | 0.063 | 1 | 0.609 | 0.531 | 0.527 | 0.556 | 0.038 |
2 | 0.010 | 1.495e-03 | 0.002 | 5.266e-04 | 0.1 | {'alpha': 0.1} | 0.415 | 5.586e-01 | 0.562 | 0.512 | 0.068 | 3 | 0.583 | 0.504 | 0.503 | 0.530 | 0.037 |
3 | 0.009 | 3.777e-04 | 0.003 | 3.871e-04 | 1 | {'alpha': 1.0} | 0.347 | 4.691e-01 | 0.452 | 0.423 | 0.054 | 4 | 0.471 | 0.404 | 0.402 | 0.426 | 0.032 |
4 | 0.007 | 9.208e-05 | 0.002 | 2.911e-04 | 10 | {'alpha': 10.0} | -0.004 | -3.756e-04 | -0.003 | -0.003 | 0.002 | 5 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
-------------------------------------------------- ================================================== Starting for Elasticnet Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 1.0s remaining: 0.7s [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 5.3s finished -------------------------------------------------- Best Estimator for elasticnet without PCA is ElasticNet(alpha=0.01, l1_ratio=1.0) Best Params for elasticnet without PCA are {'l1_ratio': 1.0, 'alpha': 0.01} Cross validation Results for elasticnet without PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_l1_ratio | param_alpha | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.117 | 0.018 | 0.019 | 2.601e-03 | 0.01 | 1 | {'l1_ratio': 0.01, 'alpha': 1.0} | 0.325 | 0.441 | 0.425 | 0.397 | 0.051 | 10 | 0.449 | 0.381 | 0.385 | 0.405 | 0.031 |
1 | 0.313 | 0.103 | 0.041 | 1.127e-02 | 0.1 | 0.2 | {'l1_ratio': 0.1, 'alpha': 0.2} | 0.417 | 0.559 | 0.556 | 0.511 | 0.066 | 7 | 0.579 | 0.504 | 0.501 | 0.528 | 0.036 |
2 | 0.437 | 0.043 | 0.024 | 3.422e-04 | 0.5 | 0.1 | {'l1_ratio': 0.5, 'alpha': 0.1} | 0.437 | 0.577 | 0.584 | 0.533 | 0.068 | 3 | 0.601 | 0.526 | 0.524 | 0.550 | 0.036 |
3 | 0.258 | 0.053 | 0.024 | 2.278e-03 | 0.001 | 1 | {'l1_ratio': 0.001, 'alpha': 1.0} | 0.325 | 0.442 | 0.426 | 0.398 | 0.052 | 9 | 0.451 | 0.382 | 0.386 | 0.406 | 0.031 |
4 | 0.317 | 0.033 | 0.024 | 2.165e-03 | 0.5 | 0.2 | {'l1_ratio': 0.5, 'alpha': 0.2} | 0.418 | 0.554 | 0.561 | 0.511 | 0.066 | 6 | 0.571 | 0.495 | 0.493 | 0.520 | 0.036 |
5 | 0.494 | 0.017 | 0.023 | 1.221e-03 | 0.2 | 0.1 | {'l1_ratio': 0.2, 'alpha': 0.1} | 0.435 | 0.575 | 0.578 | 0.529 | 0.067 | 4 | 0.605 | 0.530 | 0.528 | 0.554 | 0.036 |
6 | 2.390 | 0.466 | 0.019 | 3.889e-03 | 0.5 | 0.01 | {'l1_ratio': 0.5, 'alpha': 0.01} | 0.447 | 0.581 | 0.582 | 0.537 | 0.063 | 2 | 0.656 | 0.588 | 0.586 | 0.610 | 0.032 |
7 | 4.497 | 0.054 | 0.013 | 1.768e-03 | 0.1 | 0.001 | {'l1_ratio': 0.1, 'alpha': 0.001} | 0.436 | 0.564 | 0.559 | 0.520 | 0.059 | 5 | 0.674 | 0.612 | 0.606 | 0.631 | 0.031 |
8 | 4.449 | 0.231 | 0.011 | 4.514e-03 | 1 | 0.01 | {'l1_ratio': 1.0, 'alpha': 0.01} | 0.451 | 0.583 | 0.582 | 0.539 | 0.062 | 1 | 0.659 | 0.596 | 0.591 | 0.615 | 0.031 |
9 | 0.712 | 0.121 | 0.023 | 1.930e-03 | 1 | 0.5 | {'l1_ratio': 1.0, 'alpha': 0.5} | 0.354 | 0.453 | 0.499 | 0.435 | 0.060 | 8 | 0.480 | 0.415 | 0.415 | 0.437 | 0.031 |
-------------------------------------------------- ================================================== Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 0.1s remaining: 0.1s [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 0.1s finished -------------------------------------------------- Best Estimator for elasticnet with PCA is ElasticNet(alpha=0.01) Best Params for elasticnet with PCA are {'l1_ratio': 0.5, 'alpha': 0.01} Cross validation Results for elasticnet with PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_l1_ratio | param_alpha | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.010 | 1.444e-03 | 0.003 | 2.533e-04 | 0.01 | 1 | {'l1_ratio': 0.01, 'alpha': 1.0} | 0.324 | 0.441 | 0.425 | 0.396 | 0.052 | 10 | 0.448 | 0.379 | 0.383 | 0.403 | 0.032 |
1 | 0.010 | 8.170e-04 | 0.003 | 9.649e-04 | 0.1 | 0.2 | {'l1_ratio': 0.1, 'alpha': 0.2} | 0.410 | 0.551 | 0.549 | 0.503 | 0.066 | 6 | 0.571 | 0.494 | 0.491 | 0.519 | 0.037 |
2 | 0.012 | 2.876e-03 | 0.003 | 4.095e-04 | 0.5 | 0.1 | {'l1_ratio': 0.5, 'alpha': 0.1} | 0.419 | 0.562 | 0.565 | 0.515 | 0.068 | 4 | 0.588 | 0.510 | 0.508 | 0.535 | 0.037 |
3 | 0.012 | 1.247e-03 | 0.003 | 9.110e-04 | 0.001 | 1 | {'l1_ratio': 0.001, 'alpha': 1.0} | 0.324 | 0.441 | 0.425 | 0.397 | 0.052 | 9 | 0.448 | 0.379 | 0.383 | 0.404 | 0.032 |
4 | 0.010 | 3.167e-04 | 0.003 | 4.943e-04 | 0.5 | 0.2 | {'l1_ratio': 0.5, 'alpha': 0.2} | 0.407 | 0.550 | 0.548 | 0.502 | 0.067 | 7 | 0.566 | 0.487 | 0.486 | 0.513 | 0.038 |
5 | 0.010 | 6.227e-04 | 0.003 | 5.022e-04 | 0.2 | 0.1 | {'l1_ratio': 0.2, 'alpha': 0.1} | 0.422 | 0.562 | 0.565 | 0.516 | 0.067 | 2 | 0.591 | 0.513 | 0.510 | 0.538 | 0.038 |
6 | 0.009 | 5.607e-04 | 0.003 | 1.322e-03 | 0.5 | 0.01 | {'l1_ratio': 0.5, 'alpha': 0.01} | 0.427 | 0.558 | 0.565 | 0.517 | 0.063 | 1 | 0.609 | 0.531 | 0.527 | 0.556 | 0.038 |
7 | 0.009 | 1.920e-03 | 0.002 | 1.889e-04 | 0.1 | 0.001 | {'l1_ratio': 0.1, 'alpha': 0.001} | 0.426 | 0.554 | 0.562 | 0.514 | 0.062 | 5 | 0.609 | 0.532 | 0.528 | 0.556 | 0.038 |
8 | 0.009 | 9.490e-04 | 0.002 | 3.532e-04 | 1 | 0.01 | {'l1_ratio': 1.0, 'alpha': 0.01} | 0.427 | 0.557 | 0.564 | 0.516 | 0.063 | 3 | 0.609 | 0.531 | 0.527 | 0.556 | 0.038 |
9 | 0.008 | 6.682e-04 | 0.002 | 1.794e-04 | 1 | 0.5 | {'l1_ratio': 1.0, 'alpha': 0.5} | 0.386 | 0.518 | 0.513 | 0.472 | 0.061 | 8 | 0.527 | 0.450 | 0.449 | 0.475 | 0.036 |
-------------------------------------------------- ================================================== Starting for Dt Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 2.9s remaining: 1.9s -------------------------------------------------- Best Estimator for dt without PCA is DecisionTreeRegressor(criterion='friedman_mse', max_depth=200, max_leaf_nodes=20, min_samples_split=200, splitter='random') Best Params for dt without PCA are {'splitter': 'random', 'min_samples_split': 200, 'max_leaf_nodes': 20, 'max_depth': 200, 'criterion': 'friedman_mse'} Cross validation Results for dt without PCA [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 4.0s finished
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_splitter | param_min_samples_split | param_max_leaf_nodes | param_max_depth | param_criterion | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3.599 | 2.758e-01 | 0.005 | 4.065e-04 | best | 100 | 100 | 10 | mae | {'splitter': 'best', 'min_samples_split': 100, 'max_leaf_nodes': 100, 'max_depth': 10, 'criterio... | 0.382 | 0.518 | 0.590 | 0.497 | 0.086 | 4 | 0.623 | 0.536 | 0.529 | 0.563 | 0.043 |
1 | 2.967 | 8.251e-02 | 0.006 | 9.550e-04 | random | 10 | 200 | 5 | mae | {'splitter': 'random', 'min_samples_split': 10, 'max_leaf_nodes': 200, 'max_depth': 5, 'criterio... | 0.417 | 0.550 | 0.585 | 0.517 | 0.073 | 3 | 0.611 | 0.536 | 0.534 | 0.560 | 0.036 |
2 | 0.128 | 1.200e-02 | 0.013 | 5.053e-03 | random | 100 | 500 | 10 | friedman_mse | {'splitter': 'random', 'min_samples_split': 100, 'max_leaf_nodes': 500, 'max_depth': 10, 'criter... | 0.433 | 0.565 | 0.577 | 0.525 | 0.065 | 2 | 0.651 | 0.583 | 0.570 | 0.602 | 0.036 |
3 | 3.687 | 1.236e-01 | 0.005 | 9.598e-05 | best | 5 | 50 | 200 | mae | {'splitter': 'best', 'min_samples_split': 5, 'max_leaf_nodes': 50, 'max_depth': 200, 'criterion'... | 0.365 | 0.482 | 0.526 | 0.458 | 0.068 | 8 | 0.647 | 0.571 | 0.662 | 0.627 | 0.040 |
4 | 0.087 | 6.838e-03 | 0.008 | 8.187e-04 | random | 100 | 50 | 200 | mse | {'splitter': 'random', 'min_samples_split': 100, 'max_leaf_nodes': 50, 'max_depth': 200, 'criter... | 0.430 | 0.543 | 0.496 | 0.490 | 0.046 | 5 | 0.680 | 0.607 | 0.595 | 0.627 | 0.038 |
5 | 0.019 | 2.100e-04 | 0.000 | 0.000e+00 | random | 100 | 20 | 200 | poisson | {'splitter': 'random', 'min_samples_split': 100, 'max_leaf_nodes': 20, 'max_depth': 200, 'criter... | NaN | NaN | NaN | NaN | NaN | 9 | NaN | NaN | NaN | NaN | NaN |
6 | 0.027 | 6.484e-03 | 0.000 | 0.000e+00 | best | 20 | 500 | 2 | poisson | {'splitter': 'best', 'min_samples_split': 20, 'max_leaf_nodes': 500, 'max_depth': 2, 'criterion'... | NaN | NaN | NaN | NaN | NaN | 10 | NaN | NaN | NaN | NaN | NaN |
7 | 0.053 | 2.257e-03 | 0.010 | 1.470e-03 | random | 5 | 10 | 50 | mse | {'splitter': 'random', 'min_samples_split': 5, 'max_leaf_nodes': 10, 'max_depth': 50, 'criterion... | 0.470 | 0.572 | 0.380 | 0.474 | 0.078 | 7 | 0.630 | 0.556 | 0.627 | 0.605 | 0.034 |
8 | 2.264 | 8.061e-02 | 0.008 | 2.765e-04 | random | 1000 | 100 | 5 | mae | {'splitter': 'random', 'min_samples_split': 1000, 'max_leaf_nodes': 100, 'max_depth': 5, 'criter... | 0.369 | 0.540 | 0.530 | 0.480 | 0.078 | 6 | 0.534 | 0.462 | 0.463 | 0.486 | 0.034 |
9 | 0.082 | 1.757e-02 | 0.009 | 8.204e-04 | random | 200 | 20 | 200 | friedman_mse | {'splitter': 'random', 'min_samples_split': 200, 'max_leaf_nodes': 20, 'max_depth': 200, 'criter... | 0.436 | 0.558 | 0.597 | 0.530 | 0.069 | 1 | 0.644 | 0.572 | 0.561 | 0.592 | 0.037 |
-------------------------------------------------- ================================================== Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 0.4s remaining: 0.3s -------------------------------------------------- Best Estimator for dt with PCA is DecisionTreeRegressor(criterion='friedman_mse', max_depth=10, max_leaf_nodes=500, min_samples_split=100, splitter='random') Best Params for dt with PCA are {'splitter': 'random', 'min_samples_split': 100, 'max_leaf_nodes': 500, 'max_depth': 10, 'criterion': 'friedman_mse'} Cross validation Results for dt with PCA [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 3.7s finished
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_splitter | param_min_samples_split | param_max_leaf_nodes | param_max_depth | param_criterion | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2.917 | 2.858e-02 | 0.002 | 6.461e-05 | best | 100 | 100 | 10 | mae | {'splitter': 'best', 'min_samples_split': 100, 'max_leaf_nodes': 100, 'max_depth': 10, 'criterio... | 0.214 | 0.331 | 0.335 | 0.293 | 0.056 | 5 | 0.550 | 0.491 | 0.532 | 0.524 | 0.025 |
1 | 0.630 | 1.625e-02 | 0.002 | 1.986e-04 | random | 10 | 200 | 5 | mae | {'splitter': 'random', 'min_samples_split': 10, 'max_leaf_nodes': 200, 'max_depth': 5, 'criterio... | 0.234 | 0.379 | 0.185 | 0.266 | 0.082 | 6 | 0.330 | 0.268 | 0.209 | 0.269 | 0.049 |
2 | 0.015 | 1.380e-04 | 0.003 | 2.344e-04 | random | 100 | 500 | 10 | friedman_mse | {'splitter': 'random', 'min_samples_split': 100, 'max_leaf_nodes': 500, 'max_depth': 10, 'criter... | 0.304 | 0.418 | 0.356 | 0.359 | 0.047 | 1 | 0.503 | 0.497 | 0.486 | 0.495 | 0.007 |
3 | 3.280 | 2.875e-01 | 0.001 | 6.643e-06 | best | 5 | 50 | 200 | mae | {'splitter': 'best', 'min_samples_split': 5, 'max_leaf_nodes': 50, 'max_depth': 200, 'criterion'... | 0.256 | 0.377 | -0.063 | 0.190 | 0.186 | 7 | 0.619 | 0.538 | 0.660 | 0.606 | 0.051 |
4 | 0.014 | 3.133e-04 | 0.007 | 7.089e-03 | random | 100 | 50 | 200 | mse | {'splitter': 'random', 'min_samples_split': 100, 'max_leaf_nodes': 50, 'max_depth': 200, 'criter... | 0.299 | 0.343 | 0.365 | 0.336 | 0.027 | 2 | 0.524 | 0.473 | 0.482 | 0.493 | 0.022 |
5 | 0.004 | 6.811e-05 | 0.000 | 0.000e+00 | random | 100 | 20 | 200 | poisson | {'splitter': 'random', 'min_samples_split': 100, 'max_leaf_nodes': 20, 'max_depth': 200, 'criter... | NaN | NaN | NaN | NaN | NaN | 9 | NaN | NaN | NaN | NaN | NaN |
6 | 0.004 | 4.888e-04 | 0.000 | 0.000e+00 | best | 20 | 500 | 2 | poisson | {'splitter': 'best', 'min_samples_split': 20, 'max_leaf_nodes': 500, 'max_depth': 2, 'criterion'... | NaN | NaN | NaN | NaN | NaN | 10 | NaN | NaN | NaN | NaN | NaN |
7 | 0.014 | 6.333e-03 | 0.002 | 1.472e-04 | random | 5 | 10 | 50 | mse | {'splitter': 'random', 'min_samples_split': 5, 'max_leaf_nodes': 10, 'max_depth': 50, 'criterion... | 0.277 | 0.341 | 0.331 | 0.316 | 0.028 | 3 | 0.354 | 0.296 | 0.314 | 0.321 | 0.024 |
8 | 0.290 | 1.428e-02 | 0.002 | 6.572e-05 | random | 1000 | 100 | 5 | mae | {'splitter': 'random', 'min_samples_split': 1000, 'max_leaf_nodes': 100, 'max_depth': 5, 'criter... | 0.144 | 0.153 | 0.089 | 0.129 | 0.028 | 8 | 0.198 | 0.133 | 0.043 | 0.125 | 0.064 |
9 | 0.017 | 7.606e-03 | 0.007 | 7.463e-03 | random | 200 | 20 | 200 | friedman_mse | {'splitter': 'random', 'min_samples_split': 200, 'max_leaf_nodes': 20, 'max_depth': 200, 'criter... | 0.241 | 0.328 | 0.379 | 0.316 | 0.057 | 4 | 0.416 | 0.318 | 0.396 | 0.377 | 0.042 |
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. -------------------------------------------------- ================================================== Starting for Rf Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 31.6s remaining: 21.1s [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 4.8min finished -------------------------------------------------- Best Estimator for rf without PCA is RandomForestRegressor(max_depth=5, max_leaf_nodes=10, min_samples_split=200, n_estimators=200) Best Params for rf without PCA are {'n_estimators': 200, 'min_samples_split': 200, 'max_leaf_nodes': 10, 'max_depth': 5, 'criterion': 'mse'} Cross validation Results for rf without PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_n_estimators | param_min_samples_split | param_max_leaf_nodes | param_max_depth | param_criterion | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 8.865 | 0.348 | 0.020 | 1.550e-03 | 50 | 2 | 200 | 50 | mse | {'n_estimators': 50, 'min_samples_split': 2, 'max_leaf_nodes': 200, 'max_depth': 50, 'criterion'... | 0.443 | 0.564 | 0.496 | 0.501 | 0.049 | 9 | 0.880 | 0.864 | 0.871 | 0.872 | 0.007 |
1 | 9.064 | 0.415 | 0.024 | 1.883e-03 | 100 | 5 | 50 | 50 | mse | {'n_estimators': 100, 'min_samples_split': 5, 'max_leaf_nodes': 50, 'max_depth': 50, 'criterion'... | 0.459 | 0.581 | 0.522 | 0.521 | 0.049 | 8 | 0.749 | 0.713 | 0.740 | 0.734 | 0.015 |
2 | 1.973 | 0.094 | 0.019 | 1.135e-03 | 20 | 100 | 50 | 10 | mse | {'n_estimators': 20, 'min_samples_split': 100, 'max_leaf_nodes': 50, 'max_depth': 10, 'criterion... | 0.463 | 0.589 | 0.605 | 0.552 | 0.063 | 2 | 0.668 | 0.593 | 0.582 | 0.614 | 0.038 |
3 | 7.195 | 0.216 | 0.026 | 5.845e-04 | 50 | 100 | 200 | 100 | mse | {'n_estimators': 50, 'min_samples_split': 100, 'max_leaf_nodes': 200, 'max_depth': 100, 'criteri... | 0.464 | 0.584 | 0.604 | 0.551 | 0.062 | 3 | 0.693 | 0.617 | 0.609 | 0.640 | 0.038 |
4 | 132.138 | 3.727 | 0.014 | 2.831e-04 | 100 | 20 | 10 | 5 | mae | {'n_estimators': 100, 'min_samples_split': 20, 'max_leaf_nodes': 10, 'max_depth': 5, 'criterion'... | 0.426 | 0.586 | 0.592 | 0.535 | 0.077 | 5 | 0.603 | 0.525 | 0.525 | 0.551 | 0.037 |
5 | 18.497 | 0.529 | 0.011 | 3.192e-04 | 20 | 20 | 1000 | 3 | mae | {'n_estimators': 20, 'min_samples_split': 20, 'max_leaf_nodes': 1000, 'max_depth': 3, 'criterion... | 0.418 | 0.579 | 0.589 | 0.529 | 0.078 | 7 | 0.587 | 0.511 | 0.509 | 0.536 | 0.036 |
6 | 172.305 | 2.749 | 0.015 | 7.057e-04 | 100 | 5 | 1000 | 10 | mae | {'n_estimators': 100, 'min_samples_split': 5, 'max_leaf_nodes': 1000, 'max_depth': 10, 'criterio... | 0.433 | 0.571 | 0.589 | 0.531 | 0.070 | 6 | 0.700 | 0.625 | 0.642 | 0.656 | 0.032 |
7 | 120.353 | 4.729 | 0.015 | 2.276e-03 | 50 | 50 | 50 | 200 | mae | {'n_estimators': 50, 'min_samples_split': 50, 'max_leaf_nodes': 50, 'max_depth': 200, 'criterion... | 0.439 | 0.580 | 0.602 | 0.540 | 0.072 | 4 | 0.675 | 0.590 | 0.591 | 0.619 | 0.040 |
8 | 4.707 | 0.011 | 0.031 | 5.084e-04 | 200 | 200 | 10 | 5 | mse | {'n_estimators': 200, 'min_samples_split': 200, 'max_leaf_nodes': 10, 'max_depth': 5, 'criterion... | 0.468 | 0.592 | 0.612 | 0.557 | 0.064 | 1 | 0.636 | 0.559 | 0.557 | 0.584 | 0.037 |
9 | 242.883 | 11.811 | 0.030 | 1.265e-03 | 500 | 20 | 1000 | 2 | mae | {'n_estimators': 500, 'min_samples_split': 20, 'max_leaf_nodes': 1000, 'max_depth': 2, 'criterio... | 0.377 | 0.529 | 0.557 | 0.488 | 0.079 | 10 | 0.534 | 0.463 | 0.460 | 0.486 | 0.034 |
-------------------------------------------------- ================================================== Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 40.1s remaining: 26.7s [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 5.5min finished -------------------------------------------------- Best Estimator for rf with PCA is RandomForestRegressor(criterion='mae', max_depth=200, max_leaf_nodes=50, min_samples_split=50, n_estimators=50) Best Params for rf with PCA are {'n_estimators': 50, 'min_samples_split': 50, 'max_leaf_nodes': 50, 'max_depth': 200, 'criterion': 'mae'} Cross validation Results for rf with PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_n_estimators | param_min_samples_split | param_max_leaf_nodes | param_max_depth | param_criterion | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 8.094 | 0.237 | 0.012 | 1.846e-04 | 50 | 2 | 200 | 50 | mse | {'n_estimators': 50, 'min_samples_split': 2, 'max_leaf_nodes': 200, 'max_depth': 50, 'criterion'... | 0.375 | 0.492 | 0.505 | 0.457 | 0.059 | 4 | 0.897 | 0.873 | 0.880 | 0.883 | 0.010 |
1 | 11.085 | 0.356 | 0.017 | 4.262e-04 | 100 | 5 | 50 | 50 | mse | {'n_estimators': 100, 'min_samples_split': 5, 'max_leaf_nodes': 50, 'max_depth': 50, 'criterion'... | 0.397 | 0.478 | 0.520 | 0.465 | 0.051 | 3 | 0.756 | 0.729 | 0.746 | 0.743 | 0.011 |
2 | 1.906 | 0.026 | 0.005 | 1.413e-05 | 20 | 100 | 50 | 10 | mse | {'n_estimators': 20, 'min_samples_split': 100, 'max_leaf_nodes': 50, 'max_depth': 10, 'criterion... | 0.376 | 0.454 | 0.498 | 0.443 | 0.050 | 5 | 0.649 | 0.588 | 0.616 | 0.618 | 0.025 |
3 | 6.366 | 0.406 | 0.010 | 2.201e-04 | 50 | 100 | 200 | 100 | mse | {'n_estimators': 50, 'min_samples_split': 100, 'max_leaf_nodes': 200, 'max_depth': 100, 'criteri... | 0.375 | 0.444 | 0.497 | 0.438 | 0.050 | 6 | 0.676 | 0.666 | 0.672 | 0.671 | 0.004 |
4 | 138.673 | 3.880 | 0.010 | 1.089e-04 | 100 | 20 | 10 | 5 | mae | {'n_estimators': 100, 'min_samples_split': 20, 'max_leaf_nodes': 10, 'max_depth': 5, 'criterion'... | 0.321 | 0.437 | 0.453 | 0.404 | 0.059 | 7 | 0.489 | 0.397 | 0.430 | 0.439 | 0.038 |
5 | 21.699 | 0.430 | 0.005 | 8.841e-05 | 20 | 20 | 1000 | 3 | mae | {'n_estimators': 20, 'min_samples_split': 20, 'max_leaf_nodes': 1000, 'max_depth': 3, 'criterion... | 0.258 | 0.341 | 0.370 | 0.323 | 0.047 | 9 | 0.410 | 0.310 | 0.361 | 0.360 | 0.041 |
6 | 180.531 | 1.339 | 0.013 | 4.746e-04 | 100 | 5 | 1000 | 10 | mae | {'n_estimators': 100, 'min_samples_split': 5, 'max_leaf_nodes': 1000, 'max_depth': 10, 'criterio... | 0.378 | 0.520 | 0.533 | 0.477 | 0.070 | 2 | 0.779 | 0.680 | 0.708 | 0.722 | 0.042 |
7 | 126.021 | 9.398 | 0.010 | 2.640e-04 | 50 | 50 | 50 | 200 | mae | {'n_estimators': 50, 'min_samples_split': 50, 'max_leaf_nodes': 50, 'max_depth': 200, 'criterion... | 0.384 | 0.526 | 0.524 | 0.478 | 0.067 | 1 | 0.699 | 0.641 | 0.664 | 0.668 | 0.024 |
8 | 11.420 | 0.145 | 0.026 | 1.158e-03 | 200 | 200 | 10 | 5 | mse | {'n_estimators': 200, 'min_samples_split': 200, 'max_leaf_nodes': 10, 'max_depth': 5, 'criterion... | 0.346 | 0.359 | 0.439 | 0.381 | 0.041 | 8 | 0.497 | 0.465 | 0.478 | 0.480 | 0.013 |
9 | 289.174 | 3.287 | 0.031 | 1.359e-03 | 500 | 20 | 1000 | 2 | mae | {'n_estimators': 500, 'min_samples_split': 20, 'max_leaf_nodes': 1000, 'max_depth': 2, 'criterio... | 0.178 | 0.263 | 0.274 | 0.238 | 0.043 | 10 | 0.296 | 0.244 | 0.252 | 0.264 | 0.023 |
-------------------------------------------------- ================================================== Starting for Adb Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 11.0s remaining: 7.4s [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 39.0s finished -------------------------------------------------- Best Estimator for adb without PCA is AdaBoostRegressor(learning_rate=0.0001, loss='square', n_estimators=20, random_state=0) Best Params for adb without PCA are {'random_state': 0, 'n_estimators': 20, 'loss': 'square', 'learning_rate': 0.0001} Cross validation Results for adb without PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_random_state | param_n_estimators | param_loss | param_learning_rate | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3.152 | 0.626 | 0.207 | 0.060 | 0 | 20 | square | 0.0001 | {'random_state': 0, 'n_estimators': 20, 'loss': 'square', 'learning_rate': 0.0001} | 0.464 | 0.606 | 0.610 | 0.560 | 0.068 | 1 | 0.612 | 0.541 | 0.538 | 0.563 | 0.034 |
1 | 3.223 | 0.814 | 0.214 | 0.062 | 0 | 20 | square | 0.01 | {'random_state': 0, 'n_estimators': 20, 'loss': 'square', 'learning_rate': 0.01} | 0.464 | 0.578 | 0.607 | 0.550 | 0.062 | 4 | 0.613 | 0.540 | 0.538 | 0.564 | 0.035 |
2 | 4.630 | 0.096 | 0.248 | 0.007 | 0 | 20 | linear | 0.01 | {'random_state': 0, 'n_estimators': 20, 'loss': 'linear', 'learning_rate': 0.01} | 0.464 | 0.578 | 0.609 | 0.550 | 0.062 | 2 | 0.613 | 0.541 | 0.535 | 0.563 | 0.036 |
3 | 16.399 | 1.436 | 0.333 | 0.021 | 0 | 100 | linear | 0.001 | {'random_state': 0, 'n_estimators': 100, 'loss': 'linear', 'learning_rate': 0.001} | 0.464 | 0.576 | 0.609 | 0.550 | 0.062 | 3 | 0.612 | 0.539 | 0.537 | 0.563 | 0.035 |
4 | 2.170 | 0.215 | 0.145 | 0.052 | 0 | 10 | linear | 0.1 | {'random_state': 0, 'n_estimators': 10, 'loss': 'linear', 'learning_rate': 0.1} | 0.466 | 0.574 | 0.607 | 0.549 | 0.060 | 6 | 0.616 | 0.539 | 0.535 | 0.563 | 0.037 |
5 | 4.109 | 0.750 | 0.301 | 0.129 | 0 | 20 | exponential | 1 | {'random_state': 0, 'n_estimators': 20, 'loss': 'exponential', 'learning_rate': 1.0} | 0.199 | 0.265 | -0.099 | 0.122 | 0.158 | 9 | 0.279 | 0.373 | 0.049 | 0.234 | 0.136 |
6 | 29.951 | 0.748 | 1.069 | 0.078 | 0 | 500 | square | 0.01 | {'random_state': 0, 'n_estimators': 500, 'loss': 'square', 'learning_rate': 0.01} | 0.420 | 0.507 | 0.461 | 0.462 | 0.035 | 7 | 0.585 | 0.540 | 0.508 | 0.544 | 0.031 |
7 | 2.426 | 0.164 | 0.143 | 0.037 | 0 | 10 | square | 1 | {'random_state': 0, 'n_estimators': 10, 'loss': 'square', 'learning_rate': 1.0} | 0.284 | 0.303 | 0.084 | 0.223 | 0.099 | 8 | 0.410 | 0.393 | 0.229 | 0.344 | 0.081 |
8 | 2.206 | 0.334 | 0.116 | 0.036 | 0 | 10 | square | 0.0001 | {'random_state': 0, 'n_estimators': 10, 'loss': 'square', 'learning_rate': 0.0001} | 0.463 | 0.577 | 0.609 | 0.549 | 0.063 | 5 | 0.612 | 0.545 | 0.537 | 0.565 | 0.034 |
9 | 1.130 | 0.146 | 0.126 | 0.006 | 0 | 10 | square | 10 | {'random_state': 0, 'n_estimators': 10, 'loss': 'square', 'learning_rate': 10.0} | -4.353 | -180.328 | -182.693 | -122.458 | 83.519 | 10 | -5.058 | -161.492 | -160.647 | -109.066 | 73.545 |
-------------------------------------------------- ================================================== Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 2.3s remaining: 1.5s [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 20.2s finished -------------------------------------------------- Best Estimator for adb with PCA is AdaBoostRegressor(learning_rate=0.01, loss='square', n_estimators=20, random_state=0) Best Params for adb with PCA are {'random_state': 0, 'n_estimators': 20, 'loss': 'square', 'learning_rate': 0.01} Cross validation Results for adb with PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_random_state | param_n_estimators | param_loss | param_learning_rate | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.167 | 0.036 | 0.008 | 1.235e-04 | 0 | 20 | square | 0.0001 | {'random_state': 0, 'n_estimators': 20, 'loss': 'square', 'learning_rate': 0.0001} | 0.268 | 0.328 | 0.331 | 0.309 | 0.029 | 2 | 0.356 | 0.305 | 0.313 | 0.325 | 0.023 |
1 | 1.291 | 0.157 | 0.009 | 1.102e-03 | 0 | 20 | square | 0.01 | {'random_state': 0, 'n_estimators': 20, 'loss': 'square', 'learning_rate': 0.01} | 0.269 | 0.334 | 0.330 | 0.311 | 0.030 | 1 | 0.357 | 0.314 | 0.314 | 0.328 | 0.020 |
2 | 1.219 | 0.018 | 0.008 | 2.989e-05 | 0 | 20 | linear | 0.01 | {'random_state': 0, 'n_estimators': 20, 'loss': 'linear', 'learning_rate': 0.01} | 0.263 | 0.327 | 0.337 | 0.309 | 0.033 | 3 | 0.350 | 0.305 | 0.319 | 0.325 | 0.019 |
3 | 5.046 | 0.034 | 0.021 | 5.790e-04 | 0 | 100 | linear | 0.001 | {'random_state': 0, 'n_estimators': 100, 'loss': 'linear', 'learning_rate': 0.001} | 0.264 | 0.329 | 0.333 | 0.308 | 0.032 | 4 | 0.351 | 0.302 | 0.313 | 0.322 | 0.021 |
4 | 0.560 | 0.006 | 0.006 | 1.149e-03 | 0 | 10 | linear | 0.1 | {'random_state': 0, 'n_estimators': 10, 'loss': 'linear', 'learning_rate': 0.1} | 0.283 | 0.119 | 0.305 | 0.236 | 0.083 | 6 | 0.376 | 0.345 | 0.321 | 0.347 | 0.023 |
5 | 0.944 | 0.019 | 0.009 | 9.705e-04 | 0 | 20 | exponential | 1 | {'random_state': 0, 'n_estimators': 20, 'loss': 'exponential', 'learning_rate': 1.0} | 0.195 | 0.061 | 0.116 | 0.124 | 0.055 | 9 | 0.327 | 0.221 | 0.293 | 0.280 | 0.044 |
6 | 18.139 | 0.348 | 0.092 | 9.327e-04 | 0 | 500 | square | 0.01 | {'random_state': 0, 'n_estimators': 500, 'loss': 'square', 'learning_rate': 0.01} | 0.223 | 0.224 | 0.196 | 0.214 | 0.013 | 7 | 0.342 | 0.292 | 0.307 | 0.314 | 0.021 |
7 | 0.461 | 0.009 | 0.006 | 6.552e-04 | 0 | 10 | square | 1 | {'random_state': 0, 'n_estimators': 10, 'loss': 'square', 'learning_rate': 1.0} | 0.122 | 0.277 | 0.037 | 0.145 | 0.099 | 8 | 0.248 | 0.338 | 0.166 | 0.251 | 0.070 |
8 | 0.564 | 0.047 | 0.006 | 7.505e-04 | 0 | 10 | square | 0.0001 | {'random_state': 0, 'n_estimators': 10, 'loss': 'square', 'learning_rate': 0.0001} | 0.264 | 0.295 | 0.321 | 0.293 | 0.023 | 5 | 0.346 | 0.296 | 0.305 | 0.316 | 0.022 |
9 | 0.097 | 0.003 | 0.005 | 1.047e-03 | 0 | 10 | square | 10 | {'random_state': 0, 'n_estimators': 10, 'loss': 'square', 'learning_rate': 10.0} | -22.227 | -180.328 | -182.693 | -128.416 | 75.093 | 10 | -28.303 | -161.492 | -160.647 | -116.814 | 62.588 |
-------------------------------------------------- ================================================== Starting for Xgbr Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 40.0s remaining: 26.7s [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 13.3min finished -------------------------------------------------- Best Estimator for xgbr without PCA is XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, gamma=0.1, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.1, max_delta_step=0, max_depth=50, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=500, n_jobs=12, num_parallel_tree=1, random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact', validate_parameters=1, verbosity=None) Best Params for xgbr without PCA are {'n_estimators': 500, 'max_depth': 50, 'learning_rate': 0.1, 'gamma': 0.1} Cross validation Results for xgbr without PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_n_estimators | param_max_depth | param_learning_rate | param_gamma | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 37.182 | 2.040 | 0.203 | 0.030 | 500 | 3 | 0.0001 | 0.01 | {'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.0001, 'gamma': 0.01} | -4.951e+01 | -6.005e+01 | -6.029e+01 | -5.662e+01 | 5.029e+00 | 5 | -6.018e+01 | -5.421e+01 | -5.417e+01 | -5.619e+01 | 2.823e+00 |
1 | 786.371 | 10.368 | 0.044 | 0.026 | 1000 | 200 | 0.1 | 0.001 | {'n_estimators': 1000, 'max_depth': 200, 'learning_rate': 0.1, 'gamma': 0.001} | 3.401e-01 | 5.247e-01 | 2.944e-01 | 3.864e-01 | 9.958e-02 | 2 | 9.885e-01 | 9.830e-01 | 9.909e-01 | 9.875e-01 | 3.324e-03 |
2 | 2.002 | 0.500 | 0.225 | 0.028 | 20 | 20 | 10 | 1 | {'n_estimators': 20, 'max_depth': 20, 'learning_rate': 10.0, 'gamma': 1.0} | -7.397e+39 | -9.188e+39 | -9.296e+39 | -8.627e+39 | 8.711e+38 | 10 | -9.161e+39 | -8.252e+39 | -8.241e+39 | -8.551e+39 | 4.310e+38 |
3 | 539.966 | 0.418 | 0.129 | 0.022 | 500 | 50 | 0.1 | 0.1 | {'n_estimators': 500, 'max_depth': 50, 'learning_rate': 0.1, 'gamma': 0.1} | 3.455e-01 | 5.225e-01 | 3.438e-01 | 4.040e-01 | 8.385e-02 | 1 | 9.884e-01 | 9.829e-01 | 9.908e-01 | 9.874e-01 | 3.319e-03 |
4 | 5.834 | 0.819 | 0.193 | 0.014 | 50 | 200 | 0.01 | 0.01 | {'n_estimators': 50, 'max_depth': 200, 'learning_rate': 0.01, 'gamma': 0.01} | -1.993e+01 | -2.390e+01 | -2.399e+01 | -2.260e+01 | 1.893e+00 | 3 | -2.403e+01 | -2.166e+01 | -2.164e+01 | -2.244e+01 | 1.123e+00 |
5 | 1.794 | 0.149 | 0.208 | 0.007 | 10 | 50 | 0.001 | 0.01 | {'n_estimators': 10, 'max_depth': 50, 'learning_rate': 0.001, 'gamma': 0.01} | -5.363e+01 | -6.511e+01 | -6.537e+01 | -6.137e+01 | 5.471e+00 | 8 | -6.523e+01 | -5.876e+01 | -5.872e+01 | -6.090e+01 | 3.061e+00 |
6 | 6.075 | 0.099 | 0.211 | 0.005 | 50 | 200 | 0.0001 | 10 | {'n_estimators': 50, 'max_depth': 200, 'learning_rate': 0.0001, 'gamma': 10.0} | -5.417e+01 | -6.577e+01 | -6.603e+01 | -6.199e+01 | 5.529e+00 | 9 | -6.589e+01 | -5.935e+01 | -5.931e+01 | -6.152e+01 | 3.092e+00 |
7 | 20.850 | 0.502 | 0.200 | 0.024 | 200 | 50 | 0.0001 | 1 | {'n_estimators': 200, 'max_depth': 50, 'learning_rate': 0.0001, 'gamma': 1.0} | -5.257e+01 | -6.381e+01 | -6.406e+01 | -6.015e+01 | 5.357e+00 | 6 | -6.393e+01 | -5.759e+01 | -5.755e+01 | -5.969e+01 | 3.000e+00 |
8 | 17.460 | 0.291 | 0.182 | 0.059 | 200 | 10 | 0.0001 | 0.1 | {'n_estimators': 200, 'max_depth': 10, 'learning_rate': 0.0001, 'gamma': 0.1} | -5.257e+01 | -6.381e+01 | -6.406e+01 | -6.015e+01 | 5.357e+00 | 6 | -6.393e+01 | -5.759e+01 | -5.755e+01 | -5.969e+01 | 3.000e+00 |
9 | 14.708 | 1.717 | 0.160 | 0.024 | 200 | 3 | 0.001 | 10 | {'n_estimators': 200, 'max_depth': 3, 'learning_rate': 0.001, 'gamma': 10.0} | -3.664e+01 | -4.431e+01 | -4.448e+01 | -4.181e+01 | 3.655e+00 | 4 | -4.445e+01 | -4.004e+01 | -4.001e+01 | -4.150e+01 | 2.083e+00 |
-------------------------------------------------- ================================================== Fitting 3 folds for each of 10 candidates, totalling 30 fits [Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers. [Parallel(n_jobs=-1)]: Done 18 out of 30 | elapsed: 52.2s remaining: 34.8s [Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 7.1min finished -------------------------------------------------- Best Estimator for xgbr with PCA is XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, gamma=0.1, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.1, max_delta_step=0, max_depth=50, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=500, n_jobs=12, num_parallel_tree=1, random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact', validate_parameters=1, verbosity=None) Best Params for xgbr with PCA are {'n_estimators': 500, 'max_depth': 50, 'learning_rate': 0.1, 'gamma': 0.1} Cross validation Results for xgbr with PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_n_estimators | param_max_depth | param_learning_rate | param_gamma | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 42.841 | 7.430 | 0.079 | 2.686e-02 | 500 | 3 | 0.0001 | 0.01 | {'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.0001, 'gamma': 0.01} | -4.952e+01 | -6.009e+01 | -6.031e+01 | -5.664e+01 | 5.033e+00 | 5 | -6.021e+01 | -5.423e+01 | -5.419e+01 | -5.621e+01 | 2.826e+00 |
1 | 417.781 | 3.043 | 0.009 | 2.533e-03 | 1000 | 200 | 0.1 | 0.001 | {'n_estimators': 1000, 'max_depth': 200, 'learning_rate': 0.1, 'gamma': 0.001} | 3.289e-01 | 4.981e-01 | 4.416e-01 | 4.229e-01 | 7.033e-02 | 2 | 9.885e-01 | 9.830e-01 | 9.909e-01 | 9.875e-01 | 3.324e-03 |
2 | 1.518 | 0.184 | 0.036 | 6.306e-03 | 20 | 20 | 10 | 1 | {'n_estimators': 20, 'max_depth': 20, 'learning_rate': 10.0, 'gamma': 1.0} | -7.273e+39 | -9.167e+39 | -9.189e+39 | -8.543e+39 | 8.980e+38 | 10 | -9.007e+39 | -8.233e+39 | -8.146e+39 | -8.462e+39 | 3.873e+38 |
3 | 265.782 | 15.563 | 0.038 | 1.101e-02 | 500 | 50 | 0.1 | 0.1 | {'n_estimators': 500, 'max_depth': 50, 'learning_rate': 0.1, 'gamma': 0.1} | 3.363e-01 | 4.990e-01 | 4.392e-01 | 4.248e-01 | 6.722e-02 | 1 | 9.885e-01 | 9.830e-01 | 9.909e-01 | 9.874e-01 | 3.322e-03 |
4 | 6.417 | 0.278 | 0.073 | 1.404e-02 | 50 | 200 | 0.01 | 0.01 | {'n_estimators': 50, 'max_depth': 200, 'learning_rate': 0.01, 'gamma': 0.01} | -2.004e+01 | -2.410e+01 | -2.410e+01 | -2.274e+01 | 1.914e+00 | 3 | -2.416e+01 | -2.177e+01 | -2.175e+01 | -2.256e+01 | 1.131e+00 |
5 | 1.283 | 0.063 | 0.041 | 9.808e-04 | 10 | 50 | 0.001 | 0.01 | {'n_estimators': 10, 'max_depth': 50, 'learning_rate': 0.001, 'gamma': 0.01} | -5.364e+01 | -6.512e+01 | -6.537e+01 | -6.138e+01 | 5.472e+00 | 8 | -6.524e+01 | -5.876e+01 | -5.872e+01 | -6.091e+01 | 3.061e+00 |
6 | 6.369 | 0.486 | 0.041 | 8.368e-03 | 50 | 200 | 0.0001 | 10 | {'n_estimators': 50, 'max_depth': 200, 'learning_rate': 0.0001, 'gamma': 10.0} | -5.418e+01 | -6.578e+01 | -6.603e+01 | -6.199e+01 | 5.529e+00 | 9 | -6.590e+01 | -5.936e+01 | -5.932e+01 | -6.152e+01 | 3.092e+00 |
7 | 26.757 | 1.395 | 0.042 | 1.412e-02 | 200 | 50 | 0.0001 | 1 | {'n_estimators': 200, 'max_depth': 50, 'learning_rate': 0.0001, 'gamma': 1.0} | -5.258e+01 | -6.383e+01 | -6.406e+01 | -6.016e+01 | 5.358e+00 | 6 | -6.394e+01 | -5.760e+01 | -5.756e+01 | -5.970e+01 | 3.001e+00 |
8 | 22.957 | 2.810 | 0.044 | 1.296e-02 | 200 | 10 | 0.0001 | 0.1 | {'n_estimators': 200, 'max_depth': 10, 'learning_rate': 0.0001, 'gamma': 0.1} | -5.258e+01 | -6.383e+01 | -6.406e+01 | -6.016e+01 | 5.358e+00 | 6 | -6.394e+01 | -5.760e+01 | -5.756e+01 | -5.970e+01 | 3.001e+00 |
9 | 15.105 | 0.477 | 0.040 | 1.298e-02 | 200 | 3 | 0.001 | 10 | {'n_estimators': 200, 'max_depth': 3, 'learning_rate': 0.001, 'gamma': 10.0} | -3.671e+01 | -4.445e+01 | -4.454e+01 | -4.190e+01 | 3.667e+00 | 4 | -4.454e+01 | -4.011e+01 | -4.008e+01 | -4.158e+01 | 2.091e+00 |
-------------------------------------------------- ==================================================
print(f"Best Params for {model} {data_mode_str} are {ra_s_cv.best_params_}\n")
print(f"Cross validation Results for {model} {data_mode_str}\n")
display(pd.DataFrame(ra_s_cv.cv_results_))
print("-"*50)
# Plot evaluation matrix
prepare_error_data_and_plot(ra_s_cv, X_train, y_train, train_pred, mode, model, show_graphs)
prepare_error_data_and_plot(ra_s_cv, X_test, y_test, test_pred, mode, model, show_graphs)
# Record performance
Best Params for xgbr with PCA are {'n_estimators': 500, 'max_depth': 50, 'learning_rate': 0.1, 'gamma': 0.1} Cross validation Results for xgbr with PCA
mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_n_estimators | param_max_depth | param_learning_rate | param_gamma | params | split0_test_score | split1_test_score | split2_test_score | mean_test_score | std_test_score | rank_test_score | split0_train_score | split1_train_score | split2_train_score | mean_train_score | std_train_score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 42.841 | 7.430 | 0.079 | 2.686e-02 | 500 | 3 | 0.0001 | 0.01 | {'n_estimators': 500, 'max_depth': 3, 'learning_rate': 0.0001, 'gamma': 0.01} | -4.952e+01 | -6.009e+01 | -6.031e+01 | -5.664e+01 | 5.033e+00 | 5 | -6.021e+01 | -5.423e+01 | -5.419e+01 | -5.621e+01 | 2.826e+00 |
1 | 417.781 | 3.043 | 0.009 | 2.533e-03 | 1000 | 200 | 0.1 | 0.001 | {'n_estimators': 1000, 'max_depth': 200, 'learning_rate': 0.1, 'gamma': 0.001} | 3.289e-01 | 4.981e-01 | 4.416e-01 | 4.229e-01 | 7.033e-02 | 2 | 9.885e-01 | 9.830e-01 | 9.909e-01 | 9.875e-01 | 3.324e-03 |
2 | 1.518 | 0.184 | 0.036 | 6.306e-03 | 20 | 20 | 10 | 1 | {'n_estimators': 20, 'max_depth': 20, 'learning_rate': 10.0, 'gamma': 1.0} | -7.273e+39 | -9.167e+39 | -9.189e+39 | -8.543e+39 | 8.980e+38 | 10 | -9.007e+39 | -8.233e+39 | -8.146e+39 | -8.462e+39 | 3.873e+38 |
3 | 265.782 | 15.563 | 0.038 | 1.101e-02 | 500 | 50 | 0.1 | 0.1 | {'n_estimators': 500, 'max_depth': 50, 'learning_rate': 0.1, 'gamma': 0.1} | 3.363e-01 | 4.990e-01 | 4.392e-01 | 4.248e-01 | 6.722e-02 | 1 | 9.885e-01 | 9.830e-01 | 9.909e-01 | 9.874e-01 | 3.322e-03 |
4 | 6.417 | 0.278 | 0.073 | 1.404e-02 | 50 | 200 | 0.01 | 0.01 | {'n_estimators': 50, 'max_depth': 200, 'learning_rate': 0.01, 'gamma': 0.01} | -2.004e+01 | -2.410e+01 | -2.410e+01 | -2.274e+01 | 1.914e+00 | 3 | -2.416e+01 | -2.177e+01 | -2.175e+01 | -2.256e+01 | 1.131e+00 |
5 | 1.283 | 0.063 | 0.041 | 9.808e-04 | 10 | 50 | 0.001 | 0.01 | {'n_estimators': 10, 'max_depth': 50, 'learning_rate': 0.001, 'gamma': 0.01} | -5.364e+01 | -6.512e+01 | -6.537e+01 | -6.138e+01 | 5.472e+00 | 8 | -6.524e+01 | -5.876e+01 | -5.872e+01 | -6.091e+01 | 3.061e+00 |
6 | 6.369 | 0.486 | 0.041 | 8.368e-03 | 50 | 200 | 0.0001 | 10 | {'n_estimators': 50, 'max_depth': 200, 'learning_rate': 0.0001, 'gamma': 10.0} | -5.418e+01 | -6.578e+01 | -6.603e+01 | -6.199e+01 | 5.529e+00 | 9 | -6.590e+01 | -5.936e+01 | -5.932e+01 | -6.152e+01 | 3.092e+00 |
7 | 26.757 | 1.395 | 0.042 | 1.412e-02 | 200 | 50 | 0.0001 | 1 | {'n_estimators': 200, 'max_depth': 50, 'learning_rate': 0.0001, 'gamma': 1.0} | -5.258e+01 | -6.383e+01 | -6.406e+01 | -6.016e+01 | 5.358e+00 | 6 | -6.394e+01 | -5.760e+01 | -5.756e+01 | -5.970e+01 | 3.001e+00 |
8 | 22.957 | 2.810 | 0.044 | 1.296e-02 | 200 | 10 | 0.0001 | 0.1 | {'n_estimators': 200, 'max_depth': 10, 'learning_rate': 0.0001, 'gamma': 0.1} | -5.258e+01 | -6.383e+01 | -6.406e+01 | -6.016e+01 | 5.358e+00 | 6 | -6.394e+01 | -5.760e+01 | -5.756e+01 | -5.970e+01 | 3.001e+00 |
9 | 15.105 | 0.477 | 0.040 | 1.298e-02 | 200 | 3 | 0.001 | 10 | {'n_estimators': 200, 'max_depth': 3, 'learning_rate': 0.001, 'gamma': 10.0} | -3.671e+01 | -4.445e+01 | -4.454e+01 | -4.190e+01 | 3.667e+00 | 4 | -4.454e+01 | -4.011e+01 | -4.008e+01 | -4.158e+01 | 2.091e+00 |
--------------------------------------------------
Actual | Predicted | Residual | |
---|---|---|---|
1073 | 97.94 | 95.485 | 2.455 |
144 | 96.41 | 100.184 | -3.774 |
2380 | 105.83 | 104.990 | 0.840 |
184 | 79.09 | 77.418 | 1.672 |
2587 | 108.69 | 109.566 | -0.876 |
... | ... | ... | ... |
657 | 113.68 | 111.256 | 2.424 |
3975 | 88.85 | 92.392 | -3.542 |
907 | 89.60 | 97.462 | -7.862 |
3597 | 89.23 | 96.537 | -7.307 |
1971 | 109.49 | 109.504 | -0.014 |
842 rows × 3 columns
# Summary table of all recorded model runs: one row per entry of
# `all_model_metrics`, with every score column rendered to two decimals.
# "Time Taken" is intentionally left with the Styler's default formatting.
_metrics_styler = pd.DataFrame(
    all_model_metrics,
    columns=["Algo", "Best Training Score (CV)", "Train Score", "Test Score", "Train R2", "Test R2", "Time Taken"],
).style.format({
    "Best Training Score (CV)": "{:.2f}",
    "Train Score": "{:.2f}",
    "Test Score": "{:.2f}",
    "Train R2": "{:.2f}",
    "Test R2": "{:.2f}"
})
# Styler.hide_index() was deprecated in pandas 1.4 and removed in pandas 2.0 in
# favour of Styler.hide(axis="index"). Prefer the new API when it exists and
# fall back to the legacy call so the cell still runs on older pandas.
# The bare expression is kept last so the notebook renders the styled table.
_metrics_styler.hide(axis="index") if hasattr(_metrics_styler, "hide") else _metrics_styler.hide_index()
Algo | Best Training Score (CV) | Train Score | Test Score | Train R2 | Test R2 | Time Taken |
---|---|---|---|---|---|---|
linear without PCA | -56301975154568338604032.00 | 0.62 | -4480720382390470967296.00 | 0.62 | -4480720382390470967296.00 | 1.254 |
linear with PCA | 0.51 | 0.55 | 0.56 | 0.55 | 0.56 | 0.635 |
ridge without PCA | 0.53 | 0.60 | 0.58 | 0.60 | 0.58 | 1.140 |
ridge with PCA | 0.52 | 0.55 | 0.56 | 0.55 | 0.56 | 0.177 |
lasso without PCA | 0.54 | 0.55 | 0.58 | 0.55 | 0.58 | 5.565 |
lasso with PCA | 0.52 | 0.55 | 0.56 | 0.55 | 0.56 | 0.182 |
elasticnet without PCA | 0.54 | 0.60 | 0.60 | 0.60 | 0.60 | 6.448 |
elasticnet with PCA | 0.52 | 0.55 | 0.56 | 0.55 | 0.56 | 0.241 |
dt without PCA | 0.53 | 0.58 | 0.58 | 0.58 | 0.58 | 4.223 |
dt with PCA | 0.36 | 0.51 | 0.40 | 0.51 | 0.40 | 3.812 |
rf without PCA | 0.56 | 0.58 | 0.59 | 0.58 | 0.59 | 290.588 |
rf with PCA | 0.48 | 0.62 | 0.51 | 0.62 | 0.51 | 463.777 |
adb without PCA | 0.56 | 0.56 | 0.60 | 0.56 | 0.60 | 39.828 |
adb with PCA | 0.31 | 0.33 | -0.16 | 0.33 | -0.16 | 21.341 |
xgbr without PCA | 0.40 | 0.98 | 0.38 | 0.98 | 0.38 | 827.256 |
xgbr with PCA | 0.42 | 0.98 | 0.40 | 0.98 | 0.40 | 475.330 |