from functools import partial
from sklearn.model_selection import \
(cross_validate,
KFold,
ShuffleSplit)
from sklearn.base import clone
from ISLP.models import sklearn_sm
def evalMSE(terms, response, train, test):
    """Return the test-set mean squared error of an OLS fit.

    A model specification is built from ``terms`` via ``MS`` (defined
    outside this chunk -- presumably ISLP's ``ModelSpec``; confirm),
    fitted on ``train`` and then applied unchanged to ``test``.

    Parameters
    ----------
    terms : passed straight to ``MS`` to describe the design matrix.
    response : column label of the response in both ``train`` and ``test``.
    train : data used to fit the specification and the OLS model.
    test : held-out data on which the squared error is averaged.

    Returns
    -------
    The mean of the squared differences between the observed test
    response and the model's predictions.
    """
    spec = MS(terms)

    # Fit the specification on the training data only; the test design
    # matrix reuses that fitted transform.
    design_train = spec.fit_transform(train)
    design_test = spec.transform(test)

    fitted = sm.OLS(train[response], design_train).fit()
    errors = test[response] - fitted.predict(design_test)
    return np.mean(errors**2)
Cross-Validation
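- Use the ISLP wrapper `sklearn_sm()`, because sklearn and statsmodels do not work together directly: the wrapper lets a statsmodels model be passed to sklearn tools such as `cross_validate()`.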
Bootstrap
def boot_SE(func,
            D,
            n=None,
            B=1000,
            seed=0):
    """Estimate the bootstrap standard error of a statistic.

    Parameters
    ----------
    func : callable invoked as ``func(D, idx)``; its return value must
        support ``+``, ``**`` and division by an int (e.g. a float, a
        NumPy array or a pandas Series).
    D : data object exposing ``.index`` and ``.shape``; rows are resampled
        by drawing labels from ``D.index``.
    n : number of labels drawn per replicate. Any falsy value (``None``
        or ``0``) means "use ``D.shape[0]``".
    B : number of bootstrap replicates.
    seed : seed for ``np.random.default_rng``, making the result
        reproducible.

    Returns
    -------
    ``sqrt(mean(value**2) - mean(value)**2)`` over the ``B`` replicates,
    i.e. the population-style (divide by ``B``) standard deviation of the
    statistic, element-wise when ``func`` returns a vector.
    """
    rng = np.random.default_rng(seed)
    n = n or D.shape[0]

    total, total_sq = 0, 0
    for _ in range(B):
        idx = rng.choice(D.index,
                         n,
                         replace=True)
        value = func(D, idx)
        total += value
        total_sq += value**2

    variance = total_sq / B - (total / B)**2
    # E[x^2] - E[x]^2 can dip just below zero through floating-point
    # cancellation when the statistic barely varies; clamp so the square
    # root yields 0 rather than NaN. Genuine NaNs still propagate.
    return np.sqrt(np.maximum(variance, 0))
def boot_OLS(model_matrix, response, D, idx):
    """Fit OLS on the rows of ``D`` selected by ``idx`` and return its params.

    Parameters
    ----------
    model_matrix : transformer with ``fit_transform``; it is cloned first
        so the caller's instance is never fitted by this function.
    response : column label of the response variable in ``D``.
    D : data object supporting ``.loc`` row selection.
    idx : row labels to select (may contain repeats).

    Returns
    -------
    The ``params`` attribute of the fitted ``sm.OLS`` results.
    """
    sample = D.loc[idx]
    # A fresh clone is fitted on every call, on the selected rows only.
    design = clone(model_matrix).fit_transform(sample)
    fitted = sm.OLS(sample[response], design).fit()
    return fitted.params
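`partial()` takes a function as an argument and freezes some of its arguments, starting from the left, returning a new function of the remaining arguments. For example, `partial(boot_OLS, model_matrix, response)` yields a function of `(D, idx)`, which is the form `boot_SE()` expects for `func`.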