Classification with scikit-learn (sklearn)

import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots
import statsmodels.api as sm
from ISLP import load_data
from ISLP.models import (ModelSpec as MS,
                         summarize)
from ISLP import confusion_table
from ISLP.models import contrast
from sklearn.discriminant_analysis import \
     (LinearDiscriminantAnalysis as LDA,
      QuadraticDiscriminantAnalysis as QDA)
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

Logistic Regression

# Convert fitted probabilities into class labels.
# NOTE: the original `np.array(['baseline']*nrows)` infers a fixed-width
# string dtype '<U8' (8 chars), so assigning the 12-character string
# "non-baseline" into it silently truncates to 'non-base'.  np.where
# builds the array in one step and sizes the dtype to the longest label.
# assumes `results` is a fitted statsmodels results object and
# `nrows` == len(results.predict()) — TODO confirm from earlier cells
probs = results.predict()  # in-sample predicted probabilities
labels = np.where(probs > 0.5, 'non-baseline', 'baseline')

Use the labels in a confusion matrix:

# Cross-tabulate predicted labels against the true responses.
# ISLP's confusion_table takes (predicted, truth) — rows are predictions,
# columns are the true classes — TODO confirm orientation against ISLP docs.
confusion_table(labels, df.y)

Calculate the fraction of correct predictions by hand, or with:

# Overall accuracy: the mean of the element-wise boolean match vector.
np.mean(labels == df.y)

Split the data into training and test sets using a boolean array:

# Chronological split: observations before 2005 train the model,
# 2005 itself is held out as the test period.
train = Smarket['Year'] < 2005
test = ~train
X_train = X.loc[train]
X_test = X.loc[test]
y_train = y.loc[train]
y_test = y.loc[test]

Linear Discriminant Analysis

# Posterior class probabilities for the test set, one column per class.
lda_prob = lda.predict_proba(X_test)
# How many test observations have a first-class posterior above 90%?
(lda_prob[:, 0] > 0.9).sum()

Quadratic Discriminant Analysis

Naive Bayes

K-Nearest Neighbors

Tuning Parameters

# Compare KNN classifiers for K = 1..5 on the held-out set, reporting how
# many observations each model predicts as 'Yes' and the precision among
# those predictions.
# Hoisted out of the loop: the template string is loop-invariant.
templ = ('K={0:d}: # predicted to rent: {1:>2},'
         '  # who did rent {2:d}, accuracy {3:.1%}')
for K in range(1, 6):
    knn = KNeighborsClassifier(n_neighbors=K)
    knn_pred = knn.fit(X_train, y_train).predict(X_test)
    C = confusion_table(knn_pred, y_test)
    pred = C.loc['Yes'].sum()        # total observations predicted 'Yes'
    did_rent = C.loc['Yes', 'Yes']   # correct 'Yes' predictions
    # Guard: a model that never predicts 'Yes' would otherwise raise
    # ZeroDivisionError on did_rent / pred.
    accuracy = did_rent / pred if pred > 0 else float('nan')
    print(templ.format(K, pred, did_rent, accuracy))

Poisson

Gamma

Connect With Me!