###### Example: Selecting a Model to Predict Win % in Baseball
### Load the data
library(Stat2Data)
data(MLB2007Standings)
### Load a package used for model selection
library(leaps) ## may need to install.packages()
#-----------------
###### Best-subsets regression: examine every combination of the candidate
###### predictors and keep the best-fitting model of each size
###### (size = number of predictors in the model)
subsets <- regsubsets(
  WinPct ~ HR + BattingAvg + OBP + SLG + ERA + Walks + StrikeOuts,
  data = MLB2007Standings
)
### Display the best model of each size together with its adjusted R^2
plot(x = subsets, scale = "adjr2")
### Display the best model of each size together with its Mallows' Cp
plot(x = subsets, scale = "Cp")
### A further summary of the best-subsets fits, provided by summaryHH()
### from the HH package
library(HH) ## run install.packages("HH") first if it is not installed
summaryHH(subsets)
#----------------
###### Backward elimination: begin with the model containing every candidate
###### predictor and drop one at a time while the fit criterion improves
full <- lm(
  WinPct ~ HR + BattingAvg + OBP + SLG + ERA + Walks + StrikeOuts,
  data = MLB2007Standings
)
### `scale` is set to the full model's residual variance estimate; per the
### step()/extractAIC() documentation, a positive scale makes the criterion
### Mallows' Cp rather than AIC
step(full, scale = summary(full)$sigma^2, direction = "backward")
#----------------
###### Forward selection: begin with the intercept-only model and add one
###### predictor at a time while the fit criterion improves
full <- lm(
  WinPct ~ HR + BattingAvg + OBP + SLG + ERA + Walks + StrikeOuts,
  data = MLB2007Standings
) ## largest model the search is allowed to reach
none <- lm(WinPct ~ 1, data = MLB2007Standings) ## null (intercept-only) model
### step() documents scope$upper as a formula, so hand it the full model's
### formula explicitly instead of relying on coercion of the fitted lm object.
### `scale` is the full model's residual variance estimate; per the
### step()/extractAIC() documentation, a positive scale makes the criterion
### Mallows' Cp rather than AIC
step(
  none,
  direction = "forward",
  scope = list(upper = formula(full)),
  scale = summary(full)$sigma^2
)
#----------------
###### Stepwise selection: begin with the intercept-only model and, at each
###### step, consider both adding and removing a predictor
full <- lm(
  WinPct ~ HR + BattingAvg + OBP + SLG + ERA + Walks + StrikeOuts,
  data = MLB2007Standings
) ## largest model the search is allowed to reach
none <- lm(WinPct ~ 1, data = MLB2007Standings) ## null (intercept-only) model
### step() documents scope$upper as a formula, so hand it the full model's
### formula explicitly instead of relying on coercion of the fitted lm object.
### direction = "both" is step()'s default; it is spelled out here so the
### search mode is visible alongside the backward and forward examples.
### `scale` is the full model's residual variance estimate; per the
### step()/extractAIC() documentation, a positive scale makes the criterion
### Mallows' Cp rather than AIC
step(
  none,
  direction = "both",
  scope = list(upper = formula(full)),
  scale = summary(full)$sigma^2
)