library(mosaic)
library(Lock5Data)
## Load the AllCountries data set (one row per country; presumably supplied
## by the Lock5Data package loaded above) into the workspace
data(AllCountries)
## Preview the first few rows to see which variables are available
head(AllCountries)
#### Let's try to predict a country's life expectancy based on
#### various other factors
### Birth rate
## First, plot the data: life expectancy (response) against birth rate
xyplot(LifeExpectancy ~ BirthRate, data = AllCountries)
## Now fit a simple linear regression of life expectancy on birth rate
birth.rate.model <- lm(LifeExpectancy ~ BirthRate, data = AllCountries)
## Visualize the fitted model
plotModel(birth.rate.model)
## Let's look at the residuals via a fitted vs. residuals plot
## (which = 1 selects that diagnostic from plot() on an lm object)
plot(birth.rate.model, which = 1)
## and a QQ plot of the residuals (which = 2)
plot(birth.rate.model, which = 2)
## Verdict on the diagnostics: it's not perfect, but it's not too bad
### GDP
## Same workflow as for birth rate, now with GDP as the predictor.
## Plot the raw data first
xyplot(LifeExpectancy ~ GDP, data = AllCountries)
## Fit a simple linear regression of life expectancy on untransformed GDP
GDP.model <- lm(LifeExpectancy ~ GDP, data = AllCountries)
## Visualize the fitted model
plotModel(GDP.model)
## Residual diagnostics: fitted vs. residuals (which = 1) ...
plot(GDP.model, which = 1)
## ... and a QQ plot of the residuals (which = 2)
plot(GDP.model, which = 2)
## This one is terrible: a straight line in raw GDP fits poorly
## (GDP is heavily right-skewed; see the box plot in the next section)
#### Transformations
## It can help to look at plots of each variable separately
bwplot(~GDP, data = AllCountries) # Heavy right skew
bwplot(~LifeExpectancy, data = AllCountries) # A bit left-skewed
## Try a few candidate transformations of LifeExpectancy and compare the
## resulting box plots. I() makes ^ and - act as arithmetic inside a formula
## rather than as formula operators.
bwplot(~I(LifeExpectancy^2), data = AllCountries)
bwplot(~I((LifeExpectancy-40)^2), data = AllCountries)
bwplot(~exp((LifeExpectancy-40)/10), data = AllCountries)
## Use a "concave" transformation (here log) to address the right skew in GDP
xyplot(LifeExpectancy ~ log(GDP), data = AllCountries)
## Use a "convex" transformation (here squaring) to address the left skew
## in LifeExpectancy, keeping log(GDP) as the predictor
xyplot(I(LifeExpectancy^2) ~ log(GDP), data = AllCountries)
## Same convex idea, but subtract 40 from LifeExpectancy before squaring
xyplot(I((LifeExpectancy-40)^2) ~ log(GDP), data = AllCountries)