###### Test for Goodness of Fit using Birth Month and ADHD data
library("mosaic")
###### Step-by-step code
### Null proportions for the four categories (these must sum to 1)
null.proportions <- c(0.244, 0.258, 0.257, 0.241)
### Observed counts, one per category, in the same order as the proportions
### (A small sample with approximately the same proportions as the real study)
observed.counts <- c(69, 80, 92, 89)
### Sanity checks before any arithmetic: R silently recycles the shorter
### vector in elementwise operations, so a length mismatch would produce
### wrong expected counts without any error. The null proportions must
### also sum to 1 (chisq.test() enforces the same requirement by default).
stopifnot(
  length(null.proportions) == length(observed.counts),
  isTRUE(all.equal(sum(null.proportions), 1))
)
### The sample size 'n' is the total of the counts
n <- sum(observed.counts)
n
### Expected counts under the null: multiply the null proportions by n
expected.counts <- null.proportions * n
expected.counts
### Chi-squared statistic: the sum over categories of
###   (observed - expected)^2 / expected
### R does the subtraction, squaring, and dividing elementwise
### for all four pairs of counts, and sum() adds up the results
chisq.observed <-
  sum((observed.counts - expected.counts)^2 / expected.counts)
chisq.observed
### To get the analytic P-value, get the area beyond the observed value
### (the upper tail, hence lower.tail = FALSE) in a chi-squared
### distribution with C - 1 df, where C is the number of categories
### (here C = 4, so 3 df).
### The df is computed from the data rather than typed in by hand, so it
### stays correct if the number of categories changes.
xpchisq(chisq.observed,
        df = length(observed.counts) - 1,
        lower.tail = FALSE)
####### We can get results from a "canned" function as well:
### Repeating the null proportions and the small-sample counts so that
### this section is self-contained and can be run on its own
null.proportions <- c(0.244, 0.258, 0.257, 0.241)
observed.counts <- c(69, 80, 92, 89)
### xchisq.test() comes from the mosaic package loaded at the top of the
### file; 'p' supplies the null proportions to test the counts against
xchisq.test(observed.counts, p = null.proportions)
####### The real data have roughly the same proportions but a much larger n
### (these counts total 32968, versus 330 in the small sample)
observed.counts <- c(6880, 7982, 9161, 8945)
### null.proportions is reused unchanged from the previous section
xchisq.test(observed.counts, p = null.proportions)