obs<-100# obs in each single regressionNloops<-900# number of experimentsoutput<-numeric(Nloops) # vector holding p-values of estimated a1 parameter from Nloops experimentsfor(i inseq_along(output)){x<-rnorm(obs) y<-rnorm(obs)# x and y are independent, so null hypothesis is trueoutput[i] <-(summary(lm(y~x))$coefficients)[2,4] # we grab p-value of a1}h0true <-as.data.frame(output) %>%mutate(true =1)#h0trueggplot(data = h0true, aes(x = output)) +geom_histogram(breaks =seq(from =0, to =1, by = .05)) +ylim(0, 100) +geom_vline(xintercept =0.05, color ="red", linewidth =1, linetype ="dashed") +annotate("text", x = .2, y =75, label ="< False positives",size =8) +annotate("text", x = .75, y =60, label ="True negatives", size =8)
50
\(H_0\) is false (yes effect: 100)
Code
obs<-100# obs in each single regressionNloops<-100# number of experimentsoutput<-numeric(Nloops) # vector holding p-values of estimated a1 parameter from Nloops experimentsfor(i inseq_along(output)){x<-rnorm(obs) y<-rnorm(obs, .5*x +rnorm(obs, 0, 1))# x and y are related, so null hypothesis is falseoutput[i] <-(summary(lm(y~x))$coefficients)[2,4] # we grab p-value of a1}h0false <-as.data.frame(output) %>%mutate(true =0)#h0falseggplot(data = h0false, aes(x = output)) +geom_histogram(breaks =seq(from =0, to =1, by = .05)) +ylim(0, 100) +geom_vline(xintercept =0.05, color ="red", linewidth =1, linetype ="dashed") +annotate("text", x = .2, y =75, label ="< True positives",size =8) +annotate("text", x = .75, y =60, label ="False negatives", size =8)
93
4.6 Combined distribution of \(p\)-values
Code
h0all <-rbind(h0true, h0false)ggplot(data = h0all, aes(x = output)) +geom_histogram(breaks =seq(from =0, to =1, by = .05)) +geom_vline(xintercept =0.05, color ="red", linewidth =1, linetype ="dashed") +geom_hline(yintercept =sum(h0all$output > .05)/19, color ="blue", linewidth =1)
4.7 Ordered \(p\)-values
Code
h0all <- h0all %>%arrange(output)ggplot(data = h0all, aes(x =1:nrow(h0all), y = output)) +geom_point(alpha = .2) +geom_hline(yintercept = .05, color ="blue", linewidth =1) +labs(x ="Order", y ="p-value") +theme(legend.position="none")
4.8 Ordered \(p\)-values
Code
h0all <- h0all %>%arrange(output)ggplot(data = h0all, aes(x =1:nrow(h0all), y = output,color =as.factor(true)),alpha = .3) +geom_point() +geom_hline(yintercept = .05, color ="blue", linewidth =1) +labs(x ="Order", y ="p-value") +theme(legend.position="none")
4.9 No correction: FDR = 50/143 = 0.35
Code
h0all <- h0all %>%arrange(output)ggplot(data = h0all, aes(x =1:nrow(h0all), y = output,color =as.factor(true))) +geom_point() +labs(x ="Order", y ="p-value") +geom_hline(yintercept = .05, color ="blue", linewidth =1) +xlim(0,150) +ylim(0,.06) +theme(legend.position="none")
4.10 Corrected B-H FDR = .05
Code
h0all <- h0all %>%arrange(output)ggplot(data = h0all, aes(x =1:nrow(h0all), y = output,color =as.factor(true))) +geom_point() +labs(x ="Order", y ="p-value") +geom_hline(yintercept = .05, color ="blue", linewidth =1) +geom_abline(slope = .05/1000, color ="blue", linewidth =1, linetype ="dashed") +xlim(0,150) +ylim(0,.06) +annotate("text", x =125, y =0.01, label ="slope = .05/1000") +theme(legend.position="none")
4.11 Corrected B-H FDR = .10
Code
h0all <- h0all %>%arrange(output)ggplot(data = h0all, aes(x =1:nrow(h0all), y = output,color =as.factor(true))) +geom_point() +labs(x ="Order", y ="p-value") +geom_hline(yintercept = .05, color ="blue", linewidth =1) +geom_abline(slope = .1/1000, color ="blue", linewidth =1, linetype ="dashed") +xlim(0,150) +ylim(0,.06) +annotate("text", x =125, y =0.01, label ="slope = .10/1000") +theme(legend.position="none")