[1] 6 3 2 4 2 5
die6
2 3 4 5 6
2 1 1 1 1
die6
2 3 4 5 6
0.3333333 0.1666667 0.1666667 0.1666667 0.1666667
# Overlay two density curves on [-3, 3]: the t density with 10 degrees of
# freedom (blue) and the standard normal density (red).
ggplot(data.frame(x = c(-3, 3)), aes(x)) +
  stat_function(
    geom = "line", fun = dt, args = list(df = 10, ncp = 0),
    colour = "blue", linewidth = 0.75
  ) +
  stat_function(
    geom = "line", fun = dnorm, args = list(mean = 0, sd = 1),
    colour = "red", linewidth = 0.75
  ) +
  scale_x_continuous(breaks = -3:3) +
  lims(y = c(0, 0.5)) +
  xlab("X") +
  ylab("f(x)")
flowchart LR
  %% Left-to-right: a population is sampled to obtain a sample.
  A(Population) --> B{Sampling}
  B{Sampling} --> D[Sample]
flowchart RL
  %% Right-to-left: inference runs from the sample back to the population.
  C{Inference} --> A(Population)
  D[Sample] --> C{Inference}
Step 1. Identify a population that you’re interested in
Step 2. Sample from the population
# Standard normal density on [-3, 3] with the mean of each of sample1..sample5
# marked as a coloured point just above the x-axis. Each marker sits at a
# slightly different height (0, 0.01, ..., 0.04) so the points do not overlap.
ggplot(data.frame(x = c(-3, 3)), aes(x)) +
  stat_function(fun = dnorm, args = list(mean = 0, sd = 1), geom = "line") +
  ylim(0, 0.5) +
  scale_x_continuous(breaks = -3:3) +
  labs(x = "X", y = "f(x)") +
  # One zero-length pointrange layer per sample, added in the order
  # sample1, ..., sample5 (ymin == ymax == y, so only the point is visible).
  Map(
    function(centre, height, shade) {
      annotate(
        "pointrange",
        x = centre, y = height, ymin = height, ymax = height,
        colour = shade, size = 0.5, linewidth = 1
      )
    },
    c(mean(sample1), mean(sample2), mean(sample3), mean(sample4), mean(sample5)),
    c(0, 0.01, 0.02, 0.03, 0.04),
    c("red", "blue", "darkgreen", "purple", "cyan")
  )
# Normal densities with mean 0 and sd 1 / sqrt(n) for n = 10, 50 and 100,
# drawn on [-1.5, 1.5] and labelled by n (x-axis labelled "x_bar").
# Line thickness is set with `linewidth`; using `size` for lines is deprecated
# since ggplot2 3.4.0 and triggers a deprecation warning.
ggplot(data.frame(x = c(-1.5, 1.5)), aes(x)) +
  #stat_function(fun = dnorm, args = list(mean = 0, sd = 1), geom = "line") +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 1 / sqrt(10)),
    geom = "line", color = "red", linewidth = 1
  ) +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 1 / sqrt(50)),
    geom = "line", color = "darkgreen", linewidth = 1
  ) +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 1 / sqrt(100)),
    geom = "line", color = "purple", linewidth = 1
  ) +
  #ylim(0,1) +
  annotate("text", x = 0.75, y = 0.5, label = "n = 10", color = "red") +
  annotate("text", x = 0.5, y = 2, label = "n = 50", color = "darkgreen") +
  annotate("text", x = 0.5, y = 3, label = "n = 100", color = "purple") +
  scale_x_continuous(breaks = -2:2) +
  labs(x = "x_bar", y = "f(x)")
library(infer)
# Draw 10,000 samples of size `n` (with replacement) from `pop` and summarise
# each into `x_bar = mean(pop)`.
# NOTE(review): one x_bar per sample relies on rep_sample_n() returning data
# grouped by replicate, as described in the infer documentation.
simulate_sample_means <- function(n) {
  resamples <- rep_sample_n(
    data.frame(pop),
    size = n, reps = 10000, replace = TRUE
  )
  summarise(resamples, x_bar = mean(pop))
}

# Evaluated in this order so the random draws match the n = 10, 50, 100
# sequence.
means10 <- simulate_sample_means(10)
means50 <- simulate_sample_means(50)
means100 <- simulate_sample_means(100)
# Histogram (50 bins, semi-transparent red) of the sample means stored in
# means10, with the x-axis limited to [-1.5, 1.5] and the y-axis to [0, 2500].
ggplot(means10, aes(x_bar)) +
  geom_histogram(fill = "red", alpha = 0.5, bins = 50) +
  lims(x = c(-1.5, 1.5), y = c(0, 2500))