# Height of the sum-of-squared-residuals curve at coefficient value `b`.
# This is the same quadratic the plots previously spelled out by hand
# (e.g. 15*15 - 20*15 + 115).
# NOTE(review): presumably dat_ssr$ssr was generated from this same formula --
# confirm where dat_ssr is built.
ssr_at <- function(b) {
  b^2 - 20 * b + 115
}

#' Plot the SSR curve with its tangent line at one candidate coefficient
#'
#' Draws `ssr` against `b1` from `dat_ssr`, marks the point on the curve at
#' `b`, drops a dotted guide from that point towards the x-axis, and overlays
#' the tangent line at `b` as a dashed red line.
#'
#' @param b Candidate value of the regression coefficient (a single number).
#' @return A ggplot object; it prints when called at top level.
plot_ssr_tangent <- function(b) {
  ssr_b <- ssr_at(b)
  # Tangent to ssr_at() at b: slope is the derivative 2b - 20, and the
  # intercept is chosen so the line passes through (b, ssr_b).
  tangent_slope <- 2 * b - 20
  tangent_intercept <- ssr_b - tangent_slope * b

  ggplot(data = dat_ssr, aes(x = b1, y = ssr)) +
    geom_line(linewidth = 1) +
    ylim(0, 120) +
    labs(x = "Regression coefficient (b)", y = "Sum of squared residuals") +
    geom_abline(
      intercept = tangent_intercept, slope = tangent_slope,
      linewidth = 1, linetype = "dashed", color = "red"
    ) +
    annotate("pointrange", x = b, y = ssr_b, ymin = ssr_b, ymax = ssr_b) +
    geom_segment(
      x = b, xend = b, y = -1, yend = ssr_b,
      linetype = "dotted", linewidth = 1
    )
}

# b = 15: tangent slopes upward (intercept -110, slope 10).
plot_ssr_tangent(15)
# b = 12: shallower tangent (intercept -29, slope 4).
plot_ssr_tangent(12)
# b = 10: flat tangent at the minimum of the curve (intercept 15, slope 0).
plot_ssr_tangent(10)
\[\Sigma(Y_i - \hat{Y}_i)^2 = \]
\[\Sigma(Y - (b_1 X + b_0))^2 =\] \[\Sigma(Y - b_1 X - b_0)^2 =\] \[\Sigma(Y^2 + {b^2_0} + {b^2_1} X^2 - 2 b_0 Y - 2 b_1 X Y + 2 b_0 b_1 X ) =\] \[\Sigma Y^2 + \Sigma{b^2_0} + \Sigma{b^2_1} X^2 - \Sigma 2 b_0 Y - \Sigma 2 b_1 X Y + \Sigma 2 b_0 b_1 X =\] \[\Sigma Y^2 + n{b^2_0} + {b^2_1} \Sigma X^2 - 2 b_0 \Sigma Y - 2 b_1 \Sigma X Y + 2 b_0 b_1 \Sigma X\]
\[\frac{\partial \Sigma (Y - \hat{Y})^2}{\partial b_1} = 2 b_1 \Sigma X^2 - 2 \Sigma X Y + 2 b_0 \Sigma X\]
\[\frac{\partial \Sigma (Y - \hat{Y})^2}{\partial b_0} = 2 n b_0 - 2 \Sigma Y + 2 b_1 \Sigma X\]
For \(b_1\):
\[2 b_1 \Sigma X^2 - 2 \Sigma X Y + 2 b_0 \Sigma X = 0 \] \[ \vdots \] \[b_1 = \frac{n \Sigma X Y - (\Sigma X) (\Sigma Y)}{n \Sigma X^2 - (\Sigma X)^2} = \frac{SP_{XY}}{SS_X} = \frac{s_{XY}}{{s^2_X}}\]
For \(b_0\):
\[2 n b_0 - 2 \Sigma Y + 2 b_1 \Sigma X = 0\] \[ \vdots \] \[b_0 = \overline{Y} - b_1 \overline{X}\]
\[b_1 = \frac{SP_{XY}}{SS_X} = \frac{s_{XY}}{{s^2_X}}\]
\[b_0 = \overline{Y} - b_1 \overline{X}\]
\[R^2_{multiple} = r^2_{Y\hat{Y}} = \frac{SS_{regression}}{SS_Y} = \frac{predictable\ variation}{total\ variation}\]
where \(SS_{regression} = \Sigma(\hat{Y}_i - \overline{Y})^2\)
\[f(X) = \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{(X - \mu)^2}{2\sigma^2}}\]
ID BDI Likelihood LogLikelihood
1 1 5 0.001 -7.028
2 2 33 0.003 -5.908
3 3 17 0.067 -2.708
4 4 21 0.078 -2.548
5 5 13 0.030 -3.508
6 6 17 0.067 -2.708
7 7 5 0.001 -7.028
8 8 13 0.030 -3.508
9 9 17 0.067 -2.708
10 10 13 0.030 -3.508
# Base figure reused below: a normal density curve (mean 20, sd 5) evaluated
# at 101 points across the x range 0 to 40, with the y-axis labelled
# "Likelihood".
norm_plot <- ggplot(data.frame(x = c(0, 40)), aes(x = x)) +
  stat_function(
    fun = dnorm,
    args = list(mean = 20, sd = 5),
    n = 101
  ) +
  labs(y = "Likelihood")
#' Layers that mark one observation on the likelihood curve
#'
#' Builds, in drawing order, a point at the observation, a text label beside
#' it, and a dashed vertical guide from y = 0 up to the point.
#'
#' @param row Row of `LL_dat` to mark. The x position is read from column 2
#'   and the height from column 3 (presumably BDI and Likelihood -- verify
#'   against where LL_dat is built).
#' @param label Text drawn next to the point.
#' @param side `1` puts the label 2 units to the right of the point
#'   (left-aligned text); `-1` puts it 2 units to the left (right-aligned).
#' @return A list of three ggplot2 layers that can be added to a plot with `+`.
likelihood_marker <- function(row, label, side = 1) {
  x_obs <- LL_dat[row, 2]
  y_obs <- LL_dat[row, 3]
  list(
    annotate(
      "pointrange",
      x = x_obs, y = y_obs, ymin = y_obs, ymax = y_obs, size = 1
    ),
    annotate(
      "text",
      x = x_obs + 2 * side, y = y_obs, label = label,
      hjust = if (side > 0) 0 else 1, size = 8
    ),
    geom_segment(
      x = x_obs, xend = x_obs, y = 0, yend = y_obs, linetype = "dashed"
    )
  )
}

# Mark four of the observations on the density curve; labels go on whichever
# side keeps them clear of the curve.
norm_plot +
  likelihood_marker(4, "0.078", side = 1) +
  likelihood_marker(2, "0.003", side = -1) +
  likelihood_marker(3, "0.067", side = -1) +
  likelihood_marker(1, "0.001", side = 1)
\[L = \prod_{i=1}^{n} L_i = L_1 \times L_2 \times L_3 \times \cdots \times L_n\]
\[L = 0.001 \times 0.003 \times 0.067 \times 0.078 \times 0.030 \times 0.067 \times 0.001\]
\[\times 0.030 \times 0.067 \times 0.030 = 0.000000000000000001327\]
\[L = \prod_{i=1}^{n} L_i = L_1 \times L_2 \times L_3 \times \cdots \times L_n\] becomes
\[\log(L) = \sum_{i=1}^{n} \log(L_i) = \log(L_1) + \log(L_2) + \log(L_3) + \cdots + \log(L_n)\]
ID BDI Likelihood LogLikelihood
1 1 5 0.001 -7.028
2 2 33 0.003 -5.908
3 3 17 0.067 -2.708
4 4 21 0.078 -2.548
5 5 13 0.030 -3.508
6 6 17 0.067 -2.708
7 7 5 0.001 -7.028
8 8 13 0.030 -3.508
9 9 17 0.067 -2.708
10 10 13 0.030 -3.508
Log-likelihood is negative whenever the likelihood is less than 1 (as it is for every value here)
Individual
Sample
mu logL
1 10 -42.764
2 11 -40.804
3 12 -39.244
4 13 -38.084
5 14 -37.324
6 15 -36.964
7 16 -37.004
8 17 -37.444
9 18 -38.284
10 19 -39.524
11 20 -41.164
# Sample log-likelihood against the candidate mean. A cubic-polynomial lm
# smooth is used only to draw a continuous curve through the tabulated values.
# The dashed blue line and the blue point sit at log(L) = -36.932, with the
# point placed at a mean of 15.4.
ggplot(aud_LL, aes(x = mu, y = logL)) +
  geom_smooth(
    formula = y ~ poly(x, 3), method = "lm",
    color = "black", se = FALSE
  ) +
  xlab("Estimate of mean") +
  ylab("log(L)") +
  geom_hline(
    yintercept = -36.932,
    linetype = "dashed", linewidth = 1, color = "dodgerblue"
  ) +
  annotate(
    "pointrange",
    x = 15.4, y = -36.932, ymin = -36.932, ymax = -36.932,
    size = 1, color = "dodgerblue"
  )
# Candidate means 0..40 evaluated under two normal curves centred on 20:
# a narrow one (sd = 2, "smallse") and a wide one (sd = 5, "largese").
mu <- 0:40
L_smallse <- dnorm(mu, mean = 20, sd = 2)
L_largese <- dnorm(mu, mean = 20, sd = 5)
# Request the log density from dnorm() directly rather than taking log() of
# the density: the values agree, but this form cannot underflow to log(0)
# for points far out in the tails.
LL_smallse <- dnorm(mu, mean = 20, sd = 2, log = TRUE)
LL_largese <- dnorm(mu, mean = 20, sd = 5, log = TRUE)
# One row per candidate mean: likelihoods and log-likelihoods for both curves.
LLse_dat <- data.frame(mu, L_smallse, L_largese, LL_smallse, LL_largese)
#round(LLse_dat, 3)
# Log-likelihood curve for the narrow distribution (LL_smallse), shown on
# fixed axes (x: 0 to 40, y: -10 to 0). The blue point and dotted guide mark
# mu = 15 at a log-likelihood of -4.737; the horizontal line is drawn at -1.6.
ggplot(LLse_dat, aes(x = mu, y = LL_smallse)) +
  geom_smooth(
    formula = y ~ poly(x, 3), method = "lm",
    color = "black", se = FALSE
  ) +
  lims(x = c(0, 40), y = c(-10, 0)) +
  annotate(
    "pointrange",
    x = 15, y = -4.737, ymin = -4.737, ymax = -4.737,
    size = 1, color = "dodgerblue"
  ) +
  geom_segment(
    x = 15, xend = 15, y = -10, yend = -4.737,
    linewidth = 1, linetype = "dotted"
  ) +
  geom_hline(yintercept = -1.6) +
  xlab("Estimate of mean") +
  ylab("Log-likelihood")
# Log-likelihood curve for the wide distribution (LL_largese), shown on
# fixed axes (x: 0 to 40, y: -10 to 0). The blue point and dotted guide mark
# mu = 15 at a log-likelihood of -3.028; the horizontal line is drawn at -2.5.
ggplot(LLse_dat, aes(x = mu, y = LL_largese)) +
  geom_smooth(
    formula = y ~ poly(x, 3), method = "lm",
    color = "black", se = FALSE
  ) +
  lims(x = c(0, 40), y = c(-10, 0)) +
  annotate(
    "pointrange",
    x = 15, y = -3.028, ymin = -3.028, ymax = -3.028,
    size = 1, color = "dodgerblue"
  ) +
  geom_segment(
    x = 15, xend = 15, y = -10, yend = -3.028,
    linewidth = 1, linetype = "dotted"
  ) +
  geom_hline(yintercept = -2.5) +
  xlab("Estimate of mean") +
  ylab("Log-likelihood")
anova()
that we’ve used previously
anova(m1, m2, test = "LRT")
ICU
data from the Stat2Data package
Age
and Sex
predict Pulse
Call:
lm(formula = Pulse ~ Age + Sex, data = ICU)
Residuals:
Min 1Q Median 3Q Max
-59.231 -17.730 -4.163 19.440 93.225
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 95.55647 5.87949 16.253 <2e-16 ***
Age 0.04533 0.09563 0.474 0.636
Sex 2.00005 3.94123 0.507 0.612
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 26.93 on 197 degrees of freedom
Multiple R-squared: 0.0027, Adjusted R-squared: -0.007425
F-statistic: 0.2667 on 2 and 197 DF, p-value: 0.7662
vcov()
function in stats package (always loaded)
(Intercept) Age Sex
(Intercept) 34.5683578 -0.512491478 -3.81886908
Age -0.5124915 0.009145049 -0.03621144
Sex -3.8188691 -0.036211443 15.53330671
Sex
Age
and Sex
predict Pulse
, then add Infection
and Emergency
Call:
lm(formula = Pulse ~ Age + Sex + Infection + Emergency, data = ICU)
Residuals:
Min 1Q Median 3Q Max
-70.688 -16.922 -3.541 15.705 82.057
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 85.11154 6.83731 12.448 < 2e-16 ***
Age 0.02128 0.09430 0.226 0.8217
Sex 0.88884 3.77520 0.235 0.8141
Infection 15.52284 3.77981 4.107 5.9e-05 ***
Emergency 7.79833 4.29475 1.816 0.0709 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 25.53 on 195 degrees of freedom
Multiple R-squared: 0.1127, Adjusted R-squared: 0.09446
F-statistic: 6.19 on 4 and 195 DF, p-value: 0.0001041
(Intercept) Age Sex Infection Emergency
(Intercept) 46.7487516 -0.535360582 -1.31998556 0.23115884 -16.70442963
Age -0.5353606 0.008892873 -0.04388807 -0.06836532 0.09389264
Sex -1.3199856 -0.043888071 14.25216756 0.30122681 -2.30859090
Infection 0.2311588 -0.068365322 0.30122681 14.28699784 -3.28174376
Emergency -16.7044296 0.093892640 -2.30859090 -3.28174376 18.44486337
logLik()
function from stats package (always loaded)
Analysis of Variance Table
Model 1: Pulse ~ Age + Sex
Model 2: Pulse ~ Age + Sex + Infection + Emergency
Res.Df RSS Df Sum of Sq Pr(>Chi)
1 197 142859
2 195 127107 2 15752 5.657e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
'log Lik.' 23.36533 (df=4)