OK, just that part. The abbreviated data produces NaN
s, which requires some contortions.
dta <- data.frame(
lcavol =
c(-0.5798185, -0.9942523, -0.5108256, -1.2039728, 0.7514161, -1.0498221),
lweight =
c(2.7695, 3.3196, 2.6912, 3.2828, 3.4324, 3.2288),
age =
c(50, 58, 74, 58, 62, 50),
lbph =
c(-1.386294, -1.386294, -1.386294, -1.386294, -1.386294, -1.386294),
svi =
c(0, 0, 0, 0, 0, 0),
lcp =
c(-1.38629, -1.38629, -1.38629, -1.38629, -1.38629, -1.38629),
gleason =
c(6, 6, 7, 6, 6, 6),
pgg45 =
c(0, 0, 20, 0, 0, 0),
lpsa =
c(-0.43078, -0.16252, -0.16252, -0.16252, 0.37156, 0.76547)
)
model.prostate <- lm(lpsa ~ lcavol + lweight + age + lbph + svi + lcp + gleason + pgg45, data = dta)
summary(model.prostate)
#>
#> Call:
#> lm(formula = lpsa ~ lcavol + lweight + age + lbph + svi + lcp +
#> gleason + pgg45, data = dta)
#>
#> Residuals:
#> 1 2 3 4 5 6
#> -5.021e-03 -1.014e-01 3.404e-17 8.396e-02 1.162e-02 1.083e-02
#>
#> Coefficients: (4 not defined because of singularities)
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -24.14927 3.77231 -6.402 0.0986 .
#> lcavol 0.36697 0.10008 3.667 0.1695
#> lweight 2.94551 0.42274 6.968 0.0907 .
#> age -0.13795 0.02264 -6.093 0.1036
#> lbph NA NA NA NA
#> svi NA NA NA NA
#> lcp NA NA NA NA
#> gleason 3.77937 0.61410 6.154 0.1025
#> pgg45 NA NA NA NA
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.1327 on 1 degrees of freedom
#> Multiple R-squared: 0.982, Adjusted R-squared: 0.9102
#> F-statistic: 13.68 on 4 and 1 DF, p-value: 0.1998
cook <- cooks.distance(model.prostate)
find_outliers <- function(x) {
a <- !is.na(x[x[!is.nan(x)] < max(x[!is.nan(x)])]) == TRUE
as.numeric(names(which(a == TRUE)))
}
model.prostate.x <- lm(lpsa ~ lcavol + lweight + age + lbph + svi + lcp + gleason + pgg45, data = dta[find_outliers(cook),])
summary(model.prostate.x)
#>
#> Call:
#> lm(formula = lpsa ~ lcavol + lweight + age + lbph + svi + lcp +
#> gleason + pgg45, data = dta[find_outliers(cook), ])
#>
#> Residuals:
#> ALL 3 residuals are 0: no residual degrees of freedom!
#>
#> Coefficients: (6 not defined because of singularities)
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 9.4807 NA NA NA
#> lcavol 0.4843 NA NA NA
#> lweight -2.7599 NA NA NA
#> age NA NA NA NA
#> lbph NA NA NA NA
#> svi NA NA NA NA
#> lcp NA NA NA NA
#> gleason NA NA NA NA
#> pgg45 NA NA NA NA
#>
#> Residual standard error: NaN on 0 degrees of freedom
#> Multiple R-squared: 1, Adjusted R-squared: NaN
#> F-statistic: NaN on 2 and 0 DF, p-value: NA