The code reproduces, though I'm not sure what the FDR adjusted p-values contribute; credit history 1 is still high and the others are still quite low and only their relative magnitudes have changed.
My only suggestion is to tie off the logistic model
options(scipen = 999)
library(tidyverse)
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
library(tidymodels)
url="http://freakonometrics.free.fr/german_credit.csv"
mydf = read.csv(url, header = TRUE, sep = ",") %>%
clean_names() %>%
mutate(tgt = as.character(creditability),
payment_status_of_previous_credit = as.character(payment_status_of_previous_credit))
# Generate the logit formula where we try and classify the tgt from the field payment_status_of_previous_credit
logit_formula = glm(formula = as.factor(tgt) ~ payment_status_of_previous_credit,
data = mydf, family = binomial)
# We then pull the co-efficents using broom
coefficents <- broom::tidy(logit_formula) %>%
janitor::clean_names()
# I want to get the number of people per group
diff_terms <- mydf %>%
group_by(payment_status_of_previous_credit) %>%
mutate(term = str_c("payment_status_of_previous_credit", payment_status_of_previous_credit, sep='')) %>%
group_by(term) %>%
summarise(ppl = n())
#> `summarise()` ungrouping output (override with `.groups` argument)
# Next we calculate the FDR but i don't apply any filtering because there isnt enough data points
coefficents %>%
mutate(FDR = p.adjust(p_value, method = "fdr")) %>%
inner_join(diff_terms) %>%
ggplot(aes(x=estimate, y=-log10(FDR), size = ppl)) +
geom_point(alpha = 0.3) +
theme_minimal()
#> Joining, by = "term"

library(MASS)
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#>
#> select
(waldscore <- confint.default(logit_formula))
#> 2.5 % 97.5 %
#> (Intercept) -1.1509472 0.1292959
#> payment_status_of_previous_credit1 -0.6311849 1.0774720
#> payment_status_of_previous_credit2 0.6041268 1.9354829
#> payment_status_of_previous_credit3 0.4913169 2.0546144
#> payment_status_of_previous_credit4 1.3830670 2.8006611
(vmscore <- confint(logit_formula))
#> Waiting for profiling to be done...
#> 2.5 % 97.5 %
#> (Intercept) -1.1738117 0.1176409
#> payment_status_of_previous_credit1 -0.6286570 1.0876744
#> payment_status_of_previous_credit2 0.6152303 1.9566827
#> payment_status_of_previous_credit3 0.5034365 2.0736977
#> payment_status_of_previous_credit4 1.3948185 2.8208648
library(profileModel)
(raoscore <- profConfint(profileModel(logit_formula, quantile=qchisq(0.95, 1), objective = "RaoScoreStatistic", X = model.matrix(logit_formula))))
#> Preliminary iteration ..... Done
#>
#> Profiling for parameter (Intercept) ... Done
#> Profiling for parameter payment_status_of_previous_credit1 ... Done
#> Profiling for parameter payment_status_of_previous_credit2 ... Done
#> Profiling for parameter payment_status_of_previous_credit3 ... Done
#> Profiling for parameter payment_status_of_previous_credit4 ... Done
#> Lower Upper
#> (Intercept) -1.1404745 0.1188233
#> payment_status_of_previous_credit1 -0.6240158 1.0695355
#> payment_status_of_previous_credit2 0.6134541 1.9256681
#> payment_status_of_previous_credit3 0.4979180 2.0476886
#> payment_status_of_previous_credit4 1.3909966 2.7922873
#> attr(,"profileModel object")
#> profileModel(logit_formula, quantile = qchisq(0.95, 1), objective = "RaoScoreStatistic",
#> X = model.matrix(logit_formula))
vcov(logit_formula)
#> (Intercept)
#> (Intercept) 0.1066667
#> payment_status_of_previous_credit1 -0.1066667
#> payment_status_of_previous_credit2 -0.1066667
#> payment_status_of_previous_credit3 -0.1066667
#> payment_status_of_previous_credit4 -0.1066667
#> payment_status_of_previous_credit1
#> (Intercept) -0.1066667
#> payment_status_of_previous_credit1 0.1900000
#> payment_status_of_previous_credit2 0.1066667
#> payment_status_of_previous_credit3 0.1066667
#> payment_status_of_previous_credit4 0.1066667
#> payment_status_of_previous_credit2
#> (Intercept) -0.1066667
#> payment_status_of_previous_credit1 0.1066667
#> payment_status_of_previous_credit2 0.1153539
#> payment_status_of_previous_credit3 0.1066667
#> payment_status_of_previous_credit4 0.1066667
#> payment_status_of_previous_credit3
#> (Intercept) -0.1066667
#> payment_status_of_previous_credit1 0.1066667
#> payment_status_of_previous_credit2 0.1066667
#> payment_status_of_previous_credit3 0.1590476
#> payment_status_of_previous_credit4 0.1066667
#> payment_status_of_previous_credit4
#> (Intercept) -0.1066667
#> payment_status_of_previous_credit1 0.1066667
#> payment_status_of_previous_credit2 0.1066667
#> payment_status_of_previous_credit3 0.1066667
#> payment_status_of_previous_credit4 0.1307819
library(ResourceSelection)
#> ResourceSelection 0.3-5 2019-07-22
hoslem.test(logit_formula$y, fitted(logit_formula), g=10)
#>
#> Hosmer and Lemeshow goodness of fit (GOF) test
#>
#> data: logit_formula$y, fitted(logit_formula)
#> X-squared = 0.000000000000000000000025938, df = 8, p-value = 1
Created on 2020-10-29 by the reprex package (v0.3.0.9001)