# Create calibration plots with different numbers of bins
n_bins <- c(5, 10, 20, 100)
# Create a tibble for plotting the calibration plots
bin_stats <- tibble::tibble(bin_center = double(), empirical_prob = double(), count = double(), empirical_se = double(), model = character())
for (n in n_bins) {
# Bin the predictions
rf_bins <- dplyr::ntile(plot_data$forest_pred, n)
nb_bins <- dplyr::ntile(plot_data$nb_pred, n)
# Calculate the statistics for the Random Forest model
rf_bin_stats <- tibble(
bin_center = tapply(plot_data$forest_pred, rf_bins, mean),
empirical_prob = tapply(plot_data$actual, rf_bins, mean),
count = as.numeric(table(rf_bins)),
empirical_se = tapply(plot_data$actual, rf_bins, sd)/sqrt(count)
)
# Calculate the statistics for the Naive Bayes model
nb_bin_stats <- tibble(
bin_center = tapply(plot_data$nb_pred, nb_bins, mean),
empirical_prob = tapply(plot_data$actual, nb_bins, mean),
count = as.numeric(table(nb_bins)),
empirical_se = tapply(plot_data$actual, nb_bins, sd)/sqrt(count)
)
# Combine the statistics for the two models
bin_stats <- bin_stats |>
bind_rows(mutate(rf_bin_stats, model = "Random Forest", n_bin = n)) |>
bind_rows(mutate(nb_bin_stats, model = "Naive Bayes", n_bin = n))
}
# Create the calibration plot
ggplot(bin_stats) +
geom_abline(aes(slope = 1, intercept = 0, color = "Perfect Calibration"), linetype = "dashed") +
geom_ribbon(aes(x = bin_center, y = empirical_prob, fill = model,
ymin = empirical_prob - 1.96 * empirical_se,
ymax = empirical_prob + 1.96 * empirical_se), alpha = 0.2) +
geom_point(aes(x=bin_center, y=empirical_prob, color = model), alpha = 0.5, size=2) +
scale_color_manual(values = c("Perfect Calibration" = "red", "Random Forest" = "blue", "Naive Bayes" = "green")) +
scale_fill_manual(values = c("Perfect Calibration" = "red", "Random Forest" = "blue", "Naive Bayes" = "green")) +
labs(title = paste0("Calibration Plot with different bins"), x=NULL, y=NULL, color = NULL) + guides(fill = "none") +
facet_wrap(~ n_bin, nrow = length(n_bins)%/%2, labeller = labeller(n_bin = function(x) paste0(x, " bins")), scales = "free_x")