Since I discovered that the GDQ donations are public, tabular and scrapeable, I couldn’t resist cobbling together a script to scrape all the data.
Here are some results.

Donations

First up we look at the donation totals for each event.
Note that as of now I only collected data from [AS]GDQs since 2011, the other events are not included (yet?).

# Donation total of every event; the two series are drawn as dodged bars
# that keep a constant width even when a year has only one event.
ggplot(gdq_donation_totals, aes(year, total, fill = gdq)) +
  geom_col(alpha = 0.75, position = position_dodge2(preserve = "single")) +
  scale_colorfill_gdq() +
  scale_x_year() +
  scale_y_currency(
    minor_breaks = seq(from = 0, to = 3e6, by = 1e5),
    breaks = seq(from = 0, to = 3e6, by = 5e5)
  ) +
  labs(
    subtitle = "Donation totals as per the official tracker page",
    title = "Games Done Quick: Donation Totals"
  )

Change from event to event

# Absolute change in the donation total compared to the previous event of
# the same series (AGDQ vs. previous AGDQ, SGDQ vs. previous SGDQ).
gdq_donation_totals %>%
  group_by(gdq) %>%
  # Order the lag explicitly by year so the difference does not depend on
  # the incoming row order. The first event of each series has no
  # predecessor and therefore gets an NA gain.
  mutate(gain = total - lag(total, order_by = year)) %>%
  ungroup() %>%
  ggplot(aes(x = year, y = gain, fill = gdq)) +
  geom_col(position = position_dodge(), alpha = .75) +
  scale_x_year() +
  scale_y_currency(
    breaks = seq(-2e6, 2e6, 2e5),
    minor_breaks = seq(-2e6, 2e6, 5e4)
  ) +
  facet_wrap(~gdq) +
  scale_colorfill_gdq(guide = FALSE) +
  labs(
    title = "Games Done Quick: Donation Totals",
    subtitle = "Per-GDQ donation totals: Difference from previous event"
  )

Cumulative total

# Plots the `cumtot` column (the cumulative donation total, per the
# subtitle) for every event as dodged bars.
ggplot(gdq_donation_totals, aes(year, cumtot, fill = gdq)) +
  geom_col(alpha = 0.75, position = position_dodge2(preserve = "single")) +
  scale_colorfill_gdq() +
  scale_x_year() +
  scale_y_currency(
    minor_breaks = seq(from = 0, to = 3e7, by = 1e6),
    breaks = seq(from = 0, to = 3e7, by = 5e6)
  ) +
  labs(
    subtitle = "Cumulative donation totals as per the official tracker page",
    title = "Games Done Quick: Donation Totals"
  )

Prediction

# Cumulative totals on a half-year grid (SGDQ events are shifted by +0.5)
# with one quadratic least-squares curve fitted across both series.
gdq_donation_totals %>%
  mutate(year = ifelse(gdq == "SGDQ", year + 0.5, year)) %>%
  ggplot(aes(year, cumtot, fill = gdq, color = gdq)) +
  # group = 1 overrides the per-series grouping so a single curve is drawn.
  geom_smooth(
    aes(group = 1),
    formula = y ~ poly(x, 2), method = lm, se = FALSE,
    linetype = "dashed", size = 0.75, color = "black"
  ) +
  geom_point(shape = 21, color = "white", size = 2.5, key_glyph = "rect") +
  scale_colorfill_gdq() +
  scale_x_year() +
  scale_y_currency() +
  labs(
    subtitle = "Cumulative donation totals: Quadratic model",
    title = "Games Done Quick: Donation Totals"
  )

# Quadratic trend of the cumulative total over time. The response is
# rescaled to millions and SGDQ events are placed half a year after the
# AGDQ of the same year. Raw polynomial terms are used here; poly(year, 2)
# was the considered alternative.
mod <- lm(
  formula = cumtot ~ year + I(year^2),
  data = gdq_donation_totals %>%
    mutate(
      year = ifelse(gdq == "SGDQ", year + .5, year),
      cumtot = cumtot / 1e6
    )
)

# Build the LaTeX source of a fitted quadratic model equation.
#
# mod: a fitted model whose coef() yields the intercept, the linear and the
#      quadratic coefficient, in that order (e.g. lm(y ~ x + I(x^2))).
#
# Returns a glue string. Coefficients are rounded to two decimals and the
# linear and quadratic terms carry an explicit sign, so a negative
# coefficient renders as "- 1.5" instead of "+ -1.5".
model_equations <- function(mod) {
  coefs <- round(coef(mod), 2)

  intercept <- coefs[[1]]
  term_signs <- ifelse(coefs[2:3] < 0, "-", "+")
  term_sizes <- abs(coefs[2:3])

  # Parentheses are the glue delimiters here because the LaTeX markup
  # itself is full of curly braces.
  glue::glue(
    "\\widehat{\\text{Total}} = (intercept) (term_signs[[1]]) (term_sizes[[1]]) \\cdot \\text{Year} (term_signs[[2]]) (term_sizes[[2]]) \\cdot \\text{Year}^2",
    .open = "(", .close = ")"
  )
}

The model equation (cumulative total in millions of dollars):

\[\widehat{\text{Total}} = 1450972.21 - 1442.54 \cdot \text{Year} + 0.36 \cdot \text{Year}^2\]

Let’s make some naive predictions for the upcoming years:

current_year <- year(today())

# Half-year prediction grid from last year to five years ahead. Whole
# years stand for AGDQ and x.5 values for SGDQ, matching the +0.5 offset
# applied to SGDQ when the model was fitted.
pred_df <- tibble(year = seq(current_year - 1, current_year + 5, .5))

# Point predictions with 95% prediction intervals, formatted for display.
predicted <- predict(mod, newdata = pred_df, interval = "prediction") %>%
  as_tibble() %>%
  mutate_all(round, 2) %>%
  mutate(
    fit = paste0("$", fit),
    Year = pred_df$year,
    ci = glue::glue("(${lwr}, ${upr})"),
    # Detect half years arithmetically. The previous regex ".5$" had an
    # unescaped dot and therefore also matched whole years ending in 5
    # (e.g. "2025"), labelling them SGDQ. Multiples of 0.5 are exactly
    # representable, so the comparison is safe.
    GDQ = ifelse(Year > floor(Year), "SGDQ", "AGDQ"),
    Year = floor(Year)
  ) %>%
  select(GDQ, Year, fit, ci)

# Render the predictions as a centred, striped table with readable headers.
predicted %>%
  setNames(
    c("GDQ", "Year", "Predicted Total (Millions)", "CI (95% Prediction)")
  ) %>%
  kable(align = rep("c", 4)) %>%
  kable_styling(position = "center", bootstrap_options = "striped")
GDQ Year Predicted Total (Millions) CI (95% Prediction)
AGDQ 2019 $19.2 ($18.75, $19.65)
SGDQ 2019 $21.91 ($21.44, $22.38)
AGDQ 2020 $24.8 ($24.31, $25.3)
SGDQ 2020 $27.87 ($27.34, $28.4)
AGDQ 2021 $31.12 ($30.55, $31.69)
SGDQ 2021 $34.55 ($33.92, $35.18)
AGDQ 2022 $38.16 ($37.46, $38.85)
SGDQ 2022 $41.94 ($41.17, $42.72)
AGDQ 2023 $45.91 ($45.05, $46.77)
SGDQ 2023 $50.05 ($49.09, $51.01)
AGDQ 2024 $54.38 ($53.31, $55.45)
SGDQ 2024 $58.88 ($57.7, $60.07)
AGDQ 2025 $63.56 ($62.26, $64.87)

Donation amounts

Can you tell when they started to have a $5 minimum? Turns out there were a lot of 1 cent donations, which seems kind of… inefficient? So yeah, makes sense.

# Distribution of individual donation amounts per year and series on a
# log10 axis; the mean of each group is overlaid as a point.
ggplot(donations, aes(year, amount, fill = gdq, color = gdq)) +
  geom_boxplot(alpha = 0.75) +
  stat_summary(
    geom = "point", fun.y = "mean",
    # Same dodge width as the boxes so each point sits on its own box.
    position = position_dodge(width = 0.75),
    color = "black", shape = 21, size = 2, stroke = 0.6
  ) +
  scale_colorfill_gdq() +
  scale_y_dollar(trans = "log10") +
  minilabs(
    title = p_title,
    subtitle = "Individual donations by event"
  )

# Number of donations per order-of-magnitude amount range, one panel per
# event series.
donations %>%
  mutate(
    amount_c = cut(
      amount, breaks = 10^(-2:6), include.lowest = TRUE,
      # Format the upper bounds without scientific notation so the axis
      # reads "<=100000" rather than "<=1e+05".
      labels = paste0(
        "<=",
        format(
          10^(-1:6),
          scientific = FALSE, trim = TRUE, drop0trailing = TRUE
        )
      )
    )
  ) %>%
  ggplot(aes(x = amount_c, color = gdq, fill = gdq)) +
  geom_bar(alpha = .75) +
  facet_wrap(~gdq, ncol = 1) +
  scale_y_comma() +
  scale_colorfill_gdq() +
  labs(
    title = p_title,
    subtitle = "Individual donations by event",
    x = "Donation range", y = "# of donations"
  )

Donation Comments

There was a huge difference in the proportion of donations with and without comments between AGDQ and SGDQ, but that has normalized in 2014-2015. For some reason, that ratio has also consistently decreased in both event series, meaning that there’s a trend towards an equal number of donations with and without comments.

# Ratio of donations with a comment to donations without one, per event,
# on a half-year grid (SGDQ shifted by +0.5). The dashed line marks 1:1.
donations %>%
  ungroup() %>%
  count(gdq, year, comment) %>%
  spread(comment, n) %>%
  mutate(
    ratio = Yes / No,
    # Go through as.character(): should `year` be a factor, as.numeric()
    # alone would return level codes instead of calendar years.
    year = as.numeric(as.character(year)),
    year = ifelse(gdq == "SGDQ", year + 0.5, year)
  ) %>%
  ggplot(aes(x = year, y = ratio, color = gdq, fill = gdq)) +
  geom_point(size = 4, key_glyph = "rect", color = "black", stroke = .25, shape = 21) +
  geom_hline(yintercept = 1, color = "black", linetype = "dashed") +
  scale_x_year() +
  scale_y_continuous(breaks = seq(0, 10, 1), limits = c(0, NA)) +
  scale_colorfill_gdq() +
  labs(
    title = p_title,
    subtitle = "Ratio of number of donations with comment to those without comment",
    x = "", y = "Ratio (> 1: More donations with comment)"
  )

Runs

Number of runs

# Number of runs per event, bars dodged by event series. No join to
# `event_dates` is needed here because none of its columns are plotted.
runs %>%
  count(event, gdq, year) %>%
  # Go through as.character(): should `year` be a factor, as.numeric()
  # alone would return level codes instead of calendar years.
  mutate(year = as.numeric(as.character(year))) %>%
  ggplot(aes(x = year, y = n, color = gdq, fill = gdq)) +
  geom_col(position = "dodge", alpha = 0.75) +
  scale_x_year() +
  scale_colorfill_gdq() +
  minilabs(
    title = p_title_r,
    subtitle = "Number of runs per event"
  )

# Runs per event divided by `event_duration` from `event_dates`
# (presumably the event length in days, going by the subtitle — confirm).
runs %>%
  count(event, gdq, year) %>%
  # Go through as.character(): should `year` be a factor, as.numeric()
  # alone would return level codes instead of calendar years.
  mutate(year = as.numeric(as.character(year))) %>%
  left_join(event_dates, by = "event") %>%
  ggplot(aes(x = year, y = n/event_duration, color = gdq, fill = gdq)) +
  geom_col(position = "dodge", alpha = 0.75) +
  scale_x_year() +
  scale_colorfill_gdq() +
  minilabs(
    title = p_title_r,
    subtitle = "Runs per day (approximate)"
  )

Run length by event

# Run durations per year and series as horizontal boxplots, with the mean
# of each group overlaid as a point.
runs %>%
  mutate(
    # Reverse both factors; presumably so that years and series read
    # top-to-bottom once the axes are flipped. The legend order is
    # reversed back further down.
    year = fct_rev(year),
    gdq = fct_rev(gdq)
  ) %>%
  ggplot(aes(x = year, y = run_time, color = gdq, fill = gdq)) +
  geom_boxplot(alpha = .5) +
  stat_summary(
    fun.y = mean, geom = "point", position = position_dodge2(width = .75),
    shape = 21, color = "black", stroke = .5, key_glyph = "rect"
  ) +
  coord_flip() +
  # Hourly breaks: 60^2 seconds per step.
  scale_y_time(breaks = seq(0, 10, 1) * 60^2) +
  scale_colorfill_gdq(guide = guide_legend(reverse = TRUE)) +
  #ggforce::facet_zoom(y = time <= hms::hms(hours = 3)) +
  labs(
    title = p_title_r,
    subtitle = "Run durations by event",
    x = "", y = "H:M:S"
  ) +
  # No trailing comma after the last argument: it would pass an empty
  # argument on to the theme function.
  theme_gdqd(
    grid = "Xx",
    legend.position = c(.6, 1.08),
    legend.direction = "horizontal"
  )

Maybe a zoomed in version is easier to digest.

# Same boxplot as for all runs, restricted to runs of at most two hours
# and with half-hour major / quarter-hour minor breaks.
runs %>%
  filter(run_time <= hms::hms(hours = 2)) %>%
  mutate(
    # Reverse both factors; presumably so that years and series read
    # top-to-bottom once the axes are flipped.
    year = fct_rev(year),
    gdq = fct_rev(gdq)
  ) %>%
  ggplot(aes(x = year, y = run_time, color = gdq, fill = gdq)) +
  geom_boxplot(alpha = .5) +
  stat_summary(
    fun.y = mean, geom = "point", position = position_dodge2(width = .75),
    shape = 21, color = "black", stroke = .5, key_glyph = "rect"
  ) +
  coord_flip() +
  scale_y_time(
    breaks = seq(0, 10, .5) * 60^2,
    minor_breaks = seq(0, 10, .25) * 60^2
  ) +
  scale_colorfill_gdq(guide = guide_legend(reverse = TRUE)) +
  labs(
    title = p_title_r,
    subtitle = "Run durations by event: Sub 2 hour runs only",
    x = "", y = "H:M:S"
  ) +
  # No trailing comma after the last argument: it would pass an empty
  # argument on to the theme function.
  theme_gdqd(
    grid = "Xx",
    legend.position = c(.6, 1.08),
    legend.direction = "horizontal"
  )

Or maybe a histogram.

# Histogram of run durations with a bin width of 1800 (30 minutes if
# `run_time` is in seconds, as the subtitle implies), one panel per series.
ggplot(runs, aes(run_time, fill = gdq, color = gdq)) +
  geom_histogram(alpha = 0.5, binwidth = 30 * 60, position = "dodge") +
  facet_wrap(~gdq) +
  scale_colorfill_gdq(guide = FALSE) +
  labs(
    y = "# of runs",
    subtitle = "Number of runs in 30min intervals across all events",
    title = p_title_r
  )

Runners

# The 20 most frequent runners (ties included) with their number of runs
# per event series, in long format for plotting.
runners_top20 <- runs %>%
  # One row per runner for runs with several comma-separated players.
  separate_rows(players, sep = ", ") %>%
  count(players, gdq, sort = TRUE) %>%
  group_by(players) %>%
  mutate(player_total = sum(n)) %>%
  ungroup() %>%
  # Rank on the wide table so every runner is ranked exactly once.
  spread(gdq, n) %>%
  mutate(player_total_rank = min_rank(desc(player_total))) %>%
  filter(player_total_rank <= 20) %>%
  gather(gdq, n, matches("[AS]GDQ")) %>%
  mutate(gdq = fct_rev(gdq)) %>%
  arrange(desc(player_total))

# Horizontal stacked bars: runs per runner, split by event series, with
# the most frequent runner at the top.
ggplot(
  runners_top20,
  aes(reorder(players, -player_total_rank), n, fill = gdq, color = gdq)
) +
  geom_col(alpha = 0.75) +
  coord_flip() +
  scale_colorfill_gdq(guide = guide_legend(reverse = TRUE)) +
  labs(
    x = "", y = "# of runs",
    subtitle = "Top 20 runners by number of total appearances",
    title = p_title_runners
  ) +
  theme_gdqd(grid = "Xx")

Run length by runner

# Average run duration of the top-20 runners, converted from `run_time`
# by dividing by 60 (minutes, per the axis label).
runs %>%
  separate_rows(players, sep = ", ") %>%
  filter(players %in% runners_top20$players) %>%
  group_by(players) %>%
  summarize(
    runlength_mean = mean(as.numeric(run_time) / 60)
  ) %>%
  ungroup() %>%
  ggplot(aes(fct_reorder(players, -runlength_mean), runlength_mean)) +
  geom_col(alpha = 0.75) +
  coord_flip() +
  labs(
    x = "", y = "Average run duration (min)",
    subtitle = "Top 20 frequent runners by average run length",
    title = p_title_runners
  ) +
  theme_gdqd(grid = "Xx")

Who ran the last game

# How often each runner played the final run of an event. The last row of
# each event is taken as its final run, which relies on `runs` being in
# schedule order (TODO confirm).
runs %>%
  group_by(event) %>%
  filter(row_number() == n()) %>%
  ungroup() %>%
  separate_rows(players, sep = ", ") %>%
  count(gdq, players, sort = TRUE) %>%
  spread(gdq, n) %>%
  # A runner missing from one series has NA there; count that as zero.
  mutate_all(function(col) ifelse(is.na(col), 0, col)) %>%
  mutate(
    total = AGDQ + SGDQ,
    rank = min_rank(desc(total))
  ) %>%
  gather(gdq, n, AGDQ, SGDQ) %>%
  ggplot(aes(reorder(players, -rank), n, fill = gdq, color = gdq)) +
  geom_col(alpha = 0.75) +
  coord_flip() +
  scale_colorfill_gdq() +
  scale_y_continuous(breaks = seq(from = 0, to = 100, by = 1)) +
  minilabs(
    title = p_title_runners,
    subtitle = "Who ran the last game of the event, how often"
  ) +
  theme_gdqd(grid = "X")

Games

# The 20 most frequently run games (ties included) with their number of
# appearances per event series, in long format for plotting.
runs_top20 <- runs %>%
  count(run, gdq, sort = TRUE) %>%
  group_by(run) %>%
  mutate(run_total = sum(n)) %>%
  ungroup() %>%
  # Rank on the wide table so every game is ranked exactly once.
  spread(gdq, n) %>%
  mutate(run_total_rank = min_rank(desc(run_total))) %>%
  filter(run_total_rank <= 20) %>%
  gather(gdq, n, matches("[AS]GDQ")) %>%
  mutate(gdq = fct_rev(gdq)) %>%
  arrange(desc(run_total))

# Horizontal stacked bars: appearances per game, split by event series,
# with the most frequent game at the top.
ggplot(
  runs_top20,
  aes(reorder(run, -run_total_rank), n, fill = gdq, color = gdq)
) +
  geom_col(alpha = 0.75) +
  coord_flip() +
  scale_colorfill_gdq(guide = guide_legend(reverse = TRUE)) +
  labs(
    x = "", y = "# of runs",
    subtitle = "Top 20 games by number of total appearances",
    title = p_title_r
  ) +
  theme_gdqd(grid = "X")

By decade

# Share of runs per release decade. Proportions are computed within each
# event series (group = gdq), so the bars of one series sum to 100%.
ggplot(runs, aes(game_decade, fill = gdq, color = gdq)) +
  geom_bar(
    aes(group = gdq, y = stat(prop)),
    position = position_dodge2(preserve = "single"), alpha = 0.75
  ) +
  scale_colorfill_gdq() +
  scale_y_continuous(labels = function(share) paste0(100 * share, "%")) +
  labs(
    x = "", y = "% within event series",
    subtitle = "Games by decade of release",
    title = p_title_r
  )

The oldest and newest games:

# Runs of the games with the earliest and the latest release year.
runs %>%
  # na.rm = TRUE: a single missing game_year would otherwise turn both
  # extremes into NA, and the filter would then select only the NA rows.
  filter(
    game_year %in% c(
      min(game_year, na.rm = TRUE),
      max(game_year, na.rm = TRUE)
    )
  ) %>%
  select(run, platform, players, run_time, event) %>%
  # Title-case the headers and turn underscores into spaces
  # ("run_time" -> "Run Time").
  rename_all(~str_to_title(str_replace_all(.x, "_", " "))) %>%
  kable() %>%
  kable_styling()
Run Platform Players Run_time Event
Oregon Trail (1971) PC Skybilz 00:05:22 SGDQ2015
The Textorcist (2019) PC teddyras 00:28:20 SGDQ2019
Resident Evil 2 (2019) (2019) PC Bawkbasoup 00:54:27 SGDQ2019
Kingdom Hearts III (2019) PS4 mistmaster1, CrispyMe, ninten866 03:12:02 SGDQ2019
Grand Poo World 2 (2019) SNES mitchflowerpower 01:07:36 SGDQ2019