Files
tso-paper-eval/figures/fig_idt_cdf.R
T
2026-05-27 21:00:28 +02:00

44 lines
1.7 KiB
R

#!/usr/bin/env Rscript
source("common.R")
# Random sample size per solution. stat_ecdf emits one tikz path segment per
# unique x value; the raw idts.csv has ~120 K unique values per solution,
# producing a ~500 K-line tikz file that exceeds pdflatex main_memory.
# 20000 samples per solution preserve the CDF shape (including the 92.5-100 %
# zoom tail: ~1500 points) and keep the tikz file manageable.
# Default number of rows kept per solution when downsampling; used as the
# default of the --sample option below.
SAMPLE_PER_SOLUTION <- 20000

# Command-line interface. fig_parser() is not defined in this script
# (presumably it comes from common.R and supplies the shared options; args$data
# and args$zoom are read further down but not declared here -- TODO confirm).
parser <- fig_parser(description = "Per-flow inter-departure-time CDF")

# --sample: how many rows to keep per solution; 0 turns downsampling off.
parser$add_argument(
  "--sample",
  type = "integer",
  default = SAMPLE_PER_SOLUTION,
  help = "downsample to N points per solution (0 = no downsampling)"
)

# --seed: makes the random downsample reproducible between runs.
parser$add_argument(
  "--seed",
  type = "integer",
  default = 1,
  help = "RNG seed for downsampling"
)

args <- parser$parse_args()

# Seed the RNG before any sampling happens.
set.seed(args$seed)
# Load idts.csv from the data directory given on the command line, then pass it
# through prepare_solution() (defined outside this script; presumably it
# normalises the `solution` column used for grouping and styling -- confirm).
idts <- read_csv(file.path(args$data, "idts.csv"), show_col_types = FALSE)
idts <- prepare_solution(idts)

# Optional random downsample, drawn independently within each solution.
# Any value <= 0 leaves the data untouched. Groups with fewer rows than
# requested are kept whole, because slice_sample() caps n at the group size.
if (args$sample > 0) {
  idts <- group_by(idts, solution)
  idts <- slice_sample(idts, n = args$sample)
  # Drop the grouping again so later steps see a plain, ungrouped table.
  idts <- ungroup(idts)
}
# Empirical CDF of idt_us, one curve per solution; colour and linetype both
# encode the solution.
p <- ggplot(idts, aes(x = idt_us, colour = solution, linetype = solution))
p <- p + stat_ecdf(linewidth = 0.9, pad = FALSE, key_glyph = "rect")

# Axes: logarithmic x with comma-formatted labels; y formatted by label_pct().
p <- p +
  scale_x_log10(labels = label_comma()) +
  scale_y_continuous(labels = label_pct())

# Per-solution colours and linetypes come from lookup tables defined outside
# this script (LABEL_COLORS / LABEL_LINETYPES).
p <- p +
  scale_colour_manual(values = LABEL_COLORS) +
  scale_linetype_manual(values = LABEL_LINETYPES)

# Axis titles; label_us() provides the unit text inside the parentheses.
p <- p + labs(
  x = paste0("Inter-departure time within flow (", label_us(), ")"),
  y = "CDF"
)

# Shared theme first, then a more compact legend for this figure.
p <- p + theme_paper()
p <- p + theme(
  legend.text = element_text(size = rel(0.8)),
  legend.key.size = unit(6, "pt")
)

# Zoomed variant: restrict the visible window to x in [10, 1000] and
# y in [0.925, 1.0]. coord_cartesian() changes only the view, so the ECDF is
# still computed from all rows.
# NOTE(review): args$zoom is not declared in this script; presumably
# fig_parser() defines it -- confirm.
if (args$zoom) {
  p <- p + coord_cartesian(xlim = c(10, 1000), ylim = c(0.925, 1.0))
}

# Hand the finished plot and the parsed options to the project's save helper.
save_figure(p, args)