#!/usr/bin/env Rscript
# Plot the empirical CDF of per-flow inter-departure times, one curve per
# solution, optionally zoomed into the upper tail, and save the figure.

# Shared helpers used below (fig_parser, prepare_solution, theme_paper,
# save_figure, the LABEL_* palettes, ...) are expected to come from common.R.
source("common.R")

# Random sample size per solution. stat_ecdf emits one tikz path segment per
# unique x value; the raw idts.csv has ~120 K unique values per solution,
# producing a ~500 K-line tikz file that exceeds pdflatex main_memory.
# The default of 20000 samples per solution preserves the CDF shape (the
# 92.5-100 % zoom tail keeps ~7.5 % of the sample, i.e. ~1500 points) and is
# meant to keep the tikz file manageable; pass a smaller --sample if pdflatex
# still runs out of memory.
SAMPLE_PER_SOLUTION <- 20000

# Command-line interface ----
parser <- fig_parser(description = "Per-flow inter-departure-time CDF")
parser$add_argument(
  "--sample",
  type = "integer",
  default = SAMPLE_PER_SOLUTION,
  help = "downsample to N points per solution (0 = no downsampling)"
)
parser$add_argument(
  "--seed",
  type = "integer",
  default = 1,
  help = "RNG seed for downsampling"
)
args <- parser$parse_args()

# Seed the RNG so the random downsample (and hence the figure) is reproducible.
set.seed(args$seed)

# Load data ----
idts <- read_csv(file.path(args$data, "idts.csv"), show_col_types = FALSE) %>%
  prepare_solution()

# Downsample each solution independently; any non-positive --sample keeps
# every row.
if (args$sample > 0) {
  idts <- idts %>%
    group_by(solution) %>%
    slice_sample(n = args$sample) %>%
    ungroup()
}

# Build the plot ----
# pad = FALSE stops stat_ecdf from adding the -Inf/Inf end points to each curve.
p <- ggplot(idts, aes(x = idt_us, colour = solution, linetype = solution)) +
  stat_ecdf(linewidth = 0.9, pad = FALSE, key_glyph = "rect") +
  scale_x_log10(labels = label_comma()) +
  scale_y_continuous(labels = label_pct()) +
  scale_colour_manual(values = LABEL_COLORS) +
  scale_linetype_manual(values = LABEL_LINETYPES) +
  labs(
    x = paste0("Inter-departure time within flow (", label_us(), ")"),
    y = "CDF"
  ) +
  theme_paper() +
  theme(
    legend.text = element_text(size = rel(0.8)),
    legend.key.size = unit(6, "pt")
  )

# Optional zoom into the 92.5-100 % tail. coord_cartesian only changes the
# visible window, so the ECDF is still computed from all plotted points.
# NOTE(review): args$zoom is presumably declared by fig_parser() -- confirm.
if (args$zoom) {
  p <- p + coord_cartesian(xlim = c(10, 1000), ylim = c(0.925, 1.0))
}

save_figure(p, args)