"""Per-flow IDT per (experiment, solution): percentiles, mean, SD in microseconds. """ from pathlib import Path import numpy as np import pandas as pd def _stats(x: np.ndarray) -> dict[str, str]: q05, q25, q50, q75, q95, q99 = np.percentile(x, [5, 25, 50, 75, 95, 99]) return { "mean-us": f"{x.mean():.2f}", "sd-us": f"{x.std(ddof=1):.2f}", "median-us": f"{q50:.2f}", "p05-us": f"{q05:.2f}", "p25-us": f"{q25:.2f}", "p75-us": f"{q75:.2f}", "p95-us": f"{q95:.2f}", "p99-us": f"{q99:.2f}", "iqr-us": f"{q75 - q25:.2f}", "n-samples": str(x.size), } def compute(derived: Path) -> tuple[dict[str, str], list[Path]]: out: dict[str, str] = {} sources: list[Path] = [] for exp_dir in sorted(p for p in derived.iterdir() if p.is_dir()): idts_path = exp_dir / "idts.csv" if not idts_path.exists(): continue sources.append(idts_path) df = pd.read_csv(idts_path, usecols=["solution", "idt_us"]) per_sol: dict[str, dict[str, str]] = {} for sol, sub in df.groupby("solution", sort=True): x = sub["idt_us"].to_numpy() stats = _stats(x) per_sol[sol] = stats for k, v in stats.items(): out[f"{exp_dir.name}/idt/{sol}/{k}"] = v # IQR is meaningful only when the baseline solution actually spreads # the bulk; pre-pacing IDT is bimodal (back-to-back packets, heavy # tail above p95) so its IQR is often zero. Guard the ratio. if "tso-pacing" in per_sol: pac_sd = float(per_sol["tso-pacing"]["sd-us"]) pac_iqr = float(per_sol["tso-pacing"]["iqr-us"]) for other in ("no-tso", "tso", "cake"): if other not in per_sol: continue o_sd = float(per_sol[other]["sd-us"]) o_iqr = float(per_sol[other]["iqr-us"]) base = f"{exp_dir.name}/idt/tso-pacing-vs-{other}" if o_sd > 0: out[f"{base}/sd-ratio-pct"] = f"{100 * pac_sd / o_sd:.1f}" out[f"{base}/sd-reduction-pct"] = f"{100 * (1 - pac_sd / o_sd):.1f}" if o_iqr > 0: out[f"{base}/iqr-ratio-pct"] = f"{100 * pac_iqr / o_iqr:.1f}" out[f"{base}/iqr-reduction-pct"] = f"{100 * (1 - pac_iqr / o_iqr):.1f}" return out, sources