Files
tso-paper-eval/analysis/values/idt.py
T
2026-05-27 21:00:28 +02:00

61 lines
2.5 KiB
Python

"""Per-flow IDT per (experiment, solution): percentiles, mean, SD in microseconds.
"""
from pathlib import Path
import numpy as np
import pandas as pd
def _stats(x: np.ndarray) -> dict[str, str]:
    """Summarise a 1-D sample array as a mapping of pre-formatted strings.

    The result holds the mean, the sample standard deviation (``ddof=1``),
    the median, the 5th/25th/75th/95th/99th percentiles and the
    inter-quartile range, each rendered with two decimals, plus the number
    of samples as a plain integer string.

    Keys carry a ``-us`` suffix; no unit conversion happens here, so the
    caller is expected to pass values that are already in microseconds.
    """
    ranks = (5, 25, 50, 75, 95, 99)
    quantile = dict(zip(ranks, np.percentile(x, list(ranks))))

    # (key, raw value) pairs, listed in the order the keys must appear.
    figures = (
        ("mean-us", x.mean()),
        ("sd-us", x.std(ddof=1)),
        ("median-us", quantile[50]),
        ("p05-us", quantile[5]),
        ("p25-us", quantile[25]),
        ("p75-us", quantile[75]),
        ("p95-us", quantile[95]),
        ("p99-us", quantile[99]),
        ("iqr-us", quantile[75] - quantile[25]),
    )

    summary = {label: f"{value:.2f}" for label, value in figures}
    summary["n-samples"] = str(x.size)
    return summary
def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
    """Collect IDT summary values for every experiment directory in *derived*.

    Each immediate subdirectory of *derived* that contains an ``idts.csv``
    is treated as one experiment. Its ``idt_us`` column is grouped by
    ``solution`` and every group is summarised with ``_stats``; the results
    are stored under ``<experiment>/idt/<solution>/<stat>``.

    When an experiment has a ``tso-pacing`` group, its SD and IQR are also
    compared against the ``no-tso``, ``tso`` and ``cake`` groups (whichever
    are present) and stored under
    ``<experiment>/idt/tso-pacing-vs-<other>/...`` as ratio and reduction
    percentages with one decimal.

    Returns:
        A pair ``(values, sources)``: the flat key -> formatted-string
        mapping, and the ``idts.csv`` files that were read, in sorted
        directory order.
    """
    values: dict[str, str] = {}
    inputs: list[Path] = []

    experiments = [entry for entry in derived.iterdir() if entry.is_dir()]
    experiments.sort()

    for experiment in experiments:
        csv_file = experiment / "idts.csv"
        if not csv_file.exists():
            continue
        inputs.append(csv_file)

        frame = pd.read_csv(csv_file, usecols=["solution", "idt_us"])
        summaries: dict[str, dict[str, str]] = {}
        for solution, rows in frame.groupby("solution", sort=True):
            summary = _stats(rows["idt_us"].to_numpy())
            summaries[solution] = summary
            values.update(
                (f"{experiment.name}/idt/{solution}/{stat}", text)
                for stat, text in summary.items()
            )

        paced = summaries.get("tso-pacing")
        if paced is None:
            continue

        # The comparisons work on the already-rounded two-decimal strings,
        # so the ratios match what can be recomputed from the emitted values.
        paced_sd = float(paced["sd-us"])
        paced_iqr = float(paced["iqr-us"])

        for rival in ("no-tso", "tso", "cake"):
            baseline = summaries.get(rival)
            if baseline is None:
                continue
            rival_sd = float(baseline["sd-us"])
            rival_iqr = float(baseline["iqr-us"])
            base = f"{experiment.name}/idt/tso-pacing-vs-{rival}"

            if rival_sd > 0:
                out_ratio = f"{100 * paced_sd / rival_sd:.1f}"
                out_cut = f"{100 * (1 - paced_sd / rival_sd):.1f}"
                values[f"{base}/sd-ratio-pct"] = out_ratio
                values[f"{base}/sd-reduction-pct"] = out_cut

            # An IQR ratio only makes sense when the baseline's bulk actually
            # spreads out: un-paced IDT is bimodal (back-to-back packets with
            # a heavy tail above p95), so its IQR is frequently zero. Skip
            # the ratio in that case instead of dividing by zero.
            if rival_iqr > 0:
                out_ratio = f"{100 * paced_iqr / rival_iqr:.1f}"
                out_cut = f"{100 * (1 - paced_iqr / rival_iqr):.1f}"
                values[f"{base}/iqr-ratio-pct"] = out_ratio
                values[f"{base}/iqr-reduction-pct"] = out_cut

    return values, inputs