# tso-paper-eval/analysis/values/idt.py

"""Per-flow IDT per (experiment, solution): percentiles, mean, SD in microseconds.
"""
from pathlib import Path

import numpy as np
import pandas as pd


def _stats(x: np.ndarray) -> dict[str, str]:
    q05, q25, q50, q75, q95, q99 = np.percentile(x, [5, 25, 50, 75, 95, 99])
    return {
        "mean-us":        f"{x.mean():.2f}",
        "sd-us":          f"{x.std(ddof=1):.2f}",
        "median-us":      f"{q50:.2f}",
        "p05-us":         f"{q05:.2f}",
        "p25-us":         f"{q25:.2f}",
        "p75-us":         f"{q75:.2f}",
        "p95-us":         f"{q95:.2f}",
        "p99-us":         f"{q99:.2f}",
        "iqr-us":         f"{q75 - q25:.2f}",
        "n-samples":      str(x.size),
    }


def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
    out: dict[str, str] = {}
    sources: list[Path] = []
    for exp_dir in sorted(p for p in derived.iterdir() if p.is_dir()):
        idts_path = exp_dir / "idts.csv"
        if not idts_path.exists():
            continue
        sources.append(idts_path)
        df = pd.read_csv(idts_path, usecols=["solution", "idt_us"])
        per_sol: dict[str, dict[str, str]] = {}
        for sol, sub in df.groupby("solution", sort=True):
            x = sub["idt_us"].to_numpy()
            stats = _stats(x)
            per_sol[sol] = stats
            for k, v in stats.items():
                out[f"{exp_dir.name}/idt/{sol}/{k}"] = v

        # IQR is meaningful only when the baseline solution actually spreads
        # the bulk; pre-pacing IDT is bimodal (back-to-back packets, heavy
        # tail above p95) so its IQR is often zero. Guard the ratio.
        if "tso-pacing" in per_sol:
            pac_sd  = float(per_sol["tso-pacing"]["sd-us"])
            pac_iqr = float(per_sol["tso-pacing"]["iqr-us"])
            for other in ("no-tso", "tso", "cake"):
                if other not in per_sol:
                    continue
                o_sd  = float(per_sol[other]["sd-us"])
                o_iqr = float(per_sol[other]["iqr-us"])
                base = f"{exp_dir.name}/idt/tso-pacing-vs-{other}"
                if o_sd > 0:
                    out[f"{base}/sd-ratio-pct"]      = f"{100 * pac_sd / o_sd:.1f}"
                    out[f"{base}/sd-reduction-pct"]  = f"{100 * (1 - pac_sd / o_sd):.1f}"
                if o_iqr > 0:
                    out[f"{base}/iqr-ratio-pct"]     = f"{100 * pac_iqr / o_iqr:.1f}"
                    out[f"{base}/iqr-reduction-pct"] = f"{100 * (1 - pac_iqr / o_iqr):.1f}"
    return out, sources