initial commit for public eval

2026-05-27 21:00:28 +02:00
commit d77a1bf412
36 changed files with 5414 additions and 0 deletions
@@ -0,0 +1,34 @@
+"""First-flow packet-per-bin stats from firstflow_bins.csv.
+"""
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+
+def _stats(x: np.ndarray) -> dict[str, str]:
+    return {
+        "max-pkts":    str(int(x.max())),
+        "mean-pkts":   f"{x.mean():.2f}",
+        "median-pkts": str(int(np.median(x))),
+        "p95-pkts":    str(int(np.percentile(x, 95))),
+        "p99-pkts":    str(int(np.percentile(x, 99))),
+        "sd-pkts":     f"{x.std(ddof=1):.2f}",
+        "n-bins":      str(x.size),
+    }
+
+
+def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
+    out: dict[str, str] = {}
+    sources: list[Path] = []
+    for exp_dir in sorted(p for p in derived.iterdir() if p.is_dir()):
+        bins_path = exp_dir / "firstflow_bins.csv"
+        if not bins_path.exists():
+            continue
+        sources.append(bins_path)
+        df = pd.read_csv(bins_path)
+        for sol, sub in df.groupby("solution", sort=True):
+            x = sub["packets"].to_numpy()
+            for k, v in _stats(x).items():
+                out[f"{exp_dir.name}/bins/{sol}/{k}"] = v
+    return out, sources
@@ -0,0 +1,28 @@
+"""Sender + receiver CPU per (experiment, solution): mean and sample SD in %."""
+from pathlib import Path
+
+import pandas as pd
+
+
+METRICS = {  # output metric label -> runs.csv column that compute() summarises
+    "sender-cpu":   "cpu_sender",
+    "receiver-cpu": "cpu_receiver",
+}
+
+
+def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
+    out: dict[str, str] = {}
+    sources: list[Path] = []
+    for exp_dir in sorted(p for p in derived.iterdir() if p.is_dir()):
+        runs = exp_dir / "runs.csv"
+        if not runs.exists():
+            continue
+        sources.append(runs)
+        df = pd.read_csv(runs)
+        for sol, sub in df.groupby("solution", sort=True):
+            for metric, col in METRICS.items():
+                x = sub[col].to_numpy()
+                base = f"{exp_dir.name}/{metric}/{sol}"
+                out[f"{base}/mean-pct"] = f"{x.mean():.2f}"
+                out[f"{base}/sd-pct"]   = f"{x.std(ddof=1):.2f}"
+    return out, sources
@@ -0,0 +1,60 @@
+"""Per-flow IDT per (experiment, solution): percentiles, mean, SD in microseconds.
+"""
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+
+def _stats(x: np.ndarray) -> dict[str, str]:
+    q05, q25, q50, q75, q95, q99 = np.percentile(x, [5, 25, 50, 75, 95, 99])
+    return {
+        "mean-us":        f"{x.mean():.2f}",
+        "sd-us":          f"{x.std(ddof=1):.2f}",
+        "median-us":      f"{q50:.2f}",
+        "p05-us":         f"{q05:.2f}",
+        "p25-us":         f"{q25:.2f}",
+        "p75-us":         f"{q75:.2f}",
+        "p95-us":         f"{q95:.2f}",
+        "p99-us":         f"{q99:.2f}",
+        "iqr-us":         f"{q75 - q25:.2f}",
+        "n-samples":      str(x.size),
+    }
+
+
+def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
+    out: dict[str, str] = {}
+    sources: list[Path] = []
+    for exp_dir in sorted(p for p in derived.iterdir() if p.is_dir()):
+        idts_path = exp_dir / "idts.csv"
+        if not idts_path.exists():
+            continue
+        sources.append(idts_path)
+        df = pd.read_csv(idts_path, usecols=["solution", "idt_us"])
+        per_sol: dict[str, dict[str, str]] = {}
+        for sol, sub in df.groupby("solution", sort=True):
+            x = sub["idt_us"].to_numpy()
+            stats = _stats(x)
+            per_sol[sol] = stats
+            for k, v in stats.items():
+                out[f"{exp_dir.name}/idt/{sol}/{k}"] = v
+
+        # IQR is meaningful only when the baseline solution actually spreads
+        # the bulk; pre-pacing IDT is bimodal (back-to-back packets, heavy
+        # tail above p95) so its IQR is often zero. Guard the ratio.
+        if "tso-pacing" in per_sol:
+            pac_sd  = float(per_sol["tso-pacing"]["sd-us"])
+            pac_iqr = float(per_sol["tso-pacing"]["iqr-us"])
+            for other in ("no-tso", "tso", "cake"):
+                if other not in per_sol:
+                    continue
+                o_sd  = float(per_sol[other]["sd-us"])
+                o_iqr = float(per_sol[other]["iqr-us"])
+                base = f"{exp_dir.name}/idt/tso-pacing-vs-{other}"
+                if o_sd > 0:
+                    out[f"{base}/sd-ratio-pct"]      = f"{100 * pac_sd / o_sd:.1f}"
+                    out[f"{base}/sd-reduction-pct"]  = f"{100 * (1 - pac_sd / o_sd):.1f}"
+                if o_iqr > 0:
+                    out[f"{base}/iqr-ratio-pct"]     = f"{100 * pac_iqr / o_iqr:.1f}"
+                    out[f"{base}/iqr-reduction-pct"] = f"{100 * (1 - pac_iqr / o_iqr):.1f}"
+    return out, sources
@@ -0,0 +1,15 @@
+"""NFP-4000 memory constants (capacities + access latencies).
+
+Source: Netronome Network Flow Processor 4xxx Family datasheet
+(netronomeNetronomeNetworkFlow2018).
+"""
+from pathlib import Path
+
+
+def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
+    return {
+        "nfp4000/lmem/capacity-kb":    "4",
+        "nfp4000/lmem/latency-cycles": "1-3",
+        "nfp4000/ctm/capacity-kb":     "256",
+        "nfp4000/ctm/latency-cycles":  "50-100",
+    }, []
@@ -0,0 +1,60 @@
+"""RTT per (experiment, solution): mean, SD, quantiles, spread.
+
+Also emits tso-pacing's dispersion ratios (SD, 5-95 spread, IQR) vs the other solutions.
+"""
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+
+def _stats(x: np.ndarray) -> dict[str, str]:
+    q05, q25, q50, q75, q95 = np.percentile(x, [5, 25, 50, 75, 95])
+    return {
+        "mean-ms":        f"{x.mean():.2f}",
+        "sd-ms":          f"{x.std(ddof=1):.2f}",
+        "median-ms":      f"{q50:.2f}",
+        "iqr-ms":         f"{q75 - q25:.2f}",
+        "p05-ms":         f"{q05:.2f}",
+        "p95-ms":         f"{q95:.2f}",
+        "spread-5-95-ms": f"{q95 - q05:.2f}",
+        "n-samples":      str(x.size),
+    }
+
+
+def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
+    out: dict[str, str] = {}
+    sources: list[Path] = []
+    for exp_dir in sorted(p for p in derived.iterdir() if p.is_dir()):
+        rtts = exp_dir / "rtts.csv"
+        if not rtts.exists():
+            continue
+        sources.append(rtts)
+        df = pd.read_csv(rtts)
+        per_sol: dict[str, dict[str, str]] = {}
+        for sol, sub in df.groupby("solution", sort=True):
+            x = sub["rtt_us"].to_numpy() / 1000.0
+            stats = _stats(x)
+            per_sol[sol] = stats
+            for k, v in stats.items():
+                out[f"{exp_dir.name}/rtt/{sol}/{k}"] = v
+
+        if "tso-pacing" in per_sol:
+            pac_sd     = float(per_sol["tso-pacing"]["sd-ms"])
+            pac_spread = float(per_sol["tso-pacing"]["spread-5-95-ms"])
+            pac_iqr    = float(per_sol["tso-pacing"]["iqr-ms"])
+            for other in ("no-tso", "tso", "cake"):
+                if other not in per_sol:
+                    continue
+                o = per_sol[other]
+                o_sd     = float(o["sd-ms"])
+                o_spread = float(o["spread-5-95-ms"])
+                o_iqr    = float(o["iqr-ms"])
+                base = f"{exp_dir.name}/rtt/tso-pacing-vs-{other}"
+                out[f"{base}/sd-ratio-pct"]         = f"{100 * pac_sd / o_sd:.1f}"
+                out[f"{base}/spread-ratio-pct"]     = f"{100 * pac_spread / o_spread:.1f}"
+                out[f"{base}/iqr-ratio-pct"]        = f"{100 * pac_iqr / o_iqr:.1f}"
+                out[f"{base}/sd-reduction-pct"]     = f"{100 * (1 - pac_sd / o_sd):.1f}"
+                out[f"{base}/spread-reduction-pct"] = f"{100 * (1 - pac_spread / o_spread):.1f}"
+                out[f"{base}/iqr-reduction-pct"]    = f"{100 * (1 - pac_iqr / o_iqr):.1f}"
+    return out, sources
@@ -0,0 +1,109 @@
+"""Pairwise hypothesis tests across solutions.
+
+Means: Welch's t (two-sided + one-sided) and Mann-Whitney U.
+Variance: Brown-Forsythe (Levene, median-centered) and Fligner-Killeen.
+"""
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from scipy import stats
+
+PAIRS = [  # (a, b) solution-name pairs; compute() tests a pair only when both appear in the data
+    ("no-tso", "tso"),
+    ("no-tso", "tso-pacing"),
+    ("no-tso", "cake"),
+    ("tso", "tso-pacing"),
+    ("tso", "cake"),
+    ("tso-pacing", "cake"),
+]
+
+ALPHA = 0.05  # uncorrected significance level; compute() divides it by each family's comparison count
+
+
+def _fmt(x: float) -> str:
+    return f"{x:.4g}"
+
+
+def _mean_tests(out: dict, base: str, a: np.ndarray, b: np.ndarray) -> None:
+    _, p_t  = stats.ttest_ind(a, b, equal_var=False)
+    _, p_tl = stats.ttest_ind(a, b, equal_var=False, alternative="less")
+    _, p_tg = stats.ttest_ind(a, b, equal_var=False, alternative="greater")
+    _, p_u  = stats.mannwhitneyu(a, b, alternative="two-sided")
+    out[f"{base}/welch-p"]         = _fmt(p_t)
+    out[f"{base}/welch-less-p"]    = _fmt(p_tl)
+    out[f"{base}/welch-greater-p"] = _fmt(p_tg)
+    out[f"{base}/mwu-p"]           = _fmt(p_u)
+
+
+def _variance_tests(out: dict, base: str, a: np.ndarray, b: np.ndarray) -> None:
+    bf = stats.levene(a, b, center="median")
+    fl = stats.fligner(a, b)
+    out[f"{base}/bf-p"]      = _fmt(bf.pvalue)
+    out[f"{base}/fligner-p"] = _fmt(fl.pvalue)
+
+
+def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
+    out: dict[str, str] = {}
+    sources: list[Path] = []
+    for exp_dir in sorted(p for p in derived.iterdir() if p.is_dir()):
+        runs_path = exp_dir / "runs.csv"
+        rtts_path = exp_dir / "rtts.csv"
+
+        if runs_path.exists():
+            sources.append(runs_path)
+            runs = pd.read_csv(runs_path)
+            for metric, col in (("sender-cpu", "cpu_sender"),
+                                ("receiver-cpu", "cpu_receiver")):
+                vals = {sol: sub[col].to_numpy() for sol, sub in runs.groupby("solution")}
+                for a, b in PAIRS:
+                    if a not in vals or b not in vals:
+                        continue
+                    base = f"{exp_dir.name}/{metric}/test/{a}-vs-{b}"
+                    _mean_tests(out, base, vals[a], vals[b])
+
+        if rtts_path.exists():
+            sources.append(rtts_path)
+            rtts = pd.read_csv(rtts_path)
+            vals = {sol: sub["rtt_us"].to_numpy() / 1000.0
+                    for sol, sub in rtts.groupby("solution")}
+            for a, b in PAIRS:
+                if a not in vals or b not in vals:
+                    continue
+                base = f"{exp_dir.name}/rtt/test/{a}-vs-{b}"
+                _mean_tests(out, base, vals[a], vals[b])
+                _variance_tests(out, base, vals[a], vals[b])
+
+        idts_path = exp_dir / "idts.csv"
+        if idts_path.exists():
+            sources.append(idts_path)
+            idts = pd.read_csv(idts_path, usecols=["solution", "idt_us"])
+            vals = {sol: sub["idt_us"].to_numpy()
+                    for sol, sub in idts.groupby("solution")}
+            for a, b in PAIRS:
+                if a not in vals or b not in vals:
+                    continue
+                base = f"{exp_dir.name}/idt/test/{a}-vs-{b}"
+                _mean_tests(out, base, vals[a], vals[b])
+                _variance_tests(out, base, vals[a], vals[b])
+
+    # Bonferroni-corrected thresholds. Count one entry per comparison
+    # (welch-p for mean families, bf-p for variance) so the threshold reflects
+    # the number of pairwise comparisons, not the number of test statistics.
+    def n_with(in_pattern: str, suffix: str) -> int:
+        return sum(1 for k in out if in_pattern in k and k.endswith(suffix))
+
+    families = {
+        "cpu":          n_with("-cpu/test/",  "/mwu-p"),
+        "rtt-mean":     n_with("/rtt/test/",  "/mwu-p"),
+        "rtt-variance": n_with("/rtt/test/",  "/bf-p"),
+        "idt-mean":     n_with("/idt/test/",  "/mwu-p"),
+        "idt-variance": n_with("/idt/test/",  "/bf-p"),
+    }
+    for name, n in families.items():
+        if n > 0:
+            out[f"bonferroni/{name}-n"]     = str(n)
+            out[f"bonferroni/{name}-alpha"] = _fmt(ALPHA / n)
+    out["bonferroni/alpha-uncorrected"] = _fmt(ALPHA)
+
+    return out, sources
@@ -0,0 +1,22 @@
+"""Throughput per (experiment, solution): mean and sample SD in Gbps."""
+from pathlib import Path
+
+import pandas as pd
+
+
+def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
+    out: dict[str, str] = {}
+    sources: list[Path] = []
+    for exp_dir in sorted(p for p in derived.iterdir() if p.is_dir()):
+        runs = exp_dir / "runs.csv"
+        if not runs.exists():
+            continue
+        sources.append(runs)
+        df = pd.read_csv(runs)
+        for sol, sub in df.groupby("solution", sort=True):
+            thr = sub["throughput_bps"].to_numpy() / 1e9
+            base = f"{exp_dir.name}/throughput/{sol}"
+            out[f"{base}/mean-gbps"] = f"{thr.mean():.3f}"
+            out[f"{base}/sd-gbps"]   = f"{thr.std(ddof=1):.3f}"
+            out[f"{base}/n-runs"]    = str(len(thr))
+    return out, sources