35 lines
1.1 KiB
Python
35 lines
1.1 KiB
Python
"""First-flow packet-per-bin stats from firstflow_bins.csv.
|
|
"""
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
def _stats(x: np.ndarray) -> dict[str, str]:
    """Summarise an array of packet counts as display-ready strings.

    Args:
        x: Numeric array of packet counts; its element count is reported
            under ``"n-bins"``.

    Returns:
        A dict with the keys ``max-pkts``, ``mean-pkts``, ``median-pkts``,
        ``p95-pkts``, ``p99-pkts``, ``sd-pkts`` and ``n-bins`` (inserted in
        that order). The max, median and percentiles are truncated to
        integers with ``int()``; the mean and the sample standard deviation
        (``ddof=1``) are formatted with two decimals.
    """
    summary: dict[str, str] = {}
    summary["max-pkts"] = str(int(x.max()))
    summary["mean-pkts"] = format(x.mean(), ".2f")
    summary["median-pkts"] = str(int(np.median(x)))
    # Percentiles use NumPy's default method; int() drops the fraction.
    summary["p95-pkts"] = str(int(np.percentile(x, 95)))
    summary["p99-pkts"] = str(int(np.percentile(x, 99)))
    # ddof=1 selects the sample (n - 1) standard deviation.
    summary["sd-pkts"] = format(x.std(ddof=1), ".2f")
    summary["n-bins"] = str(x.size)
    return summary
|
|
|
|
|
|
def compute(derived: Path) -> tuple[dict[str, str], list[Path]]:
    """Collect packet-count statistics for every experiment under *derived*.

    Each immediate subdirectory of ``derived`` is visited in sorted order.
    Subdirectories without a ``firstflow_bins.csv`` file are skipped. For
    the rest, the CSV is loaded, its rows are grouped by the ``solution``
    column, and the ``packets`` column of each group is summarised with
    ``_stats``.

    Args:
        derived: Directory whose subdirectories are scanned.

    Returns:
        A pair ``(values, sources)``. ``values`` maps
        ``"<subdir name>/bins/<solution>/<stat key>"`` to the formatted
        statistic; ``sources`` lists the CSV paths that were read, in
        visiting order.
    """
    results: dict[str, str] = {}
    csv_files: list[Path] = []

    experiment_dirs = [entry for entry in derived.iterdir() if entry.is_dir()]
    experiment_dirs.sort()

    for experiment in experiment_dirs:
        csv_file = experiment / "firstflow_bins.csv"
        if csv_file.exists():
            # Record the file before parsing it, as a source of the output.
            csv_files.append(csv_file)
            frame = pd.read_csv(csv_file)
            for solution, rows in frame.groupby("solution", sort=True):
                packet_counts = rows["packets"].to_numpy()
                prefix = f"{experiment.name}/bins/{solution}"
                results.update(
                    {
                        f"{prefix}/{stat}": text
                        for stat, text in _stats(packet_counts).items()
                    }
                )

    return results, csv_files
|