#!/usr/bin/env python3
"""Sanity check derived CSVs: shapes, no NaN, expected solution coverage."""
import argparse
import sys
from pathlib import Path

import pandas as pd

# Files that must exist in the derived directory, mapped to the columns each
# one has to provide.
REQUIRED = {
    "runs.csv": {
        "setup",
        "solution",
        "run",
        "throughput_bps",
        "cpu_sender",
        "cpu_receiver",
    },
    "rtts.csv": {"setup", "solution", "rtt_us"},
}

# Files that are validated only when they are present.
OPTIONAL = {
    "idts.csv": {"setup", "solution", "run", "stream_id", "idt_us"},
    "firstflow_bins.csv": {"setup", "solution", "t_ms", "packets"},
}


def check(derived: Path, expected_solutions: set[str]) -> list[str]:
    """Validate the derived CSV files and return a list of problems.

    Every file named in ``REQUIRED`` must exist; files named in ``OPTIONAL``
    are checked only if present. Each file that exists is checked for:
    required columns, NaN values, emptiness, and that the set of values in
    its ``solution`` column equals ``expected_solutions``.

    Args:
        derived: Directory containing the derived CSV files.
        expected_solutions: Exact set of solution names each file must cover.

    Returns:
        Human-readable error strings, one per problem found, in file order.
        An empty list means every check passed.
    """
    errors: list[str] = []
    for name, cols in {**REQUIRED, **OPTIONAL}.items():
        path = derived / name
        if not path.exists():
            if name in REQUIRED:
                errors.append(f"{name}: missing")
            continue

        try:
            df = pd.read_csv(path)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header to parse; report it as empty
            # rather than aborting the checks for the remaining files.
            errors.append(f"{name}: empty")
            continue

        missing_cols = cols - set(df.columns)
        if missing_cols:
            errors.append(f"{name}: missing columns {missing_cols}")

        nan_mask = df.isna().any()
        if nan_mask.any():
            nan_cols = df.columns[nan_mask].tolist()
            errors.append(f"{name}: NaN in columns {nan_cols}")

        if len(df) == 0:
            errors.append(f"{name}: empty")

        # Coverage can only be evaluated when the column exists; its absence
        # has already been reported through ``missing_cols`` above.
        if "solution" in df.columns:
            sols = set(df["solution"].unique())
            if sols != expected_solutions:
                errors.append(
                    f"{name}: solutions {sols} != expected {expected_solutions}"
                )
    return errors


def main() -> None:
    """Parse CLI arguments, run the checks, and exit with status 1 on failure."""
    p = argparse.ArgumentParser()
    p.add_argument("--derived", required=True, type=Path)
    p.add_argument("--solutions", required=True, nargs="+")
    args = p.parse_args()

    errors = check(args.derived, set(args.solutions))
    if errors:
        for e in errors:
            print(f"  FAIL: {e}")
        sys.exit(1)
    print("ok")


if __name__ == "__main__":
    main()