59 lines
1.8 KiB
Python
59 lines
1.8 KiB
Python
#!/usr/bin/env python3
|
|
"""Sanity check derived CSVs: shapes, no NaN, expected solution coverage."""
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
|
|
# Files that check() reports as an error when absent, mapped to the set of
# column names each one is expected to contain.
REQUIRED = {
    "runs.csv": {
        "setup",
        "solution",
        "run",
        "throughput_bps",
        "cpu_sender",
        "cpu_receiver",
    },
    "rtts.csv": {
        "setup",
        "solution",
        "rtt_us",
    },
}

# Files that may be absent without error; when present, check() validates
# them against these column sets exactly like the required ones.
OPTIONAL = {
    "idts.csv": {
        "setup",
        "solution",
        "run",
        "stream_id",
        "idt_us",
    },
    "firstflow_bins.csv": {
        "setup",
        "solution",
        "t_ms",
        "packets",
    },
}
|
|
|
|
|
|
def check(derived: Path, expected_solutions: set[str]) -> list[str]:
    """Validate the derived CSV files and return the problems found.

    Every file named in ``REQUIRED`` and ``OPTIONAL`` is looked up inside
    *derived*. A file that does not exist is reported only when it is
    required. Each file that exists is checked for missing expected
    columns, NaN values in any column, zero data rows, and for the set of
    values in its ``solution`` column being exactly *expected_solutions*.

    Args:
        derived: Directory containing the derived CSV files.
        expected_solutions: Solution names each present file must contain,
            no more and no fewer.

    Returns:
        Error strings, each prefixed with the file name; an empty list when
        every check passes.
    """
    errors: list[str] = []
    for name, cols in {**REQUIRED, **OPTIONAL}.items():
        path = derived / name
        if not path.exists():
            if name in REQUIRED:
                errors.append(f"{name}: missing")
            continue

        try:
            df = pd.read_csv(path)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header to parse. Report it like any
            # other empty table instead of aborting and losing the errors
            # collected so far.
            errors.append(f"{name}: empty")
            continue

        missing_cols = cols - set(df.columns)
        if missing_cols:
            errors.append(f"{name}: missing columns {missing_cols}")

        # Compute the per-column NaN mask once and reuse it for the report.
        nan_mask = df.isna().any()
        if nan_mask.any():
            nan_cols = df.columns[nan_mask].tolist()
            errors.append(f"{name}: NaN in columns {nan_cols}")

        if len(df) == 0:
            errors.append(f"{name}: empty")

        # Indexing a missing column raises KeyError, which would discard
        # every error gathered so far. When "solution" is one of the
        # expected columns its absence is already reported above.
        if "solution" not in df.columns:
            continue

        sols = set(df["solution"].unique())
        if sols != expected_solutions:
            errors.append(f"{name}: solutions {sols} != expected {expected_solutions}")
    return errors
|
|
|
|
|
|
def main() -> None:
    """Parse the command line, run the checks, and report the outcome.

    Prints ``ok`` when no problems are found. Otherwise prints one line per
    problem and exits with status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--derived", required=True, type=Path)
    parser.add_argument("--solutions", required=True, nargs="+")
    opts = parser.parse_args()

    failures = check(opts.derived, set(opts.solutions))

    # Success path first: nothing to report, fall through to a normal exit.
    if not failures:
        print("ok")
        return

    for failure in failures:
        print(f" FAIL: {failure}")
    sys.exit(1)
|
|
|
|
|
|
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|