Files
tso-paper-eval/analysis/sanity_check.py
T
2026-05-27 21:00:28 +02:00

59 lines
1.8 KiB
Python

#!/usr/bin/env python3
"""Sanity check derived CSVs: shapes, no NaN, expected solution coverage."""
import argparse
import sys
from pathlib import Path
import pandas as pd
# Files that must be present in the derived directory, mapped to the column
# names each one has to provide.
REQUIRED: dict[str, set[str]] = {
    "runs.csv": {
        "setup", "solution", "run", "throughput_bps", "cpu_sender", "cpu_receiver"
    },
    "rtts.csv": {"setup", "solution", "rtt_us"},
}

# Files that may be absent; when present they are validated against the column
# names listed here.
OPTIONAL: dict[str, set[str]] = {
    "idts.csv": {"setup", "solution", "run", "stream_id", "idt_us"},
    "firstflow_bins.csv": {"setup", "solution", "t_ms", "packets"},
}
def check(derived: Path, expected_solutions: set[str]) -> list[str]:
    """Validate the derived CSV files and collect every problem found.

    Every file named in ``REQUIRED`` must exist in *derived*; files named in
    ``OPTIONAL`` are validated only when they exist. For each file that is
    present the following is checked: all listed columns exist, no cell is
    NaN, the table has at least one row, and the distinct values of the
    ``solution`` column equal *expected_solutions* exactly.

    Args:
        derived: Directory that holds the derived CSV files.
        expected_solutions: Exact set of values expected in each file's
            ``solution`` column.

    Returns:
        One human-readable message per detected problem, in the order the
        files are checked. An empty list means every check passed.
    """
    errors = []
    for name, cols in {**REQUIRED, **OPTIONAL}.items():
        path = derived / name
        if not path.exists():
            # Only required files are an error when absent.
            if name in REQUIRED:
                errors.append(f"{name}: missing")
            continue

        try:
            df = pd.read_csv(path)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header row to parse; report it like any
            # other empty table instead of aborting the whole check.
            errors.append(f"{name}: empty")
            continue

        missing_cols = cols - set(df.columns)
        if missing_cols:
            errors.append(f"{name}: missing columns {missing_cols}")

        if df.isna().any().any():
            nan_cols = df.columns[df.isna().any()].tolist()
            errors.append(f"{name}: NaN in columns {nan_cols}")

        if len(df) == 0:
            errors.append(f"{name}: empty")

        if "solution" not in df.columns:
            # Without the column there is nothing to compare; its absence was
            # already reported above for every schema that lists it. Indexing
            # it here would raise KeyError and hide the collected errors.
            continue

        sols = set(df["solution"].unique())
        if sols != expected_solutions:
            errors.append(f"{name}: solutions {sols} != expected {expected_solutions}")
    return errors
def main() -> None:
    """Command-line entry point.

    Parses ``--derived`` (directory path) and ``--solutions`` (one or more
    names), runs ``check`` on them, and prints ``ok`` when no problems were
    found. Otherwise prints one ``FAIL`` line per problem and exits with
    status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--derived", required=True, type=Path)
    parser.add_argument("--solutions", required=True, nargs="+")
    opts = parser.parse_args()

    failures = check(opts.derived, set(opts.solutions))
    if not failures:
        print("ok")
        return

    for failure in failures:
        print(f" FAIL: {failure}")
    sys.exit(1)


if __name__ == "__main__":
    main()