feat(data): added sha256 for data provenance

This commit is contained in:
Sebastian Rust
2026-06-03 12:43:46 +02:00
parent d0223096ca
commit 4bbdda6e38
+10 -1
View File
@@ -6,6 +6,7 @@ one \\pgfkeys block; sources are recorded in the header for provenance.
"""
import argparse
import datetime as dt
import hashlib
import importlib.util
from pathlib import Path
from types import ModuleType
@@ -29,6 +30,14 @@ def file_mtime_iso(p: Path) -> str:
return dt.datetime.fromtimestamp(p.stat().st_mtime).isoformat(timespec="seconds")
def file_sha256(p: Path) -> str:
    """Return the SHA-256 digest of the file at *p* as a hex string.

    The file is opened in binary mode and fed to the hash in 64 KiB
    chunks, so its contents are never held in memory all at once.

    Args:
        p: Path of the file to hash.

    Returns:
        The hexadecimal digest produced by ``hashlib.sha256``.

    Raises:
        OSError: Propagated from ``p.open`` / ``read`` if the file cannot
            be opened or read.
    """
    h = hashlib.sha256()
    with p.open("rb") as f:
        # Two-argument iter(): call f.read repeatedly until it returns the
        # sentinel b"", which is what a binary read yields at end of file.
        for chunk in iter(lambda: f.read(1 << 16), b""):
            h.update(chunk)
    return h.hexdigest()
# Map key-name suffix to siunitx unit.
UNIT_SUFFIXES = {
"-us": r"\micro\second",
@@ -87,7 +96,7 @@ def main() -> None:
all_keys.update(keys)
provenance.append(f"% {name}: {len(keys)} keys")
for src in sources:
provenance.append(f"% {src} (mtime {file_mtime_iso(src)})")
provenance.append(f"% {src} (mtime {file_mtime_iso(src)}, sha256 {file_sha256(src)})")
print(f" {name}: {len(keys)} keys from {len(sources)} sources")
args.out.parent.mkdir(parents=True, exist_ok=True)