feat(data): added sha256 for data provenance

This commit is contained in:
Sebastian Rust
2026-06-03 12:43:46 +02:00
parent d0223096ca
commit 4bbdda6e38
+10 -1
View File
@@ -6,6 +6,7 @@ one \\pgfkeys block; sources are recorded in the header for provenance.
""" """
import argparse import argparse
import datetime as dt import datetime as dt
import hashlib
import importlib.util import importlib.util
from pathlib import Path from pathlib import Path
from types import ModuleType from types import ModuleType
@@ -29,6 +30,14 @@ def file_mtime_iso(p: Path) -> str:
return dt.datetime.fromtimestamp(p.stat().st_mtime).isoformat(timespec="seconds") return dt.datetime.fromtimestamp(p.stat().st_mtime).isoformat(timespec="seconds")
def file_sha256(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *p*.

    The file is opened in binary mode and consumed in 64 KiB reads, so the
    full content never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    block_size = 1 << 16  # 64 KiB per read
    with p.open("rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:  # empty bytes signals end of file
                break
            digest.update(block)
    return digest.hexdigest()
# Map key-name suffix to siunitx unit. # Map key-name suffix to siunitx unit.
UNIT_SUFFIXES = { UNIT_SUFFIXES = {
"-us": r"\micro\second", "-us": r"\micro\second",
@@ -87,7 +96,7 @@ def main() -> None:
all_keys.update(keys) all_keys.update(keys)
provenance.append(f"% {name}: {len(keys)} keys") provenance.append(f"% {name}: {len(keys)} keys")
for src in sources: for src in sources:
provenance.append(f"% {src} (mtime {file_mtime_iso(src)})") provenance.append(f"% {src} (mtime {file_mtime_iso(src)}, sha256 {file_sha256(src)})")
print(f" {name}: {len(keys)} keys from {len(sources)} sources") print(f" {name}: {len(keys)} keys from {len(sources)} sources")
args.out.parent.mkdir(parents=True, exist_ok=True) args.out.parent.mkdir(parents=True, exist_ok=True)