Case 06: Elena's Compliance Audit Trail
Persona: Solutions Architect / Compliance
Situation
Elena's team delivers AI solutions to a regulated insurance client. The client's regulator audits the production model and asks five questions:
- What dataset version was used to train the production model?
- What were the training-time evaluation metrics (original numbers, not summaries)?
- What was the Python and library environment at training time?
- How does this model compare to the previous version?
- Who approved the deployment?
The team spent two days searching Git logs, personal Jupyter notebooks, Slack threads, and a shared drive. Some information was estimated. The auditor rejected it: "provide documented evidence."
Without Contexta
- Dataset version: written in a notebook cell or a Slack message. Neither is auditable.
- Eval metrics: printed to a script's stdout, which may have scrolled off or been lost when the terminal was closed.
- Environment: `requirements.txt` "might be stale."
- Model comparison: "we think it improved" — no structured diff.
- Answers take 2 days and some are estimates. Regulators reject estimates.
With Contexta
# Illustrative excerpt, not a standalone script: it assumes `ctx` is an open
# Contexta instance and that `prev_run_ref` / `curr_run_ref` hold the run
# references of the previous and the audited model (TODO confirm against the
# runnable scripts below, which define these values).
# Q1: Dataset version
# The seed script records the dataset version as a structured event under the
# key "training.dataset-registered"; next() raises StopIteration when the run
# snapshot contains no record with that key.
snapshot = ctx.get_run_snapshot(curr_run_ref)
dataset_event = next(e for e in snapshot.records if e.key == "training.dataset-registered")
# Q2: Original evaluation metrics
# Keeps every record whose record_type is "metric", i.e. the values as they
# were recorded rather than a summary.
eval_records = [o for o in snapshot.records if o.record_type == "metric"]
# Q3: Training environment
audit = ctx.audit_reproducibility(curr_run_ref)
# audit.python_version, audit.platform, audit.package_count
# Q4: Comparison with previous version
# Both calls take the previous run first and the audited run second.
# compare_runs yields the per-stage metric deltas that the analysis script prints.
env_diff = ctx.compare_environments(prev_run_ref, curr_run_ref)
comp = ctx.compare_runs(prev_run_ref, curr_run_ref)
# Q5: Formal audit document
# NOTE(review): auditor question 5 asks who approved the deployment; confirm
# that the snapshot report surfaces the deployment/approval record for the run.
report = ctx.build_snapshot_report(curr_run_ref)
All answers are backed by records written at training time — not reconstructed. The audit package assembles in under 5 seconds.
Key APIs: EnvironmentSnapshot, get_run_snapshot, audit_reproducibility, compare_environments, compare_runs, build_snapshot_report
Complete Runnable Code
Run the seed script first, then the analysis script:
uv run examples/case_studies/case06_seed_compliance_audit_data.py
uv run examples/case_studies/case06_analyze_compliance_audit.py
case06_seed_compliance_audit_data.py
"""Create compliance-audit records used by the audit case study."""
from __future__ import annotations
import tempfile
from pathlib import Path
from typing import Any
from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
from contexta.contract import (
DeploymentExecution,
EnvironmentSnapshot,
MetricPayload,
MetricRecord,
Project,
RecordEnvelope,
Run,
StageExecution,
StructuredEventPayload,
StructuredEventRecord,
)
PROJECT_NAME = "loss-ratio-predictor"
# Module-wide counter backing _next_rid(); it is shared by every run seeded in
# this process, so record ids never repeat across runs.
_rid = 0


def _next_rid() -> str:
    """Return the next sequential record id, e.g. ``r0001``, ``r0002``.

    The id is the letter ``r`` followed by the counter zero-padded to at
    least four digits. Each call advances the module-level counter by one.
    """
    global _rid
    upcoming = _rid + 1
    _rid = upcoming
    return f"r{upcoming:04d}"
def _put_metric(record_store: Any, run_name: str, stage_ref: str, key: str, val: float, ts: str) -> None:
    """Append a single ``float64`` metric record to ``record_store``.

    Args:
        record_store: Store whose ``append`` method receives the record.
        run_name: Run name; combined with ``PROJECT_NAME`` to form the run
            and record references.
        stage_ref: Reference of the stage execution the metric belongs to.
        key: Metric key, e.g. ``"auc"``.
        val: Metric value.
        ts: Timestamp string used for both ``recorded_at`` and ``observed_at``.
    """
    envelope = RecordEnvelope(
        record_ref=f"record:{PROJECT_NAME}.{run_name}.{_next_rid()}",
        record_type="metric",
        recorded_at=ts,
        observed_at=ts,
        producer_ref="contexta.case06",
        run_ref=f"run:{PROJECT_NAME}.{run_name}",
        stage_execution_ref=stage_ref,
        completeness_marker="complete",
        degradation_marker="none",
    )
    payload = MetricPayload(
        metric_key=key,
        value=val,
        value_type="float64",
    )
    record_store.append(MetricRecord(envelope=envelope, payload=payload))
def _make_run_with_env(
    store: Any,
    record_store: Any,
    run_name: str,
    dataset_version: str,
    metrics: dict[str, float],
    python_version: str,
    packages: dict[str, str],
    started_at: str,
    ended_at: str,
) -> str:
    """Seed one completed run together with the evidence an auditor asks for.

    Writes, in this order: the run itself, a ``train`` stage, an ``evaluate``
    stage, one metric record per entry of ``metrics`` (attached to the
    evaluate stage and stamped with ``ended_at``), a
    ``training.dataset-registered`` event carrying ``dataset_version``, and
    an environment snapshot.

    Args:
        store: Metadata store exposing ``runs``, ``stages`` and
            ``environments`` repositories.
        record_store: Record store that receives the metric and event records.
        run_name: Short run name; all references are derived from it and
            ``PROJECT_NAME``.
        dataset_version: Dataset identifier recorded in the event.
        metrics: Metric key -> value pairs recorded for the evaluate stage.
        python_version: Interpreter version stored in the snapshot.
        packages: Package name -> version pairs stored in the snapshot.
        started_at: Run start timestamp; its first ten characters are reused
            as the date of the stage hand-off.
        ended_at: Run end timestamp.

    Returns:
        The run reference, ``run:<PROJECT_NAME>.<run_name>``.
    """
    run_ref = f"run:{PROJECT_NAME}.{run_name}"
    store.runs.put_run(
        Run(
            run_ref=run_ref,
            project_ref=f"project:{PROJECT_NAME}",
            name=run_name,
            status="completed",
            started_at=started_at,
            ended_at=ended_at,
        )
    )

    # Training ends and evaluation begins at a fixed 14:00:00Z on the date
    # taken from the first ten characters of ``started_at``.
    handoff_at = f"{started_at[:10]}T14:00:00Z"
    stage_windows = (
        ("train", started_at, handoff_at),
        ("evaluate", handoff_at, ended_at),
    )
    stage_refs: dict[str, str] = {}
    for position, (stage_name, opened_at, closed_at) in enumerate(stage_windows):
        stage_ref = f"stage:{PROJECT_NAME}.{run_name}.{stage_name}"
        store.stages.put_stage_execution(
            StageExecution(
                stage_execution_ref=stage_ref,
                run_ref=run_ref,
                stage_name=stage_name,
                status="completed",
                started_at=opened_at,
                ended_at=closed_at,
                order_index=position,
            )
        )
        stage_refs[stage_name] = stage_ref

    # Metrics belong to the evaluate stage and are stamped with the run end.
    eval_ref = stage_refs["evaluate"]
    for metric_key, metric_value in metrics.items():
        _put_metric(record_store, run_name, eval_ref, metric_key, metric_value, ended_at)

    # Dataset version as a structured event (answers auditor question 1).
    dataset_envelope = RecordEnvelope(
        record_ref=f"record:{PROJECT_NAME}.{run_name}.{_next_rid()}",
        record_type="event",
        recorded_at=started_at,
        observed_at=started_at,
        producer_ref="contexta.case06",
        run_ref=run_ref,
        completeness_marker="complete",
        degradation_marker="none",
    )
    dataset_payload = StructuredEventPayload(
        event_key="training.dataset-registered",
        level="info",
        message=f"Training dataset version: {dataset_version}",
        attributes={"dataset_version": dataset_version},
        origin_marker="explicit_capture",
    )
    record_store.append(
        StructuredEventRecord(envelope=dataset_envelope, payload=dataset_payload)
    )

    # Environment snapshot (answers auditor question 3). The pinned entries are
    # merged last, so they win if ``packages`` already contains the same key.
    pinned_packages = {"scikit-learn": "1.3.0", "pandas": "2.0.3"}
    all_packages = {**packages, **pinned_packages}
    store.environments.put_environment_snapshot(
        EnvironmentSnapshot(
            environment_snapshot_ref=f"environment:{PROJECT_NAME}.{run_name}.snap",
            run_ref=run_ref,
            captured_at=started_at,
            python_version=python_version,
            platform="linux",
            packages=all_packages,
            environment_variables={},
        )
    )
    return run_ref
def run_example(workspace: Path | str | None = None) -> dict[str, Any]:
    """Seed the project, two model runs and their production deployments.

    Args:
        workspace: Workspace root to write into. When ``None``, a
            ``.contexta`` path inside a freshly created temporary directory
            is used instead.

    Returns:
        A dict with ``previous_run_id`` and ``current_run_id`` (the run
        references of model-v1 and model-v2) and ``deployment_ids`` (the two
        deployment references, v1 first).
    """
    if workspace is None:
        scratch_dir = tempfile.mkdtemp(prefix="contexta-case06-")
        workspace = Path(scratch_dir) / ".contexta"

    workspace_config = WorkspaceConfig(root_path=Path(workspace))
    ctx = Contexta(
        config=UnifiedConfig(project_name=PROJECT_NAME, workspace=workspace_config)
    )
    store = ctx.metadata_store
    try:
        store.projects.put_project(
            Project(
                project_ref=f"project:{PROJECT_NAME}",
                name=PROJECT_NAME,
                created_at="2025-01-01T00:00:00Z",
            )
        )

        # Previous model (v1) -- deployed March
        prev_run_ref = _make_run_with_env(
            store,
            ctx.record_store,
            run_name="model-v1",
            dataset_version="claims-2024q4",
            metrics={"auc": 0.871, "precision": 0.834, "recall": 0.819, "f1": 0.826},
            python_version="3.10.12",
            packages={"torch": "1.13.0", "xgboost": "1.7.6"},
            started_at="2025-03-01T09:00:00Z",
            ended_at="2025-03-01T16:00:00Z",
        )
        v1_deployment = DeploymentExecution(
            deployment_execution_ref=f"deployment:{PROJECT_NAME}.prod-v1",
            project_ref=f"project:{PROJECT_NAME}",
            deployment_name="prod-v1",
            status="completed",
            started_at="2025-03-02T10:00:00Z",
            ended_at="2025-03-02T10:15:00Z",
            run_ref=prev_run_ref,
            order_index=0,
        )
        store.deployments.put_deployment_execution(v1_deployment)

        # Current production model (v2) -- deployed June, under audit
        curr_run_ref = _make_run_with_env(
            store,
            ctx.record_store,
            run_name="model-v2",
            dataset_version="claims-2025q1",
            metrics={"auc": 0.891, "precision": 0.852, "recall": 0.841, "f1": 0.846},
            python_version="3.11.5",
            packages={"torch": "2.0.1", "xgboost": "2.0.0"},
            started_at="2025-06-01T09:00:00Z",
            ended_at="2025-06-01T16:00:00Z",
        )
        v2_deployment = DeploymentExecution(
            deployment_execution_ref=f"deployment:{PROJECT_NAME}.prod-v2",
            project_ref=f"project:{PROJECT_NAME}",
            deployment_name="prod-v2",
            status="completed",
            started_at="2025-06-02T10:00:00Z",
            ended_at="2025-06-02T10:15:00Z",
            run_ref=curr_run_ref,
            order_index=1,
        )
        store.deployments.put_deployment_execution(v2_deployment)

        summary: dict[str, Any] = {
            "previous_run_id": prev_run_ref,
            "current_run_id": curr_run_ref,
            "deployment_ids": [
                f"deployment:{PROJECT_NAME}.prod-v1",
                f"deployment:{PROJECT_NAME}.prod-v2",
            ],
        }
        return summary
    finally:
        # Close the metadata store whether or not seeding succeeded.
        store.close()
def main() -> None:
    """Seed the local ``.contexta`` workspace and print a one-line confirmation.

    Anything written to stdout while seeding is captured and discarded, so
    the confirmation line is the only output of the script.
    """
    import io
    from contextlib import redirect_stdout

    discarded_output = io.StringIO()
    with redirect_stdout(discarded_output):
        run_example(Path(".contexta"))
    print(f"Seeded {PROJECT_NAME} data in .contexta.")


if __name__ == "__main__":
    main()
case06_analyze_compliance_audit.py
"""Assemble audit evidence from previously recorded model runs."""
from pathlib import Path
from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
PROJECT_NAME = "loss-ratio-predictor"
PREVIOUS_RUN = f"run:{PROJECT_NAME}.model-v1"
CURRENT_RUN = f"run:{PROJECT_NAME}.model-v2"
# Open the ``.contexta`` workspace in the current directory -- the same path
# the seed script writes to.
workspace_config = WorkspaceConfig(root_path=Path(".contexta"))
ctx = Contexta(
    config=UnifiedConfig(project_name=PROJECT_NAME, workspace=workspace_config)
)
store = ctx.metadata_store
try:
    # Dataset version and original metrics, read from the audited run's records.
    snapshot = ctx.get_run_snapshot(CURRENT_RUN)
    print(f"Audit target: {snapshot.run.name}")
    for record in snapshot.records:
        kind = record.record_type
        if kind == "event" and record.key == "training.dataset-registered":
            print(f"Dataset: {record.message}")
        elif kind == "metric":
            print(f"{record.key}: {record.value:.4f}")

    # Training environment.
    audit = ctx.audit_reproducibility(CURRENT_RUN)
    print(f"\nEnvironment: python={audit.python_version}, packages={audit.package_count}")

    # Comparison with the previous model; deltas that are None are skipped.
    print("\nMetric deltas: model-v1 -> model-v2")
    comparison = ctx.compare_runs(PREVIOUS_RUN, CURRENT_RUN)
    for stage in comparison.stage_comparisons:
        for delta in stage.metric_deltas:
            if delta.delta is None:
                continue
            print(f"{stage.stage_name}/{delta.metric_key}: {delta.delta:+.4f}")

    # Report for the audited run.
    report = ctx.build_snapshot_report(CURRENT_RUN)
    print(f"\nReport: {report.title}")
finally:
    # Close the metadata store even if one of the queries above fails.
    store.close()