Case 11: Alex's Onboarding Summary
Persona: Alex, Team Lead
Situation
A new ML engineer, Jamie, joins the team. The churn model has been in production for 4 months: 6 training runs, 2 deployments, performance tracked across multiple retrains. Jamie needs answers to five questions on day one:
- How many training runs exist and what are their names?
- How has accuracy evolved over time?
- Which runs were deployed to production?
- Which run is objectively the best?
- What does a structured comparison across all runs look like?
Without tooling, Alex writes a document manually by searching Git logs, Confluence pages, and old Slack threads — half a day of work that goes stale in two weeks.
Without Contexta
- Project history lives across Git, Confluence, Slack, and individual notebooks.
- A handwritten document is a snapshot — it does not update when a new run is created.
- "Which run is best?" requires opening each notebook, finding the final metric, and ranking them manually.
With Contexta
# List every registered run and deployment for the project.
all_runs = ctx.list_runs(PROJECT_NAME)
deployments = ctx.list_deployments(PROJECT_NAME)
# select_best_run and build_multi_run_report are called with run identifiers,
# so collect them from the run objects first.
run_refs = [run.run_id for run in all_runs]
best_ref = ctx.select_best_run(run_refs, "accuracy", higher_is_better=True)
report = ctx.build_multi_run_report(run_refs)
The summary regenerates on demand. It reflects the current state of all registered runs —
no manual maintenance required. build_multi_run_report produces a structured, sectioned
report that can be rendered as HTML or exported as CSV.
Key APIs: list_runs, list_deployments, select_best_run, build_multi_run_report
Complete Runnable Code
Run the seed script first, then the analysis script:
uv run examples/case_studies/case11_seed_onboarding_data.py
uv run examples/case_studies/case11_analyze_onboarding.py
"""Create project-history records used by the onboarding case study."""
from __future__ import annotations
import tempfile
from pathlib import Path
from typing import Any
from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
from contexta.contract import (
DeploymentExecution,
MetricPayload,
MetricRecord,
Project,
RecordEnvelope,
Run,
StageExecution,
StructuredEventPayload,
StructuredEventRecord,
)
# Name of the project that every seeded record is attached to.
PROJECT_NAME = "churn-prediction"

# Counter behind _next_rid(); incremented once per generated record id.
_REC_COUNTER = 0

# Training runs, oldest first. Tuple layout:
#   (run_name, month_label, started_at, ended_at, accuracy, auc, f1)
_RUN_HISTORY = [
    (
        "churn-v1-jan", "Jan",
        "2025-01-10T09:00:00Z", "2025-01-10T12:00:00Z",
        0.821, 0.854, 0.811,
    ),
    (
        "churn-v2-feb", "Feb",
        "2025-02-07T09:00:00Z", "2025-02-07T12:00:00Z",
        0.843, 0.872, 0.836,
    ),
    (
        "churn-v3-feb", "Feb",
        "2025-02-21T09:00:00Z", "2025-02-21T12:00:00Z",
        0.861, 0.889, 0.854,
    ),
    (
        "churn-v4-mar", "Mar",
        "2025-03-14T09:00:00Z", "2025-03-14T12:00:00Z",
        0.878, 0.907, 0.871,
    ),
    (
        "churn-v5-apr", "Apr",
        "2025-04-03T09:00:00Z", "2025-04-03T12:00:00Z",
        0.894, 0.921, 0.888,
    ),
    (
        # Best run: highest accuracy, auc and f1 in the table.
        "churn-v6-apr", "Apr",
        "2025-04-18T09:00:00Z", "2025-04-18T12:00:00Z",
        0.902, 0.933, 0.896,
    ),
]

# Production deployments, oldest first. Tuple layout:
#   (deploy_name, 0-based index of the deployed run in _RUN_HISTORY,
#    started_at, ended_at, order_index)
_DEPLOYMENT_HISTORY = [
    ("prod-deploy-v3", 2, "2025-02-22T15:00:00Z", "2025-02-22T15:12:00Z", 0),  # churn-v3-feb
    ("prod-deploy-v6", 5, "2025-04-19T14:00:00Z", "2025-04-19T14:10:00Z", 1),  # churn-v6-apr
]
def _next_rid() -> str:
    """Return the next sequential record id suffix.

    Each call increments the module-level ``_REC_COUNTER`` by one and formats
    the new value as ``r`` followed by the number zero-padded to five digits
    (for example ``r00001`` when the counter was 0 before the call).
    """
    global _REC_COUNTER
    _REC_COUNTER = _REC_COUNTER + 1
    return "r" + format(_REC_COUNTER, "05d")
def _build_run(
    store: Any,
    record_store: Any,
    project_name: str,
    run_name: str,
    started_at: str,
    ended_at: str,
    accuracy: float,
    auc: float,
    f1: float,
) -> str:
    """Seed one completed training run and return its run ref.

    Writes, in this order:

    1. the ``Run`` itself (status ``completed``),
    2. a ``train`` stage (order 0) and an ``evaluate`` stage (order 1),
    3. one metric record each for ``accuracy``, ``auc`` and ``f1``, attached
       to the evaluate stage and timestamped ``ended_at``,
    4. one info-level ``training.run-registered`` event timestamped
       ``started_at`` stating that the run started.

    NOTE: the train -> evaluate hand-off is hard-coded to 10:30:00Z on the
    calendar date of ``started_at``. That is only consistent for runs that
    start before and end after that time on the same day, which holds for
    every row in ``_RUN_HISTORY``.

    Args:
        store: Metadata store exposing ``runs.put_run`` and
            ``stages.put_stage_execution``.
        record_store: Record store exposing ``append``.
        project_name: Project name used to build every ref.
        run_name: Run name used to build every ref.
        started_at: Run start timestamp string; its first 10 characters are
            used as the date of the stage hand-off.
        ended_at: Run end timestamp string.
        accuracy: Value stored under metric key ``accuracy``.
        auc: Value stored under metric key ``auc``.
        f1: Value stored under metric key ``f1``.

    Returns:
        The run ref, ``run:{project_name}.{run_name}``.
    """
    scope = f"{project_name}.{run_name}"
    run_ref = f"run:{scope}"
    train_ref = f"stage:{scope}.train"
    eval_ref = f"stage:{scope}.evaluate"
    # Fixed boundary between the two stages (see NOTE in the docstring).
    handoff_at = f"{started_at[:10]}T10:30:00Z"

    def _envelope(record_type: str, timestamp: str, **extra: Any) -> RecordEnvelope:
        # Fields shared by every record of this run; each call consumes one
        # fresh id from _next_rid().
        return RecordEnvelope(
            record_ref=f"record:{scope}.{_next_rid()}",
            record_type=record_type,
            recorded_at=timestamp,
            observed_at=timestamp,
            producer_ref="contexta.case11",
            run_ref=run_ref,
            completeness_marker="complete",
            degradation_marker="none",
            **extra,
        )

    store.runs.put_run(
        Run(
            run_ref=run_ref,
            project_ref=f"project:{project_name}",
            name=run_name,
            status="completed",
            started_at=started_at,
            ended_at=ended_at,
        )
    )

    # (stage ref, stage name, start, end); position in the tuple is the order_index.
    stage_plan = (
        (train_ref, "train", started_at, handoff_at),
        (eval_ref, "evaluate", handoff_at, ended_at),
    )
    for position, (stage_ref, stage_name, stage_start, stage_end) in enumerate(stage_plan):
        store.stages.put_stage_execution(
            StageExecution(
                stage_execution_ref=stage_ref,
                run_ref=run_ref,
                stage_name=stage_name,
                status="completed",
                started_at=stage_start,
                ended_at=stage_end,
                order_index=position,
            )
        )

    # Final metrics are observed at the end of the run, on the evaluate stage.
    final_metrics = {"accuracy": accuracy, "auc": auc, "f1": f1}
    for metric_key, metric_value in final_metrics.items():
        envelope = _envelope("metric", ended_at, stage_execution_ref=eval_ref)
        payload = MetricPayload(
            metric_key=metric_key,
            value=metric_value,
            value_type="float64",
        )
        record_store.append(MetricRecord(envelope=envelope, payload=payload))

    # Run-level event (no stage attached) marking that the run started.
    record_store.append(
        StructuredEventRecord(
            envelope=_envelope("event", started_at),
            payload=StructuredEventPayload(
                event_key="training.run-registered",
                level="info",
                message=f"Training run {run_name} started.",
                origin_marker="explicit_capture",
            ),
        )
    )
    return run_ref
def run_example(workspace: Path | str | None = None) -> dict[str, Any]:
    """Seed the case-study project: every run in ``_RUN_HISTORY`` plus its deployments.

    Args:
        workspace: Directory to use as the Contexta workspace root. When
            omitted, a ``.contexta`` path inside a freshly created temporary
            directory (prefix ``contexta-case11-``) is used instead.

    Returns:
        A dict with ``run_ids`` (the run refs, in ``_RUN_HISTORY`` order) and
        ``deployment_count`` (the number of rows in ``_DEPLOYMENT_HISTORY``).
    """
    if workspace is not None:
        workspace_path = Path(workspace)
    else:
        scratch_dir = Path(tempfile.mkdtemp(prefix="contexta-case11-"))
        workspace_path = scratch_dir / ".contexta"

    config = UnifiedConfig(
        project_name=PROJECT_NAME,
        workspace=WorkspaceConfig(root_path=workspace_path),
    )
    ctx = Contexta(config=config)
    store = ctx.metadata_store
    project_ref = f"project:{PROJECT_NAME}"

    try:
        store.projects.put_project(
            Project(
                project_ref=project_ref,
                name=PROJECT_NAME,
                created_at="2025-01-01T00:00:00Z",
                description="Customer churn prediction model",
            )
        )

        # One run per history row; the month label is informational only.
        run_refs: list[str] = [
            _build_run(
                store, ctx.record_store, PROJECT_NAME,
                run_name=name,
                started_at=began,
                ended_at=finished,
                accuracy=accuracy, auc=auc, f1=f1,
            )
            for name, _month, began, finished, accuracy, auc, f1 in _RUN_HISTORY
        ]

        # Each deployment points at a run by its position in the history table.
        for deploy_name, run_position, began, finished, order_index in _DEPLOYMENT_HISTORY:
            deployment = DeploymentExecution(
                deployment_execution_ref=f"deployment:{PROJECT_NAME}.{deploy_name}",
                project_ref=project_ref,
                deployment_name=deploy_name,
                status="completed",
                started_at=began,
                ended_at=finished,
                run_ref=run_refs[run_position],
                order_index=order_index,
            )
            store.deployments.put_deployment_execution(deployment)

        summary: dict[str, Any] = {
            "run_ids": run_refs,
            "deployment_count": len(_DEPLOYMENT_HISTORY),
        }
        return summary
    finally:
        # Close the metadata store whether or not seeding succeeded.
        store.close()
def main() -> None:
    """Seed the local ``.contexta`` workspace and print a one-line confirmation.

    Anything written to stdout while ``run_example`` runs is captured in a
    throwaway buffer, so the confirmation is the script's only output.
    """
    import io
    from contextlib import redirect_stdout

    discarded_output = io.StringIO()
    with redirect_stdout(discarded_output):
        run_example(Path(".contexta"))
    print(f"Seeded {PROJECT_NAME} data in .contexta.")


if __name__ == "__main__":
    main()
"""Build an onboarding summary from previously recorded project history."""
from pathlib import Path
from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
PROJECT_NAME = "churn-prediction"

# Open the local workspace; the seed script must have populated it first.
config = UnifiedConfig(
    project_name=PROJECT_NAME,
    workspace=WorkspaceConfig(root_path=Path(".contexta")),
)
ctx = Contexta(config=config)
store = ctx.metadata_store

try:
    # 1. Inventory: every run and deployment registered for the project.
    runs = ctx.list_runs(PROJECT_NAME)
    deployments = ctx.list_deployments(PROJECT_NAME)
    run_ids = [run.run_id for run in runs]

    print(f"Runs: {len(runs)}")
    for run in runs:
        print(f" {run.name}")

    # 2. Which runs reached production.
    print(f"\nDeployments: {len(deployments)}")
    for deployment in deployments:
        print(f" {deployment.deployment_id} -> {deployment.run_id}")

    # 3. Best run, ranked by the accuracy metric of the evaluate stage.
    best_run = ctx.select_best_run(
        run_ids,
        "accuracy",
        stage_name="evaluate",
        higher_is_better=True,
    )
    print(f"\nBest run by accuracy: {best_run}")

    # 4. Structured comparison across all runs.
    report = ctx.build_multi_run_report(run_ids)
    print(f"Report: {report.title}")
finally:
    # Close the metadata store even if one of the queries above fails.
    store.close()