Skip to main content

Case 11: Alex's Onboarding Summary

Persona: Alex, Team Lead

Situation

A new ML engineer, Jamie, joins the team. The churn model has been in production for 4 months: 6 training runs, 2 deployments, performance tracked across multiple retrains. Jamie needs answers to five questions on day one:

  1. How many training runs exist and what are their names?
  2. How has accuracy evolved over time?
  3. Which runs were deployed to production?
  4. Which run is objectively the best?
  5. What does a structured comparison across all runs look like?

Without tooling, Alex writes a document manually by searching Git logs, Confluence pages, and old Slack threads — half a day of work that goes stale in two weeks.

Without Contexta

  • Project history lives across Git, Confluence, Slack, and individual notebooks.
  • A handwritten document is a snapshot — it does not update when a new run is created.
  • "Which run is best?" requires opening each notebook, finding the final metric, and ranking them manually.

With Contexta

all_runs = ctx.list_runs(PROJECT_NAME)
deployments = ctx.list_deployments(PROJECT_NAME)
# Collect the run identifiers that the selection and report calls operate on.
run_refs = [run.run_id for run in all_runs]
best_ref = ctx.select_best_run(run_refs, "accuracy", higher_is_better=True)
report = ctx.build_multi_run_report(run_refs)

The summary regenerates on demand. It reflects the current state of all registered runs — no manual maintenance required. build_multi_run_report produces a structured, sectioned report that can be rendered as HTML or exported as CSV.

Key APIs: list_runs, list_deployments, select_best_run, build_multi_run_report


Complete Runnable Code

Run the seed script first, then the analysis script:

uv run examples/case_studies/case11_seed_onboarding_data.py
uv run examples/case_studies/case11_analyze_onboarding.py
case11_seed_onboarding_data.py
"""Create project-history records used by the onboarding case study."""

from __future__ import annotations

import tempfile
from pathlib import Path
from typing import Any

from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
from contexta.contract import (
DeploymentExecution,
MetricPayload,
MetricRecord,
Project,
RecordEnvelope,
Run,
StageExecution,
StructuredEventPayload,
StructuredEventRecord,
)


PROJECT_NAME = "churn-prediction"

# Running count used to mint unique record-ref suffixes.
_REC_COUNTER = 0

# Training runs, oldest first. Each row is
# (run_name, month_label, started_at, ended_at, accuracy, auc, f1).
_RUN_HISTORY = [
    ("churn-v1-jan", "Jan", "2025-01-10T09:00:00Z", "2025-01-10T12:00:00Z", 0.821, 0.854, 0.811),
    ("churn-v2-feb", "Feb", "2025-02-07T09:00:00Z", "2025-02-07T12:00:00Z", 0.843, 0.872, 0.836),
    ("churn-v3-feb", "Feb", "2025-02-21T09:00:00Z", "2025-02-21T12:00:00Z", 0.861, 0.889, 0.854),
    ("churn-v4-mar", "Mar", "2025-03-14T09:00:00Z", "2025-03-14T12:00:00Z", 0.878, 0.907, 0.871),
    ("churn-v5-apr", "Apr", "2025-04-03T09:00:00Z", "2025-04-03T12:00:00Z", 0.894, 0.921, 0.888),
    ("churn-v6-apr", "Apr", "2025-04-18T09:00:00Z", "2025-04-18T12:00:00Z", 0.902, 0.933, 0.896),  # best
]

# Production deployments. Each row is
# (deploy_name, 0-based index into _RUN_HISTORY, started_at, ended_at, order_index).
_DEPLOYMENT_HISTORY = [
    ("prod-deploy-v3", 2, "2025-02-22T15:00:00Z", "2025-02-22T15:12:00Z", 0),
    ("prod-deploy-v6", 5, "2025-04-19T14:00:00Z", "2025-04-19T14:10:00Z", 1),
]


def _next_rid() -> str:
    """Return the next sequential record id, e.g. ``r00001``.

    Increments the module-level ``_REC_COUNTER`` and formats the new value as
    ``r`` followed by the counter zero-padded to at least five digits.
    """
    global _REC_COUNTER
    _REC_COUNTER += 1
    return f"r{_REC_COUNTER:05d}"


def _build_run(
    store: Any,
    record_store: Any,
    project_name: str,
    run_name: str,
    started_at: str,
    ended_at: str,
    accuracy: float,
    auc: float,
    f1: float,
) -> str:
    """Persist one completed training run with its stages, metrics, and event.

    Writes, in order: the ``Run`` itself, a ``train`` stage followed by an
    ``evaluate`` stage, one metric record each for accuracy, AUC and F1
    (attached to the evaluate stage), and a single info-level event.

    Args:
        store: Metadata store exposing ``runs.put_run`` and
            ``stages.put_stage_execution``.
        record_store: Record store exposing ``append``.
        project_name: Project name used to build every ref.
        run_name: Run name; also becomes part of every ref.
        started_at: Run start timestamp string. Its first 10 characters are
            reused as the date of the train/evaluate hand-off.
        ended_at: Run end timestamp string; also used as the metric
            recording/observation time.
        accuracy: Value stored under the ``accuracy`` metric key.
        auc: Value stored under the ``auc`` metric key.
        f1: Value stored under the ``f1`` metric key.

    Returns:
        The ref of the run that was written, ``run:<project_name>.<run_name>``.
    """
    run_ref = f"run:{project_name}.{run_name}"

    store.runs.put_run(
        Run(
            run_ref=run_ref,
            project_ref=f"project:{project_name}",
            name=run_name,
            status="completed",
            started_at=started_at,
            ended_at=ended_at,
        )
    )

    train_ref = f"stage:{project_name}.{run_name}.train"
    eval_ref = f"stage:{project_name}.{run_name}.evaluate"

    # Training ends and evaluation begins at a fixed 10:30 on the run's start
    # date; computed once so both stages share the exact same boundary.
    stage_handoff_at = f"{started_at[:10]}T10:30:00Z"

    store.stages.put_stage_execution(
        StageExecution(
            stage_execution_ref=train_ref,
            run_ref=run_ref,
            stage_name="train",
            status="completed",
            started_at=started_at,
            ended_at=stage_handoff_at,
            order_index=0,
        )
    )
    store.stages.put_stage_execution(
        StageExecution(
            stage_execution_ref=eval_ref,
            run_ref=run_ref,
            stage_name="evaluate",
            status="completed",
            started_at=stage_handoff_at,
            ended_at=ended_at,
            order_index=1,
        )
    )

    # Metrics are stamped with the run's end time and tied to the evaluate stage.
    obs_ts = ended_at
    for key, val in [("accuracy", accuracy), ("auc", auc), ("f1", f1)]:
        record_store.append(
            MetricRecord(
                envelope=RecordEnvelope(
                    record_ref=f"record:{project_name}.{run_name}.{_next_rid()}",
                    record_type="metric",
                    recorded_at=obs_ts,
                    observed_at=obs_ts,
                    producer_ref="contexta.case11",
                    run_ref=run_ref,
                    stage_execution_ref=eval_ref,
                    completeness_marker="complete",
                    degradation_marker="none",
                ),
                payload=MetricPayload(
                    metric_key=key,
                    value=val,
                    value_type="float64",
                ),
            )
        )

    # Record an info-level event, stamped at the run's start time, noting that
    # the training run started. It is attached to the run, not to a stage.
    record_store.append(
        StructuredEventRecord(
            envelope=RecordEnvelope(
                record_ref=f"record:{project_name}.{run_name}.{_next_rid()}",
                record_type="event",
                recorded_at=started_at,
                observed_at=started_at,
                producer_ref="contexta.case11",
                run_ref=run_ref,
                completeness_marker="complete",
                degradation_marker="none",
            ),
            payload=StructuredEventPayload(
                event_key="training.run-registered",
                level="info",
                message=f"Training run {run_name} started.",
                origin_marker="explicit_capture",
            ),
        )
    )

    return run_ref


def run_example(workspace: Path | str | None = None) -> dict[str, Any]:
    """Create 6 runs over 4 months and 2 deployment records.

    Args:
        workspace: Path to use as the Contexta workspace root. When ``None``,
            a fresh temporary directory is created and ``<tmp>/.contexta`` is
            used as the root; that temporary directory is not removed here.

    Returns:
        A dict with ``"run_ids"`` (the run refs, in ``_RUN_HISTORY`` order) and
        ``"deployment_count"`` (the number of entries in
        ``_DEPLOYMENT_HISTORY``).
    """
    if workspace is None:
        root = Path(tempfile.mkdtemp(prefix="contexta-case11-"))
        workspace_path = root / ".contexta"
    else:
        workspace_path = Path(workspace)

    ctx = Contexta(
        config=UnifiedConfig(
            project_name=PROJECT_NAME,
            workspace=WorkspaceConfig(root_path=workspace_path),
        )
    )

    store = ctx.metadata_store
    try:
        store.projects.put_project(
            Project(
                project_ref=f"project:{PROJECT_NAME}",
                name=PROJECT_NAME,
                created_at="2025-01-01T00:00:00Z",
                description="Customer churn prediction model",
            )
        )

        # Runs are written in chronological order so that the positional
        # indexes in _DEPLOYMENT_HISTORY line up with run_refs below.
        run_refs: list[str] = []
        for run_name, _month, started, ended, acc, auc, f1 in _RUN_HISTORY:
            ref = _build_run(
                store, ctx.record_store, PROJECT_NAME,
                run_name=run_name,
                started_at=started,
                ended_at=ended,
                accuracy=acc, auc=auc, f1=f1,
            )
            run_refs.append(ref)

        for deploy_name, run_idx, started, ended, order in _DEPLOYMENT_HISTORY:
            store.deployments.put_deployment_execution(
                DeploymentExecution(
                    deployment_execution_ref=f"deployment:{PROJECT_NAME}.{deploy_name}",
                    project_ref=f"project:{PROJECT_NAME}",
                    deployment_name=deploy_name,
                    status="completed",
                    started_at=started,
                    ended_at=ended,
                    run_ref=run_refs[run_idx],
                    order_index=order,
                )
            )

        return {
            "run_ids": run_refs,
            "deployment_count": len(_DEPLOYMENT_HISTORY),
        }
    finally:
        # Close the metadata store even if seeding fails part-way through.
        store.close()


def main() -> None:
    """Seed the ``.contexta`` workspace in the current directory.

    Anything written to stdout while seeding is captured and discarded, so the
    only output is the final confirmation line.
    """
    from contextlib import redirect_stdout
    import io

    # Send stdout to a throwaway buffer for the duration of the seeding call.
    with redirect_stdout(io.StringIO()):
        run_example(Path(".contexta"))

    print(f"Seeded {PROJECT_NAME} data in .contexta.")


if __name__ == "__main__":
    main()
case11_analyze_onboarding.py
"""Build an onboarding summary from previously recorded project history."""

from pathlib import Path

from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig


PROJECT_NAME = "churn-prediction"

# Open the workspace in the current directory; the seed script is expected to
# have populated it beforehand.
ctx = Contexta(
    config=UnifiedConfig(
        project_name=PROJECT_NAME,
        workspace=WorkspaceConfig(root_path=Path(".contexta")),
    )
)

store = ctx.metadata_store
try:
    runs = ctx.list_runs(PROJECT_NAME)
    deployments = ctx.list_deployments(PROJECT_NAME)
    run_ids = [run.run_id for run in runs]

    # Questions 1 and 2: how many runs exist and what they are called.
    print(f"Runs: {len(runs)}")
    for run in runs:
        print(f"  {run.name}")

    # Question 3: which runs were deployed.
    print(f"\nDeployments: {len(deployments)}")
    for deployment in deployments:
        print(f"  {deployment.deployment_id} -> {deployment.run_id}")

    # Question 4: rank by the accuracy metric recorded on the evaluate stage.
    best_run = ctx.select_best_run(run_ids, "accuracy", stage_name="evaluate", higher_is_better=True)
    print(f"\nBest run by accuracy: {best_run}")

    # Question 5: structured comparison across every run.
    report = ctx.build_multi_run_report(run_ids)
    print(f"Report: {report.title}")
finally:
    # Close the metadata store whether or not the queries above succeeded.
    store.close()