Case 11: Alex's Onboarding Summary

Persona: Alex, Team Lead

Situation

A new ML engineer, Jamie, joins the team. The churn model has been in production for 4 months: 6 training runs, 2 deployments, performance tracked across multiple retrains. Jamie needs answers to five questions on day one:

How many training runs exist and what are their names?
How has accuracy evolved over time?
Which runs were deployed to production?
Which run is objectively the best?
What does a structured comparison across all runs look like?

Without tooling, Alex writes a document manually by searching Git logs, Confluence pages, and old Slack threads — half a day of work that goes stale in two weeks.

Without Contexta

Project history lives across Git, Confluence, Slack, and individual notebooks.
A handwritten document is a snapshot — it does not update when a new run is created.
"Which run is best?" requires opening each notebook, finding the final metric, and ranking them manually.

With Contexta

# Query the recorded history, then rank and report on every run.
all_runs    = ctx.list_runs(PROJECT_NAME)
deployments = ctx.list_deployments(PROJECT_NAME)
# Collect the run ids once; both the ranking and the report take this list.
run_refs    = [run.run_id for run in all_runs]
best_ref    = ctx.select_best_run(run_refs, "accuracy", higher_is_better=True)
report      = ctx.build_multi_run_report(run_refs)

The summary regenerates on demand. It reflects the current state of all registered runs — no manual maintenance required. build_multi_run_report produces a structured, sectioned report that can be rendered as HTML or exported as CSV.

Key APIs: list_runs, list_deployments, select_best_run, build_multi_run_report

Complete Runnable Code

Run the seed script first, then the analysis script:

uv run examples/case_studies/case11_seed_onboarding_data.py
uv run examples/case_studies/case11_analyze_onboarding.py

case11_seed_onboarding_data.py
"""Create project-history records used by the onboarding case study."""

from __future__ import annotations

import tempfile
from pathlib import Path
from typing import Any

from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
from contexta.contract import (
    DeploymentExecution,
    MetricPayload,
    MetricRecord,
    Project,
    RecordEnvelope,
    Run,
    StageExecution,
    StructuredEventPayload,
    StructuredEventRecord,
)


# Project name shared by every ref built in this script (project, run, stage,
# record and deployment refs all embed it).
PROJECT_NAME = "churn-prediction"

# Module-wide sequence number; _next_rid() increments it to mint record ids.
_REC_COUNTER = 0

# Chronological run history: (run_name, month_label, started_at, ended_at, accuracy, auc, f1)
# month_label is informational only; run_example() unpacks it but does not use it.
_RUN_HISTORY = [
    ("churn-v1-jan", "Jan", "2025-01-10T09:00:00Z", "2025-01-10T12:00:00Z", 0.821, 0.854, 0.811),
    ("churn-v2-feb", "Feb", "2025-02-07T09:00:00Z", "2025-02-07T12:00:00Z", 0.843, 0.872, 0.836),
    ("churn-v3-feb", "Feb", "2025-02-21T09:00:00Z", "2025-02-21T12:00:00Z", 0.861, 0.889, 0.854),
    ("churn-v4-mar", "Mar", "2025-03-14T09:00:00Z", "2025-03-14T12:00:00Z", 0.878, 0.907, 0.871),
    ("churn-v5-apr", "Apr", "2025-04-03T09:00:00Z", "2025-04-03T12:00:00Z", 0.894, 0.921, 0.888),
    ("churn-v6-apr", "Apr", "2025-04-18T09:00:00Z", "2025-04-18T12:00:00Z", 0.902, 0.933, 0.896),  # best: highest accuracy, auc and f1
]

# Deployments: (deploy_name, linked_run_index 0-based, started_at, ended_at, order_index)
# linked_run_index points into _RUN_HISTORY (2 -> churn-v3-feb, 5 -> churn-v6-apr).
_DEPLOYMENT_HISTORY = [
    ("prod-deploy-v3", 2, "2025-02-22T15:00:00Z", "2025-02-22T15:12:00Z", 0),
    ("prod-deploy-v6", 5, "2025-04-19T14:00:00Z", "2025-04-19T14:10:00Z", 1),
]


def _next_rid() -> str:
    """Advance the module-wide record counter and return the new id.

    Returns:
        ``r`` followed by the counter value zero-padded to five digits,
        e.g. ``r00001`` on the first call.
    """
    global _REC_COUNTER
    _REC_COUNTER = _REC_COUNTER + 1
    return f"r{_REC_COUNTER:05d}"


def _build_run(
    store: Any,
    record_store: Any,
    project_name: str,
    run_name: str,
    started_at: str,
    ended_at: str,
    accuracy: float,
    auc: float,
    f1: float,
) -> str:
    """Persist one completed training run and everything attached to it.

    Writes, in this order: the run, its ``train`` stage, its ``evaluate``
    stage, one metric record each for accuracy, auc and f1 (attached to the
    evaluate stage), and one info-level event record for the run.

    Args:
        store: Metadata store; must expose ``runs.put_run`` and
            ``stages.put_stage_execution``.
        record_store: Record store; must expose ``append``.
        project_name: Project name embedded in every generated ref.
        run_name: Run name embedded in every generated ref.
        started_at: Run start timestamp string; its first 10 characters are
            reused as the date of the train/evaluate hand-over.
        ended_at: Run end timestamp string; also used as the metric timestamp.
        accuracy: Value stored under metric key ``accuracy``.
        auc: Value stored under metric key ``auc``.
        f1: Value stored under metric key ``f1``.

    Returns:
        The run ref, ``run:<project_name>.<run_name>``.
    """
    run_ref = f"run:{project_name}.{run_name}"
    run = Run(
        run_ref=run_ref,
        project_ref=f"project:{project_name}",
        name=run_name,
        status="completed",
        started_at=started_at,
        ended_at=ended_at,
    )
    store.runs.put_run(run)

    train_stage_ref = f"stage:{project_name}.{run_name}.train"
    evaluate_stage_ref = f"stage:{project_name}.{run_name}.evaluate"

    # Training ends and evaluation begins at a fixed 10:30:00Z on the run's start date.
    handoff_at = f"{started_at[:10]}T10:30:00Z"

    # (stage ref, stage name, start, end, order index) in execution order.
    stage_plan = (
        (train_stage_ref, "train", started_at, handoff_at, 0),
        (evaluate_stage_ref, "evaluate", handoff_at, ended_at, 1),
    )
    for stage_ref, stage_name, stage_start, stage_end, position in stage_plan:
        store.stages.put_stage_execution(
            StageExecution(
                stage_execution_ref=stage_ref,
                run_ref=run_ref,
                stage_name=stage_name,
                status="completed",
                started_at=stage_start,
                ended_at=stage_end,
                order_index=position,
            )
        )

    # Final metrics are stamped with the run's end time and tied to the evaluate stage.
    metric_values = {"accuracy": accuracy, "auc": auc, "f1": f1}
    for metric_key, metric_value in metric_values.items():
        metric_envelope = RecordEnvelope(
            record_ref=f"record:{project_name}.{run_name}.{_next_rid()}",
            record_type="metric",
            recorded_at=ended_at,
            observed_at=ended_at,
            producer_ref="contexta.case11",
            run_ref=run_ref,
            stage_execution_ref=evaluate_stage_ref,
            completeness_marker="complete",
            degradation_marker="none",
        )
        metric_payload = MetricPayload(
            metric_key=metric_key,
            value=metric_value,
            value_type="float64",
        )
        record_store.append(MetricRecord(envelope=metric_envelope, payload=metric_payload))

    # One run-level event, stamped with the start time, noting that the run started.
    event_envelope = RecordEnvelope(
        record_ref=f"record:{project_name}.{run_name}.{_next_rid()}",
        record_type="event",
        recorded_at=started_at,
        observed_at=started_at,
        producer_ref="contexta.case11",
        run_ref=run_ref,
        completeness_marker="complete",
        degradation_marker="none",
    )
    event_payload = StructuredEventPayload(
        event_key="training.run-registered",
        level="info",
        message=f"Training run {run_name} started.",
        origin_marker="explicit_capture",
    )
    record_store.append(StructuredEventRecord(envelope=event_envelope, payload=event_payload))

    return run_ref


def run_example(workspace: Path | str | None = None) -> dict[str, Any]:
    """Seed the churn-prediction project with its runs and deployments.

    Registers the project, then one run per ``_RUN_HISTORY`` entry and one
    deployment per ``_DEPLOYMENT_HISTORY`` entry, each deployment linked to
    the run at its recorded index.

    Args:
        workspace: Workspace directory to write into. When ``None``, a fresh
            temporary directory is created and ``.contexta`` inside it is used.

    Returns:
        A dict with ``run_ids`` (the run refs, in ``_RUN_HISTORY`` order) and
        ``deployment_count``.
    """
    if workspace is not None:
        workspace_path = Path(workspace)
    else:
        scratch_root = Path(tempfile.mkdtemp(prefix="contexta-case11-"))
        workspace_path = scratch_root / ".contexta"

    config = UnifiedConfig(
        project_name=PROJECT_NAME,
        workspace=WorkspaceConfig(root_path=workspace_path),
    )
    ctx = Contexta(config=config)

    store = ctx.metadata_store
    try:
        project_ref = f"project:{PROJECT_NAME}"
        project = Project(
            project_ref=project_ref,
            name=PROJECT_NAME,
            created_at="2025-01-01T00:00:00Z",
            description="Customer churn prediction model",
        )
        store.projects.put_project(project)

        # Build runs in chronological order; deployments index into this list.
        run_refs: list[str] = [
            _build_run(
                store, ctx.record_store, PROJECT_NAME,
                run_name=name,
                started_at=begun,
                ended_at=finished,
                accuracy=accuracy, auc=auc, f1=f1,
            )
            for name, _month, begun, finished, accuracy, auc, f1 in _RUN_HISTORY
        ]

        for deploy_name, run_position, begun, finished, position in _DEPLOYMENT_HISTORY:
            deployment = DeploymentExecution(
                deployment_execution_ref=f"deployment:{PROJECT_NAME}.{deploy_name}",
                project_ref=project_ref,
                deployment_name=deploy_name,
                status="completed",
                started_at=begun,
                ended_at=finished,
                run_ref=run_refs[run_position],
                order_index=position,
            )
            store.deployments.put_deployment_execution(deployment)

        summary: dict[str, Any] = {
            "run_ids": run_refs,
            "deployment_count": len(_DEPLOYMENT_HISTORY),
        }
        return summary
    finally:
        # Close the metadata store whether or not seeding succeeded.
        store.close()


def main() -> None:
    """Seed ``.contexta`` in the current directory and print one confirmation line.

    Anything written to stdout while seeding is captured and discarded, so the
    confirmation is the only output.
    """
    import io
    from contextlib import redirect_stdout

    discarded_output = io.StringIO()
    with redirect_stdout(discarded_output):
        run_example(Path(".contexta"))

    print(f"Seeded {PROJECT_NAME} data in .contexta.")


if __name__ == "__main__":
    main()

case11_analyze_onboarding.py
"""Build an onboarding summary from previously recorded project history."""

from pathlib import Path

from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig


# Same project name the seed script registers its records under.
PROJECT_NAME = "churn-prediction"

# Open the workspace at ./.contexta, the path the seed script's main() writes to.
ctx = Contexta(
    config=UnifiedConfig(
        project_name=PROJECT_NAME,
        workspace=WorkspaceConfig(root_path=Path(".contexta")),
    )
)

# Held only so the metadata store can be closed in the ``finally`` below.
store = ctx.metadata_store
try:
    # Fetch the project's runs and deployments, and collect the run ids that
    # the ranking and report calls below take as input.
    runs = ctx.list_runs(PROJECT_NAME)
    deployments = ctx.list_deployments(PROJECT_NAME)
    run_ids = [run.run_id for run in runs]

    # Question 1: how many runs exist and what are they called?
    print(f"Runs: {len(runs)}")
    for run in runs:
        print(f"  {run.name}")

    # Question 3: which runs were deployed? Each line maps a deployment to its run.
    print(f"\nDeployments: {len(deployments)}")
    for deployment in deployments:
        print(f"  {deployment.deployment_id} -> {deployment.run_id}")

    # Question 4: rank runs by the "accuracy" metric, higher being better.
    # stage_name="evaluate" matches the stage the seed script attaches metrics to.
    best_run = ctx.select_best_run(run_ids, "accuracy", stage_name="evaluate", higher_is_better=True)
    print(f"\nBest run by accuracy: {best_run}")

    # Question 5: build the multi-run comparison report; only its title is printed here.
    report = ctx.build_multi_run_report(run_ids)
    print(f"Report: {report.title}")
finally:
    # Close the metadata store even if a query above raised.
    store.close()

Situation

Without Contexta

With Contexta

Complete Runnable Code

Situation

Without Contexta

With Contexta

Complete Runnable Code