Skip to main content

Contexta Testing Guide

This guide explains how to validate Contexta workflows during the current prototype stage.

The key idea is simple:

  • test semantics first
  • use the smallest suite that proves the change
  • keep examples tied to real execution, not only prose

Current Test Runner

The project uses pytest.

For repository-based test runs, pytest already receives src/ through the project configuration, so the usual commands work without manually setting PYTHONPATH.

That is different from ad-hoc scripts run from a local checkout, where setting PYTHONPATH=src is still the safest approach for now.

Useful Commands

Full Suite

uv run pytest -q

Use this when you want the broadest confidence before a larger merge or release step.

Core End-To-End Flow

uv run pytest tests/e2e/test_capture_to_report.py -q

Use this when your change affects:

  • core onboarding
  • query/report behavior
  • facade-level read workflows

Quickstart Example Validation

uv run pytest tests/e2e/test_quickstart_examples.py -q

Use this when your change affects:

  • README.md quickstart guidance
  • docs/tutorials/getting-started.md
  • examples/quickstart/
  • the public onboarding path for new users

The two regression programs exercised by this check are displayed here because they are part of the documented validation surface:

verified_quickstart.py
"""Verified quickstart example for Contexta."""

from __future__ import annotations

import argparse
import tempfile
from pathlib import Path
from typing import Any

from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
from contexta.contract import (
MetricPayload,
MetricRecord,
Project,
RecordEnvelope,
Run,
StageExecution,
)


# Names reused by run_example() when building the project, run, stage, and record refs.
PROJECT_NAME = "quickstart-proj"
RUN_NAME = "demo-run"
# Run reference composed as "run:<project name>.<run name>".
RUN_REF = f"run:{PROJECT_NAME}.{RUN_NAME}"


def _resolve_workspace(workspace: Path | str | None) -> Path:
if workspace is None:
root = Path(tempfile.mkdtemp(prefix="contexta-quickstart-"))
return root / ".contexta"
return Path(workspace)


def run_example(workspace: Path | str | None = None) -> dict[str, Any]:
    """Seed a minimal workspace, read it back, and write a snapshot report.

    One project, one run, one ``train`` stage execution, and one ``accuracy``
    metric record are stored. The run is then listed, snapshotted, and
    rendered to ``quickstart-report.md`` under the workspace reports path.

    Args:
        workspace: Workspace root to use. ``None`` falls back to whatever
            ``_resolve_workspace`` provides.

    Returns:
        A summary dict with the keys ``workspace``, ``run_ref``,
        ``runs_visible``, ``snapshot_stage_count``, ``report_title``, and
        ``report_path``.
    """
    workspace_path = _resolve_workspace(workspace)
    settings = UnifiedConfig(
        project_name=PROJECT_NAME,
        workspace=WorkspaceConfig(root_path=workspace_path),
    )
    ctx = Contexta(config=settings)

    # The same project ref is used by both the project and the run.
    project_ref = f"project:{PROJECT_NAME}"
    project = Project(
        project_ref=project_ref,
        name=PROJECT_NAME,
        created_at="2024-06-01T12:00:00Z",
    )
    run = Run(
        run_ref=RUN_REF,
        project_ref=project_ref,
        name=RUN_NAME,
        status="completed",
        started_at="2024-06-01T12:00:00Z",
        ended_at="2024-06-01T12:05:00Z",
    )
    stage = StageExecution(
        stage_execution_ref=f"stage:{PROJECT_NAME}.{RUN_NAME}.train",
        run_ref=RUN_REF,
        stage_name="train",
        status="completed",
        started_at="2024-06-01T12:01:00Z",
        ended_at="2024-06-01T12:04:00Z",
        order_index=0,
    )

    metric_envelope = RecordEnvelope(
        record_ref=f"record:{PROJECT_NAME}.{RUN_NAME}.m0001",
        record_type="metric",
        recorded_at="2024-06-01T12:03:00Z",
        observed_at="2024-06-01T12:03:00Z",
        producer_ref="contexta.quickstart",
        run_ref=RUN_REF,
    )
    metric_payload = MetricPayload(
        metric_key="accuracy",
        value=0.93,
        value_type="float64",
    )
    metric = MetricRecord(envelope=metric_envelope, payload=metric_payload)

    store = ctx.metadata_store
    try:
        # Write side: metadata goes to the metadata store, the metric to the record store.
        store.projects.put_project(project)
        store.runs.put_run(run)
        store.stages.put_stage_execution(stage)
        ctx.record_store.append(metric)

        # Read side: query through the facade and build the report document.
        visible_runs = ctx.list_runs(PROJECT_NAME)
        snapshot = ctx.get_run_snapshot(RUN_REF)
        report = ctx.build_snapshot_report(RUN_REF)

        report_path = ctx.config.workspace.reports_path / "quickstart-report.md"
        report_path.parent.mkdir(parents=True, exist_ok=True)
        report_path.write_text(report.to_markdown(), encoding="utf-8")

        summary: dict[str, Any] = {
            "workspace": str(workspace_path),
            "run_ref": RUN_REF,
            "runs_visible": len(visible_runs),
            "snapshot_stage_count": len(snapshot.stages),
            "report_title": report.title,
            "report_path": str(report_path),
        }
    finally:
        # The metadata store is closed whether or not the steps above succeeded.
        store.close()
    return summary


def main() -> None:
    """Parse ``--workspace``, run the quickstart example, and print its summary."""
    cli = argparse.ArgumentParser(description="Run the verified Contexta quickstart example.")
    cli.add_argument(
        "--workspace",
        type=Path,
        default=None,
        help="Optional workspace root. Defaults to a temporary .contexta workspace.",
    )
    options = cli.parse_args()

    summary = run_example(options.workspace)
    # One field per output line; ``sep`` supplies the line breaks.
    print(
        f"Workspace: {summary['workspace']}",
        f"Run ref: {summary['run_ref']}",
        f"Runs visible: {summary['runs_visible']}",
        f"Report title: {summary['report_title']}",
        f"Report path: {summary['report_path']}",
        sep="\n",
    )


if __name__ == "__main__":
    main()
runtime_capture_preview.py
"""Runtime capture preview example for Contexta."""

from __future__ import annotations

import argparse
import tempfile
from pathlib import Path
from typing import Any

from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig


# Project name given to UnifiedConfig and run name given to ctx.run() in run_example().
PROJECT_NAME = "capture-proj"
RUN_NAME = "demo-run"


def _resolve_workspace(workspace: Path | str | None) -> Path:
if workspace is None:
root = Path(tempfile.mkdtemp(prefix="contexta-capture-preview-"))
return root / ".contexta"
return Path(workspace)


def run_example(workspace: Path | str | None = None) -> dict[str, Any]:
    """Drive the runtime scope API and report on the local capture file.

    A run named ``RUN_NAME`` is opened, one event is emitted, and two metrics
    are recorded inside a ``train`` stage. Afterwards the function looks for
    ``capture/record.jsonl`` under the workspace cache path and counts its
    non-empty lines.

    Args:
        workspace: Workspace root to use. ``None`` falls back to whatever
            ``_resolve_workspace`` provides.

    Returns:
        A summary dict with the keys ``workspace``, ``run_ref``,
        ``record_capture_path``, ``record_capture_exists``, and
        ``captured_record_count``.
    """
    workspace_path = _resolve_workspace(workspace)
    settings = UnifiedConfig(
        project_name=PROJECT_NAME,
        workspace=WorkspaceConfig(root_path=workspace_path),
    )
    ctx = Contexta(config=settings)

    with ctx.run(RUN_NAME) as run:
        run.event("dataset.loaded", message="dataset prepared")
        with run.stage("train") as stage:
            stage.metric("accuracy", 0.93, unit="ratio")
            stage.metric("loss", 0.12)

    record_capture_path = ctx.config.workspace.cache_path / "capture" / "record.jsonl"
    if record_capture_path.exists():
        capture_text = record_capture_path.read_text(encoding="utf-8")
        # Empty lines do not count as captured records.
        captured_record_count = len([entry for entry in capture_text.splitlines() if entry])
    else:
        captured_record_count = 0

    return {
        "workspace": str(workspace_path),
        "run_ref": run.ref,
        "record_capture_path": str(record_capture_path),
        "record_capture_exists": record_capture_path.exists(),
        "captured_record_count": captured_record_count,
    }


def main() -> None:
    """Parse ``--workspace``, run the capture preview, and print its summary."""
    cli = argparse.ArgumentParser(description="Run the Contexta runtime capture preview.")
    cli.add_argument(
        "--workspace",
        type=Path,
        default=None,
        help="Optional workspace root. Defaults to a temporary .contexta workspace.",
    )
    options = cli.parse_args()

    summary = run_example(options.workspace)
    # One field per output line; ``sep`` supplies the line breaks.
    print(
        f"Workspace: {summary['workspace']}",
        f"Run ref: {summary['run_ref']}",
        f"Capture file: {summary['record_capture_path']}",
        f"Capture file exists: {summary['record_capture_exists']}",
        f"Captured records: {summary['captured_record_count']}",
        sep="\n",
    )


if __name__ == "__main__":
    main()

Recovery Example Validation

uv run pytest tests/e2e/test_recovery_examples.py -q

Use this when your change affects:

  • examples/recovery/
  • backup, replay, or artifact transfer examples
  • operator-facing recovery onboarding

Observable Workflow Example Validation

uv run pytest tests/e2e/test_observed_workflow_examples.py -q

Use this when your change affects the ML, deep learning, or LLM workflow examples. This suite verifies that the displayed examples actually execute and that each run captures evidence of the work it performed.

What The Current Evidence Covers

The strongest workflow-level evidence in the repository today comes from:

  • tests/e2e/test_capture_to_report.py
  • tests/e2e/test_quickstart_examples.py
  • tests/e2e/test_recovery_examples.py
  • tests/e2e/test_observed_workflow_examples.py

Together, these cover:

  • facade lifecycle and read flows
  • query, compare, diagnostics, and report behavior
  • quickstart example validation
  • recovery example validation
  • measured ML/DL capture and local mock-API evaluation examples

Layered Testing Model

The documentation and design baseline describe the test story in layers.

Unit

Use unit tests for:

  • helper functions
  • serializers and deserializers
  • small validation and parsing rules

Contract

Use contract tests for canonical model validation, deterministic serialization, and stable result shapes.

Plane Integration

Use plane integration tests for:

  • metadata store behavior
  • record append and replay
  • artifact ingest and verification

Recovery

Use recovery and migration tests for:

  • replay behavior
  • backup and restore

Surface

Use surface tests for:

  • Python facade behavior
  • CLI behavior
  • HTTP JSON behavior
  • HTML UI behavior

End-To-End

Use end-to-end tests when you want confidence that a user journey still works across multiple layers at once.

How To Validate Documentation Changes

Documentation should stay attached to executable reality.

If you change:

  • getting-started examples
    • rerun the onboarding script or the nearest e2e flow
  • common query or report guidance
    • rerun the core e2e flow
  • recovery guidance
    • rerun recovery example coverage or the nearest recovery suite

For doc-heavy changes, the goal is not to rerun everything blindly. The goal is to rerun the closest proof that the guidance is still true.

What To Assert

Prefer semantic assertions over formatting-sensitive assertions.

Good assertions:

  • a run snapshot contains the expected run id, stages, and records
  • a comparison exposes the expected metric or stage differences
  • a report has the expected title and sections

Weaker assertions:

  • exact incidental ordering when ordering is not part of the contract
  • large brittle string snapshots for outputs that are still evolving quickly

Example Validation Expectations

Examples in public docs should prove at least one of these:

  • canonical import paths work
  • a workspace can be created and read
  • a run can be queried
  • a report can be built

Examples should not silently depend on internal modules or private helper paths.

Prototype Notes

At the current prototype stage:

  • the source-tree script story still relies on PYTHONPATH=src
  • the package and CLI names are aligned as contexta

That is why the testing guide emphasizes executable repository commands and file-scoped test suites rather than a polished install-and-run-from-anywhere story.

Where To Go Next

Continue with: