Contexta Testing Guide
This guide explains how to validate Contexta workflows during the current prototype stage.
The key idea is simple:
- test semantics first
- use the smallest suite that proves the change
- keep examples tied to real execution, not only prose
Current Test Runner
The project uses pytest.
For repository-based test runs, pytest already receives src/ through the project configuration, so the usual commands work without manually setting PYTHONPATH.
Ad-hoc scripts run from a local checkout are different: for those, setting PYTHONPATH=src is still the safest option for now.
Useful Commands
Full Suite
uv run pytest -q
Use this when you want the broadest confidence before a larger merge or release step.
Core End-To-End Flow
uv run pytest tests/e2e/test_capture_to_report.py -q
Use this when your change affects:
- core onboarding
- query/report behavior
- facade-level read workflows
Quickstart Example Validation
uv run pytest tests/e2e/test_quickstart_examples.py -q
Use this when your change affects:
- `README.md` quickstart guidance
- `docs/tutorials/getting-started.md`
- `examples/quickstart/`
- the public onboarding path for new users
The two regression programs exercised by this check are displayed here because they are part of the documented validation surface:
"""Verified quickstart example for Contexta."""
from __future__ import annotations
import argparse
import tempfile
from pathlib import Path
from typing import Any
from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
from contexta.contract import (
MetricPayload,
MetricRecord,
Project,
RecordEnvelope,
Run,
StageExecution,
)
# Project name: passed to the Contexta config and embedded in the project, run, stage, and record refs.
PROJECT_NAME = "quickstart-proj"
# Name of the single demo run this example stores.
RUN_NAME = "demo-run"
# Run reference derived from the two names above; used to store the run and to query it back.
RUN_REF = f"run:{PROJECT_NAME}.{RUN_NAME}"
def _resolve_workspace(workspace: Path | str | None) -> Path:
    """Return the workspace path, inventing a temporary one when none is given.

    An explicit ``workspace`` is converted to ``Path`` unchanged. ``None``
    yields a ``.contexta`` path inside a freshly created temporary directory;
    only the temporary directory itself is created here.
    """
    if workspace is not None:
        return Path(workspace)
    temp_root = tempfile.mkdtemp(prefix="contexta-quickstart-")
    return Path(temp_root) / ".contexta"
def run_example(workspace: Path | str | None = None) -> dict[str, Any]:
    """Seed one project, run, stage, and metric, then read them back and write a report.

    Args:
        workspace: Workspace root to use. ``None`` defers to
            ``_resolve_workspace`` to pick the location.

    Returns:
        A summary dict holding the workspace path, the run ref, the number of
        runs listed, the snapshot's stage count, the report title, and the
        path of the Markdown report written to disk.
    """
    workspace_path = _resolve_workspace(workspace)
    config = UnifiedConfig(
        project_name=PROJECT_NAME,
        workspace=WorkspaceConfig(root_path=workspace_path),
    )
    ctx = Contexta(config=config)

    # The same project ref is needed by both the project and the run.
    project_ref = f"project:{PROJECT_NAME}"
    project = Project(
        project_ref=project_ref,
        name=PROJECT_NAME,
        created_at="2024-06-01T12:00:00Z",
    )
    run = Run(
        run_ref=RUN_REF,
        project_ref=project_ref,
        name=RUN_NAME,
        status="completed",
        started_at="2024-06-01T12:00:00Z",
        ended_at="2024-06-01T12:05:00Z",
    )
    stage = StageExecution(
        stage_execution_ref=f"stage:{PROJECT_NAME}.{RUN_NAME}.train",
        run_ref=RUN_REF,
        stage_name="train",
        status="completed",
        started_at="2024-06-01T12:01:00Z",
        ended_at="2024-06-01T12:04:00Z",
        order_index=0,
    )
    envelope = RecordEnvelope(
        record_ref=f"record:{PROJECT_NAME}.{RUN_NAME}.m0001",
        record_type="metric",
        recorded_at="2024-06-01T12:03:00Z",
        observed_at="2024-06-01T12:03:00Z",
        producer_ref="contexta.quickstart",
        run_ref=RUN_REF,
    )
    payload = MetricPayload(
        metric_key="accuracy",
        value=0.93,
        value_type="float64",
    )
    metric = MetricRecord(envelope=envelope, payload=payload)

    store = ctx.metadata_store
    try:
        # Write side: project, run, stage, then the metric record.
        store.projects.put_project(project)
        store.runs.put_run(run)
        store.stages.put_stage_execution(stage)
        ctx.record_store.append(metric)

        # Read side: list, snapshot, and report through the facade.
        visible_runs = ctx.list_runs(PROJECT_NAME)
        snapshot = ctx.get_run_snapshot(RUN_REF)
        report = ctx.build_snapshot_report(RUN_REF)

        report_path = ctx.config.workspace.reports_path / "quickstart-report.md"
        report_path.parent.mkdir(parents=True, exist_ok=True)
        report_path.write_text(report.to_markdown(), encoding="utf-8")

        return {
            "workspace": str(workspace_path),
            "run_ref": RUN_REF,
            "runs_visible": len(visible_runs),
            "snapshot_stage_count": len(snapshot.stages),
            "report_title": report.title,
            "report_path": str(report_path),
        }
    finally:
        # Close the metadata store whether or not the steps above succeeded.
        store.close()
def main() -> None:
    """Parse the optional ``--workspace`` argument, run the example, and print its summary."""
    cli = argparse.ArgumentParser(description="Run the verified Contexta quickstart example.")
    cli.add_argument(
        "--workspace",
        type=Path,
        default=None,
        help="Optional workspace root. Defaults to a temporary .contexta workspace.",
    )
    options = cli.parse_args()

    summary = run_example(options.workspace)
    print(f"Workspace: {summary['workspace']}")
    print(f"Run ref: {summary['run_ref']}")
    print(f"Runs visible: {summary['runs_visible']}")
    print(f"Report title: {summary['report_title']}")
    print(f"Report path: {summary['report_path']}")


if __name__ == "__main__":
    main()
"""Runtime capture preview example for Contexta."""
from __future__ import annotations
import argparse
import tempfile
from pathlib import Path
from typing import Any
from contexta import Contexta
from contexta.config import UnifiedConfig, WorkspaceConfig
# Project name passed to the Contexta configuration for this capture preview.
PROJECT_NAME = "capture-proj"
# Name of the run opened through ctx.run() in run_example.
RUN_NAME = "demo-run"
def _resolve_workspace(workspace: Path | str | None) -> Path:
    """Return the workspace path, inventing a temporary one when none is given.

    An explicit ``workspace`` is converted to ``Path`` unchanged. ``None``
    yields a ``.contexta`` path inside a freshly created temporary directory;
    only the temporary directory itself is created here.
    """
    if workspace is not None:
        return Path(workspace)
    temp_root = tempfile.mkdtemp(prefix="contexta-capture-preview-")
    return Path(temp_root) / ".contexta"
def run_example(workspace: Path | str | None = None) -> dict[str, Any]:
    """Drive the runtime scope API once and summarize the local capture file.

    Opens one run, emits an event, records two metrics inside a ``train``
    stage, and then inspects ``capture/record.jsonl`` under the workspace
    cache path.

    Args:
        workspace: Workspace root to use. ``None`` defers to
            ``_resolve_workspace`` to pick the location.

    Returns:
        A summary dict with the workspace path, the run's ref, the capture
        file path, whether that file exists, and its count of non-empty lines
        (``0`` when the file is absent).
    """
    workspace_path = _resolve_workspace(workspace)
    config = UnifiedConfig(
        project_name=PROJECT_NAME,
        workspace=WorkspaceConfig(root_path=workspace_path),
    )
    ctx = Contexta(config=config)

    with ctx.run(RUN_NAME) as run:
        run.event("dataset.loaded", message="dataset prepared")
        with run.stage("train") as stage:
            stage.metric("accuracy", 0.93, unit="ratio")
            stage.metric("loss", 0.12)

    record_capture_path = ctx.config.workspace.cache_path / "capture" / "record.jsonl"
    if record_capture_path.exists():
        capture_text = record_capture_path.read_text(encoding="utf-8")
        # Each non-empty line is counted as one captured record.
        captured_record_count = len([entry for entry in capture_text.splitlines() if entry])
    else:
        captured_record_count = 0

    return {
        "workspace": str(workspace_path),
        "run_ref": run.ref,
        "record_capture_path": str(record_capture_path),
        "record_capture_exists": record_capture_path.exists(),
        "captured_record_count": captured_record_count,
    }
def main() -> None:
    """Parse the optional ``--workspace`` argument, run the preview, and print its summary."""
    cli = argparse.ArgumentParser(description="Run the Contexta runtime capture preview.")
    cli.add_argument(
        "--workspace",
        type=Path,
        default=None,
        help="Optional workspace root. Defaults to a temporary .contexta workspace.",
    )
    options = cli.parse_args()

    summary = run_example(options.workspace)
    print(f"Workspace: {summary['workspace']}")
    print(f"Run ref: {summary['run_ref']}")
    print(f"Capture file: {summary['record_capture_path']}")
    print(f"Capture file exists: {summary['record_capture_exists']}")
    print(f"Captured records: {summary['captured_record_count']}")


if __name__ == "__main__":
    main()
Recovery Example Validation
uv run pytest tests/e2e/test_recovery_examples.py -q
Use this when your change affects:
- `examples/recovery/`
- backup, replay, or artifact transfer examples
- operator-facing recovery onboarding
Observable Workflow Example Validation
uv run pytest tests/e2e/test_observed_workflow_examples.py -q
Use this when your change affects the ML, deep learning, or LLM workflow examples. This suite verifies that displayed examples execute and capture evidence from their work.
What The Current Evidence Covers
The strongest workflow-level evidence in the repository today comes from:
- `tests/e2e/test_capture_to_report.py`
- `tests/e2e/test_quickstart_examples.py`
- `tests/e2e/test_recovery_examples.py`
- `tests/e2e/test_observed_workflow_examples.py`
Together, these cover:
- facade lifecycle and read flows
- query, compare, diagnostics, and report behavior
- quickstart example validation
- recovery example validation
- measured ML/DL capture and local mock-API evaluation examples
Layered Testing Model
The documentation and design baseline describe the test story in layers.
Unit
Use unit tests for:
- helper functions
- serializers and deserializers
- small validation and parsing rules
Contract
Use contract tests for canonical model validation, deterministic serialization, and stable result shapes.
Plane Integration
Use plane integration tests for:
- metadata store behavior
- record append and replay
- artifact ingest and verification
Recovery
Use recovery and migration tests for:
- replay behavior
- backup and restore
Surface
Use surface tests for:
- Python facade behavior
- CLI behavior
- HTTP JSON behavior
- HTML UI behavior
End-To-End
Use end-to-end tests when you want confidence that a user journey still works across multiple layers at once.
How To Validate Documentation Changes
Documentation should stay attached to executable reality.
If you change:
- getting-started examples
  - rerun the onboarding script or the nearest e2e flow
- common query or report guidance
  - rerun the core e2e flow
- recovery guidance
  - rerun recovery example coverage or the nearest recovery suite
For doc-heavy changes, the goal is not to rerun everything blindly. The goal is to rerun the closest proof that the guidance is still true.
What To Assert
Prefer semantic assertions over formatting-sensitive assertions.
Good assertions:
- a run snapshot contains the expected run id, stages, and records
- a comparison exposes the expected metric or stage differences
- a report has the expected title and sections
Weaker assertions:
- exact incidental ordering when ordering is not part of the contract
- large brittle string snapshots for outputs that are still evolving quickly
Example Validation Expectations
Examples in public docs should prove at least one of these:
- canonical import paths work
- a workspace can be created and read
- a run can be queried
- a report can be built
Examples should not silently depend on internal modules or private helper paths.
Prototype Notes
At the current prototype stage:
- the source-tree script story still relies on `PYTHONPATH=src`
- the package and CLI names are aligned as `contexta`
That is why the testing guide emphasizes executable repository commands and file-scoped test suites rather than a polished install-and-run-from-anywhere story.
Where To Go Next
Continue with: