Artifact Handling

Artifacts are durable files attached to evidence. Prefer files that were produced or consumed by the observed operation: a fitted model, a checkpoint, an evaluation set, a prompt template, or a generated report.

Executable Artifact Examples

The examples below register output files when the workflow produces them. Other runtime evidence is captured within the same run context.

Machine Learning
Deep Learning

"""Persist a fitted regression model and register it as observed evidence."""

import pickle
from pathlib import Path

from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

from contexta import Contexta
from contexta.capture import LocalJsonlSink


# Load the diabetes regression data and hold out 20% of the rows for scoring.
inputs, labels = load_diabetes(return_X_y=True)
fit_inputs, holdout_inputs, fit_labels, holdout_labels = train_test_split(
    inputs, labels, test_size=0.2, random_state=42
)

# Everything this example writes lives under the workspace directory.
workspace = Path(".contexta")
model_path = workspace / "models" / "linear-regression.pkl"

ctx = Contexta(workspace=str(workspace), config={"project_name": "diabetes-artifact"})
# Pick the first LocalJsonlSink attached to the context; it is queried below
# for the file that holds ARTIFACT records.
local_sink = next(filter(lambda sink: isinstance(sink, LocalJsonlSink), ctx.sinks))
regressor = LinearRegression()

with ctx.run("fitted-model", dataset_ref="dataset:sklearn.diabetes") as run:
    with run.stage("train"):
        regressor.fit(fit_inputs, fit_labels)

    with run.stage("evaluate") as stage:
        predicted = regressor.predict(holdout_inputs)
        r2 = r2_score(holdout_labels, predicted)
        stage.metric("r2", r2, unit="ratio")

    # Write the fitted estimator to disk before registering it, so the
    # registered path refers to a file produced by this run.
    model_path.parent.mkdir(parents=True, exist_ok=True)
    serialized_model = pickle.dumps(regressor)
    model_path.write_bytes(serialized_model)
    registration = run.register_artifact(
        "model",
        str(model_path),
        attributes={"framework": "scikit-learn", "format": "pickle"},
    )

manifest = registration.payload["manifest"]
artifact_ref = manifest.artifact_ref
# Report the sink's ARTIFACT file relative to the current working directory.
artifacts_path = local_sink.file_path_for("ARTIFACT").relative_to(Path.cwd())

summary_lines = (
    f"Captured run: {run.ref}",
    f"Measured r2: {r2:.3f}",
    f"Registered artifact: {artifact_ref}",
    f"Model file: {model_path.as_posix()}",
    f"Artifact records: {artifacts_path.as_posix()}",
)
for line in summary_lines:
    print(line)

"""Train a tiny CNN, save its checkpoint, and register the checkpoint artifact."""

from pathlib import Path

import torch
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

from contexta import Contexta
from contexta.capture import LocalJsonlSink


class TinyCNN(nn.Module):
    """Minimal convolutional classifier: one conv layer, pooling, and a linear head.

    The linear head takes 8 * 4 * 4 flattened features, which matches e.g. a
    single-channel 8x8 input after the size-preserving 3x3 convolution and
    the 2x2 max-pool. It emits 10 scores per sample.
    """

    def __init__(self) -> None:
        super().__init__()
        conv_channels = 8
        pooled_side = 4
        stack = [
            nn.Conv2d(1, conv_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(conv_channels * pooled_side * pooled_side, 10),
        ]
        # The attribute name and layer positions determine the state_dict
        # keys ("layers.0.*", "layers.4.*"), so keep both stable.
        self.layers = nn.Sequential(*stack)

    def forward(self, features: torch.Tensor) -> torch.Tensor:
        """Run ``features`` through the layer stack and return the class scores."""
        logits = self.layers(features)
        return logits


# Seed torch so weight initialisation and batch shuffling are repeatable.
torch.manual_seed(7)
digits = load_digits()
train_x, test_x, train_y, test_y = train_test_split(
    digits.images, digits.target, test_size=0.2, stratify=digits.target, random_state=7
)
# Insert a channel axis (N, H, W) -> (N, 1, H, W) for Conv2d and divide by 16.0
# to scale the pixel intensities.
train_data = TensorDataset(
    torch.tensor(train_x[:, None] / 16.0, dtype=torch.float32),
    torch.tensor(train_y, dtype=torch.long),
)
loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_features = torch.tensor(test_x[:, None] / 16.0, dtype=torch.float32)
test_targets = torch.tensor(test_y, dtype=torch.long)

workspace = Path(".contexta")
ctx = Contexta(workspace=str(workspace), config={"project_name": "digits-artifact"})
# Pick the first LocalJsonlSink attached to the context; it is queried below
# for the file that holds ARTIFACT records.
local_sink = next(sink for sink in ctx.sinks if isinstance(sink, LocalJsonlSink))
model = TinyCNN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

with ctx.run("trained-checkpoint", dataset_ref="dataset:sklearn.digits") as run:
    with run.stage("train"):
        # Set training mode explicitly so the example stays correct if
        # mode-dependent layers (dropout, batch norm) are added later.
        model.train()
        # A single pass over the loader, i.e. one epoch (see "epochs" below).
        for features, targets in loader:
            optimizer.zero_grad()
            loss = loss_fn(model(features), targets)
            loss.backward()
            optimizer.step()

    with run.stage("evaluate") as stage:
        # Switch to inference mode before scoring; no_grad() only disables
        # gradient tracking and does not change layer behaviour.
        model.eval()
        with torch.no_grad():
            predictions = model(test_features).argmax(dim=1)
        accuracy = (predictions == test_targets).float().mean().item()
        stage.metric("accuracy", accuracy, unit="ratio")

    # Save the trained weights before registering them, so the registered
    # path refers to a file produced by this run.
    checkpoint_path = workspace / "checkpoints" / "tiny-cnn.pt"
    checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), checkpoint_path)
    registration = run.register_artifact(
        "checkpoint",
        str(checkpoint_path),
        attributes={"framework": "pytorch", "epochs": 1},
    )

artifact_ref = registration.payload["manifest"].artifact_ref
# Report the sink's ARTIFACT file relative to the current working directory.
artifacts_path = local_sink.file_path_for("ARTIFACT").relative_to(Path.cwd())

print(f"Captured run: {run.ref}")
print(f"Measured validation accuracy: {accuracy:.3f}")
print(f"Registered artifact: {artifact_ref}")
print(f"Checkpoint file: {checkpoint_path.as_posix()}")
print(f"Artifact records: {artifacts_path.as_posix()}")

Naming Guidance

Use lower snake case for artifact kinds: model, checkpoint, prompt_template, eval_set, schema, report.

Do not create a text file labelled "model" or "checkpoint" merely to make an example look complete. When a page describes training, register the artifact actually written by the fitted model or framework.

Executable Artifact Examples

Naming Guidance

Related Pages

Executable Artifact Examples

Naming Guidance

Related Pages