{
  "schemaKind": "protocol",
  "protocolVersion": "1.0",
  "meta": {
    "archetype": "ml_train",
    "eligibility": "eligible",
    "repo": {
      "owner": "trevin-creator",
      "name": "autoresearch-mlx",
      "defaultBranch": "main",
      "cloneUrl": "https://github.com/trevin-creator/autoresearch-mlx"
    },
    "purposeStatement": "Improve the Apple Silicon MLX training recipe to minimize validation bits-per-byte under a fixed local training budget.",
    "createdAt": "2026-05-03T15:04:16Z",
    "updatedAt": "2026-05-03T15:04:16Z",
    "protocolBundleId": "trevin-creator-autoresearch-mlx-ba6ebf6-20260503"
  },
  "environment": {
    "osHints": [
      "darwin-arm64",
      "Apple Silicon Mac"
    ],
    "packageManagers": [
      "uv"
    ],
    "setupCommands": [
      "uv sync",
      "uv run prepare.py"
    ],
    "assetPrep": [
      "Prepare local data and tokenizer cache at ~/.cache/autoresearch/ via uv run prepare.py."
    ],
    "constraints": {
      "noNewDependencies": true,
      "networkPolicy": "full"
    }
  },
  "mutableSurface": {
    "allowedGlobs": [
      "train.py"
    ],
    "forbiddenGlobs": [
      "prepare.py",
      "pyproject.toml",
      "uv.lock"
    ],
    "allowedKinds": [
      "code_edit"
    ]
  },
  "immutableHarness": {
    "paths": [
      "prepare.py"
    ],
    "rationale": "prepare.py defines the data preparation, tokenizer, dataloader, fixed constants, and evaluate_bpb metric; modifying it would invalidate comparisons."
  },
  "execution": {
    "command": "uv run train.py",
    "cwd": ".",
    "stopCondition": {
      "type": "wall_clock",
      "trainingSecondsBudget": 300,
      "excludeCompilationFromBudget": true,
      "notes": "The script runs a fixed 5-minute training budget, with startup/compilation and final eval overhead outside the training timer."
    },
    "hardTimeoutSeconds": 900,
    "determinism": {
      "seedPolicy": "optional",
      "notes": "Compare fresh baseline and trials on the same Apple Silicon hardware and prepared data snapshot."
    }
  },
  "measurement": {
    "primaryMetric": {
      "name": "val_bpb",
      "direction": "minimize",
      "extract": {
        "kind": "regex",
        "pattern": "^val_bpb:\\s+([0-9]+(?:\\.[0-9]+)?)",
        "exampleStdout": "---\nval_bpb:          2.534000\ntraining_seconds: 312.4\ntotal_seconds:    405.7\npeak_vram_mb:     27528.9\nmfu_percent:      0.00\ntotal_tokens_M:   39.8\nnum_steps:        46\nnum_params_M:     50.3\ndepth:            8"
      }
    },
    "secondaryMetrics": [
      {
        "name": "peak_vram_mb",
        "direction": "minimize",
        "extract": {
          "kind": "regex",
          "pattern": "^peak_vram_mb:\\s+([0-9]+(?:\\.[0-9]+)?)",
          "exampleStdout": "peak_vram_mb:     27528.9"
        }
      },
      {
        "name": "num_steps",
        "direction": "maximize",
        "extract": {
          "kind": "regex",
          "pattern": "^num_steps:\\s+([0-9]+)",
          "exampleStdout": "num_steps:        46"
        }
      }
    ],
    "baselinePolicy": {
      "establishOnHardware": true,
      "sameDataSnapshot": true,
      "baselineNotes": "Baseline was established on this hardware after uv run prepare.py using local ~/.cache/autoresearch/ data/tokenizer snapshot. FINAL_EVAL_BATCH_SIZE was reduced from 256 to 64 in train.py to fit this machine's Metal buffer limit while preserving the same total eval token budget."
    }
  },
  "provenance": {
    "resultsLog": {
      "format": "tsv",
      "path": "results.tsv",
      "columns": [
        "commit",
        "val_bpb",
        "memory_gb",
        "status",
        "description"
      ]
    },
    "gitWorkflow": {
      "branchPattern": "autoresearch/<tag>",
      "commitScope": "Only stage experiment changes and result log updates for this repo.",
      "stagingExample": "git add train.py results.tsv"
    },
    "baselineArtifactPath": ".autoresearch/create/baseline_run.log"
  },
  "safety": {
    "oomPolicy": "reduce_batch",
    "crashStatus": "discard"
  },
  "agentRules": {
    "simplicityCriterion": true,
    "autonomy": {
      "noAskHumanToContinue": true
    },
    "experimentTimeoutNotes": "Treat runs exceeding 15 minutes as failed or hung.",
    "logRedirectExample": "uv run train.py > run.log 2>&1"
  },
  "archetypeExtensions": {
    "ml_train": {
      "dataSnapshot": "Local ~/.cache/autoresearch/ data and tokenizer prepared by uv run prepare.py.",
      "hardwareNotes": "Apple Silicon MLX results are hardware-sensitive; compare against a fresh baseline on the same machine or clearly compatible hardware class."
    }
  }
}