{
  "generatedAt": "2026-04-28T02:56:04.091Z",
  "source": {
    "matrixArtifact": "artifacts/skill-matrix-2026-04-20T00-20-28-068Z.json",
    "soakArtifact": "artifacts/mini-soak-2026-03-31T12-15-42-170Z.json",
    "baseUrl": "https://api.silksocket.ai"
  },
  "totals": {
    "totalSkillsCovered": 18,
    "totalSkillsRegistered": 20,
    "totalCalls": 157,
    "totalSuccesses": 157,
    "totalFailures": 0
  },
  "aggregate": {
    "successRate": 1,
    "retryRate": 0,
    "timeoutRate": 0,
    "skillP50Ms": 166,
    "skillP95Ms": 1179,
    "skillP99Ms": 1179,
    "e2eP50Ms": 473,
    "e2eP95Ms": 1448,
    "e2eP99Ms": 1448,
    "guardBlockRateOnUnsafe": 1,
    "guardFalsePositiveRate": 0,
    "multiStepCompletionRate": 1,
    "auditCompleteness": 1
  },
  "pillars": [
    {
      "id": "reliability",
      "title": "Reliability",
      "narrative": "Every skill call the matrix smoke ran is counted; no retries are hidden. Successes mean the provider returned a structured result without error.",
      "metrics": [
        {
          "id": "exec_success_rate",
          "label": "Execution success rate",
          "value": "100.00%",
          "rawValue": 1,
          "unit": "percent",
          "target": "≥ 99%",
          "status": "healthy",
          "description": "Share of skill calls that returned a structured result without provider or transport error.",
          "sampleSize": 157
        },
        {
          "id": "retry_rate",
          "label": "Retry rate",
          "value": "0.00%",
          "rawValue": 0,
          "unit": "percent",
          "target": "< 5%",
          "status": "healthy",
          "description": "Share of calls that needed at least one retry to succeed.",
          "sampleSize": 157
        },
        {
          "id": "timeout_rate",
          "label": "Timeout rate",
          "value": "0.00%",
          "rawValue": 0,
          "unit": "percent",
          "target": "< 1%",
          "status": "healthy",
          "description": "Share of calls that exceeded the per-call timeout budget.",
          "sampleSize": 157
        }
      ]
    },
    {
      "id": "speed",
      "title": "Speed",
      "narrative": "Skill latency measures only the skill execution path. End-to-end includes rent + release orchestration for utility skills.",
      "metrics": [
        {
          "id": "skill_p50",
          "label": "Skill p50",
          "value": "166 ms",
          "rawValue": 166,
          "unit": "ms",
          "target": "",
          "status": "healthy",
          "description": "Median latency for the skill execution path (proxy-only for utility, direct for signature).",
          "sampleSize": 157
        },
        {
          "id": "skill_p95",
          "label": "Skill p95",
          "value": "1.18 s",
          "rawValue": 1179,
          "unit": "ms",
          "target": "< 3000 ms",
          "status": "healthy",
          "description": "95th percentile skill execution latency.",
          "sampleSize": 157
        },
        {
          "id": "e2e_p95",
          "label": "End-to-end p95",
          "value": "1.45 s",
          "rawValue": 1448,
          "unit": "ms",
          "target": "< 5000 ms",
          "status": "healthy",
          "description": "95th percentile end-to-end latency including rent + proxy + release for utility skills.",
          "sampleSize": 157
        }
      ]
    },
    {
      "id": "safety",
      "title": "Safety",
      "narrative": "Agent Guard is exercised with known-unsafe proposed actions (destructive, exfiltration, jailbreak phrasing) and known-safe counterparts.",
      "metrics": [
        {
          "id": "guard_block_unsafe",
          "label": "Guard block rate on unsafe",
          "value": "100.0%",
          "rawValue": 1,
          "unit": "percent",
          "target": "≥ 95%",
          "status": "healthy",
          "description": "Share of known-unsafe requests that Agent Guard detected and blocked under the default policy.",
          "sampleSize": 5
        },
        {
          "id": "guard_false_positive",
          "label": "Guard false positives",
          "value": "0.0%",
          "rawValue": 0,
          "unit": "percent",
          "target": "< 5%",
          "status": "healthy",
          "description": "Share of known-safe requests that Agent Guard incorrectly blocked.",
          "sampleSize": 5
        },
        {
          "id": "human_override",
          "label": "Human override rate",
          "value": "—",
          "rawValue": null,
          "unit": null,
          "target": "Tracked via operator console",
          "status": "instrumenting",
          "description": "Share of agent executions that required human intervention. Emitted by the operator console on a rolling 7-day window (instrumenting).",
          "sampleSize": null
        }
      ]
    },
    {
      "id": "consistency",
      "title": "Consistency",
      "narrative": "Consistency measures whether repeated, structured requests produce the same final state and whether multi-step plans actually finish.",
      "metrics": [
        {
          "id": "multi_step_completion",
          "label": "Multi-step plan completion",
          "value": "100.0%",
          "rawValue": 1,
          "unit": "percent",
          "target": "≥ 95%",
          "status": "healthy",
          "description": "Share of 3-task Gameplan flows that reached the done state across all tasks without human nudging.",
          "sampleSize": 3
        },
        {
          "id": "deterministic_completion",
          "label": "Deterministic completion",
          "value": "100.00%",
          "rawValue": 1,
          "unit": "percent",
          "target": "≥ 99%",
          "status": "healthy",
          "description": "Share of structured skill calls that returned the expected shape. Derived from the matrix smoke across all registered skills.",
          "sampleSize": 157
        },
        {
          "id": "cross_model_consistency",
          "label": "Cross-model consistency",
          "value": "—",
          "rawValue": null,
          "unit": null,
          "target": "Planned for dual-model harness",
          "status": "instrumenting",
          "description": "Same prompt, two models: how often the Agent Capability Layer converges to the same acceptable outcome (instrumenting).",
          "sampleSize": null
        }
      ]
    },
    {
      "id": "auditability",
      "title": "Auditability",
      "narrative": "Every call through the Agent Capability Layer writes a usage record and a security log. Signature skills add a separate action log.",
      "metrics": [
        {
          "id": "audit_completeness",
          "label": "Audit completeness",
          "value": "100.0%",
          "rawValue": 1,
          "unit": "percent",
          "target": "= 100%",
          "status": "healthy",
          "description": "Share of sampled calls where a full audit record (request, decision, outcome) was persisted server-side.",
          "sampleSize": 157
        },
        {
          "id": "trace_coverage",
          "label": "Trace coverage",
          "value": "18/20 skills",
          "rawValue": 0.9,
          "unit": "skills",
          "target": "100% of registered skills",
          "status": "watch",
          "description": "Share of registered skills exercised by the last matrix smoke run. Uncovered skills are not backed by published numbers.",
          "sampleSize": 18
        },
        {
          "id": "decision_log_completeness",
          "label": "Guard decision log",
          "value": "10 decisions logged",
          "rawValue": 10,
          "unit": "decisions",
          "target": "Every guard call logged",
          "status": "healthy",
          "description": "Count of guard evaluations retained in the decision log during the sample window.",
          "sampleSize": 10
        }
      ]
    }
  ],
  "perSkill": [
    {
      "skillId": "skill_echo",
      "name": "echo",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 160,
      "p95Ms": 299,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_math",
      "name": "math_eval",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 149,
      "p95Ms": 335,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_json_parse",
      "name": "json_parse",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 144,
      "p95Ms": 393,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_qr_code",
      "name": "qr_code",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 152,
      "p95Ms": 287,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_url_shorten",
      "name": "url_shorten",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 243,
      "p95Ms": 652,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_currency_convert",
      "name": "currency_convert",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 176,
      "p95Ms": 742,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_weather",
      "name": "weather",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 188,
      "p95Ms": 294,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_prompt_lookup",
      "name": "prompt_lookup",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 202,
      "p95Ms": 290,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_file_read",
      "name": "file_read",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 142,
      "p95Ms": 182,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_email_send",
      "name": "email_send",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 326,
      "p95Ms": 361,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_web_search",
      "name": "web_search",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 887,
      "p95Ms": 1179,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_web_browse",
      "name": "web_browse",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 203,
      "p95Ms": 240,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_image_search",
      "name": "image_search",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 144,
      "p95Ms": 519,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_document_create",
      "name": "document_create",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 166,
      "p95Ms": 246,
      "sampleSize": 10,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_agent_guard",
      "name": "agent_guard",
      "mode": "direct_provider",
      "successRate": 1,
      "p50Ms": 132,
      "p95Ms": 219,
      "sampleSize": 5,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_gameplan",
      "name": "gameplan",
      "mode": "direct_provider",
      "successRate": 1,
      "p50Ms": 120,
      "p95Ms": 160,
      "sampleSize": 5,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_safehouse",
      "name": "safehouse",
      "mode": "direct_provider",
      "successRate": 1,
      "p50Ms": 169,
      "p95Ms": 186,
      "sampleSize": 5,
      "status": "healthy",
      "note": null
    },
    {
      "skillId": "skill_deep_think",
      "name": "deep_think",
      "mode": "socket_proxy",
      "successRate": 1,
      "p50Ms": 866,
      "p95Ms": 947,
      "sampleSize": 2,
      "status": "healthy",
      "note": null
    }
  ],
  "platform": {
    "platformSuccessRate": 1,
    "measuredCalls": 157,
    "measuredSuccesses": 157,
    "configGapSkillCount": 0
  },
  "landingClaims": {
    "totalSkillsLabel": "20 skills live",
    "successRateLabel": "100.00%",
    "p95LatencyLabel": "1.18 s",
    "guardBlockLabel": "100%",
    "auditLabel": "100%"
  }
}
