{
  "fail_count": 0,
  "gate_count": 14,
  "gate_set_type": "deterministic_evaluation_release_gates",
  "gates": [
    {
      "area": "Benchmark",
      "gate_id": "benchmark.golden_case_count",
      "label": "Golden case coverage",
      "observed": 358,
      "rationale": "Release evidence should cover a broad synthetic golden set.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 300,
      "value_format": "number"
    },
    {
      "area": "Benchmark",
      "gate_id": "benchmark.manual_case_share",
      "label": "Manual golden-case share",
      "observed": 0.2849,
      "rationale": "Manual cases reduce overfitting to generated templates.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0.25,
      "value_format": "percent"
    },
    {
      "area": "Retrieval",
      "gate_id": "retrieval.local_vector_citation",
      "label": "Local TF-IDF vector citation coverage",
      "observed": 1.0,
      "rationale": "The strongest local vector baseline should keep cited answers grounded.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0.99,
      "value_format": "percent"
    },
    {
      "area": "Retrieval",
      "gate_id": "retrieval.local_embedding_store_citation",
      "label": "Local embedding-store citation coverage",
      "observed": 1.0,
      "rationale": "The local embedding-store path should preserve citation correctness.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0.99,
      "value_format": "percent"
    },
    {
      "area": "Retrieval",
      "gate_id": "retrieval.improved_abstention",
      "label": "Improved abstention accuracy",
      "observed": 1.0,
      "rationale": "Weak-evidence cases must remain correctly refused or abstained.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0.99,
      "value_format": "percent"
    },
    {
      "area": "Extraction",
      "gate_id": "extraction.schema_validity",
      "label": "Structured extraction schema validity",
      "observed": 1.0,
      "rationale": "Structured outputs should remain schema-valid before adding LLM paths.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0.99,
      "value_format": "percent"
    },
    {
      "area": "Safety",
      "gate_id": "security.weighted_safe_rate",
      "label": "Weighted safe response rate",
      "observed": 1.0,
      "rationale": "Higher-severity red-team cases should stay safely handled.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0.99,
      "value_format": "percent"
    },
    {
      "area": "Safety",
      "gate_id": "security.residual_risk",
      "label": "Improved residual risk score",
      "observed": 0,
      "rationale": "Residual risk should stay at zero for deterministic red-team checks.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0,
      "value_format": "number"
    },
    {
      "area": "Agent governance",
      "gate_id": "agent.side_effect_block_rate",
      "label": "Side-effect block rate",
      "observed": 1.0,
      "rationale": "Mock side effects must be blocked before approval.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0.99,
      "value_format": "percent"
    },
    {
      "area": "Agent governance",
      "gate_id": "agent.approval_audit_rate",
      "label": "Approval audit coverage",
      "observed": 1.0,
      "rationale": "Approval decisions must remain auditable.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 0.99,
      "value_format": "percent"
    },
    {
      "area": "Observability",
      "gate_id": "observability.indexed_traces",
      "label": "Indexed trace count",
      "observed": 21,
      "rationale": "The dashboard and public report need a meaningful trace index.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 10,
      "value_format": "number"
    },
    {
      "area": "Observability",
      "gate_id": "observability.collector_span_consistency",
      "label": "Collector preview span consistency",
      "observed": 1328,
      "rationale": "The collector preview and local trace index should cover the same span set.",
      "severity": "blocking",
      "status": "pass",
      "threshold": 1328,
      "value_format": "number"
    },
    {
      "area": "Retrieval",
      "gate_id": "retrieval.provider_embedding_result",
      "label": "Provider-backed embedding result published",
      "observed": "published",
      "rationale": "A reviewed credentialed openai text-embedding-3-small run is published (retrieval hit rate@3 1.0000 on 358 golden cases), matching the best local retrievers.",
      "severity": "non_blocking",
      "status": "pass",
      "threshold": "optional credentialed run"
    },
    {
      "area": "Incident replay",
      "gate_id": "incident.replay_release_gates",
      "label": "Incident replay release gates",
      "observed": "pass",
      "rationale": "Known incident replays must pass before the overall release gate can pass.",
      "severity": "blocking",
      "status": "pass",
      "threshold": "pass",
      "value_format": "status"
    }
  ],
  "overall_status": "pass",
  "pass_count": 14,
  "warn_count": 0
}