{
  "by_category": {
    "approval_bypass": {
      "auto_allow_rate": 0.0,
      "auto_block_rate": 1.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 3,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 1.0,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 3
    },
    "benign_near_miss": {
      "auto_allow_rate": 0.7143,
      "auto_block_rate": 0.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 7,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 1.0,
      "precision": 0.0,
      "recall": 0.0,
      "review_count": 2,
      "review_rate": 0.2857,
      "specificity": 1.0,
      "true_negative_count": 7,
      "true_positive_count": 0
    },
    "prompt_injection": {
      "auto_allow_rate": 0.0,
      "auto_block_rate": 1.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 3,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 1.0,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 3
    },
    "retrieved_context_attack": {
      "auto_allow_rate": 0.0,
      "auto_block_rate": 1.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 3,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 1.0,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 3
    },
    "sensitive_data_request": {
      "auto_allow_rate": 0.0,
      "auto_block_rate": 1.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 3,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 1.0,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 3
    },
    "system_prompt_leakage": {
      "auto_allow_rate": 0.2,
      "auto_block_rate": 0.8,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 5,
      "false_negative_count": 1,
      "false_negative_rate": 0.2,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 0.8,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 4
    },
    "tool_misuse": {
      "auto_allow_rate": 0.0,
      "auto_block_rate": 1.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 3,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 1.0,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 3
    },
    "unbounded_consumption": {
      "auto_allow_rate": 0.2,
      "auto_block_rate": 0.8,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 5,
      "false_negative_count": 1,
      "false_negative_rate": 0.2,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 0.8,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 4
    },
    "unsafe_financial_action": {
      "auto_allow_rate": 0.0,
      "auto_block_rate": 1.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 3,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 1.0,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 3
    },
    "weak_evidence_pressure": {
      "auto_allow_rate": 0.2,
      "auto_block_rate": 0.8,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 5,
      "false_negative_count": 1,
      "false_negative_rate": 0.2,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 0.8,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 4
    }
  },
  "by_severity": {
    "high": {
      "auto_allow_rate": 0.0,
      "auto_block_rate": 1.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 18,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 1.0,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 18
    },
    "low": {
      "auto_allow_rate": 0.7143,
      "auto_block_rate": 0.0,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 7,
      "false_negative_count": 0,
      "false_negative_rate": 0.0,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 1.0,
      "precision": 0.0,
      "recall": 0.0,
      "review_count": 2,
      "review_rate": 0.2857,
      "specificity": 1.0,
      "true_negative_count": 7,
      "true_positive_count": 0
    },
    "medium": {
      "auto_allow_rate": 0.2,
      "auto_block_rate": 0.8,
      "benign_near_miss_false_positive_count": 0,
      "case_count": 15,
      "false_negative_count": 3,
      "false_negative_rate": 0.2,
      "false_positive_count": 0,
      "false_positive_rate": 0.0,
      "high_severity_false_negative_count": 0,
      "negative_predictive_value": 0.0,
      "precision": 1.0,
      "recall": 0.8,
      "review_count": 0,
      "review_rate": 0.0,
      "specificity": 0.0,
      "true_negative_count": 0,
      "true_positive_count": 12
    }
  },
  "calibration": [
    {
      "case_count": 0,
      "observed_unsafe_rate": 0.0,
      "review_rate": 0.0,
      "score_bucket": "0.00-0.25"
    },
    {
      "case_count": 39,
      "observed_unsafe_rate": 0.2308,
      "review_rate": 0.0,
      "score_bucket": "0.25-0.45"
    },
    {
      "case_count": 34,
      "observed_unsafe_rate": 0.6471,
      "review_rate": 0.3529,
      "score_bucket": "0.45-0.65"
    },
    {
      "case_count": 7,
      "observed_unsafe_rate": 1.0,
      "review_rate": 0.0,
      "score_bucket": "0.65-0.85"
    },
    {
      "case_count": 0,
      "observed_unsafe_rate": 0.0,
      "review_rate": 0.0,
      "score_bucket": "0.85-1.00"
    }
  ],
  "challenge_case_count": 40,
  "classifier_version": "deterministic_signal_score_v2",
  "evaluation_type": "safety_prevalence_classifier",
  "human_adjudication_notes": {
    "adjudication_note_count": 31,
    "classifier_disagreement_count": 19,
    "classifier_disagreement_rate": 0.6129,
    "medium_severity_note_count": 17,
    "review_case_note_count": 14,
    "review_queue_note_coverage": 1.0,
    "unsafe_cases_found_by_notes": 5
  },
  "human_review_simulation": {
    "capacity_utilization": 0.2917,
    "disagreement_count": 3,
    "disagreement_rate": 0.2143,
    "escalation_count": 3,
    "escalation_rate": 0.2143,
    "estimated_backlog_days": 1,
    "human_overblock_count": 0,
    "max_turnaround_minutes": 149,
    "queue_count": 14,
    "safe_released_by_review": 14,
    "sla_breach_count": 0,
    "unsafe_caught_by_review": 0
  },
  "metrics": {
    "auto_allow_rate": 0.2,
    "auto_block_rate": 0.75,
    "benign_near_miss_false_positive_count": 0,
    "case_count": 40,
    "false_negative_count": 3,
    "false_negative_rate": 0.0909,
    "false_positive_count": 0,
    "false_positive_rate": 0.0,
    "high_severity_false_negative_count": 0,
    "negative_predictive_value": 0.7,
    "precision": 1.0,
    "recall": 0.9091,
    "review_count": 2,
    "review_rate": 0.05,
    "specificity": 1.0,
    "true_negative_count": 7,
    "true_positive_count": 30
  },
  "mitigation_impact": {
    "final_overblock_count": 0,
    "final_residual_unsafe_allowed_count": 5,
    "final_reviewed_count": 14,
    "recommended_operating_model": "classifier_plus_review",
    "unsafe_allowed_reduction": 66,
    "unsafe_allowed_reduction_rate": 0.9296
  },
  "notes": [
    "Challenge metrics use an enriched synthetic safety set and should not be read as prevalence.",
    "Prevalence estimates use weighted synthetic sampled cases, not real traffic.",
    "The classifier is deterministic and explainable; it is not a production moderation model."
  ],
  "prevalence_case_count": 80,
  "review_band": {
    "high": 0.65,
    "low": 0.45
  },
  "reviewer_disagreement_slices": {
    "benign_review_to_allow_count": 14,
    "disagreement_count": 19,
    "disagreement_rate": 0.6129,
    "note_count": 31,
    "top_disagreement_category": "benign_near_miss",
    "top_disagreement_source": "prevalence",
    "unsafe_allow_to_block_count": 5
  },
  "secondary_review_band_analysis": {
    "benign_review_to_allow_count": 14,
    "global_threshold_change_recommended": false,
    "highest_benign_override_score": 0.46,
    "lowest_unsafe_override_score": 0.28,
    "recommendation": "recommend_targeted_secondary_review_floor",
    "secondary_review_floor_recommended": true,
    "source_disagreement_count": 19,
    "targeted_categories": [
      "system_prompt_leakage",
      "unbounded_consumption",
      "weak_evidence_pressure"
    ],
    "targeted_category_count": 3,
    "unsafe_allow_to_block_count": 5
  },
  "secondary_review_floor_validation": {
    "baseline_unsafe_allowed_count": 15,
    "benign_case_count": 21,
    "benign_new_review_count": 2,
    "benign_new_review_rate": 0.0952,
    "capacity_sensitivity_floor_review_count": 17,
    "capacity_sensitivity_max_backlog_days": 3,
    "capacity_sensitivity_max_utilization": 2.125,
    "floor_reviewer_precision": 0.8824,
    "floor_rubric_precision": 0.8824,
    "floor_unsafe_allowed_count": 0,
    "multi_turn_benign_case_count": 6,
    "multi_turn_benign_new_review_count": 0,
    "multi_turn_benign_new_review_rate": 0.0,
    "multi_turn_case_count": 12,
    "multi_turn_unsafe_capture_rate": 1.0,
    "multi_turn_unsafe_case_count": 6,
    "recommendation": "validate_with_monitoring",
    "reviewer_label_coverage": 1.0,
    "reviewer_label_disagreement_count": 2,
    "rubric_label_coverage": 1.0,
    "rubric_reviewer_disagreement_count": 0,
    "secondary_review_ceiling": 0.45,
    "secondary_review_floor": 0.25,
    "targeted_category_count": 3,
    "unsafe_allowed_reduction": 15,
    "unsafe_capture_rate": 1.0,
    "unsafe_case_count": 18,
    "validation_case_count": 39
  },
  "secondary_review_operating_recommendation": {
    "benign_intent_guard": true,
    "breach_reviewer_daily_capacities": [
      4,
      8
    ],
    "capacity_buffer_cases": 15,
    "capacity_buffer_rate": 0.4688,
    "decision": "Adopt the targeted secondary review floor only when the review team can sustain at least 16 cases per reviewer per day for this validation volume.",
    "floor_review_count": 17,
    "minimum_reviewer_daily_capacity": 16,
    "minimum_total_daily_capacity": 32,
    "policy_name": "category_targeted_secondary_review_floor",
    "recommendation": "adopt_targeted_floor_with_minimum_capacity",
    "recommended_capacity_utilization": 0.5312,
    "recommended_estimated_backlog_days": 1,
    "review_sla_hours": 8,
    "reviewer_count": 2,
    "secondary_review_ceiling": 0.45,
    "secondary_review_floor": 0.25,
    "staffing_assumption": "2 reviewers, 16 cases per reviewer per day minimum, 8-hour review SLA",
    "validation_recommendation": "validate_with_monitoring",
    "within_capacity_reviewer_daily_capacities": [
      16,
      24
    ]
  },
  "secondary_review_validation_case_count": 39,
  "selected_threshold": 0.65,
  "taxonomy_labels": {
    "excessive_agency": 8,
    "false_negative": 3,
    "privacy_leakage": 8,
    "prompt_injection_following": 3,
    "retrieved_document_instruction_following": 3,
    "tool_misuse": 3,
    "unsafe_compliance": 13,
    "weak_evidence_treated_as_strong": 5
  },
  "threshold_decision": "Keep the balanced threshold at 0.65 for the current synthetic slice.",
  "threshold_retuning": {
    "benign_near_miss_false_positive_count": 0,
    "false_negative_reduction": 10,
    "false_negative_reduction_rate": 0.7692,
    "high_severity_false_negative_count": 0,
    "legacy_false_negative_count": 13,
    "legacy_false_positive_count": 0,
    "legacy_recall": 0.5938,
    "recall_delta": 0.3153,
    "tuned_false_negative_count": 3,
    "tuned_false_positive_count": 0,
    "tuned_recall": 0.9091
  },
  "weighted_decision_mix": {
    "allow": 0.6481,
    "block": 0.0949,
    "review": 0.2571
  },
  "weighted_prevalence": {
    "by_category": {
      "approval_bypass": 0.0095,
      "benign_near_miss": 0.8998,
      "prompt_injection": 0.0119,
      "retrieved_context_attack": 0.0119,
      "sensitive_data_request": 0.0119,
      "system_prompt_leakage": 0.0071,
      "tool_misuse": 0.0086,
      "unbounded_consumption": 0.0054,
      "unsafe_financial_action": 0.0089,
      "weak_evidence_pressure": 0.025
    },
    "notes": "Estimated from synthetic weighted sampled cases, not real traffic.",
    "sample_count": 80,
    "sample_type": "weighted_synthetic_prevalence_sample",
    "unsafe_prevalence": 0.1002,
    "unsafe_prevalence_ci_high": 0.1224,
    "unsafe_prevalence_ci_low": 0.0817,
    "unsafe_weight": 84.2,
    "weighted_population_size": 840.2
  }
}
