# ─────────────────────────────────────────────────────────────────────
# SESSION 2.5: MONTE CARLO CATCH ESTIMATION
# Narali Province · Annual panel 2015–2022 · N_SIM = 2,000
# ─────────────────────────────────────────────────────────────────────
library(tidyverse)
library(patchwork)
library(scales)

set.seed(42)            # ensures the same random draws every run

N_SIM <- 10000           # number of Monte Carlo simulations
# increase to 10000 for publication-quality results

years <- 2010:2025

# Shared colour palette — used in every plot
# https://r-charts.com/colors/
method_colours <- c(
  "Reported"       = "#94a3b8",
  "Reconstruction" = "#f59e0b",
  "Observer"       = "#10b981",
  "Trade"          = "#8b5cf6",
  "AIS/VMS"        = "#ef4444",
  "Ensemble"       = "#0891b2"
)

# ── Shared base data ──────────────────────────────────────────────────
base_data <- tibble(
  year             = years,
  reported_catch = c(10200, 10850, 11300, 11750, 12050, 
                     12400, 13100, 12800, 11900, 12200,
                     13500, 14100, 13800, 14350, 14800, 15100),# metric tonnes
  price_usd_t = c(880,  910,  940,  975, 1020, 1050, 1080, 1100, 1090, 1120,
                  1150, 1200, 1230, 1265, 1310, 1350),
  licensed_vessels = c(128, 133, 137, 135, 142, 148, 152, 155, 150, 158,
                       163, 170, 168, 173, 177, 180)
)

# Helper: summarise any MC tibble to quantile-based CI
# This function is reused after every method's MC simulation
mc_summary <- function(mc_df, catch_col = sim_catch) {
  mc_df |>
    group_by(year) |>
    summarise(
      catch_mean = mean({{ catch_col }}),
      # median (50%)
      catch_p50  = as.numeric(quantile({{ catch_col }}, 0.500)),
      # practically 95% CI
      catch_p025 = as.numeric(quantile({{ catch_col }}, 0.025)),
      catch_p975 = as.numeric(quantile({{ catch_col }}, 0.975)),
      # 80% credible interval
      catch_p10  = as.numeric(quantile({{ catch_col }}, 0.100)),
      catch_p90  = as.numeric(quantile({{ catch_col }}, 0.900)),
      # standard deviation
      catch_sd   = sd({{ catch_col }}),
      .groups    = "drop"
    )
}

# ──────────────────────────────────────────────────────────────────────
# ───     Catch Reconstruction      ────────────────────────────────────
# ──────────────────────────────────────────────────────────────────────
# ── Parameter ranges from Sea Around Us literature ────────────────────
# Pauly and Zeller 2016 https://doi.org/10.1038/ncomms10244
# https://www.seaaroundus.org/catch-reconstruction-and-allocation-methods/
# component:   low   mid   high   → SD from range / (2*1.96)
# subsistence: 0.03  0.06  0.09  → sd = 0.015
# recreational:0.02  0.04  0.07  → sd = 0.013
# discards:    0.05  0.09  0.15  → sd = 0.026
# Urbanisation trend: subsistence declines as rural pop. moves to cities 
              # Béné et al 2015 http://dspace.azti.es/handle/24689/257

urb_lookup <- tibble(
  year      = years,
  urb_trend = seq(1.00, 0.88, length.out = length(years))
)

recon_mc <- crossing(base_data, sim_id = 1:N_SIM) |>
  left_join(urb_lookup, by = "year") |>
  mutate(
    # Each row draws its OWN independent random expansion factors
    
    # Subsistence: scaled by urbanisation trend
    ef_sub = pmax(0, rnorm(n(), mean = 0.06, sd = (0.09 - 0.03) / (2 * 1.96))) *
      urb_trend,
    
    # Recreational: no time trend assumed
    ef_rec = pmax(0, rnorm(n(), mean = 0.04, sd = (0.07 - 0.02) / (2 * 1.96))),
    
    # Discards: slight improvement over time (better selectivity)
    ef_dis = pmax(0, rnorm(n(), mean = 0.09, sd = (0.15 - 0.05) / (2 * 1.96))),
    
    # Reconstructed actual catch for this simulation draw
    sim_catch = reported_catch * (1 + ef_sub + ef_rec + ef_dis)
  )

# Summarise: extract percentiles across the 10000 simulations
recon_summary <- mc_summary(recon_mc) |>
  mutate(method = "Reconstruction")

recon_summary

# View the distribution of one year's expansion factors
# This shows WHAT the MC is drawing — crucial for teaching
recon_mc |>
  filter(year == 2025) |>
  select(sim_id, ef_sub, ef_rec, ef_dis) |>
  pivot_longer(-sim_id, names_to = "factor", values_to = "value") |>
  ggplot(aes(x = value, fill = factor)) +
  geom_density(alpha = 0.4) +
  scale_fill_manual(values = c("pink", "skyblue", "yellow"),
                    labels = c("Discards", "Recreational", "Subsistence")) +
  labs(title    = "Distribution of Sampled Expansion Factors (2025, N=10000)",
       subtitle = "Each draw represents one plausible world — spread = uncertainty",
       x = "Expansion factor value", y = "Density", fill = NULL) +
  theme_minimal()

# ─────────────────────────────────────────────────────────────────────
# ───     Observer Extrapolation      ─────────────────────────────────
# ─────────────────────────────────────────────────────────────────────
# Bremner et al. 2009 https://doi-org.ezbusc.usc.gal/10.1016/j.marpol.2008.11.006
# Observer programme data with year-specific misreporting ratio and SE
observer_data <- tibble(
  year                       = years,
  observed_trips             = c(28,  35,  44,  54,  66, 76,  89,  102, 98,
                                 145, 168, 201, 220, 248, 272, 295),
  obs_catch_per_trip_kg      = c(3380, 3430, 3480, 3520, 3580, 
                                 3650, 3720, 3580, 3490, 3620, 
                                 3780, 3940, 3870, 3960, 4030, 4090),
  declared_catch_per_trip_kg = c(2720, 2780, 2840, 2900, 2960, 3100, 
                                 3180, 3050, 2980, 3090, 3220, 3360, 
                                 3290, 3410, 3500, 3580)
) |>
  mutate(
    misreport_ratio = obs_catch_per_trip_kg / declared_catch_per_trip_kg,
    ratio_se        = misreport_ratio / sqrt(observed_trips)
  )

# ── Observer programme - Monte Carlo ──────────────────────────────────
observer_mc <- crossing(observer_data, sim_id = 1:N_SIM) |>
  left_join(base_data |> select(year, reported_catch), by = "year") |>
  mutate(
    # Draw ratio from its sampling distribution
    # Clamp at 1.0 — observers cannot record less than was declared
    sim_ratio = pmax(1.00, rnorm(n(), misreport_ratio, ratio_se)),
    sim_catch = reported_catch * sim_ratio
  )

observer_summary <- mc_summary(observer_mc) |>
  mutate(method = "Observer")

# ── Convergence check: is N_SIM = 10000 enough? ───────────────────────
# Run the same quantile calculation with increasing n_sim values.
# If the CI width stabilises, we have enough simulations.
n_values <- c(50, 100, 250, 500, 1000, 2000, 3000, 4000, 
              5000, 6000, 7000, 8000, 9000, 10000)

convergence_check <- map_dfr(n_values, function(n) {
  observer_mc |>
    filter(year == 2022) |>
    slice_sample(n = n) |>
    summarise(
      n_sim     = n,
      p025      = as.numeric(quantile(sim_catch, 0.025)),
      p975      = as.numeric(quantile(sim_catch, 0.975)),
      ci_width  = p975 - p025
    )
})

ggplot(convergence_check, aes(x = n_sim, y = ci_width)) +
  geom_line(colour = "#10b981", linewidth = 1.2) +
  geom_point(colour = "#10b981", size = 3.5) +
  geom_vline(xintercept = 1000, linetype = "dashed", colour = "grey50") +
  annotate("text", x = 1200, y = max(convergence_check$ci_width) * 0.95,
           label = "CI width stabilises\naround n=1000",
           hjust = 0, size = 3.2, colour = "grey40") +
  scale_x_continuous(breaks = n_values) +
  labs(title    = "Convergence Check: Observer CI Width vs N_SIM (year = 2022)",
       subtitle = "If CI width plateaus, N_SIM is sufficient — 2000 gives stable results",
       x = "Number of simulations", y = "95% CI width (tonnes)") +
  theme_minimal()

# ─────────────────────────────────────────────────────────────────────
# ───     Trade Discrepancy      ──────────────────────────────────────
# ─────────────────────────────────────────────────────────────────────
# Clarke et al. 2006 https://doi.org/10.1111/j.1461-0248.2006.00968.x
# Annual trade discrepancy (import recorded − export declared, all partners)
trade_discrepancy <- tibble(
  year                = years,
  total_discrepancy_t = c(580,  640,  700,  760,  870, 
                          980, 1050, 1030, 880, 970, 
                          1130, 1260, 1170, 1310, 1380, 1440)
)

trade_mc <- crossing(trade_discrepancy, sim_id = 1:N_SIM) |>
  left_join(base_data |> select(year, reported_catch), by = "year") |>
  mutate(
    # Source 1: Measurement error in the discrepancy itself
    # Customs data typically have ±5% measurement uncertainty
    meas_mult  = rnorm(n(), mean = 1.00, sd = 0.05),
    
    # Source 2: Attribution fraction — what % of the gap is truly IUU?
    # (vs. re-exports, processing weight changes, species mis-classification)
    # Expert range: 70%–100% is IUU. No preferred value → Uniform.
    attr_frac  = runif(n(), min = 0.70, max = 1.00),
    
    # Combined: discrepancy scaled by both uncertainty sources
    sim_discrepancy = total_discrepancy_t * meas_mult * attr_frac,
    
    sim_catch = reported_catch + sim_discrepancy
  )

trade_summary <- mc_summary(trade_mc) |>
  mutate(method = "Trade")

# Show how the two sources contribute to total uncertainty
# Decomposition: hold one factor fixed, vary only the other
decomp <- trade_mc |>
  filter(year == 2025) |>
  mutate(
    only_meas = total_discrepancy_t * meas_mult * 0.85,  # attr fixed at midpoint
    only_attr = total_discrepancy_t * 1.00  * attr_frac  # meas fixed at 1
  ) |>
  select(sim_id, only_meas, only_attr, sim_discrepancy) |>
  pivot_longer(-sim_id, names_to = "source", values_to = "disc_t")

ggplot(decomp, aes(x = disc_t, fill = source)) +
  geom_density(alpha = 0.55) +
  scale_fill_manual(
    values = c("skyblue", "pink", "purple"),
    labels = c("Meas. error only", "Attribution only", "Combined")) +
  labs(title    = "Uncertainty Decomposition: Trade Discrepancy (2022)",
       subtitle = "Attribution fraction contributes more uncertainty than measurement error",
       x = "Discrepancy (tonnes)", y = "Density", fill = "Uncertainty source") +
  theme_minimal()

# ─────────────────────────────────────────────────────────────────────
# ───     AIS/VMS Dark Vessel Analysis      ───────────────────────────
# ─────────────────────────────────────────────────────────────────────
# Park et al. 2020 https://doi.org/10.1126/sciadv.abb1197
# Welch et al. 2022 https://doi.org/10.1126/sciadv.abq2109
# ── Fleet and monitoring parameters ──────────────────────────────────
aisvms_data <- tibble(
  year             = years,
  licensed_vessels = base_data$licensed_vessels,
  # VMS active compliance rate (proportion of fleet transmitting)
  vms_active_pct   = c(0.24, 0.28, 0.33, 0.38, 0.44, 0.52, 0.55, 0.58, 0.56,
                       0.64, 0.70, 0.76, 0.80, 0.84, 0.87, 0.90),
  # Average fishing days per vessel per year
  fishing_days_yr  = c(206, 210, 212, 208, 214, 220, 218, 222, 215,
                       224, 226, 228, 225, 227, 229, 231), 
  # catch per vessel-day (kg)
  cpvd_kg          = c(132, 136, 140, 143, 147, 152, 158, 154, 149,
                       156, 162, 168, 165, 170, 174, 178) 
) |>
  mutate(dark_vessel_days =
           licensed_vessels * fishing_days_yr * (1 - vms_active_pct))

# ── Monte Carlo ───────────────────────────────────────────────────────

# Log-normal parameters for CPVD:
# If mean = mu and CV = 0.15, then:
#   sigma² = log(1 + CV²)  →  sigma = sqrt(log(1 + 0.15²))
#   mu_log = log(mu) - sigma²/2
cv_cpvd <- 0.15
sigma_cpvd <- sqrt(log(1 + cv_cpvd^2))

aisvms_mc <- crossing(aisvms_data, sim_id = 1:N_SIM) |>
  left_join(base_data |> select(year, reported_catch), by = "year") |>
  mutate(
    # CPVD: strictly positive, right-skewed → log-normal
    # mu_log ensures the median equals cpvd_kg
    sim_cpvd   = rlnorm(n(),
                        meanlog = log(cpvd_kg) - sigma_cpvd^2 / 2,
                        sdlog   = sigma_cpvd),
    
    # Dark vessel detection multiplier: radar/VIIRS has ±20% coverage gaps
    # Normal centred at 1 with SD derived from ±20% range
    dark_mult  = pmax(0.4, rnorm(n(), mean = 1.00,
                                 sd = 0.20 / 1.96)),
    
    # Simulated dark catch and total actual catch
    sim_dark_catch = dark_vessel_days * dark_mult * sim_cpvd / 1000,
    sim_catch      = reported_catch + sim_dark_catch
  )

aisvms_summary <- mc_summary(aisvms_mc) |>
  mutate(method = "AIS/VMS")

# Compare log-normal vs normal for CPVD (teaching comparison)
tibble(
  lognormal = rlnorm(5000,
                     meanlog = log(160) - sigma_cpvd^2 / 2,
                     sdlog   = sigma_cpvd),
  normal    = pmax(0, rnorm(5000, mean = 160, sd = 160 * 0.15))
) |>
  pivot_longer(everything(), names_to = "distribution", values_to = "cpvd") |>
  ggplot(aes(x = cpvd, fill = distribution)) +
  geom_density(alpha = 0.55) +
  geom_vline(xintercept = 160, linetype = "dashed") +
  scale_fill_manual(values = c("skyblue", "pink")) +
  labs(title    = "Log-normal vs Normal for CPVD (mean = 160 kg)",
       subtitle = "Log-normal has heavier right tail — more realistic for catch rates",
       x = "Catch per vessel-day (kg)", y = "Density",
       fill = "Distribution") +
  theme_minimal()

# ── Collect all method MC simulations ─────────────────────────────────
# Each method contributes N_SIM rows per year.
# The sim_id column links across methods (same sim_id = same simulated world)

all_mc <- bind_rows(
  recon_mc   |> select(year, sim_id, sim_catch) |> mutate(method = "Reconstruction"),
  observer_mc |> select(year, sim_id, sim_catch) |> mutate(method = "Observer"),
  trade_mc   |> select(year, sim_id, sim_catch) |> mutate(method = "Trade"),
  aisvms_mc  |> select(year, sim_id, sim_catch) |> mutate(method = "AIS/VMS")
) |>
  mutate(method = factor(method, levels = names(method_colours)))

cat("Total rows in all_mc:", nrow(all_mc), "\n")  
# 4 methods × 16 years × 10000 = 640,000

# ── Summary table: one row per method per year ────────────────────────
all_summaries <- bind_rows(
  recon_summary,
  observer_summary,
  trade_summary,
  aisvms_summary,
  # Add reported catch as a fixed baseline (no uncertainty)
  base_data |>
    transmute(year,
              catch_mean = reported_catch, catch_p50  = reported_catch,
              catch_p025 = reported_catch, catch_p975 = reported_catch,
              catch_p10  = reported_catch, catch_p90  = reported_catch,
              catch_sd   = 0,              method     = "Reported")
) |>
  mutate(method = factor(method, levels = names(method_colours)))

# How much does each method's MC SD differ?
# Higher SD = more uncertain estimate
all_summaries |>
  filter(year == 2010, method != "Reported") |>
  select(method, catch_p50, catch_p025, catch_p975, catch_sd) |>
  mutate(
    ci_width  = round(catch_p975 - catch_p025),
    cv_pct    = round(catch_sd / catch_p50 * 100, 1)
  ) |>
  arrange(cv_pct)

# ── Step 1: Compute method-level precision (weight = 1/variance) ─────
# Using the MC-derived SD as the uncertainty measure — more honest than
# the analytical SE used in the previous version of this session

method_precision <- all_mc |>
  group_by(year, method) |>
  summarise(
    mc_sd     = sd(sim_catch),
    mc_var    = var(sim_catch),
    .groups   = "drop"
  ) |>
  mutate(weight = 1 / mc_var)   # inverse-variance weight

# Show how weights vary by method and year
method_precision |>
  group_by(year) |>
  mutate(weight_pct = round(weight / sum(weight) * 100, 1)) |>
  ungroup() |>
  select(year, method, mc_sd, weight_pct) |>
  pivot_wider(names_from = method, values_from = weight_pct)

# ── Step 2: For each sim_id × year, compute weighted ensemble ─────────
ensemble_mc <- all_mc |>
  left_join(method_precision |> select(year, method, weight),
            by = c("year", "method")) |>
  group_by(year, sim_id) |>
  summarise(
    # Weighted mean across the four method estimates for this sim_id
    ensemble_sim = sum(weight * sim_catch) / sum(weight),
    .groups      = "drop"
  )

# ── Step 3: Summarise the ensemble distribution ───────────────────────
ensemble_summary <- ensemble_mc |>
  mc_summary(catch_col = ensemble_sim) |>
  mutate(method = "Ensemble")

# ── Step 4: Derive IUU loss from the ensemble distribution ───────────
ensemble_loss <- ensemble_mc |>
  left_join(base_data |> select(year, reported_catch, price_usd_t),
            by = "year") |>
  mutate(
    # IUU catch is derived directly inside the MC loop
    iuu_catch_sim = pmax(0, ensemble_sim - reported_catch),
    iuu_loss_sim  = iuu_catch_sim * price_usd_t
  ) |>
  group_by(year) |>
  summarise(
    iuu_catch_p50  = as.numeric(quantile(iuu_catch_sim, 0.500)),
    iuu_catch_p025 = as.numeric(quantile(iuu_catch_sim, 0.025)),
    iuu_catch_p975 = as.numeric(quantile(iuu_catch_sim, 0.975)),
    iuu_loss_p50   = as.numeric(quantile(iuu_loss_sim,  0.500)),
    iuu_loss_p025  = as.numeric(quantile(iuu_loss_sim,  0.025)),
    iuu_loss_p975  = as.numeric(quantile(iuu_loss_sim,  0.975)),
    .groups        = "drop"
  )

ensemble_loss

# Grand total IUU loss with full MC uncertainty
ensemble_mc |>
  left_join(base_data |> select(year, reported_catch, price_usd_t),
            by = "year") |>
  mutate(iuu_loss_sim = pmax(0, ensemble_sim - reported_catch) * price_usd_t) |>
  group_by(sim_id) |>
  summarise(total_loss = sum(iuu_loss_sim), .groups = "drop") |>
  summarise(
    median_loss_m  = round(median(total_loss) / 1e6, 2),
    p025_loss_m    = round(as.numeric(quantile(total_loss, 0.025)) / 1e6, 2),
    p975_loss_m    = round(as.numeric(quantile(total_loss, 0.975)) / 1e6, 2)
  ) |>
  mutate(label = paste0("Total IUU Loss 2015–2022: USD ", median_loss_m,
                        "M [95% CI: ", p025_loss_m, "–", p975_loss_m, "M]")) |>
  pull(label) |>
  (\(x) cat("\n", x, "\n"))()

# Overlay density curves from each method for 2022
# This shows the SHAPE of uncertainty — not just its width

all_mc |>
  filter(year == 2025) |>
  bind_rows(
    ensemble_mc |>
      filter(year == 2025) |>
      transmute(year, sim_id, sim_catch = ensemble_sim, method = "Ensemble") |>
      mutate(method = factor(method, levels = names(method_colours)))
  ) |>
  ggplot(aes(x = sim_catch / 1000, colour = method, fill = method)) +
  geom_density(alpha = 0.15, linewidth = 0.5) +
  # Highlight ensemble with a thicker line
  geom_density(data = filter(ensemble_mc, year == 2025),
               aes(x = ensemble_sim / 1000),
               colour = "#0891b2", linewidth = 1,
               fill = "#0891b2", alpha = 0.08,
               inherit.aes = FALSE) +
  geom_vline(xintercept = 15.1, linetype = "dashed", colour = "#94a3b8") +
  annotate("text", x = 15.4, y = 2.2,
           label = "Reported\n15.1 kt",
           hjust = 1, size = 3, colour = "#94a3b8") +
  scale_colour_manual(values = method_colours) +
  scale_fill_manual(values = method_colours) +
  labs(title    = "MC Distributions of Actual Catch — All Methods (2025)",
       subtitle = "Width of each curve = uncertainty · Ensemble (bold) combines all four",
       x        = "Estimated Actual Catch (thousand tonnes)",
       y        = "Probability density",
       colour   = "Method", fill = "Method") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"),
        legend.position = "right")

# Fan chart stacks multiple probability bands (50%, 80%, 95%)
# The innermost band is most likely; the outermost captures rare outcomes

fan_data <- ensemble_mc |>
  group_by(year) |>
  summarise(
    p50  = as.numeric(quantile(ensemble_sim, 0.500)),
    p10  = as.numeric(quantile(ensemble_sim, 0.100)),
    p90  = as.numeric(quantile(ensemble_sim, 0.900)),
    p025 = as.numeric(quantile(ensemble_sim, 0.025)),
    p975 = as.numeric(quantile(ensemble_sim, 0.975)),
    p25  = as.numeric(quantile(ensemble_sim, 0.250)),
    p75  = as.numeric(quantile(ensemble_sim, 0.750)),
    .groups = "drop"
  )

p_fan <- ggplot(fan_data, aes(x = year)) +
  # 95% CI (outermost, most transparent)
  geom_ribbon(aes(ymin = p025 / 1000, ymax = p975 / 1000),
              fill = "#0891b2", alpha = 0.10) +
  # 80% CI
  geom_ribbon(aes(ymin = p10 / 1000, ymax = p90 / 1000),
              fill = "#0891b2", alpha = 0.18) +
  # 50% CI (innermost, darkest)
  geom_ribbon(aes(ymin = p25 / 1000, ymax = p75 / 1000),
              fill = "#0891b2", alpha = 0.30) +
  # Median line
  geom_line(aes(y = p50 / 1000), colour = "#0891b2", linewidth = 1.5) +
  geom_point(aes(y = p50 / 1000), colour = "#0891b2", size = 3) +
  # Reported catch for reference
  geom_line(data = base_data, aes(x = year, y = reported_catch / 1000),
            colour = "#94a3b8", linetype = "dashed", linewidth = 0.9,
            inherit.aes = FALSE) +
  scale_x_continuous(breaks = years) +
  labs(title    = "Ensemble Catch Estimate — Fan Chart (2015–2022)",
       subtitle = "Bands: 50% · 80% · 95% CI · Dashed = reported catch",
       x = NULL, y = "Actual Catch (thousand tonnes)") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))

# IUU loss fan chart
p_loss_fan <- ensemble_loss |>
  ggplot(aes(x = year)) +
  geom_ribbon(aes(ymin = iuu_loss_p025 / 1e6, ymax = iuu_loss_p975 / 1e6),
              fill = "#ef4444", alpha = 0.15) +
  geom_line(aes(y = iuu_loss_p50 / 1e6),
            colour = "#ef4444", linewidth = 1.4) +
  geom_point(aes(y = iuu_loss_p50 / 1e6),
             colour = "#ef4444", size = 3) +
  geom_label(aes(y = iuu_loss_p50 / 1e6,
                 label = paste0("$", round(iuu_loss_p50 / 1e6, 1), "M")),
             size = 2.8, nudge_y = 0.25) +
  scale_x_continuous(breaks = years) +
  labs(title = "Ensemble IUU Loss Estimate (95% MC CI)",
       x = NULL, y = "IUU Loss (million USD)") +
  theme_minimal()

p_fan / p_loss_fan + plot_layout(heights = c(2, 1))

# Violin plots show the full distribution shape, not just CI bounds.
# This reveals skewness, multiple modes, and outliers — features a
# simple error bar would hide entirely.

all_mc |>
  mutate(year = factor(year)) |>
  ggplot(aes(x = year, y = sim_catch / 1000, fill = method)) +
  geom_violin(scale = "width", alpha = 0.75,
              draw_quantiles = c(0.025, 0.50, 0.975)) +
  facet_wrap(~ method, nrow = 2) +
  scale_fill_manual(values = method_colours, guide = "none") +
  labs(title    = "MC Distribution Shape by Method and Year",
       subtitle = "Width = probability · Lines at 2.5th, 50th, 97.5th percentile",
       x = "Year", y = "Simulated Actual Catch (thousand tonnes)") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"),
        strip.text = element_text(face = "bold"))

# Save All Results
# ── 1. Method-level MC summaries (quantiles per method per year) ──────
all_summaries |>
  mutate(across(where(is.numeric), ~ round(.x, 1))) |>
  write_csv("session25_mc_method_summaries.csv")

# ── 2. Ensemble MC summary + IUU loss ────────────────────────────────
ensemble_summary |>
  left_join(ensemble_loss, by = "year") |>
  left_join(base_data |> select(year, reported_catch), by = "year") |>
  mutate(across(where(is.numeric), ~ round(.x, 1))) |>
  write_csv("session25_mc_ensemble_iuu.csv")

# ── 3. Full simulation draws — all methods (large file: 64,000 rows) ──
all_mc |>
  mutate(sim_catch = round(sim_catch, 1)) |>
  write_csv("session25_mc_all_simulations.csv")

# ── 4. Ensemble simulation draws (8,000 rows — for downstream use) ───
ensemble_mc |>
  mutate(ensemble_sim = round(ensemble_sim, 1)) |>
  write_csv("session25_mc_ensemble_simulations.csv")

cat("✓ Four output files saved to:", getwd(), "\n")
cat("  session25_mc_method_summaries.csv\n")
cat("  session25_mc_ensemble_iuu.csv\n")
cat("  session25_mc_all_simulations.csv\n")
cat("  session25_mc_ensemble_simulations.csv\n")
