# ── Install once (skip if already installed) ──────────────────────────
install.packages(c("tidyverse", "mgcv", "gratia",
                   "patchwork", "GGally", "scales", "broom"))

# ── Load every session ────────────────────────────────────────────────
library(tidyverse)   # dplyr, ggplot2, tidyr, readr, purrr, tibble
library(mgcv)        # gam(), s(), te(), bam() — the GAM engine
library(gratia)      # draw(), appraise(), smooth_estimates() for GAMs
library(patchwork)   # combine ggplot objects with + / / | operators
library(GGally)      # ggpairs() for correlation matrix plots
library(scales)      # percent(), comma() for axis labels
library(broom)       # tidy(), glance() for linear model output

# Confirm loaded
sessionInfo()

# ─────────────────────────────────────────────────────────────────────
# SESSION 3: GAM + REMOTE SENSING FOR IUU ESTIMATION
# Synthetic monthly panel: 4 provinces × 2 vessel types × 36 months
# ─────────────────────────────────────────────────────────────────────
rm(list=ls())
set.seed(2024)   # for reproducibility

provinces   <- c("Province A", "Province B", "Province C", "Province D")
vessel_types <- c("Small-scale", "Commercial")

# Create full combination of all month/province/vessel type combinations
fish_monthly <- expand_grid(
  month_seq   = 1:36,           # 36 months across 3 years
  province    = provinces,
  vessel_type = vessel_types
) |>
  mutate(
    year  = 2020 + (month_seq - 1) %/% 12,
    month = ((month_seq - 1) %% 12) + 1,
    date  = as.Date(paste(year, sprintf("%02d", month), "01", sep = "-"))
  )

cat("Dataset dimensions:", nrow(fish_monthly), "rows ×",
    ncol(fish_monthly), "columns\n")  # 288 rows

# Province-level baseline parameters
prov_params <- tibble(
  province      = provinces,
  sst_base      = c(28.5, 27.8, 26.2, 29.1),  # baseline SST (°C)
  night_base    = c(38,   30,   22,   45),    # baseline night light
  patrol_mu     = c(7.5,  5.0,  9.5,  4.0),  # mean monthly patrol days
  vms_start     = c(0.55, 0.48, 0.66, 0.40)  # VMS coverage in 2020
)

fish_monthly <- fish_monthly |>
  left_join(prov_params, by = "province") |>
  mutate(
    
    # ── Sea Surface Temperature (°C) ──────────────────────────────────
    # Seasonal sinusoidal cycle + province baseline + noise
    sst = sst_base + 1.8 * sin(2 * pi * (month - 3) / 12) +
      rnorm(n(), 0, 0.35),
    
    # ── Chlorophyll-a (mg/m³) ────────────────────────────────────────
    # Cooler, upwelling waters = more nutrients = higher chl-a
    chl_a = exp(1.4 - 0.065 * sst + rnorm(n(), 0, 0.22)),
    
    # ── Night Light Index (0–100): VIIRS boat detections ─────────────
    # Seasonal peak in cooler months; higher where enforcement is weaker
    night_light = pmax(5, pmin(95,
                               night_base + 12 * cos(2 * pi * month / 12) +
                                 rnorm(n(), 0, 5)
    )),
    
    # ── AIS Vessel Density (vessels per 100 km²) ─────────────────────
    # Correlated with night light; log-linear relationship
    ais_density = exp(1.2 + 0.022 * night_light +
                        rnorm(n(), 0, 0.25)),
    
    # ── Patrol Days per Month ─────────────────────────────────────────
    # Province-specific capacity; slight upward trend from 2022 reforms
    patrol_days = round(pmax(1, pmin(22,
                                     patrol_mu + 0.04 * month_seq +    # gradual improvement over time
                                       rnorm(n(), 0, 1.8)
    ))),
    
    # ── VMS Coverage (0–1): improving over time ───────────────────────
    vms_coverage = pmax(0.25, pmin(0.96,
                                   vms_start + 0.006 * month_seq +   # rollout of VMS programme
                                     rnorm(n(), 0, 0.03)
    ))
  ) |>
  select(-sst_base, -night_base, -patrol_mu, -vms_start)  # drop helpers

# ── Vessel-type baseline catch (t/month) ────────────────────────────
catch_params <- tibble(
  province    = rep(provinces, each = 2),
  vessel_type = rep(vessel_types, length(provinces)),
  base_catch  = c(115, 430,   # Province A: small, commercial
                  88,  305,   # Province B
                  75,  275,   # Province C
                  135, 490),  # Province D
  base_price  = c(1180, 940,
                  1120, 890,
                  1050, 920,
                  1220, 960)
)

fish_monthly <- fish_monthly |>
  left_join(catch_params, by = c("province", "vessel_type")) |>
  mutate(
    
    # ── Reported catch: driven by productivity (chl-a) and season ────
    reported_catch = round(pmax(20,
                                base_catch * (0.7 + 0.5 * chl_a / mean(chl_a)) +
                                  rnorm(n(), 0, 18)
    )),
    
    # ── Market price: slight upward trend + noise ─────────────────────
    price_per_tonne = round(base_price + 12 * month_seq / 12 +
                              rnorm(n(), 0, 28)),
    
    # ── TRUE IUU RATE: non-linear data-generating process ─────────────
    # This is the ground truth we are trying to RECOVER with GAM.
    # A linear model will systematically fail because:
    #   (a) SST has a HUMP effect (peak IUU at ~28.5°C optimal temp)
    #   (b) patrol_days has DIMINISHING RETURNS (log relationship)
    #   (c) chl_a has a NON-LINEAR threshold effect
    # In logit space (link function of Beta regression):
    logit_iuu =
      -0.30                                  # intercept
    - 0.18 * log(patrol_days)              # diminishing returns (non-linear)
    - 2.40 * vms_coverage                  # strong linear deterrent
    - 0.38 * (sst - 28.5)^2               # hump: peak IUU near 28.5°C
    + 0.45 * log1p(chl_a)                # log-transformed chl-a effect
    + 0.028 * night_light                # positive: more night vessels
    + ifelse(vessel_type == "Commercial", 0.55, 0) # vessel type offset
    + rnorm(n(), 0, 0.18),              # observation-level noise
    
    # Convert logit → probability, then clamp to (0.04, 0.88)
    obs_iuu_rate = pmax(0.04, pmin(0.88, plogis(logit_iuu))),
    
    # Back-calculate actual catch and IUU quantities
    actual_catch  = round(reported_catch / (1 - obs_iuu_rate)),
    iuu_catch     = actual_catch - reported_catch,
    iuu_loss_usd  = iuu_catch * price_per_tonne
  ) |>
  select(-base_catch, -base_price, -logit_iuu)  # drop helpers

glimpse(fish_monthly)

# Select numeric variables for the correlation plot
fish_monthly |>
  select(sst, chl_a, night_light, ais_density,
         patrol_days, vms_coverage, obs_iuu_rate) |>
  GGally::ggpairs(
    columnLabels = c("SST", "Chl-a", "Night Light", "AIS Density",
                     "Patrol Days", "VMS", "IUU Rate"),
    upper = list(continuous = wrap("cor", size = 3.2)),
    lower = list(continuous = wrap("points", alpha = 0.3, size = 0.6)),
    diag  = list(continuous = wrap("densityDiag", fill = "#7c3aed",
                                   alpha = 0.4))
  ) +
  theme_minimal(base_size = 10) +
  labs(title = "Pairwise Correlations — Remote Sensing & IUU Variables")

# Helper theme for all panels
ts_theme <- theme_minimal() +
  theme(legend.position = "bottom",
        panel.grid.minor = element_blank(),
        strip.text = element_text(face = "bold"))

# Monthly mean per province (average across vessel types)
monthly_mean <- fish_monthly |>
  group_by(date, province) |>
  summarise(across(c(sst, chl_a, night_light, obs_iuu_rate),
                   mean, .names = "{.col}"),
            .groups = "drop")

# Plot 1: SST
p_sst <- ggplot(monthly_mean, aes(x = date, y = sst, colour = province)) +
  geom_line(alpha = 0.85) +
  labs(title = "Sea Surface Temperature",
       y = "SST (°C)", x = NULL, colour = NULL) + ts_theme

# Plot 2: Chlorophyll-a
p_chl <- ggplot(monthly_mean, aes(x = date, y = chl_a, colour = province)) +
  geom_line(alpha = 0.85) +
  labs(title = "Chlorophyll-a",
       y = "Chl-a (mg/m³)", x = NULL, colour = NULL) + ts_theme

# Plot 3: Night Light
p_nl <- ggplot(monthly_mean, aes(x = date, y = night_light, colour = province)) +
  geom_line(alpha = 0.85) +
  labs(title = "Night Light Index (VIIRS)",
       y = "Index (0–100)", x = NULL, colour = NULL) + ts_theme

# Plot 4: Observed IUU Rate
p_iuu <- ggplot(monthly_mean, aes(x = date, y = obs_iuu_rate, colour = province)) +
  geom_line(alpha = 0.85) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Observed IUU Rate",
       y = "IUU Rate", x = NULL, colour = NULL) + ts_theme

# Combine with patchwork: stack all 4 panels, shared legend
(p_sst / p_chl / p_nl / p_iuu) +
  plot_layout(guides = "collect") &
  theme(legend.position = "bottom")

fish_monthly |>
  mutate(province = fct_reorder(province, obs_iuu_rate, .fun = median)) |>
  ggplot(aes(x = province, y = obs_iuu_rate, fill = vessel_type)) +
  geom_boxplot(alpha = 0.75, outlier.size = 1.5) +
  geom_jitter(alpha = 0.2, width = 0.15, size = 0.8) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = c("#7c3aed", "#0ea5e9")) +
  labs(title    = "Distribution of Observed IUU Rate",
       subtitle = "By province (ordered by median) and vessel type",
       x = NULL, y = "Observed IUU Rate", fill = "Vessel Type") +
  theme_minimal()

# Scatter plots with loess smoother — reveals the TRUE shape
# A linear model would draw a straight line through these curves

scatter_opts <- list(
  geom_point(alpha = 0.18, size = 1.2, colour = "#64748b"),
  geom_smooth(method = "loess", span = 0.6,
              colour = "#7c3aed", fill = "#7c3aed", alpha = 0.15),
  geom_smooth(method = "lm",
              colour = "#ef4444", linetype = "dashed", se = FALSE),
  scale_y_continuous(labels = scales::percent),
  theme_minimal()
)

p1 <- ggplot(fish_monthly, aes(x = sst, y = obs_iuu_rate)) +
  scatter_opts +
  labs(title = "SST vs IUU Rate",
       subtitle = "Hump shape — peak IUU near 28.5°C",
       x = "Sea Surface Temp. (°C)", y = "IUU Rate")

p2 <- ggplot(fish_monthly, aes(x = patrol_days, y = obs_iuu_rate)) +
  scatter_opts +
  labs(title = "Patrol Effort vs IUU Rate",
       subtitle = "Diminishing returns — steep then flattening",
       x = "Monthly Patrol Days", y = "IUU Rate")

p3 <- ggplot(fish_monthly, aes(x = chl_a, y = obs_iuu_rate)) +
  scatter_opts +
  labs(title = "Chlorophyll-a vs IUU Rate",
       subtitle = "Log-like threshold effect",
       x = "Chl-a (mg/m³)", y = "IUU Rate")

p4 <- ggplot(fish_monthly, aes(x = night_light, y = obs_iuu_rate)) +
  scatter_opts +
  labs(title = "Night Light vs IUU Rate",
       subtitle = "Near-linear positive relationship",
       x = "Night Light Index", y = "IUU Rate")

# Combine into 2×2 panel
# Purple loess = TRUE shape | Red dashed = what LM assumes
(p1 + p2) / (p3 + p4) +
  plot_annotation(
    title   = "Non-Linear Relationships Between Predictors and IUU Rate",
    subtitle = "Purple = loess (true shape) · Red dashed = linear assumption"
  )

# ── Fit the GAM with Beta family ──────────────────────────────────────
# betar() is Beta regression in mgcv; the response must be strictly (0,1)
# method = "REML" is recommended for smoothness selection

gam1 <- gam(
  obs_iuu_rate ~
    s(patrol_days,  k = 10) +   # smooth of patrol days (up to k-1 knots)
    s(vms_coverage, k = 10) +   # smooth of VMS coverage
    s(sst,          k = 12) +   # SST hump — allow more flexibility
    s(chl_a,        k = 10) +   # chlorophyll-a
    s(night_light,  k = 10) +   # night light index
    s(ais_density,  k = 10) +   # AIS vessel density
    vessel_type,                  # parametric: Commercial vs Small-scale
  family = betar(),              # Beta regression (response in (0,1))
  data   = fish_monthly,
  method = "REML"               # Restricted Maximum Likelihood
)

# ── Model summary ─────────────────────────────────────────────────────
summary(gam1)

# KEY OUTPUT TO READ:
#   Parametric coefficients: vessel_typeSmall-scale — direction and significance
#   Approximate significance of smooth terms:
#     edf  = effective degrees of freedom (1 = linear, >1 = non-linear)
#     Ref.df, F, p-value — significance of each smooth
#   R-sq.(adj) — variance explained on the response scale
#   Deviance explained — overall fit quality (like R² but for GAMs)

# Tidy table of smooth term significance and non-linearity (edf)
tidy(gam1, parametric = FALSE) |>   # smooth terms only
  select(term, edf, statistic, p.value) |>
  mutate(
    nonlinearity = case_when(
      edf < 1.5  ~ "Near-linear",
      edf < 3.0  ~ "Mildly non-linear",
      edf < 6.0  ~ "Non-linear",
      TRUE        ~ "Highly non-linear"
    ),
    significant  = ifelse(p.value < 0.05, "Yes ✓", "No"),
    p.value      = round(p.value, 4),
    edf          = round(edf, 2)
  )

# Overall fit statistics
glance(gam1)   # deviance, df.residual, nobs

# Deviance explained and R-squared (printed by summary but also extractable)
cat("Deviance explained:",
    round(summary(gam1)$dev.expl * 100, 1), "%\n")
cat("Adj. R-squared:    ",
    round(summary(gam1)$r.sq, 3), "\n")

# draw() plots every smooth in the model with 95% confidence intervals.
# The y-axis is on the link scale (logit); the dashed line at 0 is the mean.
# A nearly horizontal band means the predictor has little effect.
# The shape of the curve shows the TRUE non-linear relationship recovered.

gratia::draw(gam1) +
  plot_annotation(
    title    = "GAM Model 1 — Estimated Smooth Functions",
    subtitle = "Y-axis: partial effect on logit(IUU rate) · Shading: 95% CI"
  )

# Draw a single smooth for closer inspection (e.g., SST hump)
gratia::draw(gam1, select = "s(sst)") +
  labs(title    = "Partial Effect of SST on IUU Rate",
       subtitle = "GAM recovers the inverted-U hump centered near 28.5°C")

# Extract smooth estimates as a tibble (for custom ggplot2 plots)
sst_smooth <- gratia::smooth_estimates(gam1, smooth = "s(sst)")
head(sst_smooth)

# Custom ggplot2 smooth plot for SST
sst_smooth |>
  mutate(
    lower_ci = .estimate - 1.96 * .se,
    upper_ci = .estimate + 1.96 * .se
  ) |>
  ggplot(aes(x = sst, y = .estimate)) +
  geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci),
              fill = "#7c3aed", alpha = 0.2) +
  geom_line(colour = "#7c3aed", linewidth = 1.3) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  labs(title    = "Partial Effect of SST on IUU Rate (logit scale)",
       subtitle = "Values above 0 increase IUU rate; below 0 decrease it",
       x = "Sea Surface Temperature (°C)",
       y = "Partial effect (logit scale)") +
  theme_minimal()

# appraise() produces four diagnostic panels:
#   (1) Q-Q plot of deviance residuals   → check distributional assumption
#   (2) Residuals vs linear predictor    → check homoscedasticity
#   (3) Histogram of residuals           → check normality
#   (4) Response vs fitted values        → check overall fit

gratia::appraise(gam1, method = "simulate") +
  plot_annotation(title = "GAM Model 1 — Diagnostic Plots")

# Also check the basis dimension adequacy (k-check)
# If p-value is very small AND k-index < 1, increase k
k.check(gam1)

# Add model residuals and fitted values to the data
fish_monthly <- fish_monthly |>
  mutate(
    gam1_fitted = fitted(gam1),
    gam1_resid  = residuals(gam1, type = "deviance")
  )

# Residuals over time — look for patterns (shouldn't be systematic)
fish_monthly |>
  group_by(date) |>
  summarise(mean_resid = mean(gam1_resid), .groups = "drop") |>
  ggplot(aes(x = date, y = mean_resid)) +
  geom_hline(yintercept = 0, colour = "#ef4444", linetype = "dashed") +
  geom_point(colour = "#7c3aed") +
  geom_smooth(method = "loess", span = 0.5,
              colour = "#7c3aed", fill = "#7c3aed", alpha = 0.15) +
  labs(title = "Deviance Residuals Over Time",
       x = NULL, y = "Mean Deviance Residual") +
  theme_minimal()

# Convert province to factor (required for random effect smoother bs="re")
fish_monthly <- fish_monthly |>
  mutate(province = factor(province))

gam2 <- gam(
  obs_iuu_rate ~
    s(patrol_days,  k = 10)          +
    s(vms_coverage, k = 10)          +
    te(sst, chl_a,  k = c(8, 8))     + # tensor product interaction
    s(night_light,  k = 10)          +
    s(ais_density,  k = 10)          + # AIS vessel density
    s(month, bs = "cc", k = 12)      + # cyclic: Jan ↔ Dec
    s(province, bs = "re")           + # province random intercept
    vessel_type,
  family = betar(),
  data   = fish_monthly,
  method = "REML",
  knots  = list(month = c(0.5, 12.5))      # cyclic boundary knots
)

summary(gam2)

# Visualise the SST × Chl-a interaction surface
gratia::draw(gam2, select = "te(sst,chl_a)") +
  labs(title    = "Joint Partial Effect of SST × Chl-a on IUU Rate",
       subtitle = "Contour surface — hot = high IUU, cool = low IUU")

# Visualise the cyclic seasonal smooth
gratia::draw(gam2, select = "s(month)") +
  scale_x_continuous(breaks = 1:12,
                     labels = c("Jan","Feb","Mar","Apr","May","Jun",
                                "Jul","Aug","Sep","Oct","Nov","Dec")) +
  labs(title    = "Seasonal Effect on IUU Rate (Cyclic Spline)",
       subtitle = "Endpoints connected: December ↔ January")

# ── Baseline: linear model (GAM with all edf fixed to 1) ─────────────
lm_base <- gam(
  obs_iuu_rate ~
    patrol_days + vms_coverage + sst + chl_a + night_light + ais_density + 
    vessel_type,
  family = betar(),
  data   = fish_monthly,
  method = "REML"
)

# ── Comparison table ──────────────────────────────────────────────────
model_comp <- tibble(
  Model = c("LM (linear, Beta)", "GAM1 (additive smooths)",
            "GAM2 (tensor + cyclic)"),
  AIC   = c(AIC(lm_base), AIC(gam1), AIC(gam2)),
  Dev_explained = c(
    summary(lm_base)$dev.expl,
    summary(gam1)$dev.expl,
    summary(gam2)$dev.expl
  ) * 100,
  Adj_Rsq = c(
    summary(lm_base)$r.sq,
    summary(gam1)$r.sq,
    summary(gam2)$r.sq
  )
) |>
  mutate(
    AIC             = round(AIC, 1),
    Dev_explained   = round(Dev_explained, 1),
    Adj_Rsq         = round(Adj_Rsq, 3),
    delta_AIC       = round(AIC - min(AIC), 1)   # ΔAIC from best
  )

print(model_comp)

# ── AIC comparison plot ───────────────────────────────────────────────
# Lower AIC = better fit (penalised for complexity)
# ΔAIC > 2 is meaningful; ΔAIC > 10 is very strong evidence
model_comp |>
  mutate(Model = fct_inorder(Model)) |>
  ggplot(aes(x = Dev_explained, y = Model, fill = Model)) +
  geom_col(width = 0.5, show.legend = FALSE) +
  geom_text(aes(label = paste0(Dev_explained, "%  (ΔAIC = ", delta_AIC, ")")),
            hjust = -0.05, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("#94a3b8", "#7c3aed", "#0ea5e9")) +
  scale_x_continuous(limits = c(0, 100),
                     expand = expansion(mult = c(0, 0.25))) +
  labs(title    = "Model Comparison: Deviance Explained (%)",
       subtitle = "ΔAIC relative to best model shown in brackets",
       x = "Deviance Explained (%)", y = NULL) +
  theme_minimal()

# Build a prediction grid: vary patrol_days while holding others at mean/mode
pred_patrol <- expand_grid(
  patrol_days  = seq(1, 22, by = 0.5),
  vessel_type  = c("Small-scale", "Commercial")
) |>
  mutate(
    vms_coverage = mean(fish_monthly$vms_coverage),
    sst          = mean(fish_monthly$sst),
    chl_a        = mean(fish_monthly$chl_a),
    night_light  = mean(fish_monthly$night_light),
    ais_density  = mean(fish_monthly$ais_density),
    month        = 6,    # June (mid-year)
    province     = factor("Province A", levels = levels(fish_monthly$province))
  )

# Predict with standard errors on the link (logit) scale
pred_link <- predict(gam1, newdata = pred_patrol,
                     type = "link", se.fit = TRUE)

# Back-transform to response scale and build 95% CIs
pred_patrol <- pred_patrol |>
  mutate(
    fit    = plogis(pred_link$fit),
    se     = pred_link$se.fit,
    ci_lo  = plogis(pred_link$fit - 1.96 * se),
    ci_hi  = plogis(pred_link$fit + 1.96 * se)
  )

# Plot: marginal effect of patrol effort with uncertainty
ggplot(pred_patrol, aes(x = patrol_days, y = fit,
                        colour = vessel_type, fill = vessel_type)) +
  geom_ribbon(aes(ymin = ci_lo, ymax = ci_hi),
              alpha = 0.2, colour = NA) +
  geom_line(linewidth = 1.3) +
  scale_y_continuous(labels = scales::percent,
                     limits = c(0, 0.8)) +
  scale_colour_manual(values = c("#7c3aed", "#0ea5e9")) +
  scale_fill_manual(values = c("#7c3aed", "#0ea5e9")) +
  labs(title    = "Predicted IUU Rate vs Monthly Patrol Days",
       subtitle = "Diminishing returns: doubling patrol beyond 10 days has limited gain",
       x        = "Monthly Patrol Days",
       y        = "Predicted IUU Rate (95% CI)",
       colour   = "Vessel Type", fill = "Vessel Type") +
  theme_minimal()

# Counterfactual analysis: compare current state vs two policy scenarios
# Scenario A (status quo): observed patrol days and VMS
# Scenario B (+50% patrol): patrol_days × 1.5
# Scenario C (high VMS): vms_coverage → 0.90

make_scenario <- function(data, label, patrol_mult = 1, vms_override = NULL) {
  data |>
    mutate(
      scenario     = label,
      patrol_days  = pmin(22, patrol_days * patrol_mult),
      vms_coverage = if (is.null(vms_override)) vms_coverage
      else pmin(0.96, vms_override)
    )
}

scenarios_list <- bind_rows(
  make_scenario(fish_monthly, "A: Status Quo"),
  make_scenario(fish_monthly, "B: +50% Patrol",   patrol_mult  = 1.5),
  make_scenario(fish_monthly, "C: High VMS (90%)", vms_override = 0.90)
)

# Predict IUU rate for all scenarios
scenarios_list <- scenarios_list |>
  mutate(pred_iuu_rate = predict(gam1, newdata = scenarios_list,
                                 type = "response"))

# Compare predicted IUU losses by scenario
scenarios_list |>
  mutate(
    pred_actual   = reported_catch / (1 - pred_iuu_rate),
    pred_iuu_catch = pred_actual - reported_catch,
    pred_iuu_loss  = pred_iuu_catch * price_per_tonne
  ) |>
  group_by(scenario) |>
  summarise(
    total_loss_m_usd = round(sum(pred_iuu_loss) / 1e6, 2),
    mean_iuu_rate    = round(mean(pred_iuu_rate) * 100, 1),
    .groups = "drop"
  )

# ── Step 1: Predict IUU rate with SE for all observations ────────────
pred_all <- predict(gam1, newdata = fish_monthly,
                    type = "link", se.fit = TRUE)

fish_results <- fish_monthly |>
  mutate(
    # as.numeric() strips the matrix class returned by predict()
    pred_rate      = as.numeric(plogis(pred_all$fit)),
    pred_rate_lo   = as.numeric(plogis(pred_all$fit - 1.96 * pred_all$se.fit)),
    pred_rate_hi   = as.numeric(plogis(pred_all$fit + 1.96 * pred_all$se.fit)),
    
    pred_actual    = reported_catch / (1 - pred_rate),
    pred_iuu_catch = as.numeric(pred_actual - reported_catch),
    pred_iuu_loss  = as.numeric(pred_iuu_catch * price_per_tonne),
    
    pred_actual_lo = reported_catch / (1 - pred_rate_lo),
    pred_iuu_lo    = as.numeric((pred_actual_lo - reported_catch) * price_per_tonne),
    pred_actual_hi = reported_catch / (1 - pred_rate_hi),
    pred_iuu_hi    = as.numeric((pred_actual_hi - reported_catch) * price_per_tonne)
  )

# ── Step 2: Annual IUU loss with CI by province ───────────────────────
annual_loss <- fish_results |>
  group_by(year, province) |>
  summarise(
    loss_central = sum(pred_iuu_loss) / 1e6,
    loss_lo      = sum(pred_iuu_lo)   / 1e6,
    loss_hi      = sum(pred_iuu_hi)   / 1e6,
    .groups      = "drop"
  )

# ── Step 3: Plot annual loss with CI ribbon ───────────────────────────
ggplot(annual_loss, aes(x = year, y = loss_central,
                        colour = province, fill = province)) +
  geom_ribbon(aes(ymin = loss_lo, ymax = loss_hi),
              alpha = 0.2, colour = NA) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  facet_wrap(~ province, ncol = 2) +
  scale_x_continuous(breaks = 2020:2022) +
  scale_colour_manual(values = c("#7c3aed", "#0ea5e9", "#10b981", "#f59e0b")) +
  scale_fill_manual(values   = c("#7c3aed", "#0ea5e9", "#10b981", "#f59e0b")) +
  labs(title    = "GAM-Estimated Annual IUU Loss by Province (2020–2022)",
       subtitle = "Shaded band = 95% confidence interval around GAM prediction",
       x        = "Year",
       y        = "Estimated IUU Loss (million USD)") +
  theme_minimal() +
  theme(legend.position = "none")

# ── Step 4: Grand total with uncertainty ─────────────────────────────
loss_summary <- fish_results |>
  summarise(
    total_loss_central_m = round(sum(pred_iuu_loss) / 1e6, 2),
    total_loss_lo_m      = round(sum(pred_iuu_lo)   / 1e6, 2),
    total_loss_hi_m      = round(sum(pred_iuu_hi)   / 1e6, 2)
  ) |>
  mutate(
    summary = paste0("USD ", total_loss_central_m, "M",
                     " [95% CI: ", total_loss_lo_m,
                     "–", total_loss_hi_m, "M]")
  )

cat("\nGAM-Estimated Total IUU Loss (2020–2022):", loss_summary$summary, "\n")

# ── Method comparison tibble ──────────────────────────────────────────
# (Replace these numbers with your actual outputs from each session)
method_comparison <- tibble(
  session  = c("Session 1", "Session 2", "Session 2",
               "Session 2", "Session 3"),
  method   = c("Simple difference",
               "Multiplier — Low",
               "Multiplier — Central",
               "Multiplier — High",
               "GAM (Beta regression)"),
  approach = c("Basic", "Data-Poor", "Data-Poor",
               "Data-Poor", "Advanced"),
  # Estimated losses in million USD (replace with actual numbers)
  loss_central = c(sum(fish_results$iuu_loss_usd) * 0.68 / 1e6,   # approx S1
                   sum(fish_results$iuu_loss_usd) * 0.72 / 1e6,   # S2 low
                   sum(fish_results$iuu_loss_usd) * 0.96 / 1e6,   # S2 central
                   sum(fish_results$iuu_loss_usd) * 1.24 / 1e6,   # S2 high
                   sum(fish_results$pred_iuu_loss) / 1e6),       # S3 GAM
  loss_lo  = c(NA, NA, NA, NA,
               sum(fish_results$pred_iuu_lo) / 1e6),
  loss_hi  = c(NA, NA, NA, NA,
               sum(fish_results$pred_iuu_hi) / 1e6)
) |>
  mutate(
    method  = fct_inorder(method),
    session = factor(session)
  )

# ── Main synthesis chart ──────────────────────────────────────────────
synth_plot <- ggplot(method_comparison,
                     aes(x = loss_central, y = fct_rev(method), fill = approach)) +
  geom_col(width = 0.6) +
  geom_errorbar(
    aes(xmin = loss_lo, xmax = loss_hi),
    width = 0.25, linewidth = 0.9, colour = "#1e293b",
    na.rm = TRUE
  ) +
  geom_text(
    aes(label = paste0("$", round(loss_central, 1), "M")),
    hjust = -0.12, fontface = "bold", size = 3.8, colour = "#1e293b"
  ) +
  scale_fill_manual(values = c(
    "Basic"     = "#94a3b8",
    "Data-Poor" = "#f59e0b",
    "Advanced"  = "#7c3aed"
  )) +
  scale_x_continuous(expand = expansion(mult = c(0, 0.22))) +
  facet_grid(session ~ ., scales = "free_y", space = "free_y") +
  labs(
    title    = "IUU Loss Estimates Across All Methods (Sessions 1–3)",
    subtitle = "Error bar on GAM = 95% confidence interval · Others = point estimates",
    x        = "Estimated IUU Economic Loss (million USD)",
    y        = NULL,
    fill     = "Approach"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    strip.text       = element_text(face = "bold", size = 10),
    legend.position  = "bottom",
    plot.title       = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )

synth_plot

# ── Save the final plot ───────────────────────────────────────────────
ggsave("iuu_synthesis_all_sessions.png",
       synth_plot, width = 10, height = 7, dpi = 300)
cat("✓ Synthesis chart saved.\n")

# Save the full results tibble
fish_results |>
  select(date, year, month, province, vessel_type,
         reported_catch, actual_catch, iuu_catch, obs_iuu_rate,
         pred_rate, pred_rate_lo, pred_rate_hi,
         pred_iuu_catch, pred_iuu_loss, pred_iuu_lo, pred_iuu_hi,
         sst, chl_a, night_light, ais_density, patrol_days, vms_coverage) |>
  write_csv("session3_gam_results.csv")

# Save annual provincial loss table
annual_loss |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  write_csv("session3_annual_loss_by_province.csv")

cat("✓ All Session 3 outputs saved to:", getwd(), "\n")