# ─────────────────────────────────────────────────────────
# SESSION 2: IUU SCENARIO ANALYSIS
# ─────────────────────────────────────────────────────────
library(tidyverse)
library(broom)    # for tidy regression output

# Scenario A · Data-Poor Estimation
rm(list=ls())
reported_only <- tibble(
  year            = rep(2018:2022, each = 4),
  province        = rep(c("Province A", "Province A", "Province B", "Province B"), 5),
  vessel_type     = rep(c("Small-scale", "Commercial"), 10),
  reported_catch  = c(1200, 4500, 980,  3200,
                      1150, 4800, 1020, 3400,
                      1050, 4200, 890,  2900,
                      1300, 5100, 1100, 3600,
                      1400, 5400, 1200, 3900),
  price_per_tonne = c(1200, 950,  1100, 900,
                      1250, 970,  1150, 920,
                      1300, 1000, 1200, 950,
                      1350, 1020, 1250, 970,
                      1400, 1050, 1300, 1000)
)

# IUU rates from literature (assumptions)
#   Small-scale: ~40% of actual catch is unreported
#   Commercial:  ~50% of actual catch is unreported
# Expansion multiplier = 1 / (1 - IUU_rate)

iuu_rate_small <- 0.40
iuu_rate_comm  <- 0.50

reported_only <- reported_only |>
  mutate(
    multiplier       = case_when(
      vessel_type == "Small-scale" ~ 1 / (1 - iuu_rate_small),
      vessel_type == "Commercial"  ~ 1 / (1 - iuu_rate_comm)
    ),
    est_actual_catch = reported_catch * multiplier,
    est_iuu_catch    = est_actual_catch - reported_catch,
    est_iuu_loss_usd = est_iuu_catch * price_per_tonne
  )

# Summary results
reported_only |>
  summarise(
    total_iuu_catch_t  = round(sum(est_iuu_catch)),
    total_iuu_loss_usd = round(sum(est_iuu_loss_usd))
  )

# Define three assumption scenarios in a tibble
scenarios <- tibble(
  scenario       = c("Low (optimistic)", "Central (base)", "High (pessimistic)"),
  rate_small     = c(0.25, 0.40, 0.55),
  rate_commercial = c(0.35, 0.50, 0.65)
)

# Helper function: estimate total IUU loss for given rates
estimate_loss <- function(rs, rc) {
  reported_only |>
    mutate(
      mult      = case_when(
        vessel_type == "Small-scale" ~ 1 / (1 - rs),
        vessel_type == "Commercial"  ~ 1 / (1 - rc)
      ),
      iuu_loss  = (reported_catch * mult - reported_catch) * price_per_tonne
    ) |>
    summarise(total = sum(iuu_loss)) |>
    pull(total)    # pull() extracts a single column as a vector
}

# Apply the function across all scenarios using mutate + map2()
scenarios <- scenarios |>
  mutate(
    total_loss_usd   = map2_dbl(rate_small, rate_commercial, estimate_loss),
    total_loss_m_usd = round(total_loss_usd / 1e6, 2)
  )

# Print the table
scenarios |> select(scenario, rate_small, rate_commercial, total_loss_m_usd)

# Visualise with ggplot2
scenarios |>
  mutate(scenario = fct_inorder(scenario)) |>  # preserve order for x-axis
  ggplot(aes(x = scenario, y = total_loss_m_usd,
             fill = scenario)) +
  geom_col(width = 0.5, show.legend = FALSE) +
  geom_text(aes(label = paste0("$", total_loss_m_usd, "M")),
            vjust = -0.5, fontface = "bold") +
  scale_fill_manual(values = c("#4ade80", "#facc15", "#f87171")) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(title    = "Data-Poor: IUU Loss Under Different Rate Assumptions",
       subtitle = "Sensitivity to assumed IUU percentage",
       x        = "Scenario",
       y        = "Estimated IUU Loss (million USD)") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))

# Scenario B · Data-Less-Poor Estimation
fish_rich <- tibble(
  year            = rep(2018:2022, each = 4),
  province        = rep(c("Province A", "Province A", "Province B", "Province B"), 5),
  vessel_type     = rep(c("Small-scale", "Commercial"), 10),
  reported_catch  = c(1200, 4500, 980,  3200,
                      1150, 4800, 1020, 3400,
                      1050, 4200, 890,  2900,
                      1300, 5100, 1100, 3600,
                      1400, 5400, 1200, 3900),
  actual_catch    = c(1680, 6750, 1274, 5120,
                      1725, 7200, 1428, 5440,
                      1575, 6720, 1246, 4640,
                      1950, 7650, 1540, 5760,
                      2100, 8100, 1680, 6240),
  price_per_tonne = c(1200, 950,  1100, 900,
                      1250, 970,  1150, 920,
                      1300, 1000, 1200, 950,
                      1350, 1020, 1250, 970,
                      1400, 1050, 1300, 1000),
  patrol_days     = c(45, 45, 30, 30,
                      50, 50, 28, 28,
                      20, 20, 18, 18,
                      35, 35, 32, 32,
                      55, 55, 40, 40),
  vms_coverage    = c(0.60, 0.80, 0.50, 0.70,
                      0.65, 0.82, 0.55, 0.72,
                      0.58, 0.78, 0.48, 0.68,
                      0.70, 0.85, 0.62, 0.75,
                      0.75, 0.88, 0.68, 0.80),
  survey_cpue     = c(320, 890, 280, 760,
                      310, 920, 290, 790,
                      300, 870, 270, 740,
                      330, 950, 300, 800,
                      345, 980, 315, 820)
)

# Calculate observed IUU rate
fish_rich <- fish_rich |>
  mutate(
    obs_iuu_catch = actual_catch - reported_catch,
    obs_iuu_rate  = obs_iuu_catch / actual_catch
  )

# Scatter plot with a trend line
ggplot(fish_rich, aes(x = patrol_days, y = obs_iuu_rate,
                      colour = vessel_type)) +
  geom_point(size = 3.5, alpha = 0.8) +
  geom_smooth(method = "lm", se = TRUE, alpha = 0.15) +  # linear trend + CI band
  scale_colour_manual(values = c("#0369a1", "#b45309")) +
  scale_y_continuous(labels = scales::percent) +            # show y as %
  labs(title   = "Patrol Effort vs Observed IUU Rate",
       subtitle = "More patrol days → lower IUU rate",
       x        = "Patrol Days per Year",
       y        = "Observed IUU Rate",
       colour   = "Vessel Type") +
  theme_minimal()

# Facet by vessel type for a cleaner view
ggplot(fish_rich, aes(x = patrol_days, y = obs_iuu_rate)) +
  geom_point(colour = "#0f766e", size = 3) +
  geom_smooth(method = "lm", colour = "#b45309", se = FALSE) +
  facet_wrap(~ vessel_type, scales = "free_y") +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "IUU Rate by Vessel Type",
       x = "Patrol Days", y = "Observed IUU Rate") +
  theme_minimal()

# Fit the model (lm() is base R — tidyverse doesn't replace it)
model <- lm(obs_iuu_rate ~ patrol_days + vms_coverage + vessel_type,
            data = fish_rich)

summary(model)

# broom::tidy() gives a clean tibble of coefficients (better than summary())
tidy(model)

# broom::glance() gives model-level statistics (R², AIC, etc.)
glance(model)

# broom::augment() adds fitted values and residuals back to the data
model_output <- augment(model, data = fish_rich)
glimpse(model_output)   # .fitted = predicted IUU rate, .resid = residual

# Plot coefficient estimates with confidence intervals
tidy(model, conf.int = TRUE) |>
  filter(term != "(Intercept)") |>
  ggplot(aes(x = estimate, y = term)) +
  geom_point(size = 3, colour = "#0369a1") +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high),
                 height = 0.2, colour = "#0369a1") +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey50") +
  labs(title = "Regression Coefficients: Predictors of IUU Rate",
       x = "Coefficient Estimate (with 95% CI)",
       y = "Predictor") +
  theme_minimal()

# Add predicted IUU rates to the data using augment()
# Generate confidence intervals directly from predict()
pred_ci <- predict(model, newdata = fish_rich,
                   interval = "confidence", level = 0.95) |>
  as_tibble() |>
  rename(pred_iuu_rate = fit,
         ci_lo         = lwr,
         ci_hi         = upr)

# Bind to fish_rich and compute losses with CI bounds
fish_rich <- bind_cols(fish_rich, pred_ci) |>
  mutate(
    # Clamp all three to valid [0, 1] range
    pred_iuu_rate = pmax(0, pmin(1, pred_iuu_rate)),
    ci_lo         = pmax(0, pmin(1, ci_lo)),
    ci_hi         = pmax(0, pmin(1, ci_hi)),
    
    # Central estimate
    model_actual_catch = reported_catch / (1 - pred_iuu_rate),
    model_iuu_catch    = model_actual_catch - reported_catch,
    model_iuu_loss     = model_iuu_catch * price_per_tonne,
    
    # Lower bound of loss (lower IUU rate → less loss)
    model_iuu_loss_lo  = (reported_catch / (1 - ci_lo) - reported_catch) * price_per_tonne,
    
    # Upper bound of loss (higher IUU rate → more loss)
    model_iuu_loss_hi  = (reported_catch / (1 - ci_hi) - reported_catch) * price_per_tonne
  )

# Overall summary
fish_rich |>
  summarise(
    # Central estimates
    total_iuu_catch_t     = round(sum(model_iuu_catch)),
    total_iuu_loss_usd    = round(sum(model_iuu_loss)),
    mean_pred_iuu_rate    = paste0(round(mean(pred_iuu_rate) * 100, 1), "%"),
    
    # Lower bound (ci_lo → less IUU)
    total_iuu_catch_lo_t  = round(sum(model_iuu_loss_lo / price_per_tonne)),
    total_iuu_loss_lo_usd = round(sum(model_iuu_loss_lo)),
    mean_iuu_rate_lo      = paste0(round(mean(ci_lo) * 100, 1), "%"),
    
    # Upper bound (ci_hi → more IUU)
    total_iuu_catch_hi_t  = round(sum(model_iuu_loss_hi / price_per_tonne)),
    total_iuu_loss_hi_usd = round(sum(model_iuu_loss_hi)),
    mean_iuu_rate_hi      = paste0(round(mean(ci_hi) * 100, 1), "%")
  )

# Grouped by year
fish_rich |>
  group_by(year) |>
  summarise(
    iuu_loss_m    = round(sum(model_iuu_loss)    / 1e6, 2),
    iuu_loss_lo_m = round(sum(model_iuu_loss_lo) / 1e6, 2),
    iuu_loss_hi_m = round(sum(model_iuu_loss_hi) / 1e6, 2),
    .groups = "drop"
  ) |>
  mutate(
    ci_label = paste0("$", iuu_loss_m, "M [",
                      iuu_loss_lo_m, "–", iuu_loss_hi_m, "M]")
  )

# Build a tidy comparison tibble
# Multiplier scenarios have no formal CI in this framework → NA
# Regression row carries the model CI
comparison <- tibble(
  scenario   = c("Poor\n(Low)", "Poor\n(Central)",
                 "Poor\n(High)", "Less-Poor\n(Regression)"),
  method     = c("Multiplier", "Multiplier", "Multiplier", "Regression"),
  loss_m_usd = c(
    estimate_loss(0.25, 0.35) / 1e6,
    estimate_loss(0.40, 0.50) / 1e6,
    estimate_loss(0.55, 0.65) / 1e6,
    sum(fish_rich$model_iuu_loss) / 1e6
  ),
  loss_lo_m  = c(NA, NA, NA, sum(fish_rich$model_iuu_loss_lo) / 1e6),
  loss_hi_m  = c(NA, NA, NA, sum(fish_rich$model_iuu_loss_hi) / 1e6)
) |>
  mutate(
    scenario   = fct_inorder(scenario),
    loss_m_usd = round(loss_m_usd, 2),
    loss_lo_m  = round(loss_lo_m,  2),
    loss_hi_m  = round(loss_hi_m,  2)
  )

# Horizontal bar chart, coloured by method
ggplot(comparison, aes(x = loss_m_usd, y = scenario, fill = method)) +
  geom_col(width = 0.6) +
  # CI error bar — only appears where loss_lo_m / loss_hi_m are not NA
  geom_errorbar(
    aes(xmin = loss_lo_m, xmax = loss_hi_m),
    width     = 0.25,
    linewidth = 0.9,
    colour    = "#1e293b",
    na.rm     = TRUE          # silently skips the three NA rows
  ) +
  geom_text(
    aes(label = ifelse(
      !is.na(loss_lo_m),
      paste0("$", loss_m_usd, "M [", loss_lo_m, "–", loss_hi_m, "M]"),
      paste0("$", loss_m_usd, "M")
    )),
    hjust    = -0.08,
    fontface = "bold",
    size     = 3.5
  ) +
  scale_fill_manual(values = c("Multiplier" = "#f59e0b",
                               "Regression" = "#0369a1")) +
  scale_x_continuous(expand = expansion(mult = c(0, 0.30))) +
  labs(title    = "IUU Loss Estimates Across All Scenarios",
       subtitle = "Error bar on regression row = 95% confidence interval · Multiplier rows = point estimates",
       x        = "Estimated IUU Loss (million USD)",
       y        = "Scenario",
       fill     = "Method") +
  theme_minimal() +
  theme(plot.title      = element_text(face = "bold"),
        legend.position = "bottom")

# write_csv() from readr (tidyverse alternative to write.csv())
# Advantage: never adds row numbers, cleaner output
fish_rich |>
  select(year, province, vessel_type,
         reported_catch,
         pred_iuu_rate, ci_lo, ci_hi,          # rate + CI bounds
         model_iuu_catch,                        # central catch estimate
         model_iuu_loss,                         # central loss
         model_iuu_loss_lo, model_iuu_loss_hi) |>  # loss CI bounds
  write_csv("iuu_results_lesspoor.csv")

comparison |>
  write_csv("iuu_scenario_comparison.csv")

cat("✓ Results saved. Working directory:", getwd(), "\n")

############# Session 2.5: "basic" actual catch estimation ###########
# Setup and Baseline Data
rm(list=ls())
library(tidyverse)

# Set seed for reproducibility
set.seed(2026)

# Generate Baseline Reported Data (Official Statistics)
years <- 2011:2025
reported_catch <- data.frame(
  year = years,
  reported = seq(5000, 4200, length.out = length(years)) + rnorm(length(years), 0, 100)
)

# Catch Reconstruction (SAU Expansion Factors)
# Define expansion factors (e.g., subsistence adds 15%, recreational 5%)
# Uncertainty: Score 2 (Poor) = ±30%
reconstruction_data <- reported_catch %>%
  mutate(
    subsistence = reported * 0.15,
    recreational = reported * 0.05,
    reconstructed_total = reported + subsistence + recreational
  )

# Observer Program Extrapolation
# Simulate 10% observer coverage
total_trips <- 1000
observed_trips <- 100
observed_discards <- 50 # tons caught in 100 trips

observer_extrap <- reconstruction_data %>%
  mutate(
    discard_rate = observed_discards / observed_trips,
    est_discards = discard_rate * total_trips
  )

# AIS/VMS Dark Vessel Analysis
# Assume 'dark activity' is 20% of reported AIS effort
dark_factor <- 0.20 
observer_extrap <- observer_extrap %>%
  mutate(dark_vessel_catch = reported * dark_factor)

# Trade Discrepancy (Mirror Statistics)
# Simulate trade discrepancy (Partner Imports - Self Exports)
observer_extrap <- observer_extrap %>%
  mutate(trade_gap = reported * 0.12) # 12% gap identified in customs

# Triangulation and Uncertainty (Monte Carlo)
# Number of simulations
n_sim <- 10000

simulate_catch <- function(row) {
  # Assigning Quality Scores (1=Low, 4=High)
  # Reported (Score 4), Reconstruction (Score 2), AIS (Score 1)
  # SAU quality scores: 1 (±50%), 2 (±30%), 3 (±20%), 4 (±10%)
  rep_sim <- rnorm(n_sim, row$reported, row$reported * 0.05) # ±10% at 2SD
  rec_sim <- rnorm(n_sim, row$subsistence + row$recreational, (row$subsistence + row$recreational) * 0.15) # ±30%
  dark_sim <- rnorm(n_sim, row$dark_vessel_catch, row$dark_vessel_catch * 0.25) # ±50%
  discard_sim <- rnorm(n_sim, row$est_discards, row$est_discards * 0.10) # ±20%
  
  total <- rep_sim + rec_sim + dark_sim + discard_sim
  return(c(mean = mean(total), lower = quantile(total, 0.025), upper = quantile(total, 0.975)))
}

# Apply simulation across years
results <- observer_extrap %>%
  rowwise() %>%
  mutate(sim = list(simulate_catch(cur_data()))) %>%
  unnest_wider(sim)

# Visualization of Findings
ggplot(results, aes(x = year)) +
  geom_line(aes(y = reported, color = "Reported (Official)"), size = 1, linetype = "dashed") +
  geom_line(aes(y = mean, color = "Estimated Actual Catch"), size = 1.2) +
  geom_ribbon(aes(ymin = `lower.2.5%`, ymax = `upper.97.5%`, fill = "95% Uncertainty"), alpha = 0.2) +
  labs(title = "Estimated Actual vs. Reported Catch (2011-2025)",
       subtitle = "Triangulated from Reconstruction, AIS, and Observer Data",
       y = "Catch (Metric Tons)", x = "Year", color = "Source", fill = "Range") +
  theme_minimal()

################################################