You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bio-oracle/bio-oracle-prediction.qmd

127 lines
3.7 KiB

## Prediction Pipeline (Projection)

This file implements the prediction (projection) stage. It consumes the artifacts produced by `bio-oracle-learning.qmd`, so that file must be run first.

## Load required R packages
```{r}
library(tidyr)
library(dplyr)
library(terra)
library(mregions2)
library(biooracler)
library(stringr)
library(tibble)
library(catboost)
library(sf)
```
## Load shared helpers and define run configuration
```{r}
source("R/shared-utils.R")
# Run configuration for the prediction stage.
config <- list(
  # Range shapefile; handed to make_study_bounds() as `range_shapefile`.
  range_shapefile = "data/iucn/Pagophilus_groenlandicus.shp",
  # Handed to make_study_bounds() as `expand_degrees`.
  bbox_expand_degrees = 5,
  # File names of the artifacts that later chunks read from disk.
  artifacts = list(
    dynamic_layers = "dynamic_layers.rds",
    subset_layer_names = "subset_baseline_layer_names.rds",
    seal_range_df = "seal_range_df.rds",
    seal_range_raster = "seal_range_raster.tif",
    model = "cat_model.cbm",
    manifest = "artifacts-manifest-learning.csv"
  )
)
```
## Recreate spatial constraints
These bounds are needed for downloading future Bio-ORACLE slices.
```{r}
# Rebuild the study bounds from the configured shapefile and expansion.
# make_study_bounds() is not defined in this file (presumably it comes from
# R/shared-utils.R -- confirm).
study_bounds <- make_study_bounds(
  range_shapefile = config$range_shapefile,
  expand_degrees = config$bbox_expand_degrees
)

# Unpack the components that later chunks refer to by name.
seal_range <- study_bounds$seal_range
lon_range <- study_bounds$lon_range
lat_range <- study_bounds$lat_range
```
## Load shared artifacts from learning stage
```{r}
# Paths of the artifacts this stage reads; the manifest is not in this list.
required_artifacts <- unlist(
  config$artifacts[c(
    "dynamic_layers",
    "subset_layer_names",
    "seal_range_df",
    "seal_range_raster",
    "model"
  )]
)
# assert_required_files() is defined outside this file; presumably it stops
# when a path is missing -- confirm against the helper.
assert_required_files(required_artifacts)

# Restore the objects saved by the learning stage.
cat_model <- catboost.load_model(config$artifacts$model)
subset_baseline_layer_names <- readRDS(config$artifacts$subset_layer_names)
seal_range_df <- readRDS(config$artifacts$seal_range_df)
seal_range_raster <- rast(config$artifacts$seal_range_raster)
dynamic_layers <- readRDS(config$artifacts$dynamic_layers)
```
## Shared artifacts manifest (optional inspection)
```{r}
# The manifest is only read and displayed when the file is present;
# nothing happens otherwise.
if (file.exists(config$artifacts$manifest)) {
  artifacts_manifest <- utils::read.csv(config$artifacts$manifest)
  # Last expression of the branch, so the table is shown in the output.
  artifacts_manifest
}
```
## Prediction function
```{r}
#' Project the loaded model onto one Bio-ORACLE slice and save a change map
#'
#' Downloads the slice for the given scenario and decade, predicts a 0/1
#' class per cell with `cat_model`, combines the prediction with the
#' `target` column of `seal_range_df`, and writes `<ssp_code>-<decade>.tif`
#' and `<ssp_code>-<decade>.png` to the working directory.
#'
#' Reads these objects from the enclosing environment: `dynamic_layers`,
#' `subset_baseline_layer_names`, `lon_range`, `lat_range`, `cat_model`
#' and `seal_range_df`.
#'
#' @param ssp_code Scenario identifier, forwarded as `scenario_value` to
#'   `download_biooracle_slice_subset()` (e.g. `"ssp585"`).
#' @param decade Forwarded as `decade_start` to
#'   `download_biooracle_slice_subset()` (e.g. `2020`).
#' @return The value of `dev.off()` from closing the PNG device.
get_prediction <- function(ssp_code, decade) {
  # Shared stem for the plot title and both output file names.
  run_label <- paste0(ssp_code, "-", decade)

  ssp_slice <- download_biooracle_slice_subset(
    dynamic_layers = dynamic_layers,
    scenario_value = ssp_code,
    decade_start = decade,
    layers_to_download = subset_baseline_layer_names,
    lon_range = lon_range,
    lat_range = lat_range
  )
  ssp_slice_brick <- rast(ssp_slice)
  ssp_slice_brick <- set_brick_names_with_depth(ssp_slice_brick)

  # One row per cell; `cell`, `x` and `y` are bookkeeping columns and are
  # dropped before the remaining columns are passed to the model.
  ssp_slice_df <- ssp_slice_brick |>
    as.data.frame(cells = TRUE, xy = TRUE)
  ssp_slice_features <- ssp_slice_df |>
    select(-cell, -x, -y)

  ssp_slice_pool <- catboost.load_pool(data = ssp_slice_features)
  preds_prob <- catboost.predict(
    cat_model,
    ssp_slice_pool,
    prediction_type = "Probability"
  )
  # Probabilities above 0.5 become class 1, everything else class 0.
  preds_class <- ifelse(preds_prob > 0.5, 1, 0)

  ssp_slice_prediction <- ssp_slice_df |>
    mutate(prediction = preds_class) |>
    select(cell, prediction)

  # diff = 2 * target + prediction gives 0..3, read as two binary digits
  # (target, then prediction) to match the "00".."11" legend labels below.
  ssp_slice_diff <- seal_range_df |>
    left_join(ssp_slice_prediction, by = "cell") |>
    mutate(diff = 2 * target + prediction)

  # New raster derived from the slice; the diff codes are written into it
  # by cell number and only the first layer is exported and plotted.
  r <- rast(ssp_slice_brick)
  r[ssp_slice_diff$cell] <- ssp_slice_diff$diff
  writeRaster(r[[1]], paste0(run_label, ".tif"), overwrite = TRUE)

  png(filename = paste0(run_label, ".png"), width = 800, height = 800)
  # Close the PNG device even if plot() fails, so an error does not leave
  # an open graphics device behind. The flag avoids a second dev.off() on
  # the normal path.
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)

  # NOTE(review): `col` and `levels` are supplied positionally for four
  # classes; if a slice contains fewer than four distinct codes, check how
  # terra pairs colours and labels with the values actually present.
  plot(
    r[[1]],
    type = "classes",
    col = c("grey", "green", "red", "purple"),
    levels = c("00", "01", "10", "11"),
    main = run_label
  )

  device_open <- FALSE
  dev.off()
}
```
## Example runs
```{r}
# Single projection: scenario "ssp585", decade 2020.
get_prediction("ssp585", 2020)
```
```{r}
# Run the projection for every decade in the sequence (currently only 2050;
# widen the `seq()` bounds to cover more decades). A plain loop is used
# because get_prediction() is called for the files it writes, not for a
# value worth collecting or printing.
for (decade in seq(2050, 2050, by = 10)) {
  get_prediction("ssp585", decade)
}
```