You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

250 lines
6.3 KiB

```{r}
library(tidyr)
library(dplyr)
library(terra)
library(mregions2)
library(biooracler)
library(stringr)
library(tibble)
library(catboost)
library(caret)
library(blockCV)
library(sf)
```
```{r}
# mammals_range = vect("data/iucn/MAMMALS_MARINE_ONLY.shp")
# seal_range = mammals_range |>
# subset(mammals_range$sci_name == "Pagophilus groenlandicus")
# writeVector(seal_range, "data/iucn/Pagophilus_groenlandicus.shp")
seal_range = vect("data/iucn/Pagophilus_groenlandicus.shp")
```
```{r}
bbox = ext(seal_range) |> extend(5)
lon_range = c(bbox$xmin, bbox$xmax)
lat_range = c(bbox$ymin, bbox$ymax)
constraints = list(
longitude = lon_range,
latitude = lat_range
)
```
## Bio-Oracle
```{r}
layers = list_layers()
```
Фиксируем слои, на которые нет прогнозных данных. Их мы не будем использовать в обучении и предсказании.
```{r}
# Нет прогнозных данных :/
removed_layers_ids = c(
"par_mean_baseline_2000_2020_depthsurf",
"kdpar_mean_baseline_2000_2020_depthsurf",
"chl_baseline_2000_2018_depthmax",
"chl_baseline_2000_2018_depthmean",
"chl_baseline_2000_2018_depthmin"
)
```
```{r}
constant_layers_ids = c("terrain_characteristics")
constant_layers = layers |>
filter(dataset_id %in% constant_layers_ids)
```
```{r}
terrain_raster = download_layers(
constant_layers$dataset_id[1],
constraints = constraints,
directory = "data/bio-oracle-2/terrain_characteristics"
)
# Переименовываем слои в полные названия, чтобы потом поля понятно назывались
names(terrain_raster) = longnames(terrain_raster)
```
```{r}
dynamic_layers = layers |>
filter(! dataset_id %in% c(constant_layers_ids, removed_layers_ids)) |>
separate_wider_delim(dataset_id, delim = "_", names = c("var", "scenario", "year_star", "year_end", "depth"), cols_remove = FALSE)
```
```{r}
download_slice = function(scenario_value, decade_start) {
scenario_layers = dynamic_layers |>
filter(scenario == scenario_value)
time_point = paste0(decade_start, "-01-01T00:00:00Z")
slice_constraints = list(
time = c(time_point, time_point),
longitude = lon_range,
latitude = lat_range
)
download_dir = file.path("./data/bio-oracle-2", scenario_value, decade_start)
dir.create(download_dir, recursive = TRUE, showWarnings = FALSE)
slice_rasters = sapply(
scenario_layers$dataset_id,
function(id) download_layers(
id,
constraints = slice_constraints,
directory = download_dir
),
simplify = TRUE
)
return(slice_rasters)
}
```
```{r}
baseline_rasters = download_slice("baseline", 2010)
```
```{r}
baseline_brick = rast(baseline_rasters)
baseline_brick_depths = baseline_brick |>
names() |>
str_extract("depth[:alpha:]+")
baseline_brick_longnames = baseline_brick |> longnames()
# baseline_brick_varnames = baseline_brick |> varnames() // краткая запись называний слоёв
# Человекочитаемые названия слоёв -> полей в датафрейме (см. следующий блок)
names(baseline_brick) = paste(baseline_brick_longnames, baseline_brick_depths)
```
```{r}
# features_brick = c(baseline_brick, terrain_raster)
features_brick = c(baseline_brick)
```
```{r}
# cropped_bbox = ext(
# bbox$xmin + 80,
# bbox$xmax - 90,
# bbox$ymin + 20,
# bbox$ymax
# )
cropped_bbox = ext(
-20,
72,
60,
85
)
```
```{r}
cropped_features_brick = features_brick |>
crop(cropped_bbox)
```
```{r}
baseline_df = cropped_features_brick |>
as.data.frame(cells = TRUE)
```
```{r}
cropped_seal_range_raster = seal_range |>
rasterize(cropped_features_brick[[1]], field="", background=0)
```
```{r}
plot(cropped_seal_range_raster)
```
```{r}
cropped_seal_range_df = cropped_seal_range_raster |>
as.data.frame(xy = TRUE, cells = TRUE) |>
rename(target = layer)
#st_as_sf(coords = c("x", "y"), crs = 4326)
block_size = 12
cropped_seal_range_df_index = cropped_seal_range_df |>
mutate(
# Create grid indices based on coordinates
grid_x = floor(x / block_size),
grid_y = floor(y / block_size),
# Assign to "A" or "B" in a checkerboard pattern
block_id = (grid_x + grid_y) %% 2
)
train_cells = cropped_seal_range_df_index |>
filter(block_id == 0) |>
pull(cell)
test_cells = cropped_seal_range_df_index |>
filter(block_id == 1) |>
pull(cell)
```
```{r}
learn_orca = function(hyperparam) {
baseline_species_df = baseline_df |>
left_join(cropped_seal_range_df, by = "cell") |>
filter(!is.na(target))
train_df = baseline_species_df |>
filter(cell %in% train_cells)
train_features = train_df %>% select(-cell, -target, -x, -y) # параметры
train_labels = train_df$target # наличие ареала
train_pool = catboost.load_pool(data = train_features, label = train_labels)
test_df = baseline_species_df |>
filter(cell %in% test_cells)
test_features = test_df %>% select(-cell, -target, -x, -y)
test_labels = test_df$target
test_pool = catboost.load_pool(data = test_features, label = test_labels)
# Обучение
model = catboost.train(train_pool, test_pool = test_pool, params = hyperparam)
return(model)
}
```
```{r}
fit_params <- list(
iterations = 100,
learning_rate = 0.01,
depth = 6,
loss_function = 'Logloss',
eval_metric = 'AUC',
random_seed = 42,
verbose = 10, # Print progress every 100 iterations
od_type = "Iter", # Optional: Early stopping
od_wait = 20
)
```
попробовать разделить по диагонали
```{r}
m_seal = learn_orca(fit_params)
```
```{r}
i_seal = catboost.get_feature_importance(m_seal) |> as.data.frame() |> tibble::rownames_to_column("VALUE")
```
```{r}
sf_use_s2(FALSE)
target_test_blocks = cv_spatial(
x = cropped_seal_range_sf,
column = "layer",
size = 1e+06,
k = 2
)
```
```{r}
plot(cropped_seal_range_raster)
```