You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
250 lines
6.3 KiB
250 lines
6.3 KiB
```{r}
|
|
library(tidyr)
|
|
library(dplyr)
|
|
library(terra)
|
|
library(mregions2)
|
|
library(biooracler)
|
|
library(stringr)
|
|
library(tibble)
|
|
library(catboost)
|
|
library(caret)
|
|
library(blockCV)
|
|
library(sf)
|
|
```
|
|
|
|
|
|
```{r}
|
|
# mammals_range = vect("data/iucn/MAMMALS_MARINE_ONLY.shp")
|
|
# seal_range = mammals_range |>
|
|
# subset(mammals_range$sci_name == "Pagophilus groenlandicus")
|
|
# writeVector(seal_range, "data/iucn/Pagophilus_groenlandicus.shp")
|
|
seal_range = vect("data/iucn/Pagophilus_groenlandicus.shp")
|
|
```
|
|
|
|
|
|
```{r}
|
|
bbox = ext(seal_range) |> extend(5)
|
|
|
|
lon_range = c(bbox$xmin, bbox$xmax)
|
|
lat_range = c(bbox$ymin, bbox$ymax)
|
|
|
|
constraints = list(
|
|
longitude = lon_range,
|
|
latitude = lat_range
|
|
)
|
|
```
|
|
|
|
## Bio-Oracle
|
|
|
|
```{r}
|
|
layers = list_layers()
|
|
```
|
|
|
|
|
|
Фиксируем слои, на которые нет прогнозных данных. Их мы не будем использовать в обучении и предсказании.
|
|
|
|
```{r}
|
|
# Нет прогнозных данных :/
|
|
removed_layers_ids = c(
|
|
"par_mean_baseline_2000_2020_depthsurf",
|
|
"kdpar_mean_baseline_2000_2020_depthsurf",
|
|
"chl_baseline_2000_2018_depthmax",
|
|
"chl_baseline_2000_2018_depthmean",
|
|
"chl_baseline_2000_2018_depthmin"
|
|
)
|
|
```
|
|
|
|
```{r}
|
|
constant_layers_ids = c("terrain_characteristics")
|
|
|
|
constant_layers = layers |>
|
|
filter(dataset_id %in% constant_layers_ids)
|
|
```
|
|
|
|
```{r}
|
|
terrain_raster = download_layers(
|
|
constant_layers$dataset_id[1],
|
|
constraints = constraints,
|
|
directory = "data/bio-oracle-2/terrain_characteristics"
|
|
)
|
|
# Переименовываем слои в полные названия, чтобы потом поля понятно назывались
|
|
names(terrain_raster) = longnames(terrain_raster)
|
|
```
|
|
|
|
```{r}
|
|
dynamic_layers = layers |>
|
|
filter(! dataset_id %in% c(constant_layers_ids, removed_layers_ids)) |>
|
|
separate_wider_delim(dataset_id, delim = "_", names = c("var", "scenario", "year_star", "year_end", "depth"), cols_remove = FALSE)
|
|
```
|
|
|
|
```{r}
|
|
download_slice = function(scenario_value, decade_start) {
|
|
scenario_layers = dynamic_layers |>
|
|
filter(scenario == scenario_value)
|
|
|
|
time_point = paste0(decade_start, "-01-01T00:00:00Z")
|
|
|
|
slice_constraints = list(
|
|
time = c(time_point, time_point),
|
|
longitude = lon_range,
|
|
latitude = lat_range
|
|
)
|
|
|
|
download_dir = file.path("./data/bio-oracle-2", scenario_value, decade_start)
|
|
dir.create(download_dir, recursive = TRUE, showWarnings = FALSE)
|
|
|
|
slice_rasters = sapply(
|
|
scenario_layers$dataset_id,
|
|
function(id) download_layers(
|
|
id,
|
|
constraints = slice_constraints,
|
|
directory = download_dir
|
|
),
|
|
simplify = TRUE
|
|
)
|
|
|
|
return(slice_rasters)
|
|
}
|
|
```
|
|
|
|
```{r}
|
|
baseline_rasters = download_slice("baseline", 2010)
|
|
```
|
|
|
|
```{r}
|
|
baseline_brick = rast(baseline_rasters)
|
|
baseline_brick_depths = baseline_brick |>
|
|
names() |>
|
|
str_extract("depth[:alpha:]+")
|
|
baseline_brick_longnames = baseline_brick |> longnames()
|
|
# baseline_brick_varnames = baseline_brick |> varnames() // краткая запись называний слоёв
|
|
# Человекочитаемые названия слоёв -> полей в датафрейме (см. следующий блок)
|
|
names(baseline_brick) = paste(baseline_brick_longnames, baseline_brick_depths)
|
|
```
|
|
|
|
```{r}
|
|
# features_brick = c(baseline_brick, terrain_raster)
|
|
features_brick = c(baseline_brick)
|
|
```
|
|
|
|
```{r}
|
|
# cropped_bbox = ext(
|
|
# bbox$xmin + 80,
|
|
# bbox$xmax - 90,
|
|
# bbox$ymin + 20,
|
|
# bbox$ymax
|
|
# )
|
|
|
|
cropped_bbox = ext(
|
|
-20,
|
|
72,
|
|
60,
|
|
85
|
|
)
|
|
```
|
|
|
|
```{r}
|
|
cropped_features_brick = features_brick |>
|
|
crop(cropped_bbox)
|
|
```
|
|
|
|
```{r}
|
|
baseline_df = cropped_features_brick |>
|
|
as.data.frame(cells = TRUE)
|
|
```
|
|
|
|
```{r}
|
|
cropped_seal_range_raster = seal_range |>
|
|
rasterize(cropped_features_brick[[1]], field="", background=0)
|
|
```
|
|
|
|
```{r}
|
|
plot(cropped_seal_range_raster)
|
|
```
|
|
|
|
```{r}
|
|
cropped_seal_range_df = cropped_seal_range_raster |>
|
|
as.data.frame(xy = TRUE, cells = TRUE) |>
|
|
rename(target = layer)
|
|
#st_as_sf(coords = c("x", "y"), crs = 4326)
|
|
|
|
block_size = 12
|
|
|
|
cropped_seal_range_df_index = cropped_seal_range_df |>
|
|
mutate(
|
|
# Create grid indices based on coordinates
|
|
grid_x = floor(x / block_size),
|
|
grid_y = floor(y / block_size),
|
|
# Assign to "A" or "B" in a checkerboard pattern
|
|
block_id = (grid_x + grid_y) %% 2
|
|
)
|
|
|
|
train_cells = cropped_seal_range_df_index |>
|
|
filter(block_id == 0) |>
|
|
pull(cell)
|
|
test_cells = cropped_seal_range_df_index |>
|
|
filter(block_id == 1) |>
|
|
pull(cell)
|
|
```
|
|
|
|
```{r}
|
|
learn_orca = function(hyperparam) {
|
|
baseline_species_df = baseline_df |>
|
|
left_join(cropped_seal_range_df, by = "cell") |>
|
|
filter(!is.na(target))
|
|
|
|
train_df = baseline_species_df |>
|
|
filter(cell %in% train_cells)
|
|
train_features = train_df %>% select(-cell, -target, -x, -y) # параметры
|
|
train_labels = train_df$target # наличие ареала
|
|
train_pool = catboost.load_pool(data = train_features, label = train_labels)
|
|
|
|
test_df = baseline_species_df |>
|
|
filter(cell %in% test_cells)
|
|
test_features = test_df %>% select(-cell, -target, -x, -y)
|
|
test_labels = test_df$target
|
|
test_pool = catboost.load_pool(data = test_features, label = test_labels)
|
|
|
|
|
|
# Обучение
|
|
model = catboost.train(train_pool, test_pool = test_pool, params = hyperparam)
|
|
|
|
return(model)
|
|
}
|
|
```
|
|
|
|
```{r}
|
|
fit_params <- list(
|
|
iterations = 100,
|
|
learning_rate = 0.01,
|
|
depth = 6,
|
|
loss_function = 'Logloss',
|
|
eval_metric = 'AUC',
|
|
random_seed = 42,
|
|
verbose = 10, # Print progress every 100 iterations
|
|
od_type = "Iter", # Optional: Early stopping
|
|
od_wait = 20
|
|
)
|
|
```
|
|
попробовать разделить по диагонали
|
|
```{r}
|
|
m_seal = learn_orca(fit_params)
|
|
```
|
|
|
|
```{r}
|
|
i_seal = catboost.get_feature_importance(m_seal) |> as.data.frame() |> tibble::rownames_to_column("VALUE")
|
|
```
|
|
|
|
```{r}
|
|
sf_use_s2(FALSE)
|
|
target_test_blocks = cv_spatial(
|
|
x = cropped_seal_range_sf,
|
|
column = "layer",
|
|
size = 1e+06,
|
|
k = 2
|
|
)
|
|
```
|
|
|
|
```{r}
|
|
plot(cropped_seal_range_raster)
|
|
``` |