```{r} library(tidyr) library(dplyr) library(terra) library(mregions2) library(biooracler) library(stringr) library(tibble) library(catboost) library(caret) library(blockCV) library(sf) ``` ```{r} # mammals_range = vect("data/iucn/MAMMALS_MARINE_ONLY.shp") # seal_range = mammals_range |> # subset(mammals_range$sci_name == "Pagophilus groenlandicus") # writeVector(seal_range, "data/iucn/Pagophilus_groenlandicus.shp") seal_range = vect("data/iucn/Pagophilus_groenlandicus.shp") ``` ```{r} bbox = ext(seal_range) |> extend(5) lon_range = c(bbox$xmin, bbox$xmax) lat_range = c(bbox$ymin, bbox$ymax) constraints = list( longitude = lon_range, latitude = lat_range ) ``` ## Bio-Oracle ```{r} layers = list_layers() ``` Фиксируем слои, на которые нет прогнозных данных. Их мы не будем использовать в обучении и предсказании. ```{r} # Нет прогнозных данных :/ removed_layers_ids = c( "par_mean_baseline_2000_2020_depthsurf", "kdpar_mean_baseline_2000_2020_depthsurf", "chl_baseline_2000_2018_depthmax", "chl_baseline_2000_2018_depthmean", "chl_baseline_2000_2018_depthmin" ) ``` ```{r} constant_layers_ids = c("terrain_characteristics") constant_layers = layers |> filter(dataset_id %in% constant_layers_ids) ``` ```{r} terrain_raster = download_layers( constant_layers$dataset_id[1], constraints = constraints, directory = "data/bio-oracle-2/terrain_characteristics" ) # Переименовываем слои в полные названия, чтобы потом поля понятно назывались names(terrain_raster) = longnames(terrain_raster) ``` ```{r} dynamic_layers = layers |> filter(! dataset_id %in% c(constant_layers_ids, removed_layers_ids)) |> separate_wider_delim(dataset_id, delim = "_", names = c("var", "scenario", "year_star", "year_end", "depth"), cols_remove = FALSE) ``` ```{r} download_slice = function(scenario_value, decade_start) { scenario_layers = dynamic_layers |> filter(scenario == scenario_value) time_point = paste0(decade_start, "-01-01T00:00:00Z") slice_constraints = list( time = c(time_point, time_point), longitude = lon_range, latitude = lat_range ) download_dir = file.path("./data/bio-oracle-2", scenario_value, decade_start) dir.create(download_dir, recursive = TRUE, showWarnings = FALSE) slice_rasters = sapply( scenario_layers$dataset_id, function(id) download_layers( id, constraints = slice_constraints, directory = download_dir ), simplify = TRUE ) return(slice_rasters) } ``` ```{r} baseline_rasters = download_slice("baseline", 2010) ``` ```{r} baseline_brick = rast(baseline_rasters) baseline_brick_depths = baseline_brick |> names() |> str_extract("depth[:alpha:]+") baseline_brick_longnames = baseline_brick |> longnames() # baseline_brick_varnames = baseline_brick |> varnames() // краткая запись называний слоёв # Человекочитаемые названия слоёв -> полей в датафрейме (см. следующий блок) names(baseline_brick) = paste(baseline_brick_longnames, baseline_brick_depths) ``` ```{r} # features_brick = c(baseline_brick, terrain_raster) features_brick = c(baseline_brick) ``` ```{r} # cropped_bbox = ext( # bbox$xmin + 80, # bbox$xmax - 90, # bbox$ymin + 20, # bbox$ymax # ) cropped_bbox = ext( -20, 72, 60, 85 ) ``` ```{r} cropped_features_brick = features_brick |> crop(cropped_bbox) ``` ```{r} baseline_df = cropped_features_brick |> as.data.frame(cells = TRUE) ``` ```{r} cropped_seal_range_raster = seal_range |> rasterize(cropped_features_brick[[1]], field="", background=0) ``` ```{r} plot(cropped_seal_range_raster) ``` ```{r} cropped_seal_range_df = cropped_seal_range_raster |> as.data.frame(xy = TRUE, cells = TRUE) |> rename(target = layer) #st_as_sf(coords = c("x", "y"), crs = 4326) block_size = 12 cropped_seal_range_df_index = cropped_seal_range_df |> mutate( # Create grid indices based on coordinates grid_x = floor(x / block_size), grid_y = floor(y / block_size), # Assign to "A" or "B" in a checkerboard pattern block_id = (grid_x + grid_y) %% 2 ) train_cells = cropped_seal_range_df_index |> filter(block_id == 0) |> pull(cell) test_cells = cropped_seal_range_df_index |> filter(block_id == 1) |> pull(cell) ``` ```{r} learn_orca = function(hyperparam) { baseline_species_df = baseline_df |> left_join(cropped_seal_range_df, by = "cell") |> filter(!is.na(target)) train_df = baseline_species_df |> filter(cell %in% train_cells) train_features = train_df %>% select(-cell, -target, -x, -y) # параметры train_labels = train_df$target # наличие ареала train_pool = catboost.load_pool(data = train_features, label = train_labels) test_df = baseline_species_df |> filter(cell %in% test_cells) test_features = test_df %>% select(-cell, -target, -x, -y) test_labels = test_df$target test_pool = catboost.load_pool(data = test_features, label = test_labels) # Обучение model = catboost.train(train_pool, test_pool = test_pool, params = hyperparam) return(model) } ``` ```{r} fit_params <- list( iterations = 100, learning_rate = 0.01, depth = 6, loss_function = 'Logloss', eval_metric = 'AUC', random_seed = 42, verbose = 10, # Print progress every 100 iterations od_type = "Iter", # Optional: Early stopping od_wait = 20 ) ``` попробовать разделить по диагонали ```{r} m_seal = learn_orca(fit_params) ``` ```{r} i_seal = catboost.get_feature_importance(m_seal) |> as.data.frame() |> tibble::rownames_to_column("VALUE") ``` ```{r} sf_use_s2(FALSE) target_test_blocks = cv_spatial( x = cropped_seal_range_sf, column = "layer", size = 1e+06, k = 2 ) ``` ```{r} plot(cropped_seal_range_raster) ```