From 656ac983cd778a33538557f6cf0509dd7d957074 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Feret Date: Mon, 7 Nov 2022 15:12:24 +0100 Subject: [PATCH] biodivMapR v1.9.7 ## Addition - progress bar instead of messages - future: multisession instead of multiprocess --- DESCRIPTION | 6 ++- NAMESPACE | 7 ++++ NEWS.md | 6 +++ R/Lib_ImageProcess.R | 10 ++++- R/Lib_MapAlphaDiversity.R | 67 +++++++++++++++++++++++----------- R/Lib_MapBetaDiversity.R | 55 +++++++++++++++++----------- R/Lib_MapSpectralSpecies.R | 75 +++++++++++++++++++++----------------- R/Lib_PerformPCA.R | 50 +++++++++++++++---------- man/apply_kmeans.Rd | 37 +++++++++++++++++++ man/map_alpha_div.Rd | 6 +-- man/map_beta_div.Rd | 4 +- man/ordination_parallel.Rd | 5 ++- man/write_PCA_raster.Rd | 46 +++++++++++++++++++++++ 13 files changed, 269 insertions(+), 105 deletions(-) create mode 100644 man/apply_kmeans.Rd create mode 100644 man/write_PCA_raster.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 368801c3..7ea97fd2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: biodivMapR Title: biodivMapR: an R package for a- and ß-diversity mapping using remotely-sensed images -Version: 1.9.6.1 +Version: 1.9.7 Authors@R: c(person(given = "Jean-Baptiste", family = "Feret", email = "jb.feret@teledetection.fr", @@ -42,7 +42,9 @@ Imports: vegan, zip, sf, - rgeos + rgeos, + progress, + progressr RoxygenNote: 7.2.1 Suggests: knitr, diff --git a/NAMESPACE b/NAMESPACE index 03cdb206..68403558 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(Write_Big_Image) export(Write_Image_NativeRes) export(ZipFile) export(apply_continuum_removal) +export(apply_kmeans) export(build_image_from_list) export(center_reduce) export(change_resolution_HDR) @@ -84,6 +85,7 @@ export(where_to_read) export(where_to_read_kernel) export(where_to_write_kernel) export(write_ENVI_header) +export(write_PCA_raster) export(write_StarsStack) export(write_raster) import(stars) @@ -96,6 +98,7 @@ importFrom(ecodist,nmds) importFrom(emstreeR,ComputeMST) importFrom(fields,rdist) importFrom(future,multiprocess) +importFrom(future,multisession) importFrom(future,plan) importFrom(future,sequential) importFrom(future.apply,future_lapply) @@ -109,6 +112,10 @@ importFrom(matrixStats,rowSds) importFrom(methods,as) importFrom(methods,is) importFrom(mmand,erode) +importFrom(progress,progress_bar) +importFrom(progressr,handlers) +importFrom(progressr,progressor) +importFrom(progressr,with_progress) importFrom(raster,brick) importFrom(raster,cellFromPolygon) importFrom(raster,cellFromXY) diff --git a/NEWS.md b/NEWS.md index d4b595e0..2d949570 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# biodivMapR v1.9.7 + +## Addition +- progress bar instead of messages +- future: multisession instead of multiprocess + # biodivMapR v1.9.6.1 ## Addition diff --git a/R/Lib_ImageProcess.R b/R/Lib_ImageProcess.R index fda67a91..abda3ab8 100644 --- a/R/Lib_ImageProcess.R +++ b/R/Lib_ImageProcess.R @@ -660,7 +660,8 @@ get_BB_from_Vector <- function(path_raster,path_vector,Buffer = 0){ #' Kind (Kernel index) and 'id' the sample ID to be used with the kernel #' @export -get_random_subset_from_image <- function(ImPath, MaskPath, nb_partitions, Pix_Per_Partition, kernel=NULL,MaxRAM = 0.5) { +get_random_subset_from_image <- function(ImPath, MaskPath, nb_partitions, + Pix_Per_Partition, kernel=NULL, MaxRAM = 0.5) { metarast <- raster::raster(ImPath) # updated raster package: do not use brick with 2D raster @@ -1524,6 +1525,7 @@ write_ENVI_header <- function(HDR, HDRpath) { #' @param 
Image_Format list. description of data format corresponding to ENVI type #' #' @return None +#' @importFrom progress progress_bar #' @export Write_Big_Image <- function(ImgWrite,ImagePath,HDR,Image_Format){ @@ -1535,8 +1537,11 @@ Write_Big_Image <- function(ImgWrite,ImagePath,HDR,Image_Format){ ) close(fidOUT) # for each piece of image + pb <- progress_bar$new( + format = 'Writing raster [:bar] :percent in :elapsedfull', + total = nbPieces, clear = FALSE, width= 100) + for (i in 1:nbPieces) { - print(paste("Writing Image, piece #", i, "/", nbPieces)) # read image and mask data Byte_Start <- SeqRead_Image$ReadByte_Start[i] Line_Start <- SeqRead_Image$Line_Start[i] @@ -1564,6 +1569,7 @@ Write_Big_Image <- function(ImgWrite,ImagePath,HDR,Image_Format){ writeBin(c(as.integer(ImgChunk)), fidOUT, size = Image_Format$Bytes, endian = .Platform$endian, useBytes = FALSE) } close(fidOUT) + pb$tick() } rm(ImgWrite) rm(ImgChunk) diff --git a/R/Lib_MapAlphaDiversity.R b/R/Lib_MapAlphaDiversity.R index 247acff1..b510ad9d 100644 --- a/R/Lib_MapAlphaDiversity.R +++ b/R/Lib_MapAlphaDiversity.R @@ -34,8 +34,8 @@ map_alpha_div <- function(Input_Image_File=FALSE, Output_Dir='', window_size=10, TypePCA = "SPCA", nbclusters = 50, MinSun = 0.25, pcelim = 0.02, - Index_Alpha = "Shannon", FullRes = TRUE, - LowRes = FALSE, MapSTD = FALSE, + Index_Alpha = "Shannon", FullRes = FALSE, + LowRes = TRUE, MapSTD = TRUE, nbCPU = FALSE, MaxRAM = FALSE, ClassifMap = FALSE) { @@ -198,8 +198,9 @@ compute_ALPHA_FromPlot <- function(SpectralSpecies_Plot,pcelim = 0.02){ # @param MinSun numeric. minimum proportion of sunlit pixels required to consider plot # # @return list of mean and SD of alpha diversity metrics -#' @importFrom future plan multiprocess sequential +#' @importFrom future plan multiprocess multisession sequential #' @importFrom future.apply future_lapply +#' @importFrom progressr progressor handlers with_progress #' @importFrom stats sd compute_alpha_metrics <- function(Spectral_Species_Path, window_size, nbclusters, MinSun, pcelim, nbCPU = FALSE, MaxRAM = FALSE, Index_Alpha = "Shannon") { @@ -209,14 +210,18 @@ compute_alpha_metrics <- function(Spectral_Species_Path, window_size, nbclusters if (MaxRAM == FALSE) { MaxRAM <- 0.25 } - nbPieces_Min <- split_image(HDR_SS, MaxRAM) if (nbCPU == FALSE) { nbCPU <- 1 } - if (nbPieces_Min < nbCPU) { - nbPieces_Min <- nbCPU - } - SeqRead.SS <- where_to_read_kernel(HDR_SS, nbPieces_Min, window_size) + nbPieces <- split_image(HDR_SS, MaxRAM) + nbPieces <- nbCPU*(1+(nbPieces-1)%/%nbCPU) + # if (nbPieces < nbCPU) { + # nbPieces <- nbCPU + # } + # if (nbPieces<10){ + # nbPieces <- 10 + # } + SeqRead.SS <- where_to_read_kernel(HDR_SS, nbPieces, window_size) ## prepare SS distribution map and corresponding sunlit map ## # prepare SS distribution map @@ -240,7 +245,7 @@ compute_alpha_metrics <- function(Spectral_Species_Path, window_size, nbclusters close(fidSSD) headerFpath <- paste(SSD_Path, ".hdr", sep = "") write_ENVI_header(HDR_SSD, headerFpath) - SeqWrite.SSD <- where_to_write_kernel(HDR_SS, HDR_SSD, nbPieces_Min, window_size) + SeqWrite.SSD <- where_to_write_kernel(HDR_SS, HDR_SSD, nbPieces, window_size) # prepare proportion of sunlit pixels from each spatial unit Sunlit_Path <- paste(SSD_Path, "_Sunlit", sep = "") @@ -257,11 +262,11 @@ compute_alpha_metrics <- function(Spectral_Species_Path, window_size, nbclusters close(fidSunlit) headerFpath <- paste(Sunlit_Path, ".hdr", sep = "") write_ENVI_header(HDR_Sunlit, headerFpath) - SeqWrite.Sunlit <- where_to_write_kernel(HDR_SS, 
HDR_Sunlit, nbPieces_Min, window_size) + SeqWrite.Sunlit <- where_to_write_kernel(HDR_SS, HDR_Sunlit, nbPieces, window_size) # for each piece of image ReadWrite <- list() - for (i in 1:nbPieces_Min) { + for (i in 1:nbPieces) { ReadWrite[[i]] <- list() ReadWrite[[i]]$RW_SS <- ReadWrite[[i]]$RW_SSD <- ReadWrite[[i]]$RW_Sunlit <- list() ReadWrite[[i]]$RW_SS$Byte_Start <- SeqRead.SS$ReadByte_Start[i] @@ -283,22 +288,35 @@ compute_alpha_metrics <- function(Spectral_Species_Path, window_size, nbclusters # multiprocess of spectral species distribution and alpha diversity metrics if (nbCPU>1){ - plan(multiprocess, workers = nbCPU) ## Parallelize using four cores - Schedule_Per_Thread <- ceiling(nbPieces_Min / nbCPU) - ALPHA <- future_lapply(ReadWrite, - FUN = convert_PCA_to_SSD, Spectral_Species_Path = Spectral_Species_Path, - HDR_SS = HDR_SS, HDR_SSD = HDR_SSD, SS_Format = SS_Format, SSD_Format = SSD_Format, - ImgFormat = ImgFormat, window_size = window_size, nbclusters = nbclusters, MinSun = MinSun, - pcelim = pcelim, Index_Alpha = Index_Alpha, SSD_Path = SSD_Path, Sunlit_Path = Sunlit_Path, - Sunlit_Format = Sunlit_Format, future.scheduling = Schedule_Per_Thread - ) + plan(multisession, workers = nbCPU) ## Parallelize using four cores + # plan(multiprocess, workers = nbCPU) ## Parallelize using four cores + Schedule_Per_Thread <- ceiling(nbPieces / nbCPU) + handlers(global = TRUE) + handlers("progress") + with_progress({ + p <- progressr::progressor(steps = nbPieces) + ALPHA <- future_lapply(ReadWrite, + FUN = convert_PCA_to_SSD, Spectral_Species_Path = Spectral_Species_Path, + HDR_SS = HDR_SS, HDR_SSD = HDR_SSD, SS_Format = SS_Format, SSD_Format = SSD_Format, + ImgFormat = ImgFormat, window_size = window_size, nbclusters = nbclusters, MinSun = MinSun, + pcelim = pcelim, Index_Alpha = Index_Alpha, SSD_Path = SSD_Path, Sunlit_Path = Sunlit_Path, + Sunlit_Format = Sunlit_Format, p = p) + }) + + + # ALPHA <- future_lapply(ReadWrite, + # FUN = convert_PCA_to_SSD, Spectral_Species_Path = Spectral_Species_Path, + # HDR_SS = HDR_SS, HDR_SSD = HDR_SSD, SS_Format = SS_Format, SSD_Format = SSD_Format, + # ImgFormat = ImgFormat, window_size = window_size, nbclusters = nbclusters, MinSun = MinSun, + # pcelim = pcelim, Index_Alpha = Index_Alpha, SSD_Path = SSD_Path, Sunlit_Path = Sunlit_Path, + # Sunlit_Format = Sunlit_Format, future.scheduling = Schedule_Per_Thread) plan(sequential) } else { ALPHA <- lapply(ReadWrite, FUN = convert_PCA_to_SSD, Spectral_Species_Path = Spectral_Species_Path, HDR_SS = HDR_SS, HDR_SSD = HDR_SSD, SS_Format = SS_Format, SSD_Format = SSD_Format, ImgFormat = ImgFormat, window_size = window_size, nbclusters = nbclusters, MinSun = MinSun, pcelim = pcelim, Index_Alpha = Index_Alpha, SSD_Path = SSD_Path, Sunlit_Path = Sunlit_Path, - Sunlit_Format = Sunlit_Format) + Sunlit_Format = Sunlit_Format,p= NULL) } # create ful map from chunks @@ -348,13 +366,15 @@ compute_alpha_metrics <- function(Spectral_Species_Path, window_size, nbclusters # @param SSD_Path # @param Sunlit_Path # @param Sunlit_Format +# @param p # # @param # @param # @return convert_PCA_to_SSD <- function(ReadWrite, Spectral_Species_Path, HDR_SS, HDR_SSD, SS_Format, SSD_Format, ImgFormat, window_size, nbclusters, - MinSun, pcelim, Index_Alpha, SSD_Path, Sunlit_Path, Sunlit_Format) { + MinSun, pcelim, Index_Alpha, SSD_Path, Sunlit_Path, Sunlit_Format, + p = NULL) { SS_Chunk <- read_BIL_image_subset( Spectral_Species_Path, HDR_SS, ReadWrite$RW_SS$Byte_Start, ReadWrite$RW_SS$lenBin, @@ -398,6 +418,9 @@ convert_PCA_to_SSD <- 
function(ReadWrite, Spectral_Species_Path, HDR_SS, HDR_SSD Simpson_SD_Chunk <- apply(SSD_Alpha$Simpson, 1:2, sd) rm(SSD_Alpha) gc() + if (!is.null(p)){ + p() + } my_list <- list( "Shannon" = Shannon_Mean_Chunk, "Fisher" = Fisher_Mean_Chunk, "Simpson" = Simpson_Mean_Chunk, "Shannon.SD" = Shannon_SD_Chunk, "Fisher.SD" = Fisher_SD_Chunk, "Simpson.SD" = Simpson_SD_Chunk diff --git a/R/Lib_MapBetaDiversity.R b/R/Lib_MapBetaDiversity.R index 0375fc18..a8a6bae9 100644 --- a/R/Lib_MapBetaDiversity.R +++ b/R/Lib_MapBetaDiversity.R @@ -38,8 +38,8 @@ map_beta_div <- function(Input_Image_File=FALSE, Output_Dir='', window_size=10, TypePCA = 'SPCA', nb_partitions = 20,nbclusters = 50, Nb_Units_Ordin = 2000, MinSun = 0.25, - pcelim = 0.02, scaling = 'PCO', FullRes = TRUE, - LowRes = FALSE, nbCPU = 1, MaxRAM = 0.25, + pcelim = 0.02, scaling = 'PCO', FullRes = FALSE, + LowRes = TRUE, nbCPU = 1, MaxRAM = 0.25, ClassifMap = FALSE, dimMDS=3) { if (ClassifMap == FALSE){ @@ -110,7 +110,7 @@ map_beta_div <- function(Input_Image_File=FALSE, Output_Dir='', window_size=10, #' @param dimMDS numeric. number of dimensions of the NMDS # #' @return BetaNMDS_sel -#' @importFrom future plan multiprocess sequential +#' @importFrom future plan multiprocess multisession sequential #' @importFrom future.apply future_lapply #' @importFrom ecodist nmds #' @importFrom utils find @@ -124,7 +124,8 @@ compute_NMDS <- function(MatBCdist,dimMDS=3) { nbCoresNMDS <- 4 } # multiprocess of spectral species distribution and alpha diversity metrics - plan(multiprocess, workers = nbCoresNMDS) ## Parallelize using four cores + # plan(multiprocess, workers = nbCoresNMDS) ## Parallelize using four cores + plan(multisession, workers = nbCoresNMDS) ## Parallelize using four cores BetaNMDS <- future_lapply(MatBCdist, FUN = nmds, mindim = dimMDS, maxdim = dimMDS, nits = 1, future.packages = c("ecodist")) plan(sequential) # find iteration with minimum stress @@ -158,37 +159,46 @@ compute_NMDS <- function(MatBCdist,dimMDS=3) { #' #' @return Ordination_est coordinates of each spatial unit in ordination space #' @importFrom snow splitRows -#' @importFrom future plan multiprocess sequential +#' @importFrom future plan multiprocess multisession sequential #' @importFrom future.apply future_lapply +#' @importFrom progressr progressor handlers with_progress #' @export ordination_to_NN <- function(Beta_Ordination_sel, SSD_Path, Sample_Sel, coordTotSort, nb_partitions, nbclusters, pcelim, nbCPU = 1) { nb_Sunlit <- dim(coordTotSort)[1] - # define number of samples to be sampled each time during paralle processing + # define number of samples to be sampled each time during parallel processing nb_samples_per_sub <- round(1e7 / dim(Sample_Sel)[1]) # number of paralle processes to run nb.sub <- round(nb_Sunlit / nb_samples_per_sub) if (nb.sub == 0) nb.sub <- 1 - id.sub <- splitRows(as.matrix(seq(1, nb_Sunlit, by = 1), ncol = 1), ncl = nb.sub) + id.sub <- snow::splitRows(as.matrix(seq(1, nb_Sunlit, by = 1), ncol = 1), ncl = nb.sub) # compute ordination coordinates from each subpart Nb_Units_Ordin <- dim(Sample_Sel)[1] if (nbCPU>1){ - plan(multiprocess, workers = nbCPU) ## Parallelize using four cores - Schedule_Per_Thread <- ceiling(nb.sub / nbCPU) - OutPut <- future_lapply(id.sub, - FUN = ordination_parallel, coordTotSort = coordTotSort, SSD_Path = SSD_Path, - Sample_Sel = Sample_Sel, Beta_Ordination_sel = Beta_Ordination_sel, Nb_Units_Ordin = Nb_Units_Ordin, - nb_partitions = nb_partitions, nbclusters = nbclusters, pcelim = pcelim, - future.scheduling = 
Schedule_Per_Thread, - future.packages = c("vegan", "dissUtils", "R.utils", "tools", "snow", "matlab") - ) + # plan(multiprocess, workers = nbCPU) ## Parallelize using four cores + plan(multisession, workers = nbCPU) ## Parallelize using four cores + handlers(global = TRUE) + handlers("progress") + with_progress({ + p <- progressr::progressor(steps = nb.sub) + OutPut <- future_lapply(id.sub, + FUN = ordination_parallel, coordTotSort = coordTotSort, SSD_Path = SSD_Path, + Sample_Sel = Sample_Sel, Beta_Ordination_sel = Beta_Ordination_sel, Nb_Units_Ordin = Nb_Units_Ordin, + nb_partitions = nb_partitions, nbclusters = nbclusters, pcelim = pcelim, p, + future.packages = c("vegan", "dissUtils", "R.utils", "tools", "snow", "matlab")) + }) plan(sequential) } else { - OutPut <- lapply(id.sub, FUN = ordination_parallel, coordTotSort = coordTotSort, - SSD_Path = SSD_Path, Sample_Sel = Sample_Sel, - Beta_Ordination_sel = Beta_Ordination_sel, Nb_Units_Ordin = Nb_Units_Ordin, - nb_partitions = nb_partitions, nbclusters = nbclusters, pcelim = pcelim) + handlers(global = TRUE) + handlers("progress") + with_progress({ + p <- progressr::progressor(steps = nb.sub) + OutPut <- lapply(id.sub, FUN = ordination_parallel, coordTotSort = coordTotSort, + SSD_Path = SSD_Path, Sample_Sel = Sample_Sel, + Beta_Ordination_sel = Beta_Ordination_sel, Nb_Units_Ordin = Nb_Units_Ordin, + nb_partitions = nb_partitions, nbclusters = nbclusters, pcelim = pcelim, p) + }) } Ordination_est <- do.call("rbind", OutPut) gc() @@ -206,12 +216,13 @@ ordination_to_NN <- function(Beta_Ordination_sel, SSD_Path, Sample_Sel, coordTot #' @param nb_partitions numeric. Number of partitions (repetitions) to be computed then averaged. #' @param nbclusters numeric. Number of clusters defined in k-Means. #' @param pcelim numeric. Minimum contribution in percents required for a spectral species +#' @param p function. 
progressor object created with progressr::progressor(), called to update the progress bar after each subset is processed
# #' @return OutPut list of mean and SD of alpha diversity metrics #' @export ordination_parallel <- function(id.sub, coordTotSort, SSD_Path, Sample_Sel, Beta_Ordination_sel, - Nb_Units_Ordin, nb_partitions, nbclusters, pcelim) { + Nb_Units_Ordin, nb_partitions, nbclusters, pcelim, p = NULL) { # get Spectral species distribution coordPix <- coordTotSort[id.sub, ] @@ -231,6 +242,8 @@ ordination_parallel <- function(id.sub, coordTotSort, SSD_Path, Sample_Sel, Beta # get the knn closest neighbors from each kernel knn <- 3 OutPut <- compute_NN_from_ordination(MatBCtmp, knn, Beta_Ordination_sel) + + p() return(OutPut) } diff --git a/R/Lib_MapSpectralSpecies.R b/R/Lib_MapSpectralSpecies.R index 7208c958..a4851ca4 100644 --- a/R/Lib_MapSpectralSpecies.R +++ b/R/Lib_MapSpectralSpecies.R @@ -40,9 +40,9 @@ map_spectral_species <- function(Input_Image_File, Output_Dir, PCA_Files, Input_ PCA_model = NULL, SpectralFilter = NULL,Continuum_Removal= TRUE) { Kmeans_info <- NULL - if (MaxRAM == FALSE) { - MaxRAM <- 0.25 - } + # if (MaxRAM == FALSE) { + # MaxRAM <- 0.25 + # } # if no prior diversity map has been produced --> need PCA file if (!file.exists(PCA_Files)) { message("") @@ -120,7 +120,6 @@ map_spectral_species <- function(Input_Image_File, Output_Dir, PCA_Files, Input_ if (Kmeans_info$Error==FALSE){ if (Kmeans_Only==FALSE){ ## 3- ASSIGN SPECTRAL SPECIES TO EACH PIXEL - print("apply Kmeans to the whole image and determine spectral species") apply_kmeans(PCA_Files, PC_Select, Input_Mask_File, Kmeans_info, Spectral_Species_Path, nbCPU, MaxRAM) } else { print("'Kmeans_Only' was set to TRUE: kmeans was not applied on the full image") @@ -227,18 +226,23 @@ init_kmeans <- function(dataPCA, Pix_Per_Partition, nb_partitions, nbclusters, n } } -# Applies Kmeans clustering to PCA image -# -# @param PCA_Path path for the PCA image -# @param PC_Select PCs selected from PCA -# @param Input_Mask_File Path for the mask -# @param Kmeans_info information about kmeans computed in previous step -# @param nbCPU -# @param MaxRAM -# @param Spectral_Species_Path path for spectral species file to be written -# -# @return None -apply_kmeans <- function(PCA_Path, PC_Select, Input_Mask_File, Kmeans_info, Spectral_Species_Path, nbCPU = 1, MaxRAM = FALSE) { +#' Applies Kmeans clustering to PCA image and writes spectral species map +#' +#' @param PCA_Path path for the PCA image +#' @param PC_Select PCs selected from PCA +#' @param Input_Mask_File Path for the mask +#' @param Kmeans_info information about kmeans computed in previous step +#' @param Spectral_Species_Path path for spectral species file to be written +#' @param nbCPU numeric. number of CPU to work with in multiprocess task +#' @param MaxRAM numeric. 
maximum amount of RAM (in GB) used to define the
size of image pieces to be read at once +#' +#' @return None +#' @importFrom progress progress_bar +#' @export + +apply_kmeans <- function(PCA_Path, PC_Select, Input_Mask_File, Kmeans_info, + Spectral_Species_Path, nbCPU = 1, MaxRAM = FALSE) { + ImPathHDR <- get_HDR_name(PCA_Path) HDR_PCA <- read_ENVI_header(ImPathHDR) PCA_Format <- ENVI_type2bytes(HDR_PCA) @@ -249,6 +253,10 @@ apply_kmeans <- function(PCA_Path, PC_Select, Input_Mask_File, Kmeans_info, Spec MaxRAM <- 0.25 } nbPieces <- split_image(HDR_PCA, MaxRAM) + nbPieces <- nbCPU*(1+(nbPieces-1)%/%nbCPU) + # if (nbPieces<10){ + # nbPieces <- 10 + # } SeqRead_PCA <- where_to_read(HDR_PCA, nbPieces) SeqRead_Shade <- where_to_read(HDR_Shade, nbPieces) # create output file for spectral species assignment @@ -256,19 +264,11 @@ apply_kmeans <- function(PCA_Path, PC_Select, Input_Mask_File, Kmeans_info, Spec nb_partitions <- length(Kmeans_info$Centroids) HDR_SS$bands <- nb_partitions HDR_SS$`data type` <- 1 - Iter <- paste('Iter', 1:nb_partitions, collapse = ", ") - HDR_SS$`band names` <- Iter - HDR_SS$wavelength <- NULL - HDR_SS$fwhm <- NULL - HDR_SS$resolution <- NULL - HDR_SS$bandwidth <- NULL - HDR_SS$purpose <- NULL + HDR_SS$`band names` <- paste('Iter', 1:nb_partitions, collapse = ", ") + HDR_SS$wavelength <- HDR_SS$fwhm <- HDR_SS$resolution <- HDR_SS$bandwidth <- NULL + HDR_SS$`default bands` <- HDR_SS$`wavelength units` <- HDR_SS$`z plot titles` <- NULL + HDR_SS$purpose <- HDR_SS$`data gain values` <- HDR_SS$`default stretch` <- NULL HDR_SS$interleave <- 'BIL' - HDR_SS$`default bands` <- NULL - HDR_SS$`wavelength units` <- NULL - HDR_SS$`z plot titles` <- NULL - HDR_SS$`data gain values` <- NULL - HDR_SS$`default stretch` <- NULL HDR_SS$`byte order` <- get_byte_order() headerFpath <- paste(Spectral_Species_Path, ".hdr", sep = "") write_ENVI_header(HDR_SS, headerFpath) @@ -279,8 +279,12 @@ apply_kmeans <- function(PCA_Path, PC_Select, Input_Mask_File, Kmeans_info, Spec ) close(fidSS) + # for each piece of image + print(paste('Apply Kmeans to the full raster:',nbPieces,'chunks distributed on',nbCPU,'CPU')) + pb <- progress_bar$new( + format = paste('Write spectral species file [:bar] :percent in :elapsedfull',sep = ''), + total = nbPieces, clear = FALSE, width= 100) for (i in 1:nbPieces) { - print(paste("Spectral Species Piece #", i, "/", nbPieces)) Location_RW <- list() Location_RW$nbLines <- SeqRead_PCA$Lines_Per_Chunk[i] Location_RW$Byte_Start_PCA <- SeqRead_PCA$ReadByte_Start[i] @@ -289,7 +293,10 @@ apply_kmeans <- function(PCA_Path, PC_Select, Input_Mask_File, Kmeans_info, Spec Location_RW$lenBin_Shade <- SeqRead_Shade$ReadByte_End[i] - SeqRead_Shade$ReadByte_Start[i] + 1 Location_RW$Byte_Start_SS <- 1 + (SeqRead_Shade$ReadByte_Start[i] - 1) * nb_partitions Location_RW$lenBin_SS <- nb_partitions * (SeqRead_Shade$ReadByte_End[i] - SeqRead_Shade$ReadByte_Start[i]) + 1 - compute_spectral_species(PCA_Path, Input_Mask_File, Spectral_Species_Path, Location_RW, PC_Select, Kmeans_info, nbCPU) + compute_spectral_species(PCA_Path = PCA_Path, Input_Mask_File = Input_Mask_File, + Spectral_Species_Path = Spectral_Species_Path, Location_RW = Location_RW, + PC_Select = PC_Select, Kmeans_info = Kmeans_info, nbCPU = nbCPU) + pb$tick() } return(invisible()) } @@ -325,14 +332,16 @@ compute_spectral_species <- function(PCA_Path, Input_Mask_File, Spectral_Species HDR_Shade <- read_ENVI_header(ShadeHDR) Shade.Format <- ENVI_type2bytes(HDR_Shade) ImgFormat <- "Shade" - Shade_Chunk <- read_BIL_image_subset(Input_Mask_File, HDR_Shade, 
Location_RW$Byte_Start_Shade, Location_RW$lenBin_Shade, Location_RW$nbLines, Shade.Format, ImgFormat) + Shade_Chunk <- read_BIL_image_subset(Input_Mask_File, HDR_Shade, Location_RW$Byte_Start_Shade, + Location_RW$lenBin_Shade, Location_RW$nbLines, Shade.Format, ImgFormat) PCA_HDR <- get_HDR_name(PCA_Path) HDR_PCA <- read_ENVI_header(PCA_HDR) PCA_Format <- ENVI_type2bytes(HDR_PCA) # read "unfolded" (2D) PCA image ImgFormat <- "2D" - PCA_Chunk <- read_BIL_image_subset(PCA_Path, HDR_PCA, Location_RW$Byte_Start_PCA, Location_RW$lenBin_PCA, Location_RW$nbLines, PCA_Format, ImgFormat) + PCA_Chunk <- read_BIL_image_subset(PCA_Path, HDR_PCA, Location_RW$Byte_Start_PCA, Location_RW$lenBin_PCA, + Location_RW$nbLines, PCA_Format, ImgFormat) PCA_Chunk <- PCA_Chunk[, PC_Select] if (length(PC_Select) == 1) { PCA_Chunk <- matrix(PCA_Chunk, ncol = 1) @@ -355,7 +364,7 @@ compute_spectral_species <- function(PCA_Path, Input_Mask_File, Spectral_Species nbSubsets <- ceiling(length(keepShade) / nbSamples_Per_Rdist) PCA_Chunk <- snow::splitRows(PCA_Chunk, nbSubsets) if (nbCPU>1){ - plan(multiprocess, workers = nbCPU) ## Parallelize using four cores + plan(multiprocess, workers = nbCPU) Schedule_Per_Thread <- ceiling(nbSubsets / nbCPU) res <- future_lapply(PCA_Chunk, FUN = RdistList, CentroidsArray = CentroidsArray, nbClusters = nrow(Kmeans_info$Centroids[[1]]), diff --git a/R/Lib_PerformPCA.R b/R/Lib_PerformPCA.R index 5a66d6d2..c3ebeee4 100644 --- a/R/Lib_PerformPCA.R +++ b/R/Lib_PerformPCA.R @@ -83,7 +83,7 @@ perform_PCA <- function(Input_Image_File, Input_Mask_File, Output_Dir, Continuu SpectralFilter <- CleanData$Spectral # Compute PCA #1 on DataSubset - print("perform PCA#1 on the subset image") + print(paste('perform',TypePCA,'on the subset image')) if (TypePCA == "PCA" | TypePCA == "SPCA") { PCA_model <- pca(DataSubset, TypePCA) # } else if (TypePCA == "NLPCA") { @@ -105,7 +105,7 @@ perform_PCA <- function(Input_Image_File, Input_Mask_File, Output_Dir, Continuu # after PCA transformation. 
# In order to exclude these pixels, we compute mean and SD for the 3 first # components and exclude all pixels showing values ouside "mean+-3SD" range - print("perform 2nd filtering: Exclude extreme PCA values") + print("Exclude extreme PCA values") if (dim(PCA_model$x)[2] > 5) { PCsel <- 1:5 } else { @@ -142,7 +142,7 @@ perform_PCA <- function(Input_Image_File, Input_Mask_File, Output_Dir, Continuu CleanData <- rm_invariant_bands(DataSubset, SpectralFilter) DataSubset <- CleanData$DataMatrix SpectralFilter <- CleanData$Spectral - print("perform PCA#2 on the subset image") + print(paste('perform',TypePCA,'#2 on the subset image')) if (TypePCA == "PCA" | TypePCA == "SPCA") { PCA_model <- pca(DataSubset, TypePCA) # } else if (TypePCA == "NLPCA") { @@ -160,7 +160,6 @@ perform_PCA <- function(Input_Image_File, Input_Mask_File, Output_Dir, Continuu PCA_model$Nb_PCs <- Nb_PCs # PCA_model$x <- NULL # CREATE PCA FILE CONTAINING ONLY SELECTED PCs - print("Apply PCA model to the whole image") Output_Dir_PCA <- define_output_subdir(Output_Dir, Input_Image_File, TypePCA, "PCA") PCA_Files <- file.path(Output_Dir_PCA, paste("OutputPCA_", Nb_PCs, "_PCs", sep = "")) write_PCA_raster(Input_Image_File = Input_Image_File, Input_Mask_File = Input_Mask_File, @@ -326,20 +325,23 @@ filter_PCA <- function(Input_Image_File, HDR, Input_Mask_File, Shade_Update, return(Shade_Update) } -# writes an ENVI image corresponding to PCA -# -# @param Input_Image_File path for the raster on which PCA is applied -# @param Input_Mask_File path for the corresponding mask -# @param PCA_Path path for resulting PCA -# @param PCA_model PCA model description -# @param Spectral spectral information to be used in the image -# @param Nb_PCs number of components kept in the resulting PCA raster -# @param CR boolean. If TRUE continuum removal is performed. -# @param TypePCA PCA, SPCA, NLPCA -# @param nbCPU number of CPUs to process data -# @param MaxRAM max RAM when initial image is read (in Gb) -# -# @return None +#' writes an ENVI image corresponding to PCA +#' +#' @param Input_Image_File path for the raster on which PCA is applied +#' @param Input_Mask_File path for the corresponding mask +#' @param PCA_Path path for resulting PCA +#' @param PCA_model PCA model description +#' @param Spectral spectral information to be used in the image +#' @param Nb_PCs number of components kept in the resulting PCA raster +#' @param Continuum_Removal boolean. If TRUE continuum removal is performed. 
+#' @param TypePCA PCA, SPCA, NLPCA +#' @param nbCPU number of CPUs to process data +#' @param MaxRAM max RAM when initial image is read (in Gb) +#' +#' @return None +#' @importFrom progress progress_bar +#' @export + write_PCA_raster <- function(Input_Image_File, Input_Mask_File, PCA_Path, PCA_model, Spectral, Nb_PCs, Continuum_Removal, TypePCA, nbCPU = 1, MaxRAM = 0.25) { ImPathHDR <- get_HDR_name(Input_Image_File) @@ -393,13 +395,22 @@ write_PCA_raster <- function(Input_Image_File, Input_Mask_File, PCA_Path, PCA_mo # } # prepare for sequential processing: SeqRead_Image informs about byte location to read nbPieces <- split_image(HDR, LimitSizeGb = MaxRAM) + nbPieces <- nbCPU*(1+(nbPieces-1)%/%nbCPU) + + # if (nbPieces<10){ + # nbPieces <- 10 + # } SeqRead_Image <- where_to_read(HDR, nbPieces) SeqRead_Shade <- where_to_read(HDR_Shade, nbPieces) SeqRead_PCA <- where_to_read(HDR_PCA, nbPieces) # for each piece of image + print(paste('Apply PCA model to the full raster:',nbPieces,'chunks distributed on',nbCPU,'CPU')) + pb <- progress_bar$new( + format = paste('Apply ',TypePCA,' on the image [:bar] :percent in :elapsedfull',sep = ''), + total = nbPieces, clear = FALSE, width= 100) for (i in 1:nbPieces) { - print(paste("PCA Piece #", i, "/", nbPieces)) + # print(paste("PCA Piece #", i, "/", nbPieces)) # read image and mask data nbLines <- SeqRead_Image$Lines_Per_Chunk[i] ImgFormat <- "2D" @@ -455,6 +466,7 @@ write_PCA_raster <- function(Input_Image_File, Input_Mask_File, PCA_Path, PCA_mo PCA_Chunk <- aperm(PCA_Chunk, c(2, 3, 1)) writeBin(c(PCA_Chunk), fidOUT, size = PCA_Format$Bytes, endian = .Platform$endian, useBytes = FALSE) close(fidOUT) + pb$tick() } list <- ls() rm(list = list) diff --git a/man/apply_kmeans.Rd b/man/apply_kmeans.Rd new file mode 100644 index 00000000..0c1edc14 --- /dev/null +++ b/man/apply_kmeans.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Lib_MapSpectralSpecies.R +\name{apply_kmeans} +\alias{apply_kmeans} +\title{Applies Kmeans clustering to PCA image and writes spectral species map} +\usage{ +apply_kmeans( + PCA_Path, + PC_Select, + Input_Mask_File, + Kmeans_info, + Spectral_Species_Path, + nbCPU = 1, + MaxRAM = FALSE +) +} +\arguments{ +\item{PCA_Path}{path for the PCA image} + +\item{PC_Select}{PCs selected from PCA} + +\item{Input_Mask_File}{Path for the mask} + +\item{Kmeans_info}{information about kmeans computed in previous step} + +\item{Spectral_Species_Path}{path for spectral species file to be written} + +\item{nbCPU}{numeric. number of CPU to work with in multiprocess task} + +\item{MaxRAM}{numeric. 
maximum amount of RAM (in GB) used to define the
size of image pieces to be read at once} +} +\value{ +None +} +\description{ +Applies Kmeans clustering to PCA image and writes spectral species map +} diff --git a/man/map_alpha_div.Rd b/man/map_alpha_div.Rd index 03e024ac..d434ac5b 100644 --- a/man/map_alpha_div.Rd +++ b/man/map_alpha_div.Rd @@ -13,9 +13,9 @@ map_alpha_div( MinSun = 0.25, pcelim = 0.02, Index_Alpha = "Shannon", - FullRes = TRUE, - LowRes = FALSE, - MapSTD = FALSE, + FullRes = FALSE, + LowRes = TRUE, + MapSTD = TRUE, nbCPU = FALSE, MaxRAM = FALSE, ClassifMap = FALSE diff --git a/man/map_beta_div.Rd b/man/map_beta_div.Rd index d9543bd2..adc7e6d2 100644 --- a/man/map_beta_div.Rd +++ b/man/map_beta_div.Rd @@ -15,8 +15,8 @@ map_beta_div( MinSun = 0.25, pcelim = 0.02, scaling = "PCO", - FullRes = TRUE, - LowRes = FALSE, + FullRes = FALSE, + LowRes = TRUE, nbCPU = 1, MaxRAM = 0.25, ClassifMap = FALSE, diff --git a/man/ordination_parallel.Rd b/man/ordination_parallel.Rd index 1b92fb1f..da736d4d 100644 --- a/man/ordination_parallel.Rd +++ b/man/ordination_parallel.Rd @@ -13,7 +13,8 @@ ordination_parallel( Nb_Units_Ordin, nb_partitions, nbclusters, - pcelim + pcelim, + p = NULL ) } \arguments{ @@ -34,6 +35,8 @@ ordination_parallel( \item{nbclusters}{numeric. Number of clusters defined in k-Means.} \item{pcelim}{numeric. Minimum contribution in percents required for a spectral species} + +\item{p}{function.} } \value{ OutPut list of mean and SD of alpha diversity metrics diff --git a/man/write_PCA_raster.Rd b/man/write_PCA_raster.Rd new file mode 100644 index 00000000..1925d216 --- /dev/null +++ b/man/write_PCA_raster.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Lib_PerformPCA.R +\name{write_PCA_raster} +\alias{write_PCA_raster} +\title{writes an ENVI image corresponding to PCA} +\usage{ +write_PCA_raster( + Input_Image_File, + Input_Mask_File, + PCA_Path, + PCA_model, + Spectral, + Nb_PCs, + Continuum_Removal, + TypePCA, + nbCPU = 1, + MaxRAM = 0.25 +) +} +\arguments{ +\item{Input_Image_File}{path for the raster on which PCA is applied} + +\item{Input_Mask_File}{path for the corresponding mask} + +\item{PCA_Path}{path for resulting PCA} + +\item{PCA_model}{PCA model description} + +\item{Spectral}{spectral information to be used in the image} + +\item{Nb_PCs}{number of components kept in the resulting PCA raster} + +\item{Continuum_Removal}{boolean. If TRUE continuum removal is performed.} + +\item{TypePCA}{PCA, SPCA, NLPCA} + +\item{nbCPU}{number of CPUs to process data} + +\item{MaxRAM}{max RAM when initial image is read (in Gb)} +} +\value{ +None +} +\description{ +writes an ENVI image corresponding to PCA +}
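Reviewer note (illustration only, not part of the patch): the parallel code paths touched here (compute_alpha_metrics, ordination_to_NN) share one pattern: plan(multisession) replaces the deprecated plan(multiprocess), the number of chunks is rounded up to a multiple of nbCPU, and a progressr progressor created inside with_progress() is passed to the worker function and ticked once per chunk. A minimal sketch of that pattern follows; process_chunk, chunks and the nbPieces value are illustrative placeholders, not package functions.

```r
library(future)
library(future.apply)
library(progressr)

# dummy per-chunk worker; the progressor 'p' is ticked once per chunk,
# mirroring the if (!is.null(p)) p() guard used in convert_PCA_to_SSD()
process_chunk <- function(chunk, p = NULL) {
  out <- sum(chunk)          # placeholder for the real per-chunk computation
  if (!is.null(p)) p()
  out
}

nbCPU <- 2
nbPieces <- 5
# round the number of chunks up to a multiple of nbCPU, as the patch does:
nbPieces <- nbCPU * (1 + (nbPieces - 1) %/% nbCPU)
chunks <- split(seq_len(120), cut(seq_len(120), nbPieces, labels = FALSE))

plan(multisession, workers = nbCPU)   # multisession replaces multiprocess
handlers("progress")                  # progress-bar handler (uses the 'progress' package added in DESCRIPTION)
with_progress({
  p <- progressor(steps = nbPieces)
  res <- future_lapply(chunks, FUN = process_chunk, p = p)
})
plan(sequential)
```

Passing the progressor explicitly through the new p argument lets the same worker run with or without progress reporting, since convert_PCA_to_SSD() guards the call with if (!is.null(p)).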
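The sequential chunk loops (Write_Big_Image, apply_kmeans, write_PCA_raster) likewise drop their per-chunk print() messages in favour of a progress::progress_bar ticked once per iteration. A minimal sketch of that pattern, reusing the format string from the patch with a placeholder loop body:

```r
library(progress)

nbPieces <- 8
pb <- progress_bar$new(
  format = 'Writing raster [:bar] :percent in :elapsedfull',
  total = nbPieces, clear = FALSE, width = 100)

for (i in seq_len(nbPieces)) {
  Sys.sleep(0.2)   # placeholder for reading, processing and writing one chunk
  pb$tick()        # advance the bar instead of printing a message per chunk
}
```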