PecanProject · mdietze · Aug 26, 2017 · Aug 2, 2017 · Aug 2, 2017 · Aug 3, 2017
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,12 +17,21 @@ For more information about this file see also [Keep a Changelog](http://keepacha
 
 ### Added
 - Expanded initial conditions workflow for pool-based models, including PEcAn.data.land::prepare_pools to calculate pools from IC file (to be coupled with write.configs)
+
+- #1594 shiny/workflowPlot Adding interactiveness using ggploltly
+- #1594 shiny/workflowPlot Load outputs from multiple runs of the model
+- #1594 shiny/workflowPlot Ways to toggle geometries (e.g. geom_point vs. geom_line).
+- #1594 shiny/workflowPlot Smoothing using geom_smooth (Slider for specifying moving window width)
+- #1594 shiny/workflowPlot Comparing model output vs loaded data according to [tutorial](https://github.com/PecanProject/pecan/blob/develop/documentation/tutorials/AnalyzeOutput/modelVSdata.Rmd) 
+
 - Allow SIPNET and DALEC met files and model2netcdf to start or end mid year
 
+
 ### Changed
 - Clean up directory structure:
     * Move `base` packages (`utils`, `settings`, `db`, `visualizaton`) to a `base` directory, for consistency with `modules` and `models`
     * Move `logger.*` functions out of the `PEcAn.utils` package and into the `pecan.logger` package
+- #1594 shiny/workflowPlot Refactoring of code. `get_workflow_ids` in db/R/query.dplyr.R changed with `ensemble = FALSE`. Also allowing to load all workflow IDs. `load_data_single_run` and `var_names_all` also moved from shiny/workflowPlot/server.R to query.dplyr.R
 
 ## [1.5.10] - Prerelease
 ### Added

diff --git a/base/db/R/query.dplyr.R b/base/db/R/query.dplyr.R
@@ -143,10 +143,16 @@ get_workflow_ids <- function(bety, session, all.ids=FALSE) {
     ids <- unlist(query[names(query) == "workflow_id"], use.names = FALSE)
   } else {
     # Get all workflow IDs
-    ids <- workflows(bety, ensemble = TRUE) %>%
-      dplyr::distinct(workflow_id) %>%
-      dplyr::pull() %>%
-      sort(decreasing = TRUE)
+
+    ids <- workflows(bety, ensemble = FALSE) %>% distinct(workflow_id) %>% collect %>% 
+      .[["workflow_id"]] %>% sort(decreasing = TRUE)
+    # pull(.,workflow_id) %>% sort(decreasing = TRUE)
+
+#    ids <- workflows(bety, ensemble = TRUE) %>%
+#      dplyr::distinct(workflow_id) %>%
+#      dplyr::pull() %>%
+#      sort(decreasing = TRUE)
+
   }
   return(ids)
 }  # get_workflow_ids
@@ -271,9 +277,11 @@ load_data_single_run <- function(bety, workflow_id, run_id) {
         x <- ncdays2date(ncdf4::ncvar_get(nc, 'time'), ncdf4::ncatt_get(nc, 'time'))
         y <- ncdf4::ncvar_get(nc, var_name)
         b <- !is.na(x) & !is.na(y) & sw != 0
-        dates <- if (is.na(dates)) x[b] else c(dates, x[b])
-        dates <- as.Date(dates)
-        vals <- if (is.na(vals)) y[b] else c(vals, y[b])
+
+        dates <- if(is.na(dates)) x[b] else c(dates, x[b])
+        dates <- as.POSIXct(dates)
+        vals <- if(is.na(vals)) y[b] else c(vals, y[b])
+
         xlab <- "Time"
         # Values of the data which we will plot
         valuesDF <- data.frame(dates,vals)

diff --git a/shiny/workflowPlot/helper.R b/shiny/workflowPlot/helper.R
@@ -1,3 +1,4 @@
+# Helper function which checks and downloads required packages
 checkAndDownload<-function(packageNames) {
   for(packageName in packageNames) {
     if(!isInstalled(packageName)) {
@@ -9,9 +10,11 @@ checkAndDownload<-function(packageNames) {
 isInstalled <- function(mypkg){
   is.element(mypkg, installed.packages()[,1])
 }
-checkAndDownload(c('plotly','scales','dplyr'))
+# checkAndDownload(c('plotly','scales','dplyr'))
+
 # Stashing Code for file upload to shiny app 
 # Based on https://shiny.rstudio.com/gallery/file-upload.html
+
 # ui.R 
 # tags$hr(),
 # fileInput('file1', 'Choose CSV File to upload data',
@@ -29,7 +32,10 @@ checkAndDownload(c('plotly','scales','dplyr'))
 #                'Double Quote'='"',
 #                'Single Quote'="'"),
 #              ''),
+# textInput("inputRecordID", "Input Record ID for file", "1000011260"),
+# textInput("formatID", "Format ID for file (Default CSV)", "5000000002"),
 # actionButton("load_data", "Load External Data")
+
 # server.R 
 # loadExternalData <-eventReactive(input$load_data,{
 #   inFile <- input$file1

diff --git a/shiny/workflowPlot/server.R b/shiny/workflowPlot/server.R
@@ -2,23 +2,25 @@ library(PEcAn.visualization)
 library(PEcAn.DB)
 library(PEcAn.settings)
 library(PEcAn.benchmark)
+library(PEcAn.utils)
 library(shiny)
 library(ncdf4)
 library(ggplot2)
 # Helper allows to load functions and variables that could be shared both by server.R and ui.R 
-source('helper.R')
+# source('helper.R')
 library(plotly)
 library(scales)
 library(lubridate)
 library(dplyr)
+library(reshape2)
 # Maximum size of file allowed to be uploaded: 100MB 
 options(shiny.maxRequestSize=100*1024^2) 
 # Define server logic
 server <- shinyServer(function(input, output, session) {
   bety <- betyConnect()
   # Update all workflow ids
   observe({
-    # Ideally get_workflow_ids function (line 137) in db/R/query.dplyr.R should take a flag to check
+    # get_workflow_ids function (line 137) in db/R/query.dplyr.R takes a flag to check
     # if we want to load all workflow ids.
     # get_workflow_id function from query.dplyr.R
     all_ids <- get_workflow_ids(bety, session,all.ids=TRUE)
@@ -33,7 +35,7 @@ server <- shinyServer(function(input, output, session) {
     # Will return a list
     run_id_list <- c()
     for(w_id in w_ids){
-      # For all the workflow ids  
+      # For all the workflow ids
       r_ids <- get_run_ids(bety, w_id)
       for(r_id in r_ids){
         # Each workflow id can have more than one run ids
@@ -98,27 +100,25 @@ server <- shinyServer(function(input, output, session) {
   # Allows to load actual data (different from model output) following the tutorial
   # https://github.com/PecanProject/pecan/blob/develop/documentation/tutorials/AnalyzeOutput/modelVSdata.Rmd
   # @params: bety,settings,File_path,File_format
-  loadObservationData <- function(bety,settings,File_path,File_format){
-    start.year<-as.numeric(lubridate::year(settings$run$start.date))
-    end.year<-as.numeric(lubridate::year(settings$run$end.date))
-    site.id<-settings$run$site$id
+  # loadObservationData <- function(bety,settings,File_path,File_format){
+  loadObservationData <- function(bety,inputs_df){
+    input_id <- inputs_df$input_id
+    # File_format <- getFileFormat(bety,input_id)
+    File_format <- PEcAn.DB::query.format.vars(bety = bety, input.id = input_id)
+    start.year <- as.numeric(lubridate::year(inputs_df$start_date))
+    end.year <- as.numeric(lubridate::year(inputs_df$end_date))
+    File_path <- inputs_df$filePath
+    # TODO There is an issue with the db where file names are not saved properly. 
+    # To make it work with the VM, uncomment the line below
+    # File_path <- paste0(inputs_df$filePath,'.csv')
+    site.id <- inputs_df$site_id
     site<-PEcAn.DB::query.site(site.id,bety$con)
     observations<-PEcAn.benchmark::load_data(data.path = File_path, format= File_format, time.row = File_format$time.row,  site = site, start_year = start.year, end_year = end.year) 
     return(observations)
   }
-  # This function as a wrapper over PEcAn.DB::query.format.vars where 
-  # file format can be retrieved using either by input or format id. 
-  getFileFormat <- function(bety,input.id,format.id=NULL){
-    # Retaining the code for getting file format using format Id as in tutorial
-    # File_format <- PEcAn.DB::query.format.vars(bety = bety, format.id = format.id) 
-    File_format <- PEcAn.DB::query.format.vars(bety = bety, input.id = input.id)
-    return(File_format)
-  }
   getSettingsFromWorkflowId <- function(bety,workflowID){
-    basePath <- tbl(bety, 'workflows') %>% filter(id %in% workflowID) %>% pull(folder)
+    basePath <- dplyr::tbl(bety, 'workflows') %>% dplyr::filter(id %in% workflowID) %>% dplyr::pull(folder)
     configPath <- file.path(basePath, 'pecan.CONFIGS.xml')
-    # Second way of proving configPath. More of a hack
-    # configPath <- paste0("~/output/PEcAn_",workflowID,"/pecan.CONFIGS.xml")
     settings<-PEcAn.settings::read.settings(configPath)
     return(settings)
   }
@@ -134,15 +134,33 @@ server <- shinyServer(function(input, output, session) {
     }
     updateSelectizeInput(session, "all_site_id", choices=site_id_list)
   })
-  # Get input id from selected site id
+  # Get input id from selected site id. Returns inputs_df which is used to load observation data
   getInputs <- function(bety,site_Id){
-    inputIds <- tbl(bety, 'inputs') %>% filter(site_id %in% site_Id) %>% distinct(id) %>% pull(id)
-    inputIds <- sort(inputIds)
-    return(inputIds)
+    # Subsetting the input id list based on the current (VM) machine
+    my_hostname <- PEcAn.utils::fqdn()
+    my_machine_id <- dplyr::tbl(bety, 'machines') %>% dplyr::filter(hostname == my_hostname) %>% dplyr::pull(id)
+    # Inner join 'inputs' table with 'dbfiles' table
+    # inputs_df would contain all the information about the site and input id required for
+    # the tutorial mentioned above to compare model run with actual observations
+    inputs_df <- dplyr::tbl(bety, 'dbfiles') %>% 
+      dplyr::filter(container_type == 'Input', machine_id == my_machine_id) %>%
+      dplyr::inner_join(tbl(bety, 'inputs')  %>% dplyr::filter(site_id %in% site_Id), by = c('container_id' = 'id')) %>%
+      dplyr::collect()
+    # Order by container id (==input id)
+    inputs_df <- inputs_df[order(inputs_df$container_id),]
+    # Mutate column as (input id, name) to be shown to the user
+    inputs_df <- inputs_df %>% 
+      dplyr::mutate(input_selection_list = paste(inputs_df$container_id, inputs_df$name),
+             filePath = paste0(inputs_df$file_path,'/', inputs_df$file_name)) %>%
+      dplyr::select(input_id = container_id,filePath,input_selection_list,start_date,end_date,site_id,name,
+             machine_id,file_name,file_path)
+    return(inputs_df)
   }
+  # Update input id list as (input id, name)
   observe({
     req(input$all_site_id)
-    updateSelectizeInput(session, "all_input_id", choices=getInputs(bety,input$all_site_id))
+    inputs_df <- getInputs(bety,c(input$all_site_id))
+    updateSelectizeInput(session, "all_input_id", choices=inputs_df$input_selection_list)
   })
   # Renders ggplotly 
   output$outputPlot <- renderPlotly({
@@ -154,15 +172,14 @@ server <- shinyServer(function(input, output, session) {
     )
     # Load data
     masterDF <- loadNewData()
-    # masterDF <- rbind(modelData,externalData)
-    # Convert from factor to character. For subsetting 
+    # Convert from factor to character. For subsetting
     masterDF$var_name <- as.character(masterDF$var_name)
-    # Convert to factor. Required for ggplot 
+    # Convert to factor. Required for ggplot
     masterDF$run_id <- as.factor(as.character(masterDF$run_id))
     # Filter by variable name
     df <- masterDF %>%
       dplyr::filter(var_name == input$variable_name)
-    # make dynamic slider 
+    # Another way to make dynamic slider
     # https://stackoverflow.com/questions/18700589/interactive-reactive-change-of-min-max-values-of-sliderinput
     # output$slider <- renderUI({
     #   sliderInput("smooth_n", "Value for smoothing:", min=0, max=nrow(df), value=80)
@@ -172,8 +189,10 @@ server <- shinyServer(function(input, output, session) {
     title <- unique(df$title)
     xlab <- unique(df$xlab)
     ylab <- unique(df$ylab)
-    # ggplot function for now scatter plots.
-    plt <- ggplot(df, aes(x=dates, y=vals, color=run_id)) 
+    # ggplot function for scatter plots.
+    plt <- ggplot(df, aes(x=dates, y=vals, color=run_id))
+    # model_geom <- switch(input$plotType, scatterPlot = geom_point, lineChart = geom_line)
+    # plt <- plt + model_geom() 
     # Toggle chart type using switch
     switch(input$plotType,
            "scatterPlot"  = {
@@ -183,48 +202,51 @@ server <- shinyServer(function(input, output, session) {
              plt <- plt + geom_line()
            }
     )
-    plt <- plt + labs(title=title, x=xlab, y=ylab) + geom_smooth(n=input$smooth_n)
-    # Check if user wants to load external data
+    # Check if user wants to load external data (==observations)
     # Similar to using event reactive
-    if (input$load_data>0) { 
-      # File_format <- getFileFormat(bety,input$formatID)
-      # Retaining the code for getting file format using inputRecordID
-      File_format <- getFileFormat(bety,input$all_input_id)
-      ids_DF <- parse_ids_from_input_runID(input$all_run_id)
-      settings <- getSettingsFromWorkflowId(bety,ids_DF$wID[1])
-      inFile <- input$fileUploaded
-      filePath <- PEcAn.DB::dbfile.file(type = 'Input', id = input$all_input_id,con = bety$con)
-      externalData <- loadObservationData(bety,settings,filePath,File_format)
-      # If variable found in the uploaded file 
+    if (input$load_data>0) {
+      # Input ID is of the form (input id, Name). Split by space and use the first element
+      inputs_df <- getInputs(bety,c(input$all_site_id))
+      inputs_df <- inputs_df %>% dplyr::filter(input_selection_list == input$all_input_id)
+      externalData <- loadObservationData(bety,inputs_df)
+      # If variable found in the uploaded file.
+      # TODO for now, actual observations can be plotted again a single model run (particular run id)
+      # Have to enhance to allow multiple run ids
       if (input$variable_name %in% names(externalData)){
-        externalData <- externalData %>% dplyr::select(posix,dplyr::one_of(input$variable_name))
-        names(externalData) <- c("dates","vals")
-        externalData$dates <- as.Date(externalData$dates) 
+        # No need for subsetting though as align data returns for now only the provided variable name
+        # externalData <- externalData %>% dplyr::select(posix,dplyr::one_of(input$variable_name))
+        var = input$variable_name
+        df = df %>% select(posix = dates, var = vals)
+        colnames(df)[2]<-paste0(var) # Required for align data to work
+        aligned_data = PEcAn.benchmark::align_data(model.calc = df, obvs.calc = externalData, var =var, align_method = "match_timestep")
+        colnames(aligned_data) <- c("model","observations","Date") # Order returned by align_data
+        # Melt dataframe to plot two types of columns together
+        aligned_data <- reshape2::melt(aligned_data, "Date")
         data_geom <- switch(input$data_geom, point = geom_point, line = geom_line)
-        plt <- plt + data_geom(data = externalData,aes(x=dates, y=vals),color='black', linetype = 'dashed')
+        plt <- ggplot(aligned_data, aes(x=Date, y=value, color=variable)) + data_geom()
         output$outputNoVariableFound <- renderText({
-          paste0("Plotting data outputs in black")
+          paste0("Plotting data outputs.")
         })
       }
       # Shiny output if variable not found
       else {
         output$outputNoVariableFound <- renderText({
-          paste0("Not plotting uploaded data because the column is absent. Select another variable")
+          paste0("Data related to variable not found in the observations uploaded. Select another variable")
         })
       }
     }
-    # Earlier smoothing and y labels
+    plt <- plt + labs(title=title, x=xlab, y=ylab) + geom_smooth(n=input$smooth_n)
+    # Earlier code for smoothing, y labels, color and fill values
+    # Retaining if we want to use ggplot instead of ggplotly
     # geom_smooth(aes(fill = "Spline fit")) +
     # scale_y_continuous(labels=fancy_scientific) +
-    # Earlier color and fill values
     # scale_color_manual(name = "", values = "black") +
-    # scale_fill_manual(name = "", values = "grey50") 
+    # scale_fill_manual(name = "", values = "grey50")
     plt<-ggplotly(plt)
     # Not able to add icon over ggplotly
     # add_icon()
   })
-  # Shiny server closes here  
-})
-
+}) # Shiny server closes here  
+# To run the shiny app locally
 # runApp(port=6480, launch.browser=FALSE)
 # runApp(port=5658, launch.browser=FALSE)
diff --git a/shiny/workflowPlot/ui.R b/shiny/workflowPlot/ui.R
@@ -1,13 +1,13 @@
 library(shiny)
+library(plotly)
 # Helper allows to load functions and variables that could be shared both by server.R and ui.R 
-source('helper.R')
+# source('helper.R')
 # Define UI
 ui <- shinyUI(fluidPage(
   # Application title
   titlePanel("Workflow Plots"),
   sidebarLayout(
     sidebarPanel(
-      # helpText(),
       p("Please select the workflow IDs to continue. You can select multiple IDs"),
       selectizeInput("all_workflow_id", "Mutliple Workflow IDs", c(),multiple=TRUE),
       p("Please select the run IDs. You can select multiple IDs"),
@@ -24,14 +24,9 @@ ui <- shinyUI(fluidPage(
       tags$hr(),
       tags$hr(),
       selectizeInput("all_site_id", "Select Site ID", c()),
+      # If loading multiple sites in future
+      # selectizeInput("all_site_id", "Select Site ID", c(), multiple=TRUE),
       selectizeInput("all_input_id", "Select Input ID", c()),
-      # fileInput('fileUploaded', 'Choose file to upload data'
-      #           # accept=c('text/csv', 
-      #           #          'text/comma-separated-values,text/plain', 
-      #           #          '.csv')
-      #           ),
-      # textInput("inputRecordID", "Input Record ID for file", "1000011260"),
-      # textInput("formatID", "Format ID for file (Default CSV)", "5000000002"),
       radioButtons("data_geom", "Plot Type (for loaded data)", 
                    c("Scatter Plot" = "point", 
                      "Line Chart" = "line"),