Ed_Gravy
Ed_Gravy

Reputation: 2033

R Shiny Error in group_by: Must group by variables found in `.data`

I am trying to create a shiny app that:

  1. Let's the user upload a CSV file
  2. Select independent (year or month column) and dependent variables (tmean/TMin/TMax columns)
  3. Based on the selected variables, the app will return a regression summary.
  4. Based on the selected variables, the app will also return a geom_line plot with the regression/slope line.

However I get this error:

Warning: Error in group by: Must group by variables found in `.data`.
x Column `2010_2021` is not found.

How can I fix this?

Data:

The file can be downloaded from here.

Code:

library(tidyverse)
library(shiny)

    options(shiny.maxRequestSize=1024*1024^2)
    ui =   navbarPage(
       # 1st tab
       tabPanel("Regression Analysis",
                dataTableOutput('mytable'),
                sidebarLayout(sidebarPanel(fileInput("file1", "Please choose a CSV file",
                                                     multiple = T,
                                                     accept = c("text/csv",
                                                                "text/comma-separated-values,text/plain",
                                                                ".csv")),
                                           tags$hr(),
                                           checkboxInput("header", "Header", TRUE),
                                           radioButtons("sep", "Separator",
                                                        choices = c(Comma = ",",
                                                                    Semicolon = ";",
                                                                    Tab = "\t"),
                                                        selected = ","),
                                           radioButtons("quote", "Quote",
                                                        choices = c(None = "",
                                                                    "Double Quote" = '"',
                                                                    "Single Quote" = "'"),
                                                        selected = '"'),
                                           tags$hr(),
                                           radioButtons("disp", "Display",
                                                        choices = c(Head = "head",
                                                                    All = "all"),
                                                        selected = "all")),
                              
                mainPanel(
                    tableOutput("contents")),),
                actionButton("choice", "Define Regression Variables"),
                selectInput("independent", "Independent Variables:", choices = NULL, multiple = T),
                uiOutput("dependent1"),
                verbatimTextOutput("regTab")),

        # 2nd tab        
        tabPanel("Regression Plots",
    #Show plots side by side
    splitLayout(
    plotlyOutput(outputId = "RegPlots")
    ))
)

 Tell the server how to assemble inputs into outputs
    server = function(input, output, session) {
     mydf = reactive({
            
            # input$file1 will be NULL initially. After the user selects
            # and uploads a file, header of the will be displayed by default,
            # or all rows if selected, will be shown.
            
            req(input$file1)
            
            df = read.csv(input$file1$datapath,
                           header = input$header,
                           sep = input$sep,
                           quote = input$quote)
            
            if(input$disp == "head") {
                return(head(df))
            }
            else {
                return(df)
            }
            
        })
        
        output$contents = renderTable({ #!!! Table is not showing for some reason
            req(mydf())
            mydf()
        })
        
        # Code for allowing the user to select the variables/columns of interest 
        info = eventReactive(input$choice, {
           req(mydf())
           f = mydf()
           f
        })
        
        observeEvent(input$choice, { ## to update only when you click on the actionButton 
            req(mydf())
            updateSelectInput(session,"independent", "Please select an independent variable:", choices = names(mydf()))
            })
        
        output$dependent1 = renderUI({
            req(mydf(), input$independent)
            radioButtons("dependent1", "Select a dependent variable:",choices=names(mydf())[!names(mydf()) %in% as.character(input$independent)])
        })
        
        ###  need to build your formula correctly; only using one dependent variable for demonstration; you can update as necessary
        ## lm1 = reactive({lm(reformulate(input$IndVar1,input$IndVar2, input$DepVar), data = RegData)})
        
        runRegression = reactive({
            req(mydf(), input$independent, input$dependent1)
            lm(reformulate(input$independent, input$dependent1),data=mydf())
            
            })
        
        output$regTab = renderPrint({
            req(runRegression())
            if(!is.null(input$independent)){
                summary(runRegression())
            } else {
                print(data.frame(Warning="Please select Model Parameters."))
            } 
        })

output$RegPlots = renderPlotly({
          dff = mydf()
          dff %>%   group_by(.data[[year]]) %>% 
            summarise(avgTemp = mean(.data[[input$independent]])) %>% 
            ungroup %>% 
          ggplot(aes(x = .data[[input$dependent1]], y = avgTmean))
          geom_point(color = "deepskyblue4") +
          geom_smooth(method = lm, linetype = 2, color = "red4", se = F) +
          theme(text = element_text(size = 16)) +
          xlab("Year") +
          ylab("Temperature (C)") +
          ggtitle("1980-2021 Regression Plot")
        })

}

The plotting code in Shiny has been adapted from the following R Code and it returns the following plot which I want in the app:

df = df %>% # df is the CSV provided in the question
       group_by(year) %>% 
       summarise(avgTmean = mean(tmean)) %>% 
       ungroup

  df %>% 
  ggplot(aes(x= year, y=avgTmean)) +
  #geom_line(stat = "identity") +
  geom_point(color = "deepskyblue4") +
  geom_smooth(method = lm, linetype = 2, color = "red4", se = F) +
  theme(text = element_text(size = 16)) +
  #geom_hline(yintercept = mean(month.df.bwrd_clip.2005.2021$tmean), color="blue") +
  xlab("Year") +
  ylab("Mean Temperature (C)") +
  ggtitle("1980-2021 Regression Plot")

Plot:

enter image description here

Upvotes: 0

Views: 4247

Answers (1)

starja
starja

Reputation: 10375

Several points:

  • you need .data only in the cases where you want to access data via a variable that contains a string, if the name of the symbol (variable) is the variable name in the data.frame, you don't need .data
  • typo, you've forgot the + after ggplot
  • to avoid errors due to uninitialised inputs, use a req in your plot rendering
output$RegPlots = renderPlotly({
  req(mydf(), input$independent, input$dependent)
  
  mydf() %>%   group_by(year) %>% 
    summarise(avgTemp = mean(.data[[input$independent]])) %>% 
    ungroup %>% 
    ggplot(aes(x = .data[[input$dependent]], y = avgTmean)) +
    geom_point(color = "deepskyblue4") +
    geom_smooth(method = lm, linetype = 2, color = "red4", se = F) +
    theme(text = element_text(size = 16)) +
    xlab("Year") +
    ylab("Temperature (C)") +
    ggtitle("1980-2021 Regression Plot")
})

Upvotes: 1

Related Questions