89_Simple
89_Simple

Reputation: 3805

Running a function in dplyr gives wrong output

My sample data consists of daily rainfall and temperature for days 1 to 365 of the years 1981 and 1982:

  # Reproducible sample data: one row per day of year (1-365) for 1981 and
  # 1982, with integer rainfall drawn from 0:30 and tmax drawn from 25:35.
  set.seed(0)
  df <- data.frame(year = rep(1981:1982, each = 365),
                   doy = rep(1:365, times = 2),
                   # TRUE rather than T: T is an ordinary variable that can be
                   # reassigned, TRUE is a reserved word.
                   rainfall = sample(0:30, 730, replace = TRUE),
                   tmax = sample(25:35, 730, replace = TRUE))

For each year I have two days of the year called ref.doy, and for each ref.doy I have a corresponding doy.first and doy.second.

  # Reference table: two ref.doy values per year, each paired with the
  # doy.first and doy.second values that belong to it.
  my.df <- data.frame(
    year       = rep(c(1981, 1982), each = 2),
    ref.doy    = c(250, 260, 230, 240),
    doy.first  = c(280, 300, 290, 310),
    doy.second = c(310, 330, 340, 350)
  )

What I want to do is, for each year, take the first ref.doy and the corresponding doy.first and doy.second, and calculate the total rainfall and mean temperature over ref.doy:doy.first and over doy.first:doy.second. I wrote a function to do this:

  # Summarise rainfall and tmax over two day-of-year windows.
  #
  # x: data frame holding the columns doy, rainfall, tmax, ref.doy, doy.first
  #    and doy.second.
  # Returns the dplyr::summarise() result with four columns:
  #   tot.rain.val1  - sum of rainfall where ref.doy   <= doy <= doy.first
  #   tot.rain.val2  - sum of rainfall where doy.first <= doy <= doy.second
  #   mean.tmax.val1 - mean of tmax    where ref.doy   <= doy <= doy.first
  #   mean.tmax.val2 - sum of tmax     where doy.first <= doy <= doy.second
  # NOTE(review): mean.tmax.val2 uses sum() rather than mean(), which looks
  # unintended given the column name -- confirm before relying on it.
  my.func <- function(x) {
    dplyr::summarise(
      x,
      tot.rain.val1 = sum(rainfall[doy >= ref.doy & doy <= doy.first]),
      tot.rain.val2 = sum(rainfall[doy >= doy.first & doy <= doy.second]),
      mean.tmax.val1 = mean(tmax[doy >= ref.doy & doy <= doy.first]),
      mean.tmax.val2 = sum(tmax[doy >= doy.first & doy <= doy.second])
    )
  }

The approach I took is to first join the two data frames and then run my function:

  # Attach ref.doy / doy.first / doy.second to every daily row. No `by` is
  # given, so left_join() matches on the columns the two data frames share.
  df <- df %>% left_join(my.df)

  # NOTE(review): `.` is the magrittr placeholder for the pipe's left-hand
  # side, i.e. the whole grouped data frame rather than the current group, so
  # my.func() is not evaluated once per (year, ref.doy) group here.
  # paste(..., collapse = ",") then flattens the data frame returned by
  # my.func() into a single string instead of separate columns.
  results <- df %>% dplyr::group_by(year, ref.doy) %>% 
             dplyr::summarise(results = paste(my.func(.), collapse = ","))

However, the results look a bit odd and the format is not correct. I need the results in the following format:

  year  ref.doy tot.rain.val1 tot.rain.val2 mean.tmax.val1 mean.tmax.val2
  1981   250              
  1981   260   
  1982   230
  1982   240

Upvotes: 0

Views: 84

Answers (2)

AntoniosK
AntoniosK

Reputation: 16121

Your function returns a dataframe in the format you want, so you don't need to use paste, but save those outputs in a list and then unnest.

library(tidyverse)

# Attach the reference days from my.df to the daily records; with no `by`,
# left_join() matches on the columns both data frames share.
df <- df %>% left_join(my.df)

# Store the data frame returned by my.func() in a list column, then expand it
# back into regular columns with unnest().
# NOTE(review): `.` is the whole grouped data frame, not the current group, so
# each group stores the complete per-group summary; the printed result below
# accordingly shows the same four rows repeated four times (16 rows).
df %>% 
  group_by(year, ref.doy) %>% 
  summarise(results = list(my.func(.))) %>% 
  unnest() %>%
  ungroup() %>%
  # Drops the outer grouping columns; the year1 / ref.doy1 columns in the
  # printed result are presumably the unnested copies -- the naming depends on
  # the tidyr version, confirm before reuse.
  select(-year, -ref.doy)

# # A tibble: 16 x 6
#    year1 ref.doy1 tot.rain.val1 tot.rain.val2 mean.tmax.val1 mean.tmax.val2
#    <dbl>    <dbl>         <int>         <int>          <dbl>          <int>
# 1  1981      250           396           365           29.6            939
# 2  1981      260           429           489           29.8            926
# 3  1982      230           994           805           29.3           1515
# 4  1982      240          1140           653           29.7           1224
# 5  1981      250           396           365           29.6            939
# 6  1981      260           429           489           29.8            926
# 7  1982      230           994           805           29.3           1515
# 8  1982      240          1140           653           29.7           1224
# 9  1981      250           396           365           29.6            939
#10  1981      260           429           489           29.8            926
#11  1982      230           994           805           29.3           1515
#12  1982      240          1140           653           29.7           1224
#13  1981      250           396           365           29.6            939
#14  1981      260           429           489           29.8            926
#15  1982      230           994           805           29.3           1515
#16  1982      240          1140           653           29.7           1224

Upvotes: 2

s__
s__

Reputation: 9485

What about something like this, if you want it in a function:

library(dplyr)
# Join the daily data `x` with the reference-day table `y`, summarise rainfall
# and tmax per (year, ref.doy), and print the summary.
#
# x: data frame with columns year, doy, rainfall and tmax.
# y: data frame with columns year, ref.doy, doy.first and doy.second.
# The join is left without `by`, so dplyr matches on the shared columns and
# reports them in a message. The function's value is whatever print() returns
# for the summary.
# NOTE(review): mean.tmax.val2 uses sum() rather than mean(), which looks
# unintended given the column name -- confirm before relying on it.
fun <- function(x, y) {
  joined <- left_join(x, y)
  by_ref <- group_by(joined, year, ref.doy)
  df1 <- summarise(
    by_ref,
    tot.rain.val1 = sum(rainfall[doy >= ref.doy & doy <= doy.first]),
    tot.rain.val2 = sum(rainfall[doy >= doy.first & doy <= doy.second]),
    mean.tmax.val1 = mean(tmax[doy >= ref.doy & doy <= doy.first]),
    mean.tmax.val2 = sum(tmax[doy >= doy.first & doy <= doy.second])
  )
  print(df1)
}

fun(df,my.df)
Joining, by = "year"
# A tibble: 4 x 6
# Groups:   year [?]
   year ref.doy tot.rain.val1 tot.rain.val2 mean.tmax.val1 mean.tmax.val2
  <dbl>   <dbl>         <int>         <int>          <dbl>          <int>
1  1981     250           396           365           29.6            939
2  1981     260           429           489           29.8            926
3  1982     230           994           805           29.3           1515
4  1982     240          1140           653           29.7           1224

Upvotes: 2

Related Questions