vectorize a function that returns more than one variable using tidyverse

Question

I have a function that returns several variables that depend on each other. The output is a data frame with 1 row and n columns. The number of columns in the output depends on one of the inputs of the function. I need to vectorize it and join the result to the "main" data frame, something like what 'dplyr::mutate()' does.

I tried to make the reprex as simple as possible:

# data
# NOTE: no seed is set, so `ob_pattern` and `value` differ on every run.
df <- data.frame(
  ob = 1:30,
  # spell out TRUE: `T` is an ordinary variable that can be reassigned
  ob_pattern = sample(c("p1", "p2"), size = 30, replace = TRUE),
  value = runif(n = 30)
)
> head(df)
  ob ob_pattern     value
1  1         p1 0.5442453
2  2         p2 0.1274518
3  3         p2 0.4256460
4  4         p1 0.9319009
5  5         p2 0.9828048
6  6         p2 0.2309473

# patterns: one row per (pattern, name) pair together with its percentage
df_pt <- data.frame(
  pattern = rep(c("p1", "p2"), times = c(2, 3)),
  name = c("n1", "n2", "n1", "n2", "n3"),
  perct = c(0.4, 0.15, 0.3, 0.5, 0.18)
)

> df_pt
  pattern name perct
1      p1   n1  0.40
2      p1   n2  0.15
3      p2   n1  0.30
4      p2   n2  0.50
5      p2   n3  0.18

This function creates classes and divides each predefined percentage of the matching pattern (from the pattern table) by a value from the database:

#' Divide the percentages of one pattern by a value.
#'
#' Selects the rows of `df_pt` whose `pattern` equals `ob_pattern` and divides
#' their `perct` by `value`, returning the results as named columns.
#'
#' @param value Numeric scalar used as the divisor.
#' @param ob_pattern Pattern key to look up in `df_pt$pattern`.
#' @param df_pt Data frame with columns `pattern`, `name` and `perct`.
#' @return A one-row data frame with one numeric column per matching `name`
#'   (in table order); a data frame without columns when nothing matches.
fun <- function(value, ob_pattern, df_pt) {
  # which() discards NA comparisons, so rows with a missing pattern are
  # dropped just as dplyr::filter() would drop them.
  hits <- which(df_pt$pattern == ob_pattern)

  ratios <- df_pt$perct[hits] / value
  names(ratios) <- as.character(df_pt$name[hits])

  # Build the row in one step. Filling a data frame cell by cell put the name
  # and the number into the same column, which turned the numbers into
  # character strings, and `1:nrow()` misbehaved when no row matched.
  # check.names = FALSE keeps the names exactly as they appear in `df_pt`.
  as.data.frame(as.list(ratios), check.names = FALSE)
}

This function works fine "manually":

fun(10, "p1", df_pt)
> fun(10, "p1", df_pt)
    n1    n2
2 0.04 0.015

fun(10, "p2", df_pt)
> fun(10, "p2", df_pt)
    n1   n2    n3
2 0.03 0.05 0.018

However, it does not work in a map iteration:

# NOTE: this call fails. `df_pt` is a 3-column data frame, and the error below
# reports element 3 of `.l` as having length 3 -- i.e. pmap() tries to iterate
# over its columns in parallel with the 30 values rather than handing the whole
# table to every call. Wrapping it as `list(df_pt)` (length 1) should let it be
# recycled -- TODO confirm.
pmap(list(value = df$value, ob_pattern = df$ob_pattern, df_pt = df_pt), fun)

> pmap(list(value = df$value, ob_pattern = df$ob_pattern, df_pt = df_pt), fun)
Erro: Element 3 of `.l` must have length 1 or 30, not 3
Run `rlang::last_error()` to see where the error occurred.

# NOTE: fails with the same message as the plain pmap() call: the data frame
# `df_pt` is element 3 of `.l` and has length 3 (its columns), which is neither
# 1 nor the 30 rows of `df`.
df <- df %>% 
  mutate(pmap(list(value = value, ob_pattern = ob_pattern, df_pt = df_pt), fun))

> df <- df %>% 
+   mutate(pmap(list(value = value, ob_pattern = ob_pattern, df_pt = df_pt), fun))
Erro: Problem with `mutate()` input `..1`.
i `..1 = pmap(...)`.
x Element 3 of `.l` must have length 1 or 30, not 3
Run `rlang::last_error()` to see where the error occurred.

What I expect:

# A tibble: 6 x 6
     ob ob_pattern value    n1    n2     n3
             
1     1 p1         0.544 1.36  3.63  NA    
2     2 p2         0.127 0.425 0.255  0.708
3     3 p2         0.426 1.42  0.851  2.36 
4     4 p1         0.932 2.33  6.21  NA    
5     5 p2         0.983 3.28  1.97   5.46 
6     6 p2         0.231 0.770 0.462  1.28

user10917479 · Accepted Answer

As another approach, this is a strong candidate for a nested dataframe.

In this case, we can adjust your function to take the filtered dataframe from the start.

#' Divide the percentages of an already-filtered pattern table by a value.
#'
#' @param value Numeric scalar used as the divisor.
#' @param sel_pt Data frame (or tibble) holding the rows of a single pattern:
#'   first column = output names, second column = percentages.
#' @return A one-row data frame with one numeric column per row of `sel_pt`;
#'   a data frame without columns when `sel_pt` has no rows.
fun2 <- function(value, sel_pt) {
  # Columns are addressed by position, as before: names first, values second.
  ratios <- sel_pt[[2]] / value
  names(ratios) <- as.character(sel_pt[[1]])

  # One vectorised step instead of filling cells in a `1:nrow()` loop: the
  # loop broke on an empty table and, by storing the name and the number in
  # the same column, returned the numbers as character strings.
  # check.names = FALSE keeps the names exactly as given in `sel_pt`.
  as.data.frame(as.list(ratios), check.names = FALSE)
}

Now we can nest-join on df_pt and map over that as an input.

library(dplyr)
library(tidyr)
library(purrr)

# For every row of df, attach the matching rows of df_pt as a nested data
# frame (list-column `df_pt`), run fun2() on each value / nested-table pair,
# keep the original columns plus the result, and spread the named results
# into regular columns.
df %>% 
  nest_join(df_pt, by = c(ob_pattern = "pattern")) %>% 
  mutate(output = map2(value, df_pt, fun2)) %>% 
  select(ob, ob_pattern, value, output) %>% 
  unnest_wider(output)

On another note, this fun2() can easily be rewritten as follows. This returns the columns as numeric, which is probably what you want.

library(tibble)

# Divide the `perct` column of the nested pattern table by `value`, then let
# deframe() collapse the table into a named vector.
# Assumes `sel_pt` holds the names in its first column and `perct` in its
# second -- TODO confirm against the caller.
fun3 <- function(value, sel_pt) {
  scaled <- mutate(sel_pt, perct = perct / value)
  deframe(scaled)
}

# Same pipeline as before, but with fun3(): each row gets a named numeric
# vector, which unnest_wider() turns into one column per name. Names a pattern
# does not have (e.g. n3 for p1) show up as NA in the output below.
df %>% 
  nest_join(df_pt, by = c(ob_pattern = "pattern")) %>% 
  mutate(output = map2(value, df_pt, fun3)) %>% 
  select(ob, ob_pattern, value, output) %>% 
  unnest_wider(output)

# A tibble: 30 x 6
      ob ob_pattern   value      n1     n2     n3
                   
 1     1 p1         0.898     0.445  0.167 NA    
 2     2 p1         0.413     0.970  0.364 NA    
 3     3 p2         0.507     0.592  0.987  0.355
 4     4 p2         0.544     0.551  0.918  0.331
 5     5 p2         0.504     0.595  0.992  0.357
 6     6 p1         0.00277 145.    54.2   NA    
 7     7 p1         0.453     0.883  0.331 NA    
 8     8 p1         0.175     2.29   0.858 NA    
 9     9 p1         0.595     0.673  0.252 NA    
10    10 p2         0.0358    8.37  13.9    5.02 
# ... with 20 more rows

vectorize a function that returns more than one variable using tidyverse

Answers (2)

Related Questions