Eduardo Marques
Eduardo Marques

Reputation: 45

vectorize a function that returns more than one variable using tidyverse

I have a function that returns several variables codependent with each other. The output it is a data frame with 1 row and n cols. The number of columns in the output depends on one of the inputs of the function. I need to vetorize it and join to "main" dataframe, something like 'dplyr::mutate()' does.

I really try to make a reprex simples as possible:

#data
df <- data.frame("ob" = 1:30,
                 "ob_pattern" = sample(c("p1", "p2"), size = 30, replace = T),
                 "value" = runif(n = 30))
> head(df)
  ob ob_pattern     value
1  1         p1 0.5442453
2  2         p2 0.1274518
3  3         p2 0.4256460
4  4         p1 0.9319009
5  5         p2 0.9828048
6  6         p2 0.2309473

#patterns
df_pt <- data.frame("pattern" = c("p1", "p1", "p2", "p2", "p2"),
                    "name" = c("n1", "n2", "n1", "n2", "n3" ),
                    "perct" = c(0.4, 0.15, 0.3, 0.5, 0.18))

> df_pt
  pattern name perct
1      p1   n1  0.40
2      p1   n2  0.15
3      p2   n1  0.30
4      p2   n2  0.50
5      p2   n3  0.18

This function creates classes and multiplies a value from the database by a predefined pattern from the pattern table

fun <- function(value, ob_pattern, df_pt){
  
  #filter the pattern
  sel_pt <- df_pt %>% 
    dplyr::filter(pattern == ob_pattern)
  
  out <- data.frame()
  
  for(i in 1:nrow(sel_pt)){
    out[1,i] <- sel_pt[i,2]
    out[2,i] <- sel_pt[i,3] / value
  }
  
  names(out) <- out[1,]
  out <-  out[-1,]
  return(out)
  
}

This function works fine "manually":

fun(10, "p1", df_pt)
> fun(10, "p1", df_pt)
    n1    n2
2 0.04 0.015

fun(10, "p2", df_pt)
> fun(10, "p2", df_pt)
    n1   n2    n3
2 0.03 0.05 0.018

However, not goes well in a map iteration:

pmap(list(value = df$value, ob_pattern = df$ob_pattern, df_pt = df_pt), fun)

> pmap(list(value = df$value, ob_pattern = df$ob_pattern, df_pt = df_pt), fun)
Erro: Element 3 of `.l` must have length 1 or 30, not 3
Run `rlang::last_error()` to see where the error occurred.

df <- df %>% 
  mutate(pmap(list(value = value, ob_pattern = ob_pattern, df_pt = df_pt), fun))

> df <- df %>% 
+   mutate(pmap(list(value = value, ob_pattern = ob_pattern, df_pt = df_pt), fun))
Erro: Problem with `mutate()` input `..1`.
i `..1 = pmap(...)`.
x Element 3 of `.l` must have length 1 or 30, not 3
Run `rlang::last_error()` to see where the error occurred.

What I expect:

# A tibble: 6 x 30
     ob ob_pattern value    n1    n2     n3
  <dbl> <chr>      <dbl> <dbl> <dbl>  <dbl>
1     1 p1         0.544 1.36  3.63  NA    
2     2 p2         0.127 0.425 0.255  0.708
3     3 p2         0.426 1.42  0.851  2.36 
4     4 p1         0.932 2.33  6.21  NA    
5     5 p2         0.983 3.28  1.97   5.46 
6     6 p2         0.231 0.770 0.462  1.28 

Upvotes: 3

Views: 87

Answers (2)

user10917479
user10917479

Reputation:

As another approach, this is a strong candidate for a nested dataframe.

In this case, we can adjust your function to take the filtered dataframe from the start.

fun2 <- function(value, sel_pt){
  
  #filter the pattern
  out <- data.frame()
  
  for(i in 1:nrow(sel_pt)){
    out[1,i] <- sel_pt[i,1]
    out[2,i] <- sel_pt[i,2] / value
  }
  
  names(out) <- out[1,]
  out <-  out[-1,]
  return(out)
  
}

Now we can nest-join on df_pt and map over that as an input.

library(dplyr)
library(tidyr)
library(purrr)

df %>% 
  nest_join(df_pt, by = c(ob_pattern = "pattern")) %>% 
  mutate(output = map2(value, df_pt, fun2)) %>% 
  select(ob, ob_pattern, value, output) %>% 
  unnest_wider(output)

On another note, this fun2() can easily be rewritten as follows. This returns the columns as numeric, which is probably what you want.

library(tibble)

fun3 <- function(value, sel_pt){
  
  sel_pt %>% 
    mutate(perct = perct / value) %>% 
    deframe()
}

df %>% 
  nest_join(df_pt, by = c(ob_pattern = "pattern")) %>% 
  mutate(output = map2(value, df_pt, fun3)) %>% 
  select(ob, ob_pattern, value, output) %>% 
  unnest_wider(output)
# A tibble: 30 x 6
      ob ob_pattern   value      n1     n2     n3
   <int> <chr>        <dbl>   <dbl>  <dbl>  <dbl>
 1     1 p1         0.898     0.445  0.167 NA    
 2     2 p1         0.413     0.970  0.364 NA    
 3     3 p2         0.507     0.592  0.987  0.355
 4     4 p2         0.544     0.551  0.918  0.331
 5     5 p2         0.504     0.595  0.992  0.357
 6     6 p1         0.00277 145.    54.2   NA    
 7     7 p1         0.453     0.883  0.331 NA    
 8     8 p1         0.175     2.29   0.858 NA    
 9     9 p1         0.595     0.673  0.252 NA    
10    10 p2         0.0358    8.37  13.9    5.02 
# ... with 20 more rows

Upvotes: 1

akrun
akrun

Reputation: 887911

The issue is that df_pt is a data.frame and it needs to be used as input in each of the looped element. So, wrap it with list so that it gets recycled as a single unit. When we loop over the data.frame, the column is a unit and this triggers the error Erro: Element 3 of .l must have length 1 or 30, not 3 as the number of columns is 3.

library(dplyr)
library(purrr)
pmap_dfr(list(value = df$value, ob_pattern = df$ob_pattern, 
      df_pt = list(df_pt)), fun, .id = 'ob') %>%
     mutate(ob_pattern = df$ob_pattern, .before = 2)

-output

  ob ob_pattern                n1                n2                n3
1   1         p2 0.412805820786703 0.688009701311172 0.247683492472022
2   2         p2 0.819499036723223  1.36583172787204 0.491699422033934
3   3         p2 0.307851399008221 0.513085665013701 0.184710839404932
4   4         p1 0.512735060593463 0.192275647722549              <NA>
5   5         p1 0.583734910383962 0.218900591393986              <NA>
6   6         p1  1.26403823904009 0.474014339640033              <NA>
7   7         p1 0.520375965374508 0.195140987015441              <NA>
8   8         p2 0.519695574800472  0.86615929133412 0.311817344880283
9   9         p1 0.406595728747128 0.152473398280173              <NA>
10 10         p1  1.19690591834918 0.448839719380944              <NA>
11 11         p1 0.935134681128101 0.350675505423038              <NA>
12 12         p2 0.782381874921124  1.30396979153521 0.469429124952674
13 13         p1 0.902566162028802 0.338462310760801              <NA>
14 14         p2 0.412253449371353 0.687089082285588 0.247352069622812
15 15         p2 0.414083431765533 0.690139052942556  0.24845005905932
16 16         p2 0.540922520169042 0.901537533615069 0.324553512101425
17 17         p2 0.306604097963516 0.511006829939193  0.18396245877811
18 18         p2  1.94204963387021  3.23674938978369  1.16522978032213
19 19         p2 0.302096661043879 0.503494435073132 0.181257996626328
20 20         p1 0.478354496206454  0.17938293607742              <NA>
21 21         p2 0.406759159422302 0.677931932370503 0.244055495653381
22 22         p1 0.929982462421745 0.348743423408154              <NA>
23 23         p2 0.850658644553245  1.41776440758874 0.510395186731947
24 24         p1  1.24950965620306 0.468566121076146              <NA>
25 25         p1 0.807136438261923 0.302676164348221              <NA>
26 26         p2  75.9337007291282   126.55616788188  45.5602204374769
27 27         p2 0.487844654295068 0.813074423825113 0.292706792577041
28 28         p1 0.702944374408066 0.263604140403025              <NA>
29 29         p1 0.417447530041509 0.156542823765566              <NA>
30 30         p2  2.14866591202588  3.58110985337647  1.28919954721553

Or if we want to use pmap within mutate

library(tidyr)
df %>% 
   mutate(out = pmap(across(c(value, ob_pattern)), 
      ~ fun(..1, ..2, df_pt))) %>% 
   unnest_wider(c(out)) %>%
   type.convert(as.is = TRUE)

-output

# A tibble: 30 × 6
      ob ob_pattern value    n1    n2     n3
   <int> <chr>      <dbl> <dbl> <dbl>  <dbl>
 1     1 p2         0.727 0.413 0.688  0.248
 2     2 p2         0.366 0.819 1.37   0.492
 3     3 p2         0.974 0.308 0.513  0.185
 4     4 p1         0.780 0.513 0.192 NA    
 5     5 p1         0.685 0.584 0.219 NA    
 6     6 p1         0.316 1.26  0.474 NA    
 7     7 p1         0.769 0.520 0.195 NA    
 8     8 p2         0.577 0.520 0.866  0.312
 9     9 p1         0.984 0.407 0.152 NA    
10    10 p1         0.334 1.20  0.449 NA    
# … with 20 more rows

NOTE: The output generated returns character columns and that is only because of some issue in the OP's fun code


Or using rowwise

df %>% 
  rowwise %>%
  mutate(out = fun(value, ob_pattern, df_pt)) %>%
  ungroup %>%
  unpack(out) %>%
  type.convert(as.is = TRUE)

-output

# A tibble: 30 × 6
      ob ob_pattern value    n1    n2     n3
   <int> <chr>      <dbl> <dbl> <dbl>  <dbl>
 1     1 p2         0.727 0.413 0.688  0.248
 2     2 p2         0.366 0.819 1.37   0.492
 3     3 p2         0.974 0.308 0.513  0.185
 4     4 p1         0.780 0.513 0.192 NA    
 5     5 p1         0.685 0.584 0.219 NA    
 6     6 p1         0.316 1.26  0.474 NA    
 7     7 p1         0.769 0.520 0.195 NA    
 8     8 p2         0.577 0.520 0.866  0.312
 9     9 p1         0.984 0.407 0.152 NA    
10    10 p1         0.334 1.20  0.449 NA    
# … with 20 more rows

Upvotes: 0

Related Questions