davidoj
davidoj

Reputation: 1

Dplyr mutate loop

Is there a better a way to do this in r using dplyr without having to type out a new formula for each variable?

code    dagala_price_1  dagala_price_2  dagala_price_3  dagala_price_4  dagala_price_5  dagala_unit_nb_1    dagala_unit_nb_2    dagala_unit_nb_3    dagala_unit_nb_4    dagala_unit_nb_5
MI-NAL-KA   50  15000   NA  NA  NA  100 1   NA  NA  NA
M-KK-KZ 10000   20000   NA  NA  NA  20  2   NA  NA  NA
M-KK-NK 10000   NA  NA  NA  NA  5   NA  NA  NA  NA
MI-NA-BA    12000   15000   NA  NA  NA  2   1   NA  NA  NA
MI-BD-BT    12000   15000   NA  NA  NA  3   1   NA  NA  NA
MI-MI-ND    12000   80000   NA  NA  NA  8   1   NA  NA  NA
MI-NAL-LT   13000   15000   NA  18000   NA  1   3   NA  1   NA
M-BY-BGY    13000   15000   NA  NA  NA  4   1   NA  NA  NA
MI-NA-NY    13000   NA  NA  NA  NA  2   NA  NA  NA  NA
MI-KAN-BL   18000   35000   15000   NA  NA  1   1   6   NA  NA
MI-KIGO-KR  20000   15000   15000   NA  NA  10  8   4   NA  NA
MI-KAN-KY   20000   16000   NA  NA  NA  2   6   NA  NA  NA
MI-NAL-BB   20000   35000   250000  NA  NA  1   1   1   NA  NA
MI-KAM-AL   30000   14000   13000   NA  NA  1   10  2   NA  NA


df <- df %>% mutate(

      dagala_total_1 = dagala_price_1 * dagala_unit_nb_1,

      dagala_total_2 = dagala_price_2 * dagala_unit_nb_2,

      dagala_total_3 = dagala_price_3 * dagala_unit_nb_3, 

      dagala_total_total =dagala_total_1 + dagala_total_2 + dagala_total_3)       

Upvotes: 0

Views: 1247

Answers (1)

TC Zhang
TC Zhang

Reputation: 2797

According to your data,you can arrange it in the long form ("tidy" in the tidyverse's term ), which will get you a simpler code.

I assume you have dagala units and prices for five groups 1~5, so I added a new group variable in the data.frame to make it tidy, i.e, in the 'long' form

library(tidyr)
library(dplyr)
library(data.table)


df <- data.table::fread(
"code    dagala_price_1  dagala_price_2  dagala_price_3  dagala_price_4  dagala_price_5  dagala_unit_nb_1    dagala_unit_nb_2    dagala_unit_nb_3    dagala_unit_nb_4    dagala_unit_nb_5
MI-NAL-KA   50  15000   NA  NA  NA  100 1   NA  NA  NA
M-KK-KZ 10000   20000   NA  NA  NA  20  2   NA  NA  NA
M-KK-NK 10000   NA  NA  NA  NA  5   NA  NA  NA  NA
MI-NA-BA    12000   15000   NA  NA  NA  2   1   NA  NA  NA
MI-BD-BT    12000   15000   NA  NA  NA  3   1   NA  NA  NA
MI-MI-ND    12000   80000   NA  NA  NA  8   1   NA  NA  NA
MI-NAL-LT   13000   15000   NA  18000   NA  1   3   NA  1   NA
M-BY-BGY    13000   15000   NA  NA  NA  4   1   NA  NA  NA
MI-NA-NY    13000   NA  NA  NA  NA  2   NA  NA  NA  NA
MI-KAN-BL   18000   35000   15000   NA  NA  1   1   6   NA  NA
MI-KIGO-KR  20000   15000   15000   NA  NA  10  8   4   NA  NA
MI-KAN-KY   20000   16000   NA  NA  NA  2   6   NA  NA  NA
MI-NAL-BB   20000   35000   250000  NA  NA  1   1   1   NA  NA
MI-KAM-AL   30000   14000   13000   NA  NA  1   10  2   NA  NA"
)


df.price <- df %>%
  select(code, matches("price_")) %>%
  # gather price by group
  gather(key=groups,value=dagala_price,matches("price_")) %>%
  # extract last number as group
  mutate(groups = gsub(".*(\\d)$","\\1",groups))
#> Warning: package 'bindrcpp' was built under R version 3.4.4


df.unit <- df %>%
  select(code,matches("unit_nb")) %>%
  # gather units by group
  gather(key=groups,value=dagala_unit,matches("unit_")) %>%
  # extract last number as group
  mutate(groups = gsub(".*(\\d)$","\\1",groups))


df.tidy <- left_join(df.price,df.unit)
#> Joining, by = c("code", "groups")

This df.tidy is the 'long' tidy form, which is easier to manipulate in the tidyverse syntax:

# Tidy data.frame
df.tidy

# A tibble: 70 x 4
   code      groups dagala_price dagala_unit
   <chr>     <chr>         <int>       <int>
 1 MI-NAL-KA 1                50         100
 2 M-KK-KZ   1             10000          20
 3 M-KK-NK   1             10000           5
 4 MI-NA-BA  1             12000           2
 5 MI-BD-BT  1             12000           3
 6 MI-MI-ND  1             12000           8
 7 MI-NAL-LT 1             13000           1
 8 M-BY-BGY  1             13000           4
 9 MI-NA-NY  1             13000           2
10 MI-KAN-BL 1             18000           1
# ... with 60 more rows

Then your code can be much simplified like this:

# Then some summarise operations

df.total_by_grp <- df.tidy %>%
  mutate(dagala_total = dagala_price * dagala_unit) 

# summarise by group
head(df.total_by_grp)
#>        code groups dagala_price dagala_unit dagala_total
#> 1 MI-NAL-KA      1           50         100         5000
#> 2   M-KK-KZ      1        10000          20       200000
#> 3   M-KK-NK      1        10000           5        50000
#> 4  MI-NA-BA      1        12000           2        24000
#> 5  MI-BD-BT      1        12000           3        36000
#> 6  MI-MI-ND      1        12000           8        96000


df.total_by_code <- df.tidy %>%
  mutate(dagala_total = dagala_price * dagala_unit) %>%
  group_by(code) %>%
  summarise(code_total = sum(dagala_total,na.rm = TRUE))

# summarise by total
head(df.total_by_code)
#> # A tibble: 6 x 2
#>   code      code_total
#>   <chr>          <int>
#> 1 M-BY-BGY       67000
#> 2 M-KK-KZ       240000
#> 3 M-KK-NK        50000
#> 4 MI-BD-BT       51000
#> 5 MI-KAM-AL     196000
#> 6 MI-KAN-BL     143000

Created on 2018-07-28 by the reprex package (v0.2.0).

Upvotes: 1

Related Questions