user17163779
user17163779

Reputation:

Make connections between two datasets

I would like to make a connection between the x and df2 datasets. Notice that the dataset x, I have a percentage value, which in this case for the day 03-01-2021 is 0.1 and for the days 01-02-2021 and 01-01-2022 it is 0.45. So from that information, I know the percentage value for 03-01-2021 is 0.1, so this value falls into category I of my dataset df2 (since the values range from 0.1 to 0.2). As for the days 02-01-2021 and 01-01-2022, they correspond to category F of the df2,since the values range from 0.4 to 0.5. So, I would like to generate an output table as follows:

enter image description here

library(dplyr)

df1<- structure(
      list(date2= c("01-01-2022","01-01-2022","03-01-2021","03-01-2021","01-02-2021","01-02-2021"),
           Category= c("ABC","CDE","ABC","CDE","ABC","CDE"),
           coef= c(5,4,0,2,4,5)),
      class = "data.frame", row.names = c(NA, -6L))

x<-df1 %>%
  group_by(date2) %>%
  summarize(across("coef", sum),.groups = 'drop')%>% 
  arrange(date2 = as.Date(date2, format = "%d-%m-%Y"))

number<-20

x$Percentage<-x$coef/number

      date2       coef Percentage
  <chr>      <dbl>      <dbl>
1 03-01-2021     2       0.1 
2 01-02-2021     9       0.45
3 01-01-2022     9       0.45


 df2 <- structure(
  list(
    Category = c("A", "B", "C", "D",
               "E", "F", "G", "H", "I", "J"),
    From = c(0.9,
           0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0),
    Until  = c(
      1,
      0.8999,
      0.7999,
      0.6999,
      0.5999,
      0.4999,
      0.3999,
      0.2999,
      0.1999,
      0.0999
    ),
    `1 Val` = c(
      2222,
      2017.8,
      1793.6,
      1621.5,
      1522.4,
      1457.3,
      1325.2,
      1229.15,
      1223.1,
      1177.05
    ),
    `2 Val` = c(3200, 2220, 2560,
                2200, 2220, 2080, 1220, 1240, 1720, 1620),
    `3 Val` = c(
      4665,
      4122.5,
      3732,
      3498.75,
      3265.5,
      3032.25,
      2799,
      2682.375,
      2565.75,
      2449.125
    ),
    `4 Val` = c(
      6112,
      5222.8,
      4889.6,
      4224,
      4278.4,
      3972.8,
      3667.2,
      3224.4,
      3361.6,
      3222.8
    )
  ),
  row.names = c(NA,-10L),
  class = c("tbl_df",
            "tbl", "data.frame")
)


   Category  From  Until   1 Val 2 Val    3 Val    4 Val
   <chr>    <dbl>  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
 1 A          0.9 1        2222     3200   4665    6112 
 2 B          0.8 0.900    2018    2220   4122   5223
 3 C          0.7 0.800    1794    2560   3732    4890
 4 D          0.6 0.700    1622    2200   3499   4224 
 5 E          0.5 0.600    1522    2220   3266   4278
 6 F          0.4 0.500    1457    2080   3032   3973
 7 G          0.3 0.400    1325    1220   2799    3667
 8 H          0.2 0.300    1229    1240   2682   3224
 9 I          0.1 0.200    1223    1720   2566   3362
10 J          0   0.0999   1177    1620   2449   3223

Upvotes: 1

Views: 41

Answers (1)

akrun
akrun

Reputation: 887118

Using tidyverse, we do a rowwise on the 'x' dataset, slice the rows of 'df2' where the 'Percentage' falls between the 'From' and 'Until', and unpack the data.frame/tibble column

library(dplyr)
library(tidyr)
x %>%
   rowwise %>% 
   mutate(out = df2 %>%
        slice(which(Percentage>= From & 
                Percentage <= Until)[1]) %>% 
             select(-(1:3)) ) %>%
   ungroup %>%
   unpack(out)

-output

# A tibble: 3 × 7
  date2       coef Percentage `1 Val` `2 Val` `3 Val` `4 Val`
  <chr>      <int>      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
1 03-01-2021     2       0.1    1223.    1720   2566.   3362.
2 01-02-2021     9       0.45   1457.    2080   3032.   3973.
3 01-01-2022     9       0.45   1457.    2080   3032.   3973.

Or this could be done with a non-equi join

library(data.table)
nm1 <- names(df2)[endsWith(names(df2), 'Val')]
setDT(x)[setDT(df2), (nm1) := mget(nm1), 
   on = .(Percentage >= From, Percentage <= Until)]

-output

> x
        date2 coef Percentage  1 Val 2 Val   3 Val  4 Val
1: 03-01-2021    2       0.10 1223.1  1720 2565.75 3361.6
2: 01-02-2021    9       0.45 1457.3  2080 3032.25 3972.8
3: 01-01-2022    9       0.45 1457.3  2080 3032.25 3972.8

Upvotes: 1

Related Questions