Reputation: 177

creating a new column that takes initial values of another column in R

I am working on a cross sectional dataset. I want to create a new column, named "initial", that will contain initial values of another column. More specifically, for each country, the column initial takes the value of another column called "ratio" for the first year for which the data is available and will take the value 0 for all remaining years. Sample code:

country <- c(rep(c("A","B","C","D"),each=5))
year <- c(1980:1984, 1980: 1984, 1980:1984, 1980:1984)
ratio <- runif(n = 20, min = 0.20, max = 0.40)
mydata <- data.frame(country, year, ratio)
mydata$ratio[[1]] <- NA
mydata$ratio[6:7] <- NA
mydata$ratio[16:18] <- NA

The output I want to obtain looks like this:

Is there a way of doing this in R preferably using the dplyr package?

Thanks very much in advance!

Upvotes: 0

Answers (4)

Ronak Shah

Reputation: 389175

You can use match to get index of first non-NA value in the group.

library(dplyr)

mydata %>%
  group_by(country) %>%
  mutate(initial = replace(ratio, -match(TRUE, !is.na(ratio)), 0)) %>%
  ungroup

#  country  year  ratio initial
#   <chr>   <int>  <dbl>   <dbl>
# 1 A        1980 NA       0    
# 2 A        1981  0.268   0.268
# 3 A        1982  0.302   0    
# 4 A        1983  0.263   0    
# 5 A        1984  0.222   0    
# 6 B        1980 NA       0    
# 7 B        1981 NA       0    
# 8 B        1982  0.397   0.397
# 9 B        1983  0.265   0    
#10 B        1984  0.249   0    
#11 C        1980  0.302   0.302
#12 C        1981  0.219   0    
#13 C        1982  0.339   0    
#14 C        1983  0.228   0    
#15 C        1984  0.393   0    
#16 D        1980 NA       0    
#17 D        1981 NA       0    
#18 D        1982 NA       0    
#19 D        1983  0.303   0.303
#20 D        1984  0.218   0

Upvotes: 1

forhad

Reputation: 196

library(tidyverse)

mydata2 <- mydata %>% 
  group_by(country) %>% 
  filter(!is.na(ratio)) %>% 
  mutate(year_rank = rank(year)) %>% 
  mutate(initial = if_else(year_rank == 1, ratio, 0)) %>% 
  right_join(., mydata, by = c('country', 'year', 'ratio')) %>% 
  replace_na(list(initial = '0')) %>% 
  arrange(country, year) %>% 
  select(-year_rank)

Upvotes: 2

stefan

Reputation: 125268

Making use of dplyr::first you could do:


library(dplyr)

mydata %>% 
  group_by(country) %>% 
  mutate(initial = first(ratio[!is.na(ratio)]),
         initial = ifelse(is.na(ratio) | ratio != initial, 0, initial)) %>% 
  ungroup()
#> # A tibble: 20 × 4
#>    country  year  ratio initial
#>    <chr>   <int>  <dbl>   <dbl>
#>  1 A        1980 NA       0    
#>  2 A        1981  0.387   0.387
#>  3 A        1982  0.257   0    
#>  4 A        1983  0.366   0    
#>  5 A        1984  0.328   0    
#>  6 B        1980 NA       0    
#>  7 B        1981 NA       0    
#>  8 B        1982  0.227   0.227
#>  9 B        1983  0.331   0    
#> 10 B        1984  0.341   0    
#> 11 C        1980  0.292   0.292
#> 12 C        1981  0.344   0    
#> 13 C        1982  0.387   0    
#> 14 C        1983  0.251   0    
#> 15 C        1984  0.292   0    
#> 16 D        1980 NA       0    
#> 17 D        1981 NA       0    
#> 18 D        1982 NA       0    
#> 19 D        1983  0.295   0.295
#> 20 D        1984  0.312   0

DATA

set.seed(42)

country <- c(rep(c("A","B","C","D"),each=5))
year <- c(1980:1984, 1980: 1984, 1980:1984, 1980:1984)
ratio <- runif(n = 20, min = 0.20, max = 0.40)
mydata <- data.frame(country, year, ratio)
mydata$ratio[[1]] <- NA
mydata$ratio[6:7] <- NA
mydata$ratio[16:18] <- NA

Upvotes: 2

Paul

Reputation: 2977

This might be improved but I get your expected output:

library(dplyr)

mydata %>% 
  group_by(country) %>% 
  filter(!is.na(ratio)) %>% 
  filter(year == min(year)) %>% 
  rename(initial = ratio) %>% 
  full_join(., mydata) %>% 
  mutate(initial = ifelse(is.na(initial), 0, initial)) %>% 
  arrange(country, year) %>% 
  relocate(initial, .after = last_col())

Output:

  country  year  ratio initial
   <chr>   <int>  <dbl>   <dbl>
 1 A        1980 NA       0    
 2 A        1981  0.341   0.341
 3 A        1982  0.330   0    
 4 A        1983  0.219   0    
 5 A        1984  0.269   0    
 6 B        1980 NA       0    
 7 B        1981 NA       0    
 8 B        1982  0.365   0.365
 9 B        1983  0.210   0    
10 B        1984  0.334   0    
11 C        1980  0.284   0.284
12 C        1981  0.251   0    
13 C        1982  0.358   0    
14 C        1983  0.288   0    
15 C        1984  0.261   0    
16 D        1980 NA       0    
17 D        1981 NA       0    
18 D        1982 NA       0    
19 D        1983  0.252   0.252
20 D        1984  0.301   0

Upvotes: 1

creating a new column that takes initial values of another column in R

Answers (4)

Related Questions