Reputation: 135
Hi I have data frame as
How to create column max_value with max from last 2 years max value
dt <-
structure(list(Name = c("A", "A", "A", "A", "A", "A", "A", "B",
"B", "B", "B", "B", "B"), year = c(2012L, 2012L, 2013L, 2014L,
2015L, 2016L, 2017L, 2012L, 2013L, 2013L, 2014L, 2015L, 2016L
), value = c(22L, 99L, 12L, 1L, 23L, 40L, 12L, 12L, 33L, 40L,
NA, 20L, 20L), max_value = c(NA, NA, 99L, 99L, 12L, 23L, 40L,
NA, 12L, 12L, 40L, 40L, 20L)), .Names = c("Name", "year", "value",
"max_value"), row.names = c(NA, -13L), class = c("data.table",
"data.frame"))
Name year value *max_value*
A 2012 22 NA
A 2012 99 NA
A 2013 12 99
A 2014 01 99
A 2015 23 12
A 2016 40 23
A 2017 12 40
B 2012 12 NA
B 2013 33 12
B 2013 40 12
B 2014 NA 40
B 2015 20 40
B 2016 20 20
Thanks in advance
Upvotes: 4
Views: 410
Reputation: 3909
The complication here is that the years have different numbers of entries. This can be worked around by grouping, then summarizing to get a within-year maximum. Next use slide_int()
to get the max of the two prior years, to ultimately join this yearly data back to the original frame:
library(tidyverse)
library(slider)
sol <- dt %>%
group_by(Name, year) %>%
summarize(max_val = max(value, na.rm = T)) %>%
mutate(slide_max = slide_int(lag(max_val), max,
.before = 1, na.rm = TRUE, .complete = T)) %>%
select(-max_val) %>%
right_join(dt, by = c("Name", "year"))
sol
#> # A tibble: 13 × 5
#> # Groups: Name [2]
#> Name year slide_max value max_value
#> <chr> <int> <int> <int> <int>
#> 1 A 2012 NA 22 NA
#> 2 A 2012 NA 99 NA
#> 3 A 2013 99 12 99
#> 4 A 2014 99 1 99
#> 5 A 2015 12 23 12
#> 6 A 2016 23 40 23
#> 7 A 2017 40 12 40
#> 8 B 2012 NA 12 NA
#> 9 B 2013 12 33 12
#> 10 B 2013 12 40 12
#> 11 B 2014 40 NA 40
#> 12 B 2015 40 20 40
#> 13 B 2016 20 20 20
identical(sol$slide_max, sol$max_value)
#> [1] TRUE
Upvotes: 2
Reputation: 27792
Here is an other data.table approach, using a self-join by .EACHI
library(data.table)
# temporary rowwise id
setDT(dt)[, id := .I]
# set key
setkey(dt, id)
# self join, set infinite values back to NA
dt[dt, max_val2 := {
dt[Name == i.Name & year >= (i.year - 2) & year < i.year, max(value, na.rm = TRUE)]
}, by = .EACHI][is.infinite(max_val2), max_val2 := NA][, id := NULL]
Name year value max_value max_val2
1: A 2012 22 NA NA
2: A 2012 99 NA NA
3: A 2013 12 99 99
4: A 2014 1 99 99
5: A 2015 23 12 12
6: A 2016 40 23 23
7: A 2017 12 40 40
8: B 2012 12 NA NA
9: B 2013 33 12 12
10: B 2013 40 12 12
11: B 2014 NA 40 40
12: B 2015 20 40 NA
13: B 2016 20 20 NA
Upvotes: 2
Reputation: 7958
Here's what I got so far. The slider package is par of the tidyverse,
library(slider)
dt %>% group_by(Name) %>%
mutate(slide_max = slide_dbl(lag(value), max, .before = 2, na.rm = TRUE, .complete = T))
# A tibble: 13 x 5
# Groups: Name [2]
Name year value max_value slide_max
<chr> <int> <int> <int> <dbl>
1 A 2012 22 NA NA
2 A 2012 99 NA NA
3 A 2013 12 99 99
4 A 2014 1 99 99
5 A 2015 23 12 99
6 A 2016 40 23 23
7 A 2017 12 40 40
8 B 2012 12 NA NA
9 B 2013 33 12 NA
10 B 2013 40 12 33
11 B 2014 NA 40 40
12 B 2015 20 40 40
13 B 2016 20 20 40
Upvotes: 1
Reputation: 3379
Here is a base R solution using mapply.
df <- data.frame(Name = c("A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"),
year = c(2012, 2012, 2013, 2014, 2015, 2016, 2017, 2012, 2013, 2013, 2014, 2015, 2016),
value = c(22, 99, 12, 1, 23, 40, 12, 12, 33, 40, NA, 20, 20),
stringsAsFactors = FALSE)
max.vals <- mapply(function(x, y){
vals <- df[df$year %in% c(x-2,x-1) & df$Name == y,"value"]
max.val <- ifelse(length(vals) > 0, max(vals, na.rm = TRUE), NA)
max.val <- list(y,x,max.val)
names(max.val) <- c("Name","year","max_value")
return(max.val)
},
unique(df[,c("Name","year")])$year,
unique(df[,c("Name","year")])$Name
)
max.vals <- as.data.frame(t(max.vals),stringsAsFactors = FALSE)
df <- merge(df, max.vals)
Upvotes: 0
Reputation: 38520
Here is a method with data.table using aggregation, a two dimensional shift
, apply
, and a join.
library(data.table)
dt[dt[, .(mx=max(value)), by=c("Name", "year")
][, .(year,
max_val=apply(matrix(unlist(shift(mx, 1:2)), ncol=2), 1, max, na.rm=TRUE)),
by=Name],
on=c("Name", "year")][is.infinite(max_val), max_val := NA][]
The first line calculates the maximum value by year and name. The second line, for each name, the year and using apply
the maximum of the two lagged years (using shift(mx, 1:2)
) are returned, dropping NA values. This results in warnings for every line that has 2 NA values, and a -Inf is returned in that place. I had to manually convert the output of shift
into a matrix, in order to feed it to apply, which is not ideal. The resulting data.table is joined onto the original using name and year as IDs. Finally, the -Inf values are replaced with NA in the final line and the result printed with []
.
This returns
Name year value max_value max_val
1: A 2012 22 NA NA
2: A 2012 99 NA NA
3: A 2013 12 99 99
4: A 2014 1 99 99
5: A 2015 23 12 12
6: A 2016 40 23 23
7: A 2017 12 40 40
8: B 2012 12 NA NA
9: B 2013 33 12 12
10: B 2013 40 12 12
11: B 2014 NA 40 40
12: B 2015 20 40 40
13: B 2016 20 20 20
data
dt <-
structure(list(Name = c("A", "A", "A", "A", "A", "A", "A", "B",
"B", "B", "B", "B", "B"), year = c(2012L, 2012L, 2013L, 2014L,
2015L, 2016L, 2017L, 2012L, 2013L, 2013L, 2014L, 2015L, 2016L
), value = c(22L, 99L, 12L, 1L, 23L, 40L, 12L, 12L, 33L, 40L,
NA, 20L, 20L), max_value = c(NA, NA, 99L, 99L, 12L, 23L, 40L,
NA, 12L, 12L, 40L, 40L, 20L)), .Names = c("Name", "year", "value",
"max_value"), row.names = c(NA, -13L), class = c("data.table",
"data.frame"))
Upvotes: 1
Reputation: 994
Use by
:
> by(dat$value, dat$year, function(x) max(x))
dat$year: 2012
[1] 99
------------------------------------------------------------
dat$year: 2013
[1] 40
------------------------------------------------------------
dat$year: 2014
[1] NA
------------------------------------------------------------
dat$year: 2015
[1] 23
------------------------------------------------------------
dat$year: 2016
[1] 40
------------------------------------------------------------
dat$year: 2017
[1] 12
EDIT: Misunderstood the question at first. This should be what you want:
Assign the results to a data.frame:
> dat1=by(dat$value, dat$year, function(x) max(x))
> data.frame("max"=dat1[1:length(dat1)])
max
2012 99
2013 40
2014 NA
2015 23
2016 40
2017 12
Make a new data frame to hold biannual maximum and loop to compare years:
bi_max=data.frame("max"=nrow(dat_max))
for(i in 1:nrow(dat_max)){
bi_max[i,]=max(dat_max$max[i], dat_max$max[i-1], na.rm=T)
}
rownames(bi_max)=rownames(dat_max)
The final result:
> bi_max
max
2012 99
2013 99
2014 40
2015 23
2016 40
2017 40
Upvotes: -1