Reputation: 107
I have this dataframe with time in minutes. I want to create a new column that indicates the timedifference (in minutes), between the row in which a substitution occurs and the previous rows.
# Data
dat <- structure(list(game_id = c(303377L, 303377L, 303377L, 303377L,
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L,
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L,
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L,
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L,
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L,
303377L, 303377L, 303377L, 303377L, 303377L, 303377L, 303377L,
303377L, 303377L, 303377L), minute = c(57L, 57L, 57L, 57L, 57L,
60L, 60L, 60L, 60L, 61L, 62L, 62L, 62L, 62L, 62L, 62L, 62L, 62L,
62L, 62L, 62L, 62L, 62L, 62L, 63L, 63L, 63L, 64L, 64L, 64L, 65L,
65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L, 65L,
65L, 65L, 65L, 65L, 66L), team_name = c("Getafe", "Getafe", "Getafe",
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe",
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe",
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe",
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe",
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe",
"Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe", "Getafe",
"Getafe", "Getafe", "Getafe", "Getafe"), type_name = c("pass",
"dribble", "pass", "foul", "foul", "foul", "foul", "dribble",
"pass", "throw_in", "tackle", "dribble", "tackle", "pass", "dribble",
"interception", "pass", "dribble", "pass", "dribble", "pass",
"dribble", "cross", "pass", "dribble", "interception", "take_on",
"clearance", "dribble", "pass", "dribble", "pass", "dribble",
"freekick_short", "pass", "dribble", "pass", "dribble", "pass",
"dribble", "pass", "dribble", "pass", "dribble", "pass", "dribble",
"cross", "shot", "substitution"), target = c(1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1), timediff = c(0, 0, 0, 0, 0, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9)), row.names = 150:198, class = "data.frame")
I want to calculate the difference in minutes
between a specific row, which is set by the name "substitution", and the rows before that. I currently have created the dataframe below using the following code:
dat$timediff <- ave(dat$minute, dat$game_id, FUN=function(x) replace(x - x[1], 1, 0))
However, this leads to the inverse of what I want to achieve, namely:
game_id minute team_name type_name target timediff
150 303377 57 Getafe pass 1 0
151 303377 57 Getafe dribble 1 0
152 303377 57 Getafe pass 1 0
153 303377 57 Getafe foul 1 0
154 303377 57 Getafe foul 1 0
155 303377 60 Getafe foul 1 3
156 303377 60 Getafe foul 1 3
157 303377 60 Getafe dribble 1 3
158 303377 60 Getafe pass 1 3
159 303377 61 Getafe throw_in 1 4
160 303377 62 Getafe tackle 1 5
161 303377 62 Getafe dribble 1 5
162 303377 62 Getafe tackle 1 5
163 303377 62 Getafe pass 1 5
164 303377 62 Getafe dribble 1 5
165 303377 62 Getafe interception 1 5
166 303377 62 Getafe pass 1 5
167 303377 62 Getafe dribble 1 5
168 303377 62 Getafe pass 1 5
169 303377 62 Getafe dribble 1 5
170 303377 62 Getafe pass 1 5
171 303377 62 Getafe dribble 1 5
172 303377 62 Getafe cross 1 5
173 303377 62 Getafe pass 1 5
174 303377 63 Getafe dribble 1 6
175 303377 63 Getafe interception 1 6
176 303377 63 Getafe take_on 1 6
177 303377 64 Getafe clearance 1 7
178 303377 64 Getafe dribble 1 7
179 303377 64 Getafe pass 1 7
180 303377 65 Getafe dribble 1 8
181 303377 65 Getafe pass 1 8
182 303377 65 Getafe dribble 1 8
183 303377 65 Getafe freekick_short 1 8
184 303377 65 Getafe pass 1 8
185 303377 65 Getafe dribble 1 8
186 303377 65 Getafe pass 1 8
187 303377 65 Getafe dribble 1 8
188 303377 65 Getafe pass 1 8
189 303377 65 Getafe dribble 1 8
190 303377 65 Getafe pass 1 8
191 303377 65 Getafe dribble 1 8
192 303377 65 Getafe pass 1 8
193 303377 65 Getafe dribble 1 8
194 303377 65 Getafe pass 1 8
195 303377 65 Getafe dribble 1 8
196 303377 65 Getafe cross 1 8
197 303377 65 Getafe shot 1 8
198 303377 66 Getafe substitution 1 9
I would like to achieve the opposite, so the row where type_name == substitution
takes place, should have timediff == 0
, and then build up the timediff with the rows before:
game_id minute team_name type_name target timediff
173 303377 62 Getafe pass 1 4
174 303377 63 Getafe dribble 1 3
175 303377 63 Getafe interception 1 3
176 303377 63 Getafe take_on 1 3
177 303377 64 Getafe clearance 1 2
178 303377 64 Getafe dribble 1 2
179 303377 64 Getafe pass 1 2
180 303377 65 Getafe dribble 1 1
181 303377 65 Getafe pass 1 1
182 303377 65 Getafe dribble 1 1
183 303377 65 Getafe freekick_short 1 1
184 303377 65 Getafe pass 1 1
185 303377 65 Getafe dribble 1 1
186 303377 65 Getafe pass 1 1
187 303377 65 Getafe dribble 1 1
188 303377 65 Getafe pass 1 1
189 303377 65 Getafe dribble 1 1
190 303377 65 Getafe pass 1 1
191 303377 65 Getafe dribble 1 1
192 303377 65 Getafe pass 1 1
193 303377 65 Getafe dribble 1 1
194 303377 65 Getafe pass 1 1
195 303377 65 Getafe dribble 1 1
196 303377 65 Getafe cross 1 1
197 303377 65 Getafe shot 1 1
198 303377 66 Getafe substitution 1 0
Upvotes: 0
Views: 45
Reputation: 388817
You can use match
to get the minute
value where type_name
is 'substitution'
and subtract it with every minute
value.
library(dplyr)
dat %>%
group_by(game_id) %>%
mutate(timediff = minute[match('substitution', type_name)] - minute) %>%
ungroup
If you have multiple substitutions in a game, you can use -
dat %>%
group_by(game_id, grp = cumsum(lag(type_name, default = '') == 'substitution')) %>%
mutate(timediff = minute[match('substitution', type_name)] - minute) %>%
ungroup
Upvotes: 1