Reputation: 391
This is a sample dataset -
data.frame(ISIN = c("US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US9898171015", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US5535301064", "US5535301064", "US9898171015",
"US9898171015", "US9898171015"), year = c(2016, 2017, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2015, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
2012, 2013, 2014, 2015, 2016, 2017, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2000, 2001, 2002,
2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2017, 2009, 2010, 2011, 2006, 2007, 2008,
2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2006, 2007,
2008, 2009, 2010, 2011, 2013, 2014, 2015, 2016, 2017, 2016, 2017,
2007, 2008, 2009), DirectorName = c("Steven Paladino", "Steven Paladino",
"Louise Koopman Goeser", "Louise Koopman Goeser", "Louise Koopman Goeser",
"Louise Koopman Goeser", "Louise Koopman Goeser", "Louise Koopman Goeser",
"Louise Koopman Goeser", "Kalen F Holmes", "Doctor Jonathan L Byrnes",
"Doctor Jonathan L Byrnes", "Doctor Jonathan L Byrnes", "Doctor Jonathan L Byrnes",
"Doctor Jonathan L Byrnes", "Doctor Jonathan L Byrnes", "Doctor Jonathan L Byrnes",
"Doctor Jonathan L Byrnes", "Sarah (Sally) Gaines McCoy", "Sarah (Sally) Gaines McCoy",
"Sarah (Sally) Gaines McCoy", "Sarah (Sally) Gaines McCoy", "Sarah (Sally) Gaines McCoy",
"Sarah (Sally) Gaines McCoy", "Sarah (Sally) Gaines McCoy", "Denis F Kelly",
"Denis F Kelly", "Denis F Kelly", "Denis F Kelly", "Denis F Kelly",
"Denis F Kelly", "Denis F Kelly", "Denis F Kelly", "Denis F Kelly",
"Denis F Kelly", "Denis F Kelly", "Denis F Kelly", "Denis F Kelly",
"Denis F Kelly", "Denis F Kelly", "Denis F Kelly", "Denis F Kelly",
"Denis F Kelly", "Raymond (Ray) B Langton", "Raymond (Ray) B Langton",
"Raymond (Ray) B Langton", "Raymond (Ray) B Langton", "Raymond (Ray) B Langton",
"Raymond (Ray) B Langton", "Raymond (Ray) B Langton", "Raymond (Ray) B Langton",
"Roger B Fradin", "Roger B Fradin", "Roger B Fradin", "Roger B Fradin",
"Roger B Fradin", "Roger B Fradin", "Roger B Fradin", "Roger B Fradin",
"Roger B Fradin", "Roger B Fradin", "Roger B Fradin", "Roger B Fradin",
"Roger B Fradin", "Roger B Fradin", "Roger B Fradin", "Roger B Fradin",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Scott Andrew Bailey", "William (Bill) Milroy Barnum Jr", "William (Bill) Milroy Barnum Jr",
"William (Bill) Milroy Barnum Jr", "Thomas (Tom) E Davin", "James (Jim) M Weber",
"James (Jim) M Weber", "James (Jim) M Weber", "James (Jim) M Weber",
"James (Jim) M Weber", "Ernest R Johnson", "Ernest R Johnson",
"Ernest R Johnson", "Ernest R Johnson", "Ernest R Johnson", "Ernest R Johnson",
"Matthew (Matt) L Hyde", "Matthew (Matt) L Hyde", "Matthew (Matt) L Hyde",
"Matthew (Matt) L Hyde", "Matthew (Matt) L Hyde", "Matthew (Matt) L Hyde",
"Travis D Smith", "Travis D Smith", "Travis D Smith", "Travis D Smith",
"Travis D Smith", "Michael (Mike) C Kaufmann", "Michael (Mike) C Kaufmann",
"David (Dave) M DeMattei", "David (Dave) M DeMattei", "David (Dave) M DeMattei"
), DirectorID = c("1000169302", "1000169302", "1058973478", "1058973478",
"1058973478", "1058973478", "1058973478", "1058973478", "1058973478",
"11051172801", "11275933344", "11275933344", "11275933344", "11275933344",
"11275933344", "11275933344", "11275933344", "11275933344", "11434863691",
"11434863691", "11434863691", "11434863691", "11434863691", "11434863691",
"11434863691", "1223062984", "1223062984", "1223062984", "1223062984",
"1223062984", "1223062984", "1223062984", "1223062984", "1223062984",
"1223062984", "1223062984", "1223062984", "1223062984", "1223062984",
"1223062984", "1223062984", "1223062984", "1223062984", "1223122984",
"1223122984", "1223122984", "1223122984", "1223122984", "1223122984",
"1223122984", "1223122984", "1223392984", "1223392984", "1223392984",
"1223392984", "1223392984", "1223392984", "1223392984", "1223392984",
"1223392984", "1223392984", "1223392984", "1223392984", "1223392984",
"1223392984", "1223392984", "1223392984", "1223552984", "1223552984",
"1223552984", "1223552984", "1223552984", "1223552984", "1223552984",
"1223552984", "1223552984", "1223552984", "1223552984", "1223552984",
"1223552984", "1223552984", "1223552984", "1223552984", "1223552984",
"1223552984", "174488610522", "20462211719", "20462211719", "20462211719",
"2247441792", "3581636766", "3581636766", "3581636766", "3581636766",
"3581636766", "40425210975", "40425210975", "40425210975", "40425210975",
"40425210975", "40425210975", "4842568996", "4842568996", "4842568996",
"4842568996", "4842568996", "4842568996", "53006212569", "53006212569",
"53006212569", "53006212569", "53006212569", "5532705122", "5532705122",
"759047198", "759047198", "759047198"))
Actually I want to filter out those DIRECTOR_ID
that are same from previous year t-1
to current year t
I run the following code to create group -
ceo1 %>%
group_by(ISIN, YEAR) %>%
mutate(
GROUP_ID = cur_group_id()
)
# A tibble: 38 x 6
# Groups: ISIN, YEAR [12]
ROW ISIN YEAR DIRECTOR_NAME DIRECTOR_ID GROUP_ID
<dbl> <chr> <dbl> <chr> <dbl> <int>
1 1 US98981710~ 2006 Thomas (Tom) E Davin 2247441792 1
2 2 US98981710~ 2006 Matthew (Matt) L Hyde 4842568996 1
3 3 US98981710~ 2007 James (Jim) M Weber 3581636766 2
4 4 US98981710~ 2007 Matthew (Matt) L Hyde 4842568996 2
5 5 US98981710~ 2007 David (Dave) M DeMattei 759047198 2
6 6 US98981710~ 2008 James (Jim) M Weber 3581636766 3
7 7 US98981710~ 2008 Matthew (Matt) L Hyde 4842568996 3
8 8 US98981710~ 2008 David (Dave) M DeMattei 759047198 3
9 9 US98981710~ 2009 William (Bill) Milroy Ba~ 20462211719 4
10 10 US98981710~ 2009 James (Jim) M Weber 3581636766 4
Then I do not know how to filter out those DIRECTOR_ID that are same from year to year. For example, for above outupt - DIRECTOR_ID - 4842568996 is in 2006, 2007, 2008 and DIRECTOR_ID - 3581636766 is in 2007, 2008, 2009. I want to filter them out who are present previous year (t-1
) and current year (t
). Note that I have to keep all variables in the new data set. Thanks
The expected output until year 2012 of sample data will look like this -
A tibble: 16 x 5
ROW ISIN YEAR DIRECTOR_NAME DIRECTOR_ID
<dbl> <chr> <dbl> <chr> <dbl>
1 3 US9898171015 2007 James (Jim) M Weber 3581636766
2 4 US9898171015 2007 Matthew (Matt) L Hyde 4842568996
3 5 US9898171015 2007 David (Dave) M DeMattei 759047198
4 6 US9898171015 2008 James (Jim) M Weber 3581636766
5 7 US9898171015 2008 Matthew (Matt) L Hyde 4842568996
6 8 US9898171015 2008 David (Dave) M DeMattei 759047198
7 9 US9898171015 2009 William (Bill) Milroy Barnum Jr 20462211719
8 10 US9898171015 2009 James (Jim) M Weber 3581636766
9 11 US9898171015 2009 Matthew (Matt) L Hyde 4842568996
10 13 US9898171015 2010 William (Bill) Milroy Barnum Jr 20462211719
11 14 US9898171015 2010 James (Jim) M Weber 3581636766
12 15 US9898171015 2010 Matthew (Matt) L Hyde 4842568996
13 16 US9898171015 2011 Sarah (Sally) Gaines McCoy 11434863691
14 17 US9898171015 2011 William (Bill) Milroy Barnum Jr 20462211719
15 19 US9898171015 2011 Matthew (Matt) L Hyde 4842568996
16 20 US9898171015 2012 Sarah (Sally) Gaines McCoy 11434863691
Upvotes: 0
Views: 91
Reputation: 21757
I think this is what you need:
library(dplyr)
library(tidyr)
dat %>%
complete(DIRECTOR_ID, nesting(YEAR)) %>%
arrange(DIRECTOR_ID, YEAR) %>%
mutate(DIRECTOR_ID = case_when(is.na(ROW) ~ NA_real_,
TRUE ~ DIRECTOR_ID),
inprev = DIRECTOR_ID == lag(DIRECTOR_ID)) %>%
filter(inprev) %>%
na.omit() %>%
select(-inprev) %>%
arrange(ROW)
# # A tibble: 28 x 5
# DIRECTOR_ID YEAR ROW ISIN DIRECTOR_NAME
# <dbl> <dbl> <dbl> <chr> <chr>
# 1 4842568996 2007 4 US9898171… Matthew (Matt) L Hyde
# 2 3581636766 2008 6 US9898171… James (Jim) M Weber
# 3 4842568996 2008 7 US9898171… Matthew (Matt) L Hyde
# 4 759047198 2008 8 US9898171… David (Dave) M DeMattei
# 5 3581636766 2009 10 US9898171… James (Jim) M Weber
# 6 4842568996 2009 11 US9898171… Matthew (Matt) L Hyde
# 7 759047198 2009 12 US9898171… David (Dave) M DeMattei
# 8 20462211719 2010 13 US9898171… William (Bill) Milroy …
# 9 3581636766 2010 14 US9898171… James (Jim) M Weber
# 10 4842568996 2010 15 US9898171… Matthew (Matt) L Hyde
However, I'm not sure because I think the expected output above is still wrong. Only Davin and Hyde are present in 2006, so the only people who should be able to be present in 2007 are either Davin or Hyde. Since Davin is not present in 2007, he does not show up in the output above. If I've misunderstood, happy to reconsider the answer.
Upvotes: 1