Valabe
Valabe

Reputation: 101

reshape data frame and concatenating columns in R

I tried to reshape my data frame from wide to long format. At the moment the data frame looks like this:

structure(list(study_site = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 
5L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 1L, 2L, 3L, 1L, 4L, 1L, 
4L, 3L, 3L, 3L, 1L, 3L, 5L, 4L, 4L, 4L, 3L, 3L, 5L, 5L, 4L, 4L, 
4L, 1L, 4L, 3L, 5L, 5L, 5L, 1L, 3L, 5L, 3L, 3L, 3L, 5L, 5L, 3L, 
4L, 2L), .Label = c("N", "no_nest", "O", "S", "W"), class = "factor"), 
    coords.N = structure(c(54L, 54L, 40L, 40L, 40L, 40L, 39L, 
    67L, 67L, 55L, 55L, 64L, 64L, 64L, 78L, 81L, 47L, 80L, 83L, 
    60L, 46L, 46L, 76L, 88L, 88L, 88L, 84L, 84L, 30L, 58L, 58L, 
    58L, 25L, 25L, 19L, 19L, 42L, 42L, 42L, 29L, 45L, 90L, 91L, 
    91L, 91L, 91L, 89L, 89L, 87L, 87L, 87L, 56L, 56L, 61L, 35L, 
    36L), .Label = c("40.40463", "48.40168", "48.40178", "48.40215", 
    "48.40235", "48.40309", "48.40390", "48.40393", "48.40396", 
    "48.40405", "48.40410", "48.40411", "48.40415", "48.40416", 
    "48.40424", "48.40425", "48.40430", "48.40435", "48.40436 ", 
    "48.40438", "48.40443", "48.40450", "48.40451", "48.40454", 
    "48.40455", "48.40459", "48.40460", "48.40461", "48.40466", 
    "48.40466 ", "48.40467", "48.40469", "48.40471", "48.40477", 
    "48.40479 ", "48.40481", "48.40482", "48.40483", "48.40488 ", 
    "48.40491", "48.40493", "48.40504 ", "48.40508", "48.40513", 
    "48.40515", "48.40519 ", "48.40522 ", "48.40523", "48.40525", 
    "48.40526", "48.40529", "48.40532", "48.40537", "48.40537 ", 
    "48.40538 ", "48.40543 ", "48.40549", "48.40549 ", "48.40557", 
    "48.40557 ", "48.40558", "48.40565", "48.40571", "48.40575", 
    "48.40580", "48.40584", "48.40586 ", "48.40591", "48.40596", 
    "48.40598", "48.40599", "48.40611", "48.40612", "48.40617", 
    "48.40626", "48.40632 ", "48.40633", "48.40635 ", "48.40636", 
    "48.40637", "48.40638 ", "48.40639", "48.40639 ", "48.40641 ", 
    "48.40652", "48.40655", "48.40656 ", "48.40657 ", "48.40687 ", 
    "48.40690 ", "48.40703", "48.40718", "48.40719", "48.40726", 
    "48.40742", "48.40748", "NO_DATA"), class = "factor"), coords.E = structure(c(67L, 
    67L, 49L, 49L, 49L, 49L, 27L, 67L, 67L, 70L, 70L, 68L, 68L, 
    68L, 87L, 94L, 68L, 83L, 90L, 73L, 52L, 52L, 2L, 95L, 95L, 
    95L, 93L, 93L, 32L, 69L, 69L, 69L, 55L, 55L, 24L, 24L, 29L, 
    29L, 29L, 30L, 48L, 85L, 1L, 1L, 1L, 1L, 78L, 78L, 79L, 79L, 
    79L, 64L, 64L, 63L, 66L, 45L), .Label = c(" 015.82024", " 015.82164", 
    "015.80237", "015.80263", "015.80309", "015.80341", "015.80369", 
    "015.80388", "015.80394", "015.80399", "015.80406", "015.80435", 
    "015.80436", "015.80466", "015.80512", "015.80517", "015.80548", 
    "015.80551", "015.80572", "015.80583", "015.80609", "015.80636", 
    "015.80659", "015.80703", "015.80723", "015.80779", "015.80795", 
    "015.80803", "015.80821", "015.80843", "015.80871", "015.80875", 
    "015.80888", "015.80897", "015.80901", "015.80903", "015.80905", 
    "015.80906", "015.80908", "015.80909", "015.80921", "015.80923", 
    "015.80929", "015.80939", "015.80993", "015.81007", "015.81018", 
    "015.81087", "015.81113", "015.81132", "015.81151", "015.81180", 
    "015.81241", "015.81273", "015.81305", "015.81406", "015.81422", 
    "015.81522", "015.81526", "015.81543", "015.81546", "015.81564", 
    "015.81628", "015.81632", "015.81678", "015.81682", "015.81700", 
    "015.81703", "015.81735", "015.81739", "015.81770", "015.81783", 
    "015.81784", "015.81800", "015.81849", "015.81992", "015.82012", 
    "015.82029", "015.82039", "015.82083", "015.82099", "015.82126", 
    "015.82180", "015.82230", "015.82232", "015.82255", "015.82265", 
    "015.82290", "015.82303", "015.82304", "015.82346", "015.82362", 
    "015.82376", "015.82398", "015.82451", "015.82500", "015.82519", 
    "015.82555", "015.82579", "015.82634", "NO_DATA"), class = "factor"), 
    study_ID = c(120L, 120L, 1L, 1L, 1L, 1L, 9L, 39L, 39L, 109L, 
    109L, 110L, 110L, 110L, 45L, 58L, 121L, 96L, 97L, 40L, 43L, 
    43L, 47L, 57L, 57L, 57L, 114L, 114L, 67L, 71L, 71L, 71L, 
    83L, 83L, 4L, 4L, 10L, 10L, 10L, 106L, 108L, 46L, 115L, 115L, 
    115L, 115L, 116L, 116L, 117L, 117L, 117L, 70L, 70L, 119L, 
    95L, 3L), species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 2L), .Label = c("barn swallow", "no_nest"), class = "factor"), 
    first_visit = c(1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 5L, 0L, 
    1L, 0L, 2L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 
    1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 3L, 0L, 0L, 0L, 4L, 
    1L, 8L, 0L, 0L, 1L, 2L, 1L, 5L, 0L, 0L, 1L, 0L, 1L, 1L, 0L
    ), second_visit = c(1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 5L, 
    0L, 1L, 0L, 2L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 
    0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 4L, 0L, 0L, 
    4L, 1L, 0L, 8L, 0L, 1L, 2L, 1L, 0L, 5L, 0L, 1L, 0L, 0L, 1L, 
    0L), third_visit = c(0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 
    0L, 5L, 0L, 1L, 2L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 
    0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 2L, 0L, 0L, 6L, 
    1L, 4L, 1L, 0L, 0L, 8L, 1L, 2L, 1L, 0L, 0L, 5L, 1L, 1L, 0L, 
    0L, 0L), used_1st_visit = c(0L, 2L, 1L, 0L, 0L, 0L, 1L, 0L, 
    1L, 2L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 
    0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 3L, 0L, 
    0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
    1L, 0L, 0L), used_2nd_visit = c(0L, 2L, 1L, 0L, 0L, 0L, 1L, 
    0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 
    1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 
    4L, 0L, 0L, 0L, 1L, 0L, 5L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 
    0L, 0L, 0L, 0L), used_3rd_visit = c(0L, 0L, 1L, 0L, 0L, 0L, 
    1L, 0L, 1L, 0L, 4L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 
    1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 2L, 
    0L, 0L, 6L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 2L, 
    0L, 1L, 0L, 0L, 0L), nest_condition = structure(c(3L, 5L, 
    5L, 2L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 2L, 5L, 3L, 2L, 
    4L, 5L, 5L, 5L, 5L, 3L, 2L, 5L, 5L, 2L, 2L, 5L, 1L, 5L, 5L, 
    5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 2L, 3L, 5L, 5L, 5L, 2L, 5L, 
    3L, 5L, 5L, 5L, 2L, 5L, 3L, 5L, 4L), .Label = c(" ready ", 
    "damaged", "in_progress", "no_nest", "ready"), class = "factor"), 
    nesting_site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 1L, 
    1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L), .Label = c("inside", "no_nest", "outside"), class = "factor"), 
    distance = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 3L, 4L, 3L, 6L, 4L, 4L, 2L, 2L, 4L, 2L, 
    2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 2L, 4L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    4L, 6L), .Label = c("1", "2", "3", "4", "no_data", "no_nest"
    ), class = "factor"), material = structure(c(5L, 5L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 5L, 5L, 1L, 1L, 2L), .Label = c("fine", "fine plaster", 
    "medium fine plaster", "no_data", "rough", "rough plaster", 
    "smooth plaster", "under construction", "wood"), class = "factor"), 
    housetype = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 
    3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 
    3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 5L, 5L, 5L, 
    4L, 4L, 5L, 3L, 3L, 3L, 3L, 5L, 5L, 3L, 3L, 3L, 4L, 4L, 5L, 
    5L, 4L), .Label = c("auto repair shop", "barn ", "hall", 
    "residence", "stable"), class = "factor"), usage_house = structure(c(5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L), .Label = c("auto_repair", 
    "barn", "inhabited", "under construction", "used"), class = "factor"), 
    age = c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L)), row.names = c(1L, 
2L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 21L, 22L, 23L, 24L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 
35L, 36L, 37L, 38L, 39L, 40L, 41L, 89L, 90L, 91L, 92L, 93L, 94L, 
95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 
106L, 107L, 108L, 109L, 111L), class = "data.frame")

used_1st..2nd... means that the birds have used this certain number of nests at the first, second,... control.

I would like to have that each row in my data frame to always represent a used/ unused nest as well as no_nest:

     ID species `1st_visit` `2nd_visit` `3rd_visit` used_1st_visit used_2nd_visit used_3rd_visit
  <dbl> <chr>         <dbl>       <dbl>       <dbl>          <dbl>          <dbl>          <dbl>
1   120 barn s~           1           1           0              0              0              0
2   120 barn s~           1           1           0              1              1              0
3   120 barn s~           1           1           0              1              1              0
4    39 barn s~           1           1           1              1              1              1
5     8 barn s~           1           1           1              1              0              0
6     8 barn s~           1           1           1              0              0              0

Unfortunately I have no idea how to concatenate the columns to get the final data frame. Does anybody has an idea?

Upvotes: 0

Views: 62

Answers (1)

Em Laskey
Em Laskey

Reputation: 607

I'm not completely sure what you are asking for, but this is what I understood: In the long data frame...

  • if all visits (coulmns used_first_visit, used_sec_visit etc.) are 0, combine them to one row, marking it 0
  • if any visits are not 0, keep as many rows as there are non-zero visits and mark them with 1

This is my dplyr-solution (it's not very pretty, but it works):

# create data
dat <- data.frame("visits" = c("first", "first", "second", "second", "third", "third"), "study_id" = rep(120, 6), "used_first_visit" = c(0, 2, 0, 2, 0, 2), "used_sec_visit" = c(0, 2, 0, 2, 0, 2), "used_thrd_visit" = rep(0, 6), "nest_cond" = c("damaged", "ready", "damaged", "ready", "damaged", "ready"))

# make long data frame and filter values
dat_long <- dat %>%
  pivot_longer(c(3:5),names_to = "whatever", values_to = "used")  %>% # make long data frame
  select(-c(whatever)) %>% # get rid of name column
  group_by(visits, nest_cond) %>% # group data
  mutate(used = ifelse(all(used == 0) & row_number() == 1, 10, used)) %>% # if the whole group is 0, mark one row for later filtering
  filter(used > 0 ) %>% # filter
  mutate(used = ifelse(used == 10, 0, 1)) # change to correct numbers

Let me know if this is not what you are looking for!

Upvotes: 1

Related Questions