Reputation: 338
I have a set of columns that have character object. I want to be able to gather them in a single column. Let's say dataset looks like this: The region is 16 but I showed 6 here.
country regionarc regionarb regionarh regionary regionard regionarw
1 g dome NA
2 g ashi NA
3 g gongo NA
4 g NA salgi
5 g forh
I want it to look like this:
country regionarc regionarb regionarh regionary regionard regionarw district
1 g dome NA dome
2 g ashi NA ashi
3 g gongo NA gongo
4 g NA salgi salgi
5 g NA forh forh
I think I may have to mutate and select the columns but I am not sure how to gather the districts in one columns.How can I gather the district inputs into one column? Thank you in advance.
dput output
structure(list(date = c("08-Jun-20", "08-Jun-20", "09-Jun-20",
"09-Jun-20"), Which.country.do.you.live.in. = c("G ", "G ", "G ",
"G "), Must.be.in.Ghana.form.not.visible = c(NA, NA, NA, NA),
Which.region.do.you.live.in. = c(NA, NA, NA, NA), Which.district.do.you.live.in...Ahafo.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Ashanti.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Bono.Region. = c("Dormaa East",
"", "", ""), Which.district.do.you.live.in...Bono.East.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Central.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Eastern.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Greater.Accra.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Northern.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Northern.East.Region. = c("",
"East Mamprusi", "", ""), Which.district.do.you.live.in...Oti.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Savannah.Region. = c("",
"", "", "Central Gonja"), Which.district.do.you.live.in...Upper.East.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Upper.West.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Volta.Region. = c("",
"", "Ho Municipal", ""), Which.district.do.you.live.in...Western.Region. = c(NA,
NA, NA, NA), Which.district.do.you.live.in...Western.North.Region. = c(NA,
NA, NA, NA), X = c(NA, NA, NA, NA), X.1 = c(NA, NA, NA, NA
), X.2 = c(NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-4L))
Upvotes: 1
Views: 150
Reputation: 887148
If it is NA
elements, we can use coalesce
library(dplyr)
df1 %>%
mutate(district = coalesce(!!! select(., starts_with('region'))))
# country regionarc regionarb regionarh regionary regionard regionarw district
#1 g dome <NA> <NA> NA <NA> <NA> dome
#2 g <NA> <NA> ashi NA <NA> <NA> ashi
#3 g <NA> gongo <NA> NA <NA> <NA> gongo
#4 g <NA> <NA> <NA> NA salgi <NA> salgi
#5 g <NA> <NA> <NA> NA <NA> forh forh
Or with reduce/coalesce
library(purrr)
df1 %>%
mutate(district = select(., starts_with('region')) %>%
reduce(coalesce))
Or if the columns have blank ("
), we can convert to NA
and then use coalesce
df1 %>%
transmute_at(vars(starts_with('region')), na_if, '') %>%
transmute(district = coalesce(!!! .))
bind_cols(df1, .)
In the OP's dataset, we can select
the column names that starts_with
'Which.district.do.you.live.in'
, convert the blanks (""
) to NA
(na_if
) with mutate/across
(from the dplyr new version) or with mutate_all
and use that in coalesce
df2 <- df2 %>%
mutate(district =coalesce(!!! select(., starts_with('Which.district.do.you.live.in')) %>%
mutate(across(everything(), na_if, ""))) )
df2$district
#[1] "Dormaa East" "East Mamprusi" "Ho Municipal" "Central Gonja"
Or with mutate_all
df2 %>%
mutate(district =coalesce(!!! select(., starts_with('Which.district.do.you.live.in')) %>%
mutate_all(na_if, "")))
Or in base R
with pmin/pmax
df1$district <- do.call(pmax, c(df1[-1], na.rm = TRUE))
df1$district
#[1] "dome" "ashi" "gongo" "salgi" "forh"
Or we can use max.col
df1$district <- df1[-1][cbind(seq_len(nrow(df1)), max.col(!is.na(df1[-1]), 'first'))]
df1$district
#[1] "dome" "ashi" "gongo" "salgi" "forh"
df1 <- structure(list(country = c("g", "g", "g", "g", "g"), regionarc = c("dome",
NA, NA, NA, NA), regionarb = c(NA, NA, "gongo", NA, NA), regionarh = c(NA,
"ashi", NA, NA, NA), regionary = c(NA, NA, NA, NA, NA), regionard = c(NA,
NA, NA, "salgi", NA), regionarw = c(NA, NA, NA, NA, "forh")),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5"))
Upvotes: 3