Reputation: 3109
I have the following data.frame
user_id 1 2 3 4 5 6 7 8 9
1 54449024717783 0 0 1 0 0 0 0 0 0
2 117592134783793 0 0 0 0 0 1 0 0 0
3 187145545782493 0 0 1 0 0 0 0 0 0
4 245003020993334 0 0 0 0 0 1 0 0 0
5 332625230637592 0 1 0 0 0 0 0 0 0
6 336336752713947 0 1 0 0 0 0 0 0 0
what I would like to do is to create one column (and remove 1:9) and insert the column name where I have the value 1 , each user contain only column with the value 1 ,
If im running the following function:
rowSums(users_cluster(users_cluster), dims = 1)
it will summarize all the rows value but I need to duplicate it with the column name
Upvotes: 3
Views: 161
Reputation: 42564
For the sake of completeness, here is also a data.table
solution which uses melt()
to reshape from wide to long format:
library(data.table)
melt(setDT(DF), id = "user_id")[value == 1L][order(user_id), !"value"]
user_id variable
1: 54449024717783 3
2: 117592134783793 6
3: 187145545782493 3
4: 245003020993334 6
5: 332625230637592 2
6: 336336752713947 2
This takes advantage of the fact that the sample dataset is already sorted by ascending user_id
.
In case the sample dataset has a different order which should be maintained in the final result, it is necessary to remember that order by introducing a temporary row id:
melt(setDT(DF), id = "user_id")[, rn := rowid(variable)][value == 1L][
order(rn), !c("rn", "value")]
or, alternatively,
melt(setDT(DF), id = "user_id")[, rn := rowid(variable)][, setorder(.SD, rn)][
value == 1L, !c("rn", "value")]
library(data.table)
DF <- fread(
"i user_id 1 2 3 4 5 6 7 8 9
1 54449024717783 0 0 1 0 0 0 0 0 0
2 117592134783793 0 0 0 0 0 1 0 0 0
3 187145545782493 0 0 1 0 0 0 0 0 0
4 245003020993334 0 0 0 0 0 1 0 0 0
5 332625230637592 0 1 0 0 0 0 0 0 0
6 336336752713947 0 1 0 0 0 0 0 0 0"
, drop = 1L)[, lapply(.SD, as.integer), by = user_id]
Upvotes: 1
Reputation: 887511
Another approach is max.col
from base R
as the user specified each user contain only column with the value 1
cbind(dat[1], ind = max.col(dat[-1], 'first'))
# user_id ind
#1 54449024717783 3
#2 117592134783793 6
#3 187145545782493 3
#4 245003020993334 6
#5 332625230637592 2
#6 336336752713947 2
Upvotes: 5
Reputation: 39154
A solution using the tidyverse.
library(tidyverse)
dat2 <- dat %>%
mutate(ID = 1:n()) %>%
gather(Column, Value, -user_id, -ID) %>%
filter(Value == 1) %>%
arrange(ID) %>%
select(-Value, -ID) %>%
as.data.frame()
dat2
# user_id Column
# 1 54449024717783 3
# 2 117592134783793 6
# 3 187145545782493 3
# 4 245003020993334 6
# 5 332625230637592 2
# 6 336336752713947 2
DATA
dat <- read.table(text = " user_id 1 2 3 4 5 6 7 8 9
1 54449024717783 0 0 1 0 0 0 0 0 0
2 117592134783793 0 0 0 0 0 1 0 0 0
3 187145545782493 0 0 1 0 0 0 0 0 0
4 245003020993334 0 0 0 0 0 1 0 0 0
5 332625230637592 0 1 0 0 0 0 0 0 0
6 336336752713947 0 1 0 0 0 0 0 0 0",
header = TRUE, stringsAsFactors = FALSE)
library(tidyverse)
dat <- as.tibble(dat) %>%
setNames(sub("X", "", names(.))) %>%
mutate(user_id = as.character(user_id))
Upvotes: 3
Reputation: 25395
Another base R solution:
df$ind = apply(df[,-1]>0,1,which)
df[,c("user_id","ind")]
Output:
user_id ind
1 5.444902e+13 3
2 1.175921e+14 6
3 1.871455e+14 3
4 2.450030e+14 6
5 3.326252e+14 2
6 3.363368e+14 2
Upvotes: 3
Reputation: 24198
Here's another base
R option:
inds <- which(df[,-1]!=0,TRUE)
df$newcol <- inds[order(row.names(inds)),][,2]
df[,c(1,11)]
# user_id newcol
#1 5.444902e+13 3
#2 1.175921e+14 6
#3 1.871455e+14 3
#4 2.450030e+14 6
#5 3.326252e+14 2
#6 3.363368e+14 2
Upvotes: 5
Reputation: 28369
Base R solution:
data.frame(user_id = df[, 1],
name = which(t(df[, -1] == 1)) %% (ncol(df) - 1))
# user_id name
# 1 54449024717783 3
# 2 117592134783793 6
# 3 187145545782493 3
# 4 245003020993334 6
# 5 332625230637592 2
# 6 336336752713947 2
Upvotes: 8