useR
useR

Reputation: 3082

data standardization for all group data.frame in R

I have a dataset as below

Date <- rep(c("Jan", "Feb"), 3)[1:5]
Group <- c(rep(letters[1:2],each=2),"c")
value <- sample(1:10,5)
data <- data.frame(Date, Group, value)

> data
  Date Group value
1  Jan     a     2
2  Feb     a     7
3  Jan     b     3
4  Feb     b     9
5  Jan     c     1

As you can observed, for group c it do not have data on Date=Feb. How can i make a dataset such that

> DATA
  Date Group value
1  Jan     a     2
2  Feb     a     7
3  Jan     b     3
4  Feb     b     9
5  Jan     c     1
6  Feb     c     0 

I have added last row such that value for group c in feb is 0.

Thanks

Upvotes: 0

Views: 85

Answers (3)

akrun
akrun

Reputation: 887781

Using reshape2

library(reshape2)     
melt(dcast(data, Date~Group, value.var="value",fill=0), id.var="Date") #values differ as there was no set.seed()
#   Date variable value
#1  Feb        a     1
#2  Jan        a    10
#3  Feb        b     7
#4  Jan        b     4
#5  Feb        c     0
#6  Jan        c     5

Or using dplyr

 library(dplyr)
 library(tidyr)
  data%>%
  spread(Group, value, fill=0) %>% 
  gather(Group, value, a:c)
 #  Date Group value
 #1  Feb     a     1
 #2  Jan     a    10
 #3  Feb     b     7
 #4  Jan     b     4
 #5  Feb     c     0
 #6  Jan     c     5

Upvotes: 1

talat
talat

Reputation: 70336

With base R you can use xtabs wrapped in as.data.frame:

as.data.frame(xtabs(formula = value ~ Date + Group, data = data))
#  Date Group Freq
#1  Feb     a    8
#2  Jan     a    6
#3  Feb     b    4
#4  Jan     b    1
#5  Feb     c    0
#6  Jan     c   10

Upvotes: 3

zx8754
zx8754

Reputation: 56239

Using merge:

#get all combinations of 2 columns
all.comb <- expand.grid(unique(data$Date),unique(data$Group))
colnames(all.comb) <- c("Date","Group")

#merge with all.x=TRUE to keep nonmatched rows
res <- merge(all.comb,data,all.x=TRUE)

#convert NA to 0
res$value[is.na(res$value)] <- 0

#result
res
# Date Group value
# 1  Feb     a     3
# 2  Feb     b     4
# 3  Feb     c     0
# 4  Jan     a     5
# 5  Jan     b     7
# 6  Jan     c    10

Upvotes: 2

Related Questions