Reputation: 1435
I have a data frame with grouped rows and columns and want to add the group of rows as a whole a number of times based on information contained in another data frame. In the example data I have shown the final output I wish to have by duplicating rows with id 1 (January 1, 1990) three times. variable "dups" indicates how many times the group should be duplicated.
Here is a sample dput:
> dput(df)
structure(list(date = c("01-jan-90", "01-jan-90", "01-jan-90",
"01-jan-90", "01-jan-90", "02-jan-90", "02-jan-90", "02-jan-90",
"02-jan-90", "02-jan-90"), rdate = c("08-jan-90", "15-jan-90",
"01-jan-90", "22-jan-90", "29-jan-90", "09-jan-90", "16-jan-90",
"02-jan-90", "23-jan-90", "30-jan-90"), id = c(1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L)), datalabel = "", time.stamp = "30 Dec 2013 16:39", .Names = c("date",
"rdate", "id"), formats = c("%9s", "%9s", "%8.0g"), types = c(9L,
9L, 251L), val.labels = c("", "", ""), var.labels = c("", "",
""), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9",
"10"), version = 12L, class = "data.frame")
And information on date and the number of times to duplicate is found below:
date dups id
01-jan-90 3 1
02-jan-90 2 2
Output for ID 1 on January 1, 1990 looks like this:
date rdate id newid
1 01-jan-90 08-jan-90 1 1
2 01-jan-90 15-jan-90 1 1
3 01-jan-90 01-jan-90 1 1
4 01-jan-90 22-jan-90 1 1
5 01-jan-90 29-jan-90 1 1
6 01-jan-90 08-jan-90 1 2
7 01-jan-90 15-jan-90 1 2
8 01-jan-90 01-jan-90 1 2
9 01-jan-90 22-jan-90 1 2
10 01-jan-90 29-jan-90 1 2
11 01-jan-90 08-jan-90 1 3
12 01-jan-90 15-jan-90 1 3
13 01-jan-90 01-jan-90 1 3
14 01-jan-90 22-jan-90 1 3
15 01-jan-90 29-jan-90 1 3
Upvotes: 0
Views: 195
Reputation: 193497
Here's a fairly straightforward approach using merge
:
First, we'll assume your "duplicator" data.frame
is called "mydf". Second, merge
, create a sequence
of your dups
column, "expand" your rows, and bind your new ID back in.
A <- merge(df, mydf)
newID <- sequence(A$dups)
out <- cbind(A[rep(rownames(A), A$dups), c("date", "id", "rdate")], newID)
head(out)
# date id rdate newID
# 1 01-jan-90 1 08-jan-90 1
# 1.1 01-jan-90 1 08-jan-90 2
# 1.2 01-jan-90 1 08-jan-90 3
# 2 01-jan-90 1 15-jan-90 1
# 2.1 01-jan-90 1 15-jan-90 2
# 2.2 01-jan-90 1 15-jan-90 3
The order is not the same as your sample output, but that's pretty easily fixed by using order
, as follows:
outordered <- out[with(out, do.call(order, list(date, id, newID))), ]
head(outordered, 10)
# date id rdate newID
# 1 01-jan-90 1 08-jan-90 1
# 2 01-jan-90 1 15-jan-90 1
# 3 01-jan-90 1 01-jan-90 1
# 4 01-jan-90 1 22-jan-90 1
# 5 01-jan-90 1 29-jan-90 1
# 1.1 01-jan-90 1 08-jan-90 2
# 2.1 01-jan-90 1 15-jan-90 2
# 3.1 01-jan-90 1 01-jan-90 2
# 4.1 01-jan-90 1 22-jan-90 2
# 5.1 01-jan-90 1 29-jan-90 2
tail(outordered)
# date id rdate newID
# 10 02-jan-90 2 30-jan-90 1
# 6.1 02-jan-90 2 09-jan-90 2
# 7.1 02-jan-90 2 16-jan-90 2
# 8.1 02-jan-90 2 02-jan-90 2
# 9.1 02-jan-90 2 23-jan-90 2
# 10.1 02-jan-90 2 30-jan-90 2
Upvotes: 1
Reputation: 52637
# Create the data.frame with the number of duplications
# assuming it is unique on date/id
id <- unique(df[, c("date", "id")])
id$dups <- c(3, 2)
library(plyr)
# Use ddply to cycle through the id df, and lapply to
# duplicate the df
ddply(id, c("date", "id"),
function(df.sub) {
if(nrow(df.sub) != 1L) stop("Logic error") # id df shoudl be unique on date/id
df.to.rep <- subset(df, date==df.sub$date & id==df.sub$id)
df.rep <- lapply(1:df.sub$dups, function(newid) transform(df.to.rep, newid=newid))
do.call(rbind, df.rep)
}
)
# date rdate id newid
# 1 01-jan-90 08-jan-90 1 1
# 2 01-jan-90 15-jan-90 1 1
# 3 01-jan-90 01-jan-90 1 1
# 4 01-jan-90 22-jan-90 1 1
# 5 01-jan-90 29-jan-90 1 1
# 6 01-jan-90 08-jan-90 1 2
# 7 01-jan-90 15-jan-90 1 2
# 8 01-jan-90 01-jan-90 1 2
# 9 01-jan-90 22-jan-90 1 2
# 10 01-jan-90 29-jan-90 1 2
# 11 01-jan-90 08-jan-90 1 3
# 12 01-jan-90 15-jan-90 1 3
# 13 01-jan-90 01-jan-90 1 3
# 14 01-jan-90 22-jan-90 1 3
# 15 01-jan-90 29-jan-90 1 3
# 16 02-jan-90 09-jan-90 2 1
# 17 02-jan-90 16-jan-90 2 1
# 18 02-jan-90 02-jan-90 2 1
# 19 02-jan-90 23-jan-90 2 1
# 20 02-jan-90 30-jan-90 2 1
# 21 02-jan-90 09-jan-90 2 2
# 22 02-jan-90 16-jan-90 2 2
# 23 02-jan-90 02-jan-90 2 2
# 24 02-jan-90 23-jan-90 2 2
# 25 02-jan-90 30-jan-90 2 2
Upvotes: 1
Reputation: 54237
df2 <- read.table(textConnection("date dups id
01-jan-90 3 1
02-jan-90 2 2"), sep=" ", header=T)
dup <- function(df, date, num) {
i <- which(df$date==date)
cbind(df[rep(i,num),], newid=rep(1:num, each=length(i)))
}
df3 <- data.frame()
for (x in 1:nrow(df2)) {
df3 <- rbind(df3, dup(df, df2$date[x], df2$dups[x]))
}
df3
# date rdate id newid
# 1 01-jan-90 08-jan-90 1 1
# 2 01-jan-90 15-jan-90 1 1
# 3 01-jan-90 01-jan-90 1 1
# 4 01-jan-90 22-jan-90 1 1
# 5 01-jan-90 29-jan-90 1 1
# 1.1 01-jan-90 08-jan-90 1 2
# 2.1 01-jan-90 15-jan-90 1 2
# 3.1 01-jan-90 01-jan-90 1 2
# 4.1 01-jan-90 22-jan-90 1 2
# 5.1 01-jan-90 29-jan-90 1 2
# 1.2 01-jan-90 08-jan-90 1 3
# 2.2 01-jan-90 15-jan-90 1 3
# 3.2 01-jan-90 01-jan-90 1 3
# 4.2 01-jan-90 22-jan-90 1 3
# 5.2 01-jan-90 29-jan-90 1 3
# 6 02-jan-90 09-jan-90 2 1
# 7 02-jan-90 16-jan-90 2 1
# 8 02-jan-90 02-jan-90 2 1
# 9 02-jan-90 23-jan-90 2 1
# 10 02-jan-90 30-jan-90 2 1
# 6.1 02-jan-90 09-jan-90 2 2
# 7.1 02-jan-90 16-jan-90 2 2
# 8.1 02-jan-90 02-jan-90 2 2
# 9.1 02-jan-90 23-jan-90 2 2
# 10.1 02-jan-90 30-jan-90 2 2
Upvotes: 1