Randomly distributing values across multiple rows in a data frame

Question

I have two data frames.
DF1 contains unique IDs, each assigned to an area, together with a count of how many people are in each ID (the COUNT column).
DF2 contains how many more or fewer people need to be assigned to each area (the CHANGE column).

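Is there an efficient way to distribute each area's CHANGE value from DF2 randomly across the rows of DF1 assigned to that area? For Area A, for example, that means adding the extra 24 people from the CHANGE column in DF2 to the COUNT column in DF1, spread randomly across the rows assigned to Area A.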
Thank you.

# Example data: 20 unique IDs, five per area (A-D), each with a random
# starting COUNT drawn (with replacement) from 10 to 30.
DF1 <- data.frame(
  ID = 1:20,
  AREA = rep(c("A", "B", "C", "D"), each = 5),
  COUNT = sample(10:30, 20, replace = TRUE),
  stringsAsFactors = FALSE
)

# Example data: net number of people to add to (positive CHANGE) or remove
# from (negative CHANGE) each area.
DF2 <- data.frame(
  AREA = c("A", "B", "C", "D"),
  CHANGE = c(24, -17, -1, 5),
  stringsAsFactors = FALSE
)

EDIT: This is my current solution. However, my actual dataset contains thousands of rows, and the loop takes a few hours to complete, which is why I am looking for a more efficient way of achieving the same goal.

# Apply each area's CHANGE from DF2 to DF1$COUNT one person at a time.
# Every unit of change goes to a randomly chosen row of that area whose
# running COUNT is still above zero, so no row is pushed below zero.
for (area_row in seq_len(nrow(DF2))) {
  area <- DF2$AREA[area_row]
  change <- DF2$CHANGE[area_row]
  area_rows <- which(DF1$AREA == area)

  # Nothing to do for an unchanged area or for one with no rows in DF1.
  if (change == 0 || length(area_rows) == 0L) {
    next
  }

  # Integer step, so an integer COUNT column keeps its type.
  step <- if (change > 0) 1L else -1L
  counts <- DF1$COUNT[area_rows]

  for (unit in seq_len(abs(change))) {
    eligible <- which(counts > 0)
    if (length(eligible) == 0L) {
      stop(
        "No row with a positive COUNT left in area ", area,
        call. = FALSE
      )
    }
    # Index into `eligible` instead of calling sample(eligible, 1):
    # sample() treats a single number n as the range 1:n, which would pick
    # the wrong row whenever exactly one row is eligible.
    pick <- eligible[sample.int(length(eligible), 1L)]
    counts[pick] <- counts[pick] + step
  }

  DF1$COUNT[area_rows] <- counts
}

timat · Accepted Answer

This works for any number of areas and with any ID numbers, but it can produce a negative count if too many people are removed from a single ID.

library(data.table)

# Work on a data.table copy of DF1 keyed by ID; AREA is stored as a factor.
DF1 <- as.data.table(DF1, key = "ID")
DF1[, AREA := as.factor(AREA)]

# CHANGE that applies to each row's area. Areas that are missing from DF2
# (or whose CHANGE is NA) are treated as "no change".
row_change <- DF2$CHANGE[match(DF1$AREA, DF2$AREA)]
row_change[is.na(row_change)] <- 0

# Per area: draw abs(CHANGE) rows with replacement, count how often each row
# was drawn, then add (CHANGE > 0) or subtract (CHANGE < 0) those tallies.
# Removals are not capped, so a row's COUNT can end up negative.
DF1[, COUNT := {
  change <- row_change[.I[1L]]  # identical for every row of the group
  draws <- sample.int(.N, abs(change), replace = TRUE)
  tally <- tabulate(draws, nbins = .N)
  if (change < 0) COUNT - tally else COUNT + tally
}, by = AREA]
rm(row_change)

Randomly distributing values across multiple rows in a data frame

Answers (2)

Related Questions