Reputation: 53
I'm newish to R and stack overflow, plus I'm not experienced at coding, and I'm hoping for some assistance. I have a dataframe where I'd like to do the same action on multiple variables. I wrote a function for the actions I'd like to take, but I'm not sure how to change the column names so the function acts on each variable separately.
#Fake Data
#index for a list of traits, and the current food type for each pet
shelterpets <- base::data.frame(
ID = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"),
index_agility = round(runif(10, min=-0.4, max=0.4), digits = 2),
index_boldness = round(runif(10, min=-0.4, max=0.4), digits = 2),
index_curiousity = round(runif(10, min=-0.4, max=0.4), digits = 2),
index_dexterity = round(runif(10, min=-0.4, max=0.4), digits = 2),
index_empathy = round(runif(10, min=-0.4, max=0.4), digits = 2),
food_type = c("diet_food", "diet_food", "regular_food", "diet_food", "regular_food", "regular_food", "regular_food", "diet_food", "diet_food", "regular_food")
)
# function to look at index for each trait, current food type, and suggest changes to food type
function(petfood) {
# variable to capture predicted food type: diet_food, regular_food
shelterpets$food10_trait <- NA
#pet previously on diet_food and above 0.10 then confirm diet_food, else predict regular_food
shelterpets$food10_trait <- ifelse(shelterpets$food_type == "diet_food",
ifelse(shelterpets$index_trait >= 0.10, "diet_food", "regular_food"),
shelterpets$food10_trait)
#pet previously on regular_food and below -0.10 then confirm regular_food, else predict diet_food
shelterpets$food10_trait <- ifelse(shelterpets$food_type == "regular_food",
ifelse(shelterpets$index_trait <= -0.10, "regular_food", "diet_food" ),
shelterpets$food10_trait)
#typecast
shelterpets$food10_trait <- as.factor(shelterpets$food10_trait)
#update trait so replace "trait" with "agility", then "boldness", etc.
}
And what I want it to look like is
ID index_agility index_boldness index_curiousity index_dexterity index_empathy food_type food10_agility food10_boldness
1 1 0.26 -0.28 0.17 0.17 0.28 diet_food diet_food regular_food
2 2 0.17 -0.12 -0.25 0.06 0.06 diet_food diet_food regular_food
3 3 0.24 0.14 -0.13 0.25 0.28 regular_food diet_food diet_food
4 4 -0.07 0.30 -0.32 0.06 0.23 diet_food regular_food diet_food
5 5 0.33 0.00 0.13 0.23 -0.18 regular_food diet_food diet_food
6 6 0.17 -0.20 0.01 0.25 0.17 regular_food diet_food regular_food
food10_curiousity food10_dexterity food10_empathy
1 diet_food diet_food diet_food
2 regular_food regular_food regular_food
3 regular_food diet_food diet_food
4 regular_food regular_food diet_food
5 diet_food diet_food regular_food
6 diet_food diet_food diet_food
I made this to start
#get names in array to hopefully pass to the function, so drop ID and food_type
pet <- as.matrix(colnames(shelterpets))
pet <- pet[-c(1,7),,drop=F]
and I saw these questions, but I didn't quite follow how they worked enough to adapt them:
Thanks for any pointers you can give.
Upvotes: 3
Views: 148
Reputation: 107652
As mentioned, consider using long data usually preferred format for analytical operations including merging, plotting, modeling, etc. rather than wide usually better format for presentation and reporting. While data does result with more rows, you avoid looping for vectorized operations. Consider also within
to avoid the repeating data frame name in new column calculation.
index_cols <- names(shelterpets)[grep("index", names(shelterpets))]
shelterpets_long <- stats::reshape(
shelterpets, varying=index_cols, times=index_cols,
v.names="value", timevar="indicator", ids=NULL,
idvar=c("ID", "food_type"), direction="long",
new.row.names = 1:1E5
)
shelterpets_long <- base::within(shelterpets_long, {
# pet previously on diet_food and above 0.10 then confirm diet_food, else predict regular_food
food10_trait <- ifelse(food_type == "diet_food",
ifelse(value >= 0.10, "diet_food", "regular_food"),
NA)
# pet previously on regular_food and below -0.10 then confirm regular_food, else predict diet_food
food10_trait <- ifelse(food_type == "regular_food",
ifelse(value <= -0.10, "regular_food", "diet_food" ),
food10_trait)
# typecast
food10_trait <- as.factor(food10_trait)
})
head(shelterpets_long)
# ID food_type indicator value food10_trait
# 1 1 diet_food index_agility -0.29 regular_food
# 2 2 diet_food index_agility -0.39 regular_food
# 3 3 regular_food index_agility 0.23 diet_food
# 4 4 diet_food index_agility -0.36 regular_food
# 5 5 regular_food index_agility -0.34 regular_food
# 6 6 regular_food index_agility -0.01 diet_food
For possibly faster reshape
to long format:
shelterpets_long2 <- base::data.frame(
base::expand.grid(ID=unique(shelterpets$ID), indicator=index_cols,
stringsAsFactors = FALSE),
food_type = shelterpets$food_type,
index = base::matrix(data.matrix(shelterpets[index_cols]),
ncol=1, byrow=TRUE)
)
all.equal(shelterpets_long[c("ID", "food_type", "indicator", "value")],
shelterpets_long2[c("ID", "food_type", "indicator", "value")])
# [1] TRUE
And if ever you need to reshape to wide, use this faster approach from slight modification of @Moody_Mudskipper's answer. As needed, merge
to original, shelterpet
, by ID variable:
### FASTER RESHAPE WIDE
### (https://stackoverflow.com/a/55973705/1422451)
matrix_spread <- function(df1, id, key, value){
unique_ids <- unique(df1[[key]])
mat <- matrix( df1[[value]], ncol=length(unique_ids), byrow = FALSE)
df2 <- data.frame(unique(df1[[id]]), mat)
names(df2) <- c(id, paste0(value,"_",unique_ids))
df2
}
shelterpets_wide <- matrix_spread(
shelterpets_long,
id = "ID",
key = "indicator",
value = "food10_trait"
)
shelterpets_wide
# ID food10_trait_index_agility food10_trait_index_boldness food10_trait_index_curiousity food10_trait_index_dexterity food10_trait_index_empathy
# 1 1 regular_food regular_food diet_food regular_food regular_food
# 2 2 regular_food regular_food regular_food regular_food regular_food
# 3 3 diet_food diet_food diet_food diet_food diet_food
# 4 4 regular_food regular_food regular_food regular_food regular_food
# 5 5 regular_food diet_food diet_food regular_food diet_food
# 6 6 diet_food regular_food diet_food regular_food regular_food
# 7 7 regular_food diet_food diet_food diet_food regular_food
# 8 8 regular_food diet_food diet_food regular_food diet_food
# 9 9 diet_food regular_food regular_food regular_food regular_food
# 10 10 regular_food regular_food regular_food regular_food regular_food
Upvotes: 2
Reputation: 79228
You could simply write a function like below:
my_function<- function(x, y){
ifelse(y == "diet_food",
ifelse(x >= 0.10, "diet_food", "regular_food"),
ifelse(x <= -0.10, "regular_food", "diet_food"))
}
data.frame(lapply(df[2:6], my_function, y=df[,7]))
index_agility index_boldness index_curiousity index_dexterity index_empathy
1 diet_food regular_food diet_food diet_food diet_food
2 diet_food regular_food regular_food regular_food regular_food
3 diet_food diet_food regular_food diet_food diet_food
4 regular_food diet_food regular_food regular_food diet_food
5 diet_food diet_food diet_food diet_food regular_food
6 diet_food regular_food diet_food diet_food diet_food
Then you could use cbind
to bind the results to the original df
. You can also use sapply
instead of lapply
Upvotes: 2
Reputation: 20811
Your attempt is almost there, but you can loop over each trait and assign a new column to your data frame with the result of the function. I made some minor changes:
ifelse(shelterpets$index_trait
to
ifelse(shelterpets[, paste0('index_', trait)]
the input can be each trait
as a character string, and the return value can just be as.factor(...)
# function to look at index for each trait, current food type, and suggest changes to food type
f <- function(trait, data = shelterpets) {
# variable to capture predicted food type: diet_food, regular_food
data$food10_trait <- NA
#pet previously on diet_food and above 0.10 then confirm diet_food, else predict regular_food
data$food10_trait <- ifelse(data$food_type == "diet_food",
ifelse(data[, paste0('index_', trait)] >= 0.10, "diet_food", "regular_food"),
data$food10_trait)
#pet previously on regular_food and below -0.10 then confirm regular_food, else predict diet_food
data$food10_trait <- ifelse(data$food_type == "regular_food",
ifelse(data[, paste0('index_', trait)] <= -0.10, "regular_food", "diet_food" ),
data$food10_trait)
#typecast
as.factor(data$food10_trait)
#update trait so replace "trait" with "agility", then "boldness", etc.
}
## test
f('agility')
# [1] diet_food diet_food regular_food regular_food diet_food regular_food regular_food diet_food regular_food diet_food
# Levels: diet_food regular_food
Apply over each trait
traits <- gsub('.*_', '', grep('index', names(shelterpets), value = TRUE))
shelterpets[, paste0('food10_', traits)] <- lapply(traits, f)
# ID index_agility index_boldness index_curiousity index_dexterity index_empathy food_type food10_agility food10_boldness food10_curiousity food10_dexterity food10_empathy
# 1 1 0.06 -0.34 -0.25 0.28 0.22 diet_food regular_food regular_food regular_food diet_food diet_food
# 2 2 0.37 -0.01 -0.13 0.22 0.35 diet_food diet_food regular_food regular_food diet_food diet_food
# 3 3 0.33 -0.07 -0.03 0.20 0.22 regular_food diet_food diet_food diet_food diet_food diet_food
# 4 4 0.07 -0.23 -0.14 -0.29 0.05 diet_food regular_food regular_food regular_food regular_food regular_food
# 5 5 0.23 0.06 0.09 0.24 -0.17 regular_food diet_food diet_food diet_food diet_food regular_food
# 6 6 -0.27 -0.19 -0.23 0.37 -0.35 regular_food regular_food regular_food regular_food diet_food regular_food
# 7 7 0.17 0.30 -0.14 -0.14 -0.11 regular_food diet_food diet_food regular_food regular_food regular_food
# 8 8 -0.22 0.13 0.21 -0.06 0.08 diet_food regular_food diet_food diet_food regular_food regular_food
# 9 9 -0.25 0.21 -0.02 0.09 -0.29 diet_food regular_food diet_food regular_food regular_food regular_food
# 10 10 -0.35 0.39 -0.34 0.20 0.13 regular_food regular_food diet_food regular_food diet_food diet_food
Upvotes: 3