Reputation: 510
I'm interested in using iteration with multiple substitutions of code in R to create multiple ggplot2 derived objects, but I cannot figure it out. Please help.
I created 2 datasets, diamonds_top300
and diamonds_bottom300
using the code below.
# Rows of `diamonds` holding the 300 largest `table` values.
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# dplyr exports no bottom_n(); a negative n makes top_n() rank from the
# bottom, i.e. keep the rows holding the 300 smallest `table` values.
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
I want to create 4 histograms using a 2x2 design [2 (data = diamonds_top300 or diamonds_bottom300) by 2 (DV = price or carat) design, for a total of 4 histograms].
I can do it manually using the code below, with substitution where appropriate:
# manual histogram input
# ---- NOTE: 2 (data = diamonds_top300 or diamonds_bottom300) by 2 (DV = price or carat) design, for a total of 4 histograms
# ---- NOTE(review): Models 3 and 4 below plot `y`, not `carat`; confirm which DV is intended.
# Each plot maps the DV to x, because the histogram layer uses y for the
# computed density; without an x mapping the binning stat has nothing to bin.
# Sturges' rule returns a number of bins, so it is passed as `bins`, not as
# `binwidth`.
## Model 1: DV - price, data = diamonds_top300
(ggplot2::ggplot(diamonds_top300, ggplot2::aes(x = price))
  + ggplot2::geom_histogram(
    ggplot2::aes(y = ggplot2::after_stat(density)),
    alpha = 0.5,
    position = "identity",
    bins = nclass.Sturges(diamonds_top300$price)
  )
  + ggplot2::geom_density(alpha = 0.2)
)
## Model 2: DV - price, data = diamonds_bottom300
(ggplot2::ggplot(diamonds_bottom300, ggplot2::aes(x = price))
  + ggplot2::geom_histogram(
    ggplot2::aes(y = ggplot2::after_stat(density)),
    alpha = 0.5,
    position = "identity",
    bins = nclass.Sturges(diamonds_bottom300$price)
  )
  + ggplot2::geom_density(alpha = 0.2)
)
## Model 3: DV - y, data = diamonds_top300
(ggplot2::ggplot(diamonds_top300, ggplot2::aes(x = y))
  + ggplot2::geom_histogram(
    ggplot2::aes(y = ggplot2::after_stat(density)),
    alpha = 0.5,
    position = "identity",
    bins = nclass.Sturges(diamonds_top300$y)
  )
  + ggplot2::geom_density(alpha = 0.2)
)
## Model 4: DV - y, data = diamonds_bottom300
(ggplot2::ggplot(diamonds_bottom300, ggplot2::aes(x = y))
  + ggplot2::geom_histogram(
    ggplot2::aes(y = ggplot2::after_stat(density)),
    alpha = 0.5,
    position = "identity",
    bins = nclass.Sturges(diamonds_bottom300$y)
  )
  + ggplot2::geom_density(alpha = 0.2)
)
When I try to use a function and mapply to iterate this task and reduce code length, it doesn't work. Here is the code
# using mapply
# ---- NOTE: desired order: Model 1: DV - price, data = diamonds_top300; ## Model 2: DV - price, data = diamonds_bottom300; ## Model 3: DV - y, data = diamonds_top300; ## Model 4: DV - y, data = diamonds_bottom300
## creates function
# Builds one plot: a density-scaled histogram with a density curve layered on
# top. The parenthesised ggplot expression is the last value in the body, so
# it is what the function returns.
#   DVS_OF_INTEREST_list_model: the dependent variable to plot
#   DATASET_list_model: passed to ggplot2::ggplot() as its data argument
# NOTE(review): this is the version that fails with the error quoted below it.
function_histogram_diamonds_sub_dataset_and_DV_vary <-
function(DVS_OF_INTEREST_list_model, DATASET_list_model)
{
# NOTE(review): aes() captures the argument name unevaluated, so when a column
# name arrives as a string it is not looked up as a column of the data.
(ggplot2::ggplot(DATASET_list_model, aes(y=DVS_OF_INTEREST_list_model))
+ ggplot2::geom_histogram(
aes(y=..density..),
alpha=0.5,
position="identity",
# NOTE(review): `$` takes the name after it literally, so this asks for a
# column actually called "DVS_OF_INTEREST_list_model", not the column named
# by the argument; `[[ ]]` is the form that accepts a name held in a variable.
binwidth = as.numeric(
nclass.Sturges(DATASET_list_model$DVS_OF_INTEREST_list_model))
)
+ geom_density(alpha=.2)
)
}
## runs function with desired lists
# mapply() steps through both vectors in parallel (first with first, second
# with second); SIMPLIFY = FALSE keeps the results as a list.
# NOTE(review): with two length-2 vectors that is 2 calls, not the 4
# combinations listed in the "desired order" note above.
# NOTE(review): the full script defines DATASET_list as a character vector of
# dataset names, so each call receives a string rather than a data frame,
# which matches the reported "`data` must be a data frame" error.
mapply(function_histogram_diamonds_sub_dataset_and_DV_vary, DVS_OF_INTEREST_list, DATASET_list, SIMPLIFY = FALSE)
Here is the error I got:
> mapply(function_histogram_diamonds_sub_dataset_and_DV_vary, DVS_OF_INTEREST_list, DATASET_list, SIMPLIFY = FALSE)
Error: `data` must be a data frame, or other object coercible by `fortify()`, not a character vector
Run `rlang::last_error()` to see where the error occurred.
Is there any way I can do this successfully? Any help would be greatly appreciated.
FYI, I use a 2013 MacBook Pro with a 2.4 GHz dual-core Intel chip, 8 GB of RAM, macOS Big Sur 11.2.2, RStudio version 1.4.1106, and base R 4.0.4.
Thanks.
Here is the code I used for the model
#### histograms ####
# Loads packages
# Install each package only when it is missing, then attach it with library().
# The earlier `if (!require(pkg)) install.packages(pkg)` form never attached a
# package that had just been installed, and require() only returns FALSE on
# failure instead of stopping the script.
# ---- NOTE: making plots and diamonds dataset
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# ---- NOTE: for data wrangling
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# dataset creation
## for dataset with top 300 rows
# ---- NOTE: keeps the rows ranked highest on the `table` column (n = 300), not the first 300 rows of the dataset
# ---- NOTE(review): top_n() is documented to keep ties, so the result may hold more than 300 rows; check nrow() below
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: gives dataset info (first rows, structure, column names, row count)
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
# These bare calls only print when the script is run interactively or echoed.
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)
## for dataset with bottom 300 rows
### dataset
# ---- NOTE: keeps the rows ranked lowest on the `table` column (n = 300)
# dplyr exports no bottom_n(); a negative n makes top_n() rank from the
# bottom instead of the top.
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: gives dataset info (first rows, structure, column names, row count)
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
# lists all variables/objects of interest
# Dependent-variable column names, stored as strings.
# NOTE(review): this lists "carat", while manual Models 3 and 4 plot `y`; confirm which DV is intended.
DVS_OF_INTEREST_list <- c("price", "carat")
# Dataset names stored as strings; these are not the data frames themselves.
DATASET_list <- c("diamonds_top300", "diamonds_bottom300")
# manual histogram input
# ---- NOTE: 2 (data = diamonds_top300 or diamonds_bottom300) by 2 (DV = price or carat) design, for a total of 4 histograms
# ---- NOTE(review): Models 3 and 4 below plot `y`, not `carat`; confirm which DV is intended.
# Each plot maps the DV to x, because the histogram layer uses y for the
# computed density; without an x mapping the binning stat has nothing to bin.
# Sturges' rule returns a number of bins, so it is passed as `bins`, not as
# `binwidth`.
## Model 1: DV - price, data = diamonds_top300
(ggplot2::ggplot(diamonds_top300, ggplot2::aes(x = price))
  + ggplot2::geom_histogram(
    ggplot2::aes(y = ggplot2::after_stat(density)),
    alpha = 0.5,
    position = "identity",
    bins = nclass.Sturges(diamonds_top300$price)
  )
  + ggplot2::geom_density(alpha = 0.2)
)
## Model 2: DV - price, data = diamonds_bottom300
(ggplot2::ggplot(diamonds_bottom300, ggplot2::aes(x = price))
  + ggplot2::geom_histogram(
    ggplot2::aes(y = ggplot2::after_stat(density)),
    alpha = 0.5,
    position = "identity",
    bins = nclass.Sturges(diamonds_bottom300$price)
  )
  + ggplot2::geom_density(alpha = 0.2)
)
## Model 3: DV - y, data = diamonds_top300
(ggplot2::ggplot(diamonds_top300, ggplot2::aes(x = y))
  + ggplot2::geom_histogram(
    ggplot2::aes(y = ggplot2::after_stat(density)),
    alpha = 0.5,
    position = "identity",
    bins = nclass.Sturges(diamonds_top300$y)
  )
  + ggplot2::geom_density(alpha = 0.2)
)
## Model 4: DV - y, data = diamonds_bottom300
(ggplot2::ggplot(diamonds_bottom300, ggplot2::aes(x = y))
  + ggplot2::geom_histogram(
    ggplot2::aes(y = ggplot2::after_stat(density)),
    alpha = 0.5,
    position = "identity",
    bins = nclass.Sturges(diamonds_bottom300$y)
  )
  + ggplot2::geom_density(alpha = 0.2)
)
# using mapply
# ---- NOTE: desired order: Model 1: DV - price, data = diamonds_top300; ## Model 2: DV - price, data = diamonds_bottom300; ## Model 3: DV - y, data = diamonds_top300; ## Model 4: DV - y, data = diamonds_bottom300
## creates function
# Builds one plot: a density-scaled histogram with a density curve layered on
# top. The parenthesised ggplot expression is the last value in the body, so
# it is what the function returns.
#   DVS_OF_INTEREST_list_model: the dependent variable to plot
#   DATASET_list_model: passed to ggplot2::ggplot() as its data argument
# NOTE(review): this is the version that produces the reported
# "`data` must be a data frame" error.
function_histogram_diamonds_sub_dataset_and_DV_vary <-
function(DVS_OF_INTEREST_list_model, DATASET_list_model)
{
# NOTE(review): aes() captures the argument name unevaluated, so when a column
# name arrives as a string it is not looked up as a column of the data.
(ggplot2::ggplot(DATASET_list_model, aes(y=DVS_OF_INTEREST_list_model))
+ ggplot2::geom_histogram(
aes(y=..density..),
alpha=0.5,
position="identity",
# NOTE(review): `$` takes the name after it literally, so this asks for a
# column actually called "DVS_OF_INTEREST_list_model", not the column named
# by the argument; `[[ ]]` is the form that accepts a name held in a variable.
binwidth = as.numeric(
nclass.Sturges(DATASET_list_model$DVS_OF_INTEREST_list_model))
)
+ geom_density(alpha=.2)
)
}
## runs function with desired lists
# mapply() steps through both vectors in parallel (first with first, second
# with second); SIMPLIFY = FALSE keeps the results as a list.
# NOTE(review): with two length-2 vectors that is 2 calls, not the 4
# combinations listed in the "desired order" note above.
# NOTE(review): DATASET_list is defined above as a character vector of
# dataset names, so each call receives a string rather than a data frame.
mapply(function_histogram_diamonds_sub_dataset_and_DV_vary, DVS_OF_INTEREST_list, DATASET_list, SIMPLIFY = FALSE)
Upvotes: 0
Views: 54
Reputation: 21937
How about something like this:
# Draws a density-scaled histogram of one column with a density curve on top.
#
# DVS_OF_INTEREST_list_model: name of the column to plot, given as a string.
# DATASET_list_model: data frame that contains that column.
# Returns the ggplot object.
function_histogram_diamonds_sub_dataset_and_DV_vary <-
  function(DVS_OF_INTEREST_list_model, DATASET_list_model) {
    # Sturges' rule gives a number of bins, not a bin width, so it is computed
    # from the column and passed as `bins`. `[[ ]]` is used because the column
    # name arrives as a string.
    n_bins <- nclass.Sturges(DATASET_list_model[[DVS_OF_INTEREST_list_model]])
    # Calls are namespace-qualified instead of attaching ggplot2 with
    # require() inside the function body.
    ggplot2::ggplot(
      DATASET_list_model,
      # aes_string() maps the column named by the string to x.
      ggplot2::aes_string(x = DVS_OF_INTEREST_list_model)
    ) +
      ggplot2::geom_histogram(
        ggplot2::aes(y = ggplot2::after_stat(density)),
        alpha = 0.5,
        position = "identity",
        bins = n_bins
      ) +
      ggplot2::geom_density(alpha = 0.2)
  }
# Build the four plots by pairing each DV name with its dataset position by
# position, then lay the resulting plots out in a single grid.
hists <- purrr::map2(
  list("price", "price", "y", "y"),
  list(
    diamonds_top300, diamonds_bottom300,
    diamonds_top300, diamonds_bottom300
  ),
  function(dv_name, dataset) {
    function_histogram_diamonds_sub_dataset_and_DV_vary(dv_name, dataset)
  }
)
cowplot::plot_grid(plotlist = hists)
The function had to be cleaned up a bit. You need to use `aes_string()` if you're trying to pass in character string names of variables. You also needed the variable of interest to be defined as `x` in the aesthetic. I'm not sure the Sturges breaks are doing the trick here, but that's easily changed in the code.
Upvotes: 1