Alberto Stefanelli
Alberto Stefanelli

Reputation: 320

plot data.frame columns with ggplot

I need to (1) generate a markdown file with a (2) series of plots for specific columns of a dataset that (3) match a particular string

At the moment I am stuck at the second point. I want to plot some ordered character factors using ggplot for the columns whose names start with "pre_". Here is the code that I have worked on so far:

#load ggplot
library(ggplot2)
# Toy survey data: two "pre_" and two "post_" items, each holding 20 answers
# drawn at random (with replacement) from the Likert labels below or NA.
level <- c(
  "Strongly Agree",
  "Agree",
  "Neither agree or disagree",
  "Disagree",
  "Strongly disagree",
  NA
)

# stringsAsFactors = TRUE turns the character draws into factor columns.
df <- data.frame(
  pre_1 = as.character(sample(level, size = 20, replace = TRUE)),
  pre_2 = as.character(sample(level, size = 20, replace = TRUE)),
  post_1 = as.character(sample(level, size = 20, replace = TRUE)),
  post_2 = as.character(sample(level, size = 20, replace = TRUE)),
  stringsAsFactors = TRUE
)

   ## function to plot each column of the dataset whose name starts with pre_
dfplot_pre <- function(x)
{
  # Intended to draw one relative-frequency bar chart per column of `x`
  # whose name starts with "pre_". Takes a data frame `x`.
  # NOTE(review): this is the version the question reports as displaying
  # no plot; the notes below mark the likely causes and are to be confirmed.
  df <- x
  # Number of columns whose names start with "pre_".
  # NOTE(review): starts_with() is called unqualified, so dplyr (or
  # tidyselect) presumably has to be attached -- confirm.
  ln <- length(names(dplyr::select(df, starts_with("pre_"))))
  # NOTE(review): 1:ln yields c(1, 0) when ln is 0; seq_len(ln) would not.
  for(i in 1:ln){
    # Re-code every column of df (not only the "pre_" ones) as an ordered
    # factor with the five Likert levels; this is repeated on each iteration.
    out <- lapply(df , function(x) factor(x, c("Strongly Agree", "Agree", "Neither agree or disagree","Disagree", "Strongly disagree"),ordered = T ))
    df <- do.call(data.frame , out )
    # NOTE(review): `i` indexes all columns of df by position, not the
    # "pre_" selection; the two agree only if the "pre_" columns come first.
    # NOTE(review): the ggplot object built below is never passed to print(),
    # which is presumably why nothing is displayed from inside the loop.
    # NOTE(review): aes(x = na.omit(df[i])) maps x to a one-column data frame
    # rather than a vector, which matches the "object of type data.frame"
    # message quoted in the question.
    # NOTE(review): `percent` is presumably scales::percent, and `fail` is an
    # unquoted symbol; neither is defined in this snippet.
    if(is.factor(df[,i])){ggplot(na.omit(data.frame(df[,i],stringsAsFactors = T)), aes(x=na.omit(df[i]))) +
        theme_bw() +  
        geom_bar(aes(y = (..count..)/sum(..count..))) + 
        scale_y_continuous(labels=percent) + 
        scale_x_discrete(drop=FALSE)}
    else{print (fail)}
  }
}

Unfortunately, the code does not display any plot. I am able to correctly plot the columns using the command

# Base-graphics plot of the i-th column of df, extracted as a vector.
# `i` is not defined on this line; it is assumed to be set beforehand.
plot(df[,i])

so I suspect something is wrong in my ggplot call, but I am not sure what. Running the code outside of the function and storing it in an object gives this error:

Don't know how to automatically pick scale for object of type data.frame. Defaulting to continuous.
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE,  : 
  arguments imply differing number of rows: 0, 1

Many thanks

Upvotes: 1

Views: 249

Answers (2)

Alberto Stefanelli
Alberto Stefanelli

Reputation: 320

ORIGINAL SOLUTION:

# Build one relative-frequency bar chart (with percentage labels) for a
# single vector of answers. Missing values are dropped before plotting.
.plot_relative_freq <- function(answers, title) {
  # A named column lets aes() refer to it directly instead of to the whole
  # data frame.
  plot_data <- stats::na.omit(data.frame(value = answers))

  ggplot2::ggplot(plot_data, ggplot2::aes(x = value)) +
    ggplot2::geom_bar(
      ggplot2::aes(y = (..count..) / sum(..count..)),
      stat = "count"
    ) +
    ggplot2::geom_text(
      ggplot2::aes(
        label = paste(round((..count..) / sum(..count..) * 100), "%"),
        y = (..count..) / sum(..count..)
      ),
      stat = "count",
      vjust = -.5
    ) +
    ggplot2::scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
    # Keep unused factor levels on the axis so all charts share the same x.
    ggplot2::scale_x_discrete(drop = FALSE) +
    ggplot2::ylab("Relative Frequencies (%)") +
    ggplot2::ggtitle(title) +
    ggplot2::theme_light(base_size = 18) +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
      plot.title = ggplot2::element_text(hjust = 0.5)
    )
}

#' Print a relative-frequency bar chart for every "pre_" column
#'
#' @param df A data frame. Columns whose names start with "pre_" are plotted,
#'   one chart per column, titled with the column name.
#' @return `NULL`, invisibly. The function is called for its side effect of
#'   printing the plots.
plot_pre <- function(df) {
  stopifnot(is.data.frame(df))

  likert_levels <- c(
    "Strongly Agree", "Agree", "Neither agree or disagree",
    "Disagree", "Strongly disagree"
  )

  # Select once, from the argument (not from a global `data` object).
  pre_cols <- dplyr::select(df, dplyr::starts_with("pre_"))

  # Iterating over names is a no-op when nothing matches, whereas
  # seq(1, 0, 1) would raise an error.
  for (col_name in names(pre_cols)) {
    answers <- pre_cols[[col_name]]

    # NOTE(review): dummy() is not defined in this snippet; it is presumably
    # a helper that returns TRUE for dummy (two-valued) variables -- confirm.
    # Columns it flags are plotted as they are; all others are re-coded as
    # ordered Likert factors first.
    if (!isTRUE(dummy(as.character(answers)))) {
      answers <- factor(answers, levels = likert_levels, ordered = TRUE)
    }

    # ggplot objects are not auto-printed inside a loop, so print explicitly.
    print(.plot_relative_freq(answers, title = col_name))
  }

  invisible(NULL)
}

plot_pre(df)

Upvotes: 1

Richard Telford
Richard Telford

Reputation: 9923

Alternative solution

library(tidyverse)
library(scales)

#' Facetted relative-frequency bar charts for all "pre_" columns
#'
#' Each panel shows, for one "pre_" column, the share of its non-missing
#' answers that falls in each Likert category.
#'
#' @param df A data frame with at least one column whose name starts with
#'   "pre_".
#' @return A ggplot object with one facet per "pre_" column.
dfplot_pre <- function(df) {
  stopifnot(is.data.frame(df))

  likert_levels <- c(
    "Strongly Agree", "Agree", "Neither agree or disagree",
    "Disagree", "Strongly disagree"
  )

  pre_cols <- select(df, starts_with("pre_"))
  if (ncol(pre_cols) == 0L) {
    stop("`df` has no columns whose names start with \"pre_\".", call. = FALSE)
  }

  shares <- pre_cols %>%
    # Plain character columns stack cleanly even if the factors differ.
    mutate(across(everything(), as.character)) %>%
    pivot_longer(everything(), names_to = "key", values_to = "value") %>%
    # Drop missing answers per column after reshaping, so an NA in one item
    # does not discard the respondent's answers to the other items.
    filter(!is.na(value)) %>%
    mutate(value = factor(value, levels = likert_levels, ordered = TRUE)) %>%
    count(key, value, name = "n") %>%
    # Shares are computed within each column; sum(..count..) inside aes()
    # would instead total over all facets.
    group_by(key) %>%
    mutate(share = n / sum(n)) %>%
    ungroup()

  ggplot(shares, aes(x = value, y = share)) +
    geom_col() +
    scale_y_continuous(labels = percent) +
    # Keep categories nobody chose on the axis.
    scale_x_discrete(drop = FALSE) +
    ylab("Relative Frequencies (%)") +
    facet_wrap(~ key) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

Upvotes: 0

Related Questions