Marc Brinkmann
Marc Brinkmann

Reputation: 142

How to count and display factors on the x/y scale?


I am trying to count the data factor-wise and display the counts in the labels of an axis. My closest solution so far is the following: aes(x = paste(A_REF, "(n=", length(A_REF), ")"))

Here n is the number of occurrences of each factor level in the data.

Edit: How do I get the first and fifth levels of V43 to show up? --> Solved: I forgot to call library("foreign").

Current State: Solved

Current State

My Code so far

# Load libraries & packages =================================
library(foreign)  # read.spss()
library(dplyr)    # %>%, filter(), count(), mutate(), select(), left_join()
library(ggplot2)  # ggplot(), geom_bar(), geom_text(), ...

# Data setup =================================
spss_file_path <- "D:\\Programming\\Testing\\2017-03-15_data_import&ggplot2\\Beispieldatensatz(fiktiv).sav"
# to.data.frame = TRUE makes read.spss() return a data frame rather than a
# list, which the dplyr verbs below require.
exampledata <- read.spss(spss_file_path, use.value.labels = TRUE,
                to.data.frame = TRUE, reencode = TRUE)

# Column names must be character strings; bare symbols would be looked up
# as (non-existent) objects.
names(exampledata) <- c("V101", "A_REF", "V43")

# Recode V43 (values 1 to 5) into a factor with labelled end points.
exampledata$V43   <- factor(exampledata$V43,
                          levels = c(1,2,3,4,5),
                          labels = c("1 Sehr zufrieden","2","3","4", "5 Sehr unzufrieden"))

# Reverse the level order of every factor.
exampledata$V43   <- factor(exampledata$V43, levels = rev(unique(levels(exampledata$V43))))
exampledata$A_REF <- factor(exampledata$A_REF, levels = rev(unique(levels(exampledata$A_REF))))
exampledata$V101  <- factor(exampledata$V101, levels = rev(unique(levels(exampledata$V101))))

# One axis label per A_REF level, carrying the number of complete rows (n).
labels <- exampledata %>% 
  filter(!is.na(A_REF), !is.na(V43)) %>% 
  count(A_REF) %>% 
  mutate(labels = paste(A_REF,"(n=", n, ")")) %>% 
  select(A_REF, labels)

# Attach the label to every complete row; the same NA filter as above keeps
# the displayed n consistent with the rows that are actually plotted.
plot_data <-  exampledata %>% 
  filter(!is.na(A_REF), !is.na(V43)) %>% 
  left_join(labels, by = "A_REF")

# Plot =================================
# Stacked 100% bars per label, filled by V43. The text layer prints each
# segment's count divided by the sum of all counts.
# NOTE(review): ggplot2 >= 3.4.0 deprecates ..count.. in favour of
# after_stat(count); kept here for compatibility with older installations.
ggplot(plot_data, aes(x = labels, fill = V43)) +
  geom_bar(position = "fill") + 
  scale_y_continuous(labels = scales::percent, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
  labs(y=NULL, x=NULL, fill=NULL) + 
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) + 
  theme_classic() + 
  geom_text(stat="count",aes(label = scales::percent((..count..)/sum(..count..))), position = position_fill(vjust=0.5))


# Sample data as a literal structure() expression (looks like dput() output).
# It evaluates to a data.frame with 100 rows and three factor columns:
# exampledata.V101, exampledata.A_REF and exampledata.V43.
# The V43 levels are listed from "5 Sehr unzufrieden" down to
# "1 Sehr zufrieden". The expression is not assigned to a name here.
structure(list(exampledata.V101 = structure(c(2L, NA, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, NA, 
NA, NA, 1L, 1L, 2L, NA, 2L, 2L, 2L, NA, 2L, 2L, NA, NA, 1L, NA, 
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, NA, 1L, NA, 1L, NA, 
1L, 2L, NA, NA, 2L, NA, 1L, 2L, 2L, NA, 2L, NA, 2L, 2L, 1L, 2L, 
1L, 2L, 1L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, NA, 2L, 1L, 2L, 2L
), .Label = c("Weiblich", "Männlich"), class = "factor"), exampledata.A_REF = structure(c(18L, 
18L, 18L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 18L, 16L, 18L, 
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 16L, 18L, 
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 18L, 18L, 
16L, 18L, 16L, 18L, 18L, 16L, 16L, 18L, 18L, 18L, 18L, 18L, 18L, 
18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 
16L, 16L, 18L, 18L, 18L, 17L, 16L, 18L), .Label = c("Zertifikat eines Aufbau- oder Ergänzungsstudiums", 
"LA Berufliche Schulen", "LA Sonderschule", "LA Gymnasium", "LA Haupt- und Realschule", 
"LA Grundschule", "Künstlerischer/musischer Abschluss", "Kirchlicher Abschluss", 
"Staatsexamen (ohne Lehramt)", "Diplom Fachhochschule, Diplom I an Gesamthochschulen", 
"Diplom Universität, Diplom II an Gesamthochschulen", "Sonstiges", 
"Promotion", "Staatsexamen", "Magister", "Diplom", "Master", 
"Bachelor"), class = "factor"), exampledata.V43 = structure(c(3L, 
5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 3L, 2L, NA, 4L, 5L, 5L, 
4L, 4L, 4L, 4L, NA, 2L, 4L, 3L, 5L, 4L, 4L, 4L, NA, 4L, 4L, NA, 
NA, 3L, 5L, 2L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, NA, NA, 4L, NA, 3L, 
4L, 5L, 5L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 5L, 4L, 5L, NA, 4L, 
NA, 4L, NA, 4L, 5L, 4L, NA, 5L, NA, 4L, 4L, 4L, NA, 4L, NA, 5L, 
4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 4L, 4L, 3L, 4L, NA, 4L, 
5L, 5L, 4L), .Label = c("5 Sehr unzufrieden", "4", "3", "2", 
"1 Sehr zufrieden"), class = "factor")), .Names = c("exampledata.V101", 
"exampledata.A_REF", "exampledata.V43"), row.names = c(NA, 100L
), class = "data.frame")

Upvotes: 0

Views: 37

Answers (1)


Reputation: 2436

I think the easiest way is to compute the labels outside of ggplot. Note that with your data, the 5th level of V43 doesn't show up.

# Give the three columns short names (drops the "exampledata." prefix).
exampledata <- setNames(exampledata, c("V101", "A_REF", "V43"))

I count A_REF and then apply your formula to compute the labels.

# Build one axis label per A_REF level: drop rows with a missing A_REF or
# V43, count the remaining rows per level, and paste the count into the label.
labels <- exampledata %>% 
  filter(!is.na(A_REF), !is.na(V43)) %>% 
  count(A_REF) %>% 
  mutate(labels = paste(A_REF,"(n=", n, ")")) %>% 
  select(A_REF, labels)

I then join the labels to the data

# Keep only rows with both A_REF and V43 present, then attach the
# precomputed axis label for each row's A_REF level.
plot_data <-  exampledata %>% 
  filter(!is.na(A_REF), !is.na(V43)) %>% 
  left_join(labels, by = "A_REF")

And finally, here is the plot. Note that the title doesn't show up either.

# Stacked 100% bars per label, filled by V43. The text layer prints each
# segment's count divided by the sum of all counts, centred in the segment.
# The chain must not end with "+", otherwise the expression is incomplete.
# NOTE(review): ggplot2 >= 3.4.0 deprecates ..count.. in favour of
# after_stat(count); kept here for compatibility with older installations.
ggplot(plot_data, aes(x = labels, fill = V43)) +
  geom_bar(position = "fill") + 
  scale_y_continuous(labels = scales::percent, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
  labs(y=NULL, x=NULL, fill=NULL) + 
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) + 
  theme_classic() + 
  geom_text(stat="count",aes(label = scales::percent((..count..)/sum(..count..))), position = position_fill(vjust=0.5))

enter image description here

Upvotes: 1

Related Questions