Reputation: 140
I have a data frame in which the first column lists different types of bacteria and the remaining columns are the samples. Each sample belongs to a specific time point (T0, T1, ...), and the last 39 columns are the control group.
What I want is to plot each bacterium in its own plot. Each plot should show the different times on the x-axis and the value on the y-axis (I was thinking of a bar plot or box plot with error bars).
Any ideas on how I can group the data by time and by bacterium?
Here is a small example of the data:
thanks!
# Example data in dput() form: a 3-row data frame with a `Bacteria` name
# column followed by one numeric column per sample (e.g. PIE2001_T0_TORUNDA).
# The expression is not assigned here; assign it to a name before running the
# snippets in this thread (they refer to it as `df` or `dat`).
structure(list(Bacteria = c("Methanobrevibacter", "Methanosphaera",
"Methanomassiliicoccus"), PIE2001_T0_TORUNDA = c(2.279974027,
0.670536115, -0.022611066), PIE2001_T1_TORUNDA = c(2.021643324,
-0.057798217, -0.057798217), PIE2001_T5_COMPL = c(2.788566988,
0.648500825, -0.044646356), PIE2006_T0_TORUNDA = c(0.07550014,
1.684938052, 0.07550014), PIE2007_T0_TORUNDA = c(2.072075243,
1.261145027, -0.125149334), PIE2007_T1_TORUNDA = c(2.601582257,
1.279826417, -0.106467944), PIE2007_T2 = c(2.81564899, 1.765826865,
-0.180083284), PIE2007_T3 = c(0.639040509, 3.081387545, -0.054106671
), PIE2013_T0_COMPLETA = c(2.683794403, -0.024255798, -0.024255798
), PIE2013_T1_COMPLETA = c(2.614756053, -0.024301277, -0.024301277
), PIE2013_T4_COMP = c(2.653056483, 0.013999154, 0.013999154),
PIE2013_T5_COMPL = c(1.861263144, -0.084647005, -0.084647005
), PIE2014_COMP = c(2.304771706, 1.005488722, -0.093123567
), PIE2016_T0_COMPLETA = c(-0.141271428, -0.141271428, -0.141271428
), PIE2016_T1_COMPLETA = c(-0.081696055, -0.081696055, -0.081696055
), PIE2016_T3 = c(-0.019385468, -0.019385468, -0.019385468
), PIE2016_T3_TOR = c(0.045856809, 0.045856809, 0.045856809
), PIE2017_T0_COMPLETA = c(4.493506636, 0.189441543, 0.189441543
), PIE2017_T1_COMPLETA = c(5.001671041, 0.71808448, 0.024937299
), PIE2017_T2_TOR = c(5.887191114, 0.672255357, -0.020891824
), PIE2017_T3 = c(3.306066839, 0.703377154, 0.010229973),
PIE2017_T4_COMP = c(5.560847286, 1.371192544, -0.015101817
), PIE2017_T5_COMPL = c(5.688626959, -0.025105846, -0.025105846
), PIE2018_T1 = c(0.158551089, 0.158551089, 0.158551089),
PIE2019_T1_COMPL = c(6.659430141, 0.833430034, 0.140282853
)), row.names = c(NA, 3L), class = "data.frame")
Script updated:
# Relabel "T0" as "T6" in the names of columns 363-401 so those samples form
# their own time category. NOTE(review): the positions are hard-coded and
# presumably correspond to the 39 control-group columns -- verify against the
# real data before running.
colnames(df)[363:401] <- gsub("T0", "T6", colnames(df)[363:401])

# Reshape to long format, derive the time point from each sample name, then
# draw mean bars with standard-error bars, one panel per bacterium.
df %>%
  pivot_longer(-Bacteria) %>%
  mutate(
    # Sample/subject ID: everything before the first underscore. Not used by
    # the plot below; kept for reference.
    group = gsub('_.*$', '', name),
    # Time token ("T0", "T1", ...) taken from the "_T<digits>" part of the
    # column name. Names without such a part are returned unchanged by gsub().
    time = gsub('^.*_(T\\d+).*$', '\\1', name)
  ) %>%
  # Keep only rows whose time is exactly a T<digits> token. The anchors stop
  # unparsed names that merely contain "T<digit>" somewhere from slipping
  # through as bogus x-axis categories.
  filter(grepl('^T\\d+$', time)) %>%
  # Order the time points numerically; as plain character they would be
  # sorted lexically on the axis (T10 before T2).
  mutate(
    time = factor(
      time,
      levels = unique(time[order(as.numeric(sub('^T', '', time)))])
    )
  ) %>%
  ggplot(aes(time, value, fill = Bacteria)) +
  geom_bar(stat = 'summary', fun = 'mean', position = 'dodge') +
  stat_summary(
    fun.data = "mean_se",
    geom = "errorbar",
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  theme_minimal() +
  # `scales` spelled out in full; the original `scale =` only worked through
  # partial argument matching.
  facet_grid(Bacteria ~ ., scales = 'free_y') +
  scale_fill_brewer(palette = 'Set1') +
  theme(panel.border = element_rect(fill = NA, color = 'gray75'))
Upvotes: 0
Views: 166
Reputation: 17049
Pivot your data to long format, then facet by bacterium:
library(tidyr)
library(dplyr)
library(forcats)
library(ggplot2)
# Reshape the wide table into long format: the sample column names go into
# `sample`, the measurements into the default `value` column. fct_inorder()
# turns `sample` into a factor whose levels follow first appearance, so the
# samples keep their column order when plotted.
dat_long <- mutate(
  pivot_longer(dat, cols = !Bacteria, names_to = "sample"),
  sample = fct_inorder(sample)
)
# Bar chart of every sample's value, with one panel per bacterium stacked in a
# single column; the x-axis labels are rotated 90 degrees.
ggplot(data = dat_long, mapping = aes(x = sample, y = value)) +
  geom_col() +
  facet_wrap(facets = vars(Bacteria), ncol = 1) +
  theme(axis.text.x = element_text(hjust = 1, angle = 90))
Or make a line graph with bacteria mapped to color:
# Line chart across samples: one line per bacterium, distinguished by color.
# `group` is set explicitly so each bacterium's points are joined into a
# single line along the discrete x axis.
ggplot(data = dat_long, mapping = aes(x = sample, y = value)) +
  geom_line(mapping = aes(group = Bacteria, color = Bacteria)) +
  theme(axis.text.x = element_text(hjust = 1, angle = 90))
Upvotes: 2
Reputation: 173793
You need to reshape your data. You can then do a comparative boxplot:
library(tidyverse)
# Reshape to long format, derive the time point from each sample name, then
# draw comparative boxplots per time point, one panel per bacterium.
df %>%
  pivot_longer(-Bacteria) %>%
  mutate(
    # Sample/subject ID: everything before the first underscore. Not used by
    # the plot below; kept for reference.
    group = gsub('_.*$', '', name),
    # Time token ("T0", "T1", ...) taken from the "_T<digits>" part of the
    # column name. Names without such a part are returned unchanged by gsub().
    time = gsub('^.*_(T\\d+).*$', '\\1', name)
  ) %>%
  # Keep only rows whose time is exactly a T<digits> token. The anchors stop
  # unparsed names that merely contain "T<digit>" somewhere from slipping
  # through as bogus x-axis categories.
  filter(grepl('^T\\d+$', time)) %>%
  # Order the time points numerically; as plain character they would be
  # sorted lexically on the axis (T10 before T2).
  mutate(
    time = factor(
      time,
      levels = unique(time[order(as.numeric(sub('^T', '', time)))])
    )
  ) %>%
  ggplot(aes(time, value, fill = Bacteria)) +
  geom_boxplot() +
  theme_minimal() +
  # `scales` spelled out in full; the original `scale =` only worked through
  # partial argument matching.
  facet_grid(Bacteria ~ ., scales = 'free_y') +
  scale_fill_brewer(palette = 'Set2') +
  theme(panel.border = element_rect(fill = NA, color = 'gray75'))
Upvotes: 2