Reputation: 67
I need to make a correlation plot (something like what the "corrplot" package produces), but I have too many variables and don't want all the variable names on the plot. I want the variables from the same group to be next to each other, with the group name shown over them (see an example of what I want at the bottom).
My dataset looks like this.
You can recreate it from here:
sub_my_data <-
structure(
list(
metabolites = c(
"Acetate",
"Acetoacetate",
"Acetone",
"Ala",
"Albumin",
"ApoA1",
"ApoB",
"ApoB_by_ApoA1",
"bOHbutyrate",
"Cholines",
"Citrate",
"Clinical_LDL_C",
"Creatinine",
"DHA",
"Gln",
"Glucose",
"Gly",
"GlycA",
"HDL_C",
"HDL_CE",
"HDL_FC",
"HDL_L",
"HDL_P",
"HDL_PL",
"HDL_size",
"HDL_TG",
"His",
"IDL_C",
"IDL_CE",
"IDL_FC",
"IDL_L",
"IDL_P",
"IDL_PL",
"IDL_TG",
"Ile",
"L_HDL_C",
"L_HDL_CE",
"L_HDL_FC",
"L_HDL_L",
"L_HDL_P",
"L_HDL_PL",
"L_HDL_TG",
"L_LDL_C",
"L_LDL_CE",
"L_LDL_FC",
"L_LDL_L",
"L_LDL_P",
"L_LDL_PL",
"L_LDL_TG",
"L_VLDL_C",
"L_VLDL_CE",
"L_VLDL_FC",
"L_VLDL_L",
"L_VLDL_P",
"L_VLDL_PL",
"L_VLDL_TG",
"LA",
"Lactate",
"LDL_C",
"LDL_CE",
"LDL_FC",
"LDL_L",
"LDL_P",
"LDL_PL",
"LDL_size",
"LDL_TG",
"Leu",
"M_HDL_C",
"M_HDL_CE",
"M_HDL_FC",
"M_HDL_L",
"M_HDL_P",
"M_HDL_PL",
"M_HDL_TG",
"M_LDL_C",
"M_LDL_CE",
"M_LDL_FC",
"M_LDL_L",
"M_LDL_P",
"M_LDL_PL",
"M_LDL_TG",
"M_VLDL_C",
"M_VLDL_CE",
"M_VLDL_FC",
"M_VLDL_L",
"M_VLDL_P",
"M_VLDL_PL",
"M_VLDL_TG",
"MUFA",
"non_HDL_C",
"Omega_3",
"Omega_6",
"Phe",
"Phosphatidylc",
"Phosphoglyc",
"PUFA",
"Pyruvate",
"Remnant_C",
"S_HDL_C",
"S_HDL_CE",
"S_HDL_FC",
"S_HDL_L",
"S_HDL_P",
"S_HDL_PL",
"S_HDL_TG",
"S_LDL_C",
"S_LDL_CE",
"S_LDL_FC",
"S_LDL_L",
"S_LDL_P",
"S_LDL_PL",
"S_LDL_TG",
"S_VLDL_C",
"S_VLDL_CE",
"S_VLDL_FC",
"S_VLDL_L",
"S_VLDL_P",
"S_VLDL_PL",
"S_VLDL_TG",
"SFA",
"Sphingomyelins",
"TG_by_PG",
"Total_BCAA",
"Total_C",
"Total_CE",
"Total_FA",
"Total_FC",
"Total_L",
"Total_P",
"Total_PL",
"Total_TG",
"Tyr",
"Unsaturation",
"Val",
"VLDL_C",
"VLDL_CE",
"VLDL_FC",
"VLDL_L",
"VLDL_P",
"VLDL_PL",
"VLDL_size",
"VLDL_TG",
"XL_HDL_C",
"XL_HDL_CE",
"XL_HDL_FC",
"XL_HDL_L",
"XL_HDL_P",
"XL_HDL_PL",
"XL_HDL_TG",
"XL_VLDL_C",
"XL_VLDL_CE",
"XL_VLDL_FC",
"XL_VLDL_L",
"XL_VLDL_P",
"XL_VLDL_PL",
"XL_VLDL_TG",
"XS_VLDL_C",
"XS_VLDL_CE",
"XS_VLDL_FC",
"XS_VLDL_L",
"XS_VLDL_P",
"XS_VLDL_PL",
"XS_VLDL_TG",
"XXL_VLDL_C",
"XXL_VLDL_CE",
"XXL_VLDL_FC",
"XXL_VLDL_L",
"XXL_VLDL_P",
"XXL_VLDL_PL",
"XXL_VLDL_TG"
),
Group = c(
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Amino acids",
"Lipoprotein subclasses",
"Apolipoproteins",
"Apolipoproteins",
"Apolipoproteins",
"Lipoprotein subclasses",
"Other lipids",
"Lipoprotein subclasses",
"Cholesterol",
"Lipoprotein subclasses",
"Fatty acids",
"Glycolysis related metabolites",
"Fluid balance",
"Glycolysis related metabolites",
"Lipoprotein subclasses",
"Cholesterol",
"Cholesteryl esters",
"Free cholesterol",
"Total lipids",
"Lipoprotein particle concentrations",
"Phospholipids",
"Lipoprotein particle sizes",
"Triglycerides",
"Glycolysis related metabolites",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Glycolysis related metabolites",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Fatty acids",
"Fluid balance",
"Cholesterol",
"Cholesteryl esters",
"Free cholesterol",
"Total lipids",
"Lipoprotein particle concentrations",
"Phospholipids",
"Lipoprotein particle sizes",
"Triglycerides",
"Ketone bodies",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Fatty acids",
"Cholesterol",
"Fatty acids",
"Fatty acids",
"Ketone bodies",
"Other lipids",
"Other lipids",
"Fatty acids",
"Inflammation",
"Cholesterol",
"",
"",
"",
"",
"",
"",
"",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Fatty acids",
"Other lipids",
"Other lipids",
"Glycolysis related metabolites",
"Cholesterol",
"Cholesteryl esters",
"Fatty acids",
"Free cholesterol",
"Total lipids",
"Lipoprotein particle concentrations",
"Phospholipids",
"Triglycerides",
"Ketone bodies",
"Fatty acids",
"Ketone bodies",
"Cholesterol",
"Cholesteryl esters",
"Free cholesterol",
"Total lipids",
"Lipoprotein particle concentrations",
"Phospholipids",
"Lipoprotein particle sizes",
"Triglycerides",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses",
"Lipoprotein subclasses"
),
V1 = c(
0.44,
0.08,
0.11,
0.42,
0.58,
0.34,
0.22,
0.23,
0.12,
0.28,
0.34,
0.29,
0.56,
0.43,
0.43,
0.25,
0.62,
0.18,
0.37,
0.39,
0.33,
0.34,
0.35,
0.34,
0.39,
0.18,
0.51,
0.31,
0.3,
0.32,
0.25,
0.16,
0.21,
0.06,
0.51,
0.4,
0.42,
0.36,
0.38,
0.38,
0.38,
0.14,
0.31,
0.31,
0.35,
0.28,
0.23,
0.28,
0.06,
0.16,
0.19,
0.13,
0.1,
0.09,
0.1,
0.09,
0.14,
0.4,
0.3,
0.29,
0.34,
0.27,
0.24,
0.28,
0.63,
0.06,
0.73,
0.4,
0.42,
0.35,
0.4,
0.39,
0.4,
0.2,
0.29,
0.26,
0.35,
0.27,
0.27,
0.28,
0.07,
0.25,
0.26,
0.22,
0.18,
0.22,
0.2,
0.12,
0.11,
0.24,
0.3,
0.19,
0.54,
0.31,
0.3,
0.21,
0.2,
0.17,
0.49,
0.6,
0.32,
0.35,
0.47,
0.35,
0.18,
0.27,
0.24,
0.37,
0.25,
0.22,
0.25,
0.05,
0.19,
0.18,
0.23,
0.16,
0.15,
0.21,
0.15,
0.15,
0.27,
0.1,
0.69,
0.3,
0.31,
0.16,
0.23,
0.22,
0.34,
0.27,
0.07,
0.49,
0.57,
0.74,
0.16,
0.17,
0.15,
0.1,
0.14,
0.12,
0.36,
0.07,
0.35,
0.36,
0.3,
0.32,
0.31,
0.32,
0.1,
0.12,
0.17,
0.09,
0.08,
0.08,
0.08,
0.07,
0.13,
0.16,
0.07,
0.08,
0.09,
0.05,
0.12,
0.03,
0.01,
0.05,
0.03,
0.02,
0.03,
0.04
),
V2 = c(
0.48,
0,
0.38,
0.18,
0.25,
0.5,
0.16,
0.12,
0.6,
0.43,
0.5,
0.09,
0.3,
0.29,
0.09,
0.22,
0.46,
0.21,
0.45,
0.45,
0.47,
0.51,
0.41,
0.55,
0.55,
0.52,
0.08,
0.17,
0.17,
0.17,
0.18,
0.06,
0.19,
0.38,
0.06,
0.51,
0.52,
0.49,
0.53,
0.52,
0.56,
0.42,
0.15,
0.17,
0.13,
0.15,
0.19,
0.09,
0.33,
0.25,
0.27,
0.22,
0.19,
0.19,
0.23,
0.17,
0.26,
0.35,
0.15,
0.17,
0.09,
0.15,
0.17,
0.11,
0.75,
0.35,
0.08,
0.5,
0.51,
0.49,
0.57,
0.54,
0.61,
0.55,
0.14,
0.16,
0.09,
0.16,
0.12,
0.14,
0.35,
0.16,
0.12,
0.2,
0.23,
0.22,
0.22,
0.24,
0.38,
0.14,
0.42,
0.36,
0.13,
0.51,
0.49,
0.38,
0.12,
0.13,
0.38,
0.46,
0.41,
0.43,
0.4,
0.49,
0.45,
0.14,
0.16,
0.07,
0.16,
0.18,
0.15,
0.28,
0.22,
0.23,
0.21,
0.3,
0.3,
0.26,
0.4,
0.33,
0.24,
0.28,
0.11,
0.23,
0.24,
0.38,
0.2,
0.29,
0.39,
0.4,
0.25,
0.16,
0.46,
0.16,
0.21,
0.19,
0.23,
0.21,
0.26,
0.24,
0.44,
0.21,
0.42,
0.44,
0.34,
0.44,
0.43,
0.46,
0.35,
0.21,
0.22,
0.19,
0.19,
0.2,
0.19,
0.2,
0.18,
0.17,
0.2,
0.24,
0.22,
0.25,
0.48,
0.18,
0.15,
0.21,
0.17,
0.18,
0.19,
0.16
),
V3 = c(
0.66,
0,
0.25,
0.21,
0.31,
0.74,
0.31,
0.14,
0.48,
0.66,
0.43,
0.36,
0.37,
0.45,
0.31,
0.2,
1,
0.29,
0.72,
0.71,
0.74,
0.76,
0.66,
0.8,
0.65,
0.49,
0.3,
0.48,
0.47,
0.49,
0.47,
0.31,
0.46,
0.39,
0.08,
0.74,
0.74,
0.71,
0.76,
0.73,
0.81,
0.48,
0.43,
0.43,
0.44,
0.42,
0.31,
0.38,
0.38,
0.15,
0.19,
0.12,
0.09,
0.09,
0.11,
0.06,
0.36,
0.37,
0.4,
0.4,
0.41,
0.39,
0.31,
0.36,
0.66,
0.37,
0.13,
0.75,
0.75,
0.76,
0.8,
0.77,
0.84,
0.52,
0.36,
0.35,
0.37,
0.35,
0.31,
0.32,
0.36,
0.3,
0.31,
0.28,
0.22,
0.25,
0.26,
0.13,
0.36,
0.36,
0.41,
0.45,
0.16,
0.72,
0.7,
0.47,
0.12,
0.31,
0.58,
0.61,
0.64,
0.57,
0.57,
0.63,
0.34,
0.31,
0.3,
0.36,
0.32,
0.29,
0.34,
0.22,
0.27,
0.26,
0.3,
0.28,
0.26,
0.3,
0.28,
0.42,
0.55,
0.1,
0.14,
0.53,
0.54,
0.43,
0.45,
0.48,
0.64,
0.65,
0.16,
0.19,
0.47,
0.17,
0.25,
0.26,
0.22,
0.15,
0.27,
0.2,
0.21,
0.09,
0.6,
0.62,
0.52,
0.6,
0.59,
0.61,
0.38,
0.13,
0.17,
0.09,
0.07,
0.08,
0.08,
0.06,
0.37,
0.39,
0.32,
0.35,
0.34,
0.31,
0.43,
0.05,
0.03,
0.08,
0.03,
0.04,
0.05,
0.02
),
V4 = c(
0.66,
0.14,
0.14,
0.43,
0.38,
0.3,
0.28,
0.31,
0.15,
0.28,
0.47,
0.38,
0.22,
0.23,
0.07,
0.28,
0.5,
0.51,
0.32,
0.33,
0.28,
0.28,
0.32,
0.28,
0.31,
0.16,
0.29,
0.39,
0.37,
0.41,
0.32,
0.26,
0.29,
0.07,
0.41,
0.32,
0.33,
0.3,
0.3,
0.3,
0.3,
0.1,
0.38,
0.37,
0.43,
0.35,
0.31,
0.37,
0.09,
0.18,
0.22,
0.14,
0.09,
0.08,
0.11,
0.06,
0.21,
0.53,
0.37,
0.35,
0.43,
0.35,
0.31,
0.36,
0.73,
0.09,
0.5,
0.35,
0.38,
0.3,
0.35,
0.34,
0.35,
0.18,
0.35,
0.31,
0.44,
0.34,
0.29,
0.36,
0.1,
0.32,
0.35,
0.27,
0.19,
0.25,
0.24,
0.08,
0.21,
0.31,
0.1,
0.26,
0.37,
0.25,
0.29,
0.24,
0.34,
0.25,
0.52,
0.64,
0.31,
0.37,
0.51,
0.37,
0.2,
0.34,
0.31,
0.44,
0.31,
0.28,
0.31,
0.07,
0.25,
0.24,
0.28,
0.17,
0.15,
0.24,
0.09,
0.17,
0.36,
0.08,
0.47,
0.34,
0.36,
0.21,
0.28,
0.25,
0.32,
0.29,
0.06,
0.93,
0.48,
0.47,
0.22,
0.23,
0.2,
0.11,
0.17,
0.16,
0.36,
0.05,
0.31,
0.32,
0.29,
0.28,
0.27,
0.27,
0.06,
0.16,
0.22,
0.11,
0.09,
0.09,
0.11,
0.06,
0.18,
0.21,
0.12,
0.12,
0.14,
0.1,
0.11,
0.08,
0.06,
0.1,
0.08,
0.06,
0.09,
0.08
),
V5 = c(
0.44,
0.36,
0.06,
0.55,
0.79,
0.57,
0.2,
0.13,
0.16,
0.45,
0.55,
0.27,
0.24,
0.44,
0.15,
0.17,
0.47,
0.62,
0.59,
0.6,
0.55,
0.59,
0.53,
0.61,
0.55,
0.26,
0.34,
0.34,
0.32,
0.36,
0.3,
0.13,
0.27,
0.18,
0.35,
0.61,
0.62,
0.56,
0.6,
0.57,
0.63,
0.24,
0.28,
0.28,
0.32,
0.27,
0.22,
0.27,
0.19,
0.13,
0.15,
0.1,
0.08,
0.07,
0.08,
0.07,
0.21,
0.58,
0.28,
0.27,
0.3,
0.26,
0.23,
0.26,
0.68,
0.19,
0.36,
0.63,
0.64,
0.59,
0.64,
0.62,
0.68,
0.28,
0.28,
0.27,
0.29,
0.27,
0.25,
0.24,
0.19,
0.23,
0.26,
0.19,
0.16,
0.19,
0.17,
0.1,
0.23,
0.23,
0.31,
0.29,
0.39,
0.41,
0.46,
0.3,
0.36,
0.19,
0.55,
0.63,
0.46,
0.47,
0.53,
0.51,
0.2,
0.25,
0.23,
0.31,
0.23,
0.19,
0.22,
0.12,
0.17,
0.15,
0.19,
0.15,
0.14,
0.19,
0.17,
0.27,
0.51,
0.08,
0.38,
0.36,
0.38,
0.28,
0.29,
0.31,
0.5,
0.44,
0.09,
0.39,
0.6,
0.41,
0.16,
0.16,
0.15,
0.1,
0.15,
0.12,
0.27,
0.07,
0.49,
0.51,
0.42,
0.48,
0.45,
0.47,
0.18,
0.1,
0.13,
0.08,
0.06,
0.07,
0.07,
0.05,
0.18,
0.21,
0.14,
0.15,
0.15,
0.12,
0.22,
0.06,
0.03,
0.09,
0.06,
0.05,
0.06,
0.07
)
),
class = "data.frame",
row.names = c(NA,-170L)
)
The columns named V1, V2, etc. represent participants.
I tried this code, but it gave me a messy plot with the group name repeated on each variable:
library(corrplot)

# Participant columns (V1, V2, ...): everything except the two label columns.
value_cols <- setdiff(names(sub_my_data), c("metabolites", "Group"))

# One row per participant, one column per metabolite. Transposing only the
# numeric columns keeps the values numeric, so there is no character
# round-trip and no as.numeric() loop over a hard-coded number of columns.
# sub_my_data itself is left unmodified.
tra3 <- t(as.matrix(sub_my_data[, value_cols, drop = FALSE]))

# The columns are labelled with each metabolite's Group (not its name), which
# is why the same group name is printed on every variable of the plot.
dimnames(tra3) <- list(NULL, sub_my_data$Group)
tra3 <- as.data.frame(tra3)
str(tra3)
names(tra3)

# tiff() writes a TIFF file, so the file name carries a matching extension.
tiff(
  "metabolites correlation plot2.tiff",
  width = 5000, height = 6000, res = 300
)
# Close the device even if plotting fails, so the file is not left open.
tryCatch(corrplot(cor(tra3)), finally = dev.off())
But it didn't work!
I would like to have something like this:
Thank you in advance!
Upvotes: 0
Views: 430
Reputation: 1763
I am still not really convinced by this representation, but here are some hints to make it look the way you want:
library(corrplot)

# Sort the rows so that metabolites of the same group are adjacent; the
# correlation matrix inherits this order, which is what allows one label per
# group instead of one label per variable.
sub_my_data <- sub_my_data[order(sub_my_data$Group), ]

# Number of metabolites per group, in the order the groups appear after
# sorting (levels are taken from the sorted column so both always agree).
Group <- table(factor(sub_my_data$Group, levels = unique(sub_my_data$Group)))

# Participant columns (V1, V2, ...): everything except the two label columns.
# The Group column is not dropped from sub_my_data, so this script can be
# re-run without losing the grouping information.
value_cols <- setdiff(names(sub_my_data), c("metabolites", "Group"))
stopifnot(all(vapply(sub_my_data[value_cols], is.numeric, logical(1))))

# One row per participant, one column per metabolite. Transposing only the
# numeric columns keeps full numeric precision and avoids a per-column
# as.numeric() loop over a hard-coded number of columns.
tra3 <- t(as.matrix(sub_my_data[, value_cols, drop = FALSE]))
dimnames(tra3) <- list(NULL, as.character(sub_my_data$metabolites))
tra3 <- as.data.frame(tra3)
str(tra3)
names(tra3)

# xpd = NA lets the group labels be drawn outside the plot region. The same
# margins are given to par() and corrplot() so that annotations added after
# corrplot() returns line up with the plot.
# NOTE(review): presumably corrplot() resets `mar` on exit - confirm against
# the installed corrplot version.
par(mar = c(5, 5, 5, 5), xpd = NA)
corrplot(cor(tra3), tl.pos = "n", mar = c(5, 5, 5, 5))

# Cells are drawn at integer coordinates: column j at x = j and row i at
# y = n_vars + 1 - i. A group covering cells (last - size + 1)..last is
# therefore centred at last - (size - 1) / 2.
n_vars <- sum(Group)
group_size <- as.vector(Group)
group_last <- cumsum(group_size)
group_centre <- group_last - (group_size - 1) / 2

# Row labels on the left, one per group, at the vertical centre of the group.
mtext(
  text = names(Group), side = 2, line = -2.5,
  at = n_vars + 1 - group_centre, las = 1, cex = 0.5
)

# Column labels above the matrix, rotated, at the horizontal centre of the
# group.
text(
  x = group_centre, y = n_vars + 1, labels = names(Group),
  srt = 90, adj = 0, cex = 0.5
)
You will need to tune the parameters, notably the margins, to suit your R session. With mtext and text, I add each group label at the middle of the cluster of variables (metabolites) belonging to that group. You could, for example, add some bars afterwards to separate the clusters. Adapt it as you like, but treat this only as a basis: corrplot() has no option to produce exactly what you want, so you will need custom code.
Upvotes: 0