user432797
user432797

Reputation: 593

How to to prevent data.frame function from sorting my data

How to prevent data.frame function from sorting out my data, I used match() to prevent sorting in my initial code, but when I moved to ggplot, with the use of data.frame, it seems it sorts out the data, but I want ot keep the data as is, so they can the same way in the plot.

Data are in this link

My code with help from some great coders on this community, the first part keep the data sorted the way I want:

library(dplyr, warn.conflicts = FALSE)
library(tidyverse, warn.conflicts = FALSE)
library(stringr, warn.conflicts = FALSE)
library(matrixStats, warn.conflicts = FALSE)
library(pheatmap, warn.conflicts = FALSE)
library(heatmaps, warn.conflicts = FALSE)
library(ggplot2, warn.conflicts = FALSE)

dfc <- read.csv(url("https://github.com/learnseq/learning/raw/main/GSE133399_Fig2_FPKM.csv"))

values <- c('S100a10', 'Esm1', 'Itgb1', 'Anxa2', 'Hist1h1b', 
                                                'Il2rb', 'Lgals1', 'Mki67', 'Rora', 'S100a4', 
                                                'S100a6', 'Adam8', 'Areg', 'Bcl2l1', 'Calca', 
                                                'Capg', 'Ccr2', 'Cd44', 'Csda', 'Ehd1', 
                                                'Id2', 'Il10', 'Il1rl1', 'Il2ra', 'Lmna', 
                                                'Maf', 'Penk', 'Podnl1', 'Tiam1', 'Vim',
                                                'Ern1', 'Furin', 'Ifng', 'Igfbp7', 'Il13', 
                                                'Il4', 'Il5', 'Nrp1', 'Ptprs', 'Rbpj', 
                                                'Spry1', 'Tnfsf11', 'Vdr', 'Xcl1', 'Bmpr2', 
                                                'Csf1', 'Dst', 'Foxp3', 'Itgav', 'Itgb8', 
                                                'Lamc1', 'Myo1e', 'Pmaip1', 'Prdm1', 'Ptpn5', 
                                                'Ramp1', 'Sdc4')

dfg <- dfc[match(rev(values), dfc$tracking_id), ]

dfg$CD44low_rep <- rowMeans(dfg[,c('CD44low_rep1', 'CD44low_rep2')], na.rm=TRUE)
dfg$CD44hi_CD69low_rep <- rowMeans(dfg[,c('CD44hi_CD69low_rep1', 'CD44hi_CD69low_rep2')], na.rm=TRUE)
dfg$CD44hi_CD69hi_CD103low_rep <- rowMeans(dfg[,c('CD44hi_CD69hi_CD103low_rep1', 'CD44hi_CD69hi_CD103low_rep2')], na.rm=TRUE)
dfg$CD44hi_CD69hi_CD103hi_rep <- rowMeans(dfg[,c('CD44hi_CD69hi_CD103hi_rep1', 'CD44hi_CD69hi_CD103hi_rep2')], na.rm=TRUE)
rownameshm <-paste(dfg[,1])
colnameshm <- paste(dQuote(colnames(dfg[0, 10:13])), collapse = ", ")
dfg$Mean <- rowMeans(dfg[,10:13])
dfg$sd <- rowSds(as.matrix(dfg[,10:13]))

zScore <- function(p){
for(n in 10:13){
    p[[n]]=(as.numeric(p[[n]])-as.numeric(p[[14]]))/as.numeric(p[[15]])
    }
return(p)
}

Matrix_zScore <- t(apply(dfg,1,zScore))

Matrix_zScore_temp <- mapply(Matrix_zScore[,10:13], FUN=as.numeric)
Matrix_zScore_temp <- matrix(data=Matrix_zScore_temp, ncol=4, nrow=57)
Matrix_zScore_temp1<-as.data.frame(Matrix_zScore_temp)

rownames(Matrix_zScore_temp) <- dfg$tracking_id
plot_frame <- reshape2::melt(Matrix_zScore_temp)

This following (second) part of the code that causing the data to be sorted (which I don't want), I'm thinking the data.frame function in ggplot is causing the problem:

library("tidyverse", warn.conflicts = FALSE)
library(repr, warn.conflicts = FALSE)
options(repr.plot.width=3, repr.plot.height=8)
tiff("test.tiff", units="in", width=3, height=8, res=300)

ggplot(plot_frame, aes(Var2, Var1, fill = value)) + 
  geom_tile(color = "white", position = position_dodge(), show.legend = TRUE) +
  geom_point(data = data.frame(Var2 = 1:4, Var1 = "", value = 0), size = 5,
             aes(color = factor(Var2))) +
  geom_point(data = data.frame(Var2 = 1:4, Var1 = " ", value = 1), alpha = 0) +
  scale_color_manual(values = c("black", "forestgreen", "red4", "blue4"),
                     labels = c("CD44 T Cells",
                                "CD44 CD69 T Cells",
                                "CD44 CD69 CD103-lo T Cells",
                                "CD44 CD69 CD103-hi T Cells"),
                     guide = guide_legend(override.aes = list(fill = NA))) +
  scale_y_discrete(position = "right") +
  labs(y = "", fill = "") +
  scale_fill_gradientn(colors = c("#3C57A8", "white", "#DE2D29"),
                       breaks = c(1.5, 0, -1.5),
                        labels = c("1.0", "0", "-1.0"),
                        limits = c(-1.5, 1.5),
                       space = "Lab",
                       na.value = "grey50",
                       guide = "colourbar",
                       aesthetics = "fill") +
  theme_minimal() + 
theme (panel.grid.major = element_blank(), axis.text.y.right = element_text(margin = margin(l = unit(-5, "cm")))) +
theme (axis.text.y = element_text(face="italic", size=7, color="black")) +
    guides(
        fill = guide_colourbar(
            title = "Relative gene expression \n (z score)",
            title.position = "right",
            title.theme = element_text(angle = -90, size = 7.5),
            direction = "vertical",
            ticks = TRUE
        )) +
theme(
    legend.justification = c(-0.9, 0), 
    legend.direction = "vertical",
    legend.key.size = unit(0.6, "cm"),
    legend.key.width = unit(0.2,"cm"),
    legend.title.align = 0.5)

dev.off()

Upvotes: 1

Views: 60

Answers (1)

The empty string specification in Var1 in your geom_point functions is messing up the ggplot2 behavior, since it's expecting a numeric value. Replace it by 0 and it should work as expected.


From:

...
geom_point(data = data.frame(Var2 = 1:4, Var1 = "", value = 0), size = 5,
           aes(color = factor(Var2))) +
  geom_point(data = data.frame(Var2 = 1:4, Var1 = " ", value = 1), alpha = 0) +
...

To:

...
geom_point(data = data.frame(Var2 = 1:4, Var1 = 0, value = 0), size = 5,
           aes(color = factor(Var2))) +
  geom_point(data = data.frame(Var2 = 1:4, Var1 = 0, value = 1), alpha = 0) +
...

Upvotes: 1

Related Questions