Reputation: 978
I have a simple data frame that I obtained after running WGCNA, where the correlation matrix was turned into an edge list. From that object I'm giving a small subset:
dput(dd)
structure(list(gene1 = c("GBA3", "GBA3", "GBA3", "GBA3", "GBA3",
"GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3",
"GBA3", "GBA3", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52",
"IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52",
"IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "GGNBP1",
"GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1",
"GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1",
"OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6",
"OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6",
"OR52B6"), gene2 = c("LRP2BP", "ADGB", "ASNSP3", "HSD17B2", "HSP90B1",
"IFT22", "P4HB", "TTC22", "XKR9", "IQSEC2", "NECAB2", "ANO1",
"CPPED1", "MAGEE1", "MAPRE3", "COTL1P1", "OR13G1", "FTH1P11",
"KRT8P44", "LINC00243", "MYOZ1", "PARD6G", "PDLIM5", "RN7SL67P",
"PARP3", "SH3BGRL3", "KIF1B", "CDK6", "CYP24A1", "TFEB", "LRP2BP",
"ADGB", "ASNSP3", "HSD17B2", "HSP90B1", "IFT22", "P4HB", "TTC22",
"XKR9", "IQSEC2", "NECAB2", "ANO1", "CPPED1", "MAGEE1", "MAPRE3",
"COTL1P1", "OR13G1", "FTH1P11", "KRT8P44", "LINC00243", "MYOZ1",
"PARD6G", "PDLIM5", "RN7SL67P", "PARP3", "SH3BGRL3", "KIF1B",
"CDK6", "CYP24A1", "TFEB"), correlation = c(1.19842058210312e-07,
3.95592260312023e-09, 1.18879994893077e-09, 3.67331679745971e-10,
5.48302012245219e-09, 7.97197389702251e-06, 9.7387584019434e-08,
5.77878345171157e-08, 1.01118703571283e-08, 1.81543845754574e-07,
3.7673420265534e-08, 1.02575704450652e-08, 4.82487451740043e-08,
1.65401803325697e-07, 2.95827225165244e-09, 1.35635056964288e-07,
1.16813988688191e-09, 1.34340296981193e-07, 5.26153755948588e-08,
5.06031471203736e-05, 1.63465042896832e-09, 2.10400523574347e-09,
1.08460550923374e-08, 1.09938266167239e-06, 3.31572488037795e-08,
3.97957891649769e-07, 2.0833042793021e-08, 4.16797585733493e-06,
1.02162139939232e-07, 3.74962089757379e-06, 5.10285758466629e-07,
0.000165189152741692, 0.000572780674091671, 2.43056928465514e-07,
0.00166978419035755, 2.3826397075692e-07, 0.000204964046470693,
1.32648351252772e-06, 2.79759921075308e-06, 1.11117833192239e-06,
6.87171744654038e-09, 2.33022551088771e-09, 2.7732284839245e-06,
1.74867497254059e-06, 1.16457488078883e-08, 2.58493584273799e-05,
0.000117632422231583, 0.000115191350816912, 3.45926695804785e-05,
6.60444623946169e-07, 8.48280303856373e-09, 9.3470012463335e-07,
2.33358874243648e-05, 9.13982092399789e-05, 6.16545562787355e-06,
0.0014007113940871, 1.549339320847e-06, 0.000373320941277797,
2.87750585085082e-08, 0.00105876974504533), module1 = structure(c(9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L), .Label = c("black", "blue", "brown", "cyan", "green",
"greenyellow", "grey", "magenta", "midnightblue", "pink", "purple",
"red", "salmon", "tan", "turquoise", "yellow"), class = "factor"),
module2 = structure(c(3L, 7L, 2L, 7L, 1L, 4L, 1L, 3L, 5L,
15L, 15L, 7L, 15L, 3L, 15L, 15L, 2L, 15L, 2L, 3L, 7L, 16L,
15L, 11L, 15L, 15L, 15L, 2L, 3L, 15L, 3L, 7L, 2L, 7L, 1L,
4L, 1L, 3L, 5L, 15L, 15L, 7L, 15L, 3L, 15L, 15L, 2L, 15L,
2L, 3L, 7L, 16L, 15L, 11L, 15L, 15L, 15L, 2L, 3L, 15L), .Label = c("black",
"blue", "brown", "cyan", "green", "greenyellow", "grey",
"magenta", "midnightblue", "pink", "purple", "red", "salmon",
"tan", "turquoise", "yellow"), class = "factor")), row.names = c(NA,
-60L), class = c("tbl_df", "tbl", "data.frame"))
My dataframe looks something like this
gene1 gene2 correlation module1 module2
<chr> <chr> <dbl> <fct> <fct>
1 GBA3 LRP2BP 1.20e- 7 midnightblue brown
2 GBA3 ADGB 3.96e- 9 midnightblue grey
3 GBA3 ASNSP3 1.19e- 9 midnightblue blue
4 GBA3 HSD17B2 3.67e-10 midnightblue grey
5 GBA3 HSP90B1 5.48e- 9 midnightblue black
6 GBA3 IFT22 7.97e- 6 midnightblue cyan
In the small subset above, two modules are combined. They can be seen in the module1 column, which holds the module color of my source gene, while module2 holds the module color of my target gene. The correlation column gives the strength of the correlation between the two genes in the first two columns.
To turn it into igraph object I do the following
library(igraph)

# Build the gene-gene network from the edge list. The first two columns of dd
# (gene1, gene2) are used as edge endpoints; the remaining columns
# (correlation, module1, module2) are attached as edge attributes.
# `directed = FALSE` has to be given here: plot() has no `directed` argument,
# so passing it at plot time never made the graph undirected.
g1 <- graph_from_data_frame(dd, directed = FALSE)

# Remove self-loops. With simplify()'s defaults, duplicated gene pairs are
# collapsed into one edge as well.
g1 <- simplify(g1, remove.loops = TRUE)

# Number of edges attached to each gene.
degree(g1)

# Draw the network with a Fruchterman-Reingold (force-directed) layout.
# layout_with_fr() is the current name of layout.fruchterman.reingold().
plot(g1, layout = layout_with_fr(g1))
I get something like this, which looks fine but what I have seen and which i would like to know how to do,
Any suggestion or help would be really appreciated
Upvotes: 1
Views: 88
Reputation: 1385
Here's an approach leveraging ggraph. The first thing I do here is create a separate dataframe of the vertices. It has two columns, one for the gene and one for the color.
# Vertex table: one row per gene together with the colour of its module.
# Source genes (gene1/module1) and target genes (gene2/module2) are brought
# to the same two-column shape so they can be stacked.
df1 <- select(dd, gene = gene1, color = module1)
df2 <- select(dd, gene = gene2, color = module2)

# Stack both sets and drop repeated gene/colour rows.
df_color <- unique(rbind(df1, df2))
I take a slightly different approach to creating the igraph object. Here we just feed it the first three columns of dd for the relationships, and the new df_color dataframe for the list of vertices.
# Edges come from the first three columns of dd (gene1, gene2, correlation);
# vertex names and colours come from df_color.
g2 <- graph_from_data_frame(
  d = dd[, 1:3],
  directed = FALSE,
  vertices = df_color
)
You can see now that we've captured both the names and the colors associated with the vertices in the igraph object.
V(g2)$name
[1] "GBA3" "IGHV3-52" "GGNBP1" "OR52B6" "LRP2BP" "ADGB" "ASNSP3" "HSD17B2"
[9] "HSP90B1" "IFT22" "P4HB" "TTC22" "XKR9" "IQSEC2" "NECAB2" "ANO1"
[17] "CPPED1" "MAGEE1" "MAPRE3" "COTL1P1" "OR13G1" "FTH1P11" "KRT8P44" "LINC00243"
[25] "MYOZ1" "PARD6G" "PDLIM5" "RN7SL67P" "PARP3" "SH3BGRL3" "KIF1B" "CDK6"
[33] "CYP24A1" "TFEB"
V(g2)$color
[1] "midnightblue" "midnightblue" "tan" "tan" "brown" "grey"
[7] "blue" "grey" "black" "cyan" "black" "brown"
[13] "green" "turquoise" "turquoise" "grey" "turquoise" "brown"
[19] "turquoise" "turquoise" "blue" "turquoise" "blue" "brown"
[25] "grey" "yellow" "turquoise" "purple" "turquoise" "turquoise"
[31] "turquoise" "blue" "brown" "turquoise"
From here we can use aesthetics as we can do in ggplot. I could not follow your second point regarding what you want to do with size. We can set the width aesthetic to another variable as we did for color to set the size of the edge. Here I've set it to the correlation. Hopefully this gives you a good starting point and you can finish it off. Otherwise, please clarify what you are looking to do with size and I'll try to offer more help.
library(ggraph)

# Plot the network with a Fruchterman-Reingold layout.
# - Nodes and labels are coloured by the vertex attribute `color`.
# - Edge width is mapped to the `correlation` edge attribute.
ggraph(g2, layout = "igraph", algorithm = "fr") +
  geom_node_point(aes(color = color), shape = 1, size = 3) +
  geom_edge_link(
    aes(width = correlation),
    edge_color = "steelblue",
    arrow = arrow(
      angle = 30,
      length = unit(0.05, "inches"),
      ends = "last",
      type = "open"
    )
  ) +
  geom_node_text(aes(label = name, color = color)) +
  # `color` already holds valid R colour names (the module colours). Without
  # an identity scale ggplot2 would treat them as arbitrary categories and
  # assign its default palette, so e.g. "turquoise" would not be drawn in
  # turquoise.
  scale_color_identity() +
  theme_void() +
  theme(legend.position = "none")
Upvotes: 1