PesKchan
PesKchan

Reputation: 978

Coloring nodes and edges based on module colors attributes

I have a simple data frame that I obtained after running WGCNA, where the correlation was turned into an edge list. From that object I'm giving a small subset:

dput(dd)
structure(list(gene1 = c("GBA3", "GBA3", "GBA3", "GBA3", "GBA3", 
"GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3", 
"GBA3", "GBA3", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", 
"IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", 
"IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "GGNBP1", 
"GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", 
"GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", 
"OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", 
"OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", 
"OR52B6"), gene2 = c("LRP2BP", "ADGB", "ASNSP3", "HSD17B2", "HSP90B1", 
"IFT22", "P4HB", "TTC22", "XKR9", "IQSEC2", "NECAB2", "ANO1", 
"CPPED1", "MAGEE1", "MAPRE3", "COTL1P1", "OR13G1", "FTH1P11", 
"KRT8P44", "LINC00243", "MYOZ1", "PARD6G", "PDLIM5", "RN7SL67P", 
"PARP3", "SH3BGRL3", "KIF1B", "CDK6", "CYP24A1", "TFEB", "LRP2BP", 
"ADGB", "ASNSP3", "HSD17B2", "HSP90B1", "IFT22", "P4HB", "TTC22", 
"XKR9", "IQSEC2", "NECAB2", "ANO1", "CPPED1", "MAGEE1", "MAPRE3", 
"COTL1P1", "OR13G1", "FTH1P11", "KRT8P44", "LINC00243", "MYOZ1", 
"PARD6G", "PDLIM5", "RN7SL67P", "PARP3", "SH3BGRL3", "KIF1B", 
"CDK6", "CYP24A1", "TFEB"), correlation = c(1.19842058210312e-07, 
3.95592260312023e-09, 1.18879994893077e-09, 3.67331679745971e-10, 
5.48302012245219e-09, 7.97197389702251e-06, 9.7387584019434e-08, 
5.77878345171157e-08, 1.01118703571283e-08, 1.81543845754574e-07, 
3.7673420265534e-08, 1.02575704450652e-08, 4.82487451740043e-08, 
1.65401803325697e-07, 2.95827225165244e-09, 1.35635056964288e-07, 
1.16813988688191e-09, 1.34340296981193e-07, 5.26153755948588e-08, 
5.06031471203736e-05, 1.63465042896832e-09, 2.10400523574347e-09, 
1.08460550923374e-08, 1.09938266167239e-06, 3.31572488037795e-08, 
3.97957891649769e-07, 2.0833042793021e-08, 4.16797585733493e-06, 
1.02162139939232e-07, 3.74962089757379e-06, 5.10285758466629e-07, 
0.000165189152741692, 0.000572780674091671, 2.43056928465514e-07, 
0.00166978419035755, 2.3826397075692e-07, 0.000204964046470693, 
1.32648351252772e-06, 2.79759921075308e-06, 1.11117833192239e-06, 
6.87171744654038e-09, 2.33022551088771e-09, 2.7732284839245e-06, 
1.74867497254059e-06, 1.16457488078883e-08, 2.58493584273799e-05, 
0.000117632422231583, 0.000115191350816912, 3.45926695804785e-05, 
6.60444623946169e-07, 8.48280303856373e-09, 9.3470012463335e-07, 
2.33358874243648e-05, 9.13982092399789e-05, 6.16545562787355e-06, 
0.0014007113940871, 1.549339320847e-06, 0.000373320941277797, 
2.87750585085082e-08, 0.00105876974504533), module1 = structure(c(9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L), .Label = c("black", "blue", "brown", "cyan", "green", 
"greenyellow", "grey", "magenta", "midnightblue", "pink", "purple", 
"red", "salmon", "tan", "turquoise", "yellow"), class = "factor"), 
    module2 = structure(c(3L, 7L, 2L, 7L, 1L, 4L, 1L, 3L, 5L, 
    15L, 15L, 7L, 15L, 3L, 15L, 15L, 2L, 15L, 2L, 3L, 7L, 16L, 
    15L, 11L, 15L, 15L, 15L, 2L, 3L, 15L, 3L, 7L, 2L, 7L, 1L, 
    4L, 1L, 3L, 5L, 15L, 15L, 7L, 15L, 3L, 15L, 15L, 2L, 15L, 
    2L, 3L, 7L, 16L, 15L, 11L, 15L, 15L, 15L, 2L, 3L, 15L), .Label = c("black", 
    "blue", "brown", "cyan", "green", "greenyellow", "grey", 
    "magenta", "midnightblue", "pink", "purple", "red", "salmon", 
    "tan", "turquoise", "yellow"), class = "factor")), row.names = c(NA, 
-60L), class = c("tbl_df", "tbl", "data.frame"))

My dataframe looks something like this

gene1 gene2   correlation module1      module2
  <chr> <chr>         <dbl> <fct>        <fct>  
1 GBA3  LRP2BP     1.20e- 7 midnightblue brown  
2 GBA3  ADGB       3.96e- 9 midnightblue grey   
3 GBA3  ASNSP3     1.19e- 9 midnightblue blue   
4 GBA3  HSD17B2    3.67e-10 midnightblue grey   
5 GBA3  HSP90B1    5.48e- 9 midnightblue black  
6 GBA3  IFT22      7.97e- 6 midnightblue cyan   

In the small subset above, two modules are combined. They can be seen in the module1 column, which holds the color of my source node; module2 holds the color of my target node. The correlation column gives the strength of the correlation between the genes in the first two columns.

To turn it into igraph object I do the following

 # Build the graph as undirected here: graph_from_data_frame() defaults to
 # directed = TRUE, and `directed` is not a plotting parameter, so passing it
 # to plot.igraph() does not change the graph.
 g1 <- graph_from_data_frame(dd, directed = FALSE)
 # Remove self-loops.
 g1 <- simplify(g1, remove.loops = TRUE)
 # Print the degree of every vertex.
 degree(g1)
 # layout_with_fr() is the current name of the deprecated
 # layout.fruchterman.reingold().
 plot.igraph(g1, layout = layout_with_fr(g1))

I get something like this, which looks fine, but there are two things I have seen elsewhere and would like to know how to do:

  1. How do I label my source and target nodes based on the module colors?
  2. How do I set the node size based on the degree, with only the source nodes labelled while the target nodes just keep the color they are assigned to?

Any suggestion or help would be really appreciated

enter image description here

Upvotes: 1

Views: 88

Answers (1)

stomper
stomper

Reputation: 1385

Here's an approach leveraging ggraph. The first thing I do here is create a separate dataframe of the vertices. It has two columns, one for the gene and one for the color.

# One (gene, color) table per edge endpoint: genes in gene1 take their color
# from module1, genes in gene2 take theirs from module2.
df1 <- select(dd, gene = gene1, color = module1)
df2 <- select(dd, gene = gene2, color = module2)

# Stack both endpoint tables and keep each distinct (gene, color) row once.
df_color <- distinct(bind_rows(df1, df2))

I take a slightly different approach to creating the igraph object. Here we just feed it the first three columns of dd for the relationships, and the new df_color dataframe for the list of vertices.

# Edges come from the first three columns of dd (gene1, gene2, correlation);
# the vertex list with its color column comes from df_color.
g2 <- graph_from_data_frame(
    d = dd[, c("gene1", "gene2", "correlation")],
    directed = FALSE,
    vertices = df_color
)

You can see now that we've captured both the names and the colors associated with the vertices in the igraph object.

V(g2)$name
 [1] "GBA3"      "IGHV3-52"  "GGNBP1"    "OR52B6"    "LRP2BP"    "ADGB"      "ASNSP3"    "HSD17B2"  
 [9] "HSP90B1"   "IFT22"     "P4HB"      "TTC22"     "XKR9"      "IQSEC2"    "NECAB2"    "ANO1"     
[17] "CPPED1"    "MAGEE1"    "MAPRE3"    "COTL1P1"   "OR13G1"    "FTH1P11"   "KRT8P44"   "LINC00243"
[25] "MYOZ1"     "PARD6G"    "PDLIM5"    "RN7SL67P"  "PARP3"     "SH3BGRL3"  "KIF1B"     "CDK6"     
[33] "CYP24A1"   "TFEB" 

V(g2)$color
 [1] "midnightblue" "midnightblue" "tan"          "tan"          "brown"        "grey"        
 [7] "blue"         "grey"         "black"        "cyan"         "black"        "brown"       
[13] "green"        "turquoise"    "turquoise"    "grey"         "turquoise"    "brown"       
[19] "turquoise"    "turquoise"    "blue"         "turquoise"    "blue"         "brown"       
[25] "grey"         "yellow"       "turquoise"    "purple"       "turquoise"    "turquoise"   
[31] "turquoise"    "blue"         "brown"        "turquoise"  

From here we can use aesthetics as we can do in ggplot. I could not follow your second point regarding what you want to do with size. We can set the width aesthetic to another variable as we did for color to set the size of the edge. Here I've set it to the correlation. Hopefully this gives you a good starting point and you can finish it off. Otherwise, please clarify what you are looking to do with size and I'll try to offer more help.

# Plot g2 with the Fruchterman-Reingold layout. Nodes and their labels use the
# vertex attributes `name` and `color`; edge width uses `correlation`.
ggraph(g2, layout = "igraph", algorithm = "fr") +
    geom_node_point(aes(color = color), shape = 1, size = 3) +
    # NOTE(review): g2 was built with directed = FALSE, so these arrowheads do
    # not encode a real direction -- consider dropping `arrow`.
    geom_edge_link(aes(width = correlation), edge_color = "steelblue",
                   arrow = arrow(angle = 30, length = unit(0.05, "inches"), ends = "last", type = "open")) +
    geom_node_text(aes(label = name, color = color)) +
    # The `color` attribute already holds R color names (the module colors), so
    # draw them literally instead of mapping them onto the default palette.
    scale_color_identity() +
    theme_void() +
    theme(legend.position = "none")


enter image description here

Upvotes: 1

Related Questions