Feng Tian
Feng Tian

Reputation: 1589

how to plot scatter plot in hexagon using R or ggplot2?

Test data:

set.seed(123)
Data <- data.frame(Pro=rnorm(20), Cla=rnorm(20), Neu=rnorm(20))

I want to plot each sample (row) as point in a hexagon (top figure). The positions of points are based on three coordinates, which have 120 angles between each other (bottom figure).

enter image description here

(Figure from Anoop P. Patel et al. Science, 2014)

Upvotes: 3

Views: 981

Answers (3)

nisetama
nisetama

Reputation: 8893

I needed to create a hexagonal version of a ternary diagram, where I had six columns in my data that always added up to 1, and I needed to plot the samples so that samples with a value of 1 in one column would plot in one corner of the hexagon. I ended up just modifying a script for plotting a ternary diagram:

library(tidyverse)
library(ggforce)
library(ggrepel)

t=read.table("https://pastebin.com/raw/5BVbJ4E9",row.names=1) # hexagon
# t=read.table("https://pastebin.com/raw/XNV7Xmmj",row.names=1) # square
# t=read.table("https://pastebin.com/raw/QvdWWvwx",row.names=1) # triangle

corners=rbind(c(.5,sqrt(3)/2),c(1,0),c(.5,-sqrt(3)/2),c(-.5,-sqrt(3)/2),c(-1,0),c(-.5,sqrt(3)/2)) # hexagon
# corners=rbind(c(0,1),c(1,0),c(0,-1),c(-1,0)) # diamond
# corners=rbind(c(1,1),c(1,-1),c(-1,-1),c(-1,1)) # square
# corners=rbind(c(0,sqrt(3)/2),c(-1,-sqrt(3)/2),c(1,-sqrt(3)/2)) # triangle

xy=as.data.frame(as.matrix(t)%*%corners)

# for hexagon
grid=as.data.frame(rbind(cbind(corners,rbind(corners[-1,],corners[1,])),cbind(corners,matrix(rep(0,12),ncol=2))))

# for diamond or square plot
# grid=apply(rbind(c(1,2,4,3),c(1,4,2,3),c(1,2,1,4),c(3,2,3,4),c(4,1,4,3),c(2,1,2,3)),1,function(x)cbind(
#   seq(corners[x[1],1],corners[x[2],1],length.out=11),
#   seq(corners[x[1],2],corners[x[2],2],length.out=11),
#   seq(corners[x[3],1],corners[x[4],1],length.out=11),
#   seq(corners[x[3],2],corners[x[4],2],length.out=11)
# )%>%as.data.frame)%>%bind_rows

# for ternary plot (triangle)
# grid=apply(rbind(c(1,2,3,2),c(1,3,2,3),c(2,1,3,1)),1,function(x)cbind(
#   seq(corners[x[1],1],corners[x[2],1],length.out=11),
#   seq(corners[x[1],2],corners[x[2],2],length.out=11),
#   seq(corners[x[3],1],corners[x[4],1],length.out=11),
#   seq(corners[x[3],2],corners[x[4],2],length.out=11)
# )%>%as.data.frame)%>%bind_rows

pop=sub(":.*","",rownames(xy))
centers=aggregate(xy,by=list(pop),mean)
xy$pop=pop

set.seed(1234)
color=as.factor(sample(seq(1,length(unique(xy$pop)))))
col=rbind(c(60,80),c(25,95),c(30,70),c(70,50),c(60,100),c(20,50),c(15,40))
hues=max(ceiling(length(color)/nrow(col)),8)
pal1=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],x[1],x[2])))
pal2=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],ifelse(x[2]>50,.8*x[1],.2*x[1]),ifelse(x[2]>50,.3*x[2],100))))

# add a small random factor so geom_voronoi_tile won't fail because of too many overlapping points
xy$V1=xy$V1+runif(nrow(xy))/1e3
xy$V2=xy$V2+runif(nrow(xy))/1e3

ggplot(xy,aes(x=V1,y=V2,group=-1L))+
geom_segment(data=grid,aes(x=V1,y=V2,xend=V3,yend=V4),color="gray90",size=.3)+
geom_voronoi_tile(aes(fill=color[as.factor(pop)],color=color[as.factor(pop)]),size=.07,max.radius=.05)+
# geom_point(data=centers,aes(x=V1,y=V2,color=color,fill=color),shape=21,size=2)+
# geom_label(data=centers,aes(x=V1,y=V2,label=Group.1,color=color,fill=color),alpha=.7,size=2.2,label.r=unit(.07,"lines"),label.padding=unit(.07,"lines"),label.size=0)+
geom_label_repel(data=centers,aes(x=V1,y=V2,label=Group.1,color=color,fill=color),max.overlaps=Inf,point.size=0,size=2.2,label.r=unit(.1,"lines"),label.padding=unit(.1,"lines"),label.size=.1,box.padding=0)+
coord_fixed(xlim=c(-1.08,1.08),ylim=c(-1.08,1.08),expand=F)+
scale_fill_manual(values=pal1)+
scale_color_manual(values=pal2)+
theme(
  axis.text=element_blank(),
  axis.ticks=element_blank(),
  axis.title=element_blank(),
  legend.position="none",
  panel.background=element_rect(fill="white"),
  plot.margin=margin(0,0,0,0,"cm")
)

ggsave("a.png",width=7,height=7)

ggforce is used to draw points using Voronoi tessellation: https://ggforce.data-imaginist.com/reference/geom_delvor.html. When I tried to install ggforce, it failed at first because its dependency units failed to install. By running install.packages("units"), I saw that I needed to run brew install udunits (libudunits2-dev on Debian and udunits2-devel on RPM).

Upvotes: 0

Mako212
Mako212

Reputation: 7312

The more standard way to accomplish your goal would be to use a ternary plot. I understand if a hexagon is more relevant for the way you want to display your data, but this plot is more straightforward to construct because there's a ggplot package for it.

require(ggplot2)
require(ggtern)

ggtern(Data, aes(Pro, Cla,Neu))+
  geom_point()+
  theme_tropical(base_size=14)

enter image description here

Upvotes: 3

Eumenedies
Eumenedies

Reputation: 1688

I am not aware of any particular way to do this automatically, but you can use some trigonometry to calculate the correct coordinates.

See below for my solution

set.seed(123)
Data <- data.frame(Pro=rnorm(20), Cla=rnorm(20), Neu=rnorm(20))

library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Warning: package 'dplyr' was built under R version 3.4.2
#> Conflicts with tidy packages ----------------------------------------------
#> filter(): dplyr, stats
#> lag():    dplyr, stats
Data %>%
  # Separating the S1, S2 and S3 axes into their x-y components is done using simple trigonometry.
  # S1 is the trivial case as it only has y component.
  # S2 and S3 are both 30 degrees (pi/6 radians) below the x-axis
  mutate(S1_x = Pro*cos(pi/2), S1_y = Pro*sin(pi/2), # Deconvolve S1 axis into cartesian coordinates (x,y)
         S2_x = Cla*cos(pi/6), S2_y = -Cla*sin(pi/6), # Deconvolve S2 axis into cartesian coordinates (x,y)
         S3_x = -Neu*cos(pi/6), S3_y = -Neu*sin(pi/6)) %>%  # Deconvolve S3 axis into cartesian coordinates (x,y)
  mutate(x = S1_x + S2_x + S3_x, y = S1_y + S2_y + S3_y) %>% # Combine x and y compononts from S1, S2 and S3
  ggplot(aes(x = x, y=y))+geom_point()

coordinates plotted

# Just to prove that the maths works, plot the hexagon described by unit length
path <- data.frame(Pro = c(1,1,0,0,0,1,1), Cla = c(0,1,1,1,0,0,0), Neu = c(0,0,0,1,1,1,0))
path %>%
  mutate(S1_x = Pro*cos(pi/2), S1_y = Pro*sin(pi/2), 
         S2_x = Cla*cos(pi/6), S2_y = -Cla*sin(pi/6), 
         S3_x = -Neu*cos(pi/6), S3_y = -Neu*sin(pi/6)) %>% 
  mutate(x = S1_x + S2_x + S3_x, y = S1_y + S2_y + S3_y) %>%
  ggplot(aes(x = x, y=y))+geom_path()

outer hexagon plotted

Upvotes: 4

Related Questions