Reputation: 21
I'm new to R and am trying to figure out how to do PCA using it. So far I have tried two methods but there are issues with both.
Here is the code for the first:
library('FactoMineR')

# Step 1: run the PCA on the raw input table and export the coordinates of
# the individuals on the first three dimensions.
# (attach() was dropped: every column below is accessed explicitly with `$`,
# and attaching only risks masking other objects.)
my.data.7 <- read.csv("Principal Component Analysis Input 2.csv", header=TRUE)
head(my.data.7)
PCA_result_2 <- PCA(my.data.7, scale.unit=TRUE, ncp=3, graph=TRUE)
result <- PCA_result_2$ind$coord
write.csv(result, file="PCA ind_coord 2.csv")

# Step 2: read the table that holds the coordinates (Dim1..Dim3) together
# with the accession id (CDS) and the grouping column (Root.skin.colour).
my.data.8 <- read.csv("Principal Component Analysis (second part) 2.csv")
head(my.data.8)
pc1 <- my.data.8$Dim1
pc2 <- my.data.8$Dim2
pc3 <- my.data.8$Dim3
acc <- my.data.8$CDS
colour <- my.data.8$Root.skin.colour

# Look the plotting colour up by level NAME, not by position.
# Factor levels are sorted alphabetically (Orange, Purple, Red, White,
# Yellow), so indexing an unnamed colour vector with the integer codes
# paints the groups with the wrong colours. Name-based lookup is also safe
# when read.csv() returns a character column instead of a factor.
# White roots are drawn in black so they stay visible on a white background.
colour_by_level <- c(
  Orange = "orange",
  Purple = "purple",
  Red    = "red",
  White  = "black",
  Yellow = "yellow"
)
point_col <- unname(colour_by_level[as.character(colour)])

plot(pc1, pc3, col = point_col, pch = 19)
# The legend makes the group-to-colour assignment verifiable on the plot.
legend(
  "topright",
  legend = names(colour_by_level),
  col = colour_by_level,
  pch = 19,
  title = "Root skin colour"
)
The plot output is: Dim1, Dim3 plot
I have two problems with this: 1) how do I know that the colours have been correctly assigned, and 2) how do I add ellipses?
Here is the code for my second method (I used this website for help but am still stuck):
# Install the helper packages only when they are missing, so re-running the
# script does not reinstall them every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)
if (!requireNamespace("ggord", quietly = TRUE)) {
  install_github("fawda123/ggord")
}
library(ggord)
library('FactoMineR')

# (attach() was dropped: columns are accessed explicitly below.)
my.data.9 <- read.csv("hello.csv")
head(my.data.9)

# PCA on columns 2 to 5, keeping three dimensions.
woo <- PCA(my.data.9[,2:5], scale.unit=TRUE, ncp=3, graph=TRUE)

# Grouping variable as a factor, so the level order is explicit.
skin_colour <- factor(my.data.9$Root.skin.colour)

# Colours are matched to the factor levels by name.
# NOTE(review): assumes the levels are Orange/Purple/Red/White/Yellow, as in
# the my.data.8 dump; any other level would get an NA colour - confirm.
colour_by_level <- c(
  Orange = "orange",
  Purple = "purple",
  Red    = "red",
  White  = "black",
  Yellow = "yellow"
)

# `axes` selects the dimensions to draw (1st and 3rd instead of the default
# 1st and 2nd); `cols` supplies one colour per group level.
ggord(
  woo,
  skin_colour,
  axes = c("1", "3"),
  cols = unname(colour_by_level[levels(skin_colour)])
)
The plot: Dim1, Dim2 plot
My problem here is that I want to plot the 1st and 3rd dimensions as before, not the 1st and 2nd, but I don't know how to specify this. I also don't understand how to change the colours.
Does any of this look like I am heading in the right direction? I would really appreciate some help with this if possible. I have no idea what I am doing and am supposed to be working on my write-up, not still getting stuck with the analysis. Also sorry that files have weird names like "hello", I was getting frustrated.
Here is the information you asked for
> dput(my.data.8)
structure(list(CDS = structure(1:69, .Label = c("CDS010", "CDS011",
"CDS012", "CDS013", "CDS015", "CDS016", "CDS017", "CDS019", "CDS020",
"CDS021", "CDS022", "CDS023", "CDS024", "CDS027", "CDS028", "CDS029",
"CDS030", "CDS031", "CDS032", "CDS033", "CDS034", "CDS035", "CDS036",
"CDS037", "CDS038", "CDS039", "CDS040", "CDS042", "CDS043", "CDS044",
"CDS045", "CDS046", "CDS047", "CDS048", "CDS049", "CDS050", "CDS051",
"CDS052", "CDS053", "CDS054", "CDS056", "CDS058", "CDS059", "CDS060",
"CDS061", "CDS062", "CDS064", "CDS066", "CDS067", "CDS068", "CDS070",
"CDS072", "CDS073", "CDS075", "CDS076", "CDS078", "CDS079", "CDS080",
"CDS081", "CDS082", "CDS083", "CDS084", "CDS085", "CDS086", "CDS087",
"CDS089", "CDS090", "CDS091", "CDS092"), class = "factor"), Dim1 = c(0.989923706,
1.002847033, -0.323384931, 0.0280602, -2.103144589, -1.1750233,
-0.297369615, -1.285073349, -1.18724867, -2.440381033, 2.451488481,
0.432753586, -0.324628407, 1.83987238, 1.082150477, 1.222767528,
0.648419317, 1.17034895, -0.959949524, 0.405826882, 3.578749912,
1.315904789, -0.69599653, -2.650500936, 2.847954059, -1.124700789,
-1.345309845, -1.571390397, 0.808331242, -0.126459344, 1.978484169,
-0.372882529, -1.941508494, -1.895565455, -2.308079318, -2.734023717,
-3.326982705, -0.62297258, 0.4400687, 0.878134622, -0.143118506,
2.902361971, 1.852738657, 1.318157841, 1.525866109, 0.527018259,
-2.17646324, -0.938267968, -0.663267011, -1.626999833, -0.725444227,
4.181058153, -1.663567082, -0.797809065, -0.660857937, 1.275243335,
-1.246799754, -0.658948097, 3.148052501, 1.22737428, 5.770370659,
-0.659363823, 0.201377447, -0.250249239, -3.29492153, -2.525333499,
0.451643578, -2.285229864, -2.05602107), Dim2 = c(0.114080736,
0.189737473, 0.289738365, 0.15686147, -0.65967629, -0.618998916,
0.752658445, -0.187202662, 0.601081452, -0.488843082, -0.461435771,
0.376119902, 0.054640472, -0.352416385, -0.61155099, 0.287520862,
2.072955276, 1.368287549, -1.598022058, -2.35115053, 0.362478564,
-1.16829247, -1.161712522, 0.193574061, 0.582209805, 0.090423462,
0.272987178, 0.762263319, 0.164563899, 1.271976678, 2.169652432,
-0.304444502, 0.333864962, 0.086432067, 0.03375057, 0.42547905,
-0.332663346, 0.230207958, 0.416122611, 0.807386059, -0.622165091,
0.688807153, -0.419360229, -0.024587973, 0.099352776, 0.593489815,
-0.571526951, -0.587510558, 1.141107254, 0.341089899, -0.234002113,
-0.675011549, 0.523417802, 0.570074523, -0.2595101, -0.537050791,
0.691005207, -0.259618613, -0.525167633, -0.181669151, -1.681387716,
0.150321845, 0.528057749, 0.704124706, -1.3872153, 0.22736727,
0.459455992, -0.278329399, -0.183119019), Dim3 = c(0.048353514,
-0.025653037, 0.014924755, -0.00167208, -0.060333422, 0.020764933,
0.043057079, 0.002591265, 0.02799806, -0.01339572, 0.292361681,
0.094879935, 0.020261073, -0.147776529, -0.09613908, 0.015407622,
0.505027604, -0.011850932, 0.162592304, 0.331023774, -0.276654985,
0.13868844, -0.000362491, -0.124098518, -0.03418057, 0.055507617,
-0.044387737, 0.08246021, -0.023457465, 0.070215547, 0.090226544,
0.13499154, -0.115205136, -0.012187001, 0.016720661, -0.112492876,
-0.156433429, 0.03202894, 0.064274887, -0.121235242, 0.014234763,
-0.167012976, -0.063685493, 0.045759055, -0.047058619, -0.113183437,
-0.077100994, 0.039550025, 0.003385481, 0.044618291, 0.025128582,
0.047673827, -0.112599294, -0.096384527, 0.031407524, 0.095437746,
0.037062126, 0.026957783, -0.181217407, 0.411980154, -0.189412218,
-0.042853115, 0.040207038, -0.040919986, -0.082645255, -0.148945175,
0.030315385, -0.094242334, -0.05746962), Root.skin.colour = structure(c(1L,
5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 3L, 4L, 1L, 4L,
4L, 4L, 4L, 1L, 1L, 2L, 4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 1L, 1L, 1L, 1L, 4L, 2L, 3L, 5L, 1L, 2L, 5L, 4L, 1L, 1L,
1L, 1L, 2L, 4L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 3L, 5L, 4L, 1L, 1L,
4L, 1L, 2L, 1L), .Label = c("Orange", "Purple", "Red", "White",
"Yellow"), class = "factor")), .Names = c("CDS", "Dim1", "Dim2",
"Dim3", "Root.skin.colour"), class = "data.frame", row.names = c(NA,
-69L))
Upvotes: 2
Views: 1082
Reputation: 2206
You might consider using the package factoextra for visualization. It can work with the output of different PCA packages (possibly after some reformatting). Further, you can find a helpful tutorial on visualizing PCA on the following website: STHDA: Principal Component Methods in R: Practical Guide. Below is a short code example, using the data you provided, to get you started. I hope this is what you need and that it helps you to solve your task.
library(factoextra)

# Data frame from the question's dput() output; it has the columns Dim1,
# Dim2, Dim3 and Root.skin.colour. The snippet previously indexed `data`,
# which was never assigned and so resolved to the function utils::data().
pca_input <- my.data.8

# Apply PCA to the numeric part of the data.
# `scale.` is written out in full rather than relying on partial matching
# of `scale`.
data.pca <- prcomp(
  pca_input[, c("Dim1", "Dim2", "Dim3")],
  center = TRUE,
  scale. = TRUE
)
str(data.pca) # in $x you can see that there are 3 components

fviz_pca_biplot(
  data.pca,
  # choose the components to plot, here 1 and 3
  axes = c(1, 3),
  geom = c("point", "text"),
  addEllipses = TRUE,
  # colour the individual points by an additional factor, here the
  # Root.skin.colour column; factor() keeps it a grouping variable even
  # when the column was read in as character
  col.ind = factor(pca_input[["Root.skin.colour"]]),
  var.axes = TRUE
)
Upvotes: 1