Reputation: 33
I have a similarity matrix (SimMat) which I've converted to a dataframe by writing it out as .csv and re-reading it into the environment.
A B C
A 1 0.3 0.7
B 0.3 1 0.5
C 0.7 0.5 1
I want to change this so that I have a dataframe detailing the unique comparisons and values like below:
Comp Val
A-A 1
A-B 0.3
A-C 0.7
B-B 1
B-C 0.5
C-C 1
Does anyone know how I might be able to do this?
Upvotes: 0
Views: 1224
Reputation: 388807
Using tidyverse
, we can get rownames as new column, get the data in long format and combine rownames and column names of the data.
library(tidyverse)
df %>%
#If it is a matrix convert to dataframe
#as.data.frame() %>%
rownames_to_column() %>%
pivot_longer(cols = -rowname) %>%
unite(name, rowname, name, sep = "-")
# name value
# <chr> <dbl>
#1 A-A 1
#2 A-B 0.3
#3 A-C 0.7
#4 B-A 0.3
#5 B-B 1
#6 B-C 0.5
#7 C-A 0.7
#8 C-B 0.5
#9 C-C 1
To get only the unique values, we can use pmin
and pmax
.
df %>%
#as.data.frame() %>%
rownames_to_column() %>%
pivot_longer(cols = -rowname) %>%
mutate(newcol1 = pmin(rowname, name), newcol2 = pmax(rowname, name)) %>%
select(-rowname, -name) %>%
distinct() %>%
unite(Comp, newcol1, newcol2, sep = "-")
data
df <- structure(list(A = c(1, 0.3, 0.7), B = c(0.3, 1, 0.5), C = c(0.7,
0.5, 1)), class = "data.frame", row.names = c("A", "B", "C"))
Upvotes: 2
Reputation: 886938
One option without using any packages
transform(as.data.frame.table(m1), Comp =
paste(Var1, Var2, sep="-"), Val = Freq)[c("Comp", "Val")]
# Comp Val
#1 A-A 1.0
#2 B-A 0.3
#3 C-A 0.7
#4 A-B 0.3
#5 B-B 1.0
#6 C-B 0.5
#7 A-C 0.7
#8 B-C 0.5
#9 C-C 1.0
Or just replicate the dimnames and convert the matrix to vector with c
data.frame(Comp = paste(row.names(m1)[row(m1)],
colnames(m1)[col(m1)], sep= '-'), Val = c(m1))
If we only the lower.tri
angular values
Val <- m1[lower.tri(m1, diag = TRUE)]
Comp <- paste(row.names(m1)[col(m1)],
colnames(m1)[row(m1)], sep= '-')[lower.tri(m1, diag = TRUE)]
data.frame(Comp, Val)
# Comp Val
#1 A-A 1.0
#2 A-B 0.3
#3 A-C 0.7
#4 B-B 1.0
#5 B-C 0.5
#6 C-C 1.0
Or using tidyverse
library(dplyr)
library(tidyr)
as.data.frame.table(m1) %>%
unite(Comp, Var1, Var2, sep='-')
# Comp Freq
#1 A-A 1.0
#2 B-A 0.3
#3 C-A 0.7
#4 A-B 0.3
#5 B-B 1.0
#6 C-B 0.5
#7 A-C 0.7
#8 B-C 0.5
#9 C-C 1.0
Or with melt
library(data.table)
setDT(melt(m1))[, Comp := paste(Var1, Var2, sep="-")]
m1 <- structure(c(1, 0.3, 0.7, 0.3, 1, 0.5, 0.7, 0.5, 1), .Dim = c(3L,
3L), .Dimnames = list(c("A", "B", "C"), c("A", "B", "C")))
Upvotes: 3