David
David

Reputation: 33

R Dataframe make a new column combining row and column names

I have a similarity matrix (SimMat) which I've converted to a dataframe by writing it out as .csv and re-reading it into the environment.

   A    B   C
A  1   0.3 0.7   
B 0.3   1  0.5   
C 0.7  0.5  1  

I want to change this so that I have a dataframe detailing the unique comparisons and values like below:

Comp  Val
A-A    1
A-B   0.3
A-C   0.7
B-B    1 
B-C   0.5
C-C    1

Does anyone know how I might be able to do this?

Upvotes: 0

Views: 1224

Answers (2)

Ronak Shah
Ronak Shah

Reputation: 388807

Using tidyverse, we can get rownames as new column, get the data in long format and combine rownames and column names of the data.

library(tidyverse)

df %>%
  #If it is a matrix convert to dataframe
  #as.data.frame() %>%
  rownames_to_column() %>%
  pivot_longer(cols = -rowname) %>%
  unite(name, rowname, name, sep = "-")

#  name  value
#  <chr> <dbl>
#1 A-A     1  
#2 A-B     0.3
#3 A-C     0.7
#4 B-A     0.3
#5 B-B     1  
#6 B-C     0.5
#7 C-A     0.7
#8 C-B     0.5
#9 C-C     1  

To get only the unique values, we can use pmin and pmax.

df %>%
  #as.data.frame() %>%
  rownames_to_column() %>%
  pivot_longer(cols = -rowname) %>%
  mutate(newcol1 = pmin(rowname, name), newcol2 = pmax(rowname, name)) %>%
  select(-rowname, -name) %>%
  distinct() %>%
  unite(Comp, newcol1, newcol2, sep = "-")

data

df <- structure(list(A = c(1, 0.3, 0.7), B = c(0.3, 1, 0.5), C = c(0.7, 
0.5, 1)), class = "data.frame", row.names = c("A", "B", "C"))

Upvotes: 2

akrun
akrun

Reputation: 886938

One option without using any packages

transform(as.data.frame.table(m1), Comp = 
       paste(Var1, Var2, sep="-"), Val = Freq)[c("Comp", "Val")]
#  Comp Val
#1  A-A 1.0
#2  B-A 0.3
#3  C-A 0.7
#4  A-B 0.3
#5  B-B 1.0
#6  C-B 0.5
#7  A-C 0.7
#8  B-C 0.5
#9  C-C 1.0

Or just replicate the dimnames and convert the matrix to vector with c

data.frame(Comp = paste(row.names(m1)[row(m1)],
              colnames(m1)[col(m1)], sep= '-'), Val = c(m1))

If we only the lower.triangular values

Val <- m1[lower.tri(m1, diag = TRUE)]
Comp <- paste(row.names(m1)[col(m1)],
              colnames(m1)[row(m1)], sep= '-')[lower.tri(m1, diag = TRUE)]
data.frame(Comp, Val)
#  Comp Val
#1  A-A 1.0
#2  A-B 0.3
#3  A-C 0.7
#4  B-B 1.0
#5  B-C 0.5
#6  C-C 1.0

Or using tidyverse

library(dplyr)
library(tidyr)
as.data.frame.table(m1) %>%
     unite(Comp, Var1, Var2, sep='-')
#  Comp Freq
#1  A-A  1.0
#2  B-A  0.3
#3  C-A  0.7
#4  A-B  0.3
#5  B-B  1.0
#6  C-B  0.5
#7  A-C  0.7
#8  B-C  0.5
#9  C-C  1.0

Or with melt

library(data.table)
setDT(melt(m1))[, Comp := paste(Var1, Var2, sep="-")]

data

m1 <- structure(c(1, 0.3, 0.7, 0.3, 1, 0.5, 0.7, 0.5, 1), .Dim = c(3L, 
3L), .Dimnames = list(c("A", "B", "C"), c("A", "B", "C")))

Upvotes: 3

Related Questions