Reputation: 755
I have a table with three columns containing my first variable, my second variable, and their estimated correlation (estimates coming from a brmsfit
model).
As an example, let's consider the following table:
tibble::tribble(
~a, ~b, ~est,
"fro", "fcr", "0.061 [-0.276, 0.38]",
"fro", "nek", "0.115 [-0.218, 0.421]",
"nek", "fcr", "0.154 [-0.169, 0.476]",
"ooi", "fcr", "-0.022 [-0.352, 0.305]",
"ooi", "fro", "0.006 [-0.326, 0.323]",
"ooi", "nek", "0.173 [-0.151, 0.486]",
"ooi", "zyg", "0.039 [-0.29, 0.362]",
"zyg", "fcr", "-0.02 [-0.347, 0.317]",
"zyg", "fro", "0.092 [-0.245, 0.406]",
"zyg", "nek", "0.315 [0.011, 0.613]"
)
I would like to reshape it in the form of a correlation matrix. I know some packages permits to do the exact inverse operation (e.g., the corrr::stretch()
function), but I am not aware of an easy solution for reshaping a wide dataframe into a correlation matrix.
Any idea ?
Upvotes: 0
Views: 472
Reputation: 819
There is an excellent way with the help of {igraph} package:
dat <- tibble::tribble(
~a, ~b, ~est,
"fro", "fcr", "0.061 [-0.276, 0.38]",
"fro", "nek", "0.115 [-0.218, 0.421]",
"nek", "fcr", "0.154 [-0.169, 0.476]",
"ooi", "fcr", "-0.022 [-0.352, 0.305]",
"ooi", "fro", "0.006 [-0.326, 0.323]",
"ooi", "nek", "0.173 [-0.151, 0.486]",
"ooi", "zyg", "0.039 [-0.29, 0.362]",
"zyg", "fcr", "-0.02 [-0.347, 0.317]",
"zyg", "fro", "0.092 [-0.245, 0.406]",
"zyg", "nek", "0.315 [0.011, 0.613]"
)
dat |>
igraph::graph_from_data_frame(directed = FALSE) |>
igraph::as_adjacency_matrix(attr = "est", sparse = FALSE)
#> fro nek ooi
#> fro "" "0.115 [-0.218, 0.421]" "0.006 [-0.326, 0.323]"
#> nek "0.115 [-0.218, 0.421]" "" "0.173 [-0.151, 0.486]"
#> ooi "0.006 [-0.326, 0.323]" "0.173 [-0.151, 0.486]" ""
#> zyg "0.092 [-0.245, 0.406]" "0.315 [0.011, 0.613]" "0.039 [-0.29, 0.362]"
#> fcr "0.061 [-0.276, 0.38]" "0.154 [-0.169, 0.476]" "-0.022 [-0.352, 0.305]"
#> zyg fcr
#> fro "0.092 [-0.245, 0.406]" "0.061 [-0.276, 0.38]"
#> nek "0.315 [0.011, 0.613]" "0.154 [-0.169, 0.476]"
#> ooi "0.039 [-0.29, 0.362]" "-0.022 [-0.352, 0.305]"
#> zyg "" "-0.02 [-0.347, 0.317]"
#> fcr "-0.02 [-0.347, 0.317]" ""
Created on 2023-10-19 with reprex v2.0.2
Upvotes: 0
Reputation: 2399
Is this what you are looking for?
library(tidyverse)
library(magrittr)
df %$%
c(a, b) %>%
crossing(a = ., b = .) %>%
left_join(df) %>%
mutate(est = if_else(a == b, '1', est)) %>%
group_by(a) %>%
mutate(rn = row_number() %>% rev()) %>%
nest() %>%
mutate(gn = row_number() %>% rev()) %>%
unnest() %>%
mutate(
cond = gn > rn,
a_temp = a,
a = if_else(cond, b, a),
b = if_else(cond, a_temp, b)
) %>%
select(a, b, est) %>%
filter(!is.na(est)) %>%
spread(a, est)
Output:
b fcr fro nek ooi zyg
<chr> <chr> <chr> <chr> <chr> <chr>
1 fcr 1 0.061 [-0.276, 0.38] 0.154 [-0.169, 0.476] -0.022 [-0.352, 0.305] -0.02 [-0.347, 0.317]
2 fro NA 1 0.115 [-0.218, 0.421] 0.006 [-0.326, 0.323] 0.092 [-0.245, 0.406]
3 nek NA NA 1 0.173 [-0.151, 0.486] 0.315 [0.011, 0.613]
4 ooi NA NA NA 1 0.039 [-0.29, 0.362]
5 zyg NA NA NA NA 1
Upvotes: 1
Reputation: 39154
A solution using tidyverse
. dat2
is the final output with the first column as the variable name. The column header for other columns are also variable names.
library(tidyverse)
dat <- tibble::tribble(
~a, ~b, ~est,
"fro", "fcr", "0.061 [-0.276, 0.38]",
"fro", "nek", "0.115 [-0.218, 0.421]",
"nek", "fcr", "0.154 [-0.169, 0.476]",
"ooi", "fcr", "-0.022 [-0.352, 0.305]",
"ooi", "fro", "0.006 [-0.326, 0.323]",
"ooi", "nek", "0.173 [-0.151, 0.486]",
"ooi", "zyg", "0.039 [-0.29, 0.362]",
"zyg", "fcr", "-0.02 [-0.347, 0.317]",
"zyg", "fro", "0.092 [-0.245, 0.406]",
"zyg", "nek", "0.315 [0.011, 0.613]"
)
dat2 <- dat %>%
separate(est, into = c("est", "range"), sep = 6, convert = TRUE) %>%
select(-range) %>%
spread(b, est)
dat2
# # A tibble: 4 x 5
# a fcr fro nek zyg
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 fro 0.061 NA 0.115 NA
# 2 nek 0.154 NA NA NA
# 3 ooi -0.022 0.006 0.173 0.039
# 4 zyg -0.02 0.092 0.315 NA
Upvotes: 0