Outlier
Outlier

Reputation: 437

How to convert data into a matrix using index numbers

Suppose I have a dataset df like this, which contains adjacency information (the first number in each row is a node, and the remaining numbers are the nodes adjacent to it):

1 2 3 4
2 1 4
3 1
4 1 2 5 6 7
5 4
6 4 7
7 4 6

I want to convert this into an adjacency matrix, like this:

  1 2 3 4 5 6 7
1 0 1 1 1 0 0 0
2 1 0 0 1 0 0 0
3 1 0 0 0 0 0 0
4 1 1 0 0 1 1 1
5 0 0 0 1 0 0 0
6 0 0 0 1 0 0 1
7 0 0 0 1 0 1 0

Here the first row and the first column are only labels; the matrix itself doesn't contain these elements.

Edit: When I read the CSV file into R, it replaces the missing entries with NA, so a head of the actual data now looks like this:

     Number  X X.1 X.2 X.3 X.4 X.5 X.6 X.7 X.8
   1      1 31  32  35  36  39  40  42  47  50
   2      2  3   8  NA  NA  NA  NA  NA  NA  NA
   3      3  2   4   6   8  NA  NA  NA  NA  NA
   4      4  3   5   6  12  14  15  NA  NA  NA
   5      5  4   7  11  12  13  NA  NA  NA  NA
   6      6  3   4   8  14  31  33  36  NA  NA

Note that I have 62 rows in total. Also, here's the data added using dput():

structure(list(Number = 1:62, X = c(31L, 3L, 2L, 3L, 4L, 3L, 
5L, 2L, 7L, 9L, 5L, 4L, 5L, 4L, 4L, 7L, 7L, 7L, 13L, 16L, 11L, 
17L, 17L, 14L, 14L, 21L, 12L, 26L, 12L, 14L, 6L, 14L, 6L, 26L, 
26L, 6L, 34L, 34L, 33L, 35L, 37L, 40L, 40L, 40L, 44L, 44L, 42L, 
49L, 46L, 39L, 48L, 46L, 49L, 51L, 52L, 54L, 51L, 54L, 41L, 19L, 
17L, 11L), X.1 = c(32L, 8L, 4L, 5L, 7L, 4L, 9L, 3L, 10L, 16L, 
12L, 5L, 7L, 6L, 12L, 9L, 13L, 16L, 17L, 18L, 12L, 20L, 19L, 
15L, 30L, 23L, 15L, 29L, 14L, 25L, 14L, 28L, 36L, 35L, 28L, 31L, 
38L, 35L, 36L, 38L, 38L, 43L, 42L, 43L, 46L, 45L, 43L, 51L, 48L, 
47L, 49L, 47L, 51L, 56L, 53L, NA, 53L, 57L, 40L, 21L, 18L, 13L
), X.2 = c(35L, NA, 6L, 6L, 11L, 8L, 13L, 6L, 16L, NA, 13L, 11L, 
11L, 15L, 14L, 10L, 18L, 17L, 23L, 22L, 26L, 23L, 22L, 27L, NA, 
28L, 24L, 32L, 21L, 32L, 32L, 29L, 39L, 37L, 32L, 33L, 41L, 37L, 
50L, 41L, 40L, 47L, 44L, 45L, 59L, 47L, 44L, NA, 51L, 52L, 53L, 
49L, 52L, 57L, 57L, NA, 54L, 51L, 44L, 23L, 20L, 19L), X.3 = c(36L, 
NA, 8L, 12L, 12L, 14L, 16L, NA, NA, NA, 21L, 15L, 17L, 24L, 24L, 
18L, 19L, 20L, 60L, 61L, 29L, 61L, 26L, 29L, NA, 29L, 29L, 35L, 
24L, NA, 36L, 30L, NA, 38L, 34L, 39L, NA, 40L, 1L, 42L, 59L, 
1L, 47L, 46L, NA, 49L, 46L, NA, 52L, 1L, 54L, 50L, 55L, 58L, 
NA, NA, 55L, NA, 45L, 26L, 22L, 21L), X.4 = c(39L, NA, NA, 14L, 
13L, 31L, 17L, NA, NA, NA, 62L, 21L, 19L, 25L, 27L, 20L, 22L, 
61L, 62L, NA, 60L, NA, 60L, NA, NA, 34L, NA, NA, 26L, NA, 1L, 
31L, NA, NA, 38L, 1L, NA, 41L, NA, 43L, NA, NA, NA, 47L, NA, 
52L, 50L, NA, 53L, NA, 57L, 53L, 57L, NA, NA, NA, 58L, NA, NA, 
62L, NA, 60L), X.5 = c(40L, NA, NA, 15L, NA, 33L, 18L, NA, NA, 
NA, NA, 27L, 62L, 29L, NA, NA, 23L, NA, NA, NA, 62L, NA, NA, 
NA, NA, 35L, NA, NA, 27L, NA, NA, 35L, NA, NA, 1L, NA, NA, NA, 
NA, 44L, NA, NA, NA, 59L, NA, NA, 52L, NA, NA, NA, 58L, 55L, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X.6 = c(42L, NA, NA, 
NA, NA, 36L, NA, NA, NA, NA, NA, 29L, NA, 30L, NA, NA, 61L, NA, 
NA, NA, NA, NA, NA, NA, NA, 60L, NA, NA, 28L, NA, NA, 1L, NA, 
NA, 40L, NA, NA, NA, NA, 59L, NA, NA, NA, NA, NA, NA, 1L, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X.7 = c(47L, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 31L, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32L, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    X.8 = c(50L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, 32L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA, 
-62L))

Upvotes: 0

Views: 68

Answers (2)

Ian Campbell
Ian Campbell

Reputation: 24770

One approach might be to use tidyr::pivot_longer to convert the data into an edge list. Then use igraph:

library(tidyverse)
library(igraph)

# `df` is the data frame from the question: `Number` is the node id and the
# remaining columns list that node's neighbours, padded with NA.
# Reshape it into a two-column edge list, build a graph from it, and read the
# adjacency matrix back out.
result <- df %>%
  pivot_longer(-Number) %>% # Wide to long: one row per (node, neighbour) pair
  dplyr::select(-name) %>% # Drop the column holding the former column names
  dplyr::filter(!is.na(value)) %>% # Remove the NA padding
  as.matrix() %>% # The edge list is expected as a matrix
  graph_from_edgelist() %>% # Create the graph
  as_adjacency_matrix() %>% # Make the adjacency matrix
  as.matrix() # Convert from igraph's return class to a standard matrix

dim(result)
#[1] 62 62
result[1:10, 1:10]
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# [1,]    0    0    0    0    0    0    0    0    0     0
# [2,]    0    0    1    0    0    0    0    1    0     0
# [3,]    0    1    0    1    0    1    0    1    0     0
# [4,]    0    0    1    0    1    1    0    0    0     0
# [5,]    0    0    0    1    0    0    1    0    0     0
# [6,]    0    0    1    1    0    0    0    1    0     0
# [7,]    0    0    0    0    1    0    0    0    1     0
# [8,]    0    1    1    0    0    1    0    0    0     0
# [9,]    0    0    0    0    0    0    1    0    0     1
#[10,]    0    0    0    0    0    0    0    0    1     0

Upvotes: 3

ThomasIsCoding
ThomasIsCoding

Reputation: 101034

Here is a base R option using table + stack

# Base R adjacency matrix from an NA-padded neighbour table.
# `df$Number` is the node id; every other column of `df` holds one neighbour
# of that node, or NA.
neighbour_matrix <- as.matrix(df[-1])

# Collect each node's neighbours with lapply() rather than apply(): apply()
# simplifies its result to a matrix whenever every row keeps the same number
# of values, which would break setNames()/stack() below.
neighbour_list <- lapply(seq_len(nrow(neighbour_matrix)), function(i) {
  row_values <- neighbour_matrix[i, ]
  unname(row_values[!is.na(row_values)])
})

# stack() turns the named list into a long edge list:
# `values` is the neighbour, `ind` is the node the neighbour belongs to.
edge_list <- stack(setNames(neighbour_list, df$Number))

# Give both sides the same factor levels so the table is square and in
# `df$Number` order, including nodes that never occur on one of the sides.
as.data.frame.matrix(
  table(
    from = factor(edge_list$ind, levels = df$Number),
    to = factor(edge_list$values, levels = df$Number)
  )
)

Upvotes: 1

Related Questions