Counting paths and cycles of certain sizes for each node in network R

Question

I have a network and would like to count cycles of size 4 (paths of length four starting and ending at the same node) and paths of size 3 starting from a node, then tabulate them in a dataset by node.

g <- read.table(text= "supplier buyer
                              a    b
                              b    c
                              c    d
                              c    e
                              a    e
                              e    f
                              b    f",header=TRUE)
g <- graph.data.frame(g, directed=F)
adjacency <- as.matrix(as_adjacency_matrix(g))

I have referred to this post for counting cycles, but it counts cycles for the entire graph, not by node.

I have also read the documentation for kpath.census and kcycle.census. However, these only allow for specification of a maximum length (length <=3, rather than ==3). I'm also not fully understanding the output:

library(sna)
node_path<-sna::kpath.census(adjacency, maxlen = 3, mode = "graph", 
             tabulate.by.vertex = TRUE, dyadic.tabulation = "sum")
node_kcycle <- sna::kcycle.census(adjacency, maxlen = 4, mode = "graph", tabulate.by.vertex = TRUE, cycle.comembership = "sum")

> node_path$path.count
  Agg  a  b  c  e  f d
1  14  4  6  6  6  4 2
2  22 10 14 14 14 10 4
3  32 20 28 24 28 20 8

> node_kcycle$cycle.count
  Agg a b c e f d
2   0 0 0 0 0 0 0
3   0 0 0 0 0 0 0
4   3 2 3 2 3 2 0

I plotted the network below, and by my count, node a should have 4 cycles of length 4, and 6 paths of length 3. But the output from node_kcycle$cycle.count and node_path$path.count provide different numbers.

Is there a better way to count the cycles of size 4 and length of 3 for each node? I'd like to produce an output such that:

   cycles4   path3
a    4         6
b
c      ....
d
e
f

EDIT: I've figured out my confusion with the kpath.census output: it counts all paths including that node, not starting from that node, as I'd wanted. Still not sure how to count the paths starting from a node, though

EDIT2: This function all_simple_paths(g, from = V(g)) counts the simple paths, and I'm thinking I would try to extract paths of 3 from this function. Still struggling on finding the cycles of size 4.

Sinh Nguyen · Accepted Answer

This is a long answer for this as I tried out with R6 and define some thing on my own instead of using any paths related packages avaiable

Define the path class for control the logics

library(dplyr)
library(purrr)
library(R6)

# R6 Class
path <- R6Class(
  classname = "point",
  private = list(
    .points = c(),
    .is_circle = FALSE,
    .path_length = 0,
    check_path = function() {
      private$.path_length <- length(private$.points)
    }
  ),
  active = list(
    points = function(value) {
      if (missing(value)) {
        private$.points
      } else {
        stop("No manual edit allowed")
      }
    },
    is_circle = function(value) {
      if (missing(value)) {
        private$.is_circle
      } else {
        stop("No manual edit allowed")
      }
    },
    path_length = function(value) {
      if (missing(value)) {
        private$.path_length
      } else {
        stop("No manual edit allowed")
      }
    }
  ),
  public = list(
    initialize = function(supplier, buyer) {
      private$.points <- c(supplier, buyer)
      private$check_path()
    },
    # This function will add a 2 points path into this path object
    # handle some logics about cirle & duplicated path
    add_2_points_path = function(addition_path) {
      stopifnot(addition_path$path_length == 2)
      if (length(intersect(addition_path$points, private$.points)) == 2) {
        if (first(private$.points) %in% addition_path$points & 
            last(private$.points) %in% addition_path$points &
            private$.path_length > 2) {
          private$.is_circle <- TRUE
        } else {
          stop("No new node introduce")
        }
      } else {
        if (first(addition_path$points) == last(private$.points)) {
          private$.points <- c(private$.points, addition_path$points[2])
        } else if (last(addition_path$points) == last(private$.points)) {
          private$.points <- c(private$.points, addition_path$points[1])
        } else {
          stop("No common points for connection. No connection build")
        }
      }
      private$check_path()
    },
    add_point = function(new_point) {
      stopifnot(is.character(new_point) & length(new_point) == 1)
      private$.points <- c(private$.points, new_point)
      private$check_path()
    },
    get_start_point = function() {
      first(private$.points)
    },
    get_end_point = function() {
      last(private$.points)
    },
    print = function(...) {
      cat("Path: ", private$.points, "
")
      cat("Length: ", private$.path_length, "
")
      cat("Is circle: ", private$.is_circle, "
")
    }
  ),
  cloneable = TRUE
)

Running a path builder using the origin data

# original data
g
#>   supplier buyer
#> 1        a     b
#> 2        b     c
#> 3        c     d
#> 4        c     e
#> 5        a     e
#> 6        e     f
#> 7        b     f

# Define maximum loops to count. The maximum path_length
# will be loop_count + 1
loop_count <- 4
paths <- pmap(g, path$new)
index <- 2
for (index in 2:loop_count) {
  current_paths_level <- keep(.x = paths,
    .p = function(x) { x$path_length == index})
  for (i_path in current_paths_level) {
    possible_paths <- pmap(g, path$new)
    for (i_possible_paths in possible_paths) {
      new_path <- i_path$clone()
      tryCatch({
        new_path$add_2_points_path(i_possible_paths)
        if (new_path$is_circle) {
          paths <- keep(.x = paths,
            .p = function(x) { !identical(x$points, new_path$points) })
        }
        paths <- append(paths, new_path)
      }, error = function(e) {
        # skip error handling just ignore the path not able to connect
      })
    }
  }
}

# Filter duplicated paths
identical_path <- function(path_1, path_2) {
  identical <- FALSE
  if (path_1$path_length <= 3 | path_2$path_length <= 3) {
    identical <- length(intersect(path_1$points, path_2$points)) == path_1$path_length &
      path_1$path_length == path_2$path_length 
  } else {
    identical <- identical(path_1$points, path_2$points)
  }
  identical
}
path_filter <- rep(FALSE, length(paths))
for(i in 1:(length(paths) - 1)) {
  for(j in (i+1):length(paths)) {
    path_filter[j] <- path_filter[j] | identical_path(paths[[i]], paths[[j]])
  }
}
paths <- paths[!path_filter]

A few sample of the result

sample(paths, 10)
#> [[1]]
#> Path:  a b 
#> Length:  2 
#> Is circle:  FALSE 
#> 
#> [[2]]
#> Path:  b c e a 
#> Length:  4 
#> Is circle:  TRUE 
#> 
#> [[3]]
#> Path:  d c e f b 
#> Length:  5 
#> Is circle:  FALSE 
#> 
#> [[4]]
#> Path:  c e a b f 
#> Length:  5 
#> Is circle:  FALSE 
#> 
#> [[5]]
#> Path:  c b f 
#> Length:  3 
#> Is circle:  FALSE 
#> 
#> [[6]]
#> Path:  f b a e c 
#> Length:  5 
#> Is circle:  FALSE 
#> 
#> [[7]]
#> Path:  a e c b 
#> Length:  4 
#> Is circle:  TRUE 
#> 
#> [[8]]
#> Path:  a e f 
#> Length:  3 
#> Is circle:  FALSE 
#> 
#> [[9]]
#> Path:  e a b c d 
#> Length:  5 
#> Is circle:  FALSE 
#> 
#> [[10]]
#> Path:  a b f 
#> Length:  3 
#> Is circle:  FALSE

Finally do the statistics

# Define the counting functions
counting <- function(starting_point, list_paths) {
  starting_paths <- keep(paths,
    function(x) { x$get_start_point() == starting_point })
  circle4 <- length(keep(starting_paths,
    function(x) { x$is_circle & x$path_length == 4}))
  path3 <- length(keep(starting_paths,
    function(x) { x$path_length == 4}))
  return(tibble(point = starting_point,
    circle4 = circle4, path3 = path3))
}

map_dfr(unique(c(g$supplier, g$buyer)), counting, list_paths = paths)
#> # A tibble: 6 x 3
#>   point circle4 path3
#>       
#> 1 a           4     6
#> 2 b           6     6
#> 3 c           4     4
#> 4 e           6     6
#> 5 d           0     4
#> 6 f           4     6

Update the class to have a nice print function

^{Created on 2021-04-06 by the reprex package (v1.0.0)}

Counting paths and cycles of certain sizes for each node in network R

Answers (1)

Define the path class for control the logics

Running a path builder using the origin data

A few sample of the result

Finally do the statistics

Related Questions