Which is the proper way to DFS a matrix?

Question

I was doing this practice problem to find if a word exists in a matrix, and it made me realize I don't totally get DFS.

In Cracking the Coding Interview, pseudocode for DFS is:

void search(Node root) {
  if (root == null) return;
  visit(root);
  root.visited = true;
  for each (Node n in root.adjacent) {
    if (n.visited == false) {
      search(n);
    }
  }
}

To me this looked like the format:

goal
mark
loop
bail early if conditions are not met on the neighbor
recurse

So with this format, I wrote the function dfs():

  function dfs(r, c, i) {    
    // goal
    if (i === word.length-1) return true;

    // mark
    board[r][c] = '#';

    // loop and recurse each neighbor
    for (var d of dirs) {
      var nr = r + d[0];
      var nc = c + d[1];

      // bail early if neighbor does not meet conditions
      if (nr < 0 || nc < 0 || nr >= board.length || nc >= board[0].length) continue;  // neighbor is out of bounds
      if (board[nr][nc] === '#') continue;                                            // neighbor already visited
      if (board[nr][nc] !== word[i+1]) continue;                                      // neighbor does not meet goal

      // recursion
      var result = dfs(nr, nc, i+1);

      // un-mark
      board[r][c] = word[i];

      return result;
    }
  }

Secondly, I noticed most solutions don't use a for loop at all, but just a recursion written 4 times for each neighbor. With this in mind, I wrote dfs2():

goal
bail early if conditions are not met on the current node
mark

recurse

function dfs2(r, c, i) {    
// goal
if (i === word.length) return true;

// bail early if current does not meet conditions
if (r < 0 || c < 0 || r >= board.length || c >= board[0].length) return false;  // current is out of bounds
if (board[r][c] === '#') return false;                                          // current already visited
if (board[r][c] !== word[i]) return false;                                      // current does not meet goal

// mark
board[r][c] = '#';

// recursion
var result = dfs2(r+1, c, i+1) || dfs2(r-1, c, i+1) || dfs2(r, c+1, i+1) || dfs2(r, c-1, i+1);

// un-mark
board[r][c] = word[i];

return result;
}

This is more concise, but harder for me to understand. The first version dfs() does a loop and bails early on the neighbors before the recursion, which makes more sense to me. "If the neighbor is bad, don't go there." The second version doesn't have a loop, so it does all of the checks on the current node.

The first things that I noticed was that in most problems involving a grid, solutions involve "un-marking" after the recursion. Why is this? Is this only for specific cases like "word search problem" where you may want to re-visit the node in the future in a different path?

Which is correct, dfs() or dfs2()?

https://repl.it/MSCw/0 Here is the whole thing put together:

var dirs = [
  [0,1],  // r
  [1,0],  // d
  [0,-1], // u
  [-1,0], // l
];

var wsBoard = [
  ['A','B','C','E'],
  ['S','F','C','S'],
  ['A','D','E','E']
];

var exist = function(board, word, version) {
  for (var r = 0; r < board.length; r++) {
    for (var c = 0; c < board[0].length; c++) {
      if (board[r][c] === word[0])
        if (dfs(r, c, 0)) return true;
        // if (dfs2(r, c, 0)) return true;
    }
  }

  return false;

  function dfs(r, c, i) {
    console.log(`(${r},${c})	${i}: ${word[i]}`);

    // goal
    if (i === word.length-1) return true;

    // mark
    board[r][c] = '#';

    // loop and recurse each neighbor
    for (var d of dirs) {
      var nr = r + d[0];
      var nc = c + d[1];

      // bail early if neighbor does not meet conditions
      if (nr < 0 || nc < 0 || nr >= board.length || nc >= board[0].length) continue;  // neighbor is out of bounds
      if (board[nr][nc] === '#') continue;                                            // neighbor already visited
      if (board[nr][nc] !== word[i+1]) continue;                                      // neighbor does not meet goal

      console.log(board);

      // recursion
      var result = dfs(nr, nc, i+1);

      // un-mark
      board[r][c] = word[i];

      return result;
    }
  }

  function dfs2(r, c, i) {
    console.log(`(${r},${c})	${i}: ${word[i]}`);

    // goal
    if (i === word.length) return true;

    // bail early if current does not meet conditions
    if (r < 0 || c < 0 || r >= board.length || c >= board[0].length) return false;  // current is out of bounds
    if (board[r][c] === '#') return false;                                          // current already visited
    if (board[r][c] !== word[i]) return false;                                      // current does not meet goal

    // mark
    board[r][c] = '#';

    console.log(board);

    // recursion
    var result = dfs2(r+1, c, i+1) || dfs2(r-1, c, i+1) || dfs2(r, c+1, i+1) || dfs2(r, c-1, i+1);

    // un-mark
    board[r][c] = word[i];

    return result;
  }
};

console.log(exist(wsBoard, 'ABCCED'));  // => true
console.log(exist(wsBoard, 'SEE'));     // => true
console.log(exist(wsBoard, 'ABCB'));    // => false

Mauricio Poppe · Accepted Answer

I believe that even though both dfsand dfs2 are based on the same idea dfs has one flaw, it's returning the result of the exploration of the first path only!

Look at this example where I'm trying to find FOO in the board, clearly, it's the first column however your implementation returns false

var dirs = [
  [0,1],  // r
  [1,0],  // d
  [0,-1], // u
  [-1,0], // l
];

var board = [
  ['F','O','X'],
  ['O',' ',' '],
  ['O',' ',' ']
];

var exist = function(word) {
  function dfs(r, c, i) {
    // mark
    board[r][c] = '#';

    // goal
    if (i === word.length-1) return true;

    // loop and recurse each neighbor
    for (var d of dirs) {
      var nr = r + d[0];
      var nc = c + d[1];

      // bail early if neighbor does not meet conditions
      if (nr < 0 || nc < 0 || nr >= board.length || nc >= board[0].length) continue;  // neighbor is out of bounds
      if (board[nr][nc] === '#') continue;                                            // neighbor already visited
      if (board[nr][nc] !== word[i+1]) continue;                                      // neighbor does not meet goal

      // recursion
      var result = dfs(nr, nc, i+1);

      // un-mark
      board[r][c] = word[i];

      return result;
    }
  }
  for (var r = 0; r < board.length; r++) {
    for (var c = 0; c < board[0].length; c++) {
      if (board[r][c] === word[0]) {}
        if (dfs(r, c, 0)) return true;
    }
  }
  return false;
}

console.log(exist('FOO'))

The problem is that your for loop will always return the result of the first recursion, to fix it let's move result outside the loop, make it false initally and make it adopt a value of true once a valid path is found.

var dirs = [
  [0,1],  // r
  [1,0],  // d
  [0,-1], // u
  [-1,0], // l
];

var board = [
  ['F','O','X'],
  ['O',' ',' '],
  ['O',' ',' ']
];

var exist = function(word) {
  function dfs(r, c, i) {
    // mark
    board[r][c] = '#';

    // goal
    if (i === word.length-1) return true;

    // assume that there's no valid path initially
    var result = false
    // loop and recurse each neighbor
    for (var d of dirs) {
      var nr = r + d[0];
      var nc = c + d[1];

      // bail early if neighbor does not meet conditions
      if (nr < 0 || nc < 0 || nr >= board.length || nc >= board[0].length) continue;  // neighbor is out of bounds
      if (board[nr][nc] === '#') continue;                                            // neighbor already visited
      if (board[nr][nc] !== word[i+1]) continue;                                      // neighbor does not meet goal

      // recursion
      result = result || dfs(nr, nc, i+1);

    }

    // un-mark
    board[r][c] = word[i];
    
    return result;
  }
  for (var r = 0; r < board.length; r++) {
    for (var c = 0; c < board[0].length; c++) {
      if (board[r][c] === word[0]) {}
        if (dfs(r, c, 0)) return true;
    }
  }
  return false;
}

console.log(exist('FOO'))

If we look at dfs2 the only difference is that the for loop is unwrapped e.g.

var result = false;
for (var dir in dirs) {
   // ...
   result = result || dfs(nr, nc, i+1)
}
return result;

// becomes

var result = dfs2(...) || dfs2(...) || ...

The first things that I noticed was that in most problems involving a grid, solutions involve "un-marking" after the recursion. Why is this?

In some solutions you may actually mutate the object you're working with, for example in another classic problem which is finding all the permutations of a word you can solve it by mutating the word inplace recursively, after one permutation is found the next recursive call will work with a different state (which is not desired), the unmarking concept in this problem is translated into a revert operation that transforms the word to its previous state.

Which is correct, dfs() or dfs2()?

Both are correct (well, after dfs is fixed), however, dfs2 does recursion to an invalid state e.g. an out of bounds cell or a cell that's not part of the word, in terms of complexity this additional overhead is just a constant multiplier e.g. even if you imagine that you visit each neighbor from each cell the complexity is O(4 * # rows * # columns)

Which is the proper way to DFS a matrix?

Answers (1)

Related Questions