James Pinkerton
James Pinkerton

Reputation: 161

How to collapse a recursive tree in OCaml

I have a tree type:

type tree = Vertex of int * tree list;;

My recursive equality definition is that two trees are equal if their ints are equal and all of their children are equal.

How do I build the function

topo: tree -> tree list

that creates a list of all of the trees in depth first search order with each tree appearing once and only once (according to the equality definition)? I want to do this in a computationally efficient way. Maybe use lazy or a hashmap?

Here is my attempt, the code blows up when the length is too large:

type tree = Vertex of int * (tree list)

let rec base = function
    | 0 -> Vertex (0, [])
    | i -> Vertex (i, [base (i - 1)])

let rec range = function
    | 0 -> [0]
    | i -> i :: range (i - 1)

let agg i = Vertex (-1, List.map base (range i))

let rec equals (a: tree) (b: tree) : bool =
    let rec deep_match a_dep b_dep = match a_dep, b_dep with
        | [], []       -> true
        | [], _
        | _, []        -> false
        | x::xs, y::ys -> equals x y && deep_match xs ys
    in
    let Vertex (ai, al) = a in
    let Vertex (bi, bl) = b in
    ai = bi && deep_match al bl

let rec in_list (a: tree) (l: tree list) : bool = match l with
    | [] -> false
    | hd::tl -> equals a hd || in_list a tl

let rec topological (pool: tree list) (t: tree) : tree list =
    if in_list t pool then pool else
        t::match t with
            | Vertex(_, []) -> pool
            | Vertex(_, deps) -> List.fold_left topological pool deps

let big_agg = agg 100_000
let topo_ordered = topological [] big_agg;;
Printf.printf "len %i\n" (List.length topo_ordered)

Upvotes: 0

Views: 628

Answers (2)

ivg
ivg

Reputation: 35270

To make it efficient you need to implement ordering and hash-consing. With total ordering, you can store your trees in a balanced tree or even a hashtable, thus turning your in_list into O(logN) or even O(1). Adding hash-consing will enable O(1) comparison of your trees (at the cost of less efficient tree construction).

Instead of having both, depending on your design constraints, you can have only one. For the didactic purposes, let's implement hash-consing for your particular representation

To implement hash-consing you need to make your constructor private and hide data constructors behind an abstraction wall (to prevent users from breaking you hash-consing properties):

module Tree : sig
  type t = private Vertex of int * t list

  val create : int -> t list -> t
  val equal : t -> t -> bool
end  = struct
  type t = Vertex of int * t list

  let repository = Hashtbl.create 64

  let create n children =
    let node = Vertex (n,children) in
    try Hashtbl.find repository node
    with Not_found -> Hashtbl.add repository node node; node
  let equal x y = x == y
end

Since we guaranteed that structurally equal trees are physically equal during the tree creation (i.e., if there exists an equal tree in our repository then we return it), we are now able to substitute structural equality with physical equality, i.e., with pointer comparison.

We got a fast comparison with the price - we now leaking memory, since we need to store all ever created trees and the create function is now O(N). We can alleviate the first problem by using ephemerons, but the latter problem will persist, of course.

Another issue, is that we're not able to put our trees into ordered structure, like a map or a set. We can of course use regular polymorphic compare, but since it will be O(N), inserting to such structure will become quadratic. Not an option for us. Therefore we need to add total ordering on our trees. We can theoretically do this without changing the representation (using ephemerons), but it is easier just to add an order parameter to our tree representation, e.g.,

module Tree : sig
  type order (* = private int *) (* add this for debuggin *)
  type t = private Vertex of order * int * t list

  val create : int -> t list -> t
  val equal : t -> t -> bool
  val compare : t -> t -> int
end = struct
  type order = int
  type t = Vertex of order * int * t list
  type tree = t

  module Repository = Hashtbl.Make(struct
      type t = tree
      let max_hash = 16

      let rec equal (Vertex (_,p1,x)) (Vertex (_,p2,y)) =
        match compare p1 p2 with
        | 0 -> equal_trees x y
        | n -> false
      and equal_trees xs ys = match xs, ys with
        | [],[] -> true
        | [],_ | _,[] -> false
        | x :: xs, y::ys -> equal x y && equal_trees xs ys
      let rec hash (Vertex (_,p,xs)) =
        hash_trees (Hashtbl.hash p) max_hash xs
      and hash_trees hash depth = function
        | x :: xs when depth > 0 ->
          hash_trees (Hashtbl.hash x) (depth-1) xs
        | _ -> hash
    end)

  let repository = Repository.create 64

  let create n children =
    try Repository.find repository (Vertex (0,n,children))
    with Not_found ->
      let order = Repository.length repository + 1 in
      let node = Vertex (order,n,children) in
      Repository.add repository node node; node

  let equal x y = x == y
  let order (Vertex (order,_,_)) = order
  let compare x y = compare (order x) (order y)

end

We had to manually implement the structural variants of equal and hash for our trees because we need to ignore the order in comparison, when we store a new tree in the repository. It looks like a bit of work, but in the real-life you can do this using derivers.

Anyways, now we got a comparable version of a tree with a comparison function which is O(1), so we can put our trees in sets and maps, and implement your topo efficiently.

A nice feature of both implementations is a tight representation of a tree, since sharing is guaranteed by the create function. E.g.,

# let t1 = Tree.create 42 [];;
val t1 : Tree.t = Tree.Vertex (1, 42, [])
# let t3 = Tree.create 42 [t1; t1];;
val t3 : Tree.t =
  Tree.Vertex (2, 42, [Tree.Vertex (1, 42, []); Tree.Vertex (1, 42, [])])
# let t5 = Tree.create 42 [t1; t3; t1];;
val t5 : Tree.t =
  Tree.Vertex (3, 42,
   [Tree.Vertex (1, 42, []);
    Tree.Vertex (2, 42, [Tree.Vertex (1, 42, []); Tree.Vertex (1, 42, [])]);
    Tree.Vertex (1, 42, [])])
# 

In this example, t1 in t5 and t3 will be the same pointer.

Upvotes: 4

octachron
octachron

Reputation: 18902

For optimal performance, one possibility would be to use hashconsing. However, in your current example, both the generation and the unicity test are quadratic in n. Fixing both points seems to already improve performance a lot.

First, we can avoid the quadratic tree generation by adding a lot of sharing:

let range max =
  let rec range elt l n =
    if n > max then elt::l
    else
      let next = Vertex(n,[elt]) in
      range next (elt::l) (n+1) in
  range (Vertex(0,[])) [] 1

let agg i = Vertex (-1, range i)

With this change, it is become reasonable to generate a tree with 1010 elements (but only 105 unique elements). Then, the unicity test can be done with a set (or a hashtable):

module S = Set.Make(struct type t = tree let compare = compare end)

let rec topological (set, pool) t =
    if S.mem t set then (set, pool) else
      let set = S.add t set in
      let set, pool =
        match t with
        | Vertex(_, []) -> set, pool
        | Vertex(_, deps) -> List.fold_left topological (set,pool) deps in
      set, t::pool

Upvotes: 2

Related Questions