Ringil
Ringil

Reputation: 6537

Why is iterating through an array faster than Seq.find

I have an array sums that gives all the possible sums of a function f. This function accepts integers (say between 1 and 200, but same applies for say 1 and 10000) and converts them to double. I want to store sums as an array as I still haven't figured out how to do the algorithm I need without a loop.

Here's the code for how I generate sums:

let f n k = exp (double(k)/double(n)) - 1.0


let n = 200
let maxLimit = int(Math.Round(float(n)*1.5))

let FunctionValues = [|1..maxLimit|] |> Array.map (fun k -> f n k)

let sums = FunctionValues |> Array.map (fun i -> Array.map (fun j -> j + i) FunctionValues) |> Array.concat |> Array.sort

I found certain elements of the array sums that I want to find some integers that when input into the function f and then added will equal the value in sums. I could store the integers in sums, but I found that this destroys my memory.

Now I have two algorithms. Algorithm 1 uses a simple loop and a mutable int to store the values I care about. It shouldn't be very efficient since there isn't a break statement when it finds all the possible integers. I tried implementing Algorithm 2 that is more functional style, but I found it slower (~10% slower or 4200ms vs 4600ms with n = 10000), despite Seq being lazy. Why is this?

Algorithm 1:

let mutable a = 0
let mutable b = 0
let mutable c = 0
let mutable d = 0
for i in 1..maxLimit do
    for j in i..maxLimit do
        if sums.[bestI] = f n i + f n j then
            a <- i
            b <- j
        if sums.[bestMid] = f n i + f n j then
            c <- i
            d <- j

Algorithm 2:

let findNM x = 
    let seq = {1..maxLimit} |> Seq.map (fun k -> (f n k, k))
    let get2nd3rd (a, b, c) = (b, c)
    seq |> Seq.map (fun (i, n) -> Seq.map (fun (j, m) -> (j + i, n, m) ) seq) 
        |> Seq.concat |> Seq.find (fun (i, n, m) -> i = x)
        |>  get2nd3rd

let digitsBestI = findNM sums.[bestI]
let digitsBestMid = findNM sums.[bestMid]

let a = fst digitsBestI
let b = snd digitsBestI
let c = fst digitsBestMid
let d = snd digitsBestMid

Edit: Note that the array sums is length maxLimit*maxLimit not length n. bestI and bestMid are then indices between 0 and maxLimit*maxLimit. For the purposes of this question they can be any number in that range. Their specific values are not particularly relevant.

Upvotes: 3

Views: 107

Answers (1)

I extended OPs code a bit in order to profile it

open System

let f n k   = exp (double(k)/double(n)) - 1.0

let outer   = 200
let n       = 200
let maxLimit= int(Math.Round(float(n)*1.5))

let FunctionValues = [|1..maxLimit|] |> Array.map (fun k -> f n k)

let random = System.Random 19740531

let sums = FunctionValues |> Array.map (fun i -> Array.map (fun j -> j + i) FunctionValues) |> Array.concat |> Array.sort

let bests = 
  [| for i in [1..outer] -> (random.Next (n, maxLimit*maxLimit), random.Next (n, maxLimit*maxLimit))|]

let stopWatch = 
  let sw = System.Diagnostics.Stopwatch ()
  sw.Start ()
  sw

let timeIt (name : string) (a : int*int -> 'T) : unit = 
  let t = stopWatch.ElapsedMilliseconds
  let v = a (bests.[0])
  for i = 1 to (outer - 1) do
    a bests.[i] |> ignore
  let d = stopWatch.ElapsedMilliseconds - t
  printfn "%s, elapsed %d ms, result %A" name d v

let algo1 (bestI, bestMid) =
  let mutable a = 0
  let mutable b = 0
  let mutable c = 0
  let mutable d = 0
  for i in 1..maxLimit do
    for j in i..maxLimit do
      if sums.[bestI] = f n i + f n j then
        a <- i
        b <- j
      if sums.[bestMid] = f n i + f n j then
        c <- i
        d <- j

  a,b,c,d

let algo2 (bestI, bestMid) =
  let findNM x = 
    let seq = {1..maxLimit} |> Seq.map (fun k -> (f n k, k))
    let get2nd3rd (a, b, c) = (b, c)
    seq |> Seq.map (fun (i, n) -> Seq.map (fun (j, m) -> (j + i, n, m) ) seq) 
        |> Seq.concat |> Seq.find (fun (i, n, m) -> i = x)
        |> get2nd3rd

  let digitsBestI = findNM sums.[bestI]
  let digitsBestMid = findNM sums.[bestMid]

  let a = fst digitsBestI
  let b = snd digitsBestI
  let c = fst digitsBestMid
  let d = snd digitsBestMid

  a,b,c,d

let algo3 (bestI, bestMid) =
  let rec find best i j = 
    if best = f n i + f n j then i, j
    elif i = maxLimit && j = maxLimit then 0, 0
    elif j = maxLimit then find best (i + 1) 1
    else find best i (j + 1)
  let a, b = find sums.[bestI] 1 1
  let c, d = find sums.[bestMid] 1 1
  a, b, c, d

let algo4 (bestI, bestMid) =
  let rec findI bestI mid i j = 
    if bestI = f n i + f n j then 
      let x, y = mid
      i, j, x, y
    elif i = maxLimit && j = maxLimit then 0, 0, 0, 0
    elif j = maxLimit then findI bestI mid (i + 1) 1
    else findI bestI mid i (j + 1)

  let rec findMid ii bestMid i j = 
    if bestMid = f n i + f n j then 
      let x, y = ii
      x, y, i, j
    elif i = maxLimit && j = maxLimit then 0, 0, 0, 0
    elif j = maxLimit then findMid ii bestMid (i + 1) 1
    else findMid ii bestMid i (j + 1)

  let rec find bestI bestMid i j = 
    if bestI = f n i + f n j then findMid (i, j) bestMid i j
    elif bestMid = f n i + f n j then findI bestI (i, j) i j
    elif i = maxLimit && j = maxLimit then 0, 0, 0, 0
    elif j = maxLimit then find bestI bestMid (i + 1) 1
    else find bestI bestMid i (j + 1)

  find sums.[bestI] sums.[bestMid] 1 1

[<EntryPoint>]
let main argv =

  timeIt "algo1" algo1
  timeIt "algo2" algo2
  timeIt "algo3" algo3
  timeIt "algo4" algo4

  0

The test results on my machine:

algo1, elapsed 438 ms, result (162, 268, 13, 135)
algo2, elapsed 1012 ms, result (162, 268, 13, 135)
algo3, elapsed 348 ms, result (162, 268, 13, 135)
algo4, elapsed 322 ms, result (162, 268, 13, 135)

algo1 uses the naive for loop implementation. algo2 uses a more refined algorithm relying on Seq.find. I describe algo3 and algo4 later.

OP wondered why the naive algo1 performed better even it does more work than the algo2 that is based around lazy Seq (essentially an IEnumerable<>).

The answer is Seq abstraction introduces overhead and prevents useful optimizations from occuring.

I usually resort to looking at the generated IL code in order to understand what's going (There are many good decompilers for .NET like ILSpy).

Let's look at algo1 (decompiled to C#)

// Program
public static Tuple<int, int, int, int> algo1(int bestI, int bestMid)
{
  int a = 0;
  int b = 0;
  int c = 0;
  int d = 0;
  int i = 1;
  int maxLimit = Program.maxLimit;
  if (maxLimit >= i)
  {
    do
    {
      int j = i;
      int maxLimit2 = Program.maxLimit;
      if (maxLimit2 >= j)
      {
        do
        {
          if (Program.sums[bestI] == Math.Exp((double)i / (double)200) - 1.0 + (Math.Exp((double)j / (double)200) - 1.0))
          {
            a = i;
            b = j;
          }
          if (Program.sums[bestMid] == Math.Exp((double)i / (double)200) - 1.0 + (Math.Exp((double)j / (double)200) - 1.0))
          {
            c = i;
            d = j;
          }
          j++;
        }
        while (j != maxLimit2 + 1);
      }
      i++;
    }
    while (i != maxLimit + 1);
  }
  return new Tuple<int, int, int, int>(a, b, c, d);
}

algo1 is then expanded to an efficient while loop. In addition f is inlined. The JITter is easily able to create efficient machine code from this.

When we look at algo2 unpacking the full structure is too much for this post so I focus on findNM

internal static Tuple<int, int> findNM@48(double x)
{
  IEnumerable<Tuple<double, int>> seq = SeqModule.Map<int, Tuple<double, int>>(new Program.seq@49(), Operators.OperatorIntrinsics.RangeInt32(1, 1, Program.maxLimit));
  FSharpTypeFunc get2nd3rd = new Program.get2nd3rd@50-1();
  Tuple<double, int, int> tupledArg = SeqModule.Find<Tuple<double, int, int>>(new Program.findNM@52-1(x), SeqModule.Concat<IEnumerable<Tuple<double, int, int>>, Tuple<double, int, int>>(SeqModule.Map<Tuple<double, int>, IEnumerable<Tuple<double, int, int>>>(new Program.findNM@51-2(seq), seq)));
  FSharpFunc<Tuple<double, int, int>, Tuple<int, int>> fSharpFunc = (FSharpFunc<Tuple<double, int, int>, Tuple<int, int>>)((FSharpTypeFunc)((FSharpTypeFunc)get2nd3rd.Specialize<double>()).Specialize<int>()).Specialize<int>();
  return Program.get2nd3rd@50<double, int, int>(tupledArg);
}

We see that it requires creation of multiple objects implementing IEnumerable<> as well as functions objects that are passed to higher order functions like Seq.find. While it is in principle possible for the JITter to inline the loop it most likely won't because of time-constraints and memory reasons. This means each call to the function object is a virtual call, virtual calls are quite expensive (tip: check the machine code). Because the virtual call might do anything that in turn prevents optimizations such as using SIMD instructions.

The OP noted that F# loop expressions lacks break/continue constructs which are useful when writing efficient for loops. F# do however support it implicitly in that if you write a tail-recursive function F# unwinds this into an efficient loop that uses break/continue to exit early.

algo3 is an example of implementing algo2 using tail-recursion. The disassembled code is something like this:

internal static Tuple<int, int> find@66(double best, int i, int j)
{
  while (best != Math.Exp((double)i / (double)200) - 1.0 + (Math.Exp((double)j / (double)200) - 1.0))
  {
    if (i == Program.maxLimit && j == Program.maxLimit)
    {
      return new Tuple<int, int>(0, 0);
    }
    if (j == Program.maxLimit)
    {
      double arg_6F_0 = best;
      int arg_6D_0 = i + 1;
      j = 1;
      i = arg_6D_0;
      best = arg_6F_0;
    }
    else
    {
      double arg_7F_0 = best;
      int arg_7D_0 = i;
      j++;
      i = arg_7D_0;
      best = arg_7F_0;
    }
  }
  return new Tuple<int, int>(i, j);
}

This enables us to write idiomatic functional code and yet get very good performance while avoiding stack overflows.

Before I realized how good tail-recursion is implemented in F# I tried to write efficient while loops with mutable logic in the while test expression. For the sake of humanity that code is abolished from existence now.

algo4 is an optimized version in that it only iterates of sums once for both bestMid and bestI much like algo1 but algo4 exits early if it can.

Hope this helps

Upvotes: 4

Related Questions