pizycki
pizycki

Reputation: 1350

How to take sublist without first and last item with F#?

I have sorted list of integer values:

let ls = [1..4]

How can I get a sublist without first and the last element? (In the most optimal way)

The expected result is [2; 3].

This is what I have so far, and yeah, it's working, but I in my opinion it's just not the best approach.

[1..4] |> List.tail |> List.rev |> List.tail |> List.sort

Upvotes: 3

Views: 1170

Answers (3)

A somewhat long answer incoming in response to your innocently worded qualifier: "In the most optimal way"

Optimal in terms of what?

  1. Performance? (Most likely)
  2. Performance but also include GC performance?
  3. Memory usage?
  4. x86?
  5. x64?

And so on...

So I decided to measure some aspects of the problem.

I measured the different answers (added a non-idiomatic version as well) in this thread in various different context.

Without further ado here is the program I used to measure

open System
open System.Diagnostics
open System.IO

module so29100251 =
    // Daystate solution (OP)
    module Daystate =
        // Applied minor fixes to it
        let trim = function
            | [] | [_] | [_;_] -> []
            | ls -> ls |> List.tail |> List.rev |> List.tail |> List.rev

    // kaefer solution
    module kaefer =
        type 'a State = Zero | One | Other of 'a

        let skipFirstAndLast xss =
            let rec aux acc = function
            | _,            []    -> List.rev acc
            | Zero,         x::xs -> aux acc (One, xs)
            | One,          x::xs -> aux acc (Other x, xs)
            | (Other prev), x::xs -> aux (prev :: acc) (Other x, xs)
            aux [] (Zero, xss)

    // Petr solution
    module Petr =
        let rec trimImpl ls acc =
            match ls, acc with
            | [],      _   -> acc
            | h::[],   acc -> List.rev acc
            | h::n::t, []  -> trimImpl t [n]
            | h::t,    acc -> trimImpl t (h::acc)

        let trim ls = trimImpl ls []

    // NonIdiomatic solution
    module NonIdiomatic =
        let trim (hint : int) (ls : 'T list) =
            // trims last of rest

            // Can't ask for ls.Length as that is O(n)
            let ra = ResizeArray<_> (hint)

            // Can't use for x in list do as it relies on .GetEnumerator ()
            let mutable c = ls
            while not c.IsEmpty do
                ra.Add c.Head
                c <- c.Tail

            let count = ra.Count

            let mutable result = []
            for i in (count - 2)..(-1)..1 do
                result <- ra.[i]::result
            result

open so29100251

type Time = MilliSeconds of int64

type TestKind<'T> =
     | Functional               of 'T
     | MeasurePerformance       of int*int

[<EntryPoint>]
let main argv =
    let factor  = 10000000
//    let maxHint = Int32.MaxValue
    let maxHint = 100

    let time (action : unit -> 'T) : 'T*Time =
        let sw = Stopwatch ()

        sw.Start ()

        let r = action ()

        sw.Stop ()

        r, MilliSeconds sw.ElapsedMilliseconds

    let adapt fn hint ls = fn ls

    let trimmers =
        [|
            "Daystate"      , adapt Daystate.trim
            "kaefer"        , adapt kaefer.skipFirstAndLast
            "Petr"          , adapt Petr.trim
            "NonIdiomatic"  , NonIdiomatic.trim
        |]


#if DEBUG
    let functionalTestCases =
        [|
            Functional []               , "empty"       , []
            Functional []               , "singleton"   , [1]
            Functional []               , "duoton"      , [1;2]
            Functional [2]              , "triplet"     , [1;2;3]
            Functional [2;3]            , "quartet"     , [1;2;3;4]
        |]

    let performanceMeasurements = [||]
#else
    let functionalTestCases = [||]

    let performanceMeasurements =
        [|
            "small"   , 10
            "big"     , 1000
            "bigger"  , 100000
//            "huge"    , 10000000
        |] |> Array.map (fun (name, size) -> MeasurePerformance (size, (factor / size))  , name       , [for x in 1..size -> x])
#endif

    let testCases =
        [|
            functionalTestCases
            performanceMeasurements
        |] |> Array.concat


    use tsv = File.CreateText ("result.tsv")

    tsv.WriteLine (sprintf "TRIMMER\tTESTCASE\tSIZE\tHINT\tRUNS\tMEMORY_BEFORE\tMEMORY_AFTER\tGC_TIME\tRUN_TIME")

    for trimName, trim in trimmers do
        for testKind, testCaseName, testCase in testCases do
            match testKind with
            | Functional expected ->
                let actual = trim 0 testCase
                if actual = expected then
                    printfn "SUCCESS: Functional test of %s trim on testcase %s successful" trimName testCaseName
                else
                    printfn "FAILURE: Functional test of %s trim on testcase %s failed" trimName testCaseName
            | MeasurePerformance (size,testRuns) ->

                let hint    = min size maxHint

                let before  = GC.GetTotalMemory(true)

                printfn "MEASURE: Running performance measurement on %s trim using testcase %s..." trimName testCaseName

                let timeMe () =
                    for x in 1..testRuns do
                        ignore <| trim hint testCase
                let _, MilliSeconds ms = time timeMe

                let after   = GC.GetTotalMemory(false)

                let timeGC () =
                    ignore <| GC.GetTotalMemory(true)
                let _, MilliSeconds msGC = time timeMe

                printfn "...%d ms (%d runs), %d (before) %d (after) %d ms (GC)" ms testRuns before after msGC

                tsv.WriteLine (sprintf "%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d" trimName testCaseName size hint testRuns before after msGC ms)

    0

I then measured the execution time and GC time on x64 and max size hint allowed: (size hints is only used by the non-idiomatic version)

x64 maxhint

x86 and max size hint allowed: x86 maxhint

x64 and max 100 hint allowed: x64 hint 100

x86 and max 100 hint allowed: x86 hint 100

Looking at the performance charts we can note some somewhat surprising things:

  1. All variants are iterating 10000000 times. One would expect the execution time to not differ between the different variants but they do.
  2. The crusty old x86 scores consistently better overall. I won't speculate why.
  3. OPs initial version while seemingly wasteful scores pretty good. It's probably helped by that List.rev is very optimized (IIRC it does some safe cheating available only to F# devs)
  4. The kaefer version while on paper a better solution seems to score the worst. I think it's because it allocates extra State objects which are heap based. (This should obviously not be interpreted as a criticism of kaefers skills)
  5. The non-idiomatic solution scores good with good size hints but not as good as I expected. It might be that building the final lists is what costs most cycles. It might also be that tail recursive functions over lists are more efficient than while loops as IIRC pattern matching are more effective than calling List.Tail/List.Head/List.IsEmpty
  6. GC time is almost as big as the execution time.
  7. I expected the GC time of the non-idiomatic solution to be significantly lower than the rest. However, while the ResizeArray<_> are probably quick to collect the list objects aren't.
  8. On x86 arch the performance difference between Petr solution and the non-idiomatic one might not warrant the extra complexity.

Some final thoughts:

  1. OPs original solution did pretty good
  2. Garbage Collection takes time
  3. Always measure...

Hopefully it was somewhat interesting

Edit: The GC performance measurement numbers should not be over-interpreted into some thing more than: "GC can be expensive"

I later changed from a while loop to tail-recursion over a list which did improve the performance somewhat but not enough to warrant an update of the charts.

Upvotes: 8

Petr
Petr

Reputation: 4280

This is one of the ways:

let rec trim ls acc =
    match ls, acc with
    | [],      _   -> acc
    | h::[],   acc -> List.rev acc
    | h::n::t, []  -> trim t [n]  
    | h::t,    acc -> trim t (h::acc)

let reslt = trim ls []

Upvotes: 4

kaefer
kaefer

Reputation: 5741

You didn't require standard library functions to achieve this, your're just asking for an efficient way. Defining a recursive function with an accumulator which holds the intermediate results would then appear a viable solution, even when the list has to be reversed at its termination.

I'm providing a custom Discriminated Union to keep track of the state, this is modelled along the lines of the Option type with an extra case.

type 'a State = Zero | One | Other of 'a

let skipFirstAndLast xss =
    let rec aux acc = function
    | _,            []    -> List.rev acc
    | Zero,         x::xs -> aux acc (One, xs)
    | One,          x::xs -> aux acc (Other x, xs)
    | (Other prev), x::xs -> aux (prev :: acc) (Other x, xs)
    aux [] (Zero, xss)

[1..4] |> skipFirstAndLast // val it : int list = [2; 3]

Upvotes: 2

Related Questions