ca9163d9
ca9163d9

Reputation: 29159

F# list group by running total?

I have the following list of tuples ordered by the first item. I want to cluster the times by

  1. If the second item of the tuple is greater then 50, it will be in its own cluster.
  2. Otherwise, cluster the items whose sum is less than 50.
  3. The order cannot be changed.

code:

let values =
  [("ACE", 78);
   ("AMR", 3);
   ("Aam", 6);
   ("Acc", 1);
   ("Adj", 23);
   ("Aga", 12);
   ("All", 2);
   ("Ame", 4); 
   ("Amo", 60);
   //.... 
  ]
values |> Seq.groupBy(fun (k,v) -> ???)

The expected value will be

[["ACE"] // 78
 ["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"] // 47
 ["Ame"] // 4
 ["Amo"] // 60
....]

Ideally, I want to evenly distribute the second group (["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"] which got the sum of 47) and the third one (["Ame"] which has only 4).

How to implement it in F#?


I had the following solution. It uses a mutable variable. It's not F# idiomatic? Is for ... do imperative in F# or is it a syntactic sugar of some function construct?

seq {
    let mutable c = []
    for v in values |> Seq.sortBy(fun (k, _) -> k) do
        let sum = c |> Seq.map(fun (_, v) -> v) |> Seq.sum
        if not(c = []) && sum + (snd v) > 50 
        then 
            yield c
            c <- [v]
        else
            c <- List.append c [v]
 }

Upvotes: 2

Views: 181

Answers (4)

AMieres
AMieres

Reputation: 5004

Yet another solution using Seq.mapFold and Seq.groupBy:

let group values =
    values
    |> Seq.mapFold (fun (group, total) (name, count) -> 
        let newTotal = count + total
        let newGroup = group + if newTotal > 50 then 1 else 0
        (newGroup, name), (newGroup, if newGroup = group then newTotal else count) 
        ) (0, 0)
    |> fst
    |> Seq.groupBy fst
    |> Seq.map    (snd >> Seq.map snd >> Seq.toList)

Invoke it like this:

[   "ACE", 78
    "AMR", 3
    "Aam", 6
    "Acc", 1
    "Adj", 23
    "Aga", 12
    "All", 2
    "Ame", 4
    "Amo", 60
] 
|> group        
|> Seq.iter    (printfn "%A")

// ["ACE"]
// ["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"]
// ["Ame"]
// ["Amo"]

Upvotes: 0

user1562155
user1562155

Reputation:

Here is a recursive version - working much the same way as fold-versions:

let groupBySums data =
    let rec group cur sum acc lst =
        match lst with
        | [] -> acc |> List.where (not << List.isEmpty) |> List.rev
        | (name, value)::tail when value > 50 -> group [] 0 ([(name, value)]::(cur |> List.rev)::acc) tail
        | (name, value)::tail -> 
            match sum + value with
            | x when x > 50 -> group [(name, value)] 0 ((cur |> List.rev)::acc) tail
            | _ -> group ((name, value)::cur) (sum + value) acc tail
    (data |> List.sortBy (fun (name, _) -> name)) |> group [] 0 []

values |> groupBySums |> List.iter (printfn "%A")

Upvotes: 1

Dzoukr
Dzoukr

Reputation: 1145

I think I got it. Not the nicest code ever, but works and is immutable.

let foldFn (acc:(string list * int) list) (name, value) =
    let addToLast last = 
        let withoutLast = acc |> List.filter ((<>) last)
        let newLast = [((fst last) @ [name]), (snd last) + value]
        newLast |> List.append withoutLast

    match acc |> List.tryLast with
    | None -> [[name],value]
    | Some l ->
        if (snd l) + value <= 50 then addToLast l
        else [[name], value] |> List.append acc

values |> List.fold foldFn [] |> List.map fst

Update: Since append can be quite expensive operation, I added prepend only version (still fulfills original requirement to keep order).

let foldFn (acc:(string list * int) list) (name, value) =
    let addToLast last = 
        let withoutLast = acc |> List.filter ((<>) last) |> List.rev
        let newLast = ((fst last) @ [name]), (snd last) + value
        (newLast :: withoutLast) |> List.rev

    match acc |> List.tryLast with
    | None -> [[name],value]
    | Some l ->
        if (snd l) + value <= 50 then addToLast l
        else ([name], value) :: (List.rev acc) |> List.rev

Note: There is still @ operator on line 4 (when creating new list of names in cluster), but since the theoretical maximum amount of names in cluster is 50 (if all of them would be equal 1), the performance here is negligible.

If you remove List.map fst on last line, you would get sum value for each cluster in list.

Upvotes: 3

kaefer
kaefer

Reputation: 5741

Append operations are expensive. A straight-forward fold with prepended intermediate results is cheaper, even if the lists need to be reversed after processing.

["ACE", 78; "AMR", 3; "Aam", 6; "Acc", 1; "Adj", 23; "Aga", 12; "All", 2; "Ame", 4; "Amd", 6; "Amo", 60]
|> List.fold (fun (r, s1, s2) (t1, t2) ->
    if t2 > 50 then [t1]::s1::r, [], 0
    elif s2 + t2 > 50 then s1::r, [t1], t2
    else r, t1::s1, s2 + t2 ) ([], [], 0)
|> fun (r, s1, _) -> s1::r
|> List.filter (not << List.isEmpty)
|> List.map List.rev
|> List.rev
// val it : string list list =
//   [["ACE"]; ["AMR"; "Aam"; "Acc"; "Adj"; "Aga"; "All"]; ["Ame"; "Amd"];
//    ["Amo"]]

Upvotes: 2

Related Questions