Reputation: 333
I have this function here:
let ProcessFile (allLines: string list) =
let list = new List<List<string>>()
let rec SplitFile (input: string list) =
if input.Length <> 0 then
list.Add(new List<string>(input.TakeWhile(fun x -> x <> "")))
let nextGroup = input.SkipWhile(fun x -> x <> "").SkipWhile(fun x -> x = "")
SplitFile (Seq.toList nextGroup)
SplitFile allLines |> ignore
list
It is given the contents of a file as a list of strings and takes each group that is seperated by empty lines as a seperate list, giving me a list of lists.
My question is, is there a better way to do this with a yield that give me something like a string list list instead of me having to use new List< List< string>>? As this doesn't seem particularly neat to me.
Upvotes: 4
Views: 1512
Reputation: 131
How about using plain old List.fold
let processFile lines =
([], lines) ||>
List.fold(fun acc l ->
match acc with
| [] when l = "" -> acc // filter empty lines at the start of the file
| [] -> [[l]] // start the first group
| []::xss when l = "" -> acc // filter continous empty lines
| xs::xss when l = "" -> // found an empty line, start a new group
let rxs = List.rev xs // reverse the current group before starting a new one
[]::rxs::xss
| xs::xss -> (l::xs)::xss) // continue adding to the current group
|> List.rev
Upvotes: 0
Reputation: 48687
A more idiomatic solution might be:
let processFile xs =
let rec nonEmpties n = function
| [] as xs | ""::xs -> n, xs
| _::xs -> nonEmpties (n+1) xs
let rec loop xs =
seq { match xs with
| [] -> ()
| ""::xs -> yield! loop xs
| xs ->
let n, ys = nonEmpties 0 xs
yield Seq.take n xs
yield! loop ys }
loop xs
where the nested nonEmpties
function counts how many non-empty elements are at the front of the given list and returns both the count and the tail list after the last non-empty element, and the loop
function skips empty elements and yields sequences of non-empty elements.
Some interesting characteristics of this solution:
Fully tail recursive so it can handle arbitrarily long sequences of non-empty strings and sequences of sequences of non-empty strings.
Avoids copying by referring back to the input list.
On test input of 1,000 sequences of 1,000 strings this solution is 8x faster than yamen's and 50% faster than Tomas'.
Here is an even faster solution that begins by converting the input list into an array and then acts upon array indices:
let processFile xs =
let xs = Array.ofSeq xs
let rec nonEmpties i =
if i=xs.Length || xs.[i]="" then i else
nonEmpties (i+1)
let rec loop i =
seq { if i < xs.Length then
if xs.[i] = "" then
yield! loop (i+1)
else
let j = nonEmpties i
yield Array.sub xs i (j - i)
yield! loop j }
loop 0
On test input of 1,000 sequences of 1,000 strings this solution is 34x faster than yamen's and 6x faster than Tomas'.
Upvotes: 5
Reputation: 15618
Personally, I like one liners:
let source = ["a"; "b"; ""; ""; "c"; ""; "d"]
source // can be any enumerable or seq
|> Seq.scan (fun (i, _) e -> if e = "" then (i + 1, e) else (i, e)) (0, "") // add the 'index'
|> Seq.filter (fun (_, e) -> e <> "") // remove the empty entries
|> Seq.groupBy fst // group by the index
|> Seq.map (fun (_, l) -> l |> Seq.map snd |> List.ofSeq) // extract the list only from each group (discard the index)
|> List.ofSeq // turn back into a list
The biggest problem here is that Seq.groupBy
will read the entire list into memory, but you're doing that anyway. There are implementations of groupBy
that will only look at adjacent entries and that would be sufficient, and will let you input the file as a Seq
instead (for example, using File.ReadLines
rather than File.ReadAllLines
).
Upvotes: 0
Reputation: 243041
Your code is quite readable to me, but using TakeWhile
and SkipWhile
recursively is fairly inefficient. Here is a simple functional recursive solution:
let ProcessFile (allLines: string list) =
// Recursively processes 'input' and keeps the list of 'groups' collected
// so far. We keep elements of the currently generated group in 'current'
let rec SplitFile input groups current =
match input with
// Current line is empty and there was some previous group
// Add the current group to the list of groups and continue with empty current
| ""::xs when current <> [] -> SplitFile xs ((List.rev current)::groups) []
// Current line is empty, but there was no previous group - skip & continue
| ""::xs -> SplitFile xs groups []
// Current line is non-empty - add it to the current group
| x::xs -> SplitFile xs groups (x::current)
// We reached the end - add current group if it is not empty
| [] when current <> [] -> List.rev ((List.rev current)::groups)
| [] -> List.rev groups
SplitFile allLines [] []
ProcessFile ["a"; "b"; ""; ""; "c"; ""; "d"]
Essenitally the same code can be written using seq { ... }
as follows. We still need to keep the list of current groups using an accumulator (current
) but we now return groups lazily using yield
and yield!
as we iterate over the inputs:
let ProcessFile (allLines: string list) =
let rec SplitFile input current = seq {
match input with
| ""::xs when current <> [] ->
yield List.rev current
yield! SplitFile xs []
| ""::xs ->
yield! SplitFile xs []
| x::xs ->
yield! SplitFile xs (x::current)
| [] when current <> [] ->
yield List.rev current
| [] -> () }
SplitFile allLines []
Upvotes: 2