Brett Rowberry
Brett Rowberry

Reputation: 1050

F# Join Strings with Oxford Comma

I want to join a collection of strings into a single string using the Oxford (or serial) comma.

Given

let ss = [ "a"; "b"; "c"; "d" ]

I want

"a, b, c, and d"

Here's what I came up with.

let oxford (strings: seq<string>) =
  let ss = Seq.toArray strings
  match ss.Length with
  | 0 -> ""
  | 1 -> ss.[0]
  | 2 -> sprintf "%s and %s" ss.[0] ss.[1]
  | _ ->
    let allButLast = ss.[0 .. ss.Length - 2]
    let commaSeparated = System.String.Join(", ", allButLast)
    sprintf "%s, and %s" commaSeparated (Seq.last ss)

How can this be improved?

--- Edit ---
The comment about iterating through the sequences multiple times is on point. Both of the implementations below avoid the conversion to an array.

If I use seq, I quite like this:

open System.Linq
let oxfordSeq (ss: seq<string>) =
  match ss.Count() with
  | 0 -> ""
  | 1 -> ss.First()
  | 2 -> sprintf "%s and %s" (ss.ElementAt(0)) (ss.ElementAt(1))
  | _ ->
    let allButLast = ss.Take(ss.Count() - 1)
    let commaSeparated = System.String.Join(", ", allButLast)
    sprintf "%s, and %s" commaSeparated (ss.Last())

If I use array, I can also avoid the iteration of Last() by taking advantage of indexing.

let oxfordArray (ss: string[]) =
  match ss.Length with
  | 0 -> ""
  | 1 -> ss.[0]
  | 2 -> sprintf "%s and %s" ss.[0] ss.[1]
  | _ ->
    let allButLast = ss.[0 .. ss.Length - 2]
    let commaSeparated = System.String.Join(", ", allButLast)
    sprintf "%s, and %s" commaSeparated (ss.[ss.Length - 1]

--- Edit ---
Are seeing that link from @CaringDev, I think this is pretty nice. No wildcards, handles null, less indexing to get right, and only traverse the array once in the Join() method.

let oxford = function
    | null | [||] -> ""
    | [| a |] -> a
    | [| a; b |] -> sprintf "%s and %s" a b
    | ss ->
        let allButLast = System.ArraySegment(ss, 0, ss.Length - 1)
        let sb = System.Text.StringBuilder()
        System.String.Join(", ", allButLast) |> sb.Append |> ignore
        ", and " + ss.[ss.Length - 1] |> sb.Append |> ignore
        string sb

This once is pretty nice too and has even less jumping around:

let oxford2 = function
    | null | [||] -> ""
    | [| a |] -> a
    | [| a; b |] -> sprintf "%s and %s" a b
    | ss ->
        let sb = System.Text.StringBuilder()
        let action i (s: string) : unit = 
            if i < ss.Length - 1 
            then 
                sb.Append s |> ignore
                sb.Append ", " |> ignore
            else 
                sb.Append "and " |> ignore
                sb.Append s |> ignore
        Array.iteri action ss          
        string sb

Upvotes: 3

Views: 428

Answers (5)

kaefer
kaefer

Reputation: 5751

A way I have not yet seen: Matching on the end of the list.

let ox =
    List.rev >> function
    | [] -> ""
    | [x] -> x
    | [y; x] -> x + " and " + y
    | y::ys -> String.concat ", " (List.rev ("and " + y::ys))
// val ox : (string list -> string)

ox["a"; "b"; "c"; "d"]
// val it : string = "a, b, c, and d"

Upvotes: 1

Doktorn
Doktorn

Reputation: 797

Another recursive variant:

let rec oxford l =
    match l with
    | [] -> ""
    | [x] -> x
    | [x; y] -> x + " and " + y
    | head :: tail -> 
        head + ", " + oxford tail

Upvotes: 0

foldBack is also a possibility. Performance from concatenating strings this way isn't brilliant but for 4 items it usually doesn't matter.

let oxfordify (ws : seq<string>) : string =
  // Folder concats the value and the aggregated result using seperator 0
  //  it updates the state with the new string and moves 
  //  seperator 1 into seperator 0 slot and set seperator 1 to ", "
  let folder v (r, s0, s1) = (v + s0 + r, s1, ", ")
  // The seperator 0 for first iteration is empty string (if it's only 1 value)
  // The seperator 1 is set to ", and " as the seperator between 2 last items
  // For all other items ", " will be used (see folder)
  let r, _, _ = Seq.foldBack folder ws ("", "", ", and ")
  r

A coworker pointed me in the direction of using infinite sequences to represent the separators:

let separators = 
  Seq.concat [| [|""; ", and "|] :> seq<_>; Seq.initInfinite (fun _ -> ", ") |]
let oxfordify (ws : seq<string>) : string = 
  Seq.fold2 (fun r v s -> v + s + r) "" (ws |> Seq.rev) separators 

For a more performant option you could consider something like this:

module Details =
  module Loops =
    let inline app (sb : System.Text.StringBuilder) (w : string) : unit =
      sb.Append w |> ignore
    let rec oxfordify sb (ws : _ array) i : string =
      if i < ws.Length then
        if i = 0 then 
          ()
        elif i = ws.Length - 1 then 
          app sb ", and "
        else 
          app sb ", "
        app sb ws.[i]
        oxfordify sb ws (i + 1)
      else
        sb.ToString ()
open Details

let oxfordify (ws : string array) : string = 
  let sb = System.Text.StringBuilder ()
  Loops.oxfordify sb ws 0

Upvotes: 3

Devon Burriss
Devon Burriss

Reputation: 2532

You could look directly at the list and use pattern matching on the list. Maybe this could be improved but it gives the idea.

let rec oxford (s:string) (ss:string list) =
    match ss with
    | [] -> s
    | [x;y] -> sprintf "%s, %s, and %s" s x y
    | h::t when String.length s = 0 -> oxford h t
    | h::t -> oxford (sprintf "%s, %s" s h) t

It calls itself recursively with a smaller list applying the commas. When the list is only 2 in size it uses an and instead. The when is unfortunate but found I needed it for the first call with an empty string so dont end up with a leading ,.

EDIT

So personally I prefer the above option for a small number of words. The string concat for each call does not perform well for large numbers though.

// collect into a list including the *,* and *and*, then just concat that to string
let oxfordDrct (ss:string list) =
    let l = ss |> List.length
    let map i s = if(i < l-1) then [s;", "] else ["and ";s]        
    match ss with
    | [] -> ""
    | [x] -> x
    | [x;y] -> sprintf "%s, and %s" x y
    | _ -> ss |> List.mapi map |> List.concat |> String.concat ""

// Recursive like the original but instead pass a StringBuilder instead of string
let oxfordSb xs =        
    let rec collect (s:StringBuilder) (ss:string list) =
        match ss with
        | [] -> s
        | [x;y] -> sprintf ", %s, and %s" x y |> s.Append
        | h::t when s.Length = 0 -> collect (s.Append(h)) t
        | h::t -> collect (s.Append(sprintf ", %s" h)) t
    let sb = new StringBuilder()     
    (collect sb xs) |> string

These 2 options perform much like the original option, all of which are better than the rec by string.

Upvotes: 5

scrwtp
scrwtp

Reputation: 13577

My not-so-different approach.

let oxford (ss: string array) =
    match ss.Length with
    | 0 -> ""
    | 1 -> ss.[0]
    | 2 -> sprintf "%s and %s" ss.[0] ss.[1]
    | _ ->
       let cs = System.String.Join(", ", ss.[ 0 .. ss.Length - 2])
       sprintf "%s, and %s" cs (ss.[ss.Length - 1])

I don't really see anything wrong with your code except the fact you'd be iterating through your sequence multiple times (array conversion + string join + Seq.last).

This is not a performance problem, because I don't expect this function to be called on sequences larger than single-digit numbers, but on the off-chance that sequence is side-effecting or expensive to calculate, you'll get weird behaviour. This is why I switched the input to array.

As far as readability goes, you can't get better than what you already have, explicitly enumerating the base cases, and an extra string allocation from the sprintf in last line is irrelavant anyway (especially compared to what you'd get from direct recursion).

Upvotes: 2

Related Questions