Thomas
Thomas

Reputation: 12117

DateTime to string, in F#, can this be optimized further?

I have a system that does some simulations and outputs a lot of log information with timestamps.

I was surprised to see that DateTime.ToString() was quite expensive in the loop (it's really called a lot), so I tried to make a much faster version that outputs the date, time, and milliseconds.

Can this be done faster (the output needs to have milliseconds)? I didn't try to use a pointer because I assumed (maybe wrongly) that for such a small piece of code, the overhead of pinning the object would be higher.

module DateTimeFormatter =

    /// Returns the character located `n` code points after '0'
    /// (a decimal digit whenever `n` is in 0..9).
    let inline private digitChar (n: int) : char =
        char (int '0' + n)

    /// Stores `value` as two characters (tens, then units) at
    /// `buf.[pos]` and `buf.[pos + 1]`.
    let inline private putTwo (buf: char[]) pos value =
        let tens = value / 10
        let units = value % 10
        buf.[pos] <- digitChar tens
        buf.[pos + 1] <- digitChar units

    /// Stores `value` as three characters (hundreds, tens, units)
    /// starting at `buf.[pos]`.
    let inline private putThree (buf: char[]) pos value =
        let hundreds = value / 100
        let tens = (value % 100) / 10
        let units = value % 10
        buf.[pos] <- digitChar hundreds
        buf.[pos + 1] <- digitChar tens
        buf.[pos + 2] <- digitChar units

    /// Renders `dateTime` as a 23-character string laid out as
    /// "yyyy-MM-dd HH:mm:ss.fff", allocating a fresh buffer per call.
    let format (dateTime: DateTime) =
        let buf = Array.zeroCreate<char> 23

        // Fixed separators first; the digit groups fill the gaps below.
        buf.[4] <- '-'
        buf.[7] <- '-'
        buf.[10] <- ' '
        buf.[13] <- ':'
        buf.[16] <- ':'
        buf.[19] <- '.'

        // The four-digit year is written as two two-digit halves.
        let year = dateTime.Year
        putTwo buf 0 (year / 100)
        putTwo buf 2 (year % 100)
        putTwo buf 5 dateTime.Month
        putTwo buf 8 dateTime.Day
        putTwo buf 11 dateTime.Hour
        putTwo buf 14 dateTime.Minute
        putTwo buf 17 dateTime.Second
        putThree buf 20 dateTime.Millisecond

        String(buf)

Here is the test code:

// Benchmark: formats the same timestamp `iterations` times with each of three
// approaches and prints the elapsed wall-clock time of each run.
let a = DateTime.UtcNow
let iterations = 10_000_000
let sw = Stopwatch()

// Run 1: parameterless DateTime.ToString().
// `1 .. iterations` executes exactly `iterations` passes; the inclusive
// `0 to iterations` form would execute one extra pass per run.
sw.Start()
for _ in 1 .. iterations do
    a.ToString() |> ignore
sw.Stop()
printfn $"original no ms display {sw.ElapsedMilliseconds} ms"

// Run 2: built-in formatting with an explicit pattern that includes milliseconds.
// Restart() zeroes the elapsed time and starts measuring in a single call.
sw.Restart()
for _ in 1 .. iterations do
    a.ToString("yyyy-MM-dd HH:mm:ss.fff", CultureInfo.InvariantCulture) |> ignore
sw.Stop()
printfn $"original with ms display {sw.ElapsedMilliseconds} ms"

// Run 3: the hand-written formatter.
sw.Restart()
for _ in 1 .. iterations do
    a |> DateTimeFormatter.format |> ignore
sw.Stop()
printfn $"new with ms display {sw.ElapsedMilliseconds} ms"

and the test results (MBP i7 2019):

original no ms display   2892 ms

original with ms display 4042 ms
new with ms display      1435 ms

Upvotes: 2

Views: 165

Answers (2)

Sergey Berezovskiy
Sergey Berezovskiy

Reputation: 236268

Things I would improve

  • Avoid re-creating char array

  • Avoid re-assigning delimiters in char array - they never change

  • Avoid re-assigning part of the date that didn't change. If you worry about milliseconds, I would assume that years, months, days, hours, minutes, and even seconds don't change very often.

  • Avoid re-calculating the value of int '0' - it never changes

  • Avoid additional function calls

    /// Formats a DateTime as "yyyy-MM-dd HH:mm:ss.fff" using one reusable
    /// character buffer. Each date/time component is remembered between calls
    /// and its digits are rewritten only when the component differs from the
    /// previous call; the milliseconds are rewritten on every call.
    /// NOTE(review): the buffer and the remembered values are shared by all
    /// calls, so concurrent callers would presumably interfere - confirm usage.
    let format =
        // Components seen on the previous call; -1 never equals a real
        // component, so the first call writes every field.
        let mutable lastYear = -1
        let mutable lastMonth = -1
        let mutable lastDay = -1
        let mutable lastHour = -1
        let mutable lastMinute = -1
        let mutable lastSecond = -1
        // Template already holding the separators, which are never rewritten.
        let buffer = "0000-00-00 00:00:00.000".ToCharArray()
        let zero = int '0'

        fun (dateTime: DateTime) ->
            let y = dateTime.Year
            if y <> lastYear then
                lastYear <- y
                buffer.[0] <- char (zero + y / 1000)
                buffer.[1] <- char (zero + (y % 1000) / 100)
                buffer.[2] <- char (zero + (y % 100) / 10)
                buffer.[3] <- char (zero + (y % 10))

            let mo = dateTime.Month
            if mo <> lastMonth then
                lastMonth <- mo
                buffer.[5] <- char (zero + mo / 10)
                buffer.[6] <- char (zero + mo % 10)

            let d = dateTime.Day
            if d <> lastDay then
                lastDay <- d
                buffer.[8] <- char (zero + d / 10)
                buffer.[9] <- char (zero + d % 10)

            let h = dateTime.Hour
            if h <> lastHour then
                lastHour <- h
                buffer.[11] <- char (zero + h / 10)
                buffer.[12] <- char (zero + h % 10)

            let mi = dateTime.Minute
            if mi <> lastMinute then
                lastMinute <- mi
                buffer.[14] <- char (zero + mi / 10)
                buffer.[15] <- char (zero + mi % 10)

            let s = dateTime.Second
            if s <> lastSecond then
                lastSecond <- s
                buffer.[17] <- char (zero + s / 10)
                buffer.[18] <- char (zero + s % 10)

            // Milliseconds are written unconditionally.
            let ms = dateTime.Millisecond
            buffer.[20] <- char (zero + ms / 100)
            buffer.[21] <- char (zero + (ms % 100) / 10)
            buffer.[22] <- char (zero + ms % 10)

            String(buffer)
    

Running it with your test case shows roughly 2x the performance of your solution and roughly 5x compared to the original with milliseconds.

original no ms display 2354 ms
original with ms display 3545 ms
new with ms display 1221 ms
newest with ms display 691 ms

A further optimization would be to avoid the DateTime property calls and calculate the values manually from Ticks.

Upvotes: 1

kaefer
kaefer

Reputation: 5751

Reusing the array and not rewriting the constant characters have already been mentioned in the comments. Further considerations:

  • The inline keyword does not seem to change whether the compiler actually inlines the corresponding expressions here; that goal is better served by avoiding arithmetic on the array indices
  • Calls to the System.DateTime property getters seem to be expensive
  • The return value is generated by a call to a constructor of the type System.String, for which new string(c) is the C# way of calling it

Therefore, have a look at this - the additional divisions by 1 do not seem to slow it down:

/// Reusable output buffer, pre-filled with the separators of the
/// "yyyy-MM-dd HH:mm:ss.fff" layout (a space between date and time, matching
/// the format the formatter is meant to replace). Only the digit positions
/// are ever overwritten.
/// NOTE(review): the buffer is shared by every call to `format`, so concurrent
/// callers would presumably interfere - confirm single-threaded use.
let internal c = "0000-00-00 00:00:00.000".ToCharArray()
/// `x %& m` is the character for the decimal digit of `x` at place value `m`
/// (1 = units, 10 = tens, ...); 48 is the character code of '0'.
let internal (%&) x m = char(48 + (x / m) % 10)
/// Formats `a` as "yyyy-MM-dd HH:mm:ss.fff" by writing each digit into the
/// shared buffer `c` and returning a new string built from it.
let format (a : System.DateTime) =
    // Read every component once up front.
    let y, m, d, h, min, s, ms =
        a.Year, a.Month, a.Day,
        a.Hour, a.Minute, a.Second,
        a.Millisecond
    c.[ 0] <- y   %& 1000
    c.[ 1] <- y   %& 100
    c.[ 2] <- y   %& 10
    c.[ 3] <- y   %& 1
    c.[ 5] <- m   %& 10
    c.[ 6] <- m   %& 1
    c.[ 8] <- d   %& 10
    c.[ 9] <- d   %& 1
    c.[11] <- h   %& 10
    c.[12] <- h   %& 1
    c.[14] <- min %& 10
    c.[15] <- min %& 1
    c.[17] <- s   %& 10
    c.[18] <- s   %& 1
    c.[20] <- ms  %& 100
    c.[21] <- ms  %& 10
    c.[22] <- ms  %& 1
    System.String c

This will hopefully be compiled into the following C# code:

// C# rendering that the F# `format` function is hoped to compile to.
// The `...` lines stand for statements left out of this listing.
public static string format(DateTime a)
{
    int year = a.Year;
    ...
    // 48 is the character code of '0'; each assignment extracts one decimal
    // digit of the year, from thousands down to units.
    c[0] = (char)(48 + year / 1000 % 10);
    c[1] = (char)(48 + year / 100 % 10);
    c[2] = (char)(48 + year / 10 % 10);
    c[3] = (char)(48 + year / 1 % 10);
    ...
    return new string(c);
}

Upvotes: 1

Related Questions