Riccardo
Riccardo

Reputation: 230

Fastest way to replace occurrences in small string using span in c#

I'm trying to maximize the cpu performance and memory optimization for string.Replace method in C#. The goal is to reduce memory allocation and cpu time as the project is in asp.net core with 10000 rps.

I found out two tips for improve performance: 1)use Span Struct 2)use String.Create

   internal struct ContextData
    {
        public string Origin { get; set; }
        public string Replace { get; set; }
        public string With { get; set; }
    }




    internal string SpanReplaceWithCreate(ContextData context)
    {
        int count = 0;
     
        ReadOnlySpan<char> origin_span = context.Origin.AsSpan();
        ReadOnlySpan<char> replace_span = context.Replace.AsSpan();
        ReadOnlySpan<char> replace_with = context.With.AsSpan();

        int index;
        ReadOnlySpan<char> tmp = origin_span;

        while ((index = tmp.IndexOf(replace_span)) > 0)
        {
            count++;
            tmp = tmp.Slice(index + replace_span.Length, tmp.Length - replace_span.Length - index);
        }

        string a = string.Create(context.Origin.Length + (context.Replace.Length - context.With.Length) * count, context, (chars, state) =>
           {
               // NOTE: We don't access the context variable in this delegate since 
               // it would cause a closure and allocation.
               // Instead we access the state parameter.

               // will track our position within the string data we are populating
               var position = 0;
               ReadOnlySpan<char> origin = state.Origin.AsSpan();
               ReadOnlySpan<char> replace = state.Replace.AsSpan();
               ReadOnlySpan<char> with = state.With.AsSpan();

               ReadOnlySpan<char> tmp_context = origin;

               while ((index = tmp_context.IndexOf(replace)) > 0)
               {
                   tmp_context.Slice(0, index).CopyTo(chars.Slice(position));
                   with.CopyTo(chars.Slice(position + index));
                   position += (index + with.Length);
                   tmp_context = tmp_context.Slice(index + replace.Length, tmp_context.Length - replace.Length - index);
               }

               if (position < chars.Length) {
                   tmp_context.CopyTo(chars.Slice(position));
               }

           });


        return a;
    }

but i still have worst performance compared to the string.Replace

Method URL find replace Mean Error StdDev Median Rank Gen 0 Gen 1 Gen 2 Allocated
StringReplace http(...)ogle [196] google afd 370.4 ns 9.37 ns 27.33 ns 360.7 ns 1 0.0319 - - 336 B
StringReplaceWithCreate http(...)ogle [196] google afd 492.8 ns 9.60 ns 12.15 ns 490.4 ns 2 0.0563 - - 592 B

Any suggestion?

here params for test

https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}



{camp} : "campiagn_it_banner_size_360"
{publisher_id} : "78983"
{transaction} : "c1032072-f815-413b-a57c-4a027f681e60"
{aff_sub1} : "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"
{aff_site} : "vbvsdgdavhdgdvjs_46_789-p90"
{creative_id} : "360x360"
{ua} : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
{ip} : "192.168.1.1"
{lang} : "en"

UPDATE 1

[Benchmark]
    public string FastTokenReplace()
    {

        string request = "http://wwww.example.com?a=campiagn_it_banner_size_360&b=78983&h=c1032072-f815-413b-a57c-4a027f681e6&y=78bea32a-6ead-4ea0-b9f2-9489ebc43d6a&ty=vbvsdgdavhdgdvjs_46_789-p90&yhhh=360x360&sua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F90.0.4430.93%20Safari%2F537.36&ppp=192.168.1.1";
        string redirecturl = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}&ieruiero{343454";

        int max_allocation = Math.Max(request.Length, redirecturl.Length) * 3;

        return string.Create(max_allocation, redirecturl, (chars, state) =>
        {
            ReadOnlySpan<char> tmp = state.AsSpan();
            int position = 0;
            int placeholder_start;
            while ((placeholder_start = tmp.IndexOf('{')) > 0)
            {
                int placeholder_end = tmp.Slice(placeholder_start).IndexOf('}');
                if (placeholder_end < 0)
                {
                    //copy the last part
                    tmp.CopyTo(chars.Slice(position));
                    break;
                }
                else
                {
                    tmp.Slice(0, placeholder_start).CopyTo(chars.Slice(position));
                    ReadOnlySpan<char> replace = tmp.Slice(placeholder_start, placeholder_end + 1);

                    //OPTIMIZE HERE?
                    ReadOnlySpan<char> with = Placeholders.getVal(replace.ToString()).AsSpan();

                    with.CopyTo(chars.Slice(position + placeholder_start));
                    position += (placeholder_start + with.Length);
                    tmp = tmp.Slice(placeholder_start + replace.Length, tmp.Length - replace.Length - placeholder_start);
                }

            }

        });
    }

 class Placeholders
{



    public const string camp = "{camp}";
    public const string publisher_id = "{publisher_id}";
    public const string creative_id = "{creative_id}";
    public const string ua = "{ua}";
    public const string lang = "{lang}";
    public const string ip = "{ip}";
    public const string Transaction = "{transaction}";
    public const string AffSite = "{aff_site}";
    public const string AdsetId = "{adset}";
    public const string AffSub1 = "{affsub1}";


    public static string getVal(string key)
    {

        switch (key)
        {
            case camp:
                return "campiagn_it_banner_size_360";
            case publisher_id:
                return "78983";
            case Transaction:
                return "c1032072-f815-413b-a57c-4a027f681e60";
            case AffSub1:
                return "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a";
            case AffSite:
                return "vbvsdgdavhdgdvjs_46_789-p90";
            case creative_id:
                return "360x360";
            case ua:
                return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36";
            case ip:
                return "192.168.1.1";
            default:
                return "";
        }
    }

    public static ReadOnlySpan<char> getVal(ReadOnlySpan<char> key)
    {

        if (MemoryExtensions.Equals(key, camp, StringComparison.Ordinal))
            return "campiagn_it_banner_size_360".AsSpan();
        else if (MemoryExtensions.Equals(key, publisher_id, StringComparison.Ordinal))
            return "78983".AsSpan();
        else if (MemoryExtensions.Equals(key, Transaction, StringComparison.Ordinal))
            return "c1032072-f815-413b-a57c-4a027f681e6".AsSpan();
        else if (MemoryExtensions.Equals(key, AffSub1, StringComparison.Ordinal))
            return "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a".AsSpan();
        else if (MemoryExtensions.Equals(key, AffSite, StringComparison.Ordinal))
            return "vbvsdgdavhdgdvjs_46_789-p90".AsSpan();
        else if (MemoryExtensions.Equals(key, creative_id, StringComparison.Ordinal))
            return "360x360".AsSpan();
        else if (MemoryExtensions.Equals(key, ua, StringComparison.Ordinal))
            return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36".AsSpan();
        else if (MemoryExtensions.Equals(key, ip, StringComparison.Ordinal))
            return "192.168.1.1".AsSpan();
        else
            return "".AsSpan();


    }




}
   [Benchmark]
    public string StandardTokenReplace()
    {

        string request = "http://wwww.example.com?a=campiagn_it_banner_size_360&b=78983&h=c1032072-f815-413b-a57c-4a027f681e6&y=78bea32a-6ead-4ea0-b9f2-9489ebc43d6a&ty=vbvsdgdavhdgdvjs_46_789-p90&yhhh=360x360&sua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F90.0.4430.93%20Safari%2F537.36&ppp=192.168.1.1";
        string redirecturl = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}&ieruiero{343454";
        int max_allocation = Math.Max(request.Length, redirecturl.Length) + Math.Abs(request.Length - redirecturl.Length);

        //get original url and take the longest one + domain


        return redirecturl.Replace(Placeholders.camp, Placeholders.getVal(Placeholders.camp))
            .Replace(Placeholders.publisher_id, Placeholders.getVal(Placeholders.publisher_id))
            .Replace(Placeholders.creative_id, Placeholders.getVal(Placeholders.creative_id))
            .Replace(Placeholders.ua, Placeholders.getVal(Placeholders.ua))
            .Replace(Placeholders.lang, Placeholders.getVal(Placeholders.lang))
            .Replace(Placeholders.ip, Placeholders.getVal(Placeholders.ip))
            .Replace(Placeholders.Transaction, Placeholders.getVal(Placeholders.Transaction))
            .Replace(Placeholders.AffSite, Placeholders.getVal(Placeholders.AffSite))
            .Replace(Placeholders.AdsetId, Placeholders.getVal(Placeholders.AdsetId))
            .Replace(Placeholders.AffSub1, Placeholders.getVal(Placeholders.AffSub1));

    }

1 MAX ALLOCATION

    int max_allocation = Math.Max(request.Length, redirecturl.Length) * 3;

we can calculate the correct size of the string but it will perform worse. for this case we can assume a max lenght.

www.example.com?camp=1234567890123456789023456789012345678902345678 www.replace.com?{camp}{camp}{camp}{camp}{camp}{camp}{camp}

won't work.

2 GETTING VALUE

   ReadOnlySpan<char> with = Placeholders.getVal(replace.ToString()).AsSpan();

in case of the placeholder is repeated we can cache the value or search fol all occurences before move to the next placeholder.

public static string getVal(string key) vs public static string getVal(ReadOnlySpan key)

we still have better perfomance using the string version. Any suggestion to improve it?

// * Summary *

BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.928 (2004/?/20H1) Intel Core i9-10900 CPU 2.80GHz, 1 CPU, 20 logical and 10 physical cores .NET Core SDK=5.0.202 [Host] : .NET Core 3.1.14 (CoreCLR 4.700.21.16201, CoreFX 4.700.21.16208), X64 RyuJIT .NET Core 3.1 : .NET Core 3.1.14 (CoreCLR 4.700.21.16201, CoreFX 4.700.21.16208), X64 RyuJIT

Job=.NET Core 3.1 Toolchain=.NET Core 3.1

Method URL find replace Mean Error StdDev Rank Gen 0 Gen 1 Gen 2 Allocated
FastTokenReplace [196] 518.8 ns 4.63 ns 3.61 ns 1 0.2470 0.0038 - 2.52 KB
FastTokenReplaceImproveMem [196] 584.4 ns 6.84 ns 5.71 ns 2 0.2050 0.0010 - 2.09 KB
StandardTokenReplace [196] 4,242.7 ns 84.82 ns 94.27 ns 3 0.6866 - - 7.06 KB

Upvotes: 7

Views: 9047

Answers (2)

Guiorgy
Guiorgy

Reputation: 1734

The builtin method should be as optimized as it gets, so trying to make a general Replace function that is faster than that is unrealistic. Maybe using unsafe code and/or a dll written in something like C/C++ could get close. That said, if you create a function that works for you specific constraints, you may be able to squeeze something more performance. From your examples there're 3 potential ways to do the job faster:

  • Instead of replacing tokens one at a time, replace them all in a batch. This, of course, assumes taht you don't depend on recursive replacement, otherwise, this won't work for you.
  • Assuming evey token that needs to be replaced has the same delimiters, you can greatly reduce calls to IndexOf.
  • If you can guess the upper bound of the resulting length, you may skip computing the length and just preallocate memory.

Here are the results of my benchmarks (AMD Ryzen 9 5950X):

Method Mean Ratio Allocated Alloc Ratio
Normal 875.4 ns 1.00 8.02 KB 1.00
BatchNaive 5,512.5 ns 6.32 1.14 KB 0.14
BatchNaiveCached 2,838.7 ns 3.24 1.47 KB 0.18
BatchCached 964.8 ns 1.10 1.47 KB 0.18
BatchDelimiterCached 312.8 ns 0.36 1.47 KB 0.18
BatchDelimiterPreallocated 266.0 ns 0.30 2.5 KB 0.31
BatchDelimiterPreallocatedSafe 255.6 ns 0.29 2.5 KB 0.31

As you can see, the last three are about 3 times faster for your example. Not only that, but they also use much less (heap) memory.

Here's the code:

  • BatchDelimiterCached
const string origin = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";

var replaceTasks = new (string, string)[]
{
    ("camp", "campiagn_it_banner_size_360"),
    ("publisher_id", "78983"),
    ("transaction", "c1032072-f815-413b-a57c-4a027f681e60"),
    ("aff_sub1", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
    ("aff_site", "vbvsdgdavhdgdvjs_46_789-p90"),
    ("creative_id", "360x360"),
    ("ua", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
    ("ip", "192.168.1.1"),
    ("lang", "en")
};
char prefix = '{';
char postfix = '}';
// the above are arguments to the funtion

ReadOnlySpan<char> tmp = origin;

// we store the occurences in the queue while calculating the length of the final string
// so we don't have to search for them the 2nd time later
var occurrences = new Queue<(int at, int task)>();
int offset = 0;
int resultLength = tmp.Length;

int prefixIndex;
while ((prefixIndex = tmp.IndexOf(prefix)) != -1)
{
    (int at, int task) next = (prefixIndex, -1);
    for (int i = 0; i < replaceTasks.Length; i++)
    {
        // we expect the postfix to be at this place
        int postfixIndex = prefixIndex + replaceTasks[i].toReplace.Length + 1;
        if (tmp.Length > postfixIndex // check that we don't cross the bounds
            && tmp[postfixIndex] == postfix // check that the postfix IS were we expect it to be
            && tmp.Slice(prefixIndex + 1, postfixIndex - prefixIndex - 1).SequenceEqual(replaceTasks[i].toReplace)) // compare all the characters in between the delimiters
        {
            next.task = i;
            break;
        }
    }

    if (next.task == -1)
    {
        // this delimiter character is just part of the string, so skip it
        tmp = tmp.Slice(prefixIndex + 1);
        offset += prefixIndex + 1;
        continue;
    }

    int newStart = next.at + replaceTasks[next.task].toReplace.Length + 2;
    tmp = tmp.Slice(newStart, tmp.Length - newStart);

    occurrences.Enqueue((next.at + offset, next.task));
    offset += newStart;

    resultLength += replaceTasks[next.task].replaceWith.Length - replaceTasks[next.task].toReplace.Length - 2;
}

string result = string.Create(resultLength, (replaceTasks, occurrences), (chars, state) =>
{
    var replaceTasks = state.replaceTasks;
    var occurrences = state.occurrences;

    var position = 0;

    ReadOnlySpan<char> origin = origin;
    int lastStart = 0;

    while (occurrences.Count != 0)
    {
        (int at, int task) next = occurrences.Dequeue();

        origin.Slice(lastStart, next.at - lastStart).CopyTo(chars.Slice(position));
        replaceTasks[next.task].replaceWith.CopyTo(chars.Slice(position + next.at - lastStart));
        position += next.at - lastStart + replaceTasks[next.task].replaceWith.Length;
        lastStart = next.at + replaceTasks[next.task].toReplace.Length + 2;
    }

    origin.Slice(lastStart).CopyTo(chars.Slice(position));
});

return result;
  • BatchDelimiterPreallocatedSafe
const string origin = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";

var replaceTasks = new (string, string)[]
{
    ("camp", "campiagn_it_banner_size_360"),
    ("publisher_id", "78983"),
    ("transaction", "c1032072-f815-413b-a57c-4a027f681e60"),
    ("aff_sub1", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
    ("aff_site", "vbvsdgdavhdgdvjs_46_789-p90"),
    ("creative_id", "360x360"),
    ("ua", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
    ("ip", "192.168.1.1"),
    ("lang", "en")
};
char prefix = '{';
char postfix = '}';
// the above are arguments to the funtion

int resultLength = -1;

string result = string.Create(origin.Length * 2, (replaceTasks, prefix, postfix), (chars, state) =>
{
    var replaceTasks = state.ReplaceTasksWithoutDelimiters;
    char prefix = state.prefix;
    char postfix = state.postfix;

    var position = 0;

    ReadOnlySpan<char> tmp = origin;

    int prefixIndex;
    while ((prefixIndex = tmp.IndexOf(prefix)) != -1)
    {
        bool replaced = false;
        for (int i = 0; i < replaceTasks.Length; i++)
        {
            // we expect the postfix to be at this place
            int postfixIndex = prefixIndex + replaceTasks[i].toReplace.Length + 1;
            if (tmp.Length > postfixIndex // check that we don't cross the bounds
                && tmp[postfixIndex] == postfix // check that the postfix IS were we expect it to be
                && tmp.Slice(prefixIndex + 1, postfixIndex - prefixIndex - 1).SequenceEqual(replaceTasks[i].toReplace)) // compare all the characters in between the delimiters
            {
                if (position + prefixIndex + replaceTasks[i].replaceWith.Length <= chars.Length) // check if the following copy operations would exceed our preallocated memory bounds
                {
                    tmp.Slice(0, prefixIndex).CopyTo(chars.Slice(position));
                    replaceTasks[i].replaceWith.CopyTo(chars.Slice(position + prefixIndex));
                    position += prefixIndex + replaceTasks[i].replaceWith.Length;
                    tmp = tmp.Slice(postfixIndex + 1);
                    replaced = true;
                    break;
                }
                else
                {
                    return;
                }
            }
        }
        if (!replaced) { // this delimiter is just part of the string, so skip it
            tmp.Slice(0, prefixIndex + 1).CopyTo(chars.Slice(position));
            position += prefixIndex + 1;
            tmp = tmp.Slice(prefixIndex+ 1);
        }
    }

    if (position + tmp.Length <= chars.Length) // check if the following copy operation would exceed our preallocated memory bounds
    {
        // copy the remaining string
        tmp.CopyTo(chars.Slice(position));
        resultLength = position + tmp.Length;
    }
});

if (resultLength != -1)
{
    return result.Substring(0, resultLength); // there are many extrea null characters ('\0') at the end of our string, so we trim the string down
    
    // Note: the above realocates the string. if you can find a way to trim the string inplace, it should reduce the memory usage and speed a little.
}
else
{
    // the resulting string exceeded our preallocated memory. fallback to another method
    result = origin;

    for (int i = 0; i < ReplaceTasks.Length; i++)
    {
        (string toReplace, string replaceWith) = ReplaceTasks[i];
        result = result.Replace(toReplace, replaceWith);
    }

    return result;
}

Of course when replacing only 1 token, this will be slower and use more memory than the builting method. On the other hand, the bigger the batch size, the bigger the speed and memory improvement (even if the tokens in the batch aren't in the origin string):

Method BatchSize Mean Ratio Allocated Alloc Ratio
Normal 1 103.4 ns 1.00 736 B 1.00
BatchDelimiterPreallocatedSafe 1 154.4 ns 1.49 2192 B 2.98
Normal 50 1,870.1 ns 1.00 8208 B 1.00
BatchDelimiterPreallocatedSafe 50 254.8 ns 0.14 2560 B 0.31

Upvotes: 1

NetMage
NetMage

Reputation: 26917

Running some tests using the sample data, it seems String.Replace is highly optimal, and both StringBuilder.Replace and a variation of my IndexOfAny that returns which match was found first (based on improvements from CodeReview) are slower. Using an array of tuples for the replacements was fastest of my tests:

var s = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";

var replacementsa = new[] {
        ("{camp}", "campiagn_it_banner_size_360"),
        ("{publisher_id}", "78983"),
        ("{transaction}", "c1032072-f815-413b-a57c-4a027f681e60"),
        ("{aff_sub1}", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
        ("{aff_site}", "vbvsdgdavhdgdvjs_46_789-p90"),
        ("{creative_id}", "360x360"),
        ("{ua}", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
        ("{ip}", "192.168.1.1"),
        ("{lang}", "en")
    };

public static string MultiReplace(this string s, (string match,string replace)[] replacements) {
    for (int replacementNum = 0; replacementNum < replacements.Length; ++replacementNum)
        s = s.Replace(replacements[replacementNum].match, replacements[replacementNum].replace);

    return s;
}

Upvotes: 2

Related Questions