tim93
tim93

Reputation: 109

delphi substring count performance

I have some functions to count a substring in a string delimited by spaces:

program Project2;

{$APPTYPE CONSOLE}

{$R *.res}

uses System.SysUtils,windows;

//s=string to search in
//t=string to search for
//cs=delimiters

function f0(const s,t:string;const cs:tsyscharset):integer;
var
  p,q:pchar;
  u:string;
begin
  result:=0;
  p:=pointer(s);
  if p<>nil then
    while p^<>#0 do
    begin
      while (p^<>#0) and charinset(p^,cs) do inc(p);
      q:=p;
      while (p^<>#0) and not charinset(p^,cs) do inc(p);
      if p>q then
      begin
        setstring(u,q,p-q);
        //writeln('[',u,']');
        if u=t then inc(result);
      end;
    end;
end;

function f1(const s,t:string;const cs:tsyscharset):integer;
var
  i,j,l:integer;
  u:string;
begin
  result:=0;
  l:=length(s);
  i:=1;
  while i<=l do
  begin
    while (i<=l) and charinset(s[i],cs) do inc(i);
    j:=i;
    while (i<=l) and not charinset(s[i],cs) do inc(i);
    if i>j then
    begin
      u:=copy(s,j,i-j);
      //writeln('[',u,']');
      if u=t then inc(result);
    end;
  end;
end;

function f2(const s,t:string;const cs:tsyscharset):integer;
var
  i,j,l:integer;
  u:string;
begin
  result:=0;
  l:=length(s);
  i:=1;
  while i<=l do
  begin
    while (i<=l) and charinset(s[i],cs) do inc(i);
    j:=i;
    while (i<=l) and not charinset(s[i],cs) do inc(i);
    if i>j then
    begin
      setlength(u,i-j);
      move(s[j],pointer(u)^,(i-j)*2);
      //writeln('[',u,']');
      if u=t then inc(result);
    end;
  end;
end;

type
  tfunc=function(const s,t:string;const cs:tsyscharset):integer;

const
  s='  de foo   de'+#13+' baz blah de  de blah'+#10+' asd de qwe rtz   un f'+#9+' t de ds w de  ';
  t='de';
  cs=[' ',#13,#10,#9];//CR,LF,TAB
  n=5000000;

procedure time(i:integer;f:tfunc);
var
  j,k:integer;
  start,finish,freq:int64;
begin
  QueryPerformanceCounter(start);
  for j := 1 to n do k:=f(s,t,cs);
  QueryPerformanceCounter(finish);
  QueryPerformanceFrequency(freq);
  Writeln(Format('f%u:%u:%.3fs',[i,k,(finish-start)/freq]));
end;

const
  funcs:array[0..2] of tfunc=(f0,f1,f2);
var
  i:integer;
begin
  setpriorityclass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
  for i := low(funcs) to high(funcs) do time(i,funcs[i]);
  readln
end.

The speed results are

f0:7:7,624s
f1:7:8,066s
f2:7:6,454s

My first question is: Why is f2 faster than f0?

My second question is: Do you have an idea how I can optimize this even more (without inline assembler)?

IDE: Delphi XE2 (Unicode)

Upvotes: 0

Views: 931

Answers (1)

Arnaud Bouchez
Arnaud Bouchez

Reputation: 43033

Why is it faster? Impossible to guess without a profiler. And it will depend on the CPU you run on.

I guess the faster will be processed by f2 since SetLength() will avoid reallocation most of the time, whereas SetString() will always release the memory before allocating it. It depends on your case.

For even faster process, with the same algorithm (you may find something even more optimized), I'd try:

const
  cs = [32,13,10,9];  

function f2(const s,t:string):integer;
var
  i,j,l:integer;
begin
  result := 0;
  l := length(s);
  i := 1;
  while i<=l do
  begin
    while (i<=l) and (ord(s[i]) in cs) do inc(i);
    j:=i;
    while (i<=l) and not(ord(s[i]) in cs) do inc(i);
    if (i>j) and CompareMem(pointer(s[j]),pointer(t),(i-j)*sizeof(char)) then
      inc(result);
  end;
end;

That is:

  • The CharInSet is not worth it for control chars;
  • Using a static set is IMHO faster than passing a parameter (but you can use a parameter if you want);
  • Do not use a temporary variable (this is the slow part of your algorithm, I think), but a good old CompareMem - there is still room for improvement here.

You may try writing:

const 
  cs: set of 0..32 = [32,13,10,9];

Which may be a bit faster, since the generated ASM code will be a fast bit-looking instruction, whereas the first version will use a list of comparison. But I think this won't make a big difference.

In all cases, I guess that you should better:

  • Not optimize too early - use first a profiler to guess what is slow and worth rewriting;
  • Optimize with real data, not a simplified fixed set as in your benchmark: you would optimized for one kind of data, whereas it won't be necessary true with real data.

Upvotes: 5

Related Questions