Ryan
Ryan

Reputation: 7959

Split a string into an array of strings based on a delimiter

I'm trying to find a Delphi function that will split an input string into an array of strings based on a delimiter. I've found a lot from searching the web, but all seem to have their own issues and I haven't been able to get any of them to work.

I just need to split a string like: "word:doc,txt,docx" into an array based on ':'. The result would be ['word', 'doc,txt,docx']. How can I do that?

Upvotes: 96

Views: 338236

Answers (21)

Learning
Learning

Reputation: 127

I know this is an old post but here is my take on the question. As someone who deals regularly with a lot of delimited files I find this useful:

interface

type
  TStringArray = array[0..8] of string; // for my purposes

implementation

function aParse(aString : string):TStringArray;
var
  I, Count : Integer;
  IArray : TStringArray;
begin
  Count := 0;
  aString := aString + ','; // there are no trailing commas in my lines
  I := 0;
  repeat
    if aString[I] = ',' then
      begin
        if (I = 1) then
          begin
            IArray[Count] :='';
            Delete(aString,1,1);
            Inc(Count);
            I := 0;
          end
        else
          begin
            IArray[Count] := Copy(aString,1,I-1);
            Delete(aString,1,I);
            Inc(Count);
            I := 0;
          end;
      end;
    inc(I);
  until Count = 9;
  Result := IArray;
end;
    

My code may not be pretty (I know strings are 1 based in Windows but a lot of my lines start with ",,") but it works and will parse 100,000 lines in 0.207 seconds

Upvotes: 0

TByte
TByte

Reputation: 185

I initially praised the answer from @Frank as I needed something that works for Delphi 6 and it appeared to work. However, I have since found that that solution has a bug whereby it still splits on #13#10 regardless of delimiter. Works perfectly if you are not expecting lines in your source string.

I wrote a simple parser that only works for single character delimiters. Note: it puts the values into a TStrings, not into an array as the op requested, but can easily be modified to adapt to arrays.

Here is the procedure:

procedure SplitString(const ASource: string; const ADelimiter: Char; AValues: TStrings);
var
  i, lastDelimPos: Integer;
begin
  AValues.Clear;
  lastDelimPos := 0;

  for i := 1 to Length(ASource) do
    if ASource[i] = ADelimiter then
    begin
      if lastDelimPos = 0 then
        AValues.Add(CopyRange(ASource, 1, i - 1))
      else
        AValues.Add(CopyRange(ASource, lastDelimPos + 1, i - 1));
      lastDelimPos := i;
    end;

  if lastDelimPos = 0 then
    AValues.Add(ASource)
  else
    AValues.Add(CopyRange(ASource, lastDelimPos + 1, MaxInt));
end;

function CopyRange(const s: string; const AIndexFrom, AIndexTo: Integer): string;
begin
  Result := Copy(s, AIndexFrom, AIndexTo - AIndexFrom + 1);
end;

Note: as per C#'s string.Split(), a blank input string will result in a single blank string in the TStrings. Similarly, just having a delimiter by itself as the input string would result in two blank strings in the TStrings.

Here is the rough test code I used to ensure it's solid:

procedure AddTest(const ATestLine: string; const AExpectedResult: array of string);
var
  expectedResult: TStringList;
  i: Integer;
begin
  expectedResult := TStringList.Create;
  for i := 0 to Length(AExpectedResult) - 1 do
    expectedResult.Add(AExpectedResult[i]);
  testStrings.AddObject(ATestLine, expectedResult);
end;

//====================

AddTest('test', ['test']);
AddTest('', ['']);
AddTest(',', ['', '']);
AddTest('line1' + #13#10 + ',line 2,line3, line 4', ['line1' + #13#10, 'line 2', 'line3', ' line 4']);
AddTest('line1' + #13#10 + 'd,line 2,line3, line 4', ['line1' + #13#10 + 'd', 'line 2', 'line3', ' line 4']);
AddTest('line1,line 2,line3, line 4', ['line1', 'line 2', 'line3', ' line 4']);
AddTest('test, ', ['test', ' ']);
AddTest('test,', ['test', '']);
AddTest('test1,test2 ', ['test1', 'test2 ']);
AddTest('test1,test2', ['test1', 'test2']);
AddTest('test1,test2, ', ['test1', 'test2', ' ']);
AddTest('test1,test2,', ['test1', 'test2', '']);

//====================

testFailed := False;
for i := 0 to testStrings.Count - 1 do
begin
  SplitString2(testStrings[i], ',', f);
  log('Test ID=%d', [i]);
  log('    Test String="%s"', [testStrings[i]]);
  log('    Item count=%d', [f.Count]);
  testResult := TStringList(TestStrings.Objects[i]);
  if testResult.Count <> f.Count then
  begin
    Log('!!');
    Log('!! Count mismatch. Got=%d, Expected=%d', [f.Count, testResult.Count]);
    Log('!!');

    testFailed := True;
  end;

  for j := 0 to f.Count - 1 do
  begin
    log('    Item %d="%s"    (len=%d)', [j, f[j], Length(f[j])]);
    if testResult[j] <> f[j] then
    begin
      Log('!!');
      Log('!! Text mismatch. Got="%s", Expected="%s"', [f[j], testResult[j]]);
      Log('!!');

      testFailed := True;
    end;
  end;
end;

Edit: code for the CopyRange() function was missing, added now. My bad.

Upvotes: 0

marvinbraga
marvinbraga

Reputation: 11

interface

uses
  Classes;

type
  TStringArray = array of string;

  TUtilStr = class
    class function Split(const AValue: string; const ADelimiter: Char = ';'; const AQuoteChar: Char = '"'): TStringArray; static;
  end;


implementation

{ TUtilStr }

class function TUtilStr.Split(const AValue: string; const ADelimiter: Char; const AQuoteChar: Char): TStringArray;
var
  LSplited: TStringList;
  LText: string;
  LIndex: Integer;
begin
  LSplited := TStringList.Create;
  try
    LSplited.StrictDelimiter := True;
    LSplited.Delimiter := ADelimiter;
    LSplited.QuoteChar := AQuoteChar;
    LSplited.DelimitedText := AValue;

    SetLength(Result, LSplited.Count);
    for LIndex := 0 to LSplited.Count - 1 do
    begin
      Result[LIndex] := LSplited[LIndex];
    end;
  finally
    LSplited.Free;
  end;
end;

end.

Upvotes: 0

John Boe
John Boe

Reputation: 3611

My favourite function for splitting:

procedure splitString(delim: char; s: string; ListOfStrings: TStrings);
var temp: string;
    i: integer;
begin
   ListOfStrings.Clear;
   for i:=1 to length(s) do
    begin
      if s[i] = delim then
        begin
          ListOfStrings.add(temp);
          temp := '';
        end
      else
        begin
          temp := temp + s[i];
          if i=length(s) then
             ListOfStrings.add(temp);
        end;
    end;
    ListOfStrings.add(temp);
end;

Upvotes: 1

Neri Bocchi
Neri Bocchi

Reputation: 61

procedure SplitCSV(S:STRING;out SL:TStringList);
var c,commatext:string;
  a,b,up:integer;
begin
   c:=s.Replace(' ','<SPACE>');   //curate spaces

   //first ocurrence of "
   a:=pos('"',c);
   b:=pos('"',c,a+1);
   if (a>0) and (b>0) then
   begin
     commatext:=commatext+copy(c,0,a-1);
     commatext:=commatext+copy(c,a,b-a+1).Replace(',','<COMMA>');   //curate commas
     up:=b+1;
   end
   else
     commatext:=c;

   //while continue discovering "
   while (a>0) and (b>0) do
   begin
     a:=Pos('"',c,b+1);
     b:=pos('"',c,a+1);
     if (a>0) and (b>0) then
     begin
       commatext:=commatext+copy(c,up,a-up);
       commatext:=commatext+copy(c,a,b-a+1).Replace(',','<COMMA>'); //curate commas
       up:=b+1;
     end;
   end;
   //last piece of text end  
   if up<c.Length then
     commatext:=commatext+copy(c,up,c.Length-up+1);

   //split text using CommaText
   sl.CommaText:=commatext;

   sl.Text:=sl.Text.Replace('<COMMA>',',');   //curate commas
   sl.Text:=sl.Text.Replace('<SPACE>',' ');   //curate spaces
end;

Upvotes: 0

alex
alex

Reputation: 1156

You can use StrUtils.SplitString.

function SplitString(const S, Delimiters: string): TStringDynArray;

Its description from the documentation:

Splits a string into different parts delimited by the specified delimiter characters.

SplitString splits a string into different parts delimited by the specified delimiter characters. S is the string to be split. Delimiters is a string containing the characters defined as delimiters.

SplitString returns an array of strings of type System.Types.TStringDynArray that contains the split parts of the original string.

Upvotes: 61

user3609960
user3609960

Reputation: 1

For delphi 2010, you need to create your own split function.

function Split(const Texto, Delimitador: string): TStringArray;
var
  i: integer;
  Len: integer;
  PosStart: integer;
  PosDel: integer;
  TempText:string;
begin
  i := 0;
  SetLength(Result, 1);
  Len := Length(Delimitador);
  PosStart := 1;
  PosDel := Pos(Delimitador, Texto);
  TempText:=  Texto;
  while PosDel > 0 do
    begin
      Result[i] := Copy(TempText, PosStart, PosDel - PosStart);
      PosStart := PosDel + Len;
      TempText:=Copy(TempText, PosStart, Length(TempText));
      PosDel := Pos(Delimitador, TempText);
      PosStart := 1;
      inc(i);
      SetLength(Result, i + 1);
    end;
  Result[i] := Copy(TempText, PosStart, Length(TempText));
end;

You can refer to it as such

type
  TStringArray = array of string;
var Temp2:TStringArray;
Temp1="hello:world";
Temp2=Split(Temp1,':')

Upvotes: 0

Rez Moss
Rez Moss

Reputation: 4604

The base of NGLG answer https://stackoverflow.com/a/8811242/6619626 you can use the following function:

type
OurArrayStr=array of string;

function SplitString(DelimeterChars:char;Str:string):OurArrayStr;
var
seg: TStringList;
i:integer;
ret:OurArrayStr;
begin
    seg := TStringList.Create;
    ExtractStrings([DelimeterChars],[], PChar(Str), seg);
    for i:=0 to seg.Count-1 do
    begin
         SetLength(ret,length(ret)+1);
         ret[length(ret)-1]:=seg.Strings[i];
    end;
    SplitString:=ret;
    seg.Free;
end;

It works in all Delphi versions.

Upvotes: 0

David Ulbrich
David Ulbrich

Reputation: 1

*

//Basic functionality of a TStringList solves this:


uses Classes  //TStringList 
    ,types    //TStringDynArray
    ,SysUtils //StringReplace()
    ;

....

 //--------------------------------------------------------------------------
 function _SplitString(const s:string; const delimiter:Char):TStringDynArray;
  var sl:TStringList;
      i:integer;
  begin
  sl:=TStringList.Create;

  //separete delimited items by sLineBreak;TStringlist will do the job:
  sl.Text:=StringReplace(s,delimiter,sLineBreak,[rfReplaceAll]);

  //return the splitted string as an array:
  setlength(Result,sl.count);
  for i:=0 to sl.Count-1
   do Result[i]:=sl[i];

  sl.Free;
  end;



//To split a FileName (last item will be the pure filename itselfs):

 function _SplitPath(const fn:TFileName):TStringDynArray;
  begin
  result:=_SplitString(fn,'\');
  end;

*

Upvotes: 0

Ihor Konovalenko
Ihor Konovalenko

Reputation: 1407

var  
    su  : string;        // What we want split
    si  : TStringList;   // Result of splitting
    Delimiter : string;
    ...
    Delimiter := ';';
    si.Text := ReplaceStr(su, Delimiter, #13#10);

Lines in si list will contain splitted strings.

Upvotes: 7

LU RD
LU RD

Reputation: 34899

Using the SysUtils.TStringHelper.Split function, introduced in Delphi XE3:

var
  MyString: String;
  Splitted: TArray<String>;
begin
  MyString := 'word:doc,txt,docx';
  Splitted := MyString.Split([':']);
end.

This will split a string with a given delimiter into an array of strings.

Upvotes: 57

RRUZ
RRUZ

Reputation: 136401

you can use the TStrings.DelimitedText property for split an string

check this sample

program Project28;

{$APPTYPE CONSOLE}

uses
  Classes,
  SysUtils;

procedure Split(Delimiter: Char; Str: string; ListOfStrings: TStrings) ;
begin
   ListOfStrings.Clear;
   ListOfStrings.Delimiter       := Delimiter;
   ListOfStrings.StrictDelimiter := True; // Requires D2006 or newer.
   ListOfStrings.DelimitedText   := Str;
end;


var
   OutPutList: TStringList;
begin
   OutPutList := TStringList.Create;
   try
     Split(':', 'word:doc,txt,docx', OutPutList) ;
     Writeln(OutPutList.Text);
     Readln;
   finally
     OutPutList.Free;
   end;
end.

UPDATE

See this link for an explanation of StrictDelimiter.

Upvotes: 105

Dennis
Dennis

Reputation: 170

This will solve your problem

interface
   TArrayStr = Array Of string;

implementation

function SplitString(Text: String): TArrayStr;
var
   intIdx: Integer;
   intIdxOutput: Integer;
const
   Delimiter = ';';
begin
   intIdxOutput := 0;
   SetLength(Result, 1);
   Result[0] := ''; 

   for intIdx := 1 to Length(Text) do
   begin
      if Text[intIdx] = Delimiter then
      begin
         intIdxOutput := intIdxOutput + 1;
         SetLength(Result, Length(Result) + 1);
      end
      else
         Result[intIdxOutput] := Result[intIdxOutput] + Text[intIdx];
   end;
end;

Upvotes: 3

Aleš Kocur
Aleš Kocur

Reputation: 1908

I wrote this function which returns linked list of separated strings by specific delimiter. Pure free pascal without modules.

Program split_f;

type
    PTItem = ^TItem;
    TItem = record
        str : string;
        next : PTItem;
    end;

var
    s : string;
    strs : PTItem;

procedure split(str : string;delim : char;var list : PTItem);
var
    i : integer;
    buff : PTItem;
begin
    new(list);
    buff:= list;
    buff^.str:='';
    buff^.next:=nil;

    for i:=1 to length(str) do begin
        if (str[i] = delim) then begin
            new(buff^.next);
            buff:=buff^.next;
            buff^.str := '';
            buff^.next := nil;
        end
        else
        buff^.str:= buff^.str+str[i];
    end;
end;

procedure print(var list:PTItem);
var
    buff : PTItem;
begin
    buff := list;
    while buff<>nil do begin
        writeln(buff^.str);
        buff:= buff^.next;
    end;
end;

begin

    s := 'Hi;how;are;you?';

    split(s, ';', strs);
    print(strs);


end.

Upvotes: 5

bob_saginowski
bob_saginowski

Reputation: 1429

You can make your own function which returns TArray of string:

function mySplit(input: string): TArray<string>;
var
  delimiterSet: array [0 .. 0] of char; 
     // split works with char array, not a single char
begin
  delimiterSet[0] := '&'; // some character
  result := input.Split(delimiterSet);
end;

Upvotes: 6

Arioch &#39;The
Arioch &#39;The

Reputation: 16045

Jedi Code Library provides an enhanced StringList with built-in Split function, that is capable of both adding and replacing the existing text. It also provides reference-counted interface. So this can be used even with older Delphi versions that have no SplitStrings and without careful and a bit tedious customizations of stock TStringList to only use specified delimiters.

For example given text file of lines like Dog 5 4 7 one can parse them using:

var slF, slR: IJclStringList; ai: TList<integer>; s: string; i: integer;
    action: procedure(const Name: string; Const Data: array of integer);

slF := TJclStringList.Create; slF.LoadFromFile('some.txt');
slR := TJclStringList.Create;
for s in slF do begin
    slR.Split(s, ' ', true);
    ai := TList<Integer>.Create;
    try
       for i := 1 to slR.Count - 1 do
           ai.Add(StrToInt(slR[i]));
       action(slR[0], ai.ToArray);
    finally ai.Free; end;
end; 

http://wiki.delphi-jedi.org/wiki/JCL_Help:IJclStringList.Split@string@string@Boolean

Upvotes: 3

Delphi 7
Delphi 7

Reputation: 71

Explode is very high speed function, source alhoritm get from TStrings component. I use next test for explode: Explode 134217733 bytes of data, i get 19173962 elements, time of work: 2984 ms.

Implode is very low speed function, but i write it easy.

{ ****************************************************************************** }
{  Explode/Implode (String <> String array)                                      }
{ ****************************************************************************** }
function Explode(S: String; Delimiter: Char): Strings; overload;
var I, C: Integer; P, P1: PChar;
begin
    SetLength(Result, 0);
    if Length(S) = 0 then Exit;
    P:=PChar(S+Delimiter); C:=0;
    while P^ <> #0 do begin
       P1:=P;
       while (P^ <> Delimiter) do P:=CharNext(P);
       Inc(C);
       while P^ in [#1..' '] do P:=CharNext(P);
       if P^ = Delimiter then begin
          repeat
           P:=CharNext(P);
          until not (P^ in [#1..' ']);
       end;
    end;
    SetLength(Result, C);
    P:=PChar(S+Delimiter); I:=-1;
    while P^ <> #0 do begin
       P1:=P;
       while (P^ <> Delimiter) do P:=CharNext(P);
       Inc(I); SetString(Result[I], P1, P-P1);
       while P^ in [#1..' '] do P:=CharNext(P);
       if P^ = Delimiter then begin
          repeat
           P:=CharNext(P);
          until not (P^ in [#1..' ']);
       end;
    end;
end;

function Explode(S: String; Delimiter: Char; Index: Integer): String; overload;
var I: Integer; P, P1: PChar;
begin
    if Length(S) = 0 then Exit;
    P:=PChar(S+Delimiter); I:=1;
    while P^ <> #0 do begin
       P1:=P;
       while (P^ <> Delimiter) do P:=CharNext(P);
        SetString(Result, P1, P-P1);
        if (I <> Index) then Inc(I) else begin
           SetString(Result, P1, P-P1); Exit;
        end;
       while P^ in [#1..' '] do P:=CharNext(P);
       if P^ = Delimiter then begin
          repeat
           P:=CharNext(P);
          until not (P^ in [#1..' ']);
       end;
    end;
end;

function Implode(S: Strings; Delimiter: Char): String;
var iCount: Integer;
begin
     Result:='';
     if (Length(S) = 0) then Exit;
     for iCount:=0 to Length(S)-1 do
     Result:=Result+S[iCount]+Delimiter;
     System.Delete(Result, Length(Result), 1);
end;

Upvotes: 7

NGLN
NGLN

Reputation: 43649

There is no need for engineering a Split function. It already exists, see: Classes.ExtractStrings.

Use it in a following manner:

program Project1;

{$APPTYPE CONSOLE}

uses
  Classes;

var
  List: TStrings;
begin
  List := TStringList.Create;
  try
    ExtractStrings([':'], [], PChar('word:doc,txt,docx'), List);
    WriteLn(List.Text);
    ReadLn;
  finally
    List.Free;
  end;
end.

And to answer the question fully; List represents the desired array with the elements:

List[0] = 'word'
List[1] = 'doc,txt,docx'

Upvotes: 81

Frank
Frank

Reputation: 492

I always use something similar to this:

Uses
   StrUtils, Classes;

Var
  Str, Delimiter : String;
begin
  // Str is the input string, Delimiter is the delimiter
  With TStringList.Create Do
  try
    Text := ReplaceText(S,Delim,#13#10);

    // From here on and until "finally", your desired result strings are
    // in strings[0].. strings[Count-1)

  finally
    Free; //Clean everything up, and liberate your memory ;-)
  end;

end;

Upvotes: 22

Deltics
Deltics

Reputation: 23036

Similar to the Explode() function offered by Mef, but with a couple of differences (one of which I consider a bug fix):

  type
    TArrayOfString = array of String;


  function SplitString(const aSeparator, aString: String; aMax: Integer = 0): TArrayOfString;
  var
    i, strt, cnt: Integer;
    sepLen: Integer;

    procedure AddString(aEnd: Integer = -1);
    var
      endPos: Integer;
    begin
      if (aEnd = -1) then
        endPos := i
      else
        endPos := aEnd + 1;

      if (strt < endPos) then
        result[cnt] := Copy(aString, strt, endPos - strt)
      else
        result[cnt] := '';

      Inc(cnt);
    end;

  begin
    if (aString = '') or (aMax < 0) then
    begin
      SetLength(result, 0);
      EXIT;
    end;

    if (aSeparator = '') then
    begin
      SetLength(result, 1);
      result[0] := aString;
      EXIT;
    end;

    sepLen := Length(aSeparator);
    SetLength(result, (Length(aString) div sepLen) + 1);

    i     := 1;
    strt  := i;
    cnt   := 0;
    while (i <= (Length(aString)- sepLen + 1)) do
    begin
      if (aString[i] = aSeparator[1]) then
        if (Copy(aString, i, sepLen) = aSeparator) then
        begin
          AddString;

          if (cnt = aMax) then
          begin
            SetLength(result, cnt);
            EXIT;
          end;

          Inc(i, sepLen - 1);
          strt := i + 1;
        end;

      Inc(i);
    end;

    AddString(Length(aString));

    SetLength(result, cnt);
  end;

Differences:

  1. aMax parameter limits the number of strings to be returned
  2. If the input string is terminated by a separator then a nominal "empty" final string is deemed to exist

Examples:

SplitString(':', 'abc') returns      :    result[0]  = abc

SplitString(':', 'a:b:c:') returns   :    result[0]  = a
                                          result[1]  = b
                                          result[2]  = c
                                          result[3]  = <empty string>

SplitString(':', 'a:b:c:', 2) returns:    result[0]  = a
                                          result[1]  = b

It is the trailing separator and notional "empty final element" that I consider the bug fix.

I also incorporated the memory allocation change I suggested, with refinement (I mistakenly suggested the input string might at most contain 50% separators, but it could conceivably of course consist of 100% separator strings, yielding an array of empty elements!)

Upvotes: 16

Leo
Leo

Reputation: 38180

Here is an implementation of an explode function which is available in many other programming languages as a standard function:

type 
  TStringDynArray = array of String;

function Explode(const Separator, S: string; Limit: Integer = 0): TStringDynArray; 
var 
  SepLen: Integer; 
  F, P: PChar; 
  ALen, Index: Integer; 
begin 
  SetLength(Result, 0); 
  if (S = '') or (Limit < 0) then Exit; 
  if Separator = '' then 
  begin 
    SetLength(Result, 1); 
    Result[0] := S; 
    Exit; 
  end; 
  SepLen := Length(Separator); 
  ALen := Limit; 
  SetLength(Result, ALen); 

  Index := 0; 
  P := PChar(S); 
  while P^ <> #0 do 
  begin 
    F := P; 
    P := AnsiStrPos(P, PChar(Separator)); 
    if (P = nil) or ((Limit > 0) and (Index = Limit - 1)) then P := StrEnd(F); 
    if Index >= ALen then 
    begin 
      Inc(ALen, 5); 
      SetLength(Result, ALen); 
    end; 
    SetString(Result[Index], F, P - F); 
    Inc(Index); 
    if P^ <> #0 then Inc(P, SepLen); 
  end; 
  if Index < ALen then SetLength(Result, Index); 
end; 

Sample usage:

var
  res: TStringDynArray;
begin
  res := Explode(':', yourString);

Upvotes: 5

Related Questions