Reputation: 973
I am trying to parse various input strings into a list of objects, and some of those inputs may contain punctuation marks.
I have not succeeded in consuming arbitrary text up to the next string block (or whatever else follows it).
These test input strings are parsed correctly:
// Sample inputs that the parser handles correctly: the free text between the
// {{...}} blocks consists only of letters and digits.
var inputs = new List<string>{
    "prefix{{test='5'}}middle{{'test string'}}{{'test1'}}postfix",
    "{{test='5'}}middle{{'test string'}}{{'test1'}}postfix",
    "{{test='5'}}middle{{'test string'}}{{'test1'}}",
};
However, those inputs all contain only letters and digits between the blocks. This one does not work:
// Input whose free text contains punctuation and a lone closing brace.
string notWorkingInput = ".?n6y{{test='5'}}+*854d{{'test string'}}{{'test1'}}ret0}";
The expected result for notWorkingInput, in this order:
1. Name = ".?n6y"
2. Name = "test", Value = "5"
3. Name = "+*854d"
4. Name = "test string"
5. Name = "test1"
6. Name = "ret0}"

I use the Pidgin parsing library.
Full working code:
/// <summary>
/// Splits an input string into a sequence of <see cref="ITest"/> items:
/// <c>{{'literal'}}</c> blocks (<see cref="Test1"/>), <c>{{name=value}}</c> blocks
/// (<see cref="Test2"/>) and the free text between them (<see cref="Text"/>).
/// </summary>
public class TestParser {
    // Delimiters of a {{...}} block and the name/value separator.
    private static readonly Parser<char, string> ObjectStart = String("{{");
    private static readonly Parser<char, string> ObjectEnd = String("}}");
    private static readonly Parser<char, string> Equal = String("=");

    // Wraps a token parser in Try and skips whitespace that follows the token.
    private static Parser<char, T> Tok<T>(Parser<char, T> token) => Try(token).Before(SkipWhitespaces);
    private static Parser<char, string> Tok(string token) => Tok(String(token));

    // Double-quoted literal; a doubled quote ("") stands for one quote character.
    private static readonly Parser<char, string> StringLiteralDouble = Tok(Try(String("\"\"")).ThenReturn('"')
        .Or(Token(c => c != '"'))
        .ManyString()
        .Between(Char('"')));

    // Single-quoted literal; a doubled quote ('') stands for one quote character.
    private static readonly Parser<char, string> StringLiteralSingle = Tok(Try(String("''")).ThenReturn('\'')
        .Or(Token(c => c != '\''))
        .ManyString()
        .Between(Char('\'')));

    private static readonly Parser<char, string> StringLiteral = Tok(OneOf(StringLiteralDouble, StringLiteralSingle));

    // {{'literal'}} -> Test1 carrying the literal's content.
    private static readonly Parser<char, ITest> Test1Statement = Tok(StringLiteral.Between(Whitespaces).Between(ObjectStart, ObjectEnd))
        .Select<ITest>(s => new Test1(s));

    // {{name=value}} -> Test2; the value is a number, a string literal or a bare word.
    private static readonly Parser<char, ITest> Test2Statement = Tok(Letter.ManyString()).Before(Tok(Equal))
        .Then(OneOf(Num.Select<dynamic>(s => s), Real.Select<dynamic>(s => s), StringLiteral.Select<dynamic>(s => s), LetterOrDigit.ManyString().Select<dynamic>(s => s)), (s, v) => new Test2(s, v)).Between(ObjectStart, ObjectEnd)
        .Select<ITest>(s => s);

    // Free text: any characters (punctuation and single braces included) up to,
    // but not including, the next "{{" or the end of the input.
    // - Lookahead keeps the "{{" in the input so the block parsers can read it next.
    // - Try makes a lone '{' a non-consuming failure of the terminator, so it is
    //   treated as ordinary text.
    // - AtLeastOnce guarantees progress, so Statement.Many() cannot loop on an
    //   empty match.
    private static readonly Parser<char, ITest> TextStatement = Any
        .AtLeastOnceUntil(Lookahead(Try(ObjectStart)).IgnoreResult().Or(End))
        .Select<ITest>(chars => new Text(string.Concat(chars)));

    // Block parsers are tried first; free text is the fallback.
    private static readonly Parser<char, ITest> Statement = OneOf(Test1Statement, Test2Statement, TextStatement);
    private static readonly Parser<char, IEnumerable<ITest>> Statements = Statement.Many();

    /// <summary>Parses <paramref name="input"/> into its sequence of statements.</summary>
    /// <param name="input">The text to parse.</param>
    /// <returns>The Pidgin parse result wrapping the parsed items.</returns>
    public static Result<char, IEnumerable<ITest>> Parse(string input) => Statements.Parse(input);
}
/// <summary>
/// Common shape of every item produced by the parser.
/// </summary>
public interface ITest {
/// <summary>The item's text: a block's name or literal, or a run of free text.</summary>
string Name { get; }
}
/// <summary>
/// Item created from a block that holds only a string literal, e.g. <c>{{'test1'}}</c>.
/// </summary>
public class Test1 : ITest {
    /// <summary>The content of the string literal.</summary>
    public string Name { get; }

    /// <summary>Creates the item from the literal's content.</summary>
    public Test1(string name) => Name = name;
}
/// <summary>
/// Item created from a name/value block, e.g. <c>{{test='5'}}</c>.
/// </summary>
public class Test2 : ITest {
    /// <summary>The part before the <c>=</c> sign.</summary>
    public string Name { get; }

    /// <summary>The part after the <c>=</c> sign; its runtime type depends on what was parsed.</summary>
    public dynamic Value { get; }

    /// <summary>Creates the item from its name and value.</summary>
    public Test2(string name, dynamic value) => (Name, Value) = (name, value);
}
/// <summary>
/// Item created from free text found outside of <c>{{...}}</c> blocks.
/// </summary>
public class Text : ITest {
    /// <summary>The raw text.</summary>
    public string Name { get; }

    /// <summary>Creates the item from the raw text.</summary>
    public Text(string name) => Name = name;
}
UPDATE:
I tested AnyCharExcept('{', '}'). It actually works, but now a single bracket inside the text is a problem.
UPDATE 2:
If it is possible to add something like AnyExcept, that would also be good.
UPDATE 3:
The author of the library advised me to use Any.Until(Lookahead(String("{{")).Or(End)), but it is not working.
Upvotes: 1
Views: 583
Reputation: 3341
I wrote this custom parser which solves the problem for me:
/// <summary>
/// Parser that collects characters until the upcoming input starts with one of
/// the given terminator strings or the input runs out. The terminator itself is
/// left unread. Fails when no character was collected.
/// </summary>
internal class AnyStringExcept : Parser<char, string>
{
    private readonly string[] terminators;

    /// <summary>Creates the parser for the given terminator strings.</summary>
    public AnyStringExcept(params string[] terminators)
    {
        this.terminators = terminators;
    }

    /// <summary>
    /// Appends characters to <paramref name="result"/> while input remains and no
    /// terminator starts at the current position.
    /// </summary>
    /// <returns><c>true</c> when at least one character was collected.</returns>
    public override bool TryParse(ref ParseState<char> state, ref PooledList<Expected<char>> expecteds, out string result)
    {
        var collected = new StringBuilder();
        while (state.HasCurrent && !StartsWithTerminator(ref state))
        {
            collected.Append(state.Current);
            state.Advance();
        }

        result = collected.ToString();
        return result.Length > 0;
    }

    // Plain method taking the state by ref: a lambda (e.g. terminators.Any(...))
    // cannot capture a ref parameter.
    private bool StartsWithTerminator(ref ParseState<char> state)
    {
        foreach (var candidate in terminators)
        {
            var upcoming = state.LookAhead(candidate.Length).ToString();
            if (upcoming == candidate)
            {
                return true;
            }
        }

        return false;
    }
}
I am using it like this:
// Matches raw text up to (not including) the next "<ls:" or "</ls:".
var RAW_TEXT = new AnyStringExcept("<ls:", "</ls:");
This uses Pidgin 3.2.1. Hope it helps.
Upvotes: 0