ConditionRacer
ConditionRacer

Reputation: 4498

Map tokens and trivia to line numbers

I'm trying to map tokens and trivia to line numbers using Roslyn.

Here's my latest attempt with help from @Kevin Pilch-Bisson below.

/// <summary>
/// Sample analyser that parses a fixed C# snippet and groups its tokens and
/// trivia by line number using <see cref="CustomWalker"/>.
/// </summary>
public class CSharpSlocAnalyser : ISlocAnalyser
{
    /// <summary>
    /// Parses the hard-coded sample, walks the resulting tree and reads the
    /// walker's line map.
    /// </summary>
    /// <param name="files">Not read by this implementation.</param>
    /// <param name="options">Not read by this implementation.</param>
    /// <returns>Always <c>1</c>.</returns>
    public long GetSlocFor(IEnumerable<FileInfo> files, SlocOptions options)
    {
        const string sample =
@"using /* Blah */
    System;
// Blah
public class MyClass
{
    public void MyMethod()
    {
        var blah = ""abc"";
    }
}";

        var walker = new CustomWalker();
        walker.Visit(CSharpSyntaxTree.ParseText(sample).GetRoot());

        // Kept so the populated map can be inspected (e.g. in a debugger).
        var lineMap = walker.LineMap;

        return 1;
    }

    /// <summary>
    /// Syntax walker that records every visited token and trivia under a
    /// line number in <see cref="LineMap"/>.
    /// </summary>
    public class CustomWalker : CSharpSyntaxWalker
    {
        /// <summary>Line number mapped to the tokens/trivia recorded for it, in visit order.</summary>
        public Dictionary<int, List<object>> LineMap { get; }

        /// <summary>Creates a walker that descends down to structured trivia.</summary>
        public CustomWalker() : base(SyntaxWalkerDepth.StructuredTrivia)
        {
            LineMap = new Dictionary<int, List<object>>();
        }

        /// <summary>Records the token, then lets the base walker continue.</summary>
        public override void VisitToken(SyntaxToken token)
        {
            Record(token, token.Parent, token.Span.Start);
            base.VisitToken(token);
        }

        /// <summary>Records the trivia, then lets the base walker continue.</summary>
        public override void VisitTrivia(SyntaxTrivia trivia)
        {
            Record(trivia, trivia.Token.Parent, trivia.Span.Start);
            base.VisitTrivia(trivia);
        }

        /// <summary>
        /// Climbs from <paramref name="ancestor"/> until a node whose text is at
        /// least <paramref name="position"/> characters long is found, looks up
        /// the line for <paramref name="position"/> in that node's text and
        /// appends <paramref name="item"/> to the bucket for that line.
        /// </summary>
        private void Record(object item, SyntaxNode ancestor, int position)
        {
            // NOTE(review): position comes from Span.Start, while the text belongs
            // to an ancestor node rather than the whole tree - confirm both are
            // measured from the same origin.
            var text = ancestor.GetText();
            while (text.Length < position)
            {
                ancestor = ancestor.Parent;
                text = ancestor.GetText();
            }

            var lineNumber = text.Lines.GetLineFromPosition(position).LineNumber;

            List<object> bucket;
            if (!LineMap.TryGetValue(lineNumber, out bucket))
            {
                bucket = new List<object>();
                LineMap.Add(lineNumber, bucket);
            }

            bucket.Add(item);
        }
    }
}

However this produces the following map:

Line 0
    UsingKeyword - 'using'
    WhitespaceTrivia - ' '
    MultiLineCommentTrivia - '/* Blah */'
    EndOfLineTrivia - '
'
Line 1
    IdentifierToken - 'System'
    WhitespaceTrivia - '    '
    SemicolonToken - ';'
    EndOfLineTrivia - '
'
Line 2
    SingleLineCommentTrivia - '// Blah'
Line 3
    PublicKeyword - 'public'
    EndOfLineTrivia - '
'
    WhitespaceTrivia - ' '
    ClassKeyword - 'class'
    WhitespaceTrivia - ' '
    IdentifierToken - 'MyClass'
    EndOfLineTrivia - '
'
    WhitespaceTrivia - '    '
Line 4
    OpenBraceToken - '{'
    EndOfLineTrivia - '
'
    PublicKeyword - 'public'
Line 5
    WhitespaceTrivia - ' '
    VoidKeyword - 'void'
    WhitespaceTrivia - ' '
    IdentifierToken - 'MyMethod'
    OpenParenToken - '('
    CloseParenToken - ')'
    EndOfLineTrivia - '
'
    WhitespaceTrivia - '    '
Line 6
    OpenBraceToken - '{'
    EndOfLineTrivia - '
'
    WhitespaceTrivia - '        '
Line 7
    IdentifierToken - 'var'
    WhitespaceTrivia - ' '
    IdentifierToken - 'blah'
    WhitespaceTrivia - ' '
    EqualsToken - '='
    WhitespaceTrivia - ' '
    StringLiteralToken - '"abc"'
    SemicolonToken - ';'
    EndOfLineTrivia - '
'
Line 8
    CloseBraceToken - '}'
    WhitespaceTrivia - '    '
    EndOfLineTrivia - '
'
Line 9
    CloseBraceToken - '}'
    EndOfFileToken - ''

Everything looks good until Line 2, which does not contain an end-of-line trivia. Line 3 then contains two end-of-line trivia, and everything seems to go off the rails from there.

What am I doing wrong? I just want to map tokens and trivia to their original source line numbers.

Upvotes: 4

Views: 1016

Answers (2)

ConditionRacer
ConditionRacer

Reputation: 4498

This worked:

/// <summary>
/// Sample analyser that parses a fixed C# snippet and groups its tokens and
/// trivia by source line number using <see cref="CustomWalker"/>.
/// </summary>
public class CSharpSlocAnalyser : ISlocAnalyser
{
    /// <summary>
    /// Parses the hard-coded sample, walks the resulting tree and reads the
    /// walker's line map.
    /// </summary>
    /// <param name="files">Not read by this implementation.</param>
    /// <param name="options">Not read by this implementation.</param>
    /// <returns>Always <c>1</c>.</returns>
    public long GetSlocFor(IEnumerable<FileInfo> files, SlocOptions options)
    {
        var tree = CSharpSyntaxTree.ParseText(
@"using /* Blah */
    System;
// Blah
public class MyClass
{
    public void MyMethod()
    {
        var blah = ""abc"";
    }
}");

        var root = tree.GetRoot();
        var walker = new CustomWalker();
        walker.Visit(root);
        var lineMap = walker.LineMap;

        return 1;
    }

    /// <summary>
    /// Syntax walker that records every visited token and trivia under the
    /// line number on which it starts.
    /// </summary>
    public class CustomWalker : CSharpSyntaxWalker
    {
        /// <summary>Line number mapped to the tokens/trivia starting on it, in visit order.</summary>
        public Dictionary<int, List<object>> LineMap { get; }

        /// <summary>Creates a walker that descends down to structured trivia.</summary>
        public CustomWalker() : base(SyntaxWalkerDepth.StructuredTrivia)
        {
            LineMap = new Dictionary<int, List<object>>();
        }

        /// <summary>Records the token under its start line, then continues the walk.</summary>
        public override void VisitToken(SyntaxToken token)
        {
            AddLine(token, token.Span.Start, token.SyntaxTree);

            base.VisitToken(token);
        }

        /// <summary>Records the trivia under its start line, then continues the walk.</summary>
        public override void VisitTrivia(SyntaxTrivia trivia)
        {
            AddLine(trivia, trivia.Span.Start, trivia.SyntaxTree);

            base.VisitTrivia(trivia);
        }

        /// <summary>
        /// Appends <paramref name="tokenOrTrivia"/> to the bucket for the line
        /// that contains <paramref name="position"/> in the text of
        /// <paramref name="tree"/>.
        /// </summary>
        /// <param name="tokenOrTrivia">The token or trivia to record.</param>
        /// <param name="position">Start position of the item within the whole tree.</param>
        /// <param name="tree">The syntax tree the item belongs to.</param>
        private void AddLine(object tokenOrTrivia, int position, SyntaxTree tree)
        {
            // Resolve the line against the tree's own source text instead of
            // asking the root node for its text on every visited token/trivia.
            var line = tree.GetText().Lines.GetLineFromPosition(position).LineNumber;

            List<object> items;
            if (!LineMap.TryGetValue(line, out items))
            {
                items = new List<object>();
                LineMap.Add(line, items);
            }

            items.Add(tokenOrTrivia);
        }
    }
}

Basically, I just needed to use the root of the syntax tree for the GetLineFromPosition call.

This produced the following map, which is correct:

Line 0
    UsingKeyword - 'using'
    WhitespaceTrivia - ' '
    MultiLineCommentTrivia - '/* Blah */'
    EndOfLineTrivia - '
'
Line 1
    IdentifierToken - 'System'
    WhitespaceTrivia - '    '
    SemicolonToken - ';'
    EndOfLineTrivia - '
'
Line 2
    SingleLineCommentTrivia - '// Blah'
    EndOfLineTrivia - '
'
Line 3
    PublicKeyword - 'public'
    WhitespaceTrivia - ' '
    ClassKeyword - 'class'
    WhitespaceTrivia - ' '
    IdentifierToken - 'MyClass'
    EndOfLineTrivia - '
'
Line 4
    OpenBraceToken - '{'
    EndOfLineTrivia - '
'
Line 5
    PublicKeyword - 'public'
    WhitespaceTrivia - '    '
    WhitespaceTrivia - ' '
    VoidKeyword - 'void'
    WhitespaceTrivia - ' '
    IdentifierToken - 'MyMethod'
    OpenParenToken - '('
    CloseParenToken - ')'
    EndOfLineTrivia - '
'
Line 6
    OpenBraceToken - '{'
    WhitespaceTrivia - '    '
    EndOfLineTrivia - '
'
Line 7
    IdentifierToken - 'var'
    WhitespaceTrivia - '        '
    WhitespaceTrivia - ' '
    IdentifierToken - 'blah'
    WhitespaceTrivia - ' '
    EqualsToken - '='
    WhitespaceTrivia - ' '
    StringLiteralToken - '"abc"'
    SemicolonToken - ';'
    EndOfLineTrivia - '
'
Line 8
    CloseBraceToken - '}'
    WhitespaceTrivia - '    '
    EndOfLineTrivia - '
'
Line 9
    CloseBraceToken - '}'
    EndOfFileToken - ''

Upvotes: 1

Kevin Pilch
Kevin Pilch

Reputation: 11615

The SourceText already tracks line endings in its Lines property. You can use something like the code in GetLineAndOffset.

Upvotes: 3

Related Questions