Reputation: 5027
I need to get the line numbers of certain keys of a YAML file.
Please note, this answer does not solve the issue: I do use ruamel.yaml, and the answers do not work with ordered maps.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ruamel import yaml
# Load with the round-trip loader so that mappings come back as the
# Commented* container types shown in the printed result, which expose `.lc`.
# The nesting indentation inside the YAML text is significant: key5/key6 must
# sit below key4 to end up inside its !!omap.
data = yaml.round_trip_load("""
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: item5
    - key6: item6
""")
print(data)
As a result I get this:
CommentedMap([('key1', CommentedOrderedMap([('key2', 'item2'), ('key3', 'item3'), ('key4', CommentedOrderedMap([('key5', 'item5'), ('key6', 'item6')]))]))])
which does not give access to the line numbers, except for the !!omap keys:
print(data['key1'].lc.line) # output: 1
print(data['key1']['key4'].lc.line) # output: 4
but:
print(data['key1']['key2'].lc.line) # output: AttributeError: 'str' object has no attribute 'lc'
Indeed, data['key1']['key2'] is a str.
I've found a workaround:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ruamel import yaml
# Sample document for the workaround below.  The nesting indentation inside
# the YAML text is significant: key5/key6 must sit below key4 to end up
# inside its !!omap.
DATA = yaml.round_trip_load("""
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: item5
    - key6: item6
""")
def get_line_nb(data):
    """Print ``key|value found in line N`` for each non-mapping entry of *data*.

    *data* is expected to be a mapping that exposes ``data.lc.line`` (as the
    ruamel.yaml round-trip containers do).  Nested mappings are walked
    recursively; anything that is not a ``dict`` is ignored.

    The line of a key is taken from ``data.lc.data[key][0]`` when the
    container has recorded per-key positions there.  Otherwise it is
    estimated as ``data.lc.line + index + 1``, which is only right when every
    preceding entry occupies exactly one line.

    Values that are lists are printed as a whole and not descended into.
    """
    if not isinstance(data, dict):
        return
    offset = data.lc.line
    # NOTE(review): ruamel.yaml is believed to fill `lc.data` with
    # [key_line, key_col, value_line, value_col] for plain mappings and to
    # leave it as None for !!omap -- confirm against the installed version.
    recorded = getattr(data.lc, 'data', None) or {}
    for i, key in enumerate(data):
        value = data[key]
        if isinstance(value, dict):
            get_line_nb(value)
            continue
        if key in recorded:
            line = recorded[key][0]
        else:
            # Fallback estimate: one line per entry below the container start.
            line = offset + i + 1
        print('{}|{} found in line {}\n'
              .format(key, value, line))
# Walk the loaded sample document and print a line number for each scalar entry.
get_line_nb(DATA)
output:
key2|item2 found in line 2
key3|item3 found in line 3
key5|item5 found in line 5
key6|item6 found in line 6
but this looks a little bit "dirty". Is there a more proper way of doing it?
EDIT: this workaround is not only dirty, but only works for simple cases like the one above, and will give wrong results as soon as there are nested lists in the way
Upvotes: 5
Views: 2891
Reputation: 21
I have modified the @Anthon solution based on ruamel.yaml version 0.17.17, handling scalar, int and bool positions.
class MyLiteralScalarString(ruamel.yaml.scalarstring.LiteralScalarString):
    """Literal (``|``) scalar string with an extra ``lc`` slot for position data."""

    # Only the new slot is declared here.  NOTE(review): `comment` is expected
    # to be a slot of the ruamel.yaml base class already; redeclaring an
    # inherited slot is documented by Python as leaving the program's meaning
    # undefined -- confirm the base class against the installed version.
    __slots__ = ('lc',)
class MyFoldedScalarString(ruamel.yaml.scalarstring.FoldedScalarString):
    """Folded (``>``) scalar string with an extra ``lc`` slot for position data."""

    # Only the new slot is declared here.  NOTE(review): `fold_pos` and
    # `comment` are expected to be slots of the ruamel.yaml base class
    # already; redeclaring inherited slots is documented by Python as leaving
    # the program's meaning undefined -- confirm against the installed version.
    __slots__ = ('lc',)
class MyDoubleQuotedScalarString(ruamel.yaml.scalarstring.DoubleQuotedScalarString):
    """Double-quoted scalar string with an extra ``lc`` slot for position data."""

    # The trailing comma makes this a real one-element tuple, not a bare string.
    __slots__ = ('lc',)
class MySingleQuotedScalarString(ruamel.yaml.scalarstring.SingleQuotedScalarString):
    """Single-quoted scalar string with an extra ``lc`` slot for position data."""

    # The trailing comma makes this a real one-element tuple, not a bare string.
    __slots__ = ('lc',)
class MyPlainScalarString(ruamel.yaml.scalarstring.PlainScalarString):
    """Plain (unquoted) scalar string with an extra ``lc`` slot for position data."""

    # The trailing comma makes this a real one-element tuple, not a bare string.
    __slots__ = ('lc',)
class MyScalarInt(ruamel.yaml.scalarint.ScalarInt):
    """Integer scalar that can carry position data in an ``lc`` attribute."""

    # Class-level default; replaced per instance once a position is attached.
    lc = None
class MyScalarBoolean(ruamel.yaml.scalarbool.ScalarBoolean):
    """Boolean scalar that can carry position data in an ``lc`` attribute."""

    # Class-level default; replaced per instance once a position is attached.
    lc = None
class MyConstructor(ruamel.yaml.constructor.RoundTripConstructor):
    """Round-trip constructor whose scalar results carry an ``lc`` attribute.

    Strings, integers and booleans are returned as the ``My...`` wrapper
    types.  Each result gets a ``ruamel.yaml.comments.LineCol`` whose ``line``
    and ``col`` are copied from the start mark of the node it was built from.
    """

    def __init__(self, preserve_quotes=None, loader=None):
        """Initialise the parent and make sure the loader has ``comment_handling``.

        ``construct_scalar`` reads ``self.loader.comment_handling``, so the
        attribute is defaulted to ``None`` on loaders that do not define it.
        """
        super().__init__(preserve_quotes=preserve_quotes, loader=loader)
        # `loader` defaults to None; there is nothing to patch in that case.
        if self.loader is not None and not hasattr(self.loader, 'comment_handling'):
            self.loader.comment_handling = None

    @staticmethod
    def _mark_position(value, node):
        """Attach *node*'s start line/column to *value* as ``value.lc``; return *value*."""
        value.lc = ruamel.yaml.comments.LineCol()
        value.lc.line = node.start_mark.line
        value.lc.col = node.start_mark.column
        return value

    def _attach_eol_comment(self, scalar, node):
        """Copy the end-of-line comment that follows ``|`` / ``>`` onto *scalar*."""
        if self.loader and self.loader.comment_handling is None:
            if node.comment and node.comment[1]:
                scalar.comment = node.comment[1][0]  # type: ignore
        elif node.comment is not None and node.comment[1]:
            # NEWCMNT: the comment is passed through self.comment() first.
            scalar.comment = self.comment(node.comment[1][0])  # type: ignore

    def construct_scalar(self, node):
        # type: (Any) -> Any
        """Build a string scalar for *node* and attach its position as ``lc``.

        The wrapper type follows the node's style: literal (``|``), folded
        (``>``), single/double quoted (only when quote preservation is on),
        and plain otherwise.

        Raises ``ConstructorError`` when *node* is not a ``ScalarNode``.
        """
        if not isinstance(node, ruamel.yaml.nodes.ScalarNode):
            raise ruamel.yaml.constructor.ConstructorError(
                None,
                None,
                'expected a scalar node, but found {!s}'.format(node.id),
                node.start_mark,
            )
        ret_val = None
        is_text = isinstance(node.value, str)
        if node.style == '|' and is_text:
            ret_val = MyLiteralScalarString(node.value, anchor=node.anchor)
            self._attach_eol_comment(ret_val, node)
        elif node.style == '>' and is_text:
            # '\a' characters in the value mark the fold positions.  Record
            # each one at the index it has once earlier markers are stripped.
            marker_indexes = (i for i, ch in enumerate(node.value) if ch == '\a')
            fold_positions = [idx - seen for seen, idx in enumerate(marker_indexes)]
            ret_val = MyFoldedScalarString(node.value.replace('\a', ''), anchor=node.anchor)
            self._attach_eol_comment(ret_val, node)
            if fold_positions:
                ret_val.fold_pos = fold_positions  # type: ignore
        elif self._preserve_quotes and is_text:
            if node.style == "'":
                ret_val = MySingleQuotedScalarString(node.value, anchor=node.anchor)
            elif node.style == '"':
                ret_val = MyDoubleQuotedScalarString(node.value, anchor=node.anchor)
        # Test for None, not truthiness: an empty quoted or literal string is
        # falsy and would otherwise be replaced by a plain string.
        if ret_val is None:
            if node.anchor:
                ret_val = MyPlainScalarString(node.value, anchor=node.anchor)
            else:
                ret_val = MyPlainScalarString(node.value)
        return self._mark_position(ret_val, node)

    def construct_yaml_int(self, node):
        # type: (Any) -> Any
        """Build a ``MyScalarInt`` for *node* and attach its position as ``lc``.

        Deliberately limited to "normal" decimal integers: underscores are
        stripped and the rest goes through ``int()``.  The other notations
        (``0b``/``0x``/``0o`` prefixes, sexagesimal ``a:b``) and the
        width/underscore formatting information are not handled here.
        """
        value_su = self.construct_scalar(node)
        value_s = value_su.replace('_', "")
        # Keep the anchor, as construct_yaml_bool does, so it is not lost.
        if node.anchor:
            ret_val = MyScalarInt(int(value_s), anchor=node.anchor)
        else:
            ret_val = MyScalarInt(int(value_s))
        return self._mark_position(ret_val, node)

    def construct_yaml_bool(self, node):
        # type: (Any) -> Any
        """Build a ``MyScalarBoolean`` for *node* and attach its position as ``lc``."""
        b = super().construct_yaml_bool(node)
        if node.anchor:
            ret_val = MyScalarBoolean(b, anchor=node.anchor)
        else:
            ret_val = MyScalarBoolean(b)
        return self._mark_position(ret_val, node)
# Register the overriding methods for the YAML int and bool tags on MyConstructor.
# NOTE(review): presumably needed because the inherited tag table still points
# at the parent's functions -- confirm against ruamel.yaml's add_constructor.
MyConstructor.add_constructor('tag:yaml.org,2002:int', MyConstructor.construct_yaml_int)
MyConstructor.add_constructor('tag:yaml.org,2002:bool', MyConstructor.construct_yaml_bool)
Upvotes: 2
Reputation: 76812
The issue is not that you are using !!omap and that it fails to give you the line numbers the way "normal" mappings do. That should be clear from the fact that you get 4 from doing print(data['key1']['key4'].lc.line) (where key4 is a key in the outer !!omap).
As this answer indicates, you can access the property lc on collection items.
The value for data['key1']['key4'] is a collection item (another !!omap), but the value for data['key1']['key2'] is not a collection item but a built-in Python string, which has no slot to store the lc attribute.
To get an .lc attribute on a non-collection like a string, you have to subclass the RoundTripConstructor so that it uses something like the classes in scalarstring.py (with __slots__ adjusted to accept the lc attribute), and then transfer the line and column information available in the nodes to that attribute:
import sys
import ruamel.yaml
# Sample document.  The indentation inside the literal is significant: the
# inner list must sit below key4 to be its !!omap, and the text of the
# literal block scalar must be indented further than its `key6: |` line.
yaml_str = """
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: 'item5'
    - key6: |
        item6
"""
class Str(ruamel.yaml.scalarstring.ScalarString):
    """Plain scalar string with an extra ``lc`` slot for position data."""

    # The trailing comma makes this a real one-element tuple, not a bare string.
    __slots__ = ('lc',)

    style = ""

    def __new__(cls, value):
        """Create the string from *value* via ``ScalarString.__new__``."""
        return ruamel.yaml.scalarstring.ScalarString.__new__(cls, value)
class MyPreservedScalarString(ruamel.yaml.scalarstring.PreservedScalarString):
    """Literal (``|``) scalar string with an extra ``lc`` slot for position data."""

    # The trailing comma makes this a real one-element tuple, not a bare string.
    __slots__ = ('lc',)
class MyDoubleQuotedScalarString(ruamel.yaml.scalarstring.DoubleQuotedScalarString):
    """Double-quoted scalar string with an extra ``lc`` slot for position data."""

    # The trailing comma makes this a real one-element tuple, not a bare string.
    __slots__ = ('lc',)
class MySingleQuotedScalarString(ruamel.yaml.scalarstring.SingleQuotedScalarString):
    """Single-quoted scalar string with an extra ``lc`` slot for position data."""

    # The trailing comma makes this a real one-element tuple, not a bare string.
    __slots__ = ('lc',)
class MyConstructor(ruamel.yaml.constructor.RoundTripConstructor):
    """Round-trip constructor that returns string scalars carrying an ``lc`` attribute."""

    def construct_scalar(self, node):
        # type: (Any) -> Any
        """Build a string scalar for *node* and attach its position as ``lc``.

        The result type follows the node's style: ``MyPreservedScalarString``
        for literal (``|``) scalars, the quoted variants when quote
        preservation is on, and ``Str`` for everything else.  ``lc.line`` and
        ``lc.col`` are copied from ``node.start_mark``.

        Raises ``ConstructorError`` when *node* is not a ``ScalarNode``.
        """
        if not isinstance(node, ruamel.yaml.nodes.ScalarNode):
            raise ruamel.yaml.constructor.ConstructorError(
                None, None,
                "expected a scalar node, but found %s" % node.id,
                node.start_mark)
        scalar_type = Str
        # Plain `str` check: equivalent on Python 3 and independent of the
        # Python-2 compatibility alias in ruamel.yaml.compat.
        if isinstance(node.value, str):
            if node.style == '|':
                scalar_type = MyPreservedScalarString
            elif self._preserve_quotes:
                if node.style == "'":
                    scalar_type = MySingleQuotedScalarString
                elif node.style == '"':
                    scalar_type = MyDoubleQuotedScalarString
        ret_val = scalar_type(node.value)
        ret_val.lc = ruamel.yaml.comments.LineCol()
        ret_val.lc.line = node.start_mark.line
        ret_val.lc.col = node.start_mark.column
        return ret_val
# Plug the position-aware constructor into a YAML instance before loading.
yaml = ruamel.yaml.YAML()
yaml.Constructor = MyConstructor
data = yaml.load(yaml_str)
print(data['key1']['key4'].lc.line)  # nested !!omap: a collection, has lc already
print(data['key1']['key2'].lc.line)  # plain scalar: a Str built by MyConstructor
print(data['key1']['key4']['key6'].lc.line)  # literal scalar: positioned at its `|`
Please note that the output of the last call to print
is 6, as the literal scalar string starts with the |
.
If you also want to dump data
, you'll need to make a Representer
aware of those My....
types.
Upvotes: 7