Clock Slave
Clock Slave

Reputation: 7957

python convert all keys to strings

I have a nested dictionary that has some numeric keys. I need to store this dictionary as JSON, and since these keys are numeric, I can't store them as JSON. I wrote the code below, but it gives me an error saying the size of the dictionary has changed (RuntimeError: dictionary changed size during iteration).

def convert_to_str(dictionary):
    """Convert every key of a nested dictionary to ``str``, in place.

    Dictionaries nested as values, and dictionaries found inside (possibly
    nested) lists, are converted too. Keys keep their original order. If two
    keys map to the same string (e.g. ``1`` and ``"1"``), the later one wins.

    Args:
        dictionary: The dictionary to convert. It is modified in place.

    Returns:
        The same ``dictionary`` object, now with string keys only.
    """
    def _convert_value(value):
        # Recurse into containers that may hold further dictionaries.
        if isinstance(value, dict):
            return convert_to_str(value)
        if isinstance(value, list):
            value[:] = [_convert_value(item) for item in value]
        return value

    # Build the converted items before touching the dictionary: adding or
    # deleting keys while iterating over the same dictionary raises
    # "RuntimeError: dictionary changed size during iteration".
    converted = [
        (key if isinstance(key, str) else str(key), _convert_value(value))
        for key, value in dictionary.items()
    ]
    dictionary.clear()
    dictionary.update(converted)
    return dictionary

How do I avoid this? The dictionary that I have is -

# Sample nested dictionary from the question. The "age" entry mixes integer
# keys (1-6) with string keys; every other entry uses string keys only.
a = {
  "age": {
    1: 25.0,
    2: 50.25,
    3: 50.0,
    4: 75.0,
    5: 14.580906789680968,
    6: [
      25.0,
      30.0,
      34.800000000000004,
      40.0,
      46.60000000000001,
      50.0,
      56.0,
      61.0,
      65.0,
      69.0,
      75.0
    ],
    "quartiles": [
      38.0,
      64.0
    ],
    "decile_event_rate": [
      0.8125,
      0.7142857142857143,
      0.65625,
      0.42857142857142855,
      0.45161290322580644,
      0.4857142857142857,
      0.5925925925925926,
      0.5,
      0.5142857142857142,
      0.375
    ]
  },
  "income": {
    "min": 10198.0,
    "mean": 55621.78666666667,
    "median": 52880.0,
    "max": 99783.0,
    "std": 24846.911384024643,
    "deciles": [
      10198.0,
      25269.4,
      31325.800000000003,
      37857.0,
      43721.8,
      52880.0,
      63996.0,
      72526.9,
      82388.2,
      89765.90000000001,
      99783.0
    ],
    "quartiles": [
      35088.5,
      78687.25
    ],
    "decile_event_rate": [
      0.6666666666666666,
      0.6,
      0.5333333333333333,
      0.5666666666666667,
      0.5,
      0.6451612903225806,
      0.4827586206896552,
      0.5,
      0.5666666666666667,
      0.5
    ]
  },
  "edu_yrs": {
    "min": 0.0,
    "mean": 12.73,
    "median": 13.0,
    "max": 25.0,
    "std": 7.86234623342895,
    "deciles": [
      0.0,
      2.0,
      4.0,
      7.0,
      9.600000000000009,
      13.0,
      16.0,
      18.0,
      21.200000000000017,
      23.0,
      25.0
    ],
    "quartiles": [
      6.0,
      20.0
    ],
    "decile_event_rate": [
      0.5384615384615384,
      0.6521739130434783,
      0.5151515151515151,
      0.48,
      0.6111111111111112,
      0.5,
      0.5,
      0.6071428571428571,
      0.5151515151515151,
      0.6666666666666666
    ]
  },
  "yrs_since_exercise": {
    "min": 0.0,
    "mean": 18.566666666666666,
    "median": 16.0,
    "max": 60.0,
    "std": 14.417527732194037,
    "deciles": [
      0.0,
      3.0,
      5.0,
      8.0,
      12.0,
      16.0,
      20.0,
      25.0,
      31.0,
      41.0,
      60.0
    ],
    "quartiles": [
      6.0,
      27.0
    ],
    "decile_event_rate": [
      1.0,
      1.0,
      1.0,
      0.9629629629629629,
      0.75,
      0.4857142857142857,
      0.15384615384615385,
      0.06666666666666667,
      0.0,
      0.0
    ]
  },
  "security_label": {
    "event_rate": {
      "A": {
        "1.0": 0.6,
        "0.0": 0.4
      },
      "B": {
        "1.0": 0.57,
        "0.0": 0.43
      },
      "C": {
        "0.0": 0.5,
        "1.0": 0.5
      }
    },
    "freq": {
      "A": 100,
      "B": 100,
      "C": 100
    },
    "var_type": "categorical"
  }
}

EDIT

    json.dump(self.entity_data, open(path, 'w'), indent=2, cls=CustomEncoder)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/__init__.py", line 179, in dump
    for chunk in iterable:
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/encoder.py", line 430, in _iterencode
    yield from _iterencode_dict(o, _current_indent_level)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/encoder.py", line 404, in _iterencode_dict
    yield from chunks
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/encoder.py", line 404, in _iterencode_dict
    yield from chunks
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/encoder.py", line 404, in _iterencode_dict
    yield from chunks
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/encoder.py", line 376, in _iterencode_dict
    raise TypeError("key " + repr(key) + " is not a string")
TypeError: key 0 is not a string

Adding an image of the error: [enter image description here]

EDIT-2

I got serialization errors before when using numpy objects, so I began using this encoder to convert them to Python objects.

class CustomEncoder(json.JSONEncoder):
    """JSON encoder that turns NumPy scalars and arrays into built-in types."""

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``.

        NumPy arrays become lists (via ``tolist()``), NumPy floating scalars
        become ``float`` and NumPy integer scalars become ``int``. Any other
        object is handed to the base class ``default``.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        # Not a NumPy type handled here: defer to json.JSONEncoder.
        return super().default(obj)

I have been doing the json.dump while using cls = CustomEncoder. This is the command I had used

 json.dump(self.entity_data, open(path, 'w'), indent=2, cls=CustomEncoder)

Upvotes: 3

Views: 12748

Answers (3)

Cristian Contrera
Cristian Contrera

Reputation: 713

I use this function to convert all dict keys to str keeping the values as I received them:

def _parse_dict_ket_to_str(d: dict):
    """Return a copy of ``d`` in which every dictionary key is a string.

    To save data at mongodb need all keys as string.

    Dictionaries are rebuilt with ``str(key)`` keys. Lists are rebuilt with
    each element converted recursively, so dictionaries inside nested lists
    are handled as well. Any other value is returned unchanged.

    Args:
        d: The dictionary to convert. Lists and plain values are accepted
            too, which is what makes the recursion work.

    Returns:
        A new dict (or list) with converted keys, or ``d`` itself when it is
        neither a dict nor a list.
    """
    if isinstance(d, dict):
        return {str(k): _parse_dict_ket_to_str(v) for k, v in d.items()}
    elif isinstance(d, list):
        # Recurse on every element so lists of lists are converted too.
        return [_parse_dict_ket_to_str(e) for e in d]
    else:
        return d

If you also want to convert other types, such as numpy.int64 to int, you can add this to the last else branch:

# Fragment meant for the non-dict fallback of _parse_dict_ket_to_str, before
# it returns d unchanged: turns a numpy.int64 value into a built-in int.
# Assumes numpy is imported as np.
if isinstance(d, np.int64):
     return int(d)

Upvotes: 1

Martijn Pieters
Martijn Pieters

Reputation: 1121306

You'll need to recursively convert all keys; generate a new dictionary with a dict comprehension, that's much easier than altering the keys in-place. You can't add string keys and delete the non-string keys in a dictionary you are iterating over, because that mutates the hash table, which can easily alter the order the dictionary keys are listed in, so this is not permitted.

You should not forget to handle lists; they too can contain further dictionaries.

Whenever I need to transform a nested structure like this, I'd use the @functools.singledispatch decorator to split out handling for the different container types to different functions:

from functools import singledispatch

@singledispatch
def keys_to_strings(ob):
    """Return ``ob`` with all dictionary keys converted to strings.

    This is the fallback for types without a registered handler: the object
    is returned unchanged.
    """
    return ob

@keys_to_strings.register(dict)
def _handle_dict(ob: dict):
    """Build a new dict with ``str`` keys and recursively converted values."""
    converted = {}
    for key, value in ob.items():
        text_key = str(key)
        converted[text_key] = keys_to_strings(value)
    return converted

@keys_to_strings.register(list)
def _handle_list(ob: list):
    """Build a new list with every element recursively converted."""
    converted = []
    for item in ob:
        converted.append(keys_to_strings(item))
    return converted

Then JSON encode the result of keys_to_strings():

# Serialize the key-converted copy of `a`; the converter is named keys_to_strings.
json.dumps(keys_to_strings(a))

Not that this is all needed. json.dumps() accepts integer keys natively, turning them to strings. Your input example works without transforming:

json.dumps(a)

From the json.dumps() documentation:

Note: Keys in key/value pairs of JSON are always of the type str. When a dictionary is converted into JSON, all the keys of the dictionary are coerced to strings. As a result of this, if a dictionary is converted into JSON and then back into a dictionary, the dictionary may not equal the original one. That is, loads(dumps(x)) != x if x has non-string keys.

This only applies to types that JSON could otherwise already handle, so None, booleans, float and int objects. For anything else, you'd still get your exception. You probably have an object whose representation is 0, but it is not a Python int 0:

>>> json.dumps({0: 'works'})
'{"0": "works"}'
>>> import numpy
>>> numpy.int32()
0
>>> json.dumps({numpy.int32(): 'fails'})
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/mjpieters/Development/Library/buildout.python/parts/opt/lib/python3.6/json/__init__.py", line 231, in dumps
    return _default_encoder.encode(obj)
  File "/Users/mjpieters/Development/Library/buildout.python/parts/opt/lib/python3.6/json/encoder.py", line 199, in encode
    chunks = self.iterencode(o, _one_shot=True)
  File "/Users/mjpieters/Development/Library/buildout.python/parts/opt/lib/python3.6/json/encoder.py", line 257, in iterencode
    return _iterencode(o, 0)
TypeError: keys must be a string

I picked a numpy integer type because that's a commonly confused integer value that is not a Python int.

A custom encoder, as you added to your post, won't be used for keys; that only applies to values in dictionaries, so if you have non-standard objects for keys, then you indeed still need to use the above recursive solution.

Upvotes: 7

jeffffc
jeffffc

Reputation: 780

json.dumps automatically turns integer keys into string keys

>>> import json
>>> a = {'income': {'deciles': [10198.0, 25269.4, 31325.800000000003, 37857.0, 43721.8, 52880.0, 63996.0, 72526.9, 82388.2, 89765.90000000001, 99783.0], 'min': 10198.0, 'std': 24846.911384024643, 'quartiles': [35088.5, 78687.25], 'median': 52880.0, 'decile_event_rate': [0.6666666666666666, 0.6, 0.5333333333333333, 0.5666666666666667, 0.5, 0.6451612903225806, 0.4827586206896552, 0.5, 0.5666666666666667, 0.5], 'max': 99783.0, 'mean': 55621.78666666667}, 'age': {1: 25.0, 2: 50.25, 3: 50.0, 4: 75.0, 5: 14.580906789680968, 6: [25.0, 30.0, 34.800000000000004, 40.0, 46.60000000000001, 50.0, 56.0, 61.0, 65.0, 69.0, 75.0], 'quartiles': [38.0, 64.0], 'decile_event_rate': [0.8125, 0.7142857142857143, 0.65625, 0.42857142857142855, 0.45161290322580644, 0.4857142857142857, 0.5925925925925926, 0.5, 0.5142857142857142, 0.375]}, 'edu_yrs': {'deciles': [0.0, 2.0, 4.0, 7.0, 9.600000000000009, 13.0, 16.0, 18.0, 21.200000000000017, 23.0, 25.0], 'min': 0.0, 'std': 7.86234623342895, 'quartiles': [6.0, 20.0], 'median': 13.0, 'decile_event_rate': [0.5384615384615384, 0.6521739130434783, 0.5151515151515151, 0.48, 0.6111111111111112, 0.5, 0.5, 0.6071428571428571, 0.5151515151515151, 0.6666666666666666], 'max': 25.0, 'mean': 12.73}, 'security_label': {'var_type': 'categorical', 'freq': {'C': 100, 'A': 100, 'B': 100}, 'event_rate': {'C': {'0.0': 0.5, '1.0': 0.5}, 'A': {'0.0': 0.4, '1.0': 0.6}, 'B': {'0.0': 0.43, '1.0': 0.57}}}, 'yrs_since_exercise': {'deciles': [0.0, 3.0, 5.0, 8.0, 12.0, 16.0, 20.0, 25.0, 31.0, 41.0, 60.0], 'min': 0.0, 'std': 14.417527732194037, 'quartiles': [6.0, 27.0], 'median': 16.0, 'decile_event_rate': [1.0, 1.0, 1.0, 0.9629629629629629, 0.75, 0.4857142857142857, 0.15384615384615385, 0.06666666666666667, 0.0, 0.0], 'max': 60.0, 'mean': 18.566666666666666}}
>>> new = json.dumps(a)  # as a json string
>>> new
'{"income": {"deciles": [10198.0, 25269.4, 31325.800000000003, 37857.0, 43721.8, 52880.0, 63996.0, 72526.9, 82388.2, 89765.90000000001, 99783.0], "min": 10198.0, "std": 24846.911384024643, "quartiles": [35088.5, 78687.25], "mean": 55621.78666666667, "decile_event_rate": [0.6666666666666666, 0.6, 0.5333333333333333, 0.5666666666666667, 0.5, 0.6451612903225806, 0.4827586206896552, 0.5, 0.5666666666666667, 0.5], "max": 99783.0, "median": 52880.0}, "age": {"1": 25.0, "2": 50.25, "3": 50.0, "4": 75.0, "5": 14.580906789680968, "6": [25.0, 30.0, 34.800000000000004, 40.0, 46.60000000000001, 50.0, 56.0, 61.0, 65.0, 69.0, 75.0], "quartiles": [38.0, 64.0], "decile_event_rate": [0.8125, 0.7142857142857143, 0.65625, 0.42857142857142855, 0.45161290322580644, 0.4857142857142857, 0.5925925925925926, 0.5, 0.5142857142857142, 0.375]}, "edu_yrs": {"deciles": [0.0, 2.0, 4.0, 7.0, 9.600000000000009, 13.0, 16.0, 18.0, 21.200000000000017, 23.0, 25.0], "min": 0.0, "std": 7.86234623342895, "quartiles": [6.0, 20.0], "mean": 12.73, "decile_event_rate": [0.5384615384615384, 0.6521739130434783, 0.5151515151515151, 0.48, 0.6111111111111112, 0.5, 0.5, 0.6071428571428571, 0.5151515151515151, 0.6666666666666666], "max": 25.0, "median": 13.0}, "security_label": {"var_type": "categorical", "freq": {"A": 100, "C": 100, "B": 100}, "event_rate": {"A": {"0.0": 0.4, "1.0": 0.6}, "C": {"0.0": 0.5, "1.0": 0.5}, "B": {"0.0": 0.43, "1.0": 0.57}}}, "yrs_since_exercise": {"deciles": [0.0, 3.0, 5.0, 8.0, 12.0, 16.0, 20.0, 25.0, 31.0, 41.0, 60.0], "min": 0.0, "std": 14.417527732194037, "quartiles": [6.0, 27.0], "mean": 18.566666666666666, "decile_event_rate": [1.0, 1.0, 1.0, 0.9629629629629629, 0.75, 0.4857142857142857, 0.15384615384615385, 0.06666666666666667, 0.0, 0.0], "max": 60.0, "median": 16.0}}'

Upvotes: 3

Related Questions