Reputation: 43265

How to change JSON encoding behaviour for serializable Python object?

It is easy to change the format of an object which is not JSON serializable, e.g. datetime.datetime.

My requirement, for debugging purposes, is to alter the way some custom objects extended from base ones like dict and list , get serialized in JSON format. Code:

import datetime
import json

def json_debug_handler(obj):
    print("object received:")
    print type(obj)
    print("\n\n")
    if  isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj,mDict):
        return {'orig':obj , 'attrs': vars(obj)}
    elif isinstance(obj,mList):
        return {'orig':obj, 'attrs': vars(obj)}
    else:
        return None


class mDict(dict):
    pass


class mList(list):
    pass


def test_debug_json():
    games = mList(['mario','contra','tetris'])
    games.src = 'console'
    scores = mDict({'dp':10,'pk':45})
    scores.processed = "unprocessed"
    test_json = { 'games' : games , 'scores' : scores , 'date': datetime.datetime.now() }
    print(json.dumps(test_json,default=json_debug_handler))

if __name__ == '__main__':
    test_debug_json()

Demo

Output:

{"date": "2013-05-07T01:03:13.098727", "games": ["mario", "contra", "tetris"], "scores": {"pk": 45, "dp": 10}}

Desired output:

{"date": "2013-05-07T01:03:13.098727", "games": { "orig": ["mario", "contra", "tetris"] ,"attrs" : { "src":"console"}}  , "scores": { "orig": {"pk": 45, "dp": 10},"attrs":
"processed":"unprocessed }}

Does the default handler not work for serializable objects? If not, how can I override this, without adding toJSON methods to the extended classes?

Also, there is this version of JSON encoder which does not work:

class JsonDebugEncoder(json.JSONEncoder):
    def default(self,obj):
        if  isinstance(obj, datetime.datetime):
            return obj.isoformat()
        elif isinstance(obj,mDict):
            return {'orig':obj , 'attrs': vars(obj)}
        elif isinstance(obj,mList):
            return {'orig':obj, 'attrs': vars(obj)}
        else:
            return json.JSONEncoder.default(self, obj)

If there is a hack with pickle,__getstate__,__setstate__,and then using json.dumps over pickle.loads object, I am open to that as well. I tried, but that did not work.

Upvotes: 69

Answers (13)

Donny Winston

Reputation: 2452

Simplification for Python 3 (tested on 3.9 only):

from json.encoder import (_make_iterencode, JSONEncoder,
                          encode_basestring_ascii, INFINITY,
                          encode_basestring)

class CustomObjectEncoder(JSONEncoder):

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
                
        Change from json.encoder.JSONEncoder.iterencode is setting
        _one_shot=False and isinstance=self.isinstance
        in call to `_make_iterencode`.
        And not using `c_make_encoder`.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot=False, isinstance=self.isinstance)
        return _iterencode(o, 0)

Example subclass:

import datetime

from rdflib.term import Literal, BNode

class RDFTermEncoder(CustomObjectEncoder):
    def isinstance(self, o, cls):
        if isinstance(o, (Literal, BNode)):
            return False
        return isinstance(o, cls)
    def default(self, o):
        if isinstance(o, Literal):
            rv = {"value": o.value}
            if o.datatype is not None:
                rv["datatype"] = o.datatype
            if o.language is not None:
                rv["lang"] = o.language
            return rv
        if isinstance(o, BNode):
            return "http://localhost/bnode/" + str(o)
        if isinstance(o, datetime.datetime):
            return o.isoformat()
        if isinstance(o, datetime.date):
            return str(o)
        # Let the base class default method raise the TypeError
        return super().default(o)

I just used this successfully for my work as

db_json = json.loads(json.dumps(db_custom, cls=RDFTermEncoder))

Thank you all on this thread!

Upvotes: 0

Wilhelm Liao

Reputation: 829

I try to change the default resolver priority and change the default iterator outputs to achieve your purposes.

change the default resolver priority, executed precede all standard type verifying:

Inherits the json.JSONEncoder and overrides the iterencode() method.

All values should be wrapped by ValueWrapper type, avoid the values are resolved by default standard resolvers.
change the default iterator output;

Implement three custom wrapper classes ValueWrapper, ListWrapper, and DictWrapper. The ListWrapper implement __iter__() and the DictWrapper implement __iter__(), items() and iteritems().

import datetime
import json

class DebugJsonEncoder(json.JSONEncoder):
    def iterencode(self, o, _one_shot=False):
        default_resolver = self.default
        # Rewrites the default resolve, self.default(), with the custom resolver.
        # It will process the Wrapper classes
        def _resolve(o):
            if isinstance(o, ValueWrapper):
                # Calls custom resolver precede others. Due to the _make_iterencode()
                # call the custom resolver following by all standard type verifying 
                # failed. But we want custom resolver can be executed by all standard 
                # verifying.
                # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L442
                result = default_resolver(o.data)
                if (o.data is not None) and (result is not None):
                    return result
                elif isinstance(o.data, (list, tuple)):
                    return ListWrapper(o.data)
                elif isinstance(o.data, dict):
                    return DictWrapper(o.data)
                else:
                    return o.data
            else:
                return default_resolver(o)

        # re-assign the default resolver self.default with custom resolver.
        # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L161
        self.default = _resolve
        # The input value must be wrapped by ValueWrapper, avoid the values are 
        # resolved by the standard resolvers.
        # The last one arguemnt _one_shot must be False, we want to encode with
        # _make_iterencode().
        # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L259
        return json.JSONEncoder.iterencode(self, _resolve(ValueWrapper(o)), False)


class ValueWrapper():
    """
    a wrapper wrapped the given object
    """

    def __init__(self, o):
        self.data = o

class ListWrapper(ValueWrapper, list):
    """
    a wrapper wrapped the given list
    """

    def __init__(self, o):
        ValueWrapper.__init__(self, o)

    # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L307
    def __iter__(self):
        for chunk in self.data:
            yield ValueWrapper(chunk)

class DictWrapper(ValueWrapper, dict):
    """
    a wrapper wrapped the given dict
    """

    def __init__(self, d):
        dict.__init__(self, d)

    def __iter__(self):
        for key, value in dict.items(self):
            yield key, ValueWrapper(value)

    # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L361
    def items(self):
        for key, value in dict.items(self):
            yield key, ValueWrapper(value)

    # see https://github.com/python/cpython/blob/2.7/Lib/json/encoder.py#L363
    def iteritems(self):
        for key, value in dict.iteritems(self):
            yield key, ValueWrapper(value)


def json_debug_handler(obj):
    print("object received:")
    print type(obj)
    print("\n\n")
    if  isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj,mDict):
        return {'orig':obj , 'attrs': vars(obj)}
    elif isinstance(obj,mList):
        return {'orig':obj, 'attrs': vars(obj)}
    else:
        return None


class mDict(dict):
    pass

class mList(list):
    pass


def test_debug_json():
    games = mList(['mario','contra','tetris'])
    games.src = 'console'
    scores = mDict({'dp':10,'pk':45})
    scores.processed = "unprocessed"
    test_json = { 'games' : games , 'scores' : scores , 'date': datetime.datetime.now(), 'default': None}
    print(json.dumps(test_json,cls=DebugJsonEncoder,default=json_debug_handler))

if __name__ == '__main__':
    test_debug_json()

Upvotes: 0

obgnaw

Reputation: 3037

Can we just preprocess the test_json,to make it suitable for your requirement? It's easier to manipulate a python dict than write a useless Encode.

import datetime
import json
class mDict(dict):
    pass

class mList(list):
    pass

def prepare(obj):
    if  isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj, mDict):
        return {'orig':obj , 'attrs': vars(obj)}
    elif isinstance(obj, mList):
        return {'orig':obj, 'attrs': vars(obj)}
    else:
        return obj
def preprocessor(toJson):
    ret ={}
    for key, value in toJson.items():
        ret[key] = prepare(value)
    return ret
if __name__ == '__main__':
    def test_debug_json():
        games = mList(['mario','contra','tetris'])
        games.src = 'console'
        scores = mDict({'dp':10,'pk':45})
        scores.processed = "unprocessed"
        test_json = { 'games' : games, 'scores' : scores , 'date': datetime.datetime.now() }
        print(json.dumps(preprocessor(test_json)))
    test_debug_json()

Upvotes: 1

Tarun Lalwani

Reputation: 146630

If you are only looking for serialization and not deserialization then you can process the object before sending it to json.dumps. See below example

import datetime
import json


def is_inherited_from(obj, objtype):
    return isinstance(obj, objtype) and not type(obj).__mro__[0] == objtype


def process_object(data):
    if isinstance(data, list):
        if is_inherited_from(data, list):
            return process_object({"orig": list(data), "attrs": vars(data)})
        new_data = []
        for d in data:
            new_data.append(process_object(d))
    elif isinstance(data, tuple):
        if is_inherited_from(data, tuple):
            return process_object({"orig": tuple(data), "attrs": vars(data)})
        new_data = []
        for d in data:
            new_data.append(process_object(d))
        return tuple(new_data)
    elif isinstance(data, dict):
        if is_inherited_from(data, dict):
            return process_object({"orig": list(data), "attrs": vars(data)})
        new_data = {}
        for k, v in data.items():
            new_data[k] = process_object(v)
    else:
        return data
    return new_data


def json_debug_handler(obj):
    print("object received:")
    print("\n\n")
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()


class mDict(dict):
    pass


class mList(list):
    pass


def test_debug_json():
    games = mList(['mario', 'contra', 'tetris'])
    games.src = 'console'
    scores = mDict({'dp': 10, 'pk': 45})
    scores.processed = "unprocessed"
    test_json = {'games': games, 'scores': scores, 'date': datetime.datetime.now()}
    new_object = process_object(test_json)
    print(json.dumps(new_object, default=json_debug_handler))


if __name__ == '__main__':
    test_debug_json()

The output of the same is

{"games": {"orig": ["mario", "contra", "tetris"], "attrs": {"src": "console"}}, "scores": {"orig": ["dp", "pk"], "attrs": {"processed": "unprocessed"}}, "date": "2018-01-24T12:59:36.581689"}

It is also possible to override the JSONEncoder, but since it uses nested methods, it would be complex and require techniques discussed in below

Can you patch *just* a nested function with closure, or must the whole outer function be repeated?

Since you want to keep things simple, I would not suggest going that route

Upvotes: 0

Chris Johnson

Reputation: 21996

The default function is only called when the node being dumped isn't natively serializable, and your mDict classes serialize as-is. Here's a little demo that shows when default is called and when not:

import json

def serializer(obj):
    print 'serializer called'
    return str(obj)

class mDict(dict):
    pass

class mSet(set):
    pass

d = mDict(dict(a=1))
print json.dumps(d, default=serializer)

s = mSet({1, 2, 3,})
print json.dumps(s, default=serializer)

And the output:

{"a": 1}
serializer called
"mSet([1, 2, 3])"

Note that sets are not natively serializable, but dicts are.

Since your m___ classes are serializable, your handler is never called.

Update #1 -----

You could change JSON encoder code. The details of how to do this depend on which JSON implementation you're using. For example in simplejson, the relevant code is this, in encode.py:

def _iterencode(o, _current_indent_level):
    ...
        for_json = _for_json and getattr(o, 'for_json', None)
        if for_json and callable(for_json):
            ...
        elif isinstance(o, list):
            ...
        else:
            _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
            if _asdict and callable(_asdict):
                for chunk in _iterencode_dict(_asdict(),
                        _current_indent_level):
                    yield chunk
            elif (_tuple_as_array and isinstance(o, tuple)):
                ...
            elif isinstance(o, dict):
                ...
            elif _use_decimal and isinstance(o, Decimal):
                ...
            else:
                ...
                o = _default(o)
                for chunk in _iterencode(o, _current_indent_level):
                    yield chunk
                ...

In other words, there is a hard-wired behavior that calls default only when the node being encoded isn't one of the recognized base types. You could override this in one of several ways:

1 -- subclass JSONEncoder as you've done above, but add a parameter to its initializer that specifies the function to be used in place of the standard _make_iterencode, in which you add a test that would call default for classes that meet your criteria. This is a clean approach since you aren't changing the JSON module, but you would be reiterating a lot of code from the original _make_iterencode. (Other variations on this approach include monkeypatching _make_iterencode or its sub-function _iterencode_dict).

2 -- alter the JSON module source, and use the __debug__ constant to change behavior:

def _iterencode(o, _current_indent_level):
    ...
        for_json = _for_json and getattr(o, 'for_json', None)
        if for_json and callable(for_json):
            ...
        elif isinstance(o, list):
            ...
        ## added code below
        elif __debug__:
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
        ## added code above
        else:
            ...

Ideally the JSONEncoder class would provide a parameter to specify "use default for all types", but it doesn't. The above is a simple one-time change that does what you're looking for.

Upvotes: 5

Roy Nieterau

Reputation: 1256

The answer by FastTurtle might be a much cleaner solution.

Here's something close to what you want based on the technique as explained in my question/answer: Overriding nested JSON encoding of inherited default supported objects like dict, list

import json
import datetime


class mDict(dict):
    pass


class mList(list):
    pass


class JsonDebugEncoder(json.JSONEncoder):
    def _iterencode(self, o, markers=None):
        if isinstance(o, mDict):
            yield '{"__mDict__": '
            # Encode dictionary
            yield '{"orig": '
            for chunk in super(JsonDebugEncoder, self)._iterencode(o, markers):
                yield chunk
            yield ', '
            # / End of Encode dictionary
            # Encode attributes
            yield '"attr": '
            for key, value in o.__dict__.iteritems():
                yield '{"' + key + '": '
                for chunk in super(JsonDebugEncoder, self)._iterencode(value, markers):
                    yield chunk
                yield '}'
            yield '}'
            # / End of Encode attributes
            yield '}'
        elif isinstance(o, mList):
            yield '{"__mList__": '
            # Encode list
            yield '{"orig": '
            for chunk in super(JsonDebugEncoder, self)._iterencode(o, markers):
                yield chunk
            yield ', '
            # / End of Encode list
            # Encode attributes
            yield '"attr": '
            for key, value in o.__dict__.iteritems():
                yield '{"' + key + '": '
                for chunk in super(JsonDebugEncoder, self)._iterencode(value, markers):
                    yield chunk
                yield '}'
            yield '}'
            # / End of Encode attributes
            yield '}'
        else:
            for chunk in super(JsonDebugEncoder, self)._iterencode(o, markers=markers):
                yield chunk

    def default(self, obj):
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()


class JsonDebugDecoder(json.JSONDecoder):
    def decode(self, s):
        obj = super(JsonDebugDecoder, self).decode(s)
        obj = self.recursiveObjectDecode(obj)
        return obj

    def recursiveObjectDecode(self, obj):
        if isinstance(obj, dict):
            decoders = [("__mList__", self.mListDecode),
                        ("__mDict__", self.mDictDecode)]
            for placeholder, decoder in decoders:
                if placeholder in obj:                  # We assume it's supposed to be converted
                    return decoder(obj[placeholder])
                else:
                    for k in obj:
                        obj[k] = self.recursiveObjectDecode(obj[k])
        elif isinstance(obj, list):
            for x in range(len(obj)):
                obj[x] = self.recursiveObjectDecode(obj[x])
        return obj

    def mDictDecode(self, o):
        res = mDict()
        for key, value in o['orig'].iteritems():
            res[key] = self.recursiveObjectDecode(value)
        for key, value in o['attr'].iteritems():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res

    def mListDecode(self, o):
        res = mList()
        for value in o['orig']:
            res.append(self.recursiveObjectDecode(value))
        for key, value in o['attr'].iteritems():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res


def test_debug_json():
    games = mList(['mario','contra','tetris'])
    games.src = 'console'
    scores = mDict({'dp':10,'pk':45})
    scores.processed = "unprocessed"
    test_json = { 'games' : games, 'scores' : scores ,'date': datetime.datetime.now() }
    jsonDump = json.dumps(test_json, cls=JsonDebugEncoder)
    print jsonDump
    test_pyObject = json.loads(jsonDump, cls=JsonDebugDecoder)
    print test_pyObject

if __name__ == '__main__':
    test_debug_json()

This results in:

{"date": "2013-05-06T22:28:08.967000", "games": {"__mList__": {"orig": ["mario", "contra", "tetris"], "attr": {"src": "console"}}}, "scores": {"__mDict__": {"orig": {"pk": 45, "dp": 10}, "attr": {"processed": "unprocessed"}}}}

This way you can encode it and decode it back to the python object it came from.

EDIT:

Here's a version that actually encodes it to the output you wanted and can decode it as well. Whenever a dictionary contains 'orig' and 'attr' it will check if 'orig' contains a dictionary or a list, if so it will respectively convert the object back to the mDict or mList.

import json
import datetime


class mDict(dict):
    pass


class mList(list):
    pass


class JsonDebugEncoder(json.JSONEncoder):
    def _iterencode(self, o, markers=None):
        if isinstance(o, mDict):    # Encode mDict
            yield '{"orig": '
            for chunk in super(JsonDebugEncoder, self)._iterencode(o, markers):
                yield chunk
            yield ', '
            yield '"attr": '
            for key, value in o.__dict__.iteritems():
                yield '{"' + key + '": '
                for chunk in super(JsonDebugEncoder, self)._iterencode(value, markers):
                    yield chunk
                yield '}'
            yield '}'
            # / End of Encode attributes
        elif isinstance(o, mList):    # Encode mList
            yield '{"orig": '
            for chunk in super(JsonDebugEncoder, self)._iterencode(o, markers):
                yield chunk
            yield ', '
            yield '"attr": '
            for key, value in o.__dict__.iteritems():
                yield '{"' + key + '": '
                for chunk in super(JsonDebugEncoder, self)._iterencode(value, markers):
                    yield chunk
                yield '}'
            yield '}'
        else:
            for chunk in super(JsonDebugEncoder, self)._iterencode(o, markers=markers):
                yield chunk

    def default(self, obj):
        if isinstance(obj, datetime.datetime):    # Encode datetime
            return obj.isoformat()


class JsonDebugDecoder(json.JSONDecoder):
    def decode(self, s):
        obj = super(JsonDebugDecoder, self).decode(s)
        obj = self.recursiveObjectDecode(obj)
        return obj

    def recursiveObjectDecode(self, obj):
        if isinstance(obj, dict):
            if "orig" in obj and "attr" in obj and isinstance(obj["orig"], list):
                return self.mListDecode(obj)
            elif "orig" in obj and "attr" in obj and isinstance(obj['orig'], dict):
                return self.mDictDecode(obj)
            else:
                for k in obj:
                    obj[k] = self.recursiveObjectDecode(obj[k])
        elif isinstance(obj, list):
            for x in range(len(obj)):
                obj[x] = self.recursiveObjectDecode(obj[x])
        return obj

    def mDictDecode(self, o):
        res = mDict()
        for key, value in o['orig'].iteritems():
            res[key] = self.recursiveObjectDecode(value)
        for key, value in o['attr'].iteritems():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res

    def mListDecode(self, o):
        res = mList()
        for value in o['orig']:
            res.append(self.recursiveObjectDecode(value))
        for key, value in o['attr'].iteritems():
            res.__dict__[key] = self.recursiveObjectDecode(value)
        return res


def test_debug_json():
    games = mList(['mario','contra','tetris'])
    games.src = 'console'
    scores = mDict({'dp':10,'pk':45})
    scores.processed = "unprocessed"
    test_json = { 'games' : games, 'scores' : scores ,'date': datetime.datetime.now() }
    jsonDump = json.dumps(test_json, cls=JsonDebugEncoder)
    print jsonDump
    test_pyObject = json.loads(jsonDump, cls=JsonDebugDecoder)
    print test_pyObject
    print test_pyObject['games'].src

if __name__ == '__main__':
    test_debug_json()

Here's some more info about the output:

# Encoded
{"date": "2013-05-06T22:41:35.498000", "games": {"orig": ["mario", "contra", "tetris"], "attr": {"src": "console"}}, "scores": {"orig": {"pk": 45, "dp": 10}, "attr": {"processed": "unprocessed"}}}

# Decoded ('games' contains the mList with the src attribute and 'scores' contains the mDict processed attribute)
# Note that printing the python objects doesn't directly show the processed and src attributes, as seen below.
{u'date': u'2013-05-06T22:41:35.498000', u'games': [u'mario', u'contra', u'tetris'], u'scores': {u'pk': 45, u'dp': 10}}

Sorry for any bad naming conventions, it's a quick setup. ;)

Note: The datetime doesn't get decoded back to the python representation. Implementing that could be done by checking for any dict key that is called 'date' and contains a valid string representation of a datetime.

Upvotes: 11

James Holderness

Reputation: 23011

As others have pointed out already, the default handler only gets called for values that aren't one of the recognised types. My suggested solution to this problem is to preprocess the object you want to serialize, recursing over lists, tuples and dictionaries, but wrapping every other value in a custom class.

Something like this:

def debug(obj):
    class Debug:
        def __init__(self,obj):
            self.originalObject = obj
    if obj.__class__ == list:
        return [debug(item) for item in obj]
    elif obj.__class__ == tuple:
        return (debug(item) for item in obj)
    elif obj.__class__ == dict:
        return dict((key,debug(obj[key])) for key in obj)
    else:
        return Debug(obj)

You would call this function, before passing your object to json.dumps, like this:

test_json = debug(test_json)
print(json.dumps(test_json,default=json_debug_handler))

Note that this code is checking for objects whose class exactly matches a list, tuple or dictionary, so any custom objects that are extended from those types will be wrapped rather than parsed. As a result, the regular lists, tuples, and dictionaries will be serialized as usual, but all other values will be passed on to the default handler.

The end result of all this, is that every value that reaches the the default handler is guaranteed to be wrapped in one of these Debug classes. So the first thing you are going to want to do is extract the original object, like this:

obj = obj.originalObject

You can then check the original object's type and handle whichever types need special processing. For everything else, you should just return the original object (so the last return from the handler should be return obj not return None).

def json_debug_handler(obj):
    obj = obj.originalObject      # Add this line
    print("object received:")
    print type(obj)
    print("\n\n")
    if  isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj,mDict):
        return {'orig':obj, 'attrs': vars(obj)}
    elif isinstance(obj,mList):
        return {'orig':obj, 'attrs': vars(obj)}
    else:
        return obj                # Change this line

Note that this code doesn't check for values that aren't serializable. These will fall through the final return obj, then will be rejected by the serializer and passed back to the default handler again - only this time without the Debug wrapper.

If you need to deal with that scenario, you could add a check at the top of the handler like this:

if not hasattr(obj, 'originalObject'):
    return None

Ideone demo: http://ideone.com/tOloNq

Upvotes: 7

chees

Reputation: 595

Try the below. It produces the output you want and looks relatively simple. The only real difference from your encoder class is that we should override both decode and encode methods (since the latter is still called for types the encoder knows how to handle).

import json
import datetime

class JSONDebugEncoder(json.JSONEncoder):
    # transform objects known to JSONEncoder here
    def encode(self, o, *args, **kw):
        for_json = o
        if isinstance(o, mDict):
            for_json = { 'orig' : o, 'attrs' : vars(o) }
        elif isinstance(o, mList):
            for_json = { 'orig' : o, 'attrs' : vars(o) }
        return super(JSONDebugEncoder, self).encode(for_json, *args, **kw)

    # handle objects not known to JSONEncoder here
    def default(self, o, *args, **kw):
        if isinstance(o, datetime.datetime):
            return o.isoformat()
        else:
            return super(JSONDebugEncoder, self).default(o, *args, **kw)


class mDict(dict):
    pass

class mList(list):
    pass

def test_debug_json():
    games = mList(['mario','contra','tetris'])
    games.src = 'console'
    scores = mDict({'dp':10,'pk':45})
    scores.processed = "unprocessed"
    test_json = { 'games' : games , 'scores' : scores , 'date': datetime.datetime.now() }
    print(json.dumps(test_json,cls=JSONDebugEncoder))

if __name__ == '__main__':
    test_debug_json()

Upvotes: 4

bks

Reputation: 1360

You should be able to override JSONEncoder.encode():

class MyEncoder(JSONEncoder):
  def encode(self, o):
    if isinstance(o, dict):
      # directly call JSONEncoder rather than infinite-looping through self.encode()
      return JSONEncoder.encode(self, {'orig': o, 'attrs': vars(o)})
    elif isinstance(o, list):
      return JSONEncoder.encode(self, {'orig': o, 'attrs': vars(o)})
    else:
      return JSONEncoder.encode(self, o)

and then if you want to patch it into json.dumps it looks from http://docs.buildbot.net/latest/reference/json-pysrc.html like you'll need to replace json._default_encoder with an instance of MyEncoder.

Upvotes: 0

dnozay

Reputation: 24324

If you define these to override __instancecheck__:

def strict_check(builtin):
    '''creates a new class from the builtin whose instance check
    method can be overridden to renounce particular types'''
    class BuiltIn(type):
        def __instancecheck__(self, other):
            print 'instance', self, type(other), other
            if type(other) in strict_check.blacklist:
                return False
            return builtin.__instancecheck__(other)
    # construct a class, whose instance check method is known.
    return BuiltIn('strict_%s' % builtin.__name__, (builtin,), dict())

# for safety, define it here.
strict_check.blacklist = ()

then patch json.encoder like this to override _make_iterencode.func_defaults:

# modify json encoder to use some new list/dict attr.
import json.encoder
# save old stuff, never know when you need it.
old_defaults = json.encoder._make_iterencode.func_defaults
old_encoder = json.encoder.c_make_encoder
encoder_defaults = list(json.encoder._make_iterencode.func_defaults)
for index, default in enumerate(encoder_defaults):
    if default in (list, dict):
        encoder_defaults[index] = strict_check(default)

# change the defaults for _make_iterencode.
json.encoder._make_iterencode.func_defaults = tuple(encoder_defaults)
# disable C extension.
json.encoder.c_make_encoder = None

... your example would almost work verbatim:

import datetime
import json

def json_debug_handler(obj):
    print("object received:")
    print type(obj)
    print("\n\n")
    if  isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj,mDict):
        # degrade obj to more primitive dict()
        # to avoid cycles in the encoding.
        return {'orig': dict(obj) , 'attrs': vars(obj)}
    elif isinstance(obj,mList):
        # degrade obj to more primitive list()
        # to avoid cycles in the encoding.
        return {'orig': list(obj), 'attrs': vars(obj)}
    else:
        return None


class mDict(dict):
    pass


class mList(list):
    pass

# set the stuff we want to process differently.
strict_check.blacklist = (mDict, mList)

def test_debug_json():
    global test_json
    games = mList(['mario','contra','tetris'])
    games.src = 'console'
    scores = mDict({'dp':10,'pk':45})
    scores.processed = "unprocessed"
    test_json = { 'games' : games , 'scores' : scores , 'date': datetime.datetime.now() }
    print(json.dumps(test_json,default=json_debug_handler))

if __name__ == '__main__':
    test_debug_json()

The things I needed to change were to make sure there were no cycles:

    elif isinstance(obj,mDict):
        # degrade obj to more primitive dict()
        # to avoid cycles in the encoding.
        return {'orig': dict(obj) , 'attrs': vars(obj)}
    elif isinstance(obj,mList):
        # degrade obj to more primitive list()
        # to avoid cycles in the encoding.
        return {'orig': list(obj), 'attrs': vars(obj)}

and add this somewhere before test_debug_json:

# set the stuff we want to process differently.
strict_check.blacklist = (mDict, mList)

here is my console output:

>>> test_debug_json()
instance <class '__main__.strict_list'> <type 'dict'> {'date': datetime.datetime(2013, 7, 17, 12, 4, 40, 950637), 'games': ['mario', 'contra', 'tetris'], 'scores': {'pk': 45, 'dp': 10}}
instance <class '__main__.strict_dict'> <type 'dict'> {'date': datetime.datetime(2013, 7, 17, 12, 4, 40, 950637), 'games': ['mario', 'contra', 'tetris'], 'scores': {'pk': 45, 'dp': 10}}
instance <class '__main__.strict_list'> <type 'datetime.datetime'> 2013-07-17 12:04:40.950637
instance <class '__main__.strict_dict'> <type 'datetime.datetime'> 2013-07-17 12:04:40.950637
instance <class '__main__.strict_list'> <type 'datetime.datetime'> 2013-07-17 12:04:40.950637
instance <class '__main__.strict_dict'> <type 'datetime.datetime'> 2013-07-17 12:04:40.950637
object received:
<type 'datetime.datetime'>



instance <class '__main__.strict_list'> <class '__main__.mList'> ['mario', 'contra', 'tetris']
instance <class '__main__.strict_dict'> <class '__main__.mList'> ['mario', 'contra', 'tetris']
instance <class '__main__.strict_list'> <class '__main__.mList'> ['mario', 'contra', 'tetris']
instance <class '__main__.strict_dict'> <class '__main__.mList'> ['mario', 'contra', 'tetris']
object received:
<class '__main__.mList'>



instance <class '__main__.strict_list'> <type 'dict'> {'attrs': {'src': 'console'}, 'orig': ['mario', 'contra', 'tetris']}
instance <class '__main__.strict_dict'> <type 'dict'> {'attrs': {'src': 'console'}, 'orig': ['mario', 'contra', 'tetris']}
instance <class '__main__.strict_list'> <type 'dict'> {'src': 'console'}
instance <class '__main__.strict_dict'> <type 'dict'> {'src': 'console'}
instance <class '__main__.strict_list'> <type 'list'> ['mario', 'contra', 'tetris']
instance <class '__main__.strict_list'> <class '__main__.mDict'> {'pk': 45, 'dp': 10}
instance <class '__main__.strict_dict'> <class '__main__.mDict'> {'pk': 45, 'dp': 10}
instance <class '__main__.strict_list'> <class '__main__.mDict'> {'pk': 45, 'dp': 10}
instance <class '__main__.strict_dict'> <class '__main__.mDict'> {'pk': 45, 'dp': 10}
object received:
<class '__main__.mDict'>



instance <class '__main__.strict_list'> <type 'dict'> {'attrs': {'processed': 'unprocessed'}, 'orig': {'pk': 45, 'dp': 10}}
instance <class '__main__.strict_dict'> <type 'dict'> {'attrs': {'processed': 'unprocessed'}, 'orig': {'pk': 45, 'dp': 10}}
instance <class '__main__.strict_list'> <type 'dict'> {'processed': 'unprocessed'}
instance <class '__main__.strict_dict'> <type 'dict'> {'processed': 'unprocessed'}
instance <class '__main__.strict_list'> <type 'dict'> {'pk': 45, 'dp': 10}
instance <class '__main__.strict_dict'> <type 'dict'> {'pk': 45, 'dp': 10}
{"date": "2013-07-17T12:04:40.950637", "games": {"attrs": {"src": "console"}, "orig": ["mario", "contra", "tetris"]}, "scores": {"attrs": {"processed": "unprocessed"}, "orig": {"pk": 45, "dp": 10}}}

Upvotes: 2

Wessie

Reputation: 3520

If you are able to change the way json.dumps is called. You can do all the processing required before the JSON encoder gets his hands on it. This version does not use any kind of copying and will edit the structures in-place. You can add copy() if required.

import datetime
import json
import collections


def json_debug_handler(obj):
    print("object received:")
    print type(obj)
    print("\n\n")
    if isinstance(obj, collections.Mapping):
        for key, value in obj.iteritems():
            if isinstance(value, (collections.Mapping, collections.MutableSequence)):
                value = json_debug_handler(value)

            obj[key] = convert(value)
    elif isinstance(obj, collections.MutableSequence):
        for index, value in enumerate(obj):
            if isinstance(value, (collections.Mapping, collections.MutableSequence)):
                value = json_debug_handler(value)

            obj[index] = convert(value)
    return obj

def convert(obj):
    if  isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj,mDict):
        return {'orig':obj , 'attrs': vars(obj)}
    elif isinstance(obj,mList):
        return {'orig':obj, 'attrs': vars(obj)}
    else:
        return obj


class mDict(dict):
    pass


class mList(list):
    pass


def test_debug_json():
    games = mList(['mario','contra','tetris'])
    games.src = 'console'
    scores = mDict({'dp':10,'pk':45})
    scores.processed = "qunprocessed"
    test_json = { 'games' : games , 'scores' : scores , 'date': datetime.datetime.now() }
    print(json.dumps(json_debug_handler(test_json)))

if __name__ == '__main__':
    test_debug_json()

You call json_debug_handler on the object you are serializing before passing it to the json.dumps. With this pattern you could also easily reverse the changes and/or add extra conversion rules.

edit:

If you can't change how json.dumps is called, you can always monkeypatch it to do what you want. Such as doing this:

json.dumps = lambda obj, *args, **kwargs: json.dumps(json_debug_handler(obj), *args, **kwargs)

Upvotes: 1

FastTurtle

Reputation: 2311

It seems that to achieve the behavior you want, with the given restrictions, you'll have to delve into the JSONEncoder class a little. Below I've written out a custom JSONEncoder that overrides the iterencode method to pass a custom isinstance method to _make_iterencode. It isn't the cleanest thing in the world, but seems to be the best given the options and it keeps customization to a minimum.

# customencoder.py
from json.encoder import (_make_iterencode, JSONEncoder,
                          encode_basestring_ascii, FLOAT_REPR, INFINITY,
                          c_make_encoder, encode_basestring)


class CustomObjectEncoder(JSONEncoder):

    def iterencode(self, o, _one_shot=False):
        """
        Most of the original method has been left untouched.

        _one_shot is forced to False to prevent c_make_encoder from
        being used. c_make_encoder is a funcion defined in C, so it's easier
        to avoid using it than overriding/redefining it.

        The keyword argument isinstance for _make_iterencode has been set
        to self.isinstance. This allows for a custom isinstance function
        to be defined, which can be used to defer the serialization of custom
        objects to the default method.
        """
        # Force the use of _make_iterencode instead of c_make_encoder
        _one_shot = False

        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        if self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Instead of forcing _one_shot to False, you can also just
        # remove the first part of this conditional statement and only
        # call _make_iterencode
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, isinstance=self.isinstance)
        return _iterencode(o, 0)

You can now subclass the CustomObjectEncoder so it correctly serializes your custom objects. The CustomObjectEncoder can also do cool stuff like handle nested objects.

# test.py
import json
import datetime
from customencoder import CustomObjectEncoder


class MyEncoder(CustomObjectEncoder):

    def isinstance(self, obj, cls):
        if isinstance(obj, (mList, mDict)):
            return False
        return isinstance(obj, cls)

    def default(self, obj):
        """
        Defines custom serialization.

        To avoid circular references, any object that will always fail
        self.isinstance must be converted to something that is
        deserializable here.
        """
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        elif isinstance(obj, mDict):
            return {"orig": dict(obj), "attrs": vars(obj)}
        elif isinstance(obj, mList):
            return {"orig": list(obj), "attrs": vars(obj)}
        else:
            return None


class mList(list):
    pass


class mDict(dict):
    pass


def main():
    zelda = mList(['zelda'])
    zelda.src = "oldschool"
    games = mList(['mario', 'contra', 'tetris', zelda])
    games.src = 'console'
    scores = mDict({'dp': 10, 'pk': 45})
    scores.processed = "unprocessed"
    test_json = {'games': games, 'scores': scores,
                 'date': datetime.datetime.now()}
    print(json.dumps(test_json, cls=MyEncoder))

if __name__ == '__main__':
    main()

Upvotes: 27

marcoseu

Reputation: 3952

Why can't you just create a new object type to pass to the encoder? Try:

class MStuff(object):
    def __init__(self, content):
        self.content = content

class mDict(MStuff):
    pass

class mList(MStuff):
    pass

def json_debug_handler(obj):
    print("object received:")
    print(type(obj))
    print("\n\n")
    if  isinstance(obj, datetime.datetime):
        return obj.isoformat()
    elif isinstance(obj,MStuff):
        attrs = {}
        for key in obj.__dict__:
            if not ( key.startswith("_") or key == "content"):
                attrs[key] = obj.__dict__[key]

        return {'orig':obj.content , 'attrs': attrs}
    else:
        return None

You could add validation on the mDict and mList if desired.

Upvotes: 2

How to change JSON encoding behaviour for serializable Python object?

Answers (13)

Related Questions