Reputation: 15384
Python has no built-in arbitrary-precision floats. Here is an example:
>>> float(4.4257052820783003)
4.4257052820783
So it doesn't matter what you use, you can't have a float object with arbitrary precision.
Let's say I have a JSON string (json_string = '{"abc": 4.4257052820783003}') containing an arbitrary-precision float. If I load that string, Python will truncate the number:
>>> dct = json.loads(json_string)
>>> dct
{'abc': 4.4257052820783}
I managed to avoid this loss of info by using decimal.Decimal
>>> dct = json.loads(json_string, parse_float=Decimal)
>>> dct
{'abc': Decimal('4.4257052820783003')}
Now, I would like to serialize this dct object back to the original JSON-formatted string. json.dumps(dct) clearly does not work (because objects of type Decimal are not JSON serializable). I tried to subclass json.JSONEncoder and redefine its default method:
class MyJSONEncoder(json.JSONEncoder):
    """Encoder from the question: serializes Decimal via ``default``.

    ``default`` may only return another JSON-serializable object, so the
    Decimal ends up as a *quoted JSON string*, not a JSON number. This is
    the behaviour the question is asking how to avoid.
    """

    def default(self, o):
        """Return ``str(o)`` for Decimal; defer to the base class otherwise.

        The base implementation raises TypeError for unsupported types.
        """
        if isinstance(o, Decimal):
            return str(o)
        return super().default(o)
But this is clearly creating a string instead of a number:
>>> MyJSONEncoder().encode(dct)
'{"abc": "4.4257052820783003"}'
How can I serialize a Decimal object to a JSON number (real) instead of a JSON string? In other words, I want the encode operation to return the original json_string string. Ideally without using external packages (but solutions using external packages are still welcome).
This question is of course very related but I can't find an answer there: Python JSON serialize a Decimal object.
Upvotes: 10
Views: 1485
Reputation: 628
The following only uses the default library. It works by effectively "overriding" json.encoder._make_iterencode (see discussion below, after this example)...
from decimal import Decimal
import json
def _our_make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        _intstr=int.__repr__,
    ):
    """Build the pure-Python chunk generator used by ``iterencode``.

    This is a copy of the private ``json.encoder._make_iterencode`` with one
    extra branch in ``_iterencode`` that yields ``str(decimal_value)`` as an
    unquoted token, so Decimal values become JSON numbers.

    Parameters
    ----------
    markers : dict or None
        Maps ``id(container)`` to the container while it is being encoded;
        used to detect circular references. ``None`` disables the check.
    _default : callable
        Called for objects of unsupported type; must return an encodable
        replacement (or raise).
    _encoder : callable
        Turns a ``str`` into a quoted JSON string.
    _indent : int, str or None
        Pretty-print indent. An int is converted to that many spaces.
    _floatstr : callable
        Turns a ``float`` into its JSON text.
    _key_separator, _item_separator : str
        Text placed between key/value and between items.
    _sort_keys : bool
        Emit dict items in sorted key order.
    _skipkeys : bool
        Silently drop dict keys of unsupported type instead of raising.
    _one_shot : bool
        Accepted for signature compatibility; not used by this function.
    _intstr : callable
        Turns an ``int`` into its JSON text. Not meant to be passed by
        callers; it is bound as a default so it is a local name.

    Returns
    -------
    callable
        ``_iterencode(o, _current_indent_level)``, a generator function
        yielding the JSON text of ``o`` chunk by chunk.

    Raises
    ------
    ValueError
        From the generators, when a circular reference is detected.
    TypeError
        From the generators, for a dict key of unsupported type when
        ``_skipkeys`` is false.
    """

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            # The first item is prefixed with the opening bracket already in
            # ``buf``; every later item is prefixed with the separator.
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    # Everything else, Decimal included, goes through
                    # _iterencode.
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them. Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    # Everything else, Decimal included, goes through
                    # _iterencode.
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        if isinstance(o, str):
            yield _encoder(o)
        elif isinstance(o, Decimal):
            # The one addition to the stdlib code: emit the Decimal's text
            # without going through _encoder, i.e. without quotes.
            yield str(o)  # unquoted string.
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]

    return _iterencode
class BigDecimalJSONEncoder(json.JSONEncoder):
    """JSONEncoder that writes ``decimal.Decimal`` values as JSON numbers.

    ``iterencode`` mirrors ``json.JSONEncoder.iterencode`` but always builds
    its generator with ``_our_make_iterencode``, which knows how to emit a
    Decimal as an unquoted token.
    """

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is accepted for signature compatibility but ignored:
        the C encoder has no Decimal support, so the pure-Python encoder
        is always used.
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = json.encoder.encode_basestring_ascii
        else:
            _encoder = json.encoder.encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=json.encoder.INFINITY,
                _neginf=-json.encoder.INFINITY):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The original forced ``_one_shot = False`` and then tested it before
        # choosing the C encoder, so that branch could never run. It is
        # dropped here and the Decimal-aware Python encoder is called
        # directly.
        _iterencode = _our_make_iterencode(
            markers, self.default, _encoder, self.indent, floatstr,
            self.key_separator, self.item_separator, self.sort_keys,
            self.skipkeys, False)
        return _iterencode(o, 0)
# Round-trip demo: parse with Decimal to keep every digit, then encode back.
json_string = '{"abc": 4.4257052820783003}'
dct = json.loads(json_string, parse_float=Decimal)
# The example output starts with a "decoded=" line, so print the parsed
# dict before the encoded text.
print(f"decoded={dct}")
print(f"encoded={json.dumps(dct, cls=BigDecimalJSONEncoder, indent=4)}")
Example output:
decoded={'abc': Decimal('4.4257052820783003')}
encoded={
    "abc": 4.4257052820783003
}
The main problem is that json.encoder does not provide an acceptable way to override json.JSONEncoder so that it returns a string (i.e., from json.JSONEncoder.default) that is accepted as a raw, ready-to-go JSON string.
For example, consider the following pseudo ideal override...
class IdealDecimalEncoder(json.JSONEncoder):
    """Hypothetical encoder illustrating an API the json module lacks.

    NOTE: this is pseudo-code for discussion. The standard library does not
    interpret a ``(str, bool)`` tuple returned from ``default`` as "raw
    text, do not quote"; it would encode the tuple as a JSON array.
    """

    def default(self, o) -> Union[Any, tuple[str, bool]]:
        """Return ``(text, False)`` for Decimal, meaning "emit unquoted"."""
        if isinstance(o, Decimal):
            return str(o), False  # return object (str) and False which means "do not quote".
        return super().default(o)
The above allows default to return the object (as it does today) or a tuple, where the second value is False if no further encoding should be performed (i.e., a string that should not be quoted). As we know, this is not supported.
The next question would then be: what lies between the call to default and iterencode? Unfortunately, it's the json.encoder._make_iterencode function, which essentially produces a generator that relies on several "private" functions. If this were a class, or if the functions were broken out and accessible, you could perform a more terse override.
In my working example above, I essentially copy/pasted _make_iterencode simply to add the following single case to the private _iterencode function:
elif isinstance(o, Decimal):
yield str(o) # unquoted string.
This obviously works because it returns an unquoted string. The 'str' case always uses _encoder which assumes a string requiring quotes for JSON, where the override bypasses that for Decimal.
Not a great solution but the only reasonable one I can see which uses only the built-in library which does not require parsing/decoding/modifying encoded JSON during the encoding process.
It has not been tested beyond the @Riccardo Bucco (OP)'s example.
Assuming no unforeseen backward-compatibility issues, it seems it would be relatively easy to modify Python to include this for Decimal.
Without something built in, I'm wondering if it's best, for now, to use one of the other JSON libraries supporting Decimal as others have discussed.
Upvotes: 4
Reputation: 44313
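This code does not use anything that is not part of the standard library, but it does require defining a custom-tailored dumps function.
The idea is to serialize a Decimal value such as Decimal('1.0000000000000000001') to 10000000000000000001E-19. This is a two-step process:
1. Use a custom encoder to serialize each Decimal to its string representation, i.e. "Decimal('1.0000000000000000001')".
2. Post-process the resulting JSON text, replacing each of those quoted strings with an unquoted number in exponent notation.
import json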
from decimal import Decimal
import re
class MyJSONEncoder(json.JSONEncoder):
    """Encoder that tags each Decimal as the string ``"Decimal('...')"``.

    The tag is a normal (quoted) JSON string; ``dumps`` below rewrites it
    into an unquoted number afterwards.
    """

    def default(self, o):
        """Return ``repr(o)`` for Decimal; defer to the base class otherwise."""
        if isinstance(o, Decimal):
            return repr(o)
        return super().default(o)


# Splits the JSON text around each tagged Decimal. The group captures the
# opening quote and the tag; the closing quote is consumed by the split.
split_rex = re.compile(r'''("Decimal\('[^']+'\))"''')
# Extracts the text between the single quotes of a captured tag.
match_rex = re.compile(r"'([^']+)'")


def _decimal_text_to_json_number(text):
    """Convert ``str(Decimal)`` text to a JSON number token.

    Plain notation is rewritten as ``<digits>E-<places>`` (for example
    ``'1234'`` -> ``'1234E-0'``). The exponent makes ``json.loads`` treat
    the token as a float, so ``parse_float=Decimal`` gets it back exactly.

    Leading zeros are stripped from the digits: ``0.5`` must become
    ``5E-1``, because ``05E-1`` is not a valid JSON number.

    Text that already carries an exponent (``'1E+3'``) is a valid JSON
    number and is returned unchanged. Non-finite text (``'NaN'``,
    ``'Infinity'``) is also returned unchanged.
    """
    if 'E' in text or not text.lstrip('-')[:1].isdigit():
        return text
    whole, _, fraction = text.partition('.')
    sign = '-' if whole.startswith('-') else ''
    digits = (whole.lstrip('-') + fraction).lstrip('0') or '0'
    return f'{sign}{digits}E-{len(fraction)}'


def dumps(o):
    """Serialize ``o`` to JSON text, writing Decimal values as numbers.

    NOTE: the rewrite works on the finished JSON text, so a genuine string
    value that looks exactly like ``Decimal('...')`` is converted as well.
    """
    json_string = json.dumps(o, cls=MyJSONEncoder)
    arr = split_rex.split(json_string)
    # With one capturing group, the odd indexes hold the tagged Decimals.
    for idx in range(1, len(arr), 2):
        string_rep = match_rex.search(arr[idx])[1]
        arr[idx] = _decimal_text_to_json_number(string_rep)
    return ''.join(arr)
# Demo: one integral and one high-precision Decimal, round-tripped.
dicts = [
    {'a': "some value", 'b': Decimal('1234'), "c": 1234},
    {'a': "some value", 'b': Decimal('1.0000000000000000001'), "c": 1234},
]

for d in dicts:
    json_string = dumps(d)
    # parse_float=Decimal shows the value survives the round trip unchanged.
    print(f'dictionary = {d}\nserialized = {repr(json_string)}\nun-serialized = {json.loads(json_string, parse_float=Decimal)}\n')
dictionary = {'a': 'some value', 'b': Decimal('1234'), 'c': 1234}
serialized = '{"a": "some value", "b": 1234E-0, "c": 1234}'
un-serialized = {'a': 'some value', 'b': Decimal('1234'), 'c': 1234}
dictionary = {'a': 'some value', 'b': Decimal('1.0000000000000000001'), 'c': 1234}
serialized = '{"a": "some value", "b": 10000000000000000001E-19, "c": 1234}'
un-serialized = {'a': 'some value', 'b': Decimal('1.0000000000000000001'), 'c': 1234}
A Second Simpler Solution
import json
from decimal import Decimal
class MyJSONEncoder(json.JSONEncoder):
    """Encoder that tags each Decimal with a distinctive string prefix.

    The tagged value is a normal (quoted) JSON string; ``dumps`` below
    recognises the prefix and rewrites the chunk into an unquoted number.
    """

    def default(self, o):
        """Return the tagged string for Decimal; defer to the base otherwise."""
        if isinstance(o, Decimal):
            # To eliminate (almost) all possibility of mistaken identity:
            return 'MyJSONEncoder Decimal: ' + str(o)
        return super().default(o)


# How an encoded chunk for a tagged Decimal starts (opening quote included).
_TAGGED_PREFIX = '"MyJSONEncoder Decimal: '


def _decimal_text_to_json_number(text):
    """Convert ``str(Decimal)`` text to a JSON number token.

    Plain notation is rewritten as ``<digits>E-<places>`` (for example
    ``'1234'`` -> ``'1234E-0'``). The exponent makes ``json.loads`` treat
    the token as a float, so ``parse_float=Decimal`` gets it back exactly.

    Leading zeros are stripped from the digits: ``0.5`` must become
    ``5E-1``, because ``05E-1`` is not a valid JSON number.

    Text that already carries an exponent (``'1E+3'``) is a valid JSON
    number and is returned unchanged. Non-finite text (``'NaN'``,
    ``'Infinity'``) is also returned unchanged.
    """
    if 'E' in text or not text.lstrip('-')[:1].isdigit():
        return text
    whole, _, fraction = text.partition('.')
    sign = '-' if whole.startswith('-') else ''
    digits = (whole.lstrip('-') + fraction).lstrip('0') or '0'
    return f'{sign}{digits}E-{len(fraction)}'


def dumps(o):
    """Serialize ``o`` to JSON text, writing Decimal values as numbers.

    Relies on ``iterencode`` yielding a value returned by ``default`` as a
    chunk of its own, so a tagged Decimal can be spotted by its prefix.
    A genuine string value that arrives as its own chunk and starts with
    the same prefix would be converted too.
    """
    chunks = []
    for chunk in MyJSONEncoder().iterencode(o):
        if chunk.startswith(_TAGGED_PREFIX):
            # Drop the prefix and the closing quote, keep the number text.
            chunk = _decimal_text_to_json_number(chunk[len(_TAGGED_PREFIX):-1])
        chunks.append(chunk)
    return ''.join(chunks)
# Demo: one integral and one high-precision Decimal, round-tripped.
dicts = [
    {'a': "some value", 'b': Decimal('1234'), "c": 1234},
    {'a': "some value", 'b': Decimal('1.0000000000000000001'), "c": 1234},
]

for d in dicts:
    json_string = dumps(d)
    # parse_float=Decimal shows the value survives the round trip unchanged.
    print(f'dictionary = {d}\nserialized = {repr(json_string)}\nun-serialized = {json.loads(json_string, parse_float=Decimal)}\n')
dictionary = {'a': 'some value', 'b': Decimal('1234'), 'c': 1234}
serialized = '{"a": "some value", "b": 1234E-0, "c": 1234}'
un-serialized = {'a': 'some value', 'b': Decimal('1234'), 'c': 1234}
dictionary = {'a': 'some value', 'b': Decimal('1.0000000000000000001'), 'c': 1234}
serialized = '{"a": "some value", "b": 10000000000000000001E-19, "c": 1234}'
un-serialized = {'a': 'some value', 'b': Decimal('1.0000000000000000001'), 'c': 1234}
Upvotes: 2
Reputation: 2271
Use simplejson.dumps. From its documentation:
If use_decimal is true (default: True) then decimal.Decimal will be natively serialized to JSON with full precision.
import json
import simplejson
from decimal import Decimal
# Parse with Decimal so no digits are lost, then let simplejson write it back.
dct = json.loads('{"abc": 4.4257052820783003}', parse_float=Decimal)
# The sample output starts with the dict's repr, so print it first.
print(dct)
print(simplejson.dumps(dct, use_decimal=True))
print(simplejson.dumps(dct)) # Also works, if Decimal in the dct.
{'abc': Decimal('4.4257052820783003')}
{"abc": 4.4257052820783003}
{"abc": 4.4257052820783003}
Upvotes: 9