Writing to a json file with Polish characters

Question

I am using a JSON file to pass data from an LDAP database on a Linux Samba AD DC on for further processing. I fetch the data with a script written in Python 3. My problem is that some fields contain Polish characters, which end up in the file as Unicode escape sequences; for example, "Bo\u017Cena \u017Ar\u00F3dlana" should be "Bożena źródlana". I would like the file to contain the already-decoded data, so that I can read it without guessing which character is behind each escape code. Where in my code should I put something like a decoder, so that the entire file is saved decoded and contains the Polish special characters?

My Python 3 code:

#! /usr/bin/python3
import os
import configparser
import getpass
import sys
import json
import ssl
import shutil
from ldap3 import Server, Connection, Tls, ALL_ATTRIBUTES
from datetime import date
# Clear the terminal before prompting (runs the external `clear` command).
os.system('clear')
# Today's date as a string (ISO format, e.g. "2022-11-15").
current_datetime = str(date.today())
# Load the main configuration file.
main_conf_file = "/tmp/ldap-searchlight/config/searchlight.conf"
config = configparser.RawConfigParser()
config.read(main_conf_file)
# Path fragments and output file names read from the configuration.
main_path = config['GLOBAL']['main_path']
conf_path = config['GLOBAL']['conf_path']
data_path = config['GLOBAL']['data_path']
arch_patch = config['GLOBAL']['arch_patch']
json_users_file = config['USERS']['json_users_file']
json_cmptrs_file = config['CMPTRS']['json_cmptrs_file']
# LDAP-related settings.
ldap_base_dn = config['GLOBAL']["ldap-base-dn"]
ldap_users = config['USERS']['ldap-users']
ldap_cmptrs = config['CMPTRS']['ldap_cmptrs']
# Display names shown in the login menu below.
user1_name = config['USERS']['user1-name']
user2_name = config['USERS']['user2-name']
user3_name = config['USERS']['user3-name']
user4_name = config['USERS']['user4-name']
user5_name = config['USERS']['user5-name']
# Login menu: every menu line ends with an explicit "\n" escape so that each
# string literal stays on a single source line.
print(
    "Logujesz się jako:\n" +
    " wybierz [ 1 ] dla " + user1_name + "\n" +
    " wybierz [ 2 ] dla " + user2_name + "\n" +
    " wybierz [ 3 ] dla " + user3_name + "\n" +
    " wybierz [ 4 ] dla " + user4_name + "\n" +
    " wybierz [ 5 ] dla " + user5_name + "\n"
    )
input_name = input("WYBRANO: ")
# Only the five menu choices are accepted; anything else aborts with status 1.
if input_name not in ("1", "2", "3", "4", "5"):
    print("Permission danied\n")
    sys.exit(1)
# The choice selects the matching "ldap-user<N>" entry of the USERS section.
user = config["USERS"]["ldap-user" + input_name]
# Ask for the password without echoing it to the terminal.
password = getpass.getpass()
LDAP_HOST = config['GLOBAL']['ldap-host']
# Bind DN: "<selected user>,<users container>,<base DN>".
LDAP_USER = user + "," + ldap_users + "," + ldap_base_dn
LDAP_PASSWORD = password
# NOTE(review): certificate validation is disabled (CERT_NONE) and the
# protocol is pinned to TLS 1.0 - confirm that this is really required.
tls_configuration = Tls(validate=ssl.CERT_NONE, version=ssl.PROTOCOL_TLSv1)
def ldap_server():
    """Build the ldap3 ``Server`` object for the configured LDAP host.

    The server is addressed by the module-level ``LDAP_HOST`` and is set up
    for SSL with the module-level ``tls_configuration``.
    """
    # NOTE(review): get_info receives ALL_ATTRIBUTES, a constant ldap3
    # normally uses to select search attributes - confirm whether
    # ldap3.ALL was intended here.
    server_options = {
        "use_ssl": True,
        "tls": tls_configuration,
        "get_info": ALL_ATTRIBUTES,
    }
    return Server(LDAP_HOST, **server_options)
def ldap_connection():
    """Open a new LDAP connection bound with the module-level credentials.

    Returns:
        An ldap3 ``Connection`` to the server built by ``ldap_server()``,
        created with ``auto_bind=True`` for ``LDAP_USER`` / ``LDAP_PASSWORD``.
    """
    # Hand the Server object itself to Connection (not a one-element tuple).
    server = ldap_server()
    return Connection(server, user=LDAP_USER,
                      password=LDAP_PASSWORD,
                      auto_bind=True)
# --- LDAP users ---
# Search base: "<users container>,<base DN>".
LDAP_BASE_DN = ",".join((ldap_users, ldap_base_dn))
LDAP_OBJECT_FILTER = '(objectclass=user)'
# Attributes requested for every user entry.
user_attr_list = [
    'cn',
    'sn',
    'givenName',
    'instanceType',
    'whenCreated',
    'displayName',
    'uSNCreated',
    'name',
    'objectGUID',
    'badPwdCount',
    'codePage',
    'countryCode',
    'badPasswordTime',
    'lastLogoff',
    'lastLogon',
    'primaryGroupID',
    'objectSid',
    'accountExpires',
    'logonCount',
    'sAMAccountName',
    'sAMAccountType',
    'userPrincipalName',
    'objectCategory',
    'pwdLastSet',
    'userAccountControl',
    'lastLogonTimestamp',
    'whenChanged',
    'uSNChanged',
    'memberOf',
    'distinguishedName',
]
conn = ldap_connection()
conn.search(LDAP_BASE_DN, LDAP_OBJECT_FILTER, attributes=user_attr_list)
# Write the search response to the users JSON file.
json_users_data = "".join((main_path, data_path, json_users_file))
data = json.loads(conn.response_to_json())
# json.dump is called with its defaults here, so non-ASCII characters are
# written to the file as \uXXXX escape sequences.
with open(json_users_data, 'w') as users_out:
    json.dump(data, users_out)
# Keep a date-stamped copy of the file in the archive location.
json_users_arch = "".join(
    (main_path, arch_patch, current_datetime, "_", json_users_file)
)
shutil.copy2(json_users_data, json_users_arch)
# --- LDAP computers ---
# Search base: "<computers container>,<base DN>".
LDAP_BASE_DN = ",".join((ldap_cmptrs, ldap_base_dn))
LDAP_OBJECT_FILTER = '(objectclass=computer)'
# Attributes requested for every computer entry.
cmptr_attr_list = [
    'cn',
    'instanceType',
    'whenCreated',
    'uSNCreated',
    'name',
    'objectGUID',
    'badPwdCount',
    'codePage',
    'countryCode',
    'badPasswordTime',
    'lastLogoff',
    'lastLogon',
    'primaryGroupID',
    'accountExpires',
    'logonCount',
    'sAMAccountName',
    'sAMAccountType',
    'objectCategory',
    'pwdLastSet',
    'userAccountControl',
    'lastLogonTimestamp',
    'whenChanged',
    'uSNChanged',
    'dNSHostName',
    'isCriticalSystemObject',
    'msDS-SupportedEncryptionTypes',
    'operatingSystem',
    'operatingSystemVersion',
    'servicePrincipalName',
    'distinguishedName',
]
# A fresh connection is opened for the computer search.
conn = ldap_connection()
conn.search(LDAP_BASE_DN, LDAP_OBJECT_FILTER, attributes=cmptr_attr_list)
# Write the search response to the computers JSON file.
json_cmptrs_data = "".join((main_path, data_path, json_cmptrs_file))
data = json.loads(conn.response_to_json())
# json.dump is called with its defaults here, so non-ASCII characters are
# written to the file as \uXXXX escape sequences.
with open(json_cmptrs_data, 'w') as cmptrs_out:
    json.dump(data, cmptrs_out)
# Keep a date-stamped copy of the file in the archive location.
json_cmptrs_arch = "".join(
    (main_path, arch_patch, current_datetime, "_", json_cmptrs_file)
)
shutil.copy2(json_cmptrs_data, json_cmptrs_arch)
# Report where the data and archive files were written, then exit.
summary_lines = (
    "USERS:",
    "Data file created at: " + json_users_data,
    "Archive file created at: " + json_users_arch,
    "------------------------------------------------------------------------------",
    "COMPUTERS",
    "Data file created at: " + json_cmptrs_data,
    "Archive file created at: " + json_cmptrs_arch,
)
print("\n".join(summary_lines))
# Exit status used by this script: 0 -> OK, 1 -> FAULT.
sys.exit(0)

My JSON output looks like this:

{"entries": [
        {"attributes": 
            {
                "accountExpires": ["9223372036854775807"],
                "badPasswordTime": [],
                "badPwdCount": [],
                "cn": ["Bo\u017Cena \u017Ar\u00F3dlana"],
                "codePage": ["0"],
                "countryCode": ["0"],
                "displayName": ["Bo\u017Cena \u017Ar\u00F3dlana"],
                "distinguishedName": ["CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"],
                "givenName": ["Bo\u017Cena"],
                "instanceType": ["4"],
                "lastLogoff": [],
                "lastLogon": [],
                "lastLogonTimestamp": ["132978476924537530"],
                "logonCount": [],
                "memberOf": [],
                "name": ["Bo\u017Cena \u017Ar\u00F3dlana"],
                "objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"],
                "objectGUID": [
                                {
                                    "encoded": "AFvzBO0T+Ey9TL3RHGtghQ==",
                                    "encoding": "base64"
                                }
                            ],
                "objectSid": [
                                {
                                    "encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDIE8AAA==",
                                    "encoding": "base64"
                                }
                            ],
                "primaryGroupID": ["513"],
                "pwdLastSet": ["132979783101549910"],
                "sAMAccountName": ["pjarmolowicz"],
                "sAMAccountType": ["805306368"],
                "sn": ["\u017Ar\u00F3dlana"],
                "uSNChanged": ["4986"],
                "uSNCreated": ["4986"],
                "userAccountControl": ["512"],
                "userPrincipalName": ["bzrodlana@universum.local"],
                "whenChanged": ["20220525185150.0Z"],
                "whenCreated": ["20211125124337.0Z"]},
                "dn": "CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"
        }, 
        {"attributes": {
            "accountExpires": ["9223372036854775807"],
            "badPasswordTime": ["133128872888506790"],
            "badPwdCount": ["0"],
            "cn": ["Jan Kowalski"],
            "codePage": ["0"],
            "countryCode": ["0"],
            "displayName": ["Jan Kowalski"],
            "distinguishedName": ["CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"],
            "givenName": ["Jan"],
            "instanceType": ["4"],
            "lastLogoff": [],
            "lastLogon": ["133129921828641420"],
            "lastLogonTimestamp": ["133125345565644950"],
            "logonCount": ["55"],
            "memberOf": [],
            "name": ["Jan Kowalski"],
            "objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"],
            "objectGUID": [
                            {
                                "encoded": "AScnTASpKUun4oadMC5Qxg==",
                                "encoding": "base64"
                            }
                        ],
            "objectSid": [
                            {
                                "encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDngQAAA==",
                                "encoding": "base64"
                            }
                        ],
            "primaryGroupID": ["513"],
            "pwdLastSet": ["131577266641617910"],
            "sAMAccountName": ["jkowalski"],
            "sAMAccountType": ["805306368"],
            "sn": ["Kowalski"],
            "uSNChanged": ["149609"],
            "uSNCreated": ["5397"],
            "userAccountControl": ["512"],
            "userPrincipalName": ["jkowalski@universum.local"],
            "whenChanged": ["20221110061556.0Z"],
            "whenCreated": ["20130610115016.0Z"]},
            "dn": "CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"
        }
    ]
}

Mark Tolonen · Accepted Answer

Use the following to suppress Unicode escape codes and write the data UTF-8-encoded to support non-ASCII characters.

# Open the target as UTF-8 text and tell json.dump to keep non-ASCII
# characters as they are instead of emitting \uXXXX escapes.
with open(json_cmptrs_data, 'w', encoding='utf8') as out_file:
    json.dump(data, out_file, ensure_ascii=False)

Working example:

import json

# Sample record whose value contains Polish letters (spelled as escapes here).
data = {"cn": ["Bo\u017Cena \u017Ar\u00F3dlana"]}
# ensure_ascii=False keeps those letters unescaped; the file is UTF-8 text.
with open('output.json', mode='w', encoding='utf8') as out:
    json.dump(data, out, ensure_ascii=False)

output.json (UTF-8-encoded):

{"cn": ["Bożena źródlana"]}

Writing to a json file with Polish characters

Answers (1)

Related Questions