Reputation: 83
I am using a JSON file to send data from an LDAP database on a Linux Samba AD DC for further processing. I fetch the data with a script written in Python 3. My problem is that some fields contain Polish characters that are written as Unicode escape sequences; for example, "Bo\u017Cena \u017Ar\u00F3dlana" should be "Bożena źródlana". I would like the file to contain already-decoded data so that I can read it without guessing which character is behind each escape code. Where in my code should I put something like a decoder so that the entire file is saved already decoded, containing the Polish special characters?
My Python 3 code:
#! /usr/bin/python3
import os
import configparser
import getpass
import sys
import json
import ssl
import shutil
from ldap3 import Server, Connection, Tls, ALL_ATTRIBUTES
from datetime import date
# screen cleaner
os.system('clear')

# timestamp (ISO date string, used as the prefix of the archive file names)
current_datetime = str(date.today())

# load main config file
main_conf_file = "/tmp/ldap-searchlight/config/searchlight.conf"
config = configparser.RawConfigParser()
# config.read() skips files it cannot open and returns the list of files it
# actually parsed, so an empty result means the configuration is missing.
# Stop here with a clear message instead of failing later with a KeyError.
if not config.read(main_conf_file):
    print("Cannot read config file: " + main_conf_file + "\n")
    sys.exit(1)

# variables
main_path = config['GLOBAL']['main_path']
conf_path = config['GLOBAL']['conf_path']
data_path = config['GLOBAL']['data_path']
arch_patch = config['GLOBAL']['arch_patch']
json_users_file = config['USERS']['json_users_file']
json_cmptrs_file = config['CMPTRS']['json_cmptrs_file']

# ldap variables
ldap_base_dn = config['GLOBAL']["ldap-base-dn"]
ldap_users = config['USERS']['ldap-users']
ldap_cmptrs = config['CMPTRS']['ldap_cmptrs']
user1_name = config['USERS']['user1-name']
user2_name = config['USERS']['user2-name']
user3_name = config['USERS']['user3-name']
user4_name = config['USERS']['user4-name']
user5_name = config['USERS']['user5-name']

# user's choice
print(
    "Logujesz się jako:\n" +
    " wybierz [ 1 ] dla " + user1_name + "\n" +
    " wybierz [ 2 ] dla " + user2_name + "\n" +
    " wybierz [ 3 ] dla " + user3_name + "\n" +
    " wybierz [ 4 ] dla " + user4_name + "\n" +
    " wybierz [ 5 ] dla " + user5_name + "\n"
)
input_name = input("WYBRANO: ")
# the menu number maps directly onto the config key "ldap-user<N>";
# only the key of the chosen user is read from the config
if input_name in ("1", "2", "3", "4", "5"):
    user = config["USERS"]["ldap-user" + input_name]
else:
    print("Permission denied\n")
    sys.exit(1)

password = getpass.getpass()

LDAP_HOST = config['GLOBAL']['ldap-host']
# bind DN = <selected user>,<users container>,<base DN>
LDAP_USER = user + "," + ldap_users + "," + ldap_base_dn
LDAP_PASSWORD = password
# NOTE(review): certificate validation is switched off (CERT_NONE) and the
# protocol is pinned to TLS 1.0 - confirm both are really required by the
# server before relying on this connection for anything sensitive.
tls_configuration = Tls(validate=ssl.CERT_NONE, version=ssl.PROTOCOL_TLSv1)
def ldap_server():
    """Build the ldap3 ``Server`` object for LDAP_HOST.

    SSL is enabled and the module-level ``tls_configuration`` is used.
    """
    # NOTE(review): ALL_ATTRIBUTES looks like the wildcard meant for search
    # attribute lists rather than a get_info option - confirm the intended
    # value against the ldap3 Server documentation before changing it.
    server_options = {
        "use_ssl": True,
        "tls": tls_configuration,
        "get_info": ALL_ATTRIBUTES,
    }
    return Server(LDAP_HOST, **server_options)
def ldap_connection():
    """Open an LDAP connection bound as the selected user.

    Binds immediately (``auto_bind=True``) with LDAP_USER / LDAP_PASSWORD
    against the server returned by ``ldap_server()``.

    Returns:
        The ldap3 ``Connection`` object.
    """
    # No trailing comma after the call: ``ldap_server(),`` builds a
    # one-element tuple, so the Connection would not receive the Server
    # object itself.
    server = ldap_server()
    return Connection(
        server,
        user=LDAP_USER,
        password=LDAP_PASSWORD,
        auto_bind=True,
    )
# ldap users
LDAP_BASE_DN = ldap_users + "," + ldap_base_dn
LDAP_OBJECT_FILTER = '(objectclass=user)'
# attributes requested for every user entry
user_attr_list = [
    'cn',
    'sn',
    'givenName',
    'instanceType',
    'whenCreated',
    'displayName',
    'uSNCreated',
    'name',
    'objectGUID',
    'badPwdCount',
    'codePage',
    'countryCode',
    'badPasswordTime',
    'lastLogoff',
    'lastLogon',
    'primaryGroupID',
    'objectSid',
    'accountExpires',
    'logonCount',
    'sAMAccountName',
    'sAMAccountType',
    'userPrincipalName',
    'objectCategory',
    'pwdLastSet',
    'userAccountControl',
    'lastLogonTimestamp',
    'whenChanged',
    'uSNChanged',
    'memberOf',
    'distinguishedName',
]
conn = ldap_connection()
conn.search(LDAP_BASE_DN, LDAP_OBJECT_FILTER, attributes=user_attr_list)

# output to json
json_users_data = main_path + data_path + json_users_file
data = json.loads(conn.response_to_json())
# the result is already in memory, so release the server connection
conn.unbind()
# ensure_ascii=False keeps non-ASCII characters (e.g. Polish letters) as real
# characters instead of \uXXXX escapes; the explicit UTF-8 encoding makes the
# file content independent of the system locale.
with open(json_users_data, 'w', encoding='utf-8') as jsonfile:
    json.dump(data, jsonfile, ensure_ascii=False)

# copy data to archive (file name is prefixed with today's date)
json_users_arch = main_path + arch_patch + current_datetime + "_" + json_users_file
shutil.copy2(json_users_data, json_users_arch)
# ldap computers
LDAP_BASE_DN = ldap_cmptrs + "," + ldap_base_dn
LDAP_OBJECT_FILTER = '(objectclass=computer)'
# attributes requested for every computer entry
cmptr_attr_list = [
    'cn',
    'instanceType',
    'whenCreated',
    'uSNCreated',
    'name',
    'objectGUID',
    'badPwdCount',
    'codePage',
    'countryCode',
    'badPasswordTime',
    'lastLogoff',
    'lastLogon',
    'primaryGroupID',
    'accountExpires',
    'logonCount',
    'sAMAccountName',
    'sAMAccountType',
    'objectCategory',
    'pwdLastSet',
    'userAccountControl',
    'lastLogonTimestamp',
    'whenChanged',
    'uSNChanged',
    'dNSHostName',
    'isCriticalSystemObject',
    'msDS-SupportedEncryptionTypes',
    'operatingSystem',
    'operatingSystemVersion',
    'servicePrincipalName',
    'distinguishedName',
]
conn = ldap_connection()
conn.search(LDAP_BASE_DN, LDAP_OBJECT_FILTER, attributes=cmptr_attr_list)

# output to json
json_cmptrs_data = main_path + data_path + json_cmptrs_file
data = json.loads(conn.response_to_json())
# the result is already in memory, so release the server connection
conn.unbind()
# ensure_ascii=False keeps non-ASCII characters as real characters instead of
# \uXXXX escapes; the explicit UTF-8 encoding makes the file content
# independent of the system locale.
with open(json_cmptrs_data, 'w', encoding='utf-8') as jsonfile:
    json.dump(data, jsonfile, ensure_ascii=False)

# copy data to archive (file name is prefixed with today's date)
json_cmptrs_arch = main_path + arch_patch + current_datetime + "_" + json_cmptrs_file
shutil.copy2(json_cmptrs_data, json_cmptrs_arch)
# final summary: where the data and archive files were written
summary_lines = [
    "USERS:",
    "Data file created at: " + json_users_data,
    "Archive file created at: " + json_users_arch,
    "------------------------------------------------------------------------------",
    "COMPUTERS",
    "Data file created at: " + json_cmptrs_data,
    "Archive file created at: " + json_cmptrs_arch,
]
for summary_line in summary_lines:
    print(summary_line)
sys.exit(0)
# exit(0) -> OK
# exit(1) -> FAULT
My JSON output looks like this:
{"entries": [
{"attributes":
{
"accountExpires": ["9223372036854775807"],
"badPasswordTime": [],
"badPwdCount": [],
"cn": ["Bo\u017Cena \u017Ar\u00F3dlana"],
"codePage": ["0"],
"countryCode": ["0"],
"displayName": ["Bo\u017Cena \u017Ar\u00F3dlana"],
"distinguishedName": ["CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"],
"givenName": ["Bo\u017Cena"],
"instanceType": ["4"],
"lastLogoff": [],
"lastLogon": [],
"lastLogonTimestamp": ["132978476924537530"],
"logonCount": [],
"memberOf": [],
"name": ["Bo\u017Cena \u017Ar\u00F3dlana"],
"objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"],
"objectGUID": [
{
"encoded": "AFvzBO0T+Ey9TL3RHGtghQ==",
"encoding": "base64"
}
],
"objectSid": [
{
"encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDIE8AAA==",
"encoding": "base64"
}
],
"primaryGroupID": ["513"],
"pwdLastSet": ["132979783101549910"],
"sAMAccountName": ["pjarmolowicz"],
"sAMAccountType": ["805306368"],
"sn": ["\u017Ar\u00F3dlana"],
"uSNChanged": ["4986"],
"uSNCreated": ["4986"],
"userAccountControl": ["512"],
"userPrincipalName": ["[email protected]"],
"whenChanged": ["20220525185150.0Z"],
"whenCreated": ["20211125124337.0Z"]},
"dn": "CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"
},
{"attributes": {
"accountExpires": ["9223372036854775807"],
"badPasswordTime": ["133128872888506790"],
"badPwdCount": ["0"],
"cn": ["Jan Kowalski"],
"codePage": ["0"],
"countryCode": ["0"],
"displayName": ["Jan Kowalski"],
"distinguishedName": ["CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"],
"givenName": ["Jan"],
"instanceType": ["4"],
"lastLogoff": [],
"lastLogon": ["133129921828641420"],
"lastLogonTimestamp": ["133125345565644950"],
"logonCount": ["55"],
"memberOf": [],
"name": ["Jan Kowalski"],
"objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"],
"objectGUID": [
{
"encoded": "AScnTASpKUun4oadMC5Qxg==",
"encoding": "base64"
}
],
"objectSid": [
{
"encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDngQAAA==",
"encoding": "base64"
}
],
"primaryGroupID": ["513"],
"pwdLastSet": ["131577266641617910"],
"sAMAccountName": ["jkowalski"],
"sAMAccountType": ["805306368"],
"sn": ["Kowalski"],
"uSNChanged": ["149609"],
"uSNCreated": ["5397"],
"userAccountControl": ["512"],
"userPrincipalName": ["[email protected]"],
"whenChanged": ["20221110061556.0Z"],
"whenCreated": ["20130610115016.0Z"]},
"dn": "CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"
}
]
}
Upvotes: 0
Views: 837
Reputation: 177471
Use the following to suppress Unicode escape codes and write the data UTF-8-encoded to support non-ASCII characters.
# write real UTF-8 characters instead of \uXXXX escape sequences
with open(json_cmptrs_data, 'w', encoding='utf8') as out_file:
    json.dump(data, out_file, ensure_ascii=False)
Working example:
import json

# the \u escapes are only source-code notation; the str holds the real characters
data = {"cn": ["Bo\u017Cena \u017Ar\u00F3dlana"]}

# ensure_ascii=False writes those characters as-is, UTF-8 encoded
with open('output.json', 'w', encoding='utf8') as out_file:
    json.dump(data, out_file, ensure_ascii=False)
output.json (UTF-8-encoded):
{"cn": ["Bożena źródlana"]}
Upvotes: 1