Reputation: 57
I have files with CommonChar is some of them and my python code works on them to build a dictionary. While building there are some required keys which users might forget to put in. The code should be able to flag the file and the key which is missing.
The syntax for python code to work on is like this:
CommonChar pins Category General
CommonChar pins Contact Mark
CommonChar pins Description 1st line
CommonChar pins Description 2nd line
CommonChar nails Category specific
CommonChar nails Description 1st line
So for above example "Contact" is missing: CommonChar nails Contact Robert
I have a list for ex: mustNeededKeys=["Category", "Description", "Contact"]
mainDict={}
for dirName, subdirList, fileList in os.walk(sys.argv[1]):
for eachFile in fileList:
#excluding file names ending in .swp , swo which are creatied temporarily when editing in vim
if not eachFile.endswith(('.swp','.swo','~')):
#print eachFile
filePath= os.path.join(dirName,eachFile)
#print filePath
with open(filePath, "r") as fh:
contents=fh.read()
items=re.findall("CommonChar.*$",contents,re.MULTILINE)
for x in items:
cc, group, topic, data = x.split(None, 3)
data = data.split()
group_dict = mainDict.setdefault(group, {'fileLocation': [filePath]})
if topic in group_dict:
group_dict[topic].extend(['</br>'] + data)
else:
group_dict[topic] = data
This above code does its job of building a dict like this:
{'pins': {'Category': ['General'], 'Contact': ['Mark'], 'Description': ['1st', 'line', '2nd', 'line'] } , 'nails':{'Category':['specific'], 'Description':['1st line']}
So when reading each file with CommonChar and building a group_dict , a way to check all the keys and compare it with mustNeededKeys and flag if not there and proceed if met.
Upvotes: 0
Views: 491
Reputation: 18635
Something like this should work:
# Setup mainDict (equivalent to code given above)
mainDict = {
'nails': {
'Category': ['specific'],
'Description': ['1st', 'line'],
'fileLocation': ['/some/path/nails.txt']
},
'pins': {
'Category': ['General'],
'Contact': ['Mark'],
'Description': ['1st', 'line', '</br>', '2nd', 'line'],
'fileLocation': ['/some/path/pins.txt']
}
}
# check for missing keys
mustNeededKeys = {"Category", "Description", "Contact"}
for group, group_dict in mainDict.items():
missing_keys = mustNeededKeys - set(group_dict.keys())
if missing_keys:
missing_key_list = ','.join(missing_keys)
print(
'group "{}" ({}) is missing key(s): {}'
.format(group, group_dict['fileLocation'][0], missing_key_list)
)
# group "nails" (/some/path/nails.txt) is missing key(s): Contact
If you must check for missing keys immediately after processing each group, you could use the code below. This assumes that each group is stored as a contiguous collection of rows in a single file (i.e., not mixed with other groups in the same file or spread across different files).
from itertools import groupby
mainDict={}
mustNeededKeys = {"Category", "Description", "Contact"}
for dirName, subdirList, fileList in os.walk(sys.argv[1]):
for eachFile in fileList:
# excluding file names ending in .swp , swo which are created
# temporarily when editing in vim
if not eachFile.endswith(('.swp','.swo','~')):
#print eachFile
filePath = os.path.join(dirName,eachFile)
#print filePath
with open(filePath, "r") as fh:
contents = fh.read()
items = re.findall("CommonChar.*$", contents, re.MULTILINE)
split_items = [line.split(None, 3) for line in items]
# group the items by group name (element 1 in each row)
for g, group_items in groupby(split_items, lambda row: row[1]):
group_dict = {'fileLocation': [filePath]}
# store all items in the current group
for cc, group, topic, data in group_items:
data = data.split()
if topic in group_dict:
group_dict[topic].extend(['</br>'] + data)
else:
group_dict[topic] = data
# check for missing keys
missing_keys = mustNeededKeys - set(group_dict.keys())
if missing_keys:
missing_key_list = ','.join(missing_keys)
print(
'group "{}" ({}) is missing key(s): {}'
.format(group, filePath, missing_key_list)
)
# add group to mainDict
mainDict[group] = group_dict
Upvotes: 1
Reputation: 195553
data = '''CommonChar pins Category General
CommonChar pins Contact Mark
CommonChar pins Description 1st line
CommonChar pins Description 2nd line
CommonChar nails Category specific
CommonChar nails Description 1st line'''
from collections import defaultdict
from pprint import pprint
required_keys = ["Category", "Description", "Contact"]
d = defaultdict(dict)
for line in data.splitlines():
line = line.split()
if line[2] == 'Description':
if line[2] not in d[line[1]]:
d[line[1]][line[2]] = []
d[line[1]][line[2]].extend(line[3:])
else:
d[line[1]][line[2]] = [line[3]]
pprint(dict(d))
print('*' * 80)
# find missing keys
for k in d.keys():
for missing_key in set(d[k].keys()) ^ set(required_keys):
print('Key "{}" is missing "{}"!'.format(k, missing_key))
Prints:
{'nails': {'Category': ['specific'], 'Description': ['1st', 'line']},
'pins': {'Category': ['General'],
'Contact': ['Mark'],
'Description': ['1st', 'line', '2nd', 'line']}}
********************************************************************************
Key "nails" is missing "Contact"!
Upvotes: 0