Reputation: 13
I've tried several solutions that I found on stack overflow but I either end up with empty dictionaries at the end of the branch or a filemarker when moving from dictionary to list of files.
The problem: A python list object
paths =[/etc/bluetooth/rfcomm.conf.dpkg-remove
/etc/bluetooth/serial.conf.dpkg-remove
/etc/bluetooth/input.conf
/etc/bluetooth/audio.conf.dpkg-remove
/etc/bluetooth/network.conf
/etc/bluetooth/main.conf
/etc/fish
/etc/fish/completions
/etc/fish/completions/task.fish]
expected output:
{"etc" :
{"bluetooth" :
["rfcomm.conf.dpkg-remove",
"serial.conf.dpkg-remove",
"input.conf",
"audio.conf.dpkg-remove",
"network.conf",
"main.conf"],
"fish" :
{"completions" : "task.fish"}}}
I found that I was able to output this in javascript using: Parse string of file path to json object
Is there a way to use dafaultdict to create lists of leafs without any preceding text?
This got me closest to what I am looking for but I am left with file marker(defaultdict(dict, ((FILE_MARKER, []),))) at each node.
from collections import defaultdict
FILE_MARKER = '<files>'
def attach(branch, trunk):
'''
Insert a branch of directories on its trunk.
'''
parts = branch.split('/', 1)
if len(parts) == 1: # branch is a file
trunk[FILE_MARKER].append(parts[0])
else:
node, others = parts
if node not in trunk:
trunk[node] = defaultdict(dict, ((FILE_MARKER, []),))
attach(others, trunk[node])
Upvotes: 1
Views: 1150
Reputation: 17166
Code
from pprint import pprint as pp # For pretty printing nested dictioanry
def make_path(paths):
# Sort so deepest paths are first
paths = sorted(paths, key = lambda s: len(s.lstrip('/').split('/')), reverse = True)
tree_path = {}
for path in paths:
# Split into list and remove leading '/' if present
levels = path.lstrip('/').split("/")
file = levels.pop()
acc = tree_path
for i, p in enumerate(levels, start = 1):
if i == len(levels):
# Reached termination of a path
# Use current terminal object is present, else use list
acc[p] = acc[p] if p in acc else []
if isinstance(acc[p], list):
# Only append if we are at a list
acc[p].append(file)
else:
# Exaand with dictionary by default
acc.setdefault(p, {})
acc = acc[p]
return tree_path
Usage
Example 1
paths =['/etc/bluetooth/rfcomm.conf.dpkg-remove',
'/etc/bluetooth/serial.conf.dpkg-remove',
'/etc/bluetooth/input.conf',
'/etc/bluetooth/audio.conf.dpkg-remove',
'/etc/bluetooth/network.conf',
'/etc/bluetooth/main.conf',
'/etc/fish',
'/etc/fish/completions',
'/etc/fish/completions/task.fish']
res = make_path(paths)
pp(res) # Pretty print result
Output
{'etc': {'bluetooth': ['rfcomm.conf.dpkg-remove',
'serial.conf.dpkg-remove',
'input.conf',
'audio.conf.dpkg-remove',
'network.conf',
'main.conf'],
'fish': {'completions': ['task.fish']}}}
Example 2
paths = [
"path1/subpath1/file111.doc",
"path1/subpath1/file112.doc",
"path1/subpath2/file121.doc",
"path1/subpath2/file122.doc",
"path2/subpath1/file211.doc",
"path2/subpath1/file212.doc",
"path2/subpath2/file221.doc",
"path2/subpath2/file222.doc",
"path2/additionalpath3/additionalpath1/file2311.doc"
]
res = make_path(paths)
pp(res) # Pretty print result
Output
{'path1': {'subpath1': ['file111.doc', 'file112.doc'],
'subpath2': ['file121.doc', 'file122.doc']},
'path2': {'additionalpath3': {'additionalpath1': ['file2311.doc']},
'subpath1': ['file211.doc', 'file212.doc'],
'subpath2': ['file221.doc', 'file222.doc']}}
Upvotes: 4
Reputation: 2159
The problem is closely related to a Trie. When taking the following input
paths = [
"/etc/bluetooth/rfcomm.conf.dpkg-remove",
"/etc/bluetooth/serial.conf.dpkg-remove",
"/etc/bluetooth/input.conf",
"/etc/bluetooth/audio.conf.dpkg-remove",
"/etc/bluetooth/network.conf",
"/etc/bluetooth/main.conf",
"/etc/bluetooth/subdirectory/main.conf",
"/etc/fish",
"/etc/fish/completions",
"/etc/fish/completions/task.fish"
]
We can quickly construct a Trie using the following
from collections import defaultdict
from functools import reduce
from pprint import pprint
# ... your input
if __name__ == '__main__':
node = lambda: defaultdict(node)
parts = node()
for path in paths:
reduce(dict.__getitem__, path[1:].split('/'), parts)
pprint(parts, width=10)
Which results in the following Trie
defaultdict(<function <lambda> at 0x000001E34CC06E50>,
{'etc': defaultdict(<function <lambda> at 0x000001E34CC06E50>,
{'bluetooth': defaultdict(<function <lambda> at 0x000001E34CC06E50>,
{'audio.conf.dpkg-remove': defaultdict(<function <lambda> at 0x000001E34CC06E50>, {}),
'input.conf': defaultdict(<function <lambda> at 0x000001E34CC06E50>, {}),
'main.conf': defaultdict(<function <lambda> at 0x000001E34CC06E50>, {}),
'network.conf': defaultdict(<function <lambda> at 0x000001E34CC06E50>, {}),
'rfcomm.conf.dpkg-remove': defaultdict(<function <lambda> at 0x000001E34CC06E50>, {}),
'serial.conf.dpkg-remove': defaultdict(<function <lambda> at 0x000001E34CC06E50>, {}),
'subdirectory': defaultdict(<function <lambda> at 0x000001E34CC06E50>,
{'main.conf': defaultdict(<function <lambda> at 0x000001E34CC06E50>, {})})}),
'fish': defaultdict(<function <lambda> at 0x000001E34CC06E50>,
{'completions': defaultdict(<function <lambda> at 0x000001E34CC06E50>,
{'task.fish': defaultdict(<function <lambda> at 0x000001E34CC06E50>, {})})})})})
Now you might want to make it slightly prettier, by removing the defaultdict
aspect of the Trie, by either following
or making a normal dictionary out of it, with an ending token, see
Clean up code
def simplify(dictionary):
if isinstance(dictionary, defaultdict):
return {k: simplify(v) or None for k, v in dictionary.items()}
return dictionary
Usage
pprint(simplify(parts))
Output
{'etc': {'bluetooth': {'audio.conf.dpkg-remove': None,
'input.conf': None,
'main.conf': None,
'network.conf': None,
'rfcomm.conf.dpkg-remove': None,
'serial.conf.dpkg-remove': None,
'subdirectory': {'main.conf': None}},
'fish': {'completions': {'task.fish': None}}}}
I do assume here that it is oké, to indicate a file by giving it the value None
. This way it can properly handle subdirectories and all endings are clearly indicated.
Another (implicit) assumption is that the Trie only has to be constructed a single time when simplifying. If you want to keep adding extra paths to it afterwards, I would advice to keep the defaultdict
version of the Trie.
Upvotes: 2