Reputation: 118
I'm creating a virtual/in-memory file system as part of a Flask app, where the user creates files & folders, and I save them in a SQL DB and display the directory tree back to the user in the UI (think Dropbox/Google Drive).
For reprex' sake, the relevant metadata in both the 'File' and 'Folder' SQL tables would be: ['object_id', 'parent_id', 'child_nodes']
where,
I created Project and File classes to handle internal methods & properties (excluded but necessary). So ideally I need the classes in my end solution.
The main problem I'm running into is appending sub-directories as `child_nodes` to directories. This is observable [in the commented-out code at the bottom] when iterating from `dir_list[1]` to `dir_list[2]`, where `dir_list[1]` would already have been appended to `dir_list[0]` and therefore wouldn't reflect the following iteration.
Looking for any suggestions on how to implement this. I'm also entirely open to using different data structures altogether, so long as I can add metadata and format it the same way that `FileDir.create_tree()` does. Note: I need to iterate through a theoretically infinite number of subdirectories, not just what is in my reprex.
# Objects for organizing each struct -----
class File:
    """A file entry built from one row of the file query.

    ``file_list`` is indexed positionally: ``[0]`` is the id, ``[1]`` the
    name and ``[2]`` the id of the containing directory (``None`` in the
    example data for a file that has no parent directory).
    """

    def __init__(self, file_list):
        self.id = file_list[0]
        self.name = file_list[1]
        self.parent = file_list[2]
        # Flag used to tell files apart from directories in the tree output.
        self.directory = False
class Directory:
    """A directory entry built from one row of the directory query.

    ``dir_list`` is indexed positionally: ``[0]`` is the id, ``[1]`` the name
    and ``[2]`` the parent directory id (``None`` in the example data for a
    top-level directory).  Children are accumulated in ``child_nodes`` as
    plain dicts.
    """

    def __init__(self, dir_list):
        self.id = dir_list[0]
        self.name = dir_list[1]
        self.parent = dir_list[2]
        self.child_nodes = []
        self.directory = True

    def add_file_node(self, node):
        """Append a dict describing the file ``node`` to ``child_nodes``.

        ``node`` must expose ``id``, ``name``, ``parent`` and ``directory``.
        """
        self.child_nodes.append({
            'id': node.id,
            'name': node.name,
            # The child's own parent id, not this directory's parent.
            'parent': node.parent,
            'is_dir': node.directory,
        })

    def add_dir_node(self, node):
        """Append a dict describing the sub-directory ``node``.

        The entry's ``'children'`` is the sub-directory's own
        ``child_nodes`` list (shared, not copied), so anything attached to
        ``node`` afterwards still appears in this directory's tree.  Using
        this directory's list here would make the entry contain itself.
        """
        self.child_nodes.append({
            'id': node.id,
            'name': node.name,
            # The child's own parent id, not this directory's parent.
            'parent': node.parent,
            'is_dir': node.directory,
            'children': node.child_nodes,
        })

    def return_tree(self):
        """Return this directory and its accumulated children as a dict."""
        return {
            'name': self.name,
            'children': self.child_nodes,
            'parent': self.parent,
            'is_directory': self.directory,
        }
class FileDir:
    """Wrap a list of directory objects and render them as a list of dicts."""

    def __init__(self, dir_list):
        # Objects in ``dir_list`` must provide a ``return_tree()`` method.
        self.dir_list = dir_list

    def create_tree(self):
        """Return ``return_tree()`` of every directory, in list order."""
        return [directory.return_tree() for directory in self.dir_list]
# Example Data (formatted as 2d-list from my SQL query) -----
# Each row is [id, name, parent_id]; parent_id is None for top-level entries.
dir_list = [
    ['10001', 'dir_1', None],
    ['10002', 'dir_2', '10001'],
    ['10003', 'dir_3', '10002'],
    ['10004', 'dir_4', None],
]
file_list = [
    ['21110', 'file1.csv', None],
    ['21111', 'file2.csv', '10001'],
    ['21112', 'file3.csv', '10002'],
    ['21113', 'file3.csv', '10003'],
]

dir_objs = [Directory(d) for d in dir_list]
file_objs = [File(f) for f in file_list]

# Attach every file that has a parent to the directory with the matching id.
for fil in file_objs:
    if fil.parent:
        for x in dir_objs:
            if fil.parent == x.id:
                x.add_file_node(fil)

# TODO Append sub_folders
# ...
#
# for d in dir_objs:
#     if d.parent:
#         for i, x in enumerate(dir_objs):
#             if d.parent == x.id:
#                 x.add_dir_node(d)
#                 dir_objs.remove(d)

tree = FileDir(dir_objs)
tree.create_tree()
Upvotes: 1
Views: 195
Reputation: 1032
Does this code accomplish your needs?
# Objects for organizing each struct -----
class File:
    """A file entry built from one ``[id, name, parent_id]`` row."""

    def __init__(self, file_list):
        self.id = file_list[0]
        self.name = file_list[1]
        # Id of the containing directory; ``None`` in the example data when
        # the file has no parent directory.
        self.parent = file_list[2]
        # Flag used to tell files apart from directories in the tree output.
        self.directory = False
class Directory:
    """A directory entry built from one ``[id, name, parent_id]`` row.

    Children (files and sub-directories) are accumulated in ``child_nodes``
    as plain dicts.
    """

    def __init__(self, dir_list):
        self.id = dir_list[0]
        self.name = dir_list[1]
        self.parent = dir_list[2]
        self.child_nodes = []
        self.directory = True

    def add_file_node(self, node):
        """Append a dict describing the file ``node`` to ``child_nodes``.

        ``node`` must expose ``id``, ``name``, ``parent`` and ``directory``.
        """
        file_entry = {
            'id': node.id,
            'name': node.name,
            # The child's own parent id, not this directory's parent.
            'parent': node.parent,
            'is_dir': node.directory,
        }
        self.child_nodes.append(file_entry)

    def add_dir_node(self, node):
        """Append a dict describing the sub-directory ``node``.

        ``'children'`` refers to the sub-directory's own ``child_nodes``
        list (shared, not copied), so entries attached to ``node`` later are
        still visible from this directory.  Pointing it at this directory's
        list instead would make the entry contain itself.
        """
        dir_entry = {
            'id': node.id,
            'name': node.name,
            # The child's own parent id, not this directory's parent.
            'parent': node.parent,
            'is_dir': node.directory,
            'children': node.child_nodes,
        }
        self.child_nodes.append(dir_entry)

    def return_tree(self):
        """Return this directory and its accumulated children as a dict."""
        return {
            'name': self.name,
            'children': self.child_nodes,
            'parent': self.parent,
            'is_directory': self.directory,
        }
class FileDir:
    """Hold a list of directory objects and render them as a list of dicts."""

    def __init__(self, dir_list):
        # Each element must provide a ``return_tree()`` method.
        self.dir_list = dir_list

    def create_tree(self):
        """Return one ``return_tree()`` dict per directory, in list order."""
        return [directory.return_tree() for directory in self.dir_list]
# Example Data (formatted as 2d-list from my SQL query) -----
# Each row is [id, name, parent_id]; parent_id is None for top-level entries.
dir_list = [
    ['10001', 'dir_1', None],
    ['10002', 'dir_2', '10001'],
    ['10003', 'dir_3', '10002'],
    ['10004', 'dir_4', None],
]
file_list = [
    ['21110', 'file1.csv', None],
    ['21111', 'file2.csv', '10001'],
    ['21112', 'file3.csv', '10002'],
    ['21113', 'file3.csv', '10003'],
]

dir_objs = [Directory(d) for d in dir_list]
file_objs = [File(f) for f in file_list]

# Attach every file that has a parent to the directory with the matching id.
for fil in file_objs:
    if fil.parent:
        for x in dir_objs:
            if fil.parent == x.id:
                x.add_file_node(fil)

# Attach every directory that has a parent to the directory with that id.
for dir_obj in dir_objs:
    if dir_obj.parent:
        for potential_parent_dir_obj in dir_objs:
            if dir_obj.parent == potential_parent_dir_obj.id:
                potential_parent_dir_obj.add_dir_node(dir_obj)

# Keep only the directories without a parent as the roots of the tree.
dir_objs = [dir_obj for dir_obj in dir_objs if not dir_obj.parent]

tree = FileDir(dir_objs)
tree.create_tree()
This won't handle directory trees more than two levels deep, but your example data doesn't indicate that is necessary. Let me know if you do need to handle deeply nested hierarchies. You will need a different approach.
EDIT
Here is a more robust version that I developed with Python 3.8 that should handle arbitrary depth. I haven't tested it extensively, but hopefully this helps. No confusing recursion (on the surface).
from __future__ import annotations
from typing import Union, List
from dataclasses import dataclass, asdict, field
import json
@dataclass
class Node:
    """Base record holding the fields common to files and directories."""

    node_id: str
    name: str
    # ``None`` when the node has no parent.
    parent_node_id: Union[str, None] = None

    def to_tree(self):
        """Return this node as a plain dict via ``dataclasses.asdict``.

        ``asdict`` recurses into dataclass instances stored in fields
        (including inside lists), so nested nodes are converted as well.
        """
        return asdict(self)
@dataclass
class File(Node):
    """A ``Node`` whose ``is_directory`` flag defaults to ``False``."""

    is_directory: bool = False
@dataclass
class Directory(Node):
    """A ``Node`` that can hold child files and directories."""

    is_directory: bool = True
    # ``default_factory`` gives every instance its own list.
    children: List[Union[Directory, File]] = field(default_factory=list)

    def add_child(self, child: Union[Directory, File]):
        """Append ``child`` to this directory's ``children`` list."""
        self.children.append(child)
class FileSystem:
    """Index nodes by id and link every non-root node under its parent.

    Nodes must expose ``node_id``, ``parent_node_id`` and ``is_directory``.
    Nodes used as parents must also provide ``add_child``, and root nodes
    must provide ``to_tree``.
    """

    def __init__(self, *nodes):
        """Build the id index, then attach each node to its parent.

        Raises:
            KeyError: if a node's ``parent_node_id`` is not the id of any
                node passed in.
        """
        self.nodes = {node.node_id: node for node in nodes}
        for node in self.non_root_nodes:
            try:
                parent = self.nodes[node.parent_node_id]
            except KeyError:
                # Same exception type as a plain lookup, with enough context
                # to find the offending row.
                raise KeyError(
                    f"node {node.node_id!r} references unknown parent "
                    f"{node.parent_node_id!r}"
                ) from None
            parent.add_child(node)

    def __getitem__(self, node_id):
        """Return the node registered under ``node_id``."""
        return self.nodes[node_id]

    @property
    def root_nodes(self):
        """Nodes whose ``parent_node_id`` is ``None``."""
        return [
            node for node in self.nodes.values()
            if node.parent_node_id is None
        ]

    @property
    def non_root_nodes(self):
        """Nodes whose ``parent_node_id`` is not ``None``."""
        return [
            node for node in self.nodes.values()
            if node.parent_node_id is not None
        ]

    @property
    def directories(self):
        """Nodes with a truthy ``is_directory``."""
        return [node for node in self.nodes.values() if node.is_directory]

    @property
    def files(self):
        """Nodes with a falsy ``is_directory``."""
        return [node for node in self.nodes.values() if not node.is_directory]

    def to_tree(self):
        """Return ``to_tree()`` of every root node, in insertion order."""
        return [node.to_tree() for node in self.root_nodes]
# Raw rows as they would come back from the SQL query.
dir_list = [
    # id, name, parent_node_id
    ['10001', 'dir_1', None],
    ['10002', 'dir_2', '10001'],
    ['10003', 'dir_3', '10002'],
    ['10004', 'dir_4', None],
]
file_list = [
    ['21110', 'file1.csv', None],
    ['21111', 'file2.csv', '10001'],
    ['21112', 'file3.csv', '10002'],
    ['21113', 'file3.csv', '10003'],
]

# Turn each row into a node object; the row fields map positionally onto
# the constructor arguments.
dir_list = [
    Directory(node_id, name, parent_id)
    for node_id, name, parent_id in dir_list
]
file_list = [
    File(node_id, name, parent_id)
    for node_id, name, parent_id in file_list
]

# Directories first, then files, exactly as they are registered.
file_system = FileSystem(*(dir_list + file_list))
tree = file_system.to_tree()
print(json.dumps(tree, indent=2))
Upvotes: 1