Reputation: 21
For starters, I've only been playing with Python for about two weeks now and I'm relatively new to its processes. I'm trying to create a script that compares two directories with subdirectories and prints out ANY changes. I've read articles on here about using os.walk to walk the directories, and I've managed to write a script that prints all the files in a directory and its subdirectories in an understandable manner. I've also read on here and learned how to compare two directories, but it only compares one level deep.
import os
# Paths of the two directories that are supposed to be compared.
x = 'D:\\xfiles'
y = 'D:\\yfiles'
# NOTE: x and y are plain strings, so this iterates over the *characters* of x
# and keeps those that do not occur in y; the file system is never consulted.
q = [filename for filename in x if filename not in y]
# print() call form works on both Python 2 and Python 3.
print(q)
Obviously that does not do what I want it to. This however is listing all files and all directories.
import os
# Root of the directory tree to list.
x = 'D:\\xfiles'
x1 = os.walk(x)
# os.walk yields one (directory, subdirectory names, file names) triple per
# directory found below the root.
for dirName, subdirList, fileList in x1:
    print('Directory: %s' % dirName)
    for fname in fileList:
        # The backslash is escaped explicitly; a bare '\%' is an invalid
        # escape sequence. The printed text is a backslash followed by the name.
        print('\\%s' % fname)
How do I combine them and get it to work?
Upvotes: 2
Views: 8386
Reputation: 99
I have written code that compares two directories recursively and, if there is a difference, points out the line that differs.
import os
# The two directories to compare, resolved relative to the directory that
# contains this script file.
FOLDER_A, FOLDER_B = (
    os.path.join(os.path.dirname(__file__), folder_name)
    for folder_name in ('folder_a', 'folder_b')
)
def load_directory(directory):
    """Split the immediate entries of *directory* into files and non-files.

    Args:
        directory: Path of the directory to list (not searched recursively).

    Returns:
        A ``(files, directories)`` tuple of sets holding entry names (not
        full paths). Every entry that is not a regular file is placed in
        ``directories``.
    """
    files = set()
    directories = set()
    for name in os.listdir(directory):
        # os.path.join uses the platform's separator instead of a fixed '/'.
        if os.path.isfile(os.path.join(directory, name)):
            files.add(name)
        else:
            directories.add(name)
    return files, directories
def compare_files(a, b):
    """Report whether two files have the same content.

    The files are first read as text. When they differ, a message naming
    either the differing line counts or the first differing line number is
    printed. Files that cannot be decoded as text are compared byte for byte
    instead of aborting with ``UnicodeDecodeError``.

    Args:
        a: Path of the first file.
        b: Path of the second file.

    Returns:
        True when the contents are equal, False otherwise.

    Raises:
        AssertionError: If either path is not an existing regular file.
    """
    assert os.path.isfile(a)
    assert os.path.isfile(b)
    try:
        with open(a, 'r') as file:
            file_a = file.read()
        with open(b, 'r') as file:
            file_b = file.read()
    except UnicodeDecodeError:
        # Not valid text in the default encoding (e.g. images, archives):
        # fall back to comparing the raw bytes.
        with open(a, 'rb') as file:
            bytes_a = file.read()
        with open(b, 'rb') as file:
            bytes_b = file.read()
        if bytes_a != bytes_b:
            print(f'Different found in binary file {a} and {b}')
            return False
        return True

    if file_a == file_b:
        return True

    file_a_lines = file_a.split('\n')
    file_b_lines = file_b.split('\n')
    if len(file_a_lines) != len(file_b_lines):
        print(f'Two file {a} and {b} have different length, of {len(file_a_lines)} and {len(file_b_lines)}')
        return False
    # Line numbers in the report are 1-based.
    for index, (line_a, line_b) in enumerate(zip(file_a_lines, file_b_lines), start=1):
        if line_a != line_b:
            print(f'Different found in file {a} and {b}, at line number {index}')
            return False
    # Defensive fallback: contents differ, yet no differing line was found.
    print('Some thing wrong')
    return False
def compare_directories(a, b):
    """Recursively check that directories *a* and *b* hold the same content.

    The entry names of both directories are compared first (files, then
    subdirectories); on a mismatch both name sets are printed. After that
    every file and then every subdirectory is compared pairwise, stopping at
    the first difference.

    Returns:
        True when no difference was found, False otherwise.
    """
    assert not os.path.isfile(a)
    assert not os.path.isfile(b)

    files_in_a, subdirs_in_a = load_directory(a)
    files_in_b, subdirs_in_b = load_directory(b)

    if files_in_a != files_in_b:
        print(f'Different Found In {a} and {b} directories files')
        print(f'A: {files_in_a}\nB: {files_in_b}')
        return False

    if subdirs_in_a != subdirs_in_b:
        print(f'Different Found In {a} and {b} directories subdirectories')
        print(f'A: {subdirs_in_a}\nB: {subdirs_in_b}')
        return False

    # all() stops at the first falsy result, so nothing past the first
    # difference is inspected.
    files_match = all(
        compare_files(f'{a}/{name}', f'{b}/{name}') for name in files_in_a
    )
    if not files_match:
        return False
    return all(
        compare_directories(f'{a}/{name}', f'{b}/{name}')
        for name in subdirs_in_a
    )
def main():
    """Compare FOLDER_A with FOLDER_B and print the boolean outcome."""
    outcome = compare_directories(FOLDER_A, FOLDER_B)
    print(outcome)


# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Upvotes: 1
Reputation: 11
import os
def ls(path):
    """Return the sorted relative paths of everything below *path*.

    Args:
        path: Root directory to walk.

    Returns:
        A sorted list containing, for every subdirectory and file found
        under *path*, its path relative to *path*.
    """
    entries = []
    for base, sub_f, files in os.walk(path):
        # os.path.relpath removes the root prefix with whatever separator the
        # platform uses, instead of stripping backslashes only.
        entries.extend(
            os.path.relpath(os.path.join(base, name), path)
            for name in sub_f + files
        )
    entries.sort()
    return entries
def folder_diff(folder1_path, folder2_path):
    """List the relative paths present in only one of the two folders.

    Args:
        folder1_path: Root of the first directory tree.
        folder2_path: Root of the second directory tree.

    Returns:
        A list with the entries found only under *folder1_path* (in listing
        order) followed by the entries found only under *folder2_path*.
    """
    folder1_list = ls(folder1_path)
    folder2_list = ls(folder2_path)
    # Sets give constant-time membership tests; the lists keep the ordering.
    folder1_set = set(folder1_list)
    folder2_set = set(folder2_list)
    diff = [item for item in folder1_list if item not in folder2_set]
    diff.extend(item for item in folder2_list if item not in folder1_set)
    return diff
Upvotes: 1
Reputation: 1482
I guess that the best way to go would be external programs, as @Robᵩ suggests in the comments.
Using Python, I would recommend doing the following:
import os
def fileIsSame(right, left, path):
    """Check whether *path* (located under *right*) has a counterpart under *left*.

    Only existence is checked; file contents and timestamps are not compared.

    Args:
        right: Root of the tree that *path* belongs to.
        left: Root of the tree to look in.
        path: Path of an entry below *right*.

    Returns:
        True if the same relative path exists under *left*, else False.
    """
    # A relative path has no leading separator, so os.path.join keeps *left*
    # as the base instead of discarding it.
    relative = os.path.relpath(path, right)
    return os.path.exists(os.path.join(left, relative))
def compare(right, left):
    """Collect the files under *right* that have no counterpart under *left*.

    Args:
        right: Root of the tree that is walked.
        left: Root of the tree that is checked for matching paths.

    Returns:
        A list of paths (below *right*) for which fileIsSame() is false.
        Files that exist only under *left* are not reported.
    """
    difference = []
    # os.walk already descends into every subdirectory, and the names in
    # `files` are never directories, so no explicit recursion is needed.
    for root, dirs, files in os.walk(right):
        for name in files:
            path = os.path.join(root, name)
            if not fileIsSame(right, left, path):
                # count file as difference
                difference.append(path)
    return difference
This approach lacks a proper fileIsSame
function that would verify that files are the same by content or by modification date, and that would handle paths correctly (I'm not sure this variant does). This algorithm requires you to specify full paths.
Usage example:
# Print the files of the first tree that are missing from the second one.
print(compare(r'c:\test', r'd:\copy_of_test'))
If the second folder is a copy of the first, all differences in the paths (different drive letter and folder name) are ignored, and the output will be []
.
Upvotes: 2
Reputation: 55752
Write a function to aggregate your listing.
import os
def listfiles(path):
    """List every file below *path* with the root prefix removed.

    Args:
        path: Root directory to walk, as a string.

    Returns:
        A list of file paths with the leading *path* cut off. Files directly
        in *path* appear as bare names; files in subdirectories keep the
        separator that followed the root (e.g. ``<sep>sub<sep>name``).
    """
    files = []
    prefix_length = len(path)
    for dirName, subdirList, fileList in os.walk(path):
        # Cut off only the leading root; str.replace would also delete any
        # later occurrence of the same text inside the directory name.
        relative_dir = dirName[prefix_length:]
        files.extend(os.path.join(relative_dir, fname) for fname in fileList)
    return files
# Build the file listing of each of the two trees to compare.
x = listfiles('D:\\xfiles')
y = listfiles('D:\\yfiles')
You could use a list comprehension to extract the files that are not in both directories.
# Entries of x that are missing from y; each `not in y` test scans the list y.
q = [filename for filename in x if filename not in y]
But using sets is much more efficient and flexible.
# Build each set once and reuse it for every operation below.
set_x = set(x)
set_y = set(y)
files_only_in_x = set_x - set_y        # difference
files_only_in_y = set_y - set_x
files_only_in_either = set_x ^ set_y   # symmetric difference
files_in_both = set_x & set_y          # intersection
all_files = set_x | set_y              # union
Upvotes: 3