Reputation: 11
I have a tool which adds new files each day at 11 pm to a folder on a Unix server. I want to write a Python script, to be launched at 11:30 pm, which finds the new files, the removed files and the modified files inside this folder. The files have unique names, e.g. "123456_0.csv", "123456_1.csv". Since the script will be launched manually, I don't want to use the busy-wait pattern. I want to use only Python libraries.
Thank you.
My code is below (I'm new to Python):
def F_CreateArchive(Time_stamp, Added_Filename, Aux_Filename, Field_Names, File_Extension):
    """Create the archive CSV on the first run, if it does not exist yet.

    When ``Added_Filename`` is missing, it is created with a header row and
    one ``'added'`` row (timestamp, path, MD5 of the content) for every file
    matched by the glob pattern ``File_Extension``. When the archive already
    exists, nothing is written.

    Args:
        Time_stamp: Value stored in the ``TIMESTAMP`` column of every row.
        Added_Filename: Path of the archive CSV to create.
        Aux_Filename: Not used by this function; kept so the signature
            matches ``F_CheckAdded``.
        Field_Names: Column names for the CSV header; the rows written here
            use ``TIMESTAMP``, ``FILENAME``, ``ACTION`` and ``MD5``.
        File_Extension: Glob pattern selecting the files to record
            (passed straight to ``glob.glob``).

    Returns:
        Always ``0``.
    """
    if not os.path.exists(Added_Filename):
        with open(Added_Filename, 'w', newline='') as arch:
            archive_write = csv.DictWriter(arch, fieldnames=Field_Names, delimiter=';')
            archive_write.writeheader()
            for each_folder_file in glob.glob(File_Extension):
                # Use a context manager so the handle is closed right away
                # instead of lingering until garbage collection.
                with open(each_folder_file, 'rb') as folder_file:
                    md5file = hashlib.md5(folder_file.read()).hexdigest()
                archive_write.writerow({
                    'TIMESTAMP': Time_stamp,
                    'FILENAME': each_folder_file,
                    'ACTION': 'added',
                    'MD5': md5file,
                })
    return 0
def F_CheckAdded(Time_stamp, Added_Filename, Aux_Filename, Field_Names, File_Extension):
    """Compare the folder with the archive and write the new state to the aux CSV.

    The files matched by ``File_Extension`` are hashed with MD5 and compared,
    by path, with the rows of the archive ``Added_Filename``. One message is
    printed per file, and ``Aux_Filename`` is (re)written with:

    * unchanged files: their archive row(s), copied as they are;
    * modified files: a fresh row with ``Time_stamp`` and the new MD5;
    * new files: a fresh row with ``Time_stamp``;
    * files in the archive but no longer in the folder: a ``'removed'`` row
      carrying the archived MD5.

    Args:
        Time_stamp: Value stored in ``TIMESTAMP`` for every freshly written row.
        Added_Filename: Path of the existing archive CSV (``;``-delimited).
        Aux_Filename: Path of the CSV to write; overwritten if present.
        Field_Names: Column names for the aux CSV header.
        File_Extension: Glob pattern selecting the files to inspect.

    Raises:
        FileNotFoundError: If ``Added_Filename`` does not exist.
    """
    # Current state of the folder: path -> MD5 of the content.
    dict_folder_file = {}
    for each_file in glob.glob(File_Extension):
        with open(each_file, 'rb') as file:
            dict_folder_file[each_file] = hashlib.md5(file.read()).hexdigest()

    # Archived state, loaded in a single pass. The rows are indexed by
    # filename so unchanged files can be copied without rescanning the
    # archive once per file.
    dict_added_archive = {}
    archive_rows = {}
    with open(Added_Filename, 'r', newline='') as addfile:
        for row in csv.DictReader(addfile, delimiter=';'):
            if row['FILENAME'] != '':
                # When a filename occurs several times, the last MD5 wins.
                dict_added_archive[row['FILENAME']] = row['MD5']
                archive_rows.setdefault(row['FILENAME'], []).append(row)

    print(dict_folder_file)
    print(dict_added_archive)

    with open(Aux_Filename, 'w', newline='') as aux:
        aux_write = csv.DictWriter(aux, fieldnames=Field_Names, delimiter=';')
        aux_write.writeheader()

        for filename_folder, md5_folder in dict_folder_file.items():
            if filename_folder not in dict_added_archive:
                print('The file %s is new.' % filename_folder)
                aux_write.writerow({
                    'TIMESTAMP': Time_stamp,
                    'FILENAME': filename_folder,
                    'ACTION': 'added',
                    'MD5': md5_folder,
                })
            elif md5_folder == dict_added_archive[filename_folder]:
                print("The file %s hasn't been changed." % filename_folder)
                # Keep the archived row(s), so the original timestamp survives.
                for row in archive_rows[filename_folder]:
                    aux_write.writerow(row)
            else:
                print('The file %s has been modified.' % filename_folder)
                # NOTE(review): modified files are recorded with ACTION
                # 'added', exactly like new ones; confirm this is intended.
                aux_write.writerow({
                    'TIMESTAMP': Time_stamp,
                    'FILENAME': filename_folder,
                    'ACTION': 'added',
                    'MD5': md5_folder,
                })

        for filename_archive, md5_archive in dict_added_archive.items():
            if filename_archive not in dict_folder_file:
                # Report the archived name, not a leftover loop variable.
                print('The file %s has been removed.' % filename_archive)
                aux_write.writerow({
                    'TIMESTAMP': Time_stamp,
                    'FILENAME': filename_archive,
                    'ACTION': 'removed',
                    'MD5': md5_archive,
                })
Upvotes: 1
Views: 89
Reputation: 7091
Find deleted files?
Find new files ?
Upvotes: 1