Hashing Issue, Non-Text Files

Question

My code works ok except for hashing. It works fine on hashing text files but as soon as it encounters a jpg or other file type, it crashes. I know it's some type of encoding error, but I'm stumped on how to encode it properly for non-text files.

#import libraries
import os
import time
from datetime import datetime
import logging
import hashlib
from prettytable import PrettyTable
from pathlib import Path
import glob

#user input
path = input ("Please enter directory: ")

print ("===============================================")

#processing input
if os.path.exists(path):
    print("Processing directory: ", (path))
else:
    print("Invalid directory.")
    logging.basicConfig(filename="error.log", level=logging.ERROR)
    logging.error(' The directory is not valid, please run the script again with the correct directory.')

print ("===============================================")

#process directory
directory = Path(path)
paths = []
filename = []
size = []
hashes = []
modified = []
files = list(directory.glob('**/*.*'))

for file in files:
    paths.append(file.parents[0])
    filename.append(file.parts[-1])
    size.append(file.stat().st_size)
    modified.append(datetime.fromtimestamp(file.stat().st_mtime))
    with open(file) as f:        
        hashes.append(hashlib.md5(f.read().encode()).hexdigest())

#output in to tablecx
report = PrettyTable()

column_names = ['Path', 'File Name', 'File Size', 'Last Modified Time', 'MD5 Hash']
report.add_column(column_names[0], paths)
report.add_column(column_names[1], filename)
report.add_column(column_names[2], size)   
report.add_column(column_names[3], modified)
report.add_column(column_names[4], hashes)
report.sortby = 'File Size'

print(report)

Hashing Issue, Non-Text Files

Answers (1)

Related Questions