Reputation: 433
I want to find the md5sum of files starting with "10" ( could be exe, doc, pdf etc) hence not checking the file extension but only the start two digits. So far I've a script to traverse through the directory and print out all such files but couldn't get the checksum to be printed for each of them:
def print_files(file_directory, file_extensions=['10']):
''' Print files in file_directory with extensions in file_extensions, recursively. '''
# Get the absolute path of the file_directory parameter
file_directory = os.path.abspath(file_directory)
# Get a list of files in file_directory
file_directory_files = os.listdir(file_directory)
# Traverse through all files
for filename in file_directory_files:
filepath = os.path.join(file_directory, filename)
# Check if it's a normal file or directory
if os.path.isfile(filepath):
# Check if the file has an extension of typical video files
for file_extension in file_extensions:
# Not a reqd file, ignore
#if not filepath.endswith(file_extension):
if not filename.startswith(file_extension) or len(filename) != 19:
continue
# We have got a '10' file!
print_files.counter += 1
## TRYING TO READ AND PRINT MD5 USING HASHLIB/ DOESNT WORK###
hasher = hashlib.md5()
with open(filename, 'rb') as afile:
buf = afile.read(65536)
while len(buf) > 0:
hasher.update(buf)
buf = afile.read(65536)
# Print it's name
print('{0}'.format(filepath))
print hasher('{0}.format(filepath)').hexdigest()
print '\n'
elif os.path.isdir(filepath):
# We got a directory, enter into it for further processing
print_files(filepath)
if __name__ == '__main__':
# Directory argument supplied
if len(sys.argv) == 2:
if os.path.isdir(sys.argv[1]):
file_directory = sys.argv[1]
else:
print('ERROR: "{0}" is not a directory.'.format(sys.argv[1]))
exit(1)
else:
# Set file directory to CWD
file_directory = os.getcwd()
print('\n -- Looking for Required Files in "{0}" -- \n'.format(file_directory))
# Set the number of processed files equal to zero
print_files.counter = 0
# Start Processing
print_files(file_directory)
# We are done. Exit now.
'
Upvotes: 2
Views: 6139
Reputation: 433
Got it fixed with this line
print hashlib.md5(open('{0}'.format(filepath)).read()).hexdigest()
I wasnt reading the file but just passing hashlib.md5. Thanks Matt for the insight.
Upvotes: 0
Reputation: 738
I'd recommend that you do not solve this recursively, but instead make use of os.walk()
to traverse the directory structure. The following code could be the body of your print_files
function.
file_directory = os.path.abspath(file_directory)
paths_to_hash = []
for root, dirs, filenames in os.walk(file_directory, topdown=False):
for i, dir in enumerate(dirs):
for filename in filenames[i]:
if filenames[:2] == '10':
paths_to_hash += [os.path.abspath('{0}/{1}/{2}'.format(root, dir, filename)]
for path in paths_to_hash:
hash = hashlib.md5(open(path, 'rb').read()).digest())
print 'hash: {0} for path: {1}'.format(hash, path)
Upvotes: 3
Reputation: 41116
The line printing the hasher should be:
print('{0}'.format(hasher.hexdigest()))
Upvotes: 0