Reputation: 11
For a homework assignment, I have to write a function that calculates the SHA-256 hash of each unique line of a text file and converts the hex digests of those hashes to integers. I managed to get that far with the code, but there's an extra-credit clause that I would like to meet and am unsure how to implement:
Plot a histogram of those integer values with 20 bins
import matplotlib.pyplot as plt
import hashlib
def file_processing_function(file_path, encoding='utf-8'):
    """Return the set of unique lines in the text file at *file_path*.

    Trailing whitespace (including the line terminator) is stripped from
    each line before it is added, so lines that differ only in trailing
    whitespace count as the same line.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale encoding.

    Returns:
        A set containing each distinct right-stripped line once.
    """
    with open(file_path, 'r', encoding=encoding) as f:
        return {line.rstrip() for line in f}
def sha256_calc(line):
    """Return the SHA-256 hex digest of *line* encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(line.encode('utf-8'))
    return hasher.hexdigest()
def sha256_to_int(hex_digest):
    """Parse *hex_digest* as a base-16 number and return it as an int."""
    hex_base = 16
    value = int(hex_digest, hex_base)
    return value
def file_processing(file_path):
    """Hash every unique line of the file at *file_path*.

    Returns:
        A list of ``(line, hex_digest, integer_value)`` tuples, one per
        unique line, where ``hex_digest`` comes from ``sha256_calc`` and
        ``integer_value`` from ``sha256_to_int``.
    """
    return [
        (text, digest := sha256_calc(text), sha256_to_int(digest))
        for text in file_processing_function(file_path)
    ]
def printer(results):
    """Print the line, SHA-256 hex digest and integer value of each result.

    Args:
        results: Iterable of ``(line, hex_digest, integer_value)`` tuples.
    """
    for entry in results:
        line, hex_digest, integer_value = entry
        print(f'Line: {line} \n SHA-256: {hex_digest} \n Int:{integer_value}\n')
def plot_histogram(results, bins=20):
    """Plot a histogram of the integer hash values contained in *results*.

    Args:
        results: Iterable of ``(line, hex_digest, integer_value)`` tuples.
            Entries whose third element is not an ``int`` are ignored.
        bins: Number of histogram bins.

    The figure title reads the module-level ``file_path`` variable, so that
    name must be defined before this function is called.
    """
    # SHA-256 values are 256-bit Python ints, far larger than any fixed-width
    # NumPy integer type. Handing them to plt.hist unchanged makes NumPy fall
    # back to an object array and fail with "ufunc 'isfinite' not supported",
    # so convert to float first (2**256 is well inside a double's range).
    integer_values = [
        float(integer_value)
        for _, _, integer_value in results
        if isinstance(integer_value, int)
    ]
    plt.figure(figsize=(10, 6))
    plt.hist(integer_values, bins=bins, color='purple', edgecolor='black')
    plt.title(f'Histogram Based on SHA-256 of {file_path}')
    plt.xlabel('Integer Values')
    plt.ylabel('Frequency')
    plt.grid(axis='y', linestyle='--', linewidth=0.7, alpha=0.7)
    plt.show()
# Script entry point: hash every unique line of the input file, print the
# per-line results, then show the histogram.
# NOTE: plot_histogram reads this module-level name for the figure title.
file_path= 'wordcount.txt'
results = file_processing(file_path)
printer(results)
plot_histogram(results, bins = 20)
This is what I have thus far, but the code raises a TypeError:
Traceback (most recent call last):
File "c:\Users\serle\OneDrive\Desktop\Pyprojects\Hash Reader\Hash Reader Sha-256.py", line 53, in <module>
plot_histogram(results, bins = 20)
File "c:\Users\serle\OneDrive\Desktop\Pyprojects\Hash Reader\Hash Reader Sha-256.py", line 43, in plot_histogram
plt.hist(integer_values, bins = bins, color = 'purple', edgecolor = 'black')
File "C:\Users\serle\AppData\Local\Programs\Python\Python312\Lib\site-packages\matplotlib\pyplot.py", line 3440, in hist
return gca().hist(
^^^^^^^^^^^
File "C:\Users\serle\AppData\Local\Programs\Python\Python312\Lib\site-packages\matplotlib\__init__.py", line 1473, in inner
return func(
^^^^^
File "C:\Users\serle\AppData\Local\Programs\Python\Python312\Lib\site-packages\matplotlib\axes\_axes.py", line 7001, in hist
m, bins = np.histogram(x[i], bins, weights=w[i], **hist_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\serle\AppData\Local\Programs\Python\Python312\Lib\site-packages\numpy\lib\_histograms_impl.py", line 797, in histogram
bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\serle\AppData\Local\Programs\Python\Python312\Lib\site-packages\numpy\lib\_histograms_impl.py", line 430, in _get_bin_edges
first_edge, last_edge = _get_outer_edges(a, range)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\serle\AppData\Local\Programs\Python\Python312\Lib\site-packages\numpy\lib\_histograms_impl.py", line 314, in _get_outer_edges
if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
^^^^^^^^^^^^^^^^^^^^^^^
TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
I'm not very good with Matplotlib, any ideas?
Upvotes: 0
Views: 44
Reputation: 25083
NumPy, and consequently Matplotlib, cannot deal with Python's arbitrary-precision integers, so to get your (unnecessary) histogram you have to convert your integers to floats.
from hashlib import sha256
from matplotlib.pyplot import hist, show, xlabel, ylabel
# Hash every non-blank line and convert each 256-bit digest to a float,
# because NumPy cannot bin arbitrary-precision Python ints.
with open('War_and_Peace.txt', encoding='utf-8') as text_file:
    f = [float(int(sha256(stripped.encode('utf-8')).hexdigest(), 16))
         for stripped in map(str.strip, text_file) if stripped]
# hist() returns (counts, bin_edges, patches); the patches are the drawn bars.
# Call hist/show directly: they are the names imported from matplotlib.pyplot.
rectangles = hist(f, bins=20, rwidth=0.8, color='cadetblue')[2]
for rectangle in rectangles:
    rectangle.set_edgecolor('black')
show()
Upvotes: 1
Reputation: 522
def plot_histogram(results, file_path, bins=20):
    """Plot a histogram of the integer hash values contained in *results*.

    Args:
        results: Iterable of ``(line, hex_digest, integer_value)`` tuples.
        file_path: Name shown in the figure title.
        bins: Number of histogram bins.

    Values that cannot be converted to a finite float are skipped.
    """
    # Imported here because this function uses np and nothing else brings
    # it into scope.
    import numpy as np

    plot_values = []
    for _, _, integer_value in results:
        try:
            # Validate as an int, then convert to float. np.isfinite raises
            # TypeError on ints too large for a NumPy integer dtype, and
            # 256-bit digests always are, so the check must see a float.
            value = float(int(integer_value))
        except (ValueError, TypeError, OverflowError):
            continue  # Skip if the value is not a valid, representable integer
        if np.isfinite(value):
            plot_values.append(value)
    plt.figure(figsize=(10, 6))
    plt.hist(plot_values, bins=bins, color='purple', edgecolor='black')
    plt.title(f'Histogram Based on SHA-256 of {file_path}')
    plt.xlabel('Integer Values')
    plt.ylabel('Frequency')
    plt.grid(axis='y', linestyle='--', linewidth=0.7, alpha=0.7)
    plt.show()
This should most likely work. The changes are:
- Pass `file_path` explicitly to the `plot_histogram()` function so that the title is dynamically created.
- Add a `try-except` block around the `integer_value` conversion to ensure that each value is converted to an `int` and checked for finiteness (`np.isfinite()`).
Upvotes: -1