Reputation:
I have 4 arrays of clusters that I need to plot in a scatter plot. The documentation shows a simple example of X and Y plotting. I've tried some tutorials but most of them work with datasets or dataframes, so I was unable to properly figure out how to plot my data the right way. In short, I'm trying to plot these 4 arrays as clusters:
[ 4.33976958 19.73690959 9.05452373 1.29938447 1.25155903 18.07181231 1.28825463 14.31906422 1.58 4.04618339 4.27626005 1.28062485 1.00079968 12.40582121 5.31973684 3.59755473 6.18436739 4.96310387 4.21620683]
[1.31590273 3.75281228 2.5215868 1.99959996 1.06376689 2.35703203 1.02449988 1.64012195 2.755431 1.35661343 6.20786598 1.26 1.18389189 2.10864886 1.81118746 1.4 1.6857046 1.23693169 1.18810774]
[2.45348731 8.16029411 3.09767655 1.9078784 1.23951603 8.81716508 1.08885261 3.22546121 3.85585269 1.34164079 5.62138773 1.74688294 1.20016666 1.96203975 2.9662097 1.63963411 1.69339895 1.27687118 1.34699666]
[2.48386795 4.32485838 2.03381415 2.3 3.48137904 4.8340873 3.52278299 1.41421356 1.41265707 1.26743836 3.90384426 2.44532206 1.36367151 3.3346664 2.16 0.97897906 1.68534863 1.6503333 1.47837749]
My current code:
import matplotlib.pyplot as plt
std_colomns1 = [4.33976958, 19.73690959, 9.05452373, 1.29938447, 1.25155903, 18.07181231, 1.28825463, 14.31906422, 1.58, 4.04618339, 4.27626005, 1.28062485, 1.00079968, 12.40582121, 5.31973684, 3.59755473, 6.18436739, 4.96310387, 4.21620683]
std_colomns2 = [1.31590273, 3.75281228, 2.5215868, 1.99959996, 1.06376689, 2.35703203, 1.02449988, 1.64012195, 2.755431, 1.35661343, 6.20786598, 1.26, 1.18389189, 2.10864886, 1.81118746, 1.4, 1.6857046, 1.23693169, 1.18810774]
std_colomns3 = [2.45348731, 8.16029411, 3.09767655, 1.9078784, 1.23951603, 8.81716508, 1.08885261, 3.22546121, 3.85585269, 1.34164079, 5.62138773, 1.74688294, 1.20016666, 1.96203975, 2.9662097, 1.63963411, 1.69339895, 1.27687118, 1.34699666]
std_colomns4 = [2.48386795, 4.32485838, 2.03381415, 2.3, 3.48137904, 4.8340873, 3.52278299, 1.41421356, 1.41265707, 1.26743836, 3.90384426, 2.44532206, 1.36367151, 3.3346664, 2.16, 0.97897906, 1.68534863, 1.6503333, 1.47837749]
# Scatter the first array (horizontal axis) against the fourth array
# (vertical axis) as small black markers under a single legend entry.
x, y = std_colomns1, std_colomns4
plt.scatter(x, y, s=10, color='k', label="Face clusters")

# Title, axis captions and legend, then display the figure.
plt.title("Faces Features")
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
I wish to plot those 4 arrays in a 2D space and distinguish them either by class (color) or centroids plotted in the center of each cluster.
Upvotes: 1
Views: 7246
Reputation: 80319
Here is another possibility, showing 4 boxplots:
import matplotlib.pyplot as plt
import numpy as np
std_colomns1 = [4.33976958,19.73690959,9.05452373,1.29938447,1.25155903,18.07181231,1.28825463,14.31906422,1.58,4.04618339,4.27626005,1.28062485,1.00079968,12.40582121,5.31973684,3.59755473,6.18436739,4.96310387,4.21620683]
std_colomns2 = [1.31590273,3.75281228,2.5215868,1.99959996,1.06376689,2.35703203,1.02449988,1.64012195,2.755431,1.35661343,6.20786598,1.26,1.18389189,2.10864886,1.81118746,1.4,1.6857046,1.23693169,1.18810774]
std_colomns3 = [2.45348731,8.16029411,3.09767655,1.9078784,1.23951603,8.81716508,1.08885261,3.22546121,3.85585269,1.34164079,5.62138773,1.74688294,1.20016666,1.96203975,2.9662097,1.63963411,1.69339895,1.27687118,1.34699666]
std_colomns4 = [2.48386795,4.32485838,2.03381415,2.3,3.48137904,4.8340873,3.52278299,1.41421356,1.41265707,1.26743836,3.90384426,2.44532206,1.36367151,3.3346664,2.16,0.97897906,1.68534863,1.6503333,1.47837749]
# One box per array, drawn at x positions 0..3, with each tick labelled by
# the name of the array it summarises.
box_positions = range(4)
box_data = [std_colomns1, std_colomns2, std_colomns3, std_colomns4]
box_names = ['std_colomns1', 'std_colomns2', 'std_colomns3', 'std_colomns4']

plt.boxplot(box_data, positions=box_positions)
plt.xticks(ticks=box_positions, labels=box_names)
plt.show()
Or, using seaborn (and pandas) you could draw a violin plot or a swarm plot:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Collect the four arrays into a DataFrame with one named column per array,
# then hand the whole frame to seaborn to draw one violin per column.
named_columns = {
    'std_colomns1': std_colomns1,
    'std_colomns2': std_colomns2,
    'std_colomns3': std_colomns3,
    'std_colomns4': std_colomns4,
}
df = pd.DataFrame(named_columns)
sns.violinplot(data=df)
plt.show()
At the left: sns.violinplot(data=df); at the right: sns.swarmplot(data=df):
Upvotes: 1
Reputation: 62403
import matplotlib.pyplot as plt
import numpy as np
# plot style
plt.rcParams['figure.figsize'] = (16.0, 10.0)
plt.style.use('ggplot')

# create list of data lists
data = [std_colomns1, std_colomns2, std_colomns3, std_colomns4]

# plot each list as unconnected point markers against its element index
# (plt.plot with a single sequence uses 0..N-1 as x) and print its median
for i, d in enumerate(data, 1):
    plt.plot(d, marker='.', linestyle='none', markersize=7, label=f'col_{i}')
    print(f'Median col_{i}: {np.median(d)}')

# format plot
# one x tick per element index, derived from the longest list rather than a
# hard-coded count
plt.xticks(range(max(len(d) for d in data)))
plt.yticks(range(1, 21, 1))
plt.ylabel('Values')
plt.xlabel('Index')
plt.legend()
plt.show()
The DataFrame columns can be named with the columns parameter, using columns=['a', 'b', 'c', 'd'] as an example.
import pandas as pd
import matplotlib.pyplot as plt
# plot style
plt.rcParams['figure.figsize'] = (16.0, 10.0)
plt.style.use('ggplot')

# create list of data lists
data = [std_colomns1, std_colomns2, std_colomns3, std_colomns4]

# create dataframe: zip(*data) transposes the lists, so each list becomes one
# column (labelled 0..3 by default) and each row holds the values at one index
df = pd.DataFrame(list(zip(*data)))

# compute and print the per-column median and mean
stats = df.agg(['median', 'mean'])
print(stats)
# Printed output:
#                0         1         2         3
# median  4.276260  1.640122  1.907878  2.160000
# mean    6.222733  1.993142  2.875864  2.425034

# plot: bar chart of the dataframe
df.plot.bar()

# format plot
plt.xticks(rotation=0)
plt.yticks(range(1, 21, 1))
plt.ylabel('Values')
plt.xlabel('Index')
plt.legend()
plt.show()
Upvotes: 2
Reputation: 12496
Check this code:
import matplotlib.pyplot as plt
import numpy as np
std_colomns1 = [4.33976958,19.73690959,9.05452373,1.29938447,1.25155903,18.07181231,1.28825463,14.31906422,1.58,4.04618339,4.27626005,1.28062485,1.00079968,12.40582121,5.31973684,3.59755473,6.18436739,4.96310387,4.21620683]
std_colomns2 = [1.31590273,3.75281228,2.5215868,1.99959996,1.06376689,2.35703203,1.02449988,1.64012195,2.755431,1.35661343,6.20786598,1.26,1.18389189,2.10864886,1.81118746,1.4,1.6857046,1.23693169,1.18810774]
std_colomns3 = [2.45348731,8.16029411,3.09767655,1.9078784,1.23951603,8.81716508,1.08885261,3.22546121,3.85585269,1.34164079,5.62138773,1.74688294,1.20016666,1.96203975,2.9662097,1.63963411,1.69339895,1.27687118,1.34699666]
std_colomns4 = [2.48386795,4.32485838,2.03381415,2.3,3.48137904,4.8340873,3.52278299,1.41421356,1.41265707,1.26743836,3.90384426,2.44532206,1.36367151,3.3346664,2.16,0.97897906,1.68534863,1.6503333,1.47837749]
# Median of each array, used as the "center" of that array's values.
# (The previously assigned but never used x / y aliases were dropped.)
center_colomn1 = np.median(std_colomns1)
center_colomn2 = np.median(std_colomns2)
center_colomn3 = np.median(std_colomns3)
center_colomn4 = np.median(std_colomns4)

# Print the centers, one per line; without this the medians were computed
# but never shown anywhere.
for center in (center_colomn1, center_colomn2, center_colomn3, center_colomn4):
    print(center)

# Plot every array against its element index, one colour per array
# ('ko' = black circles, 'ro' = red, 'go' = green, 'bo' = blue).
plt.plot(std_colomns1, 'ko', label="Face 1")
plt.plot(std_colomns2, 'ro', label="Face 2")
plt.plot(std_colomns3, 'go', label="Face 3")
plt.plot(std_colomns4, 'bo', label="Face 4")

# Axis captions, title and legend, then display the figure.
plt.xlabel('X')
plt.ylabel('Y')
plt.title("Faces Features")
plt.legend()
plt.show()
It computes these centers (the median of each array):
4.27626005
1.64012195
1.9078784
2.16
and this scatter plot:
Upvotes: 1