Scatter plot colorbar based on datapoint cluster

I am trying to achieve a plot similar to this one:

enter image description here

The color shows the clustering of the datapoints.

My code so far:

import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc

# Load the trace file. readTrc returns three values; the third (m) is not
# used anywhere else in this script.
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)
# Scale both arrays by 1000 (presumably V -> mV for datY, given the 'mV'
# axis labels below -- TODO confirm against readTrc).
srx, sry = pd.Series(datX * 1000), pd.Series(datY * 1000)
# Concatenating two unnamed Series yields columns labelled 0 and 1; column 0
# (from datX) becomes the index, leaving column 1 (from datY) as the data.
df_plot = pd.concat([srx, sry], axis = 1)
df_plot.set_index(0, inplace = True)
fig, ax = plt.subplots()

# Eliminate noise: values below 3 are zeroed in place, then every value that
# is not strictly greater than 3 is masked to NaN and those rows are dropped.
df_plot[df_plot < 3] = 0
df = df_plot[df_plot > 3]
df[df < 3] = None
df = df.dropna()

# Plot parameters: pair every remaining value (p) with its successor (p_nach).
p = np.array(df[1].tolist()[:-1])
p_nach = np.array(df[1].tolist()[1:])
# Differences between consecutive index values; computed here but not passed
# to the scatter call below.
d_t = np.array(pd.Series(df.index).diff().tolist()[1:])

# Upper axis limit; also the exclusive stop of the tick ranges below.
graphlim = 101

# Plot p against p_nach. Note that c = p colours each marker by its own
# x value, so the colorbar follows the x-axis rather than any density measure.
# NOTE(review): plt.cm.get_cmap is unavailable in recent Matplotlib releases
# (removed in 3.9 per the API change notes); cmap='jet' should be equivalent.
plt.scatter(p, p_nach,
            edgecolors = 'none',
            c = p,
            s = 20,
            cmap=plt.cm.get_cmap('jet'))
plt.xlim(0,graphlim)
plt.ylim(0,graphlim)
# Tick step is one tenth of the limit, truncated to an integer.
plt.xticks(range(0,graphlim,int(graphlim/10)))
plt.yticks(range(0,graphlim,int(graphlim/10)))
plt.colorbar()
plt.grid(zorder = 0, alpha = 0.3)
ax.set_xlabel('p / mV')
ax.set_ylabel('p_nach / mV')

##plt.savefig(dpi = 300)
plt.show()
##plt.close()
##fig.clear()
##gc.collect()

print('Progress... done!')

enter image description here

As you can see, the colorbar does not represent the clustering but rather the position on the x-axis. How do I configure my colorbar to represent the number of datapoints in an area?

Folder with files: Link

Upvotes: 1

Views: 2917

Answers (1)

Trenton McKinney
Trenton McKinney

Reputation: 62403

import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc

# Load the trace; the third return value (m) is not needed for this plot.
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)

# Scale both channels by 1000 and keep them together in one labelled frame.
df = pd.DataFrame({'time': datX * 1000, 'volts': datY * 1000})

# Eliminate noise: keep only the samples whose scaled voltage is at least 3.
reduce_noise_df = df[df.volts >= 3.0]

# Time difference between consecutive remaining samples. The first diff is
# NaN (no predecessor), so it is dropped; d_t then has one entry per
# (p, p_nach) pair.
d_t = reduce_noise_df.time.diff().to_numpy()[1:]

# Each point pairs a sample (p) with its successor (p_nach). Plain NumPy
# arrays are used so the slices are purely positional.
p = reduce_noise_df.volts.to_numpy()[:-1]

p_nach = reduce_noise_df.volts.to_numpy()[1:]

# Upper axis limit; also the exclusive stop of the tick range.
graphlim = 41

# Plot on the explicit Axes instead of mixing in pyplot's implicit state.
fig, ax = plt.subplots(figsize=(6, 6))
# The colormap is given by name: plt.cm.get_cmap() was deprecated in
# Matplotlib 3.7 and removed in 3.9, while a string works on every version.
points = ax.scatter(p, p_nach,
                    edgecolors='none',
                    c=d_t,
                    s=20,
                    cmap='jet')
ax.set_xlim(0, graphlim)
ax.set_ylim(0, graphlim)
# Tick step is one tenth of the limit, truncated to an integer.
ticks = range(0, graphlim, graphlim // 10)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
fig.colorbar(points, ax=ax)
ax.grid(zorder=0, alpha=0.3)
ax.set_xlabel('p / mV')
ax.set_ylabel('p_nach / mV')
plt.show()
  1. I began by removing unnecessary code.
  2. The main issue was that the colors were set with c = p instead of c = d_t.

enter image description here

Plot of the waveform from your LeCroy WR640Zi, colored by data density

import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc
from scipy.stats import gaussian_kde

# Load the trace; the third return value (m) is not needed for this plot.
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)

# Scale both channels by 1000 and keep them together in one labelled frame.
df = pd.DataFrame({'time': datX * 1000, 'volts': datY * 1000})

# Eliminate noise: keep only the samples whose scaled voltage is at least 3.
reduce_noise_df = df[df.volts >= 3.0]

# Pull the columns out as NumPy arrays directly (no list round-trip).
y = reduce_noise_df.volts.to_numpy()
x = reduce_noise_df.time.to_numpy()

# Calculate point density: fit a Gaussian kernel density estimate to the
# (x, y) samples and evaluate it at those same samples.
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)

# Sort points by density so the densest points are drawn last, on top of
# the sparser ones.
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]

# Plot on the explicit Axes instead of mixing in pyplot's implicit state.
fig, ax = plt.subplots(figsize=(6, 6))
# The colormap is given by name: plt.cm.get_cmap() was deprecated in
# Matplotlib 3.7 and removed in 3.9, while a string works on every version.
points = ax.scatter(x, y,
                    edgecolors='none',
                    c=z,
                    s=20,
                    cmap='jet')
fig.colorbar(points, ax=ax)
ax.grid(zorder=0, alpha=0.3)
ax.set_xlabel('Time (ms)')
ax.set_ylabel('Voltage (mV)')
plt.show()

enter image description here

Upvotes: 1

Related Questions