Use categorical variable in dataframe, to color line plot along the line in plotly Scatter graph

Question

I'm calculating a "state/activity" variable (string) on per-second sensor data. There are 12 states, and the data spans 10-12 days on average. I'm building a per-second log viewer that shows sensor data parameters alongside the "state/activity". The plot is done as in the example below. I'm trying to color the "battle_deaths" column by the value of the "category" variable. There is a color attribute in plotly, but in all the examples I've seen it takes a numerical value, and I'm not able to "map" the categorical value to a color. Please see the current output and expected output below (overdrawn on the output).

# Sample data: one row per timestamp, later indexed by that timestamp.
data = {
    'date': [
        '2014-05-01 18:47:05.069722',
        '2014-05-01 18:47:06.119994',
        '2014-05-01 18:47:07.178768',
        '2014-05-01 18:47:08.230071',
        '2014-05-01 18:47:09.230071',
        '2014-05-01 18:47:10.280592',
        '2014-05-01 18:47:11.332662',
        '2014-05-01 18:47:12.385109',
        '2014-05-01 18:47:13.436523',
        '2014-05-01 18:47:14.486877',
    ],
    'battle_deaths': [34, 25, 26, 15, 15, 14, 26, 25, 62, 41],
    'category': ["A", "A", "A", "A", "C", "A", "B", "C", "B", "B"],
    'chicken_dinners': ["4000", "5000", "6000", "-1000", "4500",
                        "5900", "6300", "6712", "7788", "4681"],
}

df = pd.DataFrame(
    data,
    columns=['date', 'battle_deaths', 'category', 'chicken_dinners'],
)
# Parse the timestamp strings, then move them from a column to the index
# (set_index drops the 'date' column and keeps 'date' as the index name).
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

print(df)

>     date  battle_deaths   category    
>     2014-05-01 18:47:05.069722    34  A
>     2014-05-01 18:47:06.119994    25  A
>     2014-05-01 18:47:07.178768    26  A
>     2014-05-01 18:47:08.230071    15  A
>     2014-05-01 18:47:09.230071    15  C
>     2014-05-01 18:47:10.280592    14  A
>     2014-05-01 18:47:11.332662    26  B
>     2014-05-01 18:47:12.385109    25  C
>     2014-05-01 18:47:13.436523    62  B
>     2014-05-01 18:47:14.486877    41  B



#plot code
# Both traces use the frame's datetime index as their x values.
random_x = df.index

# Upper subplot: the series that should eventually be colored by category.
traceC1 = go.Scattergl(
    x=random_x,
    y=df["battle_deaths"],
    mode='lines+ markers',
    name="battle_deaths ",
    hoverinfo='x'
)
# Lower subplot: a second sensor channel, drawn as a plain line.
traceC2 = go.Scattergl(
    x=random_x,
    y=df["chicken_dinners"],
    mode='lines',
    name="chicken_dinners",
    hoverinfo='y'
)

#append traces to the above colored plot, no need to color other plots
fig_circ = tools.make_subplots(rows=2, cols=1, shared_xaxes=True)
fig_circ.append_trace(traceC1, 1, 1)
fig_circ.append_trace(traceC2, 2, 1)

#custom scales on different sensor data channels
#scaling is important and can't autoscale, because data has 'spikes' all over the place

fig_circ['layout'].update(
    height=1000, width=1600,
    margin = dict(l = 100, r =0, t=0, b= 0),
    # Each axis takes its own, separately closed dict; yaxis2 is a sibling
    # keyword of yaxis, not a key nested inside it.
    yaxis = dict(range = [0, 100]),
    yaxis2 = dict(range = [-50, 500])
)

plotly.offline.plot(fig_circ, filename='sample.html')

Maximilian Peters · Accepted Answer

Currently (Feb 2019) there is no simple/direct way of doing it.

One possible solution would be:

- plot multiple traces with different colors
- group identical colors via legendgroup
- set showlegend to False if the category was already plotted

The code below could use some optimization but it should get you started.

import pandas as pd
import plotly
plotly.offline.init_notebook_mode()

# Sample data copied from the original question.
data = {
    'date': [
        '2014-05-01 18:47:05.069722',
        '2014-05-01 18:47:06.119994',
        '2014-05-01 18:47:07.178768',
        '2014-05-01 18:47:08.230071',
        '2014-05-01 18:47:09.230071',
        '2014-05-01 18:47:10.280592',
        '2014-05-01 18:47:11.332662',
        '2014-05-01 18:47:12.385109',
        '2014-05-01 18:47:13.436523',
        '2014-05-01 18:47:14.486877',
    ],
    'battle_deaths': [34, 25, 26, 15, 15, 14, 26, 25, 62, 41],
    'category': ["A", "A", "A", "A", "C", "A", "B", "C", "B", "B"],
}

df = pd.DataFrame(data, columns=['date', 'battle_deaths', 'category'])
# Parse the timestamp strings, then move them from a column to the index
# (set_index drops the 'date' column and keeps 'date' as the index name).
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

# just an empty figure
fig = plotly.graph_objs.Figure()

# a dict which maps your categorical values to colors
colors = {'A': 'orange',
          'B': 'green',
          'C': 'red'}

# Pull the columns out once instead of materialising a row Series via
# df.iloc for every row, which gets very slow on per-second data spanning days.
categories = df['category'].tolist()
deaths = df['battle_deaths']
n_rows = len(categories)

# Row positions at which a new run of identical consecutive categories begins.
run_starts = [pos for pos in range(n_rows)
              if pos == 0 or categories[pos] != categories[pos - 1]]

# categories which already have a legend entry
already_plotted = set()

# One trace per run; `stop` is the first row of the following run
# (or n_rows for the last run).
for start, stop in zip(run_starts, run_starts[1:] + [n_rows]):
    cat = categories[start]
    # Include the first row of the next run so that consecutive segments
    # share a point and the line is drawn without gaps.
    end = min(stop + 1, n_rows)
    trace = plotly.graph_objs.Scatter(
        x=df.index[start:end],
        y=deaths.iloc[start:end],
        legendgroup=cat,  # group identical categories
        showlegend=cat not in already_plotted,  # hide legend if already plotted
        name=cat,
        marker={'color': colors[cat]},
    )
    fig.add_trace(trace)
    already_plotted.add(cat)

plotly.offline.iplot(fig)

Use categorical variable in dataframe, to color line plot along the line in plotly Scatter graph

Answers (1)

Related Questions