Reputation: 623
I build a plotly dashboard in python that displays multiple variables over time. One of the variables is here called "color" and I would like to sort the resulting plot by it.
import pandas as pd
import plotly.express as px
import string
import random
import numpy as np
# for the color mapping
color_dict = {"Colors": {
"green": "rgb(0, 255, 0)",
"black": "rgb(0, 0, 0)",
"red": "rgb(255, 0, 0)"
}}
# creating the df
random.seed(30)
letters = list(string.ascii_lowercase)[0:20]
data = {"letters":letters}
df = pd.DataFrame(data)
df = pd.DataFrame(np.repeat(df.values, 3, axis=0), columns=df.columns) # repeat each row 2 times
df['attempt'] = np.where(df.index%2==0, 1, 2) # adds either 1 or 2 in column "attempts"
lst = ['2022_10_10', '2022_10_11', '2022_10_12']
N = len(df)
df["date"] = pd.Series(np.tile(lst, N//len(lst))).iloc[:N] # add date column with 3 repeating dates
df["colors"] = random.choices(["green", "black", "red"], k=len(df)) # add randomly the colors
df.head()
#letters attempt date colors
#0 a 1 2022_10_10 black
#1 a 2 2022_10_11 green
#2 a 1 2022_10_12 green
#3 b 2 2022_10_10 black
#4 b 1 2022_10_11 green
# the plot
fig = px.scatter(
df,
x="date",
y="letters",
symbol="attempt",
opacity=0.8,
color="colors",
color_discrete_map=color_dict["Colors"],
width=1000,
height=800,
)
fig.update_layout(
yaxis={
"type": "category",
"showgrid": False,
},
xaxis={
"type": "category",
"showgrid": False,
},
)
fig
However, as the original df (I assume?) undergoes some groupby etc for the plotting, my pre-plot sorting (I tried sort_values, custom sorting functions etc) seems to have no affect. Therefore, I would like to create additional columns "black", "green", "red", that hold the count of how often black / green / red appeared e.g. on row "a".
df["black"] = ""
df["red"] = ""
df["green"] = ""
#letters attempt date colors black red green
#0 a 1 2022_10_10 black 1 0 2
#1 a 2 2022_10_11 green 1 0 2
#2 a 1 2022_10_12 green 1 0 2
#3 b 2 2022_10_10 black
#4 b 1 2022_10_11 green
So my questions are: a) how to get color count values into there columns? b) how to use the values of the column "green", then "red", then "black" for sorting the order of the y axis of the plot?
thanks!
EDIT: SORRY, I know this is a complicated task. But I'm looking solely for a way to sort/order the full rows on the y axis. The order WITHIN a row (e.g. row "a") has to be maintained.
EDIT2: I attached a (terribly manufactured) image of the result I'm looking for:
EDIT post answer: in case someone wants to perform this type of row sorting, but only take the latest date (last column on x axis) into account, you can adjust the count status function like this:
color_counts = (
df[df["date"] == df["date"].max()]
.groupby('letters')['colors']
.value_counts()
.unstack(fill_value=0)
)
Upvotes: 4
Views: 368
Reputation: 7230
Let's add the number of colors per letter:
color_counts = (
df.groupby('letters')['colors']
.value_counts()
.unstack(fill_value=0)
)
df = df.merge(color_counts, on='letters')
The first 5 records of the modified data frame:
letters attempt date colors black green red
0 a 1 2022_10_10 black 1 2 0
1 a 2 2022_10_11 green 1 2 0
2 a 1 2022_10_12 green 1 2 0
3 b 2 2022_10_10 black 1 2 0
4 b 1 2022_10_11 green 1 2 0
In order to add the black
, green
, red
counts to the plot, we have to include hover_data
to px.scatter
like this:
fig = px.scatter(
...
hover_data=['black','red','green'],
...
)
To sort letters along the y-axis, we can use the category_orders
parameter of px.scatter
this way:
color_counts = (
df.groupby('letters')['colors']
.value_counts()
.unstack(fill_value=0)
)
def sort_letters(color):
return color_counts[color].sort_values().index
desired_color = 'black'
fig = px.scatter(
...
category_orders={'letters': sort_letters(desired_color)},
...
)
Here we pass the exact order of the letters, how they should be positioned along the axis.
import pandas as pd
import numpy as np
import plotly.express as px
from string import ascii_lowercase
import random
# Color mapping
color_map = {
"green": "YellowGreen",
"black": "Black",
"red": "Crimson"
}
# Mocking data
random.seed(30)
num_letters = 20
letters = [*ascii_lowercase][:num_letters]
dates = ['2022_10_10', '2022_10_11', '2022_10_12']
df = pd.DataFrame(np.repeat(letters, len(dates)), columns=['letters'])
df['attempt'] = df.index % 2 + 1
df['date'] = np.tile(dates, num_letters)
df['colors'] = random.choices([*color_map], k=len(df))
color_counts = (
df.groupby('letters')['colors']
.value_counts()
.unstack(fill_value=0)
)
df = df.merge(color_counts, on='letters')
#display(df.sample(5))
def sort_letters(color):
return color_counts[color].sort_values().index
# The plot
desired_color = 'black'
fig = px.scatter(
df,
x="date",
y="letters",
category_orders={'letters':sort_letters(desired_color)},
color="colors",
color_discrete_map=color_map,
symbol='attempt',
symbol_map={1:'circle-open',2:'circle'},
hover_data=[*color_map],
opacity=0.8,
width=800,
height=800,
title=f"Sorted by {desired_color} counts"
)
fig.update_layout(
yaxis={
"type": "category",
"showgrid": False,
},
xaxis={
"type": "category",
"showgrid": False,
},
)
fig
To sort by color counts, as in lexicographic order, we can use the code from the original post with this addition at the end:
color_counts = (
df.groupby('letters')['colors']
.value_counts()
.unstack(fill_value=0)
)
fig.update_yaxes(
categoryorder="array",
categoryarray=color_counts.sort_values(by=['green','red','black']).index,
)
Here's the output:
See also Plotly: Categorical Axes
Upvotes: 6