Reputation: 53
I just wanted to start off and say I really appreciate everyone's help on StackOverflow! As a new coder, a lot of solution guides tend to be confusing and everyone here is really helpful.
My latest question: I built the heatmap below using ggplot in R, but it looks very busy (attached is the full view when all of the data is present). I was hoping to either:
A. Have the colorscale not bother with coloring sales by month totals and only color the item type sales across the rows (basically what month did the item sell well in vs. when it didn't).
B. Or a graphic way to make it look a bit less busy such as vertical lines being a different color than horizontal lines.
> dput(head(sales, 100))
structure(list(Region = c("Sub-Saharan Africa", "Europe", "Middle East and North Africa",
"Sub-Saharan Africa", "Europe", "Sub-Saharan Africa", "Asia",
"Asia", "Sub-Saharan Africa", "Central America and the Caribbean",
"Sub-Saharan Africa", "Europe", "Europe", "Central America and the Caribbean",
"Middle East and North Africa", "Australia and Oceania", "Central America and the Caribbean",
"Europe", "Middle East and North Africa", "Europe", "Asia", "Europe",
"Europe", "Asia", "Europe", "Europe", "Europe", "Europe", "Australia and Oceania",
"Central America and the Caribbean", "Europe", "Europe", "Europe",
"Europe", "Central America and the Caribbean", "Middle East and North Africa",
"Middle East and North Africa", "Europe", "Sub-Saharan Africa",
"Europe", "Europe", "Asia", "Middle East and North Africa", "Europe",
"Middle East and North Africa", "Europe", "Europe", "Australia and Oceania",
"Australia and Oceania", "Australia and Oceania", "Europe", "Australia and Oceania",
"Sub-Saharan Africa", "Sub-Saharan Africa", "Asia", "Sub-Saharan Africa",
"Europe", "Europe", "Central America and the Caribbean", "Europe",
"Middle East and North Africa", "Central America and the Caribbean",
"Europe", "Europe", "Europe", "Sub-Saharan Africa", "Sub-Saharan Africa",
"Sub-Saharan Africa", "Europe", "Europe", "Europe", "Europe",
"Sub-Saharan Africa", "Sub-Saharan Africa", "Europe", "Sub-Saharan Africa",
"Sub-Saharan Africa", "Europe", "Asia", "Central America and the Caribbean",
"Asia", "Middle East and North Africa", "North America", "Sub-Saharan Africa",
"Sub-Saharan Africa", "Europe", "Europe", "Sub-Saharan Africa",
"Europe", "Sub-Saharan Africa", "Central America and the Caribbean",
"Sub-Saharan Africa", "Sub-Saharan Africa", "Australia and Oceania",
"Middle East and North Africa", "Sub-Saharan Africa", "Sub-Saharan Africa",
"Europe", "Sub-Saharan Africa", "Sub-Saharan Africa"), Country = c("Chad",
"Latvia", "Pakistan", "Democratic Republic of the Congo", "Czech Republic",
"South Africa", "Laos", "China", "Eritrea", "Haiti", "Cameroon",
"Bosnia and Herzegovina", "Germany", "Barbados", "Algeria", "Palau",
"Cuba", "Vatican City", "Lebanon", "Lithuania", "Myanmar", "Ukraine",
"Russia", "Japan", "Russia", "Liechtenstein", "Slovakia", "Albania",
"Federated States of Micronesia", "Dominica", "Andorra", "Switzerland",
"Lithuania", "San Marino", "Nicaragua", "Azerbaijan", "Syria",
"Serbia", "Mauritius", "Germany", "Italy", "Bhutan", "Turkey",
"Bulgaria", "Pakistan", "Poland", "France", "Fiji", "Australia",
"Nauru", "Slovenia", "Samoa", "South Africa", "Ghana", "Sri Lanka",
"Guinea", "Spain", "Moldova", "Dominican Republic", "Luxembourg",
"Kuwait", "Saint Lucia", "Georgia", "Bosnia and Herzegovina",
"Iceland", "Mauritius", "Malawi", "Seychelles", "Montenegro",
"Germany", "Estonia", "Serbia", "Madagascar", "Benin", "Hungary",
"Djibouti", "Senegal", "Ireland", "Mongolia", "Antigua and Barbuda",
"Cambodia", "Oman", "United States of America", "Mauritania",
"Central African Republic", "Albania", "Switzerland", "Ghana",
"Austria", "Democratic Republic of the Congo", "Dominican Republic",
"Mauritius", "Cote d'Ivoire", "Samoa", "Kuwait", "Uganda", "Senegal",
"Moldova", "Cote d'Ivoire", "Niger"), Item_Type = c("Office Supplies",
"Beverages", "Vegetables", "Household", "Beverages", "Beverages",
"Vegetables", "Baby Food", "Meat", "Office Supplies", "Cereal",
"Baby Food", "Office Supplies", "Vegetables", "Clothes", "Snacks",
"Beverages", "Beverages", "Personal Care", "Snacks", "Meat",
"Office Supplies", "Snacks", "Cosmetics", "Meat", "Vegetables",
"Cereal", "Baby Food", "Baby Food", "Beverages", "Office Supplies",
"Personal Care", "Clothes", "Vegetables", "Fruits", "Cosmetics",
"Baby Food", "Beverages", "Fruits", "Meat", "Cereal", "Clothes",
"Clothes", "Cosmetics", "Household", "Cereal", "Baby Food", "Beverages",
"Personal Care", "Office Supplies", "Cosmetics", "Clothes", "Cereal",
"Vegetables", "Office Supplies", "Meat", "Fruits", "Personal Care",
"Cereal", "Personal Care", "Office Supplies", "Fruits", "Vegetables",
"Cosmetics", "Snacks", "Personal Care", "Office Supplies", "Meat",
"Personal Care", "Household", "Meat", "Clothes", "Baby Food",
"Beverages", "Clothes", "Snacks", "Fruits", "Household", "Meat",
"Baby Food", "Personal Care", "Vegetables", "Baby Food", "Office Supplies",
"Cosmetics", "Baby Food", "Vegetables", "Household", "Vegetables",
"Household", "Clothes", "Baby Food", "Personal Care", "Office Supplies",
"Personal Care", "Fruits", "Beverages", "Personal Care", "Household",
"Personal Care"), Sales_Channel = c("Online", "Online", "Offline",
"Online", "Online", "Offline", "Online", "Online", "Online",
"Online", "Offline", "Offline", "Online", "Offline", "Offline",
"Offline", "Online", "Online", "Offline", "Offline", "Online",
"Online", "Offline", "Offline", "Offline", "Offline", "Offline",
"Offline", "Online", "Offline", "Online", "Online", "Offline",
"Online", "Online", "Online", "Online", "Online", "Offline",
"Online", "Offline", "Offline", "Online", "Offline", "Offline",
"Offline", "Offline", "Online", "Online", "Offline", "Online",
"Offline", "Online", "Online", "Offline", "Online", "Offline",
"Online", "Online", "Online", "Offline", "Online", "Offline",
"Offline", "Online", "Online", "Online", "Online", "Online",
"Online", "Offline", "Online", "Offline", "Offline", "Online",
"Offline", "Offline", "Offline", "Online", "Online", "Online",
"Online", "Offline", "Offline", "Offline", "Online", "Online",
"Online", "Online", "Offline", "Online", "Offline", "Online",
"Online", "Online", "Offline", "Offline", "Offline", "Online",
"Online"), Order_Priority = c("L", "C", "C", "C", "C", "H", "L",
"C", "L", "C", "M", "M", "C", "C", "C", "L", "H", "L", "H", "H",
"C", "C", "L", "H", "L", "L", "H", "C", "M", "H", "M", "M", "M",
"H", "L", "M", "L", "H", "H", "L", "H", "L", "L", "L", "M", "C",
"M", "L", "H", "H", "M", "C", "M", "L", "M", "C", "L", "M", "L",
"L", "L", "C", "H", "H", "H", "M", "C", "C", "L", "L", "H", "M",
"C", "H", "M", "H", "H", "H", "L", "H", "H", "C", "L", "L", "H",
"H", "M", "M", "H", "L", "L", "H", "H", "M", "H", "L", "C", "H",
"H", "C"), Order_Date = c("1/27/2011", "12/28/2015", "1/13/2011",
"9/11/2012", "10/27/2015", "7/10/2012", "2/20/2011", "4/10/2017",
"11/21/2014", "7/4/2015", "1/1/2016", "10/20/2012", "2/22/2015",
"1/1/2016", "6/21/2011", "9/19/2013", "11/15/2015", "4/6/2015",
"4/12/2010", "9/26/2011", "1/2/2016", "8/14/2010", "4/13/2012",
"9/19/2013", "12/2/2015", "2/26/2017", "1/2/2016", "5/20/2011",
"10/24/2013", "6/14/2011", "6/20/2015", "8/5/2011", "1/2/2016",
"7/5/2015", "3/25/2015", "8/22/2013", "1/3/2016", "6/23/2013",
"5/8/2015", "1/3/2016", "3/10/2013", "3/18/2012", "2/11/2015",
"10/30/2012", "7/6/2012", "1/4/2011", "10/25/2013", "1/3/2016",
"3/16/2014", "1/3/2016", "9/30/2010", "11/5/2010", "7/21/2017",
"7/10/2013", "10/6/2012", "6/4/2011", "4/12/2014", "10/26/2015",
"8/4/2011", "2/24/2017", "3/30/2011", "5/2/2015", "2/1/2014",
"3/3/2012", "4/22/2015", "5/12/2011", "12/21/2011", "12/2/2010",
"8/14/2010", "10/5/2010", "2/8/2012", "9/8/2012", "8/11/2011",
"10/28/2012", "10/11/2013", "1/3/2016", "7/28/2017", "1/5/2016",
"1/5/2016", "11/13/2014", "8/26/2012", "7/15/2014", "5/2/2011",
"11/11/2013", "4/14/2011", "10/4/2012", "5/14/2013", "1/12/2013",
"10/3/2012", "10/23/2010", "2/6/2014", "9/4/2011", "1/5/2016",
"7/19/2015", "10/28/2012", "1/5/2016", "10/25/2013", "2/11/2011",
"1/5/2016", "2/6/2012"), Order_ID = c(292494523, 361825549, 141515767,
500364005, 127481591, 482292354, 844532620, 564251220, 411809480,
327881228, 743598735, 479823005, 498603188, 953377091, 181401288,
500204360, 640987718, 206925189, 221503102, 878520286, 319358670,
746630275, 246883237, 967895781, 305029237, 223957431, 485685670,
121455848, 332936227, 692031657, 365978467, 392325484, 917994248,
603977954, 965943562, 233629691, 664174449, 212921321, 763686978,
520714461, 637702119, 671986758, 912333714, 540041816, 156722390,
434299266, 765008771, 593408763, 856333482, 682830178, 574837148,
365692222, 289660394, 681165492, 594943845, 956044280, 509828126,
771969211, 178453862, 835580909, 869961678, 278519999, 478492200,
257427108, 723186051, 353942859, 848183858, 374707877, 322626245,
351362788, 640653836, 540548217, 821407258, 523904788, 109027135,
113437545, 672654092, 701131856, 148230302, 230407607, 129491746,
606854999, 885983693, 260676658, 345045220, 123513209, 900816953,
452005279, 672439515, 827793490, 704053533, 157518470, 117058742,
272820842, 548818433, 198175609, 875250566, 511720263, 929683959,
923598563), Ship_Date = c("2/12/2011", "1/23/2016", "2/1/2011",
"10/6/2012", "12/5/2015", "8/21/2012", "3/20/2011", "5/12/2017",
"1/10/2015", "7/20/2015", "2/18/2016", "11/15/2012", "2/27/2015",
"1/3/2016", "7/21/2011", "10/4/2013", "11/30/2015", "4/27/2015",
"5/19/2010", "10/2/2011", "1/16/2016", "8/31/2010", "4/22/2012",
"9/28/2013", "12/26/2015", "2/28/2017", "1/10/2016", "6/19/2011",
"12/3/2013", "7/20/2011", "7/21/2015", "9/1/2011", "1/16/2016",
"7/29/2015", "5/9/2015", "8/30/2013", "1/27/2016", "7/18/2013",
"5/13/2015", "1/25/2016", "4/4/2013", "5/4/2012", "3/2/2015",
"11/3/2012", "8/1/2012", "2/21/2011", "12/10/2013", "2/20/2016",
"4/27/2014", "2/15/2016", "11/11/2010", "12/5/2010", "8/22/2017",
"7/26/2013", "10/21/2012", "7/24/2011", "4/15/2014", "12/15/2015",
"8/27/2011", "4/14/2017", "4/12/2011", "6/14/2015", "2/26/2014",
"4/10/2012", "5/13/2015", "5/15/2011", "1/18/2012", "12/25/2010",
"9/16/2010", "11/14/2010", "3/18/2012", "9/20/2012", "8/19/2011",
"11/7/2012", "10/27/2013", "1/10/2016", "7/31/2017", "2/11/2016",
"1/26/2016", "12/20/2014", "9/22/2012", "8/15/2014", "5/4/2011",
"12/17/2013", "5/20/2011", "11/21/2012", "6/10/2013", "2/2/2013",
"11/12/2012", "11/20/2010", "3/28/2014", "9/4/2011", "1/11/2016",
"8/20/2015", "11/24/2012", "2/3/2016", "11/3/2013", "2/26/2011",
"2/9/2016", "2/26/2012"), Units_Sold = c(4484, 1075, 6515, 7683,
3491, 9880, 4825, 3330, 2431, 6197, 6245, 9145, 6618, 4322, 9527,
441, 1365, 2617, 6545, 2530, 4182, 3345, 7091, 725, 3784, 2835,
4038, 339, 2083, 6401, 16, 6684, 3753, 9353, 3020, 5072, 2834,
7005, 803, 9835, 9083, 4670, 8675, 9229, 6493, 7659, 1950, 1695,
6962, 3479, 5941, 5310, 5802, 861, 5959, 3603, 8327, 1699, 7318,
5814, 9848, 9112, 5330, 7257, 5678, 8412, 5307, 3243, 1130, 4912,
2562, 9084, 1516, 3924, 2407, 7545, 2148, 9352, 3495, 1586, 8340,
735, 1118, 8871, 5403, 9158, 609, 7261, 8650, 1344, 3941, 2070,
9138, 2605, 6425, 3421, 4947, 8252, 2998, 2194), Unit_Price = c(651.21,
47.45, 154.06, 668.27, 47.45, 47.45, 154.06, 255.28, 421.89,
651.21, 205.7, 255.28, 651.21, 154.06, 109.28, 152.58, 47.45,
47.45, 81.73, 152.58, 421.89, 651.21, 152.58, 437.2, 421.89,
154.06, 205.7, 255.28, 255.28, 47.45, 651.21, 81.73, 109.28,
154.06, 9.33, 437.2, 255.28, 47.45, 9.33, 421.89, 205.7, 109.28,
109.28, 437.2, 668.27, 205.7, 255.28, 47.45, 81.73, 651.21, 437.2,
109.28, 205.7, 154.06, 651.21, 421.89, 9.33, 81.73, 205.7, 81.73,
651.21, 9.33, 154.06, 437.2, 152.58, 81.73, 651.21, 421.89, 81.73,
668.27, 421.89, 109.28, 255.28, 47.45, 109.28, 152.58, 9.33,
668.27, 421.89, 255.28, 81.73, 154.06, 255.28, 651.21, 437.2,
255.28, 154.06, 668.27, 154.06, 668.27, 109.28, 255.28, 81.73,
651.21, 81.73, 9.33, 47.45, 81.73, 668.27, 81.73), Total_Profit = c(566105,
16834.5, 411291.95, 1273303.59, 54669.06, 154720.8, 304602.25,
319213.8, 139053.2, 782371.25, 553244.55, 876639.7, 835522.5,
272847.86, 699662.88, 24316.74, 21375.9, 40982.22, 164017.7,
139504.2, 239210.4, 422306.25, 390997.74, 126055.75, 216444.8,
178973.55, 357726.42, 32496.54, 199676.38, 100239.66, 2020, 167501.04,
275620.32, 590454.89, 7278.2, 881868.64, 271667.24, 109698.3,
1935.23, 562562, 804662.97, 342964.8, 637092, 1604646.23, 1076084.89,
678510.81, 186927, 26543.7, 174467.72, 439223.75, 1032961.67,
389966.4, 513999.18, 54354.93, 752323.75, 206091.6, 20068.07,
42576.94, 648301.62, 145698.84, 1243310, 21959.92, 336482.9,
1261774.59, 313084.92, 210804.72, 670008.75, 185499.6, 28317.8,
814065.76, 146546.4, 667128.96, 145323.76, 61449.84, 176770.08,
416031.3, 5176.68, 1549906.96, 199914, 152033.96, 209000.4, 46400.55,
107171.48, 1119963.75, 939419.61, 877885.88, 38446.17, 1203365.53,
546074.5, 222741.12, 289427.04, 198430.2, 228998.28, 328881.25,
161010.5, 8244.61, 77470.02, 206795.12, 496858.54, 54981.64),
Month_RecentYear = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "January", NA, NA, "January", NA, NA, NA, NA, NA, NA,
"January", NA, NA, NA, NA, NA, "January", NA, NA, NA, NA,
NA, "January", NA, NA, NA, "January", NA, NA, "January",
NA, NA, NA, NA, NA, NA, NA, "January", NA, "January", NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, "January", NA, "January",
"January", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "January", NA, NA, "January", NA, NA, "January", NA),
Year = c(2011, 2015, 2011, 2012, 2015, 2012, 2011, 2017,
2014, 2015, 2016, 2012, 2015, 2016, 2011, 2013, 2015, 2015,
2010, 2011, 2016, 2010, 2012, 2013, 2015, 2017, 2016, 2011,
2013, 2011, 2015, 2011, 2016, 2015, 2015, 2013, 2016, 2013,
2015, 2016, 2013, 2012, 2015, 2012, 2012, 2011, 2013, 2016,
2014, 2016, 2010, 2010, 2017, 2013, 2012, 2011, 2014, 2015,
2011, 2017, 2011, 2015, 2014, 2012, 2015, 2011, 2011, 2010,
2010, 2010, 2012, 2012, 2011, 2012, 2013, 2016, 2017, 2016,
2016, 2014, 2012, 2014, 2011, 2013, 2011, 2012, 2013, 2013,
2012, 2010, 2014, 2011, 2016, 2015, 2012, 2016, 2013, 2011,
2016, 2012), Month = c("January", "December", "January",
"September", "October", "July", "February", "April", "November",
"July", "January", "October", "February", "January", "June",
"September", "November", "April", "April", "September", "January",
"August", "April", "September", "December", "February", "January",
"May", "October", "June", "June", "August", "January", "July",
"March", "August", "January", "June", "May", "January", "March",
"March", "February", "October", "July", "January", "October",
"January", "March", "January", "September", "November", "July",
"July", "October", "June", "April", "October", "August",
"February", "March", "May", "February", "March", "April",
"May", "December", "December", "August", "October", "February",
"September", "August", "October", "October", "January", "July",
"January", "January", "November", "August", "July", "May",
"November", "April", "October", "May", "January", "October",
"October", "February", "September", "January", "July", "October",
"January", "October", "February", "January", "February")), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), spec = structure(list(
cols = list(Region = structure(list(), class = c("collector_character",
"collector")), Country = structure(list(), class = c("collector_character",
"collector")), Item_Type = structure(list(), class = c("collector_character",
"collector")), Sales_Channel = structure(list(), class = c("collector_character",
"collector")), Order_Priority = structure(list(), class = c("collector_character",
"collector")), Order_Date = structure(list(), class = c("collector_character",
"collector")), Order_ID = structure(list(), class = c("collector_double",
"collector")), Ship_Date = structure(list(), class = c("collector_character",
"collector")), Units_Sold = structure(list(), class = c("collector_double",
"collector")), Unit_Price = structure(list(), class = c("collector_double",
"collector")), Total_Profit = structure(list(), class = c("collector_double",
"collector")), Month_RecentYear = structure(list(), class = c("collector_character",
"collector")), Year = structure(list(), class = c("collector_double",
"collector")), Month = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
# Packages used below: dplyr for filter()/group_by()/summarise()/%>%,
# ggplot2 for the heatmap.
library(dplyr)
library(ggplot2)

# Keep only the rows that have a value in Month_RecentYear. Missing values
# must be tested with is.na(); comparing against the string "NA" only appears
# to work because filter() drops rows whose condition evaluates to NA.
THISYEAR <- filter(sales, !is.na(Month_RecentYear))

# Narrow the data down to the columns used below, under shorter names.
df <- data.frame(
  ItemType = THISYEAR$Item_Type,
  UnitsSold = THISYEAR$Units_Sold,
  TotalProfit = THISYEAR$Total_Profit,
  MonthRecentYear = THISYEAR$Month_RecentYear
)

# Total units sold for every (month, item type) pair. ungroup() removes the
# grouping that summarise() would otherwise leave on the result.
df2 <- df %>%
  group_by(MonthRecentYear, ItemType) %>%
  summarise(TotalUnitsSold = sum(UnitsSold)) %>%
  ungroup()

# Printed for reference when picking the colour-scale midpoint below.
median(df2$TotalUnitsSold)

# Heatmap: months on the x axis (ordered by calendar month via month.name),
# item types on the y axis, tiles coloured and labelled by total units sold.
# Columns are referenced by bare name inside aes(); writing df2$col there
# bypasses ggplot2's data handling and breaks with facets or layer-level data.
HEAT <- ggplot(
  data = df2,
  mapping = aes(
    x = factor(MonthRecentYear, levels = month.name),
    y = ItemType
  )
) +
  geom_tile(aes(fill = TotalUnitsSold), color = "grey", size = 1) +
  geom_text(aes(label = TotalUnitsSold)) +
  scale_fill_gradient2(
    low = "red", mid = "yellow", high = "green",
    midpoint = 45000
  )

HEAT +
  labs(
    title = "Total Item Sales per Month in 2016",
    fill = "Units Sold",
    x = "Month",
    y = "Item Type"
  )
Upvotes: 0
Views: 142
Reputation: 16178
In my opinion, your question B is a little too vague (or maybe I didn't understand it). Can you clarify it?
Regarding question A, I think you can normalize your data to the maximum value of each row (Item) as follows:
library(dplyr)
# Per-item normalisation: within each Item group, divide every Value by that
# group's largest Value, so each item's best month gets Norm_val = 1.
DF %>%
  group_by(Item) %>%
  mutate(Norm_val = Value / max(Value))
# A tibble: 132 x 4
# Groups: Item [11]
Month Item Value Norm_val
<fct> <fct> <int> <dbl>
1 January Baby Food 61662 0.788
2 February Baby Food 67869 0.867
3 March Baby Food 12985 0.166
4 April Baby Food 39924 0.510
5 May Baby Food 78292 1
6 June Baby Food 72554 0.927
7 July Baby Food 55403 0.708
8 August Baby Food 75160 0.960
9 September Baby Food 56434 0.721
10 October Baby Food 19641 0.251
# … with 122 more rows
You can then map these normalized values to the fill of geom_tile:
# Heatmap coloured per item row: each Value is divided by the maximum Value of
# its Item, so the fill shows in which months an item sold well relative to
# its own best month. The tile labels still show the raw Value.
DF %>%
  group_by(Item) %>%
  mutate(Norm_val = Value / max(Value)) %>%
  ungroup() %>%
  ggplot(aes(x = Month, y = Item)) +
  geom_tile(aes(fill = Norm_val), color = "grey", size = 1) +
  geom_text(aes(label = Value)) +
  # The legend title is set here; a separate fill title in labs() would be
  # overridden by this scale name, so it is not repeated below.
  scale_fill_gradient2(
    name = "Normalised value",
    low = "red", mid = "yellow", high = "green",
    midpoint = 0.5
  ) +
  labs(
    title = "Total Item Sales per Month in 2016",
    x = "Month",
    y = "Item Type"
  )
Does it answer your question?
Reproducible example (DF)
library(lubridate)
# Full month names for one calendar year, in calendar order.
Month <- as.character(
  month(
    seq(ymd("2020-01-01"), ymd("2020-12-31"), by = "1 month"),
    label = TRUE,
    abbr = FALSE
  )
)

# One entry per distinct item type; without unique() every repeated item type
# in df2 would produce duplicate Month/Item rows in the grid.
Item <- unique(as.character(df2$ItemType))

# Every Month x Item combination, with the columns named directly.
DF <- expand.grid(Month = Month, Item = Item)

# Reproducible random values, one per row. The size follows nrow(DF) instead
# of a hard-coded count so the assignment works for any number of item types.
set.seed(123)
DF$Value <- sample(10000:100000, nrow(DF))
Upvotes: 2