Kris
Kris

Reputation: 420

ggplot2 Bar Chart - Omit one factor from Plot Output

Because the sample is large, I put a dput code chunk at the bottom of my question to import a sample of my data.

I am trying to create a bar chart showing the percentage of incidents by type (the 'LookupCode' column) for this data set. In p01, I look only at the incidents that have a lookup code (omitting rows where 'LookupCode' is NA). In p02, I'd like to count all incidents and compute the percentages with the NA rows included in the total, but leave the NA category out of the bar chart (i.e., I don't want the big bar showing NA on the x-axis).

Any ideas on how to pull this off?

Thank you. Code to follow:

library(tidyverse)
library(scales)

# p01: bar chart of lookup-code counts using only the rows whose LookupCode
# is not NA. fct_infreq() orders the bars by frequency, and each bar is
# labelled with its share of the rows that are plotted.
p01 <- ggplot(
  data = filter(df, !is.na(LookupCode)),
  mapping = aes(x = fct_infreq(LookupCode), fill = LookupCode)
) +
  geom_bar(stat = "count") +
  geom_text(
    stat = "count",
    aes(label = scales::percent(..count.. / sum(..count..))),
    vjust = "inward"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(
    title = "Count and Percent of Lookup Codes",
    x = "Lookup Code",
    y = "Count"
  )

# p02: the same chart built from every row of df. The NA lookup codes are
# therefore part of the counts and percentages, and they are also drawn as
# their own bar -- the part the question asks how to drop.
p02 <- ggplot(
  data = df,
  mapping = aes(x = fct_infreq(LookupCode), fill = LookupCode)
) +
  geom_bar(stat = "count") +
  geom_text(
    stat = "count",
    aes(label = scales::percent(..count.. / sum(..count..))),
    vjust = "inward"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(
    title = "Count and Percent of Lookup Codes",
    x = "Lookup Code",
    y = "Count"
  )
# Sample data: a 50-row tibble captured with dput().
# LookupCode is NA for most rows; only a few rows carry an actual code.
# The plotting code reads `df`, so run this assignment before building the plots.
df <- structure(list(ReleaseYear = c(2016, 2017, 2018, 2017, 2019, 
2019, 2019, 2017, 2019, 2017, 2016, 2017, 2017, 2018, 2018, 2017, 
2019, 2017, 2017, 2017, 2017, 2016, 2017, 2018, 2019, 2016, 2016, 
2016, 2018, 2018, 2019, 2017, 2016, 2018, 2019, 2018, 2017, 2016, 
2018, 2017, 2018, 2016, 2018, 2019, 2018, 2018, 2019, 2016, 2018, 
2019), ReleaseMonth = c("SEPT", "APRIL", "AUGUST", "JUNE", "JAN", 
"JAN", "AUGUST", "MARCH", "FEB", "APRIL", "NOV", "AUGUST", "DEC", 
"FEB", "FEB", "JUNE", "MAY", "MARCH", "AUGUST", "FEB", "DEC", 
"OCT", "AUGUST", "JULY", "APRIL", "MARCH", "SEPT", "NOV", "NOV", 
"JULY", "AUGUST", "JAN", "DEC", "APRIL", "MARCH", "MAY", "JAN", 
"JULY", "JUNE", "FEB", "AUGUST", "NOV", "MARCH", "FEB", "SEPT", 
"NOV", "MAY", "NOV", "MARCH", "FEB"), ProductionOrder = c(10026795, 
10027932, 10032532, 10029147, 10033613, 10033771, 10035329, 10028252, 
10033714, 10027859, 10026658, 10029516, 10030769, 10028211, 10031074, 
10028976, 10034237, 10028414, 10029408, 100296220, 10030650, 
10027200, 10029249, 10032374, 10034502, 10024691, 10026743, 10027112, 
10033068, 10032391, 10035711, 10027797, 10026638, 10031533, 10034233, 
10031882, 10027822, 10026203, 10032144, 10028241, 10031825, 10026656, 
10031067, 10034340, 10032801, 10033399, 10034876, 10027364, 10031486, 
10034002), NilesHeatNo = c("8J47674", "8J51003", "8G58509", "8H51788", 
"8J60248", "8G60351", "9J63427", "8J50670", "8T60855", "8F50950", 
"8G48562", "8G52372", "9H54615", "8J55715", "8K55529", "8G51685", 
"8G62172", "8H50464", "8J52323", "8H50164", "8G54596", "9G48197", 
"9H52494", "8G57871", "8V61894", "8H45452", "9G47724", "8G48507", 
"8T59690", "8G57886", "8H63385", "8F49662", "8F49020", "8G56373", 
"8H61652", "8J57119", "8J49638", "8J46954", "8J57651", "8F49986", 
"8G58447", "8G48520", "8G56064", "8H61297", "8G58851", "8G59461", 
"8G62447", "8J48584", "8H56190", "8R60756"), LookupCode = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "W4 - PROCESS ERROR", 
NA, NA, "U0 - EQUIPMENT BREAK DOWN", NA, NA, NA, NA, NA, "C1 - OXYGEN - HIGH", 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, "C2 - OXYGEN - LOW", "A2 - ALUMINUM - LOW", 
NA, NA, NA, "A1 - ALUMINUM - HIGH"), ScrapWeight = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, 0, NA, 
NA, NA, NA, NA, 13779, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0, NA, NA, NA, 
0), Melt = c("8J", "8J", "8G", "8H", "8J", "8G", "9J", "8J", 
"8T", "8F", "8G", "8G", "9H", "8J", "8K", "8G", "8G", "8H", "8J", 
"8H", "8G", "9G", "9H", "8G", "8V", "8H", "9G", "8G", "8T", "8G", 
"8H", "8F", "8F", "8G", "8H", "8J", "8J", "8J", "8J", "8F", "8G", 
"8G", "8G", "8H", "8G", "8G", "8G", "8J", "8H", "8R"), MeltNo = c(47674, 
51003, 58509, 51788, 60248, 60351, 63427, 50670, 60855, 50950, 
48562, 52372, 54615, 55715, 55529, 51685, 62172, 50464, 52323, 
50164, 54596, 48197, 52494, 57871, 61894, 45452, 47724, 48507, 
59690, 57886, 63385, 49662, 49020, 56373, 61652, 57119, 49638, 
46954, 57651, 49986, 58447, 48520, 56064, 61297, 58851, 59461, 
62447, 48584, 56190, 60756), NilesWeight = c(20359, 20797, 19342, 
20585, 17629, 19770, 11776, 20167, NA, 18622, 20401, 19292, 13524, 
16090, 13605, 20099, 20065, 20893, 20659, 20698, 18528, 14016, 
13779, 20701, 19415, 20318, 15152, 20601, 19380, 20032, 19532, 
20395, 19410, 19739, 20728, 18536, 13841, 20478, 20777, 20269, 
17682, 20890, 20344, 19269, 17858, 18101, 20376, 13672, 20427, 
13100), CantonWeight = c(NA, NA, NA, NA, NA, NA, NA, NA, 20235, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), Grade = c("766FW", "766FK", 
"764KS", "764FK", "766BK", "766BK", "766SS", "766BL", "766GK", 
"766JS", "766BL", "766JK", "764SK", "745TS", "764KK", "766BJ", 
"766BK", "765BU", "779KJ", "779BV", "766GK", "766TW", "766TJ", 
"766BK", "766BK", "766BV", "766NS", "766GL", "766BK", "766JK", 
"764KK", "764KW", "766BS", "766BK", "766BK", "763BK", "749BW", 
"766JJ", "765BV", "766FS", "766JJ", "766BL", "779BS", "766BK", 
"766JS", "764KK", "766JK", "764KL", "765BL", "766IK"), MeltDate = structure(c(17030, 
17269, 17759, 17330, 17876, 17882, 18110, 17243, 17921, 17265, 
17100, 17374, 17516, 17577, 17569, 17323, 18005, 17233, 17371, 
17212, 17515, 17071, 17384, 17718, 17988, 16841, 17034, 17096, 
17845, 17719, 18107, 17180, 17131, 17616, 17972, 17665, 17178, 
16976, 17702, 17202, 17755, 17098, 17597, 17946, 17785, 17832, 
18026, 17101, 17605, 17922), class = "Date"), MeltControlRelease = structure(c(1472515200, 
1492732800, 1535068800, 1498176000, 1545782400, 1548892800, 1565740800, 
1490572800, 1549324800, 1492560000, 1478131200, 1502236800, 1514505600, 
1522195200, 1525305600, 1498780800, 1556496000, 1489536000, 1502064000, 
1492041600, 1513728000, 1475712000, 1503878400, 1531440000, 1555027200, 
1456790400, 1472428800, 1478131200, 1542758400, 1532304000, 1564963200, 
1484784000, 1480464000, 1522713600, 1554249600, 1526860800, 1484784000, 
1468281600, 1529971200, 1486684800, 1535500800, 1478131200, 1522800000, 
1551830400, 1538611200, 1542326400, 1558483200, 1478649600, 1521417600, 
1549843200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    O2Range = c("17/20", "17/20", "14/19", "16/20", "16/20", 
    "16/20", NA, "17/20", "18/21", "17/20", NA, "17/20", NA, 
    NA, "13/19", "16/20", "17/20", "10/12", "12 max", "12 max", 
    "17/20", "17/20", "17/20", "17/20", "17/20", "16/20", "17/20", 
    "19/22", "17/20", "17/20", "13/19", "14/19", "17/20", "17/20", 
    "17/20", "17/20", "32/37", "17/20", NA, "14/17", "17/20", 
    "17/20", "9/12", "16/20", "17/20", "8/14", "17/20", "14/19", 
    "10/13", "17/20"), ScrapPct = c("50(T)", "50", "70", "50", 
    "70", "60", NA, "60", "50", "50(T)", NA, "70", NA, "0", "TURNINGS PUCKS", 
    "70", "70", "50", "0", "50", "50", "0", "0", "50", "70", 
    "50", "0", "0(T)", "70", "50", "70(T)", "50", "70", "70", 
    "70", "0", "28", "50(T)", "28", "28", "28", "50", "50", "70", 
    "28", "28", "50(T)", "50", "28", "50"), ReasonLate = c(NA, 
    NA, NA, NA, NA, NA, "remelt of 9J62399", NA, NA, NA, NA, 
    NA, "remelt of 8R54007", "Water spot on intermediate cast, use for non-rotor", 
    NA, NA, "plant power outage, restrike, melt to 42\"", NA, 
    NA, NA, NA, NA, "High O2 top and bottom", NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, "Low O2 ingot, apply to a different order", "Low Al/V.  Appley to a different order", 
    NA, NA, NA, "High Al"), O2High = list("20", "20", "19", "20", 
        "20", "20", NA_character_, "20", "21", "20", NA_character_, 
        "20", NA_character_, NA_character_, "19", "20", "20", 
        "12", NA_character_, NA_character_, "20", "20", "20", 
        "20", "20", "20", "20", "22", "20", "20", "19", "19", 
        "20", "20", "20", "20", "37", "20", NA_character_, "17", 
        "20", "20", "12", "20", "20", "14", "20", "19", "13", 
        "20"), O2Low = list("17", "17", "14", "16", "16", "16", 
        NA_character_, "17", "18", "17", NA_character_, "17", 
        NA_character_, NA_character_, "13", "16", "17", "10", 
        NA_character_, NA_character_, "17", "17", "17", "17", 
        "17", "16", "17", "19", "17", "17", "13", "14", "17", 
        "17", "17", "17", "32", "17", NA_character_, "14", "17", 
        "17", "9", "16", "17", "8", "17", "14", "10", "17"), 
    Turnings = c(1, 0, 0, 0, 0, 0, NA, 0, 0, 1, NA, 0, NA, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), Furnace = c("J", 
    "J", "G", "H", "J", "G", "J", "J", "T", "F", "G", "G", "H", 
    "J", "K", "G", "G", "H", "J", "H", "G", "G", "H", "G", "V", 
    "H", "G", "G", "T", "G", "H", "F", "F", "G", "H", "J", "J", 
    "J", "J", "F", "G", "G", "G", "H", "G", "G", "G", "J", "H", 
    "R"), DailyAverageRelativeHumidity = c(85L, 57L, 80L, 66L, 
    76L, 78L, NA, 93L, 62L, 34L, 76L, 72L, 76L, 91L, 82L, 80L, 
    91L, 48L, 78L, 78L, 76L, 59L, 78L, 72L, 46L, 79L, 72L, 77L, 
    72L, 63L, 80L, 70L, 82L, 49L, 53L, 90L, 89L, 58L, 86L, 54L, 
    70L, 72L, 79L, 68L, 87L, 93L, 83L, 90L, 67L, 81L), DailyAverageDewPointTemperature = c(68L, 
    33L, 68L, 67L, 22L, 28L, NA, 32L, 5L, 29L, 32L, 63L, 24L, 
    52L, 16L, 52L, 50L, 26L, 61L, 24L, 12L, 44L, 57L, 60L, 19L, 
    15L, 62L, 37L, 20L, 52L, 66L, 20L, 32L, 22L, 16L, 59L, 46L, 
    54L, 63L, 18L, 60L, 39L, 27L, 12L, 60L, 42L, 51L, 44L, 21L, 
    14L)), row.names = c(NA, -50L), class = c("tbl_df", "tbl", 
"data.frame"))

Upvotes: 0

Views: 102

Answers (1)

jeffverboon
jeffverboon

Reputation: 336

This is easiest if you first build an intermediate data frame of counts and percentages.

# One row per LookupCode (NA forms its own group) holding the number of rows
# with that code, plus that number as a percentage of all rows in df.
df_plot <- df %>%
  count(LookupCode, name = "count") %>%
  mutate(percent = 100 * count / sum(count))

Now you can filter out the NA row before plotting and still keep the percentages that were computed over all rows, NA included.

# Draw one bar per non-NA lookup code. The `percent` column of df_plot was
# computed before this filter, so each label is still the code's share of all
# rows, NA included.
# geom_text() deliberately takes no data argument: it inherits the filtered
# data and the x/y aesthetics from ggplot(). Handing it the unfiltered df_plot
# would bring the NA category back onto the x-axis.
ggplot(df_plot %>% filter(!is.na(LookupCode)), aes(x = LookupCode, y = count)) +
    geom_col() +
    geom_text(aes(label = paste0(round(percent, 1), "%")))

Then add whatever themes and other niceties you like.

Upvotes: 1

Related Questions