Tidying "Benchmark" columns

Question

I have a data frame that is mostly tidy but 2 columns contain benchmarks rather than incorporating the benchmarks as observations. How do I tidy this so both "Facility_score" and "TTP" col_names are added as observations under "Facility_label" for each unique FYQ and Metric combination?

Input data:

library(zoo)

dd <- structure(list(Facility_label = structure(c(1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A", "B", "C", 
"D", "Nashville"), class = "factor"), FYQ = structure(c(2017.75, 
2018, 2018.25, 2018.5, 2017.75, 2018, 2018.25, 2018.5, 2018.75, 
2017.75, 2018, 2018.25, 2018.5, 2018.75, 2017.75, 2018, 2018.25, 
2018.5, 2018.75, 2017.75, 2018, 2018.25, 2018.5, 2018.75, 2017.75, 
2018, 2018.25, 2018.5, 2018.75, 2017.75), class = "yearqtr"), 
    Metric = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
    1L, 1L, 1L, 1L, 1L, 2L), .Label = c("Safety Recall", "Turnaround days", 
    "Consult Active <= 30d", "Consult Pending <- 7d", "Consult Scheduling <- 90d", 
    "ICB Compliance Rate", "FCA Assessment", "Minor construction execution", 
    "NRM funding execution", "Deficincies", "%Deficienceis corrected among corrected or action plan", 
    "%Deficienceis corrected or action plan", "Ratio of Hospital Staff to HR FTE", 
    "Turnover Rate", "GEMS no Action", "Lost time claims", "RTMS risk score", 
    "DOC Control", "Loaner deficiencies", "Pretreatment", "RME rate", 
    "SPS staff vacany rate", "Stock Inactive", "MSPV-NG", "Days to close prosthetis consult", 
    "%Prosthetic PO using national contracts"), class = "factor"), 
    Facility_score = c(84.78802993, 95.59659091, 100, 100, 77.61732852, 
    57.87671233, 81.28898129, 33.33333333, 31.57894737, 10.2, 
    7.902356902, 8.62, 11.71, 13.15, 30.98236776, 33.26086957, 
    31.19584055, 54.54545455, 27.27272727, 11, 17.19132653, 26.02008197, 
    22.29, 30.41, 89.09090909, 93.47826087, 82.10735586, 91.66666667, 
    87.5, 3.2), `Facility mean` = c(85.35550152, 87.31899147, 
    93.11498231, 100, 85.35550152, 87.31899147, 93.11498231, 
    100, 100, 12, 13.06073298, 12.2, 11.51, 10.56, 85.35550152, 
    87.31899147, 93.11498231, 100, 100, 12, 13.06073298, 12.2, 
    11.51, 10.56, 85.35550152, 87.31899147, 93.11498231, 100, 
    100, 12), TTP_score = c(100, 100, 100, 100, 100, 100, 100, 
    100, 100, 5.65, 5.063953488, 4.779310345, 4.47, 4.545, 100, 
    100, 100, 100, 100, 5.65, 5.063953488, 4.779310345, 4.47, 
    4.545, 100, 100, 100, 100, 100, 5.65)), row.names = c(NA, 
-30L), class = c("tbl_df", "tbl", "data.frame"))

Desired output:

dd_output <- structure(list(Facility_label = c("A", "Facility mean", "TTP score", 
"A", "Facility mean", "TTP score", "A", "Facility mean", "TTP score", 
"A", "Facility mean", "TTP score", "B", "B", "B", "B", "B", "B", 
"Facility mean", "TTP score", "B", "Facility mean", "TTP score", 
"B", "Facility mean", "TTP score", "B", "Facility mean", "TTP score", 
"B", "Facility mean", "TTP score", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "D", "D", "D", "D", "D", "D"), FYQ = c("2017 Q4", 
"2017 Q4", "2017 Q4", "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q2", 
"2018 Q2", "2018 Q2", "2018 Q3", "2018 Q3", "2018 Q3", "2017 Q4", 
"2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4", "2017 Q4", 
"2017 Q4", "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q2", "2018 Q2", 
"2018 Q2", "2018 Q3", "2018 Q3", "2018 Q3", "2018 Q4", "2018 Q4", 
"2018 Q4", "2017 Q4", "2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", 
"2017 Q4", "2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4", 
"2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4"), Metric = c("Safety Recall", 
"Safety Recall", "safety Recall", "Safety Recall", "Safety Recall", 
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall", 
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall", 
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall", 
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days", 
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days", 
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days", 
"Turnaround days", "Turnaround days", "Turnaround days", "Safety Recall", 
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall", 
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days", 
"Turnaround days", "Safety Recall", "Safety Recall", "Safety Recall", 
"Safety Recall", "Safety Recall", "Turnaround days"), Facility_score = c(84.78802993, 
85.35550152, 100, 95.59659091, 87.31899147, 100, 100, 93.11498231, 
100, 100, 100, 100, 77.61732852, 57.87671233, 81.28898129, 33.33333333, 
31.57894737, 10.2, 12, 5.65, 7.902356902, 13.06073298, 5.063953488, 
8.62, 12.2, 4.779310345, 11.71, 11.51, 4.47, 13.15, 10.56, 4.545, 
30.98236776, 33.26086957, 31.19584055, 54.54545455, 27.27272727, 
11, 17.19132653, 26.02008197, 22.29, 30.41, 89.09090909, 93.47826087, 
82.10735586, 91.66666667, 87.5, 3.2)), class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -48L), spec = structure(list(
    cols = list(X1 = structure(list(), class = c("collector_skip", 
    "collector")), Facility_label = structure(list(), class = c("collector_character", 
    "collector")), FYQ = structure(list(), class = c("collector_character", 
    "collector")), Metric = structure(list(), class = c("collector_character", 
    "collector")), Facility_score = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1), class = "col_spec"))

acylam · Accepted Answer

We can do it with tidyr::gather and some dplyr::mutate manipulation:

library(tidyverse)

dd %>%
  mutate(ID = row_number()) %>%
  gather(var, Facility_score, Facility_score:TTP_score) %>%
  group_by(FYQ, Metric,
           temp_ID = case_when(var == "Facility mean" ~ 1, 
                               var == "TTP_score" ~ 2, 
                               TRUE ~ 0)) %>%
  slice(if(any(temp_ID == 0)) row_number() else 1) %>%
  mutate(Facility_label = if_else(var == "Facility_score", as.character(Facility_label), var)) %>%
  ungroup() %>%
  arrange(ID, temp_ID) %>% 
  select(ID, everything(), -var, -temp_ID)

Note that I have added the ID column to indicate the original row number. This makes it less confusing when all the scores in the same row are combined.

Notes:

gather reshapes the table from wide to long format treating the entries of Facility_score, Facility mean and TTP_score as the new Facility_score's. A variable var is created to temporarily store the value labels.
We then group_by FYQ, Metric, and a temporary ID variable (temp_ID) created by setting Facility mean to 1, TTP_score to 2, and everything else in var to 0.
Based on temp_ID, we use slice to grab all rows if it's 0, and only the first row otherwise. This effectively returns all rows corresponding to Facility_score, but only one of Facility mean and TTP_score in each FYQ + Metric combination.
Next, we replace Facility_label with the corresponding labels in var.
Finally, ungroup, arrange by ID and temp_ID, and rearrange the column order while deleting var and temp_ID (everything is useful when we want to bring a variable(s) in front while keeping other variables untouched).

Output:

# A tibble: 50 x 5
      ID Facility_label FYQ           Metric          Facility_score
                                   
 1     1 A              2017 Q4       Safety Recall             84.8
 2     1 Facility mean  2017 Q4       Safety Recall             85.4
 3     1 TTP_score      2017 Q4       Safety Recall            100  
 4     2 A              2018 Q1       Safety Recall             95.6
 5     2 Facility mean  2018 Q1       Safety Recall             87.3
 6     2 TTP_score      2018 Q1       Safety Recall            100  
 7     3 A              2018 Q2       Safety Recall            100  
 8     3 Facility mean  2018 Q2       Safety Recall             93.1
 9     3 TTP_score      2018 Q2       Safety Recall            100  
10     4 A              2018 Q3       Safety Recall            100  
11     4 Facility mean  2018 Q3       Safety Recall            100  
12     4 TTP_score      2018 Q3       Safety Recall            100  
13     5 B              2017 Q4       Safety Recall             77.6
14     6 B              2018 Q1       Safety Recall             57.9
15     7 B              2018 Q2       Safety Recall             81.3
16     8 B              2018 Q3       Safety Recall             33.3
17     9 B              2018 Q4       Safety Recall             31.6
18     9 Facility mean  2018 Q4       Safety Recall            100  
19     9 TTP_score      2018 Q4       Safety Recall            100  
20    10 B              2017 Q4       Turnaround days           10.2
# ... with 30 more rows

Tidying "Benchmark" columns

Answers (1)

Related Questions

Tidying &quot;Benchmark&quot; columns

Answers (1)

Related Questions

Tidying "Benchmark" columns