Reputation: 130
I have a data frame that is mostly tidy but 2 columns contain benchmarks rather than incorporating the benchmarks as observations. How do I tidy this so both "Facility_score" and "TTP" col_names are added as observations under "Facility_label" for each unique FYQ and Metric combination?
Input data:
library(zoo)
dd <- structure(list(Facility_label = structure(c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A", "B", "C",
"D", "Nashville"), class = "factor"), FYQ = structure(c(2017.75,
2018, 2018.25, 2018.5, 2017.75, 2018, 2018.25, 2018.5, 2018.75,
2017.75, 2018, 2018.25, 2018.5, 2018.75, 2017.75, 2018, 2018.25,
2018.5, 2018.75, 2017.75, 2018, 2018.25, 2018.5, 2018.75, 2017.75,
2018, 2018.25, 2018.5, 2018.75, 2017.75), class = "yearqtr"),
Metric = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L), .Label = c("Safety Recall", "Turnaround days",
"Consult Active <= 30d", "Consult Pending <- 7d", "Consult Scheduling <- 90d",
"ICB Compliance Rate", "FCA Assessment", "Minor construction execution",
"NRM funding execution", "Deficincies", "%Deficienceis corrected among corrected or action plan",
"%Deficienceis corrected or action plan", "Ratio of Hospital Staff to HR FTE",
"Turnover Rate", "GEMS no Action", "Lost time claims", "RTMS risk score",
"DOC Control", "Loaner deficiencies", "Pretreatment", "RME rate",
"SPS staff vacany rate", "Stock Inactive", "MSPV-NG", "Days to close prosthetis consult",
"%Prosthetic PO using national contracts"), class = "factor"),
Facility_score = c(84.78802993, 95.59659091, 100, 100, 77.61732852,
57.87671233, 81.28898129, 33.33333333, 31.57894737, 10.2,
7.902356902, 8.62, 11.71, 13.15, 30.98236776, 33.26086957,
31.19584055, 54.54545455, 27.27272727, 11, 17.19132653, 26.02008197,
22.29, 30.41, 89.09090909, 93.47826087, 82.10735586, 91.66666667,
87.5, 3.2), `Facility mean` = c(85.35550152, 87.31899147,
93.11498231, 100, 85.35550152, 87.31899147, 93.11498231,
100, 100, 12, 13.06073298, 12.2, 11.51, 10.56, 85.35550152,
87.31899147, 93.11498231, 100, 100, 12, 13.06073298, 12.2,
11.51, 10.56, 85.35550152, 87.31899147, 93.11498231, 100,
100, 12), TTP_score = c(100, 100, 100, 100, 100, 100, 100,
100, 100, 5.65, 5.063953488, 4.779310345, 4.47, 4.545, 100,
100, 100, 100, 100, 5.65, 5.063953488, 4.779310345, 4.47,
4.545, 100, 100, 100, 100, 100, 5.65)), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
Desired output:
dd_output <- structure(list(Facility_label = c("A", "Facility mean", "TTP score",
"A", "Facility mean", "TTP score", "A", "Facility mean", "TTP score",
"A", "Facility mean", "TTP score", "B", "B", "B", "B", "B", "B",
"Facility mean", "TTP score", "B", "Facility mean", "TTP score",
"B", "Facility mean", "TTP score", "B", "Facility mean", "TTP score",
"B", "Facility mean", "TTP score", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C", "D", "D", "D", "D", "D", "D"), FYQ = c("2017 Q4",
"2017 Q4", "2017 Q4", "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q2",
"2018 Q2", "2018 Q2", "2018 Q3", "2018 Q3", "2018 Q3", "2017 Q4",
"2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4", "2017 Q4",
"2017 Q4", "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q2", "2018 Q2",
"2018 Q2", "2018 Q3", "2018 Q3", "2018 Q3", "2018 Q4", "2018 Q4",
"2018 Q4", "2017 Q4", "2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4",
"2017 Q4", "2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4",
"2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4"), Metric = c("Safety Recall",
"Safety Recall", "safety Recall", "Safety Recall", "Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days",
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days",
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days",
"Turnaround days", "Turnaround days", "Turnaround days", "Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days",
"Turnaround days", "Safety Recall", "Safety Recall", "Safety Recall",
"Safety Recall", "Safety Recall", "Turnaround days"), Facility_score = c(84.78802993,
85.35550152, 100, 95.59659091, 87.31899147, 100, 100, 93.11498231,
100, 100, 100, 100, 77.61732852, 57.87671233, 81.28898129, 33.33333333,
31.57894737, 10.2, 12, 5.65, 7.902356902, 13.06073298, 5.063953488,
8.62, 12.2, 4.779310345, 11.71, 11.51, 4.47, 13.15, 10.56, 4.545,
30.98236776, 33.26086957, 31.19584055, 54.54545455, 27.27272727,
11, 17.19132653, 26.02008197, 22.29, 30.41, 89.09090909, 93.47826087,
82.10735586, 91.66666667, 87.5, 3.2)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -48L), spec = structure(list(
cols = list(X1 = structure(list(), class = c("collector_skip",
"collector")), Facility_label = structure(list(), class = c("collector_character",
"collector")), FYQ = structure(list(), class = c("collector_character",
"collector")), Metric = structure(list(), class = c("collector_character",
"collector")), Facility_score = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
Upvotes: 3
Views: 64
Reputation: 18681
We can do it with tidyr::gather
and some dplyr::mutate
manipulation:
library(tidyverse)
dd %>%
mutate(ID = row_number()) %>%
gather(var, Facility_score, Facility_score:TTP_score) %>%
group_by(FYQ, Metric,
temp_ID = case_when(var == "Facility mean" ~ 1,
var == "TTP_score" ~ 2,
TRUE ~ 0)) %>%
slice(if(any(temp_ID == 0)) row_number() else 1) %>%
mutate(Facility_label = if_else(var == "Facility_score", as.character(Facility_label), var)) %>%
ungroup() %>%
arrange(ID, temp_ID) %>%
select(ID, everything(), -var, -temp_ID)
Note that I have added the ID
column to indicate the original row number. This makes it less confusing when all the scores in the same row are combined.
Notes:
gather
reshapes the table from wide to long format treating the entries of Facility_score
, Facility mean
and TTP_score
as the new Facility_score
's. A variable var
is created to temporarily store the value labels.
We then group_by
FYQ
, Metric
, and a temporary ID variable (temp_ID
) created by setting Facility mean
to 1
, TTP_score
to 2
, and everything else in var
to 0
.
Based on temp_ID
, we use slice
to grab all rows if it's 0
, and only the first row otherwise. This effectively returns all rows corresponding to Facility_score
, but only one of Facility mean
and TTP_score
in each FYQ
+ Metric
combination.
Next, we replace Facility_label
with the corresponding labels in var
.
Finally, ungroup
, arrange
by ID
and temp_ID
, and rearrange the column order while deleting var
and temp_ID
(everything
is useful when we want to bring a variable(s) in front while keeping other variables untouched).
Output:
# A tibble: 50 x 5
ID Facility_label FYQ Metric Facility_score
<int> <chr> <S3: yearqtr> <fct> <dbl>
1 1 A 2017 Q4 Safety Recall 84.8
2 1 Facility mean 2017 Q4 Safety Recall 85.4
3 1 TTP_score 2017 Q4 Safety Recall 100
4 2 A 2018 Q1 Safety Recall 95.6
5 2 Facility mean 2018 Q1 Safety Recall 87.3
6 2 TTP_score 2018 Q1 Safety Recall 100
7 3 A 2018 Q2 Safety Recall 100
8 3 Facility mean 2018 Q2 Safety Recall 93.1
9 3 TTP_score 2018 Q2 Safety Recall 100
10 4 A 2018 Q3 Safety Recall 100
11 4 Facility mean 2018 Q3 Safety Recall 100
12 4 TTP_score 2018 Q3 Safety Recall 100
13 5 B 2017 Q4 Safety Recall 77.6
14 6 B 2018 Q1 Safety Recall 57.9
15 7 B 2018 Q2 Safety Recall 81.3
16 8 B 2018 Q3 Safety Recall 33.3
17 9 B 2018 Q4 Safety Recall 31.6
18 9 Facility mean 2018 Q4 Safety Recall 100
19 9 TTP_score 2018 Q4 Safety Recall 100
20 10 B 2017 Q4 Turnaround days 10.2
# ... with 30 more rows
Upvotes: 1