Reputation: 103
I am trying to convert my data frame from a long to a wide format. Currently there is an InputCode column, which has Input A and B, and these need to be their own columns using values from 'DataValue'. Ive been trying spread and dcast,
data_wide <- spread(oldData_long, InputCode, DataValue)
or
data_wide2 <- dcast(oldData_long, Indicator + IndicatorID + InputName DataYear + Country + Division + InputUnit ~ InputCode, value.var="DataValue")
but the number of rows in my dataframe remains the same (84) instead of becoming 42, despite the creation of Input A and Input B columns. Whenever there is a value for Input A theres NA in the column for input B and vice versa.
Furthermore, ideally there would be an InputUnit column for each input Code, e.g 'InputAUnit', as this value will also be unique when trying to spread the data and might be causing my above problem. The same for InputName, but I have no idea how to also pull that information across neatly.
Any help would be greatly appreciated!!
dput:
structure(list(ID = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Indicator = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Waste Generated", class = "factor"), IndicatorID = c(11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L), InputCode = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("InputA", "InputB"), class = "factor"), InputName = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Waste Generated - Waste incinerated",
"Waste Generated - Waste sent to landfill"), class = "factor"),
DataValue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 5L, 1L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 6L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 9L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 10L,
3L), .Label = c("0", "155", "19", "2,898.00", "20,462.34",
"22.317", "4.368", "40", "6,695.65", "8.998"), class = "factor"),
UnitCode = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "t", class = "factor"), DataYear = c(2009L, 2009L,
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,
2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2009L, 2009L, 2009L, 2009L, 2009L,
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L), Country = structure(c(4L, 1L, 2L, 3L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L,
3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L,
2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L), .Label = c("Afghanistan",
"Albania", "Algeria", "All", "American Samoa", "Andorra",
"Angola", "Antigua and Barbuda", "Argentina", "Armenia",
"Aruba", "Australia", "Austria", "Azerbaijan"), class = "factor"),
ISO = structure(c(5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L, 7L, 8L,
1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L, 7L,
8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L), .Label = c("ABW", "AFG", "AGO",
"ALB", "ALL", "AND", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT",
"AZE", "DZA"), class = "factor"), Division = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test", class = "factor"),
FurtherDetails1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "Test1", class = "factor"), FurtherDetails2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test2", class = "factor")), class = "data.frame", row.names = c(NA,
-84L))
This would be the ideal output :
structure(list(ID = c(NA, NA, NA, NA, NA, NA), Indicator = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Waste Generated", class = "factor"),
IndicatorID = c(11L, 11L, 11L, 11L, 11L, 11L), DataYear = c(2009L,
2009L, 2009L, 2009L, 2009L, 2009L), Country = structure(c(4L,
1L, 2L, 3L, 5L, 6L), .Label = c("Afghanistan", "Albania",
"Algeria", "All", "American Samoa", "Andorra", "Angola",
"Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia",
"Austria", "Azerbaijan"), class = "factor"), ISO = structure(c(5L,
2L, 4L, 14L, 9L, 6L), .Label = c("ABW", "AFG", "AGO", "ALB",
"ALL", "AND", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT", "AZE",
"DZA"), class = "factor"), Division = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "Test", class = "factor"), FurtherDetails1 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Test1", class = "factor"),
FurtherDetails2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test2", class = "factor"),
InputA = c(0L, 0L, 0L, 0L, 0L, 0L), InputAUnit = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("", "t"), class = "factor"),
InputAName = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Waste Generated - Waste sent to landfill"), class = "factor"),
InputB = c(0L, 0L, 0L, 0L, 0L, 0L), InputBUnit = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("", "t"), class = "factor"),
InputBName = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Waste Generated - Waste incinerated"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
Thanks!!
Upvotes: 0
Views: 49
Reputation: 23598
A possible tidyr solution.
library(tidyr)
out <- pivot_wider(oldData_long, names_from = InputCode, values_from = c(DataValue, UnitCode, InputName))
out
# A tibble: 42 x 15
ID Indicator IndicatorID DataYear Country ISO Division FurtherDetails1 FurtherDetails2 DataValue_InputA DataValue_InputB
<lgl> <fct> <int> <int> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
1 NA Waste Ge… 11 2009 All ALL Test Test1 Test2 0 0
2 NA Waste Ge… 11 2009 Afghan… AFG Test Test1 Test2 0 0
3 NA Waste Ge… 11 2009 Albania ALB Test Test1 Test2 0 0
4 NA Waste Ge… 11 2009 Algeria DZA Test Test1 Test2 0 0
5 NA Waste Ge… 11 2009 Americ… ASM Test Test1 Test2 0 0
6 NA Waste Ge… 11 2009 Andorra AND Test Test1 Test2 0 0
7 NA Waste Ge… 11 2009 Angola AGO Test Test1 Test2 0 0
8 NA Waste Ge… 11 2009 Antigu… ATG Test Test1 Test2 0 0
9 NA Waste Ge… 11 2009 Argent… ARG Test Test1 Test2 0 0
10 NA Waste Ge… 11 2009 Armenia ARM Test Test1 Test2 0 0
# … with 32 more rows, and 4 more variables: UnitCode_InputA <fct>, UnitCode_InputB <fct>, InputName_InputA <fct>, InputName_InputB <fct>
str(out)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 42 obs. of 15 variables:
$ ID : logi NA NA NA NA NA NA ...
$ Indicator : Factor w/ 1 level "Waste Generated": 1 1 1 1 1 1 1 1 1 1 ...
$ IndicatorID : int 11 11 11 11 11 11 11 11 11 11 ...
$ DataYear : int 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
$ Country : Factor w/ 14 levels "Afghanistan",..: 4 1 2 3 5 6 7 8 9 10 ...
$ ISO : Factor w/ 14 levels "ABW","AFG","AGO",..: 5 2 4 14 9 6 3 10 7 8 ...
$ Division : Factor w/ 1 level "Test": 1 1 1 1 1 1 1 1 1 1 ...
$ FurtherDetails1 : Factor w/ 1 level "Test1": 1 1 1 1 1 1 1 1 1 1 ...
$ FurtherDetails2 : Factor w/ 1 level "Test2": 1 1 1 1 1 1 1 1 1 1 ...
$ DataValue_InputA: Factor w/ 10 levels "0","155","19",..: 1 1 1 1 1 1 1 1 1 1 ...
$ DataValue_InputB: Factor w/ 10 levels "0","155","19",..: 1 1 1 1 1 1 1 1 1 1 ...
$ UnitCode_InputA : Factor w/ 1 level "t": 1 1 1 1 1 1 1 1 1 1 ...
$ UnitCode_InputB : Factor w/ 1 level "t": 1 1 1 1 1 1 1 1 1 1 ...
$ InputName_InputA: Factor w/ 2 levels "Waste Generated - Waste incinerated",..: 2 2 2 2 2 2 2 2 2 2 ...
$ InputName_InputB: Factor w/ 2 levels "Waste Generated - Waste incinerated",..: 1 1 1 1 1 1 1 1 1 1 ...
Upvotes: 2