Reputation: 300
I am using the code below to draw a pair plot
coloured by a categorical column. However, I was wondering whether there is a library that searches a data frame for columns of categorical (factor) type and then draws the pair plot based on the first such column. In the code below we already know that the "Species" column is categorical. But suppose I have a data frame that contains multiple categorical or factor columns. Below is some example data, and I want to draw the pair plot
based on the location column.
library(ggplot2)
library(GGally)
# Pair plot of the built-in iris data, with colour mapped to the Species column.
ggpairs(data = iris, mapping = aes(colour = Species))
dput of the dataset
structure(list(location = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Afghanistan",
"Africa", "Albania", "Algeria", "Andorra", "Angola", "Anguilla",
"Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Asia",
"Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh",
"Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bermuda",
"Bhutan", "Bolivia", "Bonaire Sint Eustatius and Saba", "Bosnia and Herzegovina",
"Botswana", "Brazil", "British Virgin Islands", "Brunei", "Bulgaria",
"Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Canada",
"Cape Verde", "Cayman Islands", "Central African Republic", "Chad",
"Chile", "China", "Colombia", "Comoros", "Congo", "Cook Islands",
"Costa Rica", "Cote d'Ivoire", "Croatia", "Cuba", "Curacao",
"Cyprus", "Czechia", "Democratic Republic of Congo", "Denmark",
"Djibouti", "Dominica", "Dominican Republic", "Ecuador", "Egypt",
"El Salvador", "England", "Equatorial Guinea", "Estonia", "Eswatini",
"Ethiopia", "Europe", "European Union", "Faeroe Islands", "Falkland Islands",
"Fiji", "Finland", "France", "French Polynesia", "Gabon", "Gambia",
"Georgia", "Germany", "Ghana", "Gibraltar", "Greece", "Greenland",
"Grenada", "Guatemala", "Guernsey", "Guinea", "Guinea-Bissau",
"Guyana", "Haiti", "High income", "Honduras", "Hong Kong", "Hungary",
"Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Isle of Man",
"Israel", "Italy", "Jamaica", "Japan", "Jersey", "Jordan", "Kazakhstan",
"Kenya", "Kiribati", "Kosovo", "Kuwait", "Kyrgyzstan", "Laos",
"Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein",
"Lithuania", "Low income", "Lower middle income", "Luxembourg",
"Macao", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali",
"Malta", "Mauritania", "Mauritius", "Mexico", "Moldova", "Monaco",
"Mongolia", "Montenegro", "Montserrat", "Morocco", "Mozambique",
"Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "New Caledonia",
"New Zealand", "Nicaragua", "Niger", "Nigeria", "Niue", "North America",
"North Macedonia", "Northern Cyprus", "Northern Ireland", "Norway",
"Oceania", "Oman", "Pakistan", "Palestine", "Panama", "Papua New Guinea",
"Paraguay", "Peru", "Philippines", "Pitcairn", "Poland", "Portugal",
"Qatar", "Romania", "Russia", "Rwanda", "Saint Helena", "Saint Kitts and Nevis",
"Saint Lucia", "Saint Vincent and the Grenadines", "Samoa", "San Marino",
"Sao Tome and Principe", "Saudi Arabia", "Scotland", "Senegal",
"Serbia", "Seychelles", "Sierra Leone", "Singapore", "Sint Maarten (Dutch part)",
"Slovakia", "Slovenia", "Solomon Islands", "Somalia", "South Africa",
"South America", "South Korea", "South Sudan", "Spain", "Sri Lanka",
"Sudan", "Suriname", "Sweden", "Switzerland", "Syria", "Taiwan",
"Tajikistan", "Tanzania", "Thailand", "Timor", "Togo", "Tokelau",
"Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan",
"Turks and Caicos Islands", "Tuvalu", "Uganda", "Ukraine", "United Arab Emirates",
"United Kingdom", "United States", "Upper middle income", "Uruguay",
"Uzbekistan", "Vanuatu", "Venezuela", "Vietnam", "Wales", "Wallis and Futuna",
"World", "Yemen", "Zambia", "Zimbabwe"), class = "factor"), iso_code = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("ABW", "AFG", "AGO", "AIA", "ALB",
"AND", "ARE", "ARG", "ARM", "ATG", "AUS", "AUT", "AZE", "BDI",
"BEL", "BEN", "BES", "BFA", "BGD", "BGR", "BHR", "BHS", "BIH",
"BLR", "BLZ", "BMU", "BOL", "BRA", "BRB", "BRN", "BTN", "BWA",
"CAF", "CAN", "CHE", "CHL", "CHN", "CIV", "CMR", "COD", "COG",
"COK", "COL", "COM", "CPV", "CRI", "CUB", "CUW", "CYM", "CYP",
"CZE", "DEU", "DJI", "DMA", "DNK", "DOM", "DZA", "ECU", "EGY",
"ESP", "EST", "ETH", "FIN", "FJI", "FLK", "FRA", "FRO", "GAB",
"GBR", "GEO", "GGY", "GHA", "GIB", "GIN", "GMB", "GNB", "GNQ",
"GRC", "GRD", "GRL", "GTM", "GUY", "HKG", "HND", "HRV", "HTI",
"HUN", "IDN", "IMN", "IND", "IRL", "IRN", "IRQ", "ISL", "ISR",
"ITA", "JAM", "JEY", "JOR", "JPN", "KAZ", "KEN", "KGZ", "KHM",
"KIR", "KNA", "KOR", "KWT", "LAO", "LBN", "LBR", "LBY", "LCA",
"LIE", "LKA", "LSO", "LTU", "LUX", "LVA", "MAC", "MAR", "MCO",
"MDA", "MDG", "MDV", "MEX", "MKD", "MLI", "MLT", "MMR", "MNE",
"MNG", "MOZ", "MRT", "MSR", "MUS", "MWI", "MYS", "NAM", "NCL",
"NER", "NGA", "NIC", "NIU", "NLD", "NOR", "NPL", "NRU", "NZL",
"OMN", "OWID_AFR", "OWID_ASI", "OWID_CYN", "OWID_ENG", "OWID_EUN",
"OWID_EUR", "OWID_HIC", "OWID_KOS", "OWID_LIC", "OWID_LMC", "OWID_NAM",
"OWID_NIR", "OWID_OCE", "OWID_SAM", "OWID_SCT", "OWID_UMC", "OWID_WLS",
"OWID_WRL", "PAK", "PAN", "PCN", "PER", "PHL", "PNG", "POL",
"PRT", "PRY", "PSE", "PYF", "QAT", "ROU", "RUS", "RWA", "SAU",
"SDN", "SEN", "SGP", "SHN", "SLB", "SLE", "SLV", "SMR", "SOM",
"SRB", "SSD", "STP", "SUR", "SVK", "SVN", "SWE", "SWZ", "SXM",
"SYC", "SYR", "TCA", "TCD", "TGO", "THA", "TJK", "TKL", "TKM",
"TLS", "TON", "TTO", "TUN", "TUR", "TUV", "TWN", "TZA", "UGA",
"UKR", "URY", "USA", "UZB", "VCT", "VEN", "VGB", "VNM", "VUT",
"WLF", "WSM", "YEM", "ZAF", "ZMB", "ZWE"), class = "factor"),
date = structure(84:89, .Label = c("2020-12-01", "2020-12-02",
"2020-12-03", "2020-12-04", "2020-12-05", "2020-12-06", "2020-12-07",
"2020-12-08", "2020-12-09", "2020-12-10", "2020-12-11", "2020-12-12",
"2020-12-13", "2020-12-14", "2020-12-15", "2020-12-16", "2020-12-17",
"2020-12-18", "2020-12-19", "2020-12-20", "2020-12-21", "2020-12-22",
"2020-12-23", "2020-12-24", "2020-12-25", "2020-12-26", "2020-12-27",
"2020-12-28", "2020-12-29", "2020-12-30", "2020-12-31", "2021-01-01",
"2021-01-02", "2021-01-03", "2021-01-04", "2021-01-05", "2021-01-06",
"2021-01-07", "2021-01-08", "2021-01-09", "2021-01-10", "2021-01-11",
"2021-01-12", "2021-01-13", "2021-01-14", "2021-01-15", "2021-01-16",
"2021-01-17", "2021-01-18", "2021-01-19", "2021-01-20", "2021-01-21",
"2021-01-22", "2021-01-23", "2021-01-24", "2021-01-25", "2021-01-26",
"2021-01-27", "2021-01-28", "2021-01-29", "2021-01-30", "2021-01-31",
"2021-02-01", "2021-02-02", "2021-02-03", "2021-02-04", "2021-02-05",
"2021-02-06", "2021-02-07", "2021-02-08", "2021-02-09", "2021-02-10",
"2021-02-11", "2021-02-12", "2021-02-13", "2021-02-14", "2021-02-15",
"2021-02-16", "2021-02-17", "2021-02-18", "2021-02-19", "2021-02-20",
"2021-02-21", "2021-02-22", "2021-02-23", "2021-02-24", "2021-02-25",
"2021-02-26", "2021-02-27", "2021-02-28", "2021-03-01", "2021-03-02",
"2021-03-03", "2021-03-04", "2021-03-05", "2021-03-06", "2021-03-07",
"2021-03-08", "2021-03-09", "2021-03-10", "2021-03-11", "2021-03-12",
"2021-03-13", "2021-03-14", "2021-03-15", "2021-03-16", "2021-03-17",
"2021-03-18", "2021-03-19", "2021-03-20", "2021-03-21", "2021-03-22",
"2021-03-23", "2021-03-24", "2021-03-25", "2021-03-26", "2021-03-27",
"2021-03-28", "2021-03-29", "2021-03-30", "2021-03-31", "2021-04-01",
"2021-04-02", "2021-04-03", "2021-04-04", "2021-04-05", "2021-04-06",
"2021-04-07", "2021-04-08", "2021-04-09", "2021-04-10", "2021-04-11",
"2021-04-12", "2021-04-13", "2021-04-14", "2021-04-15", "2021-04-16",
"2021-04-17", "2021-04-18", "2021-04-19", "2021-04-20", "2021-04-21",
"2021-04-22", "2021-04-23", "2021-04-24", "2021-04-25", "2021-04-26",
"2021-04-27", "2021-04-28", "2021-04-29", "2021-04-30", "2021-05-01",
"2021-05-02", "2021-05-03", "2021-05-04", "2021-05-05", "2021-05-06",
"2021-05-07", "2021-05-08", "2021-05-09", "2021-05-10", "2021-05-11",
"2021-05-12", "2021-05-13", "2021-05-14", "2021-05-15", "2021-05-16",
"2021-05-17", "2021-05-18", "2021-05-19", "2021-05-20", "2021-05-21",
"2021-05-22", "2021-05-23", "2021-05-24", "2021-05-25", "2021-05-26",
"2021-05-27", "2021-05-28", "2021-05-29", "2021-05-30", "2021-05-31",
"2021-06-01", "2021-06-02", "2021-06-03", "2021-06-04", "2021-06-05",
"2021-06-06", "2021-06-07", "2021-06-08", "2021-06-09", "2021-06-10",
"2021-06-11", "2021-06-12", "2021-06-13", "2021-06-14", "2021-06-15",
"2021-06-16", "2021-06-17", "2021-06-18", "2021-06-19", "2021-06-20",
"2021-06-21", "2021-06-22", "2021-06-23", "2021-06-24", "2021-06-25",
"2021-06-26", "2021-06-27", "2021-06-28", "2021-06-29", "2021-06-30",
"2021-07-01", "2021-07-02", "2021-07-03", "2021-07-04", "2021-07-05",
"2021-07-06", "2021-07-07", "2021-07-08", "2021-07-09", "2021-07-10",
"2021-07-11", "2021-07-12", "2021-07-13", "2021-07-14", "2021-07-15",
"2021-07-16", "2021-07-17", "2021-07-18", "2021-07-19", "2021-07-20",
"2021-07-21", "2021-07-22", "2021-07-23", "2021-07-24", "2021-07-25",
"2021-07-26", "2021-07-27", "2021-07-28", "2021-07-29", "2021-07-30",
"2021-07-31", "2021-08-01", "2021-08-02", "2021-08-03", "2021-08-04",
"2021-08-05", "2021-08-06", "2021-08-07", "2021-08-08", "2021-08-09",
"2021-08-10", "2021-08-11", "2021-08-12", "2021-08-13", "2021-08-14",
"2021-08-15", "2021-08-16", "2021-08-17", "2021-08-18", "2021-08-19",
"2021-08-20", "2021-08-21", "2021-08-22", "2021-08-23", "2021-08-24",
"2021-08-25", "2021-08-26", "2021-08-27", "2021-08-28", "2021-08-29",
"2021-08-30", "2021-08-31", "2021-09-01", "2021-09-02", "2021-09-03",
"2021-09-04", "2021-09-05", "2021-09-06", "2021-09-07", "2021-09-08",
"2021-09-09", "2021-09-10", "2021-09-11", "2021-09-12", "2021-09-13",
"2021-09-14", "2021-09-15", "2021-09-16", "2021-09-17", "2021-09-18",
"2021-09-19", "2021-09-20", "2021-09-21", "2021-09-22", "2021-09-23",
"2021-09-24", "2021-09-25", "2021-09-26", "2021-09-27", "2021-09-28",
"2021-09-29", "2021-09-30", "2021-10-01", "2021-10-02", "2021-10-03",
"2021-10-04", "2021-10-05", "2021-10-06", "2021-10-07", "2021-10-08",
"2021-10-09", "2021-10-10", "2021-10-11", "2021-10-12", "2021-10-13",
"2021-10-14", "2021-10-15", "2021-10-16", "2021-10-17", "2021-10-18",
"2021-10-19", "2021-10-20", "2021-10-21", "2021-10-22", "2021-10-23",
"2021-10-24", "2021-10-25", "2021-10-26", "2021-10-27", "2021-10-28",
"2021-10-29", "2021-10-30", "2021-10-31", "2021-11-01", "2021-11-02",
"2021-11-03", "2021-11-04", "2021-11-05", "2021-11-06", "2021-11-07",
"2021-11-08", "2021-11-09", "2021-11-10", "2021-11-11", "2021-11-12",
"2021-11-13", "2021-11-14", "2021-11-15", "2021-11-16", "2021-11-17",
"2021-11-18", "2021-11-19", "2021-11-20", "2021-11-21", "2021-11-22",
"2021-11-23", "2021-11-24", "2021-11-25", "2021-11-26", "2021-11-27",
"2021-11-28", "2021-11-29", "2021-11-30", "2021-12-01", "2021-12-02",
"2021-12-03", "2021-12-04", "2021-12-05", "2021-12-06", "2021-12-07",
"2021-12-08", "2021-12-09", "2021-12-10", "2021-12-11", "2021-12-12",
"2021-12-13", "2021-12-14", "2021-12-15", "2021-12-16", "2021-12-17",
"2021-12-18", "2021-12-19", "2021-12-20", "2021-12-21", "2021-12-22",
"2021-12-23", "2021-12-24", "2021-12-25", "2021-12-26", "2021-12-27",
"2021-12-28", "2021-12-29", "2021-12-30", "2021-12-31", "2022-01-01",
"2022-01-02", "2022-01-03", "2022-01-04", "2022-01-05", "2022-01-06",
"2022-01-07", "2022-01-08"), class = "factor"), total_vaccinations = c(0,
NA, NA, NA, NA, NA), people_vaccinated = c(0, NA, NA, NA,
NA, NA), people_fully_vaccinated = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), total_boosters = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), daily_vaccinations_raw = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), daily_vaccinations = c(NA,
1367L, 1367L, 1367L, 1367L, 1367L), total_vaccinations_per_hundred = c(0,
NA, NA, NA, NA, NA), people_vaccinated_per_hundred = c(0,
NA, NA, NA, NA, NA), people_fully_vaccinated_per_hundred = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), total_boosters_per_hundred = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), daily_vaccinations_per_million = c(NA,
34L, 34L, 34L, 34L, 34L), daily_people_vaccinated = c(NA,
1367L, 1367L, 1367L, 1367L, 1367L), daily_people_vaccinated_per_hundred = c(NA,
0.003, 0.003, 0.003, 0.003, 0.003)), class = c("data.table",
"data.frame"), row.names = c(NA, -6L), .internal.selfref = <pointer: 0x0000000007b01ef0>)
Upvotes: 0
Views: 221
Reputation: 2520
I believe the following code does what you're looking for.
Note that this code doesn't work with the sample dataset you included, because it contains too many missing or invalid entries (most of the numeric values are NA).
library(tidyverse)
library(GGally) # provides ggpairs(); it is not attached by library(tidyverse)

# `df` is the data frame to plot; it is not defined in this snippet.
# Work on a plain data.frame copy so data.table/tibble inputs behave the same.
plot_data <- as.data.frame(df)

# Flag the categorical columns (assuming character is categorical).
# is.factor()/is.character() give exactly one logical per column, whereas
# comparing class() output breaks on columns with several classes
# (e.g. ordered factors).
is_categorical <- vapply(
  plot_data,
  function(x) is.factor(x) || is.character(x),
  logical(1)
)

# Keep the column NAMES, not the class labels.
categorical_column_names <- names(plot_data)[is_categorical]
if (length(categorical_column_names) == 0) {
  stop("`df` has no factor or character column to colour by.", call. = FALSE)
}

# Drop unused factor levels to help stay below the maximum of 15 levels that
# ggpairs() allows by default. Only factor columns are modified, so numeric
# columns keep their type and the remaining levels keep their order.
is_factor_column <- vapply(plot_data, is.factor, logical(1))
df_simpler_factors <- plot_data
df_simpler_factors[is_factor_column] <- lapply(
  plot_data[is_factor_column],
  fct_drop
)

ggpairs(
  df_simpler_factors,
  # Pick the first categorical column as the main category. The name is held
  # in a string, so convert it to a symbol and unquote it; a bare string
  # inside aes() would be treated as a constant instead of a column.
  aes(colour = !!sym(categorical_column_names[1]))
)
Upvotes: 0