Reputation: 15
I'm converting some pdfs tables to data frames involving data from US states, and one of the tables has a hick-up in it where District of Colombia is broken up into two lines. Here's a glimpse of the data
$ State <fct> Alabama, Alaska, Arkansas, Arizona, California, Colorado, Connecticut, District of, Columbia, Delaw…
$ Any <fct> "1,133", "311", "591", "1,080", "3,698", "910", "643", "", "69", "32", "3,113", "1,759", "34", "876"…
$ DD <fct> "47,421", "3,317", "41,962", "67,185", "212,800", "37,444", "9,886", "", "34,841", "2,183", "101,41"…
$ MG <fct> "15,653", "1,617", "4,827", "16,879", "28,774", "6,040", "22,023", "", "4,278", "569", "29,128", "2"…
$ Sil <fct> "8,550", "2,085", "7,218", "15,860", "7,303", "6,475", "5,170", "", "160", "302", "28,312", "29,259"…
$ SBR <fct> "1,327", "508", "1,091", "5,492", "3,738", "2,217", "1,068", "", "440", "94", "7,432", "3,563", "55"…
$ SBS <fct> "2,075", "1,101", "1,017", "1,851", "12,166", "1,389", "960", "", "1,048", "507", "6,125", "9,422",…
$ Total <fct> "76,159", "8,939", "56,706", "108,347", "268,479", "54,475", "39,750", "", "40,836", "3,687", "175",…
I'm trying to find a way to use dplyr to both eliminate the empty row with the State "District of" and rename the "Colombia" cell in the State column to "District of Colombia
I was trying
data_table %>%
filter(as.character(State) != "District of")
to filter out the empty column, but that isn't working
My attempt to recode the State column was
atf_data_table %>% mutate(State = recode_factor(State, `Colombia` = "District of Colombia"))
but that hasn't bore fruit either.
edit: Attached is the dput of a section of the data
structure(list(State = structure(c(1L, 2L, 4L, 3L, 5L, 6L, 8L,
10L, 7L), .Label = c("Alabama", "Alaska", "Arizona", "Arkansas",
"California", "Colorado", "Columbia", "Connecticut", "Delaware",
"District of", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois",
"Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine",
"Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi",
"Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire",
"New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota",
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island",
"South Carolina", "South Dakota", "Tennessee", "Texas", "Utah",
"Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin",
"Wyoming"), class = "factor"), Any = structure(c(6L, 24L, 38L,
4L, 23L, 50L, 40L, 1L, 44L), .Label = c("", "1,019", "1,061",
"1,080", "1,110", "1,133", "1,330", "1,430", "1,487", "1,502",
"1,726", "1,759", "1,776", "199", "2,012", "2,043", "2,514",
"2,633", "221", "277", "286", "3,113", "3,698", "311", "32",
"34", "344", "403", "404", "410", "42", "425", "430", "431",
"517", "570", "588", "591", "6,169", "643", "670", "674", "679",
"69", "717", "747", "820", "837", "876", "910", "933", "958"), class = "factor"),
DD = structure(c(41L, 26L, 38L, 47L, 21L, 36L, 52L, 1L, 33L
), .Label = c("", "1,605", "101,418", "102,723", "11,737",
"12,878", "13,004", "13,960", "14,049", "148,700", "159,805",
"161,841", "17,492", "19,379", "2,183", "2,216", "2,517",
"2,710", "2,852", "21,221", "212,800", "22,239", "24,986",
"25,316", "26,358", "3,317", "3,352", "3,459", "30,778",
"32,848", "34,628", "34,693", "34,841", "35,707", "36,066",
"37,444", "37,700", "41,962", "44,821", "45,037", "47,421",
"48,026", "5,068", "5,503", "6,776", "60,764", "67,185",
"69,005", "71,814", "8,318", "83,363", "9,886"), class = "factor"),
MG = structure(c(11L, 5L, 34L, 12L, 23L, 39L, 18L, 1L, 31L
), .Label = c("", "1,070", "1,480", "1,495", "1,617", "1,659",
"1,892", "10,703", "11,234", "13,442", "15,653", "16,879",
"17,019", "17,384", "18,013", "2,049", "2,438", "22,023",
"22,081", "23,709", "24,651", "28,690", "28,774", "29,128",
"3,262", "3,289", "3,709", "3,805", "3,886", "30,220", "4,278",
"4,342", "4,664", "4,827", "429", "569", "595", "6,010",
"6,040", "6,076", "6,182", "6,385", "6,442", "6,555", "6,828",
"6,843", "6,895", "7,521", "7,972", "8,230", "9,090", "9,863"
), class = "factor"), Sil = structure(c(47L, 17L, 43L, 13L,
44L, 38L, 36L, 1L, 15L), .Label = c("", "1,071", "1,103",
"1,153", "1,290", "1,826", "10,407", "105", "11,964", "12,046",
"12,914", "15,736", "15,860", "16,735", "160", "2,022", "2,085",
"2,266", "2,273", "2,303", "2,325", "2,782", "2,887", "2,920",
"27", "28,312", "29,259", "3,368", "3,679", "301", "302",
"4,359", "4,416", "4,913", "47,712", "5,170", "5,857", "6,475",
"6,671", "619", "68", "7,024", "7,218", "7,303", "7,457",
"8,400", "8,550", "8,609", "9,571", "9,839", "9,841", "965"
), class = "factor"), SBR = structure(c(11L, 42L, 3L, 41L,
35L, 25L, 2L, 1L, 39L), .Label = c("", "1,068", "1,091",
"1,094", "1,095", "1,103", "1,174", "1,183", "1,214", "1,315",
"1,327", "1,370", "1,379", "1,380", "1,419", "1,420", "1,457",
"1,577", "1,713", "1,787", "1,864", "1,927", "106", "134",
"2,217", "2,343", "2,346", "2,621", "2,623", "2,842", "2,850",
"260", "264", "3,563", "3,738", "3,853", "343", "369", "440",
"497", "5,492", "508", "55", "572", "6,720", "682", "688",
"7,432", "700", "895", "9,271", "94"), class = "factor"),
SBS = structure(c(20L, 5L, 3L, 15L, 17L, 10L, 52L, 1L, 4L
), .Label = c("", "1,011", "1,017", "1,048", "1,101", "1,106",
"1,121", "1,160", "1,309", "1,389", "1,470", "1,508", "1,638",
"1,674", "1,851", "112", "12,166", "12,323", "175", "2,075",
"2,266", "2,316", "2,542", "211", "3,655", "3,771", "3,822",
"345", "373", "385", "398", "410", "5,538", "507", "548",
"59", "6,125", "6,272", "6,557", "607", "7,342", "712", "752",
"767", "768", "8,581", "850", "887", "9,422", "936", "94",
"960"), class = "factor"), Total = structure(c(47L, 49L,
41L, 4L, 18L, 39L, 29L, 1L, 30L), .Label = c("", "10,871",
"106,513", "108,347", "108,574", "11,770", "112,931", "114,110",
"14,742", "15,724", "17,455", "175,528", "18,376", "197,217",
"223,303", "258,204", "26,151", "268,479", "27,553", "29,383",
"3,592", "3,687", "3,803", "30,319", "36,080", "37,307",
"38,917", "39,389", "39,750", "40,836", "43,589", "45,530",
"48,340", "48,359", "48,989", "49,024", "5,581", "50,550",
"54,475", "55,105", "56,706", "58,964", "6,185", "65,524",
"69,371", "7,652", "76,159", "8,293", "8,939", "80,736",
"82,187", "95,060"), class = "factor")), row.names = c(NA,
9L), class = "data.frame")
Any help would be appreciated
Upvotes: 0
Views: 55
Reputation: 6954
I do not really see the issue here. This code works just fine for me
my_data %>%
filter(State != "District of") %>%
mutate(State = recode_factor(State,
"Columbia" = "District of Columbia"))
You had a typo in "Colombia" but the filtering is fine
Upvotes: 1