cliffson_a
cliffson_a

Reputation: 15

How to use dplyr in R to both eliminate a row and rename a factor variable

I'm converting some pdfs tables to data frames involving data from US states, and one of the tables has a hick-up in it where District of Colombia is broken up into two lines. Here's a glimpse of the data

$ State <fct> Alabama, Alaska, Arkansas, Arizona, California, Colorado, Connecticut, District of, Columbia, Delaw…
$ Any   <fct> "1,133", "311", "591", "1,080", "3,698", "910", "643", "", "69", "32", "3,113", "1,759", "34", "876"…
$ DD    <fct> "47,421", "3,317", "41,962", "67,185", "212,800", "37,444", "9,886", "", "34,841", "2,183", "101,41"…
$ MG    <fct> "15,653", "1,617", "4,827", "16,879", "28,774", "6,040", "22,023", "", "4,278", "569", "29,128", "2"…
$ Sil   <fct> "8,550", "2,085", "7,218", "15,860", "7,303", "6,475", "5,170", "", "160", "302", "28,312", "29,259"…
$ SBR   <fct> "1,327", "508", "1,091", "5,492", "3,738", "2,217", "1,068", "", "440", "94", "7,432", "3,563", "55"…
$ SBS   <fct> "2,075", "1,101", "1,017", "1,851", "12,166", "1,389", "960", "", "1,048", "507", "6,125", "9,422",…
$ Total <fct> "76,159", "8,939", "56,706", "108,347", "268,479", "54,475", "39,750", "", "40,836", "3,687", "175",…

I'm trying to find a way to use dplyr to both eliminate the empty row with the State "District of" and rename the "Colombia" cell in the State column to "District of Colombia

I was trying

data_table %>%
  filter(as.character(State) != "District of")

to filter out the empty column, but that isn't working

My attempt to recode the State column was

  atf_data_table %>% mutate(State = recode_factor(State, `Colombia` = "District of Colombia"))

but that hasn't bore fruit either.

edit: Attached is the dput of a section of the data

structure(list(State = structure(c(1L, 2L, 4L, 3L, 5L, 6L, 8L, 
10L, 7L), .Label = c("Alabama", "Alaska", "Arizona", "Arkansas", 
"California", "Colorado", "Columbia", "Connecticut", "Delaware", 
"District of", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", 
"Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", 
"Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi", 
"Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", 
"New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", 
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", 
"South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", 
"Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", 
"Wyoming"), class = "factor"), Any = structure(c(6L, 24L, 38L, 
4L, 23L, 50L, 40L, 1L, 44L), .Label = c("", "1,019", "1,061", 
"1,080", "1,110", "1,133", "1,330", "1,430", "1,487", "1,502", 
"1,726", "1,759", "1,776", "199", "2,012", "2,043", "2,514", 
"2,633", "221", "277", "286", "3,113", "3,698", "311", "32", 
"34", "344", "403", "404", "410", "42", "425", "430", "431", 
"517", "570", "588", "591", "6,169", "643", "670", "674", "679", 
"69", "717", "747", "820", "837", "876", "910", "933", "958"), class = "factor"), 
    DD = structure(c(41L, 26L, 38L, 47L, 21L, 36L, 52L, 1L, 33L
    ), .Label = c("", "1,605", "101,418", "102,723", "11,737", 
    "12,878", "13,004", "13,960", "14,049", "148,700", "159,805", 
    "161,841", "17,492", "19,379", "2,183", "2,216", "2,517", 
    "2,710", "2,852", "21,221", "212,800", "22,239", "24,986", 
    "25,316", "26,358", "3,317", "3,352", "3,459", "30,778", 
    "32,848", "34,628", "34,693", "34,841", "35,707", "36,066", 
    "37,444", "37,700", "41,962", "44,821", "45,037", "47,421", 
    "48,026", "5,068", "5,503", "6,776", "60,764", "67,185", 
    "69,005", "71,814", "8,318", "83,363", "9,886"), class = "factor"), 
    MG = structure(c(11L, 5L, 34L, 12L, 23L, 39L, 18L, 1L, 31L
    ), .Label = c("", "1,070", "1,480", "1,495", "1,617", "1,659", 
    "1,892", "10,703", "11,234", "13,442", "15,653", "16,879", 
    "17,019", "17,384", "18,013", "2,049", "2,438", "22,023", 
    "22,081", "23,709", "24,651", "28,690", "28,774", "29,128", 
    "3,262", "3,289", "3,709", "3,805", "3,886", "30,220", "4,278", 
    "4,342", "4,664", "4,827", "429", "569", "595", "6,010", 
    "6,040", "6,076", "6,182", "6,385", "6,442", "6,555", "6,828", 
    "6,843", "6,895", "7,521", "7,972", "8,230", "9,090", "9,863"
    ), class = "factor"), Sil = structure(c(47L, 17L, 43L, 13L, 
    44L, 38L, 36L, 1L, 15L), .Label = c("", "1,071", "1,103", 
    "1,153", "1,290", "1,826", "10,407", "105", "11,964", "12,046", 
    "12,914", "15,736", "15,860", "16,735", "160", "2,022", "2,085", 
    "2,266", "2,273", "2,303", "2,325", "2,782", "2,887", "2,920", 
    "27", "28,312", "29,259", "3,368", "3,679", "301", "302", 
    "4,359", "4,416", "4,913", "47,712", "5,170", "5,857", "6,475", 
    "6,671", "619", "68", "7,024", "7,218", "7,303", "7,457", 
    "8,400", "8,550", "8,609", "9,571", "9,839", "9,841", "965"
    ), class = "factor"), SBR = structure(c(11L, 42L, 3L, 41L, 
    35L, 25L, 2L, 1L, 39L), .Label = c("", "1,068", "1,091", 
    "1,094", "1,095", "1,103", "1,174", "1,183", "1,214", "1,315", 
    "1,327", "1,370", "1,379", "1,380", "1,419", "1,420", "1,457", 
    "1,577", "1,713", "1,787", "1,864", "1,927", "106", "134", 
    "2,217", "2,343", "2,346", "2,621", "2,623", "2,842", "2,850", 
    "260", "264", "3,563", "3,738", "3,853", "343", "369", "440", 
    "497", "5,492", "508", "55", "572", "6,720", "682", "688", 
    "7,432", "700", "895", "9,271", "94"), class = "factor"), 
    SBS = structure(c(20L, 5L, 3L, 15L, 17L, 10L, 52L, 1L, 4L
    ), .Label = c("", "1,011", "1,017", "1,048", "1,101", "1,106", 
    "1,121", "1,160", "1,309", "1,389", "1,470", "1,508", "1,638", 
    "1,674", "1,851", "112", "12,166", "12,323", "175", "2,075", 
    "2,266", "2,316", "2,542", "211", "3,655", "3,771", "3,822", 
    "345", "373", "385", "398", "410", "5,538", "507", "548", 
    "59", "6,125", "6,272", "6,557", "607", "7,342", "712", "752", 
    "767", "768", "8,581", "850", "887", "9,422", "936", "94", 
    "960"), class = "factor"), Total = structure(c(47L, 49L, 
    41L, 4L, 18L, 39L, 29L, 1L, 30L), .Label = c("", "10,871", 
    "106,513", "108,347", "108,574", "11,770", "112,931", "114,110", 
    "14,742", "15,724", "17,455", "175,528", "18,376", "197,217", 
    "223,303", "258,204", "26,151", "268,479", "27,553", "29,383", 
    "3,592", "3,687", "3,803", "30,319", "36,080", "37,307", 
    "38,917", "39,389", "39,750", "40,836", "43,589", "45,530", 
    "48,340", "48,359", "48,989", "49,024", "5,581", "50,550", 
    "54,475", "55,105", "56,706", "58,964", "6,185", "65,524", 
    "69,371", "7,652", "76,159", "8,293", "8,939", "80,736", 
    "82,187", "95,060"), class = "factor")), row.names = c(NA, 
9L), class = "data.frame")

Any help would be appreciated

Upvotes: 0

Views: 55

Answers (1)

mnist
mnist

Reputation: 6954

I do not really see the issue here. This code works just fine for me

my_data %>%
  filter(State != "District of") %>% 
  mutate(State = recode_factor(State, 
                               "Columbia" = "District of Columbia"))

You had a typo in "Colombia" but the filtering is fine

Upvotes: 1

Related Questions