garry
garry

Reputation: 57

state names to abbreviations using R

I have an Excel file named MASTER that contains a column named states.

The state names are a mix of abbreviations and full names. I want all full names to be converted to abbreviations. I've tried this code, but I don't know what I am doing wrong:

# Read the MASTER CSV export into a data frame; the first row holds the headers.
MASTER<- read.csv("c:\\users\\Desktop\\test merge\\MASTER.csv", header=T, sep=",")
# Sample of the values found in the state column.
# NOTE(review): as written, this assignment replaces the data frame with a
# character vector, yet the error below still reports a data frame -- so it was
# presumably shown for illustration rather than run in that session.
> MASTER = c("CALIFORNIA","ARIZONA","ARKANSAS","colorado","CONNECTICUT","DISTRICT OF COLUMBIA","FLORIDA","Georgia","hawaii","Illinois","Indiana","Iowa","kansas","Kentucky","LOUISIANA","Maine","Marryland","Massachusetts","Michigan","Minnesota","Missouri","MONTANA","NEBRASKA","Nevada","New Hampshire","New Jersey","NEW MEXICO","New York","North Carolina","OHIO","OKLAHOMA","Oregon","palmer","PANAMA","Pennsylvania","SOUTH CAROLINA","SOUTH DAKOTA","Tejas","Tennessee","Texas","TX.","UTAH","Virgin Islands","VIRGINIA","Washington","West Virginia","WISCONSIN","Wyoming")
# Look up each value's position in state.name and use it to index state.abb.
# match() compares strings exactly, so values that differ from state.name in
# case or spelling come back as NA.
# NOTE(review): a zero-length replacement is what results when `MASTER$state`
# is NULL (no column uniquely matching `state`) -- confirm the real column
# name, including its case.
> MASTER$state <- state.abb[match(MASTER$state,state.name)]
Error in `$<-.data.frame`(`*tmp*`, state, value = character(0)) : 
  replacement has 0 rows, data has 635768

Upvotes: 1

Views: 2077

Answers (1)

hrbrmstr
hrbrmstr

Reputation: 78832

This can help you get started and hopefully illustrate a few concepts in the process:

# Sample state values as given in the question: mixed case, with some entries
# that are not exact state names.
master <- c(
  "CALIFORNIA", "ARIZONA", "ARKANSAS", "colorado",
  "CONNECTICUT", "DISTRICT OF COLUMBIA", "FLORIDA", "Georgia",
  "hawaii", "Illinois", "Indiana", "Iowa",
  "kansas", "Kentucky", "LOUISIANA", "Maine",
  "Marryland", "Massachusetts", "Michigan", "Minnesota",
  "Missouri", "MONTANA", "NEBRASKA", "Nevada",
  "New Hampshire", "New Jersey", "NEW MEXICO", "New York",
  "North Carolina", "OHIO", "OKLAHOMA", "Oregon",
  "palmer", "PANAMA", "Pennsylvania", "SOUTH CAROLINA",
  "SOUTH DAKOTA", "Tejas", "Tennessee", "Texas",
  "TX.", "UTAH", "Virgin Islands", "VIRGINIA",
  "Washington", "West Virginia", "WISCONSIN", "Wyoming"
)

# Lower-case every entry so the comparison no longer depends on letter case,
# then print the normalised vector.
master_low <- tolower(master)
print(master_low)
##  [1] "california"           "arizona"              "arkansas"            
##  [4] "colorado"             "connecticut"          "district of columbia"
##  [7] "florida"              "georgia"              "hawaii"              
## [10] "illinois"             "indiana"              "iowa"                
## [13] "kansas"               "kentucky"             "louisiana"           
## [16] "maine"                "marryland"            "massachusetts"       
## [19] "michigan"             "minnesota"            "missouri"            
## [22] "montana"              "nebraska"             "nevada"              
## [25] "new hampshire"        "new jersey"           "new mexico"          
## [28] "new york"             "north carolina"       "ohio"                
## [31] "oklahoma"             "oregon"               "palmer"              
## [34] "panama"               "pennsylvania"         "south carolina"      
## [37] "south dakota"         "tejas"                "tennessee"           
## [40] "texas"                "tx."                  "utah"                
## [43] "virgin islands"       "virginia"             "washington"          
## [46] "west virginia"        "wisconsin"            "wyoming"

# Lookup table: the built-in state abbreviations, keyed by lower-cased state
# name so it can be indexed with the lower-cased input values.
state_tbl <- state.abb
names(state_tbl) <- tolower(state.name)

# Show each original value beside its lower-cased form and the abbreviation
# looked up by name; values with no entry in the lookup table come back as NA.
data.frame(
  orig = master,
  lower = master_low,
  # Indexing a named vector by name returns a named result, and data.frame()
  # can adopt a component's names as row names. Drop them so the rows keep the
  # default 1..n labels whatever the input contains.
  abbrev = unname(state_tbl[master_low]),
  stringsAsFactors = FALSE
)
##                    orig                lower abbrev
## 1            CALIFORNIA           california     CA
## 2               ARIZONA              arizona     AZ
## 3              ARKANSAS             arkansas     AR
## 4              colorado             colorado     CO
## 5           CONNECTICUT          connecticut     CT
## 6  DISTRICT OF COLUMBIA district of columbia   <NA>
## 7               FLORIDA              florida     FL
## 8               Georgia              georgia     GA
## 9                hawaii               hawaii     HI
## 10             Illinois             illinois     IL
## 11              Indiana              indiana     IN
## 12                 Iowa                 iowa     IA
## 13               kansas               kansas     KS
## 14             Kentucky             kentucky     KY
## 15            LOUISIANA            louisiana     LA
## 16                Maine                maine     ME
## 17            Marryland            marryland   <NA>
## 18        Massachusetts        massachusetts     MA
## 19             Michigan             michigan     MI
## 20            Minnesota            minnesota     MN
## 21             Missouri             missouri     MO
## 22              MONTANA              montana     MT
## 23             NEBRASKA             nebraska     NE
## 24               Nevada               nevada     NV
## 25        New Hampshire        new hampshire     NH
## 26           New Jersey           new jersey     NJ
## 27           NEW MEXICO           new mexico     NM
## 28             New York             new york     NY
## 29       North Carolina       north carolina     NC
## 30                 OHIO                 ohio     OH
## 31             OKLAHOMA             oklahoma     OK
## 32               Oregon               oregon     OR
## 33               palmer               palmer   <NA>
## 34               PANAMA               panama   <NA>
## 35         Pennsylvania         pennsylvania     PA
## 36       SOUTH CAROLINA       south carolina     SC
## 37         SOUTH DAKOTA         south dakota     SD
## 38                Tejas                tejas   <NA>
## 39            Tennessee            tennessee     TN
## 40                Texas                texas     TX
## 41                  TX.                  tx.   <NA>
## 42                 UTAH                 utah     UT
## 43       Virgin Islands       virgin islands   <NA>
## 44             VIRGINIA             virginia     VA
## 45           Washington           washington     WA
## 46        West Virginia        west virginia     WV
## 47            WISCONSIN            wisconsin     WI
## 48              Wyoming              wyoming     WY

Upvotes: 4

Related Questions