Reputation: 2179
I have the following data.frame:
"NAME_1" "id" "prevalence"
"3" "Arizona" 3193 3
"4" "Arkansas" 3194 4
"5" "California" 3195 5
"6" "Colorado" 3196 6
"7" "Connecticut" 3197 7
"8" "Delaware" 3198 8
"9" "District of Columbia" 3199 9
"10" "Florida" 3200 10
"11" "Georgia" 3201 11"
"12" "Hawaii" 3202 12
"13" "Idaho" 3203 13
"14" "Illinois" 3204 14
"15" "Indiana" 3205 15
"16" "Iowa" 3206 16
"17" "Kansas" 3207 17
"18" "Kentucky" 3208 18
"19" "Louisiana" 3209 19
"20" "Maine" 3210 20
"21" "Maryland" 3211 21
"22" "Massachusetts" 3212 22
"23" "Michigan" 3213 23
"24" "Minnesota" 3214 24
"25" "Mississippi" 3215 25
"26" "Missouri" 3216 26
"27" "Montana" 3217 27
"28" "Nebraska" 3218 28
"29" "Nevada" 3219 29
"30" "New Hampshire" 3220 30
"31" "New Jersey" 3221 31
"32" "New Mexico" 3222 32
"33" "New York" 3223 33
"34" "North Carolina" 3224 34
"35" "North Dakota" 3225 35
"36" "Ohio" 3226 36
"37" "Oklahoma" 3227 37
"38" "Oregon" 3228 38
"39" "Pennsylvania" 3229 39
"40" "Rhode Island" 3230 40
"41" "South Carolina" 3231 41
"42" "South Dakota" 3232 42
"43" "Tennessee" 3233 43
"44" "Texas" 3234 44
"45" "Utah" 3235 45
"46" "Vermont" 3236 46
"47" "Virginia" 3237 47
"48" "Washington" 3238 48
"49" "West Virginia" 3239 49
"50" "Wisconsin" 3240 50
"51" "Wyoming" 3241 51
What I would like to do now is plot the values on a map of the United States. Therefore, I do the following:
Load in libraries
library(ggplot2)
library(maptools)
library(rgeos)
library(Cairo)
library(ggmap)
library(scales)
library(RColorBrewer)
Get .shp file (to be found here: http://www.diva-gis.org/gdata)
# Switch the working directory to the project folder so the shapefile can be
# referenced by its bare file name below.
setwd(dir = "~/portfolio_text_mining/plots_US")

# Read the state-level shapefile into a spatial object.
states.shp <- readShapeSpatial(fn = "USA_adm1.shp")
And then I take the following steps:
Fortify shape file to get into dataframe
# Convert the spatial object into a plain data frame for ggplot2, using the
# "ID_1" attribute as the region identifier.
states.shp.f <- fortify(model = states.shp, region = "ID_1")

# Show the class of the fortified result.
class(x = states.shp.f)
Merge with coefficients and reorder
# Left join: keep every polygon vertex row and attach the per-state values
# from `mydata` wherever the "id" keys match.
merge.shp.coef <- merge(
  x = states.shp.f,
  y = mydata,
  by = "id",
  all.x = TRUE
)

# Sort the rows by the `order` column again after the merge.
final.plot <- merge.shp.coef[order(merge.shp.coef[["order"]]), ]
And then I create the plot:
# Draw one filled polygon per group, coloured by prevalence, with thin black
# outlines, on a map projection.
ggplot(
  data = final.plot,
  mapping = aes(x = long, y = lat, group = group, fill = prevalence)
) +
  geom_polygon(color = "black", size = 0.25) +
  coord_map()
This all seems to work but my graph looks odd:
Upvotes: 2
Views: 317
Reputation: 78792
You are missing Alabama and your "data.frame" paste is malformed. You also don't need to go outside R to do this (i.e. no shapefile required):
library(maptools)
library(ggplot2)
library(ggthemes)
# State outlines taken from the "state" map database (no shapefile needed).
us_map <- map_data(map = "state")
# NOTE: Alabama is absent here because it was absent from the data in the
# question.
my_data <- data.frame(
  NAME_1 = c(
    "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
    "Delaware", "District of Columbia", "Florida", "Georgia", "Hawaii",
    "Idaho", "Illinois", "Indiana", "Iowa", "Kansas",
    "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts",
    "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana",
    "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
    "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma",
    "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
    "Tennessee", "Texas", "Utah", "Vermont", "Virginia",
    "Washington", "West Virginia", "Wisconsin", "Wyoming"
  ),
  id = seq(from = 3193L, to = 3241L),
  prevalence = seq(from = 3L, to = 51L)
)

# Region (state) names are lower case in the built-in map data, so add a
# lower-case key column to match on. This could be done elsewhere, but the
# data set is small enough to do it here.
my_data[["region"]] <- tolower(my_data[["NAME_1"]])
gg <- ggplot() +
  # Base layer: outline every state in the map data, with no fill.
  geom_map(
    data = us_map,
    map = us_map,
    mapping = aes(x = long, y = lat, map_id = region),
    color = "#b2b2b277",
    size = 0.1,
    fill = NA
  ) +
  # Choropleth layer: fill each state present in my_data by its prevalence.
  geom_map(
    data = my_data,
    map = us_map,
    mapping = aes(fill = prevalence, map_id = region),
    color = "#b2b2b277",
    size = 0.1
  ) +
  # Continuous fill scale (binning the values may be worth considering).
  scale_fill_distiller(name = "Prevalence", palette = "BrBG") +
  # Polyconic projection instead of the default.
  coord_map(projection = "polyconic") +
  # Clean map theme.
  theme_map() +
  # Move the legend inside the plotting area.
  theme(legend.position = c(0.85, 0.2))

gg
Upvotes: 4