Patrick Stephenson
Patrick Stephenson

Reputation: 98

ggplot2 not printing the colours specified in the dataframe and set as factor levels

I am trying to reproduce a map by W.E.B. Du Bois with the original colors and values, with a thin black arrow in each state that starts at the bottom-right corner of the state polygon and points to the centroid of the state. I have two issues: 1) ggplot2 does not plot the colors specified in the data frame; 2) the arrows pointing to the centroids are too big, and they do not start at the bottom-right corner of the state polygon or point to the centroid at a 45-degree angle. I would appreciate any help fixing these issues. Here is the data:

    # Lookup table joined onto the state geometries by `name`:
    #   name             - state / territory name (matched against NAME later)
    #   color            - hex fill colour; "" where no colour is given
    #   present_location - integer value printed as the label; NA where missing
    # The class and row.names attributes make this a real data frame (57 rows)
    # rather than a bare list, so nrow(), head(), merge(), etc. behave as expected.
    # NOTE(review): "Minnesota" appears twice (values 62L and 38L), so a join by
    # name yields two Minnesota rows - confirm against the source table.
    df_join <- structure(list(name = c("New Mexico", "Puerto Rico", "California", 
                                "Alabama", "Georgia", "Arkansas", "Oregon", "Mississippi", "Colorado", 
                                "Utah", "Oklahoma", "Tennessee", "Wyoming", "Indiana", "Massachusetts", 
                                "Idaho", "Alaska", "Nevada", "Illinois", "Vermont", "New Jersey", 
                                "North Dakota", "Iowa", "South Carolina", "Arizona", "Delaware", 
                                "District of Columbia", "Guam", "American Samoa", "Connecticut", 
                                "New Hampshire", "Nebraska", "Washington", "South Dakota", "Texas", 
                                "Kentucky", "Ohio", "Wisconsin", "Pennsylvania", "Missouri", 
                                "North Carolina", "Virginia", "West Virginia", "Louisiana", "New York", 
                                "Michigan", "Kansas", "Florida", "United States Virgin Islands", 
                                "Montana", "Minnesota", "Minnesota", "Maryland", "Maine", "Hawaii", 
                                "Commonwealth of the Northern Mariana Islands", "Rhode Island"),
                       color = c("#d2b48c", "", "#ffd700", "#00aa00", "#000000", 
                                 "#dc143c", "#dc143c", "#d2b48c", "#dc143c", "#654321", "#ffd700", 
                                 "#654321", "#ffd700", "#ffb6c1", "#d2b48c", "#4682b4", "", 
                                 "#ffb6c1", "#ffd700", "#ffb6c1", "#ffd700", "#d2b48c", "#696969", 
                                 "#4682b4", "#4682b4", "#d2b48c", "", "", "", "#d2b48c", "#ffb6c1", 
                                 "#ffb6c1", "#696969", "#654321", "#696969", "#696969", "#d2b48c", 
                                 "#4682b4", "#dc143c", "#4682b4", "#ffb6c1", "#00aa00", "#ffa500", 
                                 "#ffb6c1", "#4682b4", "#654321", "#00aa00", "#ffd700", "", 
                                 "#00aa00", "#ffb6c1", "#ffb6c1", "#ffd700", "#ffd700", "", 
                                 "", "#dc143c"),
                       present_location = c(
          38L, NA, 254L, 24556L, 798747L, NA, 32L, 589L, 285L, 9L, 
          68L, 9998L, 21L, 193L, 293L, 7L, 12142L, 1L, 556L, 11L, 
          229L, 5L, 120L, 347L, 48L, 12L, 320L, NA, NA, 97L, 14L, 
          121L, 44L, 18L, 12016L, 424L, 474L, 27L, 321L, 480L, 462L, 
          223L, 40L, 6025L, 866L, 51L, 480L, 3981L, 223L, NA, 62L, 
          38L, 148L, 7L, NA, 48L, 44L
        ) ), class = "data.frame", row.names = c(NA, -57L))
                          
And here is the code that I have tried:

    library(sf)
    library(ggplot2)
    library(tigris)
    
    
    # First attempt: build per-state centroid and "arrow start" coordinates,
    # join the colour table, and draw the map with arrows and labels.

    # Read in state shapefile data
    us_states <- states(cb = TRUE)
    
    # Compute the centroid of each state
    us_states_centroid <- st_centroid(us_states)
    
    # Transform the centroid coordinates to WGS84 (lat/long)
    us_states_centroid_wgs84 <- st_transform(us_states_centroid, 4326)
    
    # Compute the convex hull of each state and cast it to its outline
    us_states_hull <- st_cast(st_convex_hull(us_states), "MULTILINESTRING")
    
    # Intended as the bottom right corner of each state polygon, but what is
    # actually taken here is the centroid of the hull outline. Unlike the state
    # centroids above, these coordinates are not passed through st_transform().
    us_states_bottom_right <- st_coordinates(st_centroid(us_states_hull))
    
    # Combine the centroid and bottom right coordinates into one dataframe
    us_states_coords <- data.frame(
      centroid_lon = st_coordinates(us_states_centroid_wgs84)[,1],
      centroid_lat = st_coordinates(us_states_centroid_wgs84)[,2],
      bottom_right_lon = us_states_bottom_right[,1],
      bottom_right_lat = us_states_bottom_right[,2]
    )
    # Attach the four coordinate columns to the state geometries (paired by row position)
    us_states <- cbind(us_states, us_states_coords)
    
    # Merge data with color and present_location information
    # The colour strings become a factor whose levels follow first appearance in
    # df_join; the empty string "" is one of those levels.
    df_join$color <- factor(df_join$color, levels = unique(df_join$color))
    us_states_data <- merge(us_states, df_join, by.x = "NAME", by.y = "name")
    
    
    
    # Plot the map with state boundaries, colored by color column and labeled with present_location
    ggplot() +
      # fill is mapped inside aes(), so the factor is translated by the fill scale
      geom_sf(data = us_states_data, aes(fill = color), color = "black", size = 0.2) +
    # Arrows are drawn from `us_states` (every row read in), not from the merged data
    geom_segment(data = us_states, aes(x = bottom_right_lon, y = bottom_right_lat, xend = centroid_lon, yend = centroid_lat),
                  arrow = arrow(length = unit(0.15, "cm"), type = "closed", ends = "last", angle = 25),
                  size = 0.5) +
    # NOTE(review): `values` receives the whole per-row factor column rather than
    # one colour per factor level - likely the cause of issue 1; confirm against
    # the scale_fill_manual() reference.
    scale_fill_manual(values = us_states_data$color ) +
      # No layer in this plot maps `colour` inside aes(); every layer sets it as a constant
      scale_color_manual(values = us_states_data$color) +    
     
       # Label each state with present_location, right-aligned 0.5 to the left of the centroid
       geom_text(data = us_states_data, aes(x = centroid_lon - 0.5, y = centroid_lat, label = present_location), size = 3, hjust = 1, vjust = 0.5) +
      labs(title = "US State Boundaries Colored by Color Column and Labeled with Present_Location", 
           subtitle = "",
           fill = "Color", x = "Longitude", y = "Latitude") +
      theme_void() +
      theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
            plot.subtitle = element_text(hjust = 0.5, size = 14, face = "italic"),
            legend.position = "none") +
      # Restrict the drawn window to x in [-125, -66] and y in [25, 50]
      xlim(c(-125, -66)) + ylim(c(25, 50))

I followed @Jul's advice and modified the code. I am now getting the following error:

  # Combine the centroid coordinates and angles into one dataframe
    > us_states_coords <- data.frame(
    +   centroid_lon = us_states_centroid_coords[,1],
    +   centroid_lat = us_states_centroid_coords[,2],
    +   angle_degrees = us_states_centroid_coords$angle_degrees
    + )
    Error in us_states_centroid_coords[, 1] : incorrect number of dimensions
    
    library(sf)
    library(ggplot2)
    library(tigris)
    
    # Second attempt: compute, for every state centroid, the angle towards
    # Georgia's centroid and draw it with geom_spoke(). As posted, this script
    # stops with "incorrect number of dimensions" when us_states_coords is built.

    # Read in state shapefile data
    us_states <- states(cb = TRUE)
    
    # Filter to the continental US states only
    # NOTE(review): confirm that REGION codes "3", "4", "5" select exactly the
    # intended states in the tigris data.
    us_states <- us_states[us_states$REGION %in% c("3", "4", "5"),]
    
    # Compute the centroid of each state
    us_states_centroid <- st_centroid(us_states)
    
    # Transform the centroid coordinates to WGS84 (lat/long)
    us_states_centroid_wgs84 <- st_transform(us_states_centroid, 4326)
    
    # Compute the convex hull of each state
    us_states_hull <- st_cast(st_convex_hull(us_states), "MULTILINESTRING")
    
    # Centroid of each hull outline (not a bottom right corner); this value is
    # not used again in this script.
    us_states_bottom_right <- st_coordinates(st_centroid(us_states_hull))
    
    # Extract the centroid of Georgia
    ga_centroid <- us_states_centroid_wgs84[us_states_centroid_wgs84$NAME == "Georgia",]
    
    # Calculate the angle between the centroid of each state and Georgia
    # atan2(dy, dx), with dy and dx measured from each state towards Georgia.
    us_states_centroid_coords <- st_coordinates(us_states_centroid_wgs84)
    # st_coordinates() output is assigned to with `$<-` here; after this line the
    # object no longer supports [, 1] / [, 2] indexing, which is what raises the
    # "incorrect number of dimensions" error further down.
    us_states_centroid_coords$angle <- atan2(
      ga_centroid$geometry[[1]][2] - us_states_centroid_coords[,2], 
      ga_centroid$geometry[[1]][1] - us_states_centroid_coords[,1]
    )
    
    # Convert the angle to degrees
    us_states_centroid_coords$angle_degrees <- us_states_centroid_coords$angle * 180 / pi
    
    # Convert negative angles to positive angles (for visualization purposes)
    us_states_centroid_coords$angle_degrees[us_states_centroid_coords$angle_degrees < 0] <- 
      360 + us_states_centroid_coords$angle_degrees[us_states_centroid_coords$angle_degrees < 0]
    
    # Combine the centroid coordinates and angles into one dataframe
    # (this is the statement that fails with the reported error)
    us_states_coords <- data.frame(
      centroid_lon = us_states_centroid_coords[,1],
      centroid_lat = us_states_centroid_coords[,2],
      angle_degrees = us_states_centroid_coords$angle_degrees
    )
    
    # Merge data with color and present_location information
    us_states_data <- merge(us_states, df_join, by.x = "NAME", by.y = "name")
    # Drop rows whose colour is the empty string
    us_states_data <- us_states_data[us_states_data$color!="",]
    
    # Plot the map with state boundaries, colored by color column and labeled with present_location
    ggplot() +
      geom_sf(data = us_states_data, aes(fill = color), color = "black", size = 0.2) +
      # NOTE(review): check the geom_spoke() reference - confirm whether it accepts
      # `x0`/`y0` (rather than `x`/`y`) and whether `angle` is expected in radians
      # rather than the degrees computed above.
      geom_spoke(data = us_states_coords, aes(x0 = centroid_lon, y0 = centroid_lat, 
                                              angle = angle_degrees, radius = 0.5), 
                 color = "black", size = 0.5) +
      # NOTE(review): in this script centroid_lon / centroid_lat are only created in
      # us_states_coords and are never attached to us_states - confirm they exist
      # in us_states_data.
      geom_text(data = us_states_data, aes(x = centroid_lon - 0.5, y = centroid_lat, label = present_location), 
                size = 3, hjust = 1, vjust = 0.5) +
      labs(title = "US State Boundaries Colored by Color Column and Labeled with Present_Location", 
           subtitle = "",
           fill = "Color", x = "Longitude", y = "Latitude") +
      theme_void() +
      theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
            plot.subtitle = element_text(hjust = 0.5, size = 14, face = "italic"),
            legend.position = "none") +
      xlim(c(-125, -66)) + ylim(c(25, 50))        

Upvotes: 0

Views: 60

Answers (1)

Jul
Jul

Reputation: 1139

For 1), there's no need to convert the colours to a factor, and in the ggplot call the fill shouldn't be set inside `aes()`. Anything placed inside `aes()` tells ggplot to map the values through a scale, which then chooses the colours itself. If you want ggplot to use colours taken directly from a data frame, pass them outside of `aes()`. Also, for simplicity it may be best to filter out the non-continental territories.

# Join the colour / present_location table onto the state geometries.
# The colour column stays as plain character strings: the earlier
# factor() conversion of df_join$color is deliberately not applied.
us_states_data <- merge(
  x = us_states,
  y = df_join,
  by.x = "NAME",
  by.y = "name"
)
# Drop every row whose colour is the empty string, keeping only the rows
# that carry a colour in df_join.
us_states_data <- us_states_data[us_states_data$color != "", ]

# Draw the map: polygons filled with the literal colours, one arrow per
# row of us_states, and present_location as a text label.
ggplot() +
  # fill is passed as a constant vector outside aes(), replacing the earlier
  # aes(fill = color) mapping, so the hex codes are used as given. That is
  # also why scale_fill_manual() / scale_color_manual() are no longer called.
  geom_sf(
    data = us_states_data,
    fill = us_states_data$color,
    color = "black",
    size = 0.2
  ) +
  geom_segment(
    data = us_states,
    aes(
      x = bottom_right_lon,
      y = bottom_right_lat,
      xend = centroid_lon,
      yend = centroid_lat
    ),
    arrow = arrow(
      length = unit(0.15, "cm"),
      type = "closed",
      ends = "last",
      angle = 25
    ),
    size = 0.5
  ) +
  # Labels sit right-aligned, 0.5 to the left of each centroid.
  geom_text(
    data = us_states_data,
    aes(
      x = centroid_lon - 0.5,
      y = centroid_lat,
      label = present_location
    ),
    size = 3,
    hjust = 1,
    vjust = 0.5
  ) +
  labs(
    title = "US State Boundaries Colored by Color Column and Labeled with Present_Location",
    subtitle = "",
    fill = "Color",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14, face = "italic"),
    legend.position = "none"
  ) +
  # Restrict the drawn window to x in [-125, -66] and y in [25, 50].
  xlim(c(-125, -66)) +
  ylim(c(25, 50))

For 2.), you are currently picking up the centroid of the hull, not the bottom right of it. You could potentially get close to your need by getting the bounding box of each state st_bbox and then picking the xmax and ymax figures and then taking some other actions to clip the arrow to be within the state lines. But it looks like the map you are trying to recreate actually has all of the arrows emanating from Georgia? In which case, it may be best to calculate the angle between the centroids of each state and Georgia and then utilise geom_spoke to create the arrows. https://ggplot2.tidyverse.org/reference/geom_spoke.html


2023-03-08 Extension

The new error is caused by the way objects are being assigned to the us_states_centroid_coords object. You appear to be inadvertently coercing the matrix output of st_coordinates to a list when assigning $angle. If you create us_states_centroid_coords as a data.frame, the unexpected coercion will not happen.

# Convert the st_coordinates() result to a data frame up front, so that later
# `$angle` / `$angle_degrees` assignments add columns instead of coercing it.
us_states_centroid_coords <- as.data.frame(
  st_coordinates(us_states_centroid_wgs84)
)

Upvotes: 1

Related Questions