ggplot2 not printing the colours specified inthe dataframe and set as factor levels

Question

I am trying to reproduce a map from W.E.B DuBois with the original colors and values with thin black arrows in each state starting at the bottom right corner of the state polygon and pointing to the centroid of the state. I have 2 issues 1) ggplot2 would not plot the colors specified in the data frame. 2) The arrows pointing to the centroids are too big and would not start at the bottom right corner of the state polygon and would point to the centroid at a 45 degrees angle. I would appreciate any help to fix these issues. Here is the data

    df_join <- structure(list(name = c("New Mexico", "Puerto Rico", "California", 
                            "Alabama", "Georgia", "Arkansas", "Oregon", "Mississippi", "Colorado", 
                            "Utah", "Oklahoma", "Tennessee", "Wyoming", "Indiana", "Massachusetts", 
                            "Idaho", "Alaska", "Nevada", "Illinois", "Vermont", "New Jersey", 
                            "North Dakota", "Iowa", "South Carolina", "Arizona", "Delaware", 
                            "District of Columbia", "Guam", "American Samoa", "Connecticut", 
                            "New Hampshire", "Nebraska", "Washington", "South Dakota", "Texas", 
                            "Kentucky", "Ohio", "Wisconsin", "Pennsylvania", "Missouri", 
                            "North Carolina", "Virginia", "West Virginia", "Louisiana", "New York", 
                            "Michigan", "Kansas", "Florida", "United States Virgin Islands", 
                            "Montana", "Minnesota", "Minnesota", "Maryland", "Maine", "Hawaii", 
                            "Commonwealth of the Northern Mariana Islands", "Rhode Island"),
                   color = c("#d2b48c", "", "#ffd700", "#00aa00", "#000000", 
                             "#dc143c", "#dc143c", "#d2b48c", "#dc143c", "#654321", "#ffd700", 
                             "#654321", "#ffd700", "#ffb6c1", "#d2b48c", "#4682b4", "", 
                             "#ffb6c1", "#ffd700", "#ffb6c1", "#ffd700", "#d2b48c", "#696969", 
                             "#4682b4", "#4682b4", "#d2b48c", "", "", "", "#d2b48c", "#ffb6c1", 
                             "#ffb6c1", "#696969", "#654321", "#696969", "#696969", "#d2b48c", 
                             "#4682b4", "#dc143c", "#4682b4", "#ffb6c1", "#00aa00", "#ffa500", 
                             "#ffb6c1", "#4682b4", "#654321", "#00aa00", "#ffd700", "", 
                             "#00aa00", "#ffb6c1", "#ffb6c1", "#ffd700", "#ffd700", "", 
                             "", "#dc143c"),
                   present_location = c(
      38L, NA, 254L, 24556L, 798747L, NA, 32L, 589L, 285L, 9L, 
      68L, 9998L, 21L, 193L, 293L, 7L, 12142L, 1L, 556L, 11L, 
      229L, 5L, 120L, 347L, 48L, 12L, 320L, NA, NA, 97L, 14L, 
      121L, 44L, 18L, 12016L, 424L, 474L, 27L, 321L, 480L, 462L, 
      223L, 40L, 6025L, 866L, 51L, 480L, 3981L, 223L, NA, 62L, 
      38L, 148L, 7L, NA, 48L, 44L
    ) ))
                          
And here are the codes that I have tried

    library(sf)
    library(ggplot2)
    library(tigris)
    
    
    # Read in state shapefile data
    us_states <- states(cb = TRUE)
    
    # Compute the centroid of each state
    us_states_centroid <- st_centroid(us_states)
    
    # Transform the centroid coordinates to WGS84 (lat/long)
    us_states_centroid_wgs84 <- st_transform(us_states_centroid, 4326)
    
    # Compute the convex hull of each state
    us_states_hull <- st_cast(st_convex_hull(us_states), "MULTILINESTRING")
    
    # Extract the coordinates of the bottom right corner of each state polygon
    us_states_bottom_right <- st_coordinates(st_centroid(us_states_hull))
    
    # Combine the centroid and bottom right coordinates into one dataframe
    us_states_coords <- data.frame(
      centroid_lon = st_coordinates(us_states_centroid_wgs84)[,1],
      centroid_lat = st_coordinates(us_states_centroid_wgs84)[,2],
      bottom_right_lon = us_states_bottom_right[,1],
      bottom_right_lat = us_states_bottom_right[,2]
    )
    us_states <- cbind(us_states, us_states_coords)
    
    # Merge data with color and present_location information
    df_join$color <- factor(df_join$color, levels = unique(df_join$color))
    us_states_data <- merge(us_states, df_join, by.x = "NAME", by.y = "name")
    
    
    
    # Plot the map with state boundaries, colored by color column and labeled with present_location
    ggplot() +
      geom_sf(data = us_states_data, aes(fill = color), color = "black", size = 0.2) +
    geom_segment(data = us_states, aes(x = bottom_right_lon, y = bottom_right_lat, xend = centroid_lon, yend = centroid_lat),
                  arrow = arrow(length = unit(0.15, "cm"), type = "closed", ends = "last", angle = 25),
                  size = 0.5) +
    scale_fill_manual(values = us_states_data$color ) +
      scale_color_manual(values = us_states_data$color) +    
     
       geom_text(data = us_states_data, aes(x = centroid_lon - 0.5, y = centroid_lat, label = present_location), size = 3, hjust = 1, vjust = 0.5) +
      labs(title = "US State Boundaries Colored by Color Column and Labeled with Present_Location", 
           subtitle = "",
           fill = "Color", x = "Longitude", y = "Latitude") +
      theme_void() +
      theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
            plot.subtitle = element_text(hjust = 0.5, size = 14, face = "italic"),
            legend.position = "none") +
      xlim(c(-125, -66)) + ylim(c(25, 50))

I followed @Jul advice and modified the codes. I am getting an error here

  # Combine the centroid coordinates and angles into one dataframe
    > us_states_coords <- data.frame(
    +   centroid_lon = us_states_centroid_coords[,1],
    +   centroid_lat = us_states_centroid_coords[,2],
    +   angle_degrees = us_states_centroid_coords$angle_degrees
    + )
    Error in us_states_centroid_coords[, 1] : incorrect number of dimensions
    
    library(sf)
    library(ggplot2)
    library(tigris)
    
    # Read in state shapefile data
    us_states <- states(cb = TRUE)
    
    # Filter to the continental US states only
    us_states <- us_states[us_states$REGION %in% c("3", "4", "5"),]
    
    # Compute the centroid of each state
    us_states_centroid <- st_centroid(us_states)
    
    # Transform the centroid coordinates to WGS84 (lat/long)
    us_states_centroid_wgs84 <- st_transform(us_states_centroid, 4326)
    
    # Compute the convex hull of each state
    us_states_hull <- st_cast(st_convex_hull(us_states), "MULTILINESTRING")
    
    # Extract the coordinates of the bottom right corner of each state polygon
    us_states_bottom_right <- st_coordinates(st_centroid(us_states_hull))
    
    # Extract the centroid of Georgia
    ga_centroid <- us_states_centroid_wgs84[us_states_centroid_wgs84$NAME == "Georgia",]
    
    # Calculate the angle between the centroid of each state and Georgia
    us_states_centroid_coords <- st_coordinates(us_states_centroid_wgs84)
    us_states_centroid_coords$angle <- atan2(
      ga_centroid$geometry[[1]][2] - us_states_centroid_coords[,2], 
      ga_centroid$geometry[[1]][1] - us_states_centroid_coords[,1]
    )
    
    # Convert the angle to degrees
    us_states_centroid_coords$angle_degrees <- us_states_centroid_coords$angle * 180 / pi
    
    # Convert negative angles to positive angles (for visualization purposes)
    us_states_centroid_coords$angle_degrees[us_states_centroid_coords$angle_degrees < 0] <- 
      360 + us_states_centroid_coords$angle_degrees[us_states_centroid_coords$angle_degrees < 0]
    
    # Combine the centroid coordinates and angles into one dataframe
    us_states_coords <- data.frame(
      centroid_lon = us_states_centroid_coords[,1],
      centroid_lat = us_states_centroid_coords[,2],
      angle_degrees = us_states_centroid_coords$angle_degrees
    )
    
    # Merge data with color and present_location information
    us_states_data <- merge(us_states, df_join, by.x = "NAME", by.y = "name")
    us_states_data <- us_states_data[us_states_data$color!="",]
    
    # Plot the map with state boundaries, colored by color column and labeled with present_location
    ggplot() +
      geom_sf(data = us_states_data, aes(fill = color), color = "black", size = 0.2) +
      geom_spoke(data = us_states_coords, aes(x0 = centroid_lon, y0 = centroid_lat, 
                                              angle = angle_degrees, radius = 0.5), 
                 color = "black", size = 0.5) +
      geom_text(data = us_states_data, aes(x = centroid_lon - 0.5, y = centroid_lat, label = present_location), 
                size = 3, hjust = 1, vjust = 0.5) +
      labs(title = "US State Boundaries Colored by Color Column and Labeled with Present_Location", 
           subtitle = "",
           fill = "Color", x = "Longitude", y = "Latitude") +
      theme_void() +
      theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
            plot.subtitle = element_text(hjust = 0.5, size = 14, face = "italic"),
            legend.position = "none") +
      xlim(c(-125, -66)) + ylim(c(25, 50))

ggplot2 not printing the colours specified inthe dataframe and set as factor levels

Answers (1)

Related Questions