Reputation: 3
I have a dataset and am trying to extract only the rows that meet specific criteria, but I am stuck on how to do so. I think that it has something to do with using the which() and intersect() functions, but I am unfamiliar with them and don't know how to use them.
I have a dataset with a bunch of states, the number of gun deaths in each state, and each state's "Brady Score". I am trying to get the states with gun deaths under 4 (per 100,000) and Brady Scores under 0, and then arrange them in a table.
This is the data I am working with:
dput(Guns)
structure(list
(Jurisdiction = structure(1:51, .Label = c("Alabama",
"Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
"D.C.", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
"Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
"Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
"Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire",
"New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota",
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island",
"South Carolina", "South Dakota", "Tennessee", "Texas", "Utah",
"Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin",
"Wyoming"), class = "factor"),
Homicide.rate = c(7.1, 4.1, 5.5,
5.9, 5, 3.1, 4.1, 13.9, 6.2, 5.2, 5.9, 2.1, 1.8, 5.8, 4.7, 1.5,
2.9, 4.5, 10.8, 1.9, 6.3, 1.8, 7, 1.8, 7.4, 6.5, 2.7, 2.9, 4.5,
1.1, 4.4, 5.6, 3.5, 4.9, 4, 4.3, 5.7, 2.4, 5.4, 3.2, 6.9, 3,
6, 4.4, 1.8, 1.3, 3.8, 3, 3.9, 3, 2.4),
Gun.accident.rate = c(0.44,
0, 0.1, 0.41, 0.08, 0.18, 0, 0, 0, 0.13, 0.29, 0, 0.38, 0.14,
0.22, 0.11, 0.2, 0.4, 0.75, 0, 0.07, 0, 0.09, 0.08, 0.65, 0.33,
0.43, 0.29, 0.13, 0, 0, 0, 0.02, 0.29, 0, 0.14, 0.39, 0.13, 0.28,
0, 0.41, 0, 0.37, 0.18, 0, 0, 0.13, 0.12, 0.23, 0.09, 0),
Sum = c(7.5,
4.1, 5.6, 6.3, 5.1, 3.3, 4.1, 13.9, 6.2, 5.3, 6.2, 2.1, 2.2,
5.9, 4.9, 1.6, 3.1, 4.9, 11.6, 1.9, 6.4, 1.8, 7.1, 1.9, 8.1,
6.8, 3.1, 3.2, 4.6, 1.1, 4.4, 5.6, 3.5, 5.2, 4, 4.4, 6.1, 2.5,
5.7, 3.2, 7.3, 3, 6.4, 4.6, 1.8, 1.3, 3.9, 3.1, 4.1, 3.1, 2.4
),
Brady.score = c(3.5, -7, -8, 1, 75, 14.5, 70, 50, 34.5, 3,
2, 58.5, 0, 45, 4.5, 14, -4, -3.5, -2, 3, 66.5, 60.5, 15, 19.5,
-4, -0.5, -3, 6.5, 1.5, 5.5, 68.5, 0, 65.5, 1.5, 2, 10, 1, 11,
20, 41.5, 1, -4.5, 2, 1.5, -2, -4, 7, 19.5, 3, 13, -5),
Brady.grade = structure(c(8L,
10L, 10L, 10L, 1L, 5L, 1L, 2L, 3L, 10L, 10L, 4L, 10L, 2L, 8L,
6L, 10L, 10L, 10L, 10L, 1L, 4L, 5L, 5L, 10L, 10L, 10L, 7L, 10L,
8L, 1L, 10L, 1L, 10L, 10L, 7L, 10L, 9L, 5L, 3L, 10L, 10L, 10L,
10L, 10L, 10L, 7L, 5L, 10L, 6L, 10L), .Label = c("A?", "B", "B?",
"B+", "C", "C?", "D", "D?", "D+", "F"), class = "factor")),
class = "data.frame", row.names = c(NA,
-51L))
So far I have this:
# Logical masks, one element per row of Guns: TRUE where the criterion holds.
LowB <- Guns$Brady.score < 0
LowD <- Guns$Sum < 4
# Pull the qualifying values out of each column separately; the state labels
# are not carried along with these plain numeric vectors.
LowB1 <- Guns$Brady.score[LowB]
LowD1 <- Guns$Sum[LowD]
# Values that appear in both vectors (Brady scores compared against sums).
intersect(LowB1, LowD1)
I have succeeded in converting the Brady Scores and Gun Deaths (Sum) into numerical variables, but now have no idea how to align them into a table where each state matches its corresponding Brady Score and Gun Death sum. To reiterate, what I want to get at the end is a table with the states that have both a Brady score below a certain number and a Sum below a certain number, where all three variables correspond to each other. Is there any way I can do this? Thank you.
Upvotes: 0
Views: 48
Reputation: 5788
An additional Base R method:
# Keep the rows of Guns where both conditions hold. which() converts the
# logical mask to row indices, so a row whose comparison is NA is dropped
# rather than returned as an all-NA row.
Guns[which(Guns$Brady.score < 0 & Guns$Sum < 4), ]
Upvotes: 0
Reputation: 389135
We can use `subset` and `select` the columns that we need.
# One subset() call does both jobs: the `subset` argument filters the rows
# (negative Brady score and Sum below 4), `select` picks the columns to show.
subset(
  Guns,
  subset = Brady.score < 0 & Sum < 4,
  select = c("Jurisdiction", "Sum", "Brady.score")
)
# Jurisdiction Sum Brady.score
#17 Kansas 3.1 -4.0
#27 Montana 3.1 -3.0
#42 South Dakota 3.0 -4.5
#45 Utah 1.8 -2.0
#46 Vermont 1.3 -4.0
#51 Wyoming 2.4 -5.0
In `dplyr`, we can use `filter` and `select`.
library(dplyr)

# filter() keeps the rows where every comma-separated condition is TRUE;
# select() then keeps only the three columns of interest.
Guns %>%
  filter(Brady.score < 0, Sum < 4) %>%
  select(Jurisdiction, Sum, Brady.score)
Upvotes: 1