Reputation: 7
Year Cities generosity economic culture hp
2000 Seoul 1.23788 0.63376 0.86027 0.25497
2000 Mexico 1.29704 0.62433 0.89042 0.33088
2000 Guangzh 0.91451 0.48181 0.81444 0.14074
2000 Beijing 1.24711 0.54604 0.86179 0.40105
2000 Cairo 1.23287 0.49049 0.69702 0.14574
2000 New Yor 1.21963 0.61583 0.91894 0.28034
2000 Kolkata 1.36948 0.61777 0.89533 0.45901
2000 Moscow 1.28566 0.5845 0.89667 0.2225
2000 Bangkok 1.12575 0.64157 0.80925 0.26428
2000 Buenos 1.28548 0.59625 0.90943 0.51912
2000 Shenzhe 1.25596 0.42908 0.72052 0.05841
2000 Dhaka 1.02 0.54252 1.02525 0.31105
2000 Lagos 1.1985 0.5421 0.79661 0.24434
2000 Istanbu 1.29937 0.61477 0.89186 0.28214
2000 Osaka 1.12447 0.44132 0.85857 0.33363
2000 Karachi 1.0786 0.6404 0.79733 0.32573
2003 Seoul 0.94675 0.51697 0.81658 0.08185
2003 Mexico 0.91612 0.48827 0.29924 0.19591
2003 Guangzh 0.88588 0.35068 0.7689 0.13748
2003 Beijing 1.00964 0.20107 0.74836 0.19231
2003 Cairo 1.13935 0.51469 0.87519 0.13719
2003 New Yor 1.09562 0.29671 0.72437 0.18226
2003 Kolkata 1.03516 0.62545 0.58114 0.24991
2003 Moscow 0.64095 0.41691 0.60954 0.07172
2003 Bangkok 0.80434 0.35733 0.81325 0.14272
2003 Buenos 0.91916 0.09245 0.79081 0.24808
2003 Shenzhe 1.17202 0.57672 0.66825 0.21684
2003 Dhaka 1.3006 0.43626 0.60268 0.3323
2003 Lagos 0.92933 0.07699 0.88213 0
2003 Istanbu 0.80001 0.33916 0.83947 0.21854
2003 Osaka 1.20215 0.32112 0.75905 0.128
2003 Karachi 0.95348 0.40148 0.6951 0.23027
How would I create a table with the 5 cities with the largest positive value of the difference between their 2003 economic and their 2000 economic score? using base r or dplyr? Im keep having issues since i cant exclude the other columns for some reason.
Upvotes: 0
Views: 60
Reputation: 76653
Something like the following will get you close.
library(dplyr)
df1 %>%
group_by(Cities) %>%
arrange(Year) %>%
summarise(diff = diff(economic), .groups = "drop") %>%
arrange(desc(diff), Cities) %>%
top_n(5)
If by "positive value of the difference" you mean the absolute differences, then wrap abs()
around the diff
above:
summarise(diff = abs(diff(economic)), .groups = "drop")
But if you want to discard the negative differences, pipe the summarise
to filter(diff >= 0)
.
Data in dput
format.
df1 <-
structure(list(Year = c(2000L, 2000L, 2000L, 2000L, 2000L, 2000L,
2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L,
2000L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L), Cities = c("Seoul",
"Mexico", "Guangzh", "Beijing", "Cairo", "New York", "Kolkata",
"Moscow", "Bangkok", "Buenos", "Shenzhe", "Dhaka", "Lagos", "Istanbu",
"Osaka", "Karachi", "Seoul", "Mexico", "Guangzh", "Beijing",
"Cairo", "New York", "Kolkata", "Moscow", "Bangkok", "Buenos",
"Shenzhe", "Dhaka", "Lagos", "Istanbu", "Osaka", "Karachi"),
generosity = c(1.23788, 1.29704, 0.91451, 1.24711, 1.23287,
1.21963, 1.36948, 1.28566, 1.12575, 1.28548, 1.25596, 1.02,
1.1985, 1.29937, 1.12447, 1.0786, 0.94675, 0.91612, 0.88588,
1.00964, 1.13935, 1.09562, 1.03516, 0.64095, 0.80434, 0.91916,
1.17202, 1.3006, 0.92933, 0.80001, 1.20215, 0.95348), economic = c(0.63376,
0.62433, 0.48181, 0.54604, 0.49049, 0.61583, 0.61777, 0.5845,
0.64157, 0.59625, 0.42908, 0.54252, 0.5421, 0.61477, 0.44132,
0.6404, 0.51697, 0.48827, 0.35068, 0.20107, 0.51469, 0.29671,
0.62545, 0.41691, 0.35733, 0.09245, 0.57672, 0.43626, 0.07699,
0.33916, 0.32112, 0.40148), culture = c(0.86027, 0.89042,
0.81444, 0.86179, 0.69702, 0.91894, 0.89533, 0.89667, 0.80925,
0.90943, 0.72052, 1.02525, 0.79661, 0.89186, 0.85857, 0.79733,
0.81658, 0.29924, 0.7689, 0.74836, 0.87519, 0.72437, 0.58114,
0.60954, 0.81325, 0.79081, 0.66825, 0.60268, 0.88213, 0.83947,
0.75905, 0.6951), hp = c(0.25497, 0.33088, 0.14074, 0.40105,
0.14574, 0.28034, 0.45901, 0.2225, 0.26428, 0.51912, 0.05841,
0.31105, 0.24434, 0.28214, 0.33363, 0.32573, 0.08185, 0.19591,
0.13748, 0.19231, 0.13719, 0.18226, 0.24991, 0.07172, 0.14272,
0.24808, 0.21684, 0.3323, 0, 0.21854, 0.128, 0.23027)),
class = "data.frame", row.names = c(NA, -32L))
Upvotes: 2
Reputation: 39613
Only using dplyr
:
library(dplyr)
#Code
newdf <- df %>% filter(Year %in% c(2000,2003)) %>%
arrange(Cities,Year) %>%
group_by(Cities) %>%
summarise(Diff=diff(economic)) %>%
ungroup() %>%
arrange(desc(Diff)) %>%
top_n(5)
Output:
# A tibble: 5 x 2
Cities Diff
<chr> <dbl>
1 Shenzhe 0.148
2 Cairo 0.0242
3 Kolkata 0.00768
4 Dhaka -0.106
5 Seoul -0.117
Some data used:
#Data
df <- structure(list(Year = c(2000L, 2000L, 2000L, 2000L, 2000L, 2000L,
2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L,
2000L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L), Cities = c("Seoul",
"Mexico", "Guangzh", "Beijing", "Cairo", "New Yor", "Kolkata",
"Moscow", "Bangkok", "Buenos", "Shenzhe", "Dhaka", "Lagos", "Istanbu",
"Osaka", "Karachi", "Seoul", "Mexico", "Guangzh", "Beijing",
"Cairo", "New Yor", "Kolkata", "Moscow", "Bangkok", "Buenos",
"Shenzhe", "Dhaka", "Lagos", "Istanbu", "Osaka", "Karachi"),
generosity = c(1.23788, 1.29704, 0.91451, 1.24711, 1.23287,
1.21963, 1.36948, 1.28566, 1.12575, 1.28548, 1.25596, 1.02,
1.1985, 1.29937, 1.12447, 1.0786, 0.94675, 0.91612, 0.88588,
1.00964, 1.13935, 1.09562, 1.03516, 0.64095, 0.80434, 0.91916,
1.17202, 1.3006, 0.92933, 0.80001, 1.20215, 0.95348), economic = c(0.63376,
0.62433, 0.48181, 0.54604, 0.49049, 0.61583, 0.61777, 0.5845,
0.64157, 0.59625, 0.42908, 0.54252, 0.5421, 0.61477, 0.44132,
0.6404, 0.51697, 0.48827, 0.35068, 0.20107, 0.51469, 0.29671,
0.62545, 0.41691, 0.35733, 0.09245, 0.57672, 0.43626, 0.07699,
0.33916, 0.32112, 0.40148), culture = c(0.86027, 0.89042,
0.81444, 0.86179, 0.69702, 0.91894, 0.89533, 0.89667, 0.80925,
0.90943, 0.72052, 1.02525, 0.79661, 0.89186, 0.85857, 0.79733,
0.81658, 0.29924, 0.7689, 0.74836, 0.87519, 0.72437, 0.58114,
0.60954, 0.81325, 0.79081, 0.66825, 0.60268, 0.88213, 0.83947,
0.75905, 0.6951), hp = c(0.25497, 0.33088, 0.14074, 0.40105,
0.14574, 0.28034, 0.45901, 0.2225, 0.26428, 0.51912, 0.05841,
0.31105, 0.24434, 0.28214, 0.33363, 0.32573, 0.08185, 0.19591,
0.13748, 0.19231, 0.13719, 0.18226, 0.24991, 0.07172, 0.14272,
0.24808, 0.21684, 0.3323, 0, 0.21854, 0.128, 0.23027)), class = "data.frame", row.names = c(NA,
-32L))
But what mentioned in comments about absolute difference should be taken into account as it is not clear.
Upvotes: 2