Reputation: 7107
I am trying to apply a Trim
function from the DescTools
package to a data frame in R using the dplyr
package.
What I have so far is the following:
# Failing attempt: Trim() is evaluated right away with only `trim` and
# `na.rm`, i.e. without its data argument `x`, which raises the error
# quoted below.
x <- df %>%
group_by(Country) %>%
mutate_all(OfferPrice, Trim(trim = 0.1, na.rm = TRUE))
Which returns the following error:
Error in Trim(trim = 0.1, na.rm = TRUE) :
argument "x" is missing, with no default
I know it's a problem with the arguments I pass inside the Trim() part of the mutate_all call, but I cannot work out how to apply this function inside a dplyr pipeline.
The function trims the top and bottom 10% of the observations, hopefully removing any extreme values.
Data:
df <- structure(list(Country = c("AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU", "AU",
"AU", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA",
"CA", "CA", "CA", "CA", "CA", "CA", "GB", "GB", "GB", "GB", "GB",
"GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB",
"GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB",
"GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB", "GB",
"GB", "GB", "GB", "GB", "GB", "GB"), OfferPrice = c(0.25, 0.55,
0.065, 0.075, 0.019, 0.0114, 0.18, 0.015, 2.8, 3.62, 0.025, 0.07,
0.6, 0.9, 0.12, 2.72, 0.015, 0.015, 0.32, 0.2, 0.063, 0.01, 1.42,
0.0045, 0.02, 1.15, 0.2, 17.05, 0.009, 1.8, 3.22, 0.135, 0.35,
5, 0.37, 0.023, 0.014, 0.023, 0.35, 1.25, 0.05, 0.059, 0.2, 0.025,
5.45, 0.05, 0.3, 0.22, 0.04, 0.035, 2, 0.32, 0.2, 0.2, 0.02,
0.34, 0.04, 0.025, 0.03, 0.0125, 1.6, 0.03, 0.15, 13.5, 0.1,
0.3, 0.13, 0.115, 0.35, 0.2, 0.6, 0.7, 8, 14, 25, 15.75, 3.8,
2, 0.5, 35.2, 1.75, 0.12, 0.48, 0.15, 0.7, 0.075, 0.15, 14.5,
0.29, 0.58, 1.75, 9, 11.5, 0.5, 0.075, 0.12, 1.1, 0.6, 0.75,
0.26, 0.2, 0.12, 0.49, 12, 6.85, 0.55, 0.25, 1.6, 0.36, 0.06,
2, 0.272, 41, 0.15, 1.1, 4.1, 0.6, 0.08, 1.4, 3, 0.09, 0.15,
0.2, 0.3, 0.8, 0.21, 0.1, 0.05, 0.17, 0.1, 0.15, 0.05, 0.3, 0.6,
0.2, 0.5, 3.45, 3, 0.07, 0.1, 0.3, 7.2, 0.4, 0.1, 12.5, 0.07,
0.375, 0.25, 0.3, 1.15, 0.2, 3, 1, 0.3, 0.25, 530, 262, 20, 37.5,
3422, 295, 100, 0.085, 1925, 0.3, 107.5, 10, 2.1, 3, 15, 300,
690, 50, 410, 100, 120, 225, 40, 100, 100, 51, 10, 82, 9.58,
269, 0.5, 271, 100, 108, 0.3, 4.5, 0.5, 0.55, 50, 0.95, 275,
100, 170, 0.7), OfferTo1stOpen = c(18, -2.727274895, 9.230772972,
6.666662216, -15.78947067, 5.263155937, -2.777781725, 13.33333588,
5.000001907, -3.591157198, -0.000001490116119, 1.428570986, -4.166670322,
0.00000264909545, -34.16666412, -0.000001051846652, 26.66666985,
26.66666985, 9.375002861, 2.499998569, 6.34920454, 0.000002235174179,
-0.7042223215, -11.11110687, 15.00000286, 1.304349899, -0.000001490116119,
6.217013359, 11.11111546, 25.00000381, 0.9316761494, -0.000003973642833,
-15.71428394, 17.20000076, -0.000001288749104, 4.347826004, 14.28571033,
13.04347801, 4.285716057, 43.20000076, 1.99999845, 10.16949081,
2.499998569, -4.000001431, -0.1834827513, 11.99999809, -1.666670561,
95.45454407, -12.49999809, 25.7142849, -0.5, 18.75000191, -0.000001490116119,
-17.50000191, -9.999998093, 44.11764526, 15.00000286, 19.99999809,
0.000002235174179, 35.99999619, 10.62499809, 76.66667175, 6.666662216,
-0.3703703582, -10.00000095, -100, 146.1538544, 65.21739197,
-11.42856979, 14.99999809, -5.000003815, -11.42856979, 1.625,
6.785714149, NA, 3.492063522, -3.684209347, -2.5, 10, -1.420456648,
1.142857194, -12.49999809, -1.041664481, -0.000003973642833,
-14.2857132, 39.99999619, 36.66666031, -0.3448275924, -15.51723862,
-12.06896305, -18.2857151, 0.555555582, -5.434782505, 590, -6.666670322,
0.000002235174179, 1.818179607, 36.66666031, -6.666666508, 0.000003667978262,
-10.00000095, 20.83333588, -20.40816498, -2.916666746, -29.1970787,
-0.000002167441608, -10, -18.80635834, -100, 8.333335876, -3.5,
10.29411125, 2.097560883, -6.666670322, 7.272725105, 0.7317096591,
19.99999619, 81.25000763, 45.00000381, -20, -11.1111145, -0.000003973642833,
-7.500001431, -0.000003973642833, -1.250001431, -14.28571129,
49.99999619, -10.00000095, -5.882353783, NA, 23.33332825, 19.99999809,
18.33332825, -13.33333683, 34.99999619, -34, -19.71014595, -32.33333206,
-21.4285717, -20.00000191, -100, 0.1388915479, 7.499998569, -20.00000191,
-0.2399999946, 257.1428528, -16, 54, NA, -4.347824097, -100,
6, 1, 4.999995708, -8, 8.301886559, 3.511450291, 25, 16, -1.461133838,
-1.694915295, 1, 17.64705849, 3.376623392, 24.99999428, 3.255813837,
34, 0.00000454130668, -3.333333254, 10.33333302, 1.666666627,
16.231884, 9, 1.829268336, 3, 11.66666698, 4.888888836, 14.25,
3.5, 3.5, -4.411764622, 0.200000003, 1.829268336, 53.96659851,
9.665427208, 5, -1.586715817, 2, 1.111111164, 4.999995708, -10,
5, -4.545456409, NA, 7.894738197, 5.454545498, 1, 11.17647076,
25.00000191), OfferTo1stClose = c(8, -7.272729397, 9.230772972,
7.999995708, -21.05262947, -3.508773565, -2.777781725, 0.000002235174179,
3.571430445, -3.867400169, -0.000001490116119, 1.428570986, -6.666670322,
-1.666664004, -35.83333206, -3.308824539, 13.33333588, 26.66666985,
10.93750286, -0.000001490116119, 6.34920454, -9.999998093, -0.3521096706,
11.11111546, 5.000002384, -0.4347805381, -2.500001431, 3.519066334,
11.11111546, 27.22222519, 4.34782505, -7.407411098, -17.1428566,
15.39999962, 4.05405283, -0.0000001943629684, 7.142853737, 13.04347801,
2.857144594, 43.20000076, 3.999998569, 10.16949081, -7.500001431,
3.999998569, -0.5504552126, 19.99999809, -1.666670561, 170.4545441,
-14.99999809, 31.4285717, -0.5, 18.75000191, -20.00000191, -17.50000191,
0.000002235174179, 44.11764526, 12.50000286, 15.99999809, 3.333335638,
35.99999619, 10.62499809, 123.3333359, 13.3333292, -1.481481433,
-10.00000095, -100, 138.4615479, 47.82608414, -12.85714149, 32.49999619,
-13.33333683, -24.2857132, 1.75, -0.3571428657, NA, 3.93650794,
-7.894735813, -7, 20, -0.9375021458, 1.714285731, -8.333331108,
-1.041664481, 3.333329201, -19.99999809, 33.33332825, 33.33332825,
-0.06896551698, -16.3793087, -16.3793087, -18.2857151, 2.666666746,
2.173913002, 590, -6.666670322, -16.66666412, 2.727270603, 44.99999237,
-10.66666698, 1.923080683, -12.50000095, 16.66666985, -22.44898033,
-4.166666508, -39.85401535, -3.636365652, -12, -16.8959198, -100,
0.000002235174179, -3.5, 13.97058201, 2.707317114, -8.066670418,
5.454543114, 0.4878072143, 19.99999619, 87.50000763, 45.7142868,
-25.66666603, -5.555559158, 16.66666222, -2.500001431, 3.333329201,
-0.000001490116119, -14.28571129, 49.99999619, -10.00000095,
-5.882353783, NA, 39.99999619, 19.99999809, 13.3333292, -10.00000381,
65, -26, -19.71014595, -31.66666603, -21.4285717, -20.00000191,
-100, -0.1388862431, 11.24999809, -20.00000191, -1.679999948,
228.5714264, -22.66666603, 42, NA, -7.826085091, -100, 6.666666508,
0, 4.999995708, -8, 8.301886559, 3.969465733, 26, 16, -5.084745884,
1.322033882, 1.5, 17.64705849, 2.077922106, 24.99999428, 3.255813837,
43, 0.00000454130668, -4.166666508, 10.33333302, 1.333333373,
18.69565201, 9, 1.829268336, 3, 11.66666698, 3.111111164, 15,
3.5, 3.5, -4.411764622, 0.6000000238, 50.60975647, 53.96659851,
37.54646683, 0, -0.1476014704, 3, 1.296296239, 4.999995708, -11.11111069,
5, -0.000002167441608, NA, 7.894738197, 4.181818008, 0.5, 10.88235283,
25.00000191)), row.names = c(NA, -199L), vars = "Country", drop = TRUE, indices = list(
0:61, 62:154, 155:198), group_sizes = c(62L, 93L, 44L), biggest_group_size = 93L, labels = structure(list(
Country = c("AU", "CA", "GB")), row.names = c(NA, -3L), class = "data.frame", vars = "Country", drop = TRUE, indices = list(
0:61, 62:154, 155:198), group_sizes = c(62L, 93L, 44L), biggest_group_size = 93L, labels = structure(list(
Country = c("AU", "CA", "GB")), row.names = c(NA, -3L), class = "data.frame", vars = "Country", drop = TRUE)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Upvotes: 0
Views: 458
Reputation: 269586
Assuming that what you want is that for any element of OfferPrice
excluded by Trim(OfferPrice, ...)
that entire row of df
should be dropped, get the trim
attribute of the result of Trim(...)
and remove those rows using slice
doing it all by Country
.
library(dplyr)
library(DescTools)
# For each Country, drop the rows whose OfferPrice is removed by Trim().
# The "trim" attribute of Trim()'s result is read and negated so that
# slice() excludes those row positions within the group.
df %>%
  group_by(Country) %>%
  slice(
    -attr(Trim(OfferPrice, trim = 0.1, na.rm = TRUE), "trim")
  ) %>%
  ungroup()
This could also be written:
# Same per-Country row removal, with the index computation written as its
# own inner pipeline: trim OfferPrice, read the "trim" attribute, negate it.
df %>%
  group_by(Country) %>%
  slice(
    OfferPrice %>%
      Trim(trim = 0.1, na.rm = TRUE) %>%
      attr("trim") %>%
      `-`()
  ) %>%
  ungroup()
Upvotes: 1
Reputation: 3776
I think you'll need to do this with do
since the action of Trim
is to return essentially a subset of observations. Try:
# Trim OfferPrice separately within each Country.
# An unnamed do() requires every per-group result to be a data frame, so the
# trimmed vector is wrapped in data.frame(); returning the bare numeric
# vector makes do() fail. as.numeric() strips the "trim" attribute so the
# per-group pieces combine as plain numeric columns.
x <- df %>%
  group_by(Country) %>%
  do(
    data.frame(
      OfferPrice = as.numeric(Trim(.$OfferPrice, trim = 0.1, na.rm = TRUE))
    )
  )
You could then use lapply
or map
inside the do
statement to Trim
each column of data, but I'm not sure if this is actually what you want. It's unclear since you have not provided any sample data. The attempt to use mutate_all
suggests you want to Trim
each column of data separately, but this doesn't make sense to me.
EDIT: based on your comment, what you really want is to filter the data frame using the trimmed OfferPrice column, so:
# Keep, per Country, only the rows whose OfferPrice survives trimming.
# The "trim" attribute holds the positions of the values Trim() removed,
# so the index is negated: a positive index would keep exactly the
# discarded extremes instead of dropping them.
# NOTE(review): with na.rm = TRUE the positions presumably refer to the
# NA-stripped vector -- confirm OfferPrice has no NA before relying on this.
x <- df %>%
  group_by(Country) %>%
  do(
    .[-attr(Trim(.$OfferPrice, trim = 0.1, na.rm = TRUE), "trim"), ]
  )
See the documentation of Trim
for details, specifically
The indices of the trimmed values will be attached as attribute named "trim".
Upvotes: 1