Reputation: 1
Suppose I have the following data.
# Example panel: two ids, each observed at t = 1, ..., 5, with random `val` draws.
dt = DataFrame(;
    id = repeat(1:2; inner = 5),
    t = repeat(1:5; outer = 2),
    val = randn(10),
)
Row │ id t val
│ Int64 Int64 Float64
─────┼─────────────────────────
1 │ 1 1 0.546673
2 │ 1 2 -0.817519
3 │ 1 3 0.201231
4 │ 1 4 0.856569
5 │ 1 5 1.8941
6 │ 2 1 0.240532
7 │ 2 2 -0.431824
8 │ 2 3 0.165137
9 │ 2 4 1.22958
10 │ 2 5 -0.424504
For each row, I want to create a dummy variable indicating whether val > 0.5 in any row from t to t+2 (within the same id).
For instance, I want to add a new variable, val_gr_0.5, as shown below.
Could someone help me with how to do this?
Row │ id t val val_gr_0.5
│ Int64 Int64 Float64 Float64
─────┼─────────────────────────
1 │ 1 1 0.546673 1 (search t:1 to 3)
2 │ 1 2 -0.817519 1 (search t:2 to 4)
3 │ 1 3 0.201231 1 (search t:3 to 5)
4 │ 1 4 0.856569 missing
5 │ 1 5 1.8941 missing
6 │ 2 1 0.240532 0 (search t:1 to 3)
7 │ 2 2 -0.431824 1 (search t:2 to 4)
8 │ 2 3 0.165137 1 (search t:3 to 5)
9 │ 2 4 1.22958 missing
10 │ 2 5 -0.424504 missing
Upvotes: 0
Views: 105
Reputation: 14735
julia> using DataFramesMeta
julia> function checkvals(subsetdf)
           # Values of the `val` column for the rows handed in by the caller.
           window = subsetdf[!, :val]
           # Fewer than three rows: the result is reported as `missing`.
           if length(window) < 3
               return missing
           end
           # True as soon as one value in the window exceeds 0.5.
           return any(>(0.5), window)
       end
checkvals (generic function with 1 method)
julia> for grp in groupby(dt, :id)
           # For a row at time t0, check the rows of the same group with t0 <= t <= t0 + 2.
           local inwindow = t0 -> checkvals(@subset(grp, @byrow t0 <= :t <= t0 + 2))
           transform!(grp, :t => ByRow(inwindow) => :val_gr)
       end
julia> dt
10×4 DataFrame
Row │ id t val val_gr
│ Int64 Int64 Float64 Bool?
─────┼──────────────────────────────────
1 │ 1 1 0.0619327 false
2 │ 1 2 0.278406 false
3 │ 1 3 -0.595824 true
4 │ 1 4 0.0466594 missing
5 │ 1 5 1.08579 missing
6 │ 2 1 -1.57656 true
7 │ 2 2 0.17594 true
8 │ 2 3 0.865381 true
9 │ 2 4 0.972024 missing
10 │ 2 5 1.54641 missing
Upvotes: 2
Reputation: 56
First, define a function:
"""
    run_max(x, window)

Return the forward-looking rolling maximum of `x`.

Element `i` of the result is `maximum(x[i:i+window-1])`. Positions where a full
window does not fit (the last `window - 1` elements, or every element when `x` is
shorter than `window`) are `missing`.

# Arguments
- `x`: vector indexed from 1 whose elements can be compared by `maximum`.
- `window`: number of consecutive elements in each window; must be at least 1.

# Returns
A `Vector{Union{Missing,eltype(x)}}` with the same length as `x`.

# Throws
- `ArgumentError` if `window < 1`.
"""
function run_max(x, window)
    # A non-positive window would otherwise reduce over an empty or invalid view.
    window >= 1 || throw(ArgumentError("window must be at least 1, got $window"))
    span = window - 1  # offset from the first to the last element of a window
    # Built with Base only, so no `missings` helper has to be in scope.
    res = Vector{Union{Missing,eltype(x)}}(missing, length(x))
    for i in 1:(length(x) - span)
        res[i] = maximum(view(x, i:(i + span)))
    end
    return res
end
Then use it with DataFrames.jl:
# Indicator column: true where `val` exceeds 0.5.
dt[!, :new] = dt.val .> 0.5
# Within each group of the first column, take the rolling maximum of the indicator
# over 3 consecutive rows (for Booleans, the maximum is true if any entry is true).
transform!(groupby(dt, 1), :new => (col -> run_max(col, 3)))
Upvotes: 1