Reputation: 46
I am trying to port an old version of the Yahoo streaming benchmark for Flink to a newer Flink version by removing the deprecated classes.
I'm now stuck converting the deprecated fold() to aggregate(): I can't work out how the existing parameters of fold map onto those of aggregate.
//old version using fold
// Folds every event of a window into one running WindowedCount, then re-emits that
// result stamped with the window start and the first field of the key.
val windowedCounts = windowedEvents.fold(
  new WindowedCount(null, "", 0, new java.sql.Timestamp(0L)),
  (acc: WindowedCount, event: (String, String, Timestamp)) => {
    // Only move lastUpdate forward when the event's timestamp is strictly newer.
    if (acc.lastUpdate.getTime < event._3.getTime) {
      acc.lastUpdate = event._3
    }
    acc.count += 1
    acc
  },
  (key: Tuple, window: TimeWindow, input: Iterable[WindowedCount], out: Collector[WindowedCount]) => {
    val folded = input.iterator.next()
    println(folded.lastUpdate)
    val windowStart = new java.sql.Timestamp(window.getStart)
    out.collect(new WindowedCount(windowStart, key.getField(0), folded.count, folded.lastUpdate))
  }
)
// Intended replacement for the fold() call: the same windowed stream, aggregated
// through a custom CountAggregate function.
val windowedCounts = windowedEvents.aggregate(new CountAggregate)
I want to create a CountAggregate class that extends AggregateFunction, something like:
// Asker's draft; it does not compile as written. The working version in the answer
// below overrides add/getResult/merge (not accumulate/getValue) and uses WindowedCount,
// not Collector[WindowedCount], as the third type parameter.
class CountAggregate extends AggregateFunction[(String, String, Timestamp), WindowedCount, Collector[WindowedCount]] {
// Initial accumulator: count 0, lastUpdate at timestamp 0, first two fields as placeholders.
override def createAccumulator() = WindowedCount(null, "", 0, new java.sql.Timestamp(0L))
// Increments the count in place and keeps the later of the accumulator's and the event's timestamp.
override def accumulate(acc: WindowedCount, r: (String, String, Timestamp)): WindowedCount = {
val lastUpdate = if (acc.lastUpdate.getTime < r._3.getTime) r._3 else acc.lastUpdate
acc.count += 1
acc.lastUpdate = lastUpdate
acc
}
// NOTE(review): this returns a window-function lambda and never reads `acc`; the answer
// below moves this per-window logic into a separate ProcessWindowFunction.
override def getValue (acc: WindowedCount) = { (key: Tuple, window: TimeWindow, input: Iterable[WindowedCount], out: Collector[WindowedCount]) =>
val windowedCount = input.iterator.next()
println(windowedCount.lastUpdate)
out.collect(new WindowedCount(new java.sql.Timestamp(window.getStart), key.getField(0), windowedCount.count, windowedCount.lastUpdate))
}
// NOTE(review): the closing brace of the class is missing from this snippet.
Any help in rewriting the CountAggregate class would be appreciated.
Upvotes: 0
Views: 390
Reputation: 13346
You need to specify an `AggregateFunction` as well as a `ProcessWindowFunction` to do the final `getValue` step:
// Pass both functions: CountAggregate builds the per-window WindowedCount, and
// WindowAggregateFunction post-processes that result with access to the key and window.
val windowedCounts = windowedEvents.aggregate(
new CountAggregate(),
new WindowAggregateFunction())
/**
 * Counts the events it is given and tracks the latest timestamp seen, taken from
 * the third element of each input tuple.
 *
 * The accumulator and the result are both `WindowedCount`. The first two fields
 * are left as placeholders (`null` and `""`) in every value produced here.
 */
class CountAggregate extends AggregateFunction[(String, String, Timestamp), WindowedCount, WindowedCount] {

  /** Empty accumulator: zero events, last update at timestamp 0. */
  override def createAccumulator() = WindowedCount(null, "", 0, new java.sql.Timestamp(0L))

  /** Returns a new accumulator with the count bumped by one and the later timestamp kept. */
  override def add(value: (String, String, Timestamp), acc: WindowedCount): WindowedCount = {
    val eventTime = value._3
    // Replace lastUpdate only when the event is strictly newer.
    val newest = if (eventTime.getTime > acc.lastUpdate.getTime) eventTime else acc.lastUpdate
    WindowedCount(null, "", acc.count + 1, newest)
  }

  /** The accumulator is already in its final form, so it is returned unchanged. */
  override def getResult(accumulator: WindowedCount): WindowedCount = accumulator

  /** Combines two partial results: counts are summed and the later timestamp wins. */
  override def merge(a: WindowedCount, b: WindowedCount): WindowedCount = {
    val newest = if (b.lastUpdate.getTime > a.lastUpdate.getTime) b.lastUpdate else a.lastUpdate
    WindowedCount(null, "", a.count + b.count, newest)
  }
}
/**
 * Window function that takes the first `WindowedCount` from `elements` and re-emits
 * it with the window start time and the first field of the key filled in.
 */
class WindowAggregateFunction extends ProcessWindowFunction[WindowedCount, WindowedCount, Tuple, TimeWindow]() {

  /**
   * @param key      key of the window; its field 0 becomes the second field of the output
   * @param context  gives access to the window, whose start becomes the first output field
   * @param elements window contents; only the first element is read
   * @param out      collector that receives the single output record
   */
  override def process(key: Tuple, context: Context, elements: Iterable[WindowedCount], out: Collector[WindowedCount]): Unit = {
    val aggregated = elements.iterator.next()
    val windowStart = new java.sql.Timestamp(context.window.getStart)
    out.collect(WindowedCount(windowStart, key.getField(0), aggregated.count, aggregated.lastUpdate))
  }
}
Upvotes: 1