Reputation:
I want to find the strings (words) that contain a number, using Spark.
Example : String = "abc def ghi2 xyz4"
Answer : ghi2 xyz4
Upvotes: 0
Views: 437
Reputation: 4045
Use the `rlike` function to check for digits, then `.map(row => row.getString(0))` to pull the matching strings out of the rows:
import org.apache.spark.sql.functions._
/**
 * Splits a sample sentence on spaces, keeps the words matching the digit
 * pattern, and prints them joined by single spaces.
 */
object CheckDigitInString {

  def main(args: Array[String]): Unit = {
    // Constant is defined outside this snippet; presumably it hands back a SparkSession.
    val spark = Constant.getSparkSess
    import spark.implicits._

    val sentence = "abc def ghi2 xyz4"

    // One row per space-separated word, held in the single column "value".
    val words = sentence.split(" ").toSeq.toDF

    // Column predicate selecting the words that match the digit pattern.
    val containsDigit = col("value").rlike(".*[0-9]+.*")

    val matching = words
      .filter(containsDigit)
      .map(_.getString(0))
      .collect()

    println(matching.mkString(" "))
  }
}
Upvotes: 0
Reputation: 2451
+-----------------+
| value|
+-----------------+
|abc def ghi2 xyz4|
| 0d2 234 AXZ Mxei|
+-----------------+
// For every input sentence, keep only the space-separated words that contain
// a digit and join them back into a single string, one output row per sentence.
// NOTE(review): the word order inside each result relies on collect_list
// keeping the exploded order after groupBy — confirm this holds on your cluster.
Seq("abc def ghi2 xyz4", "0d2 234 AXZ Mxei")
  .toDF()
  // Tag each sentence with an id so its words can be regrouped afterwards.
  .withColumn("id", monotonically_increasing_id())
  // One row per word; the exploded column is referred to as "col" below.
  .select(col("id"), explode(split(col("value"), " ")))
  // Capture a digit from the word; words without one yield "" and are dropped.
  .withColumn("digit", regexp_extract(col("col"), "\\d", 0))
  .filter(col("digit") =!= "")
  .groupBy(col("id"))
  .agg(concat_ws(" ", collect_list(col("col"))).as("value"))
  .show()
Output:
+---+---------+
| id| value|
+---+---------+
| 0|ghi2 xyz4|
| 1| 0d2 234|
+---+---------+
Or, using an RDD:
// RDD variant: maps each input sentence to the array of its space-separated
// words that contain at least one digit.
Seq("abc def ghi2 xyz4", "0d2 234 AXZ Mxei")
  .toDF()
  .rdd
  .map { row =>
    val words = row.getString(0).split(" ")
    words.filter(word => word.matches(".*\\d.*"))
  }
Upvotes: 1