Reputation: 117
im using pyspark 2.4 and bellow the code : i have a dataframe with french month , i converte them to english month in order to change the fomat date ( date_desired column) and everything is ok using two expresssion
data = [
(1,"20 mai 2021"),
(1,"21 juin 2021")
]
schema = StructType([
StructField('montant', IntegerType(), False),
StructField('date', StringType(),True),
])
col = ["montant","date"]
df2 = spark.createDataFrame(data=data, schema= schema)
df2= df2.select(col)
df2.show()
dd =df2.withColumn('date_expr',F.expr(" CASE WHEN rlike(date,'mai') THEN regexp_replace(date,'mai','may') \
WHEN rlike(date,'juin') THEN regexp_replace(date,'juin','june') \
ELSE date \
END as rr\
"))
dd =dd.withColumn('date_desired',F.expr(" to_date(date_expr ,'dd MMMM yyyy') "))
dd.show()
+-------+------------+
|montant| date|
+-------+------------+
| 1| 20 mai 2021|
| 1|21 juin 2021|
+-------+------------+
+-------+------------+------------+------------+
|montant| date| date_expr|date_desired|
+-------+------------+------------+------------+
| 1| 20 mai 2021| 20 may 2021| 2021-05-20|
| 1|21 juin 2021|21 june 2021| 2021-06-21|
+-------+------------+------------+------------+
But ~: i want to acheive the same result with one expression as below :
dd =df2.withColumn('date_expr',F.expr(" CASE WHEN rlike(date,'mai') THEN regexp_replace(date,'mai','may') \
WHEN rlike(date,'juin') THEN regexp_replace(date,'juin','june') \
ELSE date \
END as dt_col\
to_date(dt_col ,'dd MMMM yyyy')"))
but i got error sql syntax
Upvotes: 0
Views: 431
Reputation: 26676
from itertools import chain
#create map using itertolls
d={'mai': "May", 'juin': "June"}
m_expr1 = create_map([lit(x) for x in chain(*d.items())])
new = (df2.withColumn('new_date', split(df2['date'],'\s')).withColumn('x', F.struct(*[F.col("new_date")[i].alias(f"val{i+1}") for i in range(3)]))#convert date intostruct column
.withColumn("x", F.col("x").withField("val2", m_expr1[F.col("x.val2")]))#Map new dates
.select('montant','date',array_join(array('x.*'),' ').alias('newdate'))#Convert struct column to string date
.withColumn('date_desired',F.expr(" to_date(newdate ,'dd MMMM yyyy') "))#convert to datetime
).show()
+-------+------------+------------+------------+
|montant| date| newdate|date_desired|
+-------+------------+------------+------------+
| 1| 20 mai 2021| 20 May 2021| 2021-05-20|
| 1|21 juin 2021|21 June 2021| 2021-06-21|
+-------+------------+------------+------------+
Upvotes: 1