Reputation: 635
I'm getting the following errors when I run my code. Can you tell me why?
notebook:28: error: not found: value month retail_df = retail_df.withColumn("Month", month(retail_df("Date")))
notebook:29: error: not found: value year retail_df = retail_df.withColumn("Year", year(retail_df("Date")))
import org.apache.spark.sql.types._
// Custom schema for the CSV input: column names paired with their Spark SQL
// types, in column order. Every column is declared nullable.
var schema = StructType(
  Seq[(String, DataType)](
    "Store"         -> IntegerType,
    "DayOfWeek"     -> IntegerType,
    "Date"          -> DateType,
    "Sales"         -> IntegerType,
    "Customers"     -> IntegerType,
    "Open"          -> IntegerType,
    "Promo"         -> IntegerType,
    "StateHoliday"  -> StringType,
    "SchoolHoliday" -> StringType
  ).map { case (columnName, columnType) =>
    StructField(columnName, columnType, nullable = true)
  }
)
// Reader for CSV input with a header row, parsed against the explicit `schema`.
val retail_dfr = sqlContext.read
  .format("com.databricks.spark.csv")
  .option("header", "true")
  .schema(schema)

// Load the training data; kept as a `var` because it is reassigned later
// when derived columns are added.
var retail_df = retail_dfr.load("/FileStore/tables/Rossman/train.csv")

// Projection of the store, sales and customer-count columns.
val sales_custs_df = retail_df.select("Store", "Sales", "Customers")

// Rows whose Open value is greater than zero.
val retails_open_df = retail_df.filter(retail_df("Open") > 0)

// Rows where both holiday columns compare equal to 1.
val holidays_df = retail_df.filter(
  ($"StateHoliday" === 1) && ($"SchoolHoliday" === 1)
)

// Distinct store identifiers.
val store_ids = retail_df.select("Store").distinct()

// Cross-tabulation of DayOfWeek against Promo, with the label column and the
// "0"/"1" count columns renamed to readable names.
var weekday_promos = retail_df.stat
  .crosstab("DayOfWeek", "Promo")
  .withColumnRenamed("DayOfWeek_Promo", "DayOfWeek")
  .withColumnRenamed("0", "NoPromo")
  .withColumnRenamed("1", "Promo")
// `month` and `year` are column functions defined in
// org.apache.spark.sql.functions. They are not in scope by default, which is
// what produces "not found: value month" / "not found: value year".
import org.apache.spark.sql.functions.{month, year}

// Add Month and Year columns extracted from the Date column.
retail_df = retail_df.withColumn("Month", month(retail_df("Date")))
retail_df = retail_df.withColumn("Year", year(retail_df("Date")))

// Print the first 5 rows to inspect the new columns.
retail_df.show(5)
Upvotes: 0
Views: 1790