package com.dt.spark.Test
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.{DataFrame, SparkSession}
import scala.collection.mutable
object typedLitAndLitTest {

  /** Student record: class id, name, age and a birthday timestamp string. */
  case class Student(classId: Int, name: String, age: Int, birthday: String)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("typedLitAndLitTest")
      .getOrCreate()
    import spark.implicits._
    import org.apache.spark.sql.functions._

    spark.sparkContext.setLogLevel("WARN")

    /**
     * Name -> gender lookup table.
     * Declared as an immutable Map on purpose: typedLit converts the value into
     * Spark's internal (Catalyst) representation, and UDFs always receive map
     * columns back as scala.collection.immutable.Map, regardless of the
     * original collection type.
     */
    val sexByName: Map[String, String] = Map(
      "zhangsan" -> "F",
      "lisi" -> "M",
      "wangwu" -> "F"
    )

    // Build the student DataFrame from local data.
    val stuDF: DataFrame = Seq(
      Student(1001, "zhangsan", 20, "1995-12-11 12:12:13"),
      Student(1002, "lisi", 16, "2000-01-14 10:10:57"),
      Student(1003, "wangwu", 21, "1994-05-13 01:12:00")
    ).toDF()

    /**
     * User-defined function: look up a gender by name.
     * The map parameter MUST be typed as immutable Map — Spark hands map
     * columns to UDFs as scala.collection.immutable.Map, so declaring it as
     * mutable.HashMap here fails at runtime with a ClassCastException.
     * getOrElse guards against names absent from the lookup table (the
     * original apply-style lookup would throw NoSuchElementException).
     */
    val findSexOfName: UserDefinedFunction =
      udf((name: String, lookup: Map[String, String]) => lookup.getOrElse(name, "unknown"))

    /**
     * lit vs typedLit:
     * - typedLit (available since Spark 2.2.0,
     *   org.apache.spark.sql.functions.typedLit) can create constant columns
     *   from List, Seq and Map values.
     * - org.apache.spark.sql.functions.lit creates constant columns of simple
     *   types (string, int, float, long, ...).
     */
    stuDF.withColumn("classroom", lit("002"))
      .withColumn("map", typedLit(sexByName))
      .withColumn("sex", findSexOfName($"name", col("map")))
      .show(false)

    spark.stop()
  }
}
+-------+--------+---+-------------------+---------+------------------------------------------+---+
|classId|name |age|birthday |classroom|map |sex|
+-------+--------+---+-------------------+---------+------------------------------------------+---+
|1001 |zhangsan|20 |1995-12-11 12:12:13|002 |Map(lisi -> M, zhangsan -> F, wangwu -> F)|F |
|1002 |lisi |16 |2000-01-14 10:10:57|002 |Map(lisi -> M, zhangsan -> F, wangwu -> F)|M |
|1003 |wangwu |21 |1994-05-13 01:12:00|002 |Map(lisi -> M, zhangsan -> F, wangwu -> F)|F |
+-------+--------+---+-------------------+---------+------------------------------------------+---+
问题复现:若将自定义函数的 map 参数声明为 mutable.HashMap 类型,运行时会抛出:
Caused by: java.lang.ClassCastException: scala.collection.immutable.Map$Map3 cannot be cast to scala.collection.mutable.HashMap
问题分析:
typedLit 会把常量值转换为 Spark 内部(Catalyst)表示;因此无论外部使用的是 mutable.HashMap 还是 immutable.Map 类型,传入列后 UDF 接收到的都统一为 scala.collection.immutable.Map 类型。故自定义函数的参数类型必须声明为 immutable.Map,否则就会出现上述类型转换异常。