1,258
社区成员
发帖
与我相关
我的任务
分享
val result1 = rdd.map(x => { val data = x.split(","); val date = data(0); val vid = data(11); val duration = data(16); (date, vid, duration) }).toDF().na.drop().take(10) —— 你是要判断三元组中的任何一个元素都不为 null。
toDF().na.drop()
/**
 * Local smoke test: parses comma-separated rows into (date, vid, duration)
 * triples, keeps only the rows in which all three fields are non-empty, and
 * prints up to ten of them.
 *
 * Runs against a local Spark master; the context is always stopped on exit so
 * that repeated runs in the same JVM do not collide on a live SparkContext.
 */
def test23(): Unit = {
  val conf = new SparkConf()
    .setAppName("spack 本地测试")
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .setMaster("local") // local debugging
  val sc = new SparkContext(conf)
  try {
    // Plug your own code in here; use nonEmpty / isEmpty to test whether a field has a value.
    val rdd = sc.makeRDD(Array("a,a,a", "b,b,b", "c,,c", "d,d,d"), 3)
    rdd
      // limit = -1 keeps trailing empty fields, so "a,b," still yields three columns
      // instead of a two-element array that would fail on index 2.
      .map(_.split(",", -1))
      // Rows with fewer than three columns are dropped rather than throwing.
      .collect { case Array(date, vid, duration, _*) => (date, vid, duration) }
      .filter { case (date, vid, duration) =>
        date.nonEmpty && vid.nonEmpty && duration.nonEmpty
      }
      .take(10)
      .foreach(row => println(row))
  } finally {
    sc.stop()
  }
}
// Extracts (date, vid, duration) from columns 0, 11 and 16 of each comma-separated
// line of `rdd` and returns up to ten rows in which all three values are present.
val result1 = rdd
  // limit = -1 keeps trailing empty columns; the default split silently drops them,
  // which shifts the column count and can make index 16 disappear.
  .map(_.split(",", -1))
  // Index 16 requires at least 17 columns; shorter (malformed) rows are skipped
  // instead of failing the job with ArrayIndexOutOfBoundsException.
  .collect { case cols if cols.length > 16 => (cols(0), cols(11), cols(16)) }
  // String.split never yields null elements -- a missing value shows up as an
  // empty string -- so emptiness, not null-ness, is the meaningful check here.
  .filter { case (date, vid, duration) =>
    date.nonEmpty && vid.nonEmpty && duration.nonEmpty
  }
  .take(10)
可以这样试试