1,258
社区成员
发帖
与我相关
我的任务
分享
# Convert the feature columns of `feature_table` (every column except "tag")
# into single-element dense vector columns, persisting the intermediate
# DataFrame to disk once 20 columns have been converted.
#
# Relies on `spark`, `udf`, `Vectors`, `VectorUDT` and `StorageLevel` being
# defined/imported earlier in the file.

DiskLevel = StorageLevel.DISK_ONLY

# UDF that wraps a column value with Vectors.dense and declares the result
# type as VectorUDT.
udfunction = udf(lambda column: Vectors.dense(column), VectorUDT())

spark.sql("use itemRecommend")
OriginalFeatures = spark.sql("select * from feature_table")
columns = OriginalFeatures.columns
VectorFeatures = OriginalFeatures

# `i` counts the columns converted since the last persist.
i = 0
for column in columns:
    if column != "tag":
        i += 1
        print(column)
        # Replace the column in place with its vector-typed counterpart.
        VectorFeatures = VectorFeatures.withColumn(
            column, udfunction(VectorFeatures[column])
        )
        if i == 20:
            i = 0
            # persist() alone is lazy; the count() action is what actually
            # evaluates the plan and writes the persisted data.
            VectorFeatures.persist(storageLevel=DiskLevel)
            VectorFeatures.count()
            # NOTE(review): this stops after the first batch of 20 columns,
            # leaving any remaining columns unconverted. The counter reset
            # above suggests a persist-every-20 scheme may have been intended;
            # confirm whether this `break` is a debugging leftover.
            break