1,258
社区成员
发帖
与我相关
我的任务
分享
# Build the LDA training input from `data`:
#   1. key every row by its first field, keeping the remaining fields as value;
#   2. gather all values that share a key into a list;
#   3. convert each (key, values) pair with build_sparse_vector;
#   4. emit the first two items of each result as a two-element list
#      (presumably [document id, term-count vector] -- confirm against
#      build_sparse_vector).
features = (
    data.rdd
    .map(lambda x: (x[0], x[1:]))
    .groupByKey()
    .mapValues(list)
    .map(lambda row: build_sparse_vector(row, tag_index_map_in))
    .map(lambda x: [x[0], x[1]])
    .cache()  # mark the resulting RDD for caching
)
# Train a 10-topic LDA model with the "em" optimizer, seeded with the current
# wall-clock time truncated to whole seconds. int() is used instead of the
# Python 2-only long() builtin, which raises NameError on Python 3; int()
# yields the same value on both versions.
model = LDA.train(features, k=10, seed=int(time.time()), optimizer="em")
# Build the LDA training input from `data`:
#   1. key every row by its first field, keeping the remaining fields as value;
#   2. gather all values that share a key into a list;
#   3. convert each (key, values) pair with build_sparse_vector;
#   4. emit the first two items of each result as a two-element list
#      (presumably [document id, term-count vector] -- confirm against
#      build_sparse_vector).
features = (
    data.rdd
    .map(lambda x: (x[0], x[1:]))
    .groupByKey()
    .mapValues(list)
    .map(lambda row: build_sparse_vector(row, tag_index_map_in))
    .map(lambda x: [x[0], x[1]])
    .cache()  # mark the resulting RDD for caching
)
# Pull every element back to the driver, then redistribute the resulting
# Python list as a fresh RDD.
# NOTE(review): this routes the whole dataset through driver memory; it looks
# like a deliberate workaround -- confirm whether passing `features` to
# LDA.train directly is viable before removing it.
cc = features.collect()
rdd = sc.parallelize(cc)
# Train a 10-topic LDA model with the "em" optimizer, seeded with the current
# wall-clock time truncated to whole seconds. int() is used instead of the
# Python 2-only long() builtin, which raises NameError on Python 3; int()
# yields the same value on both versions.
model = LDA.train(rdd, k=10, seed=int(time.time()), optimizer="em")