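# NOTE(review): stray page text, apparently unrelated to the script below
# ("37,743 社区成员" = "37,743 community members") -- confirm and remove.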




# Topic coherence sweep: train one AuthorTopicModel per candidate number of
# topics and record the average coherence score of each trained model.
grid = dict()
# The candidate topic counts must start at 1: the original note warns that
# starting at 0 leads to a division-by-zero error (the average computed in
# the loop divides by num_topics).
parameter_list = range(1, 102, 10)
chunksize = 500
passes = 5
iterations = 200
random_state = 2
for i in parameter_list:
    grid[i] = list()
    num_topics = i
    at_model = AuthorTopicModel(
        corpus=corpus, num_topics=num_topics, id2word=dictionary.id2token,
        author2doc=author2doc, chunksize=chunksize, passes=passes,
        gamma_threshold=1e-3, eval_every=1, iterations=iterations,
        alpha='auto', eta='auto', random_state=random_state,
        minimum_probability=0.01)
    top_topics = at_model.top_topics(corpus=corpus)
    # t[1] is taken as each topic's coherence score (per gensim's documented
    # top_topics return format); tc is their sum divided by num_topics.
    tc = sum(t[1] for t in top_topics) / num_topics
    grid[i].append(tc)
# grid maps each topic count to a one-element list, so the frame has one
# column per topic count and a single row of averaged coherence values.
df = pd.DataFrame(grid)
# The file name records the training settings used for this sweep.
df.to_csv('tc_c%d_i%d_p%d.csv' % (chunksize, iterations, passes))
# Show the resulting table.
print(df)
# Line chart of the tc values in the first row of df (y axis) against the
# column labels of df (x axis).
topic_counts = df.columns.values
tc_values = df.iloc[0].values
plt.figure(dpi=120, figsize=(8, 4))
plt.plot(topic_counts, tc_values, '#007A99')
# Place one x tick at every column label of df.
plt.xticks(topic_counts)
# Disabled alternative kept from the original: clamp the x axis with
# plt.xlim(parameter_list[0], parameter_list[-1]).
plt.xlabel('topics')
plt.ylabel('tc')
plt.show()