Preprocessing¶
# Load the "affirmative" CSV through the make_df helper (defined elsewhere in
# the notebook) and preview the result.
# NOTE(review): presumably make_df returns a pandas DataFrame, since .head()
# is called on it and it is later passed as `df=` — confirm against the helper.
affirmative = make_df("data/affirmative.csv")
affirmative.head()
# Same steps for the "negative" CSV.
negative = make_df("data/negative.csv")
negative.head()
LDA¶
# Module-level settings for the LDA run below.
# NOTE(review): presumably these globals are read by lda_on_chinese_articles,
# which is defined elsewhere — confirm against the helper.
stopwords = get_custom_stopwords("data/stopwords.txt", encoding='utf-8') # HIT stop-word dictionary
max_df = 0.9 # drop terms that appear in more than this fraction of documents (too common)
min_df = 5 # drop terms that appear in fewer than this number of documents (too rare)
n_features = 1000 # maximum number of features to extract
n_top_words = 20 # how many keywords to show when displaying each topic
col_content = "text" # name of the column that holds the text
# Fit a 3-topic model on the affirmative data and build the pyLDAvis view.
# NOTE(review): the TypeError recorded right after this cell indicates that some
# constructor reached by this call rejects `n_topics`; the notes that follow
# point to `n_components` as the replacement. The fix presumably belongs inside
# lda_on_chinese_articles, not at this call site — confirm.
lda, tf, vect = lda_on_chinese_articles(df = affirmative, n_topics = 3)
pyLDAvis.sklearn.prepare(lda, tf, vect)
TypeError: __init__() got an unexpected keyword argument 'n_topics'
Problems like this are usually caused by a misspelled or omitted letter in a name.
————————————————
Copyright notice: this is an original article by CSDN blogger "zhuimengshaonian66", released under the CC 4.0 BY-SA license. When reposting, please include the original source link and this notice. Original link: https://blog.csdn.net/zhuimengshaonian66/article/details/81700959
n_components
Replacing the old keyword with this one fixes the error.
# Repeat the 3-topic LDA fit on the negative data; this rebinds lda, tf and
# vect, so later cells see the results of this run.
lda, tf, vect = lda_on_chinese_articles(df = negative, n_topics = 3)
pyLDAvis.sklearn.prepare(lda, tf, vect)
# NOTE(review): identical to the previous call — presumably a re-run of the
# visualisation cell while investigating the FutureWarning recorded below.
pyLDAvis.sklearn.prepare(lda, tf, vect)
Reference: https://github.com/bmabey/pyLDAvis/issues/132
D:\install\miniconda\lib\site-packages\pyLDAvis\_prepare.py:257: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass 'sort=True'.
return pd.concat([default_term_info] + list(topic_dfs))
Reinstalling the package still did not resolve the warning.
# Inspect the installed pyLDAvis and pandas versions (the FutureWarning above
# is raised from pyLDAvis' call to pd.concat).
pyLDAvis.__version__
pd.__version__
# Install command kept for reference; left commented out.
# !pip install pyldavis
import pickle as pkl

# Serialise the current `lda` object to disk. At this point `lda` is whatever
# the most recent lda_on_chinese_articles call returned.
# NOTE(review): open() will fail if the "model" directory does not exist.
with open("model/sklearn-lda.pkl", 'wb') as fp:
    pkl.dump(lda, fp)

# Read the file back to confirm the round-trip works.
# pickle.load executes arbitrary code from the file, so only load pickles
# produced by a trusted source (here: the file written just above).
with open("model/sklearn-lda.pkl", 'rb') as fp:
    model0 = pkl.load(fp)

# Show the class of the restored object as a sanity check.
print(model0.__class__)