File size: 948 Bytes
c37824a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import numpy as np
import pandas as pd
from gensim.corpora import Dictionary, MmCorpus
from gensim.models import LdaModel, Word2Vec
import matplotlib.pyplot as plt
import streamlit as st
from pyLDAvis import prepared_data_to_html
import pyLDAvis.gensim_models as gensimvis

# Load the raw data, the serialized corpus, the dictionary and the LDA model.
df = pd.read_csv("./raw_corpus.csv")
corpus = MmCorpus("./corpus.mm")
# Named `dictionary` (not `dict`) so the builtin `dict` type is not shadowed
# for the rest of the module.
dictionary = Dictionary.load("./livedoor_demo.dict")
lda = LdaModel.load("./lda_demo.model")

# Show the first 100 rows of the raw data.
st.caption("生データ一覧")
st.dataframe(df.iloc[:100])

# Pie chart of the number of non-null DOCUMENT values per CATEGORY.
st.caption("記事のカテゴリ")
fig, ax = plt.subplots()
count = df[["CATEGORY", "DOCUMENT"]].groupby("CATEGORY").count()
count.plot.pie(y="DOCUMENT", ax=ax, ylabel="", legend=False)
st.pyplot(fig)
# pyplot keeps every figure it creates registered until it is explicitly
# closed; release it once it has been rendered so figures do not accumulate
# in a long-running Streamlit process.
plt.close(fig)

# Visualize the topics with pyLDAvis and embed the generated HTML in the page.
vis = gensimvis.prepare(lda, corpus, dictionary)
html_string = prepared_data_to_html(vis)
st.components.v1.html(html_string, width=1300, height=800)