test2 / app.py
erinak's picture
test commit
c37824a
raw
history blame
948 Bytes
import numpy as np
import pandas as pd
from gensim.corpora import Dictionary, MmCorpus
from gensim.models import LdaModel, Word2Vec
import matplotlib.pyplot as plt
import streamlit as st
from pyLDAvis import prepared_data_to_html
import pyLDAvis.gensim_models as gensimvis
# Load the raw data, the bag-of-words corpus, the dictionary and the LDA model
# from files in the current working directory.
df = pd.read_csv("./raw_corpus.csv")
corpus = MmCorpus("./corpus.mm")
# Named `dictionary` (not `dict`) so the builtin `dict` type is not shadowed.
dictionary = Dictionary.load("./livedoor_demo.dict")
lda = LdaModel.load("./lda_demo.model")

# Show the first 100 rows of the raw data as an interactive table.
st.caption("生データ一覧")
st.dataframe(df.iloc[:100])

# Pie chart of article categories: each slice is the number of non-null
# DOCUMENT entries in that CATEGORY.
st.caption("記事のカテゴリ")
fig, ax = plt.subplots()
count = df[["CATEGORY", "DOCUMENT"]].groupby("CATEGORY").count()
count.plot.pie(y="DOCUMENT", ax=ax, ylabel="", legend=False)
st.pyplot(fig)
# Streamlit re-executes this script on every interaction; close the figure
# once it has been rendered so figures do not accumulate in pyplot's registry.
plt.close(fig)

# Visualize the topics with pyLDAvis and embed the generated HTML in the page.
vis = gensimvis.prepare(lda, corpus, dictionary)
html_string = prepared_data_to_html(vis)
st.components.v1.html(html_string, width=1300, height=800)