File size: 2,364 Bytes
914ca91 f9ed5bd 82bab6c 914ca91 13a4831 9ef9c2e 914ca91 8913fd3 1ae5746 914ca91 7fcb78b 4704dad 7fcb78b 914ca91 af57430 82bab6c 4704dad 82bab6c af57430 82bab6c 8913fd3 82bab6c 4704dad 82bab6c af57430 914ca91 aea61de |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
# Fetch and build both NEologd dictionaries next to the app on first start.
# Each command clones the repository, enters it and runs its installer with
# the checkout directory itself passed as `-p $PWD`.
#
# Streamlit re-executes this script from the top on every user interaction,
# so the bootstrap is skipped once a checkout exists; otherwise every rerun
# would spawn a `git clone` that can only fail because the target directory
# is already populated (and `&&` would then skip the installer anyway).
for _dictionary in ('mecab-ipadic-neologd', 'mecab-unidic-neologd'):
    if os.path.isdir(os.path.join(_dictionary, '.git')):
        continue
    os.system(
        f'git clone --depth 1 https://github.com/neologd/{_dictionary}.git'
        f' && cd {_dictionary}'
        f' && ./bin/install-{_dictionary} -n -y -u -p $PWD'
    )
import streamlit as st
import MeCab
# Page title (browser tab) and on-page heading.
st.set_page_config(page_title="NEologd demo")
st.title('NEologd demo')

# A bare string literal at module level is rendered as Markdown by
# Streamlit's "magic" feature.  The blank line before the link definition is
# required: in CommonMark a link reference definition cannot interrupt a
# paragraph, so without it `[NEologd][]` would not resolve to a link.
"""
Input the text you'd like to analyze. See the [NEologd][] docs for more details.

[NEologd]: https://github.com/neologd
"""

# Re-run the installer inside each existing dictionary checkout on demand.
if st.button('Update NEologd', help='It may take some time'):
    update_commands = (
        'cd mecab-ipadic-neologd && ./bin/install-mecab-ipadic-neologd -n -y -u -p $PWD',
        'cd mecab-unidic-neologd && ./bin/install-mecab-unidic-neologd -n -y -u -p $PWD',
    )
    # os.system returns the command's exit status; anything non-zero means
    # that installer run did not complete, which used to go unnoticed.
    failed_commands = []
    for command in update_commands:
        if os.system(command) != 0:
            failed_commands.append(command)
    if failed_commands:
        st.error('NEologd update failed: ' + '; '.join(failed_commands))
    else:
        st.success('NEologd dictionaries updated.')

# Text to analyze, pre-filled with a sample sentence; read by the analysis
# sections further down the script.
text = st.text_area("input", "麩菓子は、麩を主材料とした日本の菓子。")
def make_row(word, kana_index=7, lemma_index=6):
    """Flatten one analyzed token into a row for the result table.

    Args:
        word: Object exposing ``surface`` (the token text) and ``feature``
            (a comma-separated feature string); the script passes MeCab
            nodes here.
        kana_index: Position of the reading within the feature fields.
        lemma_index: Position of the base form within the feature fields.

    Returns:
        A dict with the keys ``surface``, ``kana``, ``lemma`` and
        ``pos1``..``pos4``.  Any position the feature string does not have
        is reported as ``None``.
    """
    import csv  # local import keeps this function self-contained

    feature = word.feature
    if '"' in feature:
        # The feature string is CSV: a field that itself contains a comma
        # (for example a lemma such as "1,000円") is wrapped in double
        # quotes.  A plain split would cut it in two and shift every later
        # column, so quoted strings go through a real CSV parser.
        fields = next(csv.reader([feature]), [])
    else:
        fields = feature.split(",")

    # Index -> value mapping so that missing positions fall back to None.
    # https://stackoverflow.com/a/49774255/5602117
    ff = dict(enumerate(fields))
    return dict(surface=word.surface, kana=ff.get(kana_index), lemma=ff.get(lemma_index),
                pos1=ff.get(0), pos2=ff.get(1), pos3=ff.get(2), pos4=ff.get(3))
"""
#### [mecab-ipadic-NEologd : Neologism dictionary for MeCab](https://github.com/neologd/mecab-ipadic-neologd)
"""
def _analyze(source_text, tagger_args, **row_options):
    """Tokenize ``source_text`` with one MeCab configuration.

    Args:
        source_text: The text to analyze.
        tagger_args: Argument string handed to ``MeCab.Tagger``.
        **row_options: Extra keyword arguments forwarded to ``make_row``
            (``kana_index`` / ``lemma_index`` overrides).

    Returns:
        A list with one ``make_row`` dict per token, in sentence order.
    """
    tagger = MeCab.Tagger(tagger_args)
    rows = []
    node = tagger.parseToNode(source_text)
    # parseToNode yields a linked list; follow ``next`` until it runs out.
    while node:
        # Nodes whose feature starts with 'BOS/EOS' are sentence-boundary
        # markers rather than tokens, so they are left out of the table.
        if not node.feature.startswith('BOS/EOS'):
            rows.append(make_row(node, **row_options))
        node = node.next
    return rows


# mecab-ipadic-NEologd results (section heading is rendered just above).
data = _analyze(text, '-r /etc/mecabrc -d /home/user/app/mecab-ipadic-neologd')
st.table(data)

"""
#### [mecab-unidic-NEologd : Neologism dictionary for unidic-mecab](https://github.com/neologd/mecab-unidic-neologd)
"""
# This dictionary is read with different reading/base-form positions than
# the make_row defaults, hence the explicit index overrides.
data = _analyze(text, '-r /etc/mecabrc -d /home/user/app/mecab-unidic-neologd',
                kana_index=9, lemma_index=7)
st.table(data)

"""
#### [MeCab](https://taku910.github.io/mecab/)
"""
# Plain MeCab with whatever dictionary /etc/mecabrc selects.
data = _analyze(text, '-r /etc/mecabrc')
st.table(data)
|