Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from keybert import KeyBERT | |
| import os | |
| import spacy | |
| import string | |
| from spacy import displacy | |
| from pathlib import Path | |
| from PIL import Image | |
| from keyword_extraction import keyword_extract | |
| from keyphrase_extraction import get_top_key_phrases, display_key_phrases | |
| from word import show_gram_plot | |
# spaCy pipeline used for named entity recognition; loaded once at import.
nlp = spacy.load("en_core_web_sm")

# KeyBERT instance shared by all requests; created on first use so that
# importing this module does not construct it.
_kw_model = None


def _get_kw_model():
    """Return the shared KeyBERT model, constructing it on the first call."""
    global _kw_model
    if _kw_model is None:
        _kw_model = KeyBERT()
    return _kw_model


def greet(name, descriptions):
    """Run the text analyses on *descriptions* and collect the results.

    All artifacts are written under ``results/<name>/`` (created if missing):
    1-, 2- and 3-gram plots, a named-entity HTML page, a 1-gram keyword plot,
    a top-key-phrase plot and a key-phrase HTML file.

    Args:
        name: Label of the data asset; used as the results sub-directory.
        descriptions: Free text to analyse. ASCII punctuation is stripped
            before any analysis runs.

    Returns:
        A list of seven items, in the order expected by the interface
        outputs: the three n-gram images, the keyword image, the
        key-phrase image, the displaCy entity markup, and whatever
        ``display_key_phrases`` returns (wired to an HTML output).
    """
    # NOTE(review): `name` comes straight from a UI textbox and is embedded
    # in a filesystem path; values containing '/' or '..' can escape
    # `results/`. Confirm whether it should be validated upstream.
    out_dir = f'results/{name}'
    os.makedirs(out_dir, exist_ok=True)

    # Drop every character listed in string.punctuation.
    descriptions = descriptions.translate(
        str.maketrans('', '', string.punctuation)
    )

    # run word count: render all plots first, then open them in the same order
    gram_sizes = (1, 2, 3)
    for n in gram_sizes:
        show_gram_plot(descriptions, n, 10, save_output=f'{out_dir}/{n}_gram.png')
    outputs = [Image.open(f'{out_dir}/{n}_gram.png') for n in gram_sizes]

    # run named entity recognition
    spacy_descriptions = nlp(descriptions)
    # Create a visualization of named entities
    ner_svg = displacy.render(
        spacy_descriptions, style="ent", jupyter=False, page=True
    )
    # write_text opens, writes and closes the file; the previous
    # `open(...).write(...)` left the handle open on every request.
    Path(f'{out_dir}/ner.html').write_text(ner_svg, encoding='utf-8')

    # run keyword extraction (1-gram keywords only)
    kw_model = _get_kw_model()
    keyword_extract(
        descriptions, kw_model, 1, save_output=f'{out_dir}/{1}_keyword.png'
    )
    outputs.append(Image.open(f'{out_dir}/1_keyword.png'))

    # run key phrase extraction
    get_top_key_phrases(
        descriptions, 10, save_output=f'{out_dir}/top_keyphrase.png'
    )
    keyphrase_svg = display_key_phrases(
        descriptions, save_output=f'{out_dir}/key_phrase.html'
    )
    outputs.append(Image.open(f'{out_dir}/top_keyphrase.png'))

    outputs += [ner_svg, keyphrase_svg]
    return outputs
# Two free-text inputs: a single-line asset name and a multi-line
# box holding the descriptions to analyse.
_input_boxes = [
    gr.Textbox(lines=1, placeholder="Data Asset Name"),
    gr.Textbox(lines=10, placeholder="All the descriptions for analysis"),
]

# Five image panes followed by two HTML panes, matching the order of the
# list returned by `greet`.
_output_kinds = ['image'] * 5 + ['html'] * 2

demo = gr.Interface(fn=greet, inputs=_input_boxes, outputs=_output_kinds)
demo.launch()