Spaces:

darylfunggg
/

text-analysis

Sleeping

text-analysis / app.py

Daryl Fung

fixed error

c082b57 over 2 years ago

2.57 kB

	import gradio as gr
	from keybert import KeyBERT
	import os
	import spacy
	import string
	from spacy import displacy
	from pathlib import Path
	from PIL import Image

	from keyword_extraction import keyword_extract
	from keyphrase_extraction import get_top_key_phrases, display_key_phrases
	from word import show_gram_plot


	# Load the spaCy pipeline "en_core_web_sm" once, at import time; greet()
	# runs it over the submitted text before rendering named entities.
	nlp = spacy.load("en_core_web_sm")


	def greet(name: str, descriptions: str) -> list:
	    """Run every text analysis on ``descriptions`` and collect the results.

	    All generated artifacts (plots and HTML pages) are written to a
	    per-run directory under ``results/``.

	    Args:
	        name: Label for this run (the "Data Asset Name" text box). It is
	            sanitised and used as the sub-directory name under
	            ``results/``.
	        descriptions: Free text to analyse. ASCII punctuation
	            (``string.punctuation``) is removed before any analysis runs.

	    Returns:
	        A list of seven items, in the order the Gradio interface declares
	        its outputs: the 1-, 2- and 3-gram plots, the keyword plot and the
	        top key-phrase plot (PIL images), followed by the named-entity
	        HTML markup and the value returned by ``display_key_phrases``.
	    """
	    # `name` comes straight from a free-text box, so it must never be able
	    # to steer the path: every character that is not alphanumeric, '-',
	    # '_' or a space (path separators and dots included) becomes '_',
	    # which keeps all output inside results/.
	    safe_name = ''.join(
	        ch if ch.isalnum() or ch in '-_ ' else '_' for ch in name
	    )
	    out_dir = Path('results') / safe_name
	    out_dir.mkdir(parents=True, exist_ok=True)

	    outputs = []
	    descriptions = descriptions.translate(
	        str.maketrans('', '', string.punctuation)
	    )

	    # run word count for 1-, 2- and 3-grams
	    for n in (1, 2, 3):
	        gram_png = str(out_dir / f'{n}_gram.png')
	        show_gram_plot(descriptions, n, 10, save_output=gram_png)
	        outputs.append(Image.open(gram_png))

	    # run named entity recognition and render the entities as a full
	    # HTML page
	    ner_svg = displacy.render(
	        nlp(descriptions), style="ent", jupyter=False, page=True
	    )
	    # write_text opens, writes and closes the file in one call, so no
	    # file handle is left dangling.
	    (out_dir / 'ner.html').write_text(ner_svg, encoding='utf-8')

	    # run keyword extraction
	    # NOTE(review): KeyBERT() is constructed on every request; presumably
	    # this reloads its model each time -- consider building it once at
	    # module level.
	    kw_model = KeyBERT()
	    keyword_png = str(out_dir / '1_keyword.png')
	    keyword_extract(descriptions, kw_model, 1, save_output=keyword_png)
	    outputs.append(Image.open(keyword_png))

	    # run key phrase extraction
	    top_phrase_png = str(out_dir / 'top_keyphrase.png')
	    get_top_key_phrases(descriptions, 10, save_output=top_phrase_png)
	    keyphrase_svg = display_key_phrases(
	        descriptions, save_output=str(out_dir / 'key_phrase.html')
	    )
	    outputs.append(Image.open(top_phrase_png))

	    # The two HTML results go last, matching the interface's output order.
	    outputs += [ner_svg, keyphrase_svg]

	    return outputs

	# Web UI: two text inputs feed greet(); its seven return values map, in
	# order, onto five image slots followed by two HTML slots.
	demo = gr.Interface(
	    fn=greet,
	    inputs=[
	        gr.Textbox(lines=1, placeholder="Data Asset Name"),
	        gr.Textbox(lines=10, placeholder="All the descriptions for analysis"),
	    ],
	    outputs=['image'] * 5 + ['html'] * 2,
	)
	# Start serving the interface as soon as the module is executed.
	demo.launch()