|
import subprocess |
|
import sys |
|
|
|
def install(package):
    """Install ``package`` with pip, using the running interpreter.

    Executes ``<sys.executable> -m pip install <package>`` with stdout and
    stderr both sent to ``subprocess.DEVNULL``.

    Args:
        package: Requirement string handed to ``pip install`` unchanged.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    # pip's output is discarded, so a failed install is only visible through
    # the exception raised by check_call.
    quiet = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
    subprocess.check_call(pip_command, **quiet)
|
|
|
# Install the third-party packages at start-up, before the keybert and
# keyphrase_vectorizers imports that follow are executed.
for _package in ("keybert", "keyphrase_vectorizers"):
    install(_package)
|
import warnings |
|
from keybert import KeyBERT |
|
from keyphrase_vectorizers import KeyphraseCountVectorizer |
|
import gradio as gr |
|
|
|
# Build the keyword-extraction model and the keyphrase vectorizer once, at
# import time, with UserWarning suppressed while they are constructed.
# NOTE(review): the source's indentation was lost; all four statements are
# assumed to belong to the `with` body -- confirm against the original file.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=UserWarning)

    # Model name handed to KeyBERT via its `model` argument.
    embedding = 'all-mpnet-base-v2'
    key_model = KeyBERT(model=embedding)

    # Candidate-phrase vectorizer later passed to extract_keywords().
    vectorizer_params = KeyphraseCountVectorizer(
        spacy_pipeline='en_core_web_sm',
        pos_pattern='<J.*>*<N.*>+',
        stop_words='english',
        lowercase=True,
    )
|
|
|
def get_keywords(course_name, course_desc):
    """Return a comma-separated string of keyphrases for a course.

    Both inputs are stripped and lower-cased, joined as
    ``"<name>. <description>"`` and passed to ``key_model.extract_keywords``
    together with the module-level ``vectorizer_params``, requesting
    ``top_n=7`` results with ``use_mmr=True``.

    Args:
        course_name: Course title text.
        course_desc: Course description text.

    Returns:
        The extracted phrases joined with ``", "``.
    """
    name = course_name.strip().lower()
    description = course_desc.strip().lower()
    text = f"{name}. {description}"

    # Presumably a list of (phrase, score) pairs -- verify against KeyBERT.
    scored_phrases = key_model.extract_keywords(
        text,
        vectorizer=vectorizer_params,
        stop_words='english',
        top_n=7,
        use_mmr=True,
    )

    # dict() keeps the first-seen order of the phrases and collapses repeats;
    # iterating it yields only the keys (the phrases).
    unique_phrases = dict(scored_phrases)
    return ", ".join(unique_phrases)
|
|
|
# Gradio UI: two text inputs (course name and description) are passed to
# get_keywords, and its return value is shown in a single output textbox.
iface = gr.Interface(
    fn=get_keywords,
    inputs=[
        gr.Textbox(label="Course Name"),
        gr.Textbox(label="Course Description"),
    ],
    outputs=gr.Textbox(label="Relevant Tags"),
    title="College Course Tags Generator",
    description="Generating tags/keywords based on Keyphrase-BERT Extraction",
)

# Start the Gradio app as soon as the script is executed.
iface.launch()