import gradio as gr
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
from transformers import pipeline


def tag_visible(element):
    """Return True if a BeautifulSoup text node is user-visible page text.

    Filters out text that lives inside non-content tags (style/script/head/
    title/meta or the document root) as well as HTML comments.
    """
    if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
        return False
    if isinstance(element, Comment):
        return False
    return True


# Multi-label emotion classifier (SemEval-2018 Task 1 labels).
# top_k=None returns scores for every label, not just the best one.
classifier = pipeline(model="amitkayal/bert-finetuned-sem_eval-english", top_k=None)


def getTalkPage(wiki_page):
    """Fetch the visible text of the Talk page linked from a Wikipedia article.

    Parameters
    ----------
    wiki_page : str
        URL of an English Wikipedia article (must contain "wikipedia.org").

    Returns
    -------
    str or None
        Whitespace-joined visible text of the Talk page, or None when the URL
        is not a Wikipedia page, no discussion link exists, or fetching fails.
    """
    # BUG FIX: the original reassigned wiki_page to "https://en.wikipedia.org/"
    # before using it, so the caller's URL was always ignored.
    if "wikipedia.org" in wiki_page:
        response = requests.get(wiki_page)
        soup = BeautifulSoup(response.content, 'html.parser')
        # The article's Talk link is marked with rel="discussion".
        talk_url = soup.find_all("a", {"rel": "discussion"})
        if len(talk_url) > 0:
            talk_url = talk_url[0]["href"]
            try:
                talk_response = requests.get("https://en.wikipedia.org" + talk_url)
                talk_soup = BeautifulSoup(talk_response.content, 'html.parser')
                # find_all(string=True) replaces deprecated findAll(text=True).
                talk_texts = talk_soup.find_all(string=True)
                visible_texts = filter(tag_visible, talk_texts)
                return u" ".join(t.strip() for t in visible_texts)
            except Exception as error:
                # Best-effort: log and fall through to return None.
                print('Error occured: {}'.format(error))
    return None


def tone_talkpage(url):
    """Average the emotion-label scores of a Wikipedia article's Talk page.

    The Talk-page text is split into chunks of at most 200 words (the model's
    sequence limit is 512 tokens), each chunk is classified, and per-label
    scores are averaged across chunks.

    Parameters
    ----------
    url : str
        URL of an English Wikipedia article.

    Returns
    -------
    dict
        Mapping of emotion label -> mean score (all zeros when no Talk-page
        text could be retrieved).
    """
    talk_content = getTalkPage(url)
    tone_labels = {'anger': 0, 'anticipation': 0, 'disgust': 0, 'fear': 0,
                   'joy': 0, 'love': 0, 'optimism': 0, 'pessimism': 0,
                   'sadness': 0, 'surprise': 0, 'trust': 0}
    if talk_content:
        breakdown = talk_content.split()
        n = 200  # chunk size in words; model max sequence length is 512 tokens
        # BUG FIX: the original stepped over range(0, len(talk_content), n) —
        # the CHARACTER count — generating many empty trailing chunks that were
        # classified anyway and inflated the divisor below.
        breakdown_lst = [' '.join(breakdown[i:i + n]) for i in range(0, len(breakdown), n)]
        for ele in breakdown_lst:
            res = classifier(ele)[0]
            for tone_res in res:
                tone_labels[tone_res["label"]] += tone_res["score"]
        lst_len = len(breakdown_lst)
        if lst_len:  # guard against division by zero on empty text
            for key, val in tone_labels.items():
                tone_labels[key] = val / lst_len
    return tone_labels


iface = gr.Interface(fn=tone_talkpage, inputs="text", outputs="text")
iface.launch()