# NOTE: the lines below are residue from the hosting site's file viewer
# (status banner, file size, per-line commit hashes, line-number gutter),
# kept as comments so the module remains valid Python.
# Spaces:
# Runtime error
# Runtime error
# File size: 2,006 Bytes
# e04472d 9bde863 7c667a4 6f50418 9bde863 c9c991e 9bde863 7c667a4 be40263 9bde863 7c667a4 be40263 7c667a4 6b8afad 9bde863 be40263 857289a be40263 2a562b6 c56ee66 be40263 2a562b6 be40263 2a562b6 be40263 2a562b6 be40263 2a562b6 be40263 9bde863
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
import gradio as gr
import wikipedia
import numpy as np
import pandas as pd
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
def wikipediaScrap(article_name, wikipedia_language="en"):
    """Fetch a Wikipedia article and build a word cloud from its text.

    Args:
        article_name: Article title passed to ``wikipedia.page``.
        wikipedia_language: Wikipedia language code. A blank or ``None``
            value falls back to ``"en"`` (the parameter's default).

    Returns:
        A 5-tuple ``(title, content, page_url, linked_pages, figure)`` where
        ``linked_pages`` is the article's links joined with newlines and
        ``figure`` is a matplotlib ``Figure`` showing the word cloud.

    Raises:
        Any exception raised by ``wikipedia.page`` or ``WordCloud.generate``
        propagates unchanged; nothing is caught here.
    """
    # Function-scope import keeps this block self-contained; matplotlib is
    # already a dependency of this file.
    from matplotlib.figure import Figure

    # The language is always set explicitly: set_lang changes package-wide
    # state, so skipping it for a blank input (an empty UI textbox arrives
    # as "") would silently reuse whatever language the previous call chose.
    language = (wikipedia_language or "").strip() or "en"
    wikipedia.set_lang(language)

    et_page = wikipedia.page(article_name)
    title = et_page.title
    content = et_page.content
    page_url = et_page.url
    linked_pages = et_page.links

    # Create and generate a word cloud image from the article body.
    cloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)

    # Draw on a fresh Figure that is not registered with pyplot, so repeated
    # or concurrent calls do not stack images on one shared global figure.
    figure = Figure()
    axes = figure.subplots()
    axes.imshow(cloud, interpolation='bilinear')
    axes.axis("off")

    return title, content, page_url, "\n".join(linked_pages), figure
# Gradio UI: two inputs (article name, language code), a trigger button and
# five outputs fed by wikipediaScrap.
# NOTE(review): the source arrived with its indentation flattened; the
# nesting below is reconstructed from the statement order — confirm layout.
# CSS fix: declarations inside a rule are separated by ";" — the original
# used "," which invalidated the #dsd_button styling.
with gr.Blocks(
    css="footer {visibility: hidden} #dsd_button {background: purple; color: white}"
) as demo:
    # Input row.
    with gr.Row():
        inp = gr.Textbox(placeholder="Enter the name of wikipedia article", label="Wikipedia article name")
        lan = gr.Textbox(placeholder="Enter the language code", label="Language")
    btn = gr.Button("Start Scraping", elem_id="dsd_button")

    # Summary fields on the left, word cloud plot on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("""## About""")
            title = gr.Textbox(label="Article title")
            url = gr.Textbox(label="Article URL")
        with gr.Column():
            gr.Markdown("""## Wordcloud""")
            wordcloud = gr.Plot()

    gr.Markdown("""### Content""")
    with gr.Row():
        content = gr.Textbox(label="Content")

    gr.Markdown("""### Linked Articles""")
    with gr.Row():
        linked = gr.Textbox(label="Linked Articles")

    # Output order must match wikipediaScrap's return tuple:
    # (title, content, page_url, linked_pages, plot).
    with gr.Row():
        gr.Examples(
            examples=[["Eiffel Tower", "en"], ["Eiffel tower", 'ur']],
            fn=wikipediaScrap,
            inputs=[inp, lan],
            outputs=[title, content, url, linked, wordcloud],
            cache_examples=True,
        )

    btn.click(fn=wikipediaScrap, inputs=[inp, lan], outputs=[title, content, url, linked, wordcloud])

demo.launch()