File size: 2,006 Bytes
e04472d
9bde863
7c667a4
 
 
 
 
6f50418
9bde863
 
 
 
 
c9c991e
9bde863
 
 
 
7c667a4
be40263
9bde863
7c667a4
be40263
7c667a4
 
 
 
 
6b8afad
9bde863
 
be40263
857289a
be40263
2a562b6
 
c56ee66
be40263
 
2a562b6
 
 
be40263
2a562b6
be40263
 
 
2a562b6
be40263
 
2a562b6
be40263
 
 
 
 
9bde863
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
import wikipedia
import numpy as np
import pandas as pd
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt

def wikipediaScrap(article_name, wikipedia_language="en"):
    """Fetch a Wikipedia article and render a word cloud of its text.

    Args:
        article_name: Title passed to ``wikipedia.page`` to look up the
            article.
        wikipedia_language: Wikipedia language code (e.g. ``"en"``). A blank
            or ``None`` value falls back to ``"en"`` so that a request never
            silently reuses the language chosen by a previous request.

    Returns:
        A 5-tuple ``(title, content, page_url, linked_pages, figure)``:
        the article title, its text content, its URL, the linked article
        titles joined with newlines, and a matplotlib figure showing the
        word cloud generated from the content.

    Raises:
        Any exception raised by ``wikipedia.page`` or ``WordCloud.generate``
        propagates unchanged.
    """
    # ``wikipedia.set_lang`` mutates module-level state, so it is set on every
    # call; otherwise an empty language box would inherit the last request's
    # language.
    language = (wikipedia_language or "").strip() or "en"
    wikipedia.set_lang(language)

    page = wikipedia.page(article_name)
    title = page.title
    content = page.content
    page_url = page.url
    linked_pages = page.links

    # Create and generate a word cloud image from the article text.
    cloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)

    # Draw on a dedicated figure instead of pyplot's implicit global one, so
    # repeated or concurrent calls do not stack images onto shared axes.
    fig, ax = plt.subplots()
    ax.imshow(cloud, interpolation="bilinear")
    ax.axis("off")
    # Unregister the figure from pyplot so figures do not accumulate across
    # requests; the Figure object itself stays valid for rendering.
    plt.close(fig)

    return title, content, page_url, "\n".join(linked_pages), fig



# Custom CSS: hide the Gradio footer and colour the scrape button.
# Declarations inside a CSS rule must be separated by ';' (a ',' makes the
# whole declaration invalid and the button styling is dropped).
with gr.Blocks(
    css="footer {visibility: hidden} #dsd_button {background: purple; color: white}"
) as demo:
    # Inputs: article name and Wikipedia language code side by side.
    with gr.Row():
        inp = gr.Textbox(placeholder="Enter the name of wikipedia article", label="Wikipedia article name")
        lan = gr.Textbox(placeholder="Enter the language code", label="Language")
    btn = gr.Button("Start Scraping", elem_id="dsd_button")

    # Summary on the left, word cloud plot on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("""## About""")
            title = gr.Textbox(label="Article title")
            url = gr.Textbox(label="Article URL")
        with gr.Column():
            gr.Markdown("""## Wordcloud""")
            wordcloud = gr.Plot()

    gr.Markdown("""### Content""")
    with gr.Row():
        content = gr.Textbox(label="Content")

    gr.Markdown("""### Linked Articles""")
    with gr.Row():
        linked = gr.Textbox(label="Linked Articles")

    # The output order must match the tuple returned by wikipediaScrap:
    # (title, content, page_url, linked_pages, plot).
    with gr.Row():
        gr.Examples(
            examples=[["Eiffel Tower", "en"], ["Eiffel tower", 'ur']],
            fn=wikipediaScrap,
            inputs=[inp, lan],
            outputs=[title, content, url, linked, wordcloud],
            cache_examples=True,
        )
    btn.click(fn=wikipediaScrap, inputs=[inp, lan], outputs=[title, content, url, linked, wordcloud])

demo.launch()