aliicemill committed on
Commit
026c02f
·
verified ·
1 Parent(s): 5140b1f

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -129
app.py DELETED
@@ -1,129 +0,0 @@
1
- # Importing Libraries
2
- # author:@aliicemill
3
- import nltk
4
- from PIL import Image
5
- import matplotlib.pyplot as plt
6
- import streamlit as st
7
- from nltk.corpus import stopwords
8
- from nltk.tokenize import word_tokenize
9
- import string
10
- import stylecloud
11
-
12
# Download the NLTK data used below: the stop-word lists and the Punkt
# sentence tokenizer models that word_tokenize() relies on.
nltk.download('stopwords')
nltk.download('punkt')
# NLTK 3.8.2+ loads the tokenizer models from 'punkt_tab' instead of the
# pickled 'punkt' package, so fetch it as well to keep word_tokenize() working
# on current releases.
nltk.download('punkt_tab')
15
-
16
def preprocess_and_create_stylecloud(file_path, output_name='stylecloud.png',
                                     icon_name='fas fa-laptop', lang='english'):
    """Build a stylecloud image from a UTF-8 text file and display it.

    The text is tokenized, stop words are removed, the remaining tokens are
    passed to ``stylecloud.gen_stylecloud`` and the resulting image is shown
    in a matplotlib figure.

    Args:
        file_path: Path of the text file to read (opened as UTF-8).
        output_name: Path the generated image is written to.
        icon_name: Icon identifier forwarded to ``stylecloud.gen_stylecloud``.
        lang: ``'chinese'`` segments the text with jieba and filters it with
            the words listed in ``chinese_stopwords.txt`` (one per line);
            any other value is handed to NLTK as the stop-word corpus name
            and tokenizer language.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read()

    if lang == 'chinese':
        # jieba is imported lazily so it is only required for Chinese input.
        import jieba
        tokens = jieba.lcut(text)
        # Read the stop-word list inside a context manager so the file handle
        # is closed deterministically instead of being left to the GC.
        with open('chinese_stopwords.txt', encoding='utf-8') as stop_file:
            stop_words = {line.strip() for line in stop_file}
    else:
        stop_words = set(stopwords.words(lang))

        # Strip ASCII punctuation before tokenizing so that marks do not end
        # up as separate tokens in the cloud.
        translator = str.maketrans('', '', string.punctuation)
        text = text.translate(translator)

        # Lowercase so the comparison with the stop-word list matches.
        tokens = word_tokenize(text.lower(), language=lang)

    filtered_tokens = [word for word in tokens if word not in stop_words]
    processed_text = ' '.join(filtered_tokens)

    stylecloud.gen_stylecloud(text=processed_text,
                              icon_name=icon_name,
                              output_name=output_name)

    # Show the generated image without axes.
    im = Image.open(output_name)
    plt.figure(figsize=(10, 10))
    plt.imshow(im)
    plt.axis('off')
    plt.show()
54
-
55
def create_stylecloud(text, language, icon):
    """Generate a stylecloud image from ``text`` and return its file name.

    Args:
        text: Raw text to visualize.
        language: ``'cn'`` for Chinese (segmented with jieba and filtered with
            ``chinese_stopwords.txt``), one of the short codes ``'tr'``,
            ``'en'``, ``'fr'``, ``'ru'``, ``'de'``, or any language name that
            NLTK itself accepts (e.g. ``'english'``).
        icon: Icon identifier forwarded to ``stylecloud.gen_stylecloud``.

    Returns:
        The path of the written image, always ``"stylecloud.png"``.
    """
    output_file = "stylecloud.png"

    # NLTK's stop-word corpus and Punkt tokenizer are keyed by full language
    # names, not by the two-letter codes the UI offers; passing 'en' etc.
    # straight through fails because no such corpus file exists.
    nltk_language_names = {
        'tr': 'turkish',
        'en': 'english',
        'fr': 'french',
        'ru': 'russian',
        'de': 'german',
    }

    if language == 'cn':
        # jieba is imported lazily so it is only required for Chinese input.
        import jieba
        tokens = jieba.lcut(text)
        with open('chinese_stopwords.txt', 'r', encoding='utf-8') as f:
            stop_words = set(f.read().split())
    else:
        # Unknown values fall through unchanged so callers that already pass
        # a full NLTK language name keep working.
        nltk_language = nltk_language_names.get(language, language)
        stop_words = set(stopwords.words(nltk_language))
        translator = str.maketrans('', '', string.punctuation)
        text = text.translate(translator)
        tokens = word_tokenize(text.lower(), language=nltk_language)

    filtered_tokens = [word for word in tokens if word not in stop_words]
    processed_text = ' '.join(filtered_tokens)

    stylecloud.gen_stylecloud(text=processed_text,
                              icon_name=icon,
                              output_name=output_file)

    return output_file
78
-
79
# Streamlit page: upload a text file, pick a language and an icon shape, then
# render and offer the generated word cloud for download.
st.title("WordCloud Creator")

file = st.file_uploader("Import txt file", type=["txt"])

# Everything below needs the uploaded text, so it all lives under this guard;
# otherwise pressing "Create" before uploading would reference an undefined
# `text`.
if file is not None:
    text = file.getvalue().decode("utf-8")

    language = st.radio("Language", ["tr", "en", "fr", "ru", "de", "cn"])

    icon_names = [
        'fa-address-book', 'fa-address-card', 'fa-angry', 'fa-arrow-alt-circle-down', 'fa-arrow-alt-circle-left',
        'fa-arrow-alt-circle-right', 'fa-arrow-alt-circle-up', 'fa-bell', 'fa-bell-slash', 'fa-bookmark', 'fa-building',
        'fa-calendar', 'fa-calendar-alt', 'fa-calendar-check', 'fa-calendar-minus', 'fa-calendar-plus', 'fa-calendar-times',
        'fa-caret-square-down', 'fa-caret-square-left', 'fa-caret-square-right', 'fa-caret-square-up', 'fa-chart-bar',
        'fa-check-circle', 'fa-check-square', 'fa-circle', 'fa-clipboard', 'fa-clock', 'fa-clone', 'fa-closed-captioning',
        'fa-comment', 'fa-comment-alt', 'fa-comment-dots', 'fa-comments', 'fa-compass', 'fa-copy', 'fa-copyright', 'fa-credit-card',
        'fa-dizzy', 'fa-dot-circle', 'fa-edit', 'fa-envelope', 'fa-envelope-open', 'fa-eye', 'fa-eye-slash', 'fa-file',
        'fa-file-alt', 'fa-file-archive', 'fa-file-audio', 'fa-file-code', 'fa-file-excel', 'fa-file-image', 'fa-file-pdf',
        'fa-file-powerpoint', 'fa-file-video', 'fa-file-word', 'fa-flag', 'fa-flushed', 'fa-folder', 'fa-folder-open', 'fa-frown',
        'fa-frown-open', 'fa-futbol', 'fa-gem', 'fa-grimace', 'fa-grin', 'fa-grin-alt', 'fa-grin-beam', 'fa-grin-beam-sweat',
        'fa-grin-hearts', 'fa-grin-squint', 'fa-grin-squint-tears', 'fa-grin-stars', 'fa-grin-tears', 'fa-grin-tongue',
        'fa-grin-tongue-squint', 'fa-grin-tongue-wink', 'fa-grin-wink', 'fa-hand-lizard', 'fa-hand-paper', 'fa-hand-peace',
        'fa-hand-point-down', 'fa-hand-point-left', 'fa-hand-point-right', 'fa-hand-point-up', 'fa-hand-pointer', 'fa-hand-rock',
        'fa-hand-scissors', 'fa-hand-spock', 'fa-handshake', 'fa-hdd', 'fa-heart', 'fa-hospital', 'fa-hourglass', 'fa-id-badge',
        'fa-id-card', 'fa-image', 'fa-images', 'fa-keyboard', 'fa-kiss', 'fa-kiss-beam', 'fa-kiss-wink-heart', 'fa-laugh',
        'fa-laugh-beam', 'fa-laugh-squint', 'fa-laugh-wink', 'fa-lemon', 'fa-life-ring', 'fa-lightbulb', 'fa-list-alt',
        'fa-map', 'fa-meh', 'fa-meh-blank', 'fa-meh-rolling-eyes', 'fa-minus-square', 'fa-money-bill-alt', 'fa-moon',
        'fa-newspaper', 'fa-object-group', 'fa-object-ungroup', 'fa-paper-plane', 'fa-pause-circle', 'fa-play-circle',
        'fa-plus-square', 'fa-question-circle', 'fa-registered', 'fa-sad-cry', 'fa-sad-tear', 'fa-save', 'fa-share-square',
        'fa-smile', 'fa-smile-beam', 'fa-smile-wink', 'fa-snowflake', 'fa-square', 'fa-star', 'fa-star-half', 'fa-sticky-note',
        'fa-stop-circle', 'fa-sun', 'fa-surprise', 'fa-thumbs-down', 'fa-thumbs-up', 'fa-times-circle', 'fa-tired', 'fa-trash-alt',
        'fa-user', 'fa-user-circle', 'fa-window-close', 'fa-window-maximize', 'fa-window-minimize', 'fa-window-restore'
    ]
    # Prefix each icon with the "fas " style class before handing it on.
    icon_options = ["fas " + name for name in icon_names]

    icon = st.selectbox("Icon Selection", icon_options, index=1)

    if st.button("Create"):
        output_file = create_stylecloud(text, language, icon)
        st.success(f"Here is your file: {output_file}")

        image = Image.open(output_file)
        st.image(image, caption='WordCloud', use_column_width=True)

        # Use a distinct name for the image handle so the uploaded `file`
        # object above is not rebound.
        with open(output_file, "rb") as image_file:
            st.download_button(
                label="Download WordCloud",
                data=image_file,
                file_name=output_file,
                mime="image/png"
            )