wavesoumen committed on
Commit
21be376
1 Parent(s): f7cdb03

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -10
app.py CHANGED
@@ -2,9 +2,15 @@ import streamlit as st
2
  from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
  import nltk
4
  from youtube_transcript_api import YouTubeTranscriptApi
 
 
 
 
5
 
6
  # Download NLTK data
7
  nltk.download('punkt')
 
 
8
 
9
  # Load models and tokenizers
10
  summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'
@@ -24,6 +30,15 @@ def summarize_text(text, prefix):
24
  result = summary_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
25
  return result[0]
26
 
 
 
 
 
 
 
 
 
 
27
  # Function to fetch YouTube transcript
28
  def fetch_transcript(url):
29
  video_id = url.split('watch?v=')[-1]
@@ -34,19 +49,61 @@ def fetch_transcript(url):
34
  except Exception as e:
35
  return str(e)
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  # Streamlit app title
38
- st.title("Multi-purpose Machine Learning App: WAVE_AI")
39
 
40
  # Create tabs for different functionalities
41
- tab1, tab2, tab3, tab4 = st.tabs(["Text Summarization", "Text Tag Generation", "Image Captioning", "YouTube Transcript"])
42
 
43
  # Text Summarization Tab
44
  with tab1:
45
- st.header("Text Summarization")
46
 
47
  input_text = st.text_area("Enter the text to summarize:", height=300)
48
 
49
- if st.button("Generate Summaries"):
50
  if input_text:
51
  title1 = summarize_text(input_text, 'summary: ')
52
  title2 = summarize_text(input_text, 'summary brief: ')
@@ -59,17 +116,14 @@ with tab1:
59
 
60
  # Text Tag Generation Tab
61
  with tab2:
62
- st.header("Text Tag Generation")
63
 
64
  text = st.text_area("Enter the text for tag extraction:", height=200)
65
 
66
  if st.button("Generate Tags"):
67
  if text:
68
  try:
69
- inputs = tag_tokenizer([text], max_length=512, truncation=True, return_tensors="pt")
70
- output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
71
- decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
72
- tags = list(set(decoded_output.strip().split(", ")))
73
  st.write("**Generated Tags:**")
74
  st.write(tags)
75
  except Exception as e:
@@ -83,7 +137,8 @@ with tab3:
83
 
84
  image_url = st.text_input("Enter the URL of the image:")
85
 
86
- if image_url:
 
87
  try:
88
  st.image(image_url, caption="Provided Image", use_column_width=True)
89
  caption = captioner(image_url)
@@ -108,3 +163,36 @@ with tab4:
108
  st.error(f"An error occurred: {transcript}")
109
  else:
110
  st.warning("Please enter a URL.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
  import nltk
4
  from youtube_transcript_api import YouTubeTranscriptApi
5
+ import torch
6
+ from textblob import TextBlob
7
+ from nltk.corpus import stopwords
8
+ from nltk.tokenize import word_tokenize
9
 
10
  # Download NLTK data
11
  nltk.download('punkt')
12
+ nltk.download('averaged_perceptron_tagger')
13
+ nltk.download('stopwords')
14
 
15
  # Load models and tokenizers
16
  summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'
 
30
  result = summary_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
31
  return result[0]
32
 
33
+ # Function to generate tags
34
def generate_tags(text):
    """Generate a de-duplicated list of tags for ``text``.

    The text is tokenized with ``tag_tokenizer`` (truncated to 256 tokens),
    fed to ``tag_model.generate`` (beam search with sampling, one returned
    sequence), and the decoded output is split on commas into tags.

    Args:
        text: Input text to derive tags from.

    Returns:
        list: Unique, non-empty tag strings in the order they first appear
        in the decoded model output.
    """
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        inputs = tag_tokenizer(text, max_length=256, truncation=True, return_tensors="pt")
        output = tag_model.generate(
            **inputs,
            num_beams=8,
            do_sample=True,
            min_length=10,
            max_length=64,
            num_return_sequences=1,
        )
    decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]

    # Split on bare commas and strip each piece so that "a,b" and "a, b"
    # behave the same and stray whitespace never ends up inside a tag.
    candidates = (piece.strip() for piece in decoded_output.split(","))

    # dict.fromkeys removes duplicates while keeping first-seen order;
    # list(set(...)) returned the tags in an order that varied between runs.
    return list(dict.fromkeys(tag for tag in candidates if tag))
41
+
42
  # Function to fetch YouTube transcript
43
  def fetch_transcript(url):
44
  video_id = url.split('watch?v=')[-1]
 
49
  except Exception as e:
50
  return str(e)
51
 
52
+ # Function to extract keywords and generate hashtags
53
def extract_keywords(content):
    """Return every token of ``content`` whose POS tag starts with ``'NN'``.

    The content is lower-cased, split into sentences with
    ``nltk.sent_tokenize``, and each sentence is word-tokenized and tagged
    with ``nltk.pos_tag``.

    Args:
        content: Text to scan for keywords.

    Returns:
        list: Matching tokens in document order; duplicates are kept.
    """
    noun_tokens = []
    for sentence in nltk.sent_tokenize(content.lower()):
        tagged_tokens = nltk.pos_tag(nltk.word_tokenize(sentence))
        # Keep only tokens whose tag begins with "NN" (the noun tag family).
        noun_tokens.extend(
            token for token, pos in tagged_tokens if pos.startswith('NN')
        )
    return noun_tokens
64
+
65
def generate_hashtags(content, max_hashtags=10):
    """Build a list of unique hashtags from the keywords found in ``content``.

    Each keyword returned by ``extract_keywords`` is prefixed with ``#``.
    Hashtags longer than 20 characters (including the ``#``) are discarded.

    Args:
        content: Text to derive hashtags from.
        max_hashtags: Upper bound on the number of hashtags returned.

    Returns:
        list: At most ``max_hashtags`` distinct hashtags, in the order their
        keywords first appear in ``content``.
    """
    keywords = extract_keywords(content)
    candidates = ("#" + keyword for keyword in keywords)

    # De-duplicate before slicing: a noun repeated in the post would
    # otherwise fill the result with copies of the same hashtag and crowd
    # out the others. dict.fromkeys keeps first-seen order.
    unique_hashtags = dict.fromkeys(
        hashtag for hashtag in candidates if len(hashtag) <= 20
    )
    return list(unique_hashtags)[:max_hashtags]
73
+
74
+ # Function to extract point of view
75
def extract_point_of_view(text, threshold=0.5):
    """Classify the overall tone of ``text`` as Positive, Negative or Neutral.

    English stop words are removed (negation words are kept), the remaining
    tokens are re-joined, and the TextBlob sentiment polarity of the result
    is compared against ``threshold``.

    Args:
        text: Text to analyse; it is converted with ``str()`` first.
        threshold: Polarity magnitude that must be exceeded for a non-neutral
            verdict. Defaults to 0.5, the cut-off this function has always used.

    Returns:
        str: ``"Positive"`` if polarity > ``threshold``, ``"Negative"`` if
        polarity < ``-threshold``, otherwise ``"Neutral"``.
    """
    # NLTK's English stop-word list includes "no", "not" and "nor". Removing
    # them would turn "not good" into "good" before scoring, so negations
    # are kept in the text.
    negations = {"no", "not", "nor", "never", "n't"}
    stop_words = set(stopwords.words('english')) - negations

    words = word_tokenize(str(text))
    filtered_text = ' '.join(
        word for word in words if word.casefold() not in stop_words
    )

    polarity = TextBlob(filtered_text).sentiment.polarity

    if polarity > threshold:
        return "Positive"
    if polarity < -threshold:
        return "Negative"
    return "Neutral"
93
+
94
  # Streamlit app title
95
+ st.title("Multi-purpose AI App: WAVE_AI")
96
 
97
  # Create tabs for different functionalities
98
+ tab1, tab2, tab3, tab4, tab5 = st.tabs(["Text Summarization", "Text Tag Generation", "Image Captioning", "YouTube Transcript", "LinkedIn Post Analysis"])
99
 
100
  # Text Summarization Tab
101
  with tab1:
102
+ st.header("Summarize Title Maker")
103
 
104
  input_text = st.text_area("Enter the text to summarize:", height=300)
105
 
106
+ if st.button("Generate the Title"):
107
  if input_text:
108
  title1 = summarize_text(input_text, 'summary: ')
109
  title2 = summarize_text(input_text, 'summary brief: ')
 
116
 
117
  # Text Tag Generation Tab
118
  with tab2:
119
+ st.header("Tag Generation from Text")
120
 
121
  text = st.text_area("Enter the text for tag extraction:", height=200)
122
 
123
  if st.button("Generate Tags"):
124
  if text:
125
  try:
126
+ tags = generate_tags(text)
 
 
 
127
  st.write("**Generated Tags:**")
128
  st.write(tags)
129
  except Exception as e:
 
137
 
138
  image_url = st.text_input("Enter the URL of the image:")
139
 
140
+ if st.button("Analysis Image"):
141
+ if image_url:
142
  try:
143
  st.image(image_url, caption="Provided Image", use_column_width=True)
144
  caption = captioner(image_url)
 
163
  st.error(f"An error occurred: {transcript}")
164
  else:
165
  st.warning("Please enter a URL.")
166
+
167
+ # LinkedIn Post Analysis Tab
168
with tab5:
    # LinkedIn post analysis: for one pasted post, show model-generated tags,
    # two summaries, noun-based hashtags and an overall tone label.
    st.header("LinkedIn Post Analysis AI")

    text = st.text_area("Enter the LinkedIn Post:")

    if st.button("Analyze:"):
        if text:
            # Model inference and NLTK/TextBlob calls can fail at runtime;
            # report the failure in the UI, matching the other tabs'
            # error messages. Results computed before the failure stay visible.
            try:
                # Generate tags
                tags = generate_tags(text)
                st.subheader("The Most Tracked KeyWords:")
                st.write(tags)

                # Generate summaries
                summary1 = summarize_text(text, 'summary: ')
                summary2 = summarize_text(text, 'summary brief: ')
                st.subheader("Summary Title 1:")
                st.write(summary1)
                st.subheader("Summary Title 2:")
                st.write(summary2)

                # Generate hashtags
                hashtags = generate_hashtags(text)
                st.subheader("Generated Hashtags for the Post")
                st.write(hashtags)

                # Extract point of view
                point_of_view = extract_point_of_view(text)
                st.subheader("Tone of the Post:")
                st.write(point_of_view)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter text to analyze.")