wavesoumen committed on
Commit
d6764d1
1 Parent(s): 5fe4ba5
Files changed (1) hide show
  1. app.py +41 -33
app.py CHANGED
@@ -1,17 +1,28 @@
1
  import streamlit as st
2
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
  import nltk
4
  from youtube_transcript_api import YouTubeTranscriptApi
5
 
6
  # Download NLTK data
7
  nltk.download('punkt')
8
 
9
- # Initialize the image captioning pipeline
 
 
 
 
 
 
 
10
  captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
11
 
12
- # Load the tokenizer and model for tag generation
13
- tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
14
- model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
 
 
 
 
15
 
16
  # Function to fetch YouTube transcript
17
  def fetch_transcript(url):
@@ -27,32 +38,38 @@ def fetch_transcript(url):
27
  st.title("Multi-purpose Machine Learning App: WAVE_AI")
28
 
29
  # Create tabs for different functionalities
30
- tab1, tab2, tab3 = st.tabs(["Text Tag Generation", "Image Captioning", "YouTube Transcript"])
31
 
32
- # Image Captioning Tab
33
  with tab1:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  st.header("Text Tag Generation")
35
 
36
- # Text area for user input
37
  text = st.text_area("Enter the text for tag extraction:", height=200)
38
 
39
- # Button to generate tags
40
  if st.button("Generate Tags"):
41
  if text:
42
  try:
43
- # Tokenize and encode the input text
44
- inputs = tokenizer([text], max_length=512, truncation=True, return_tensors="pt")
45
-
46
- # Generate tags
47
- output = model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
48
-
49
- # Decode the output
50
- decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
51
-
52
- # Extract unique tags
53
  tags = list(set(decoded_output.strip().split(", ")))
54
-
55
- # Display the tags
56
  st.write("**Generated Tags:**")
57
  st.write(tags)
58
  except Exception as e:
@@ -60,36 +77,27 @@ with tab1:
60
  else:
61
  st.warning("Please enter some text to generate tags.")
62
 
63
- # Text Tag Generation Tab
64
- with tab2:
65
  st.header("Image Captioning Extractor")
66
 
67
- # Input for image URL
68
  image_url = st.text_input("Enter the URL of the image:")
69
 
70
- # If an image URL is provided
71
  if image_url:
72
  try:
73
- # Display the image
74
  st.image(image_url, caption="Provided Image", use_column_width=True)
75
-
76
- # Generate the caption
77
  caption = captioner(image_url)
78
-
79
- # Display the caption
80
  st.write("**Generated Caption:**")
81
  st.write(caption[0]['generated_text'])
82
  except Exception as e:
83
  st.error(f"An error occurred: {e}")
84
 
85
  # YouTube Transcript Tab
86
- with tab3:
87
  st.header("YouTube Video Transcript Extractor")
88
 
89
- # Input for YouTube URL
90
  youtube_url = st.text_input("Enter YouTube URL:")
91
 
92
- # Button to get transcript
93
  if st.button("Get Transcript"):
94
  if youtube_url:
95
  transcript = fetch_transcript(youtube_url)
 
1
  import streamlit as st
2
+ from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
  import nltk
4
  from youtube_transcript_api import YouTubeTranscriptApi
5
 
6
  # Download NLTK data
7
  nltk.download('punkt')
8
 
9
# Load models and tokenizers.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction. Building the Hugging Face objects directly at module level
# would therefore reload every checkpoint on each click, so construction is
# wrapped in st.cache_resource loaders: the first run builds the objects and
# later reruns reuse the cached instances.
summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'


@st.cache_resource
def _load_summary_components(model_name):
    """Return the ``(model, tokenizer)`` pair for the summarization checkpoint."""
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    return model, tokenizer


@st.cache_resource
def _load_tag_components(model_name):
    """Return the ``(tokenizer, model)`` pair for the tag-generation checkpoint."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model


@st.cache_resource
def _load_captioner(model_name):
    """Return the image-to-text pipeline for the captioning checkpoint."""
    return pipeline("image-to-text", model=model_name)


# The module-level names below are what the rest of the script reads.
summary_model, summary_tokenizer = _load_summary_components(summary_model_name)
tag_tokenizer, tag_model = _load_tag_components("fabiochiu/t5-base-tag-generation")
captioner = _load_captioner("Salesforce/blip-image-captioning-base")
18
 
19
def summarize_text(text, prefix):
    """Summarize ``text`` with the module-level summary model and tokenizer.

    Args:
        text: The passage to summarize.
        prefix: String placed directly in front of ``text`` before
            tokenization (this file passes 'summary: ' and
            'summary brief: ').

    Returns:
        The first sequence produced by ``summary_model.generate``, decoded
        with special tokens skipped.
    """
    # The tokenizer output is unpacked as keyword arguments to generate(),
    # so it is kept as a whole rather than reduced to a single tensor.
    encoded = summary_tokenizer(prefix + text, return_tensors="pt")
    output_ids = summary_model.generate(**encoded)
    decoded = summary_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
26
 
27
  # Function to fetch YouTube transcript
28
  def fetch_transcript(url):
 
38
# Page title rendered above the tab bar.
st.title("Multi-purpose Machine Learning App: WAVE_AI")

# Create tabs for different functionalities; the unpacking order matches
# the order of the labels below.
tab1, tab2, tab3, tab4 = st.tabs(
    [
        "Text Summarization",
        "Text Tag Generation",
        "Image Captioning",
        "YouTube Transcript",
    ]
)
42
 
43
# Text Summarization Tab
with tab1:
    st.header("Text Summarization")

    input_text = st.text_area("Enter the text to summarize:", height=300)

    if st.button("Generate Summaries"):
        # Whitespace-only input carries nothing to summarize, so it gets the
        # same warning as an empty box instead of being sent to the model.
        if input_text.strip():
            try:
                # Two passes over the same text with different task prefixes.
                title1 = summarize_text(input_text, 'summary: ')
                title2 = summarize_text(input_text, 'summary brief: ')
            except Exception as e:
                # Report failures the same way the other tabs do. An uncaught
                # exception here would abort the script run before the
                # remaining tabs are rendered.
                st.error(f"An error occurred: {e}")
            else:
                st.write("### Title 1")
                st.write(title1)
                st.write("### Title 2")
                st.write(title2)
        else:
            st.warning("Please enter some text to summarize.")
59
+
60
+ # Text Tag Generation Tab
61
+ with tab2:
62
  st.header("Text Tag Generation")
63
 
 
64
  text = st.text_area("Enter the text for tag extraction:", height=200)
65
 
 
66
  if st.button("Generate Tags"):
67
  if text:
68
  try:
69
+ inputs = tag_tokenizer([text], max_length=512, truncation=True, return_tensors="pt")
70
+ output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
71
+ decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
 
 
 
 
 
 
 
72
  tags = list(set(decoded_output.strip().split(", ")))
 
 
73
  st.write("**Generated Tags:**")
74
  st.write(tags)
75
  except Exception as e:
 
77
  else:
78
  st.warning("Please enter some text to generate tags.")
79
 
80
# Image Captioning Tab
with tab3:
    st.header("Image Captioning Extractor")

    # Captioning is triggered as soon as a non-empty URL is present; there is
    # no separate button for this tab.
    image_url = st.text_input("Enter the URL of the image:")

    if image_url:
        try:
            # Show the picture first so the user sees what is being captioned.
            st.image(image_url, caption="Provided Image", use_column_width=True)

            predictions = captioner(image_url)

            st.write("**Generated Caption:**")
            # Only the first prediction's 'generated_text' entry is shown.
            st.write(predictions[0]['generated_text'])
        except Exception as err:
            st.error(f"An error occurred: {err}")
94
 
95
  # YouTube Transcript Tab
96
+ with tab4:
97
  st.header("YouTube Video Transcript Extractor")
98
 
 
99
  youtube_url = st.text_input("Enter YouTube URL:")
100
 
 
101
  if st.button("Get Transcript"):
102
  if youtube_url:
103
  transcript = fetch_transcript(youtube_url)