subashdvorak committed on
Commit
668bc28
·
verified ·
1 Parent(s): 92da897

uploaded the app and requirement files

Browse files
Files changed (2) hide show
  1. captioning_app.py +269 -0
  2. requirements.txt +8 -0
captioning_app.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import re
4
+ import requests
5
+ import urllib.request
6
+ from PIL import Image
7
+ from transformers import pipeline
8
+ import tempfile
9
+ import cv2
10
+ import io
11
+ import yt_dlp
12
+ import os
13
+
14
+ # Add a styled disclaimer at the top
15
# Render the disclaimer banner. The label is bolded with <strong> because
# Markdown emphasis (** **) is not processed inside a raw HTML block and would
# otherwise be shown as literal asterisks.
st.markdown(
    """
    <div style="background-color: #f8d7da; color: #721c24; padding: 10px; border-radius: 5px; border: 1px solid #f5c6cb;">
    <strong>Disclaimer:</strong> You are recommended to give any images and videos from your local device. In case of URLs, give the url of the website's image from chrome by copying image address. And give the URL of twitter videos for video captioning by URL.
    </div>
    """,
    unsafe_allow_html=True
)
23
+
24
+ # Load the Salesforce BLIP model for image captioning
25
@st.cache_resource
def _load_pipelines():
    """Build the captioning and summarization pipelines once and reuse them.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; caching the pipelines as a shared resource avoids reloading
    both models on each rerun.

    Returns:
        A ``(captioner, text_summarizer)`` tuple of transformers pipelines.
    """
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    # Summarization model used to condense the per-frame captions of a video.
    text_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return captioner, text_summarizer


# Module-level names kept so the functions below can keep referring to them.
captioning_model, summarizer = _load_pipelines()
28
+
29
+ # Function to extract URLs from a text
30
def extract_urls(text):
    """Return every http(s) URL found in *text*, in order of appearance.

    A URL is matched as ``http://`` or ``https://`` followed by a run of
    non-whitespace characters. Trailing sentence punctuation and unbalanced
    closing brackets are then trimmed, so a URL written as ``(https://a.b/c).``
    or ``https://a.b/c,`` is returned without the surrounding punctuation.
    Balanced brackets inside the URL (``.../Cat_(animal)``) are kept.

    Args:
        text: The string to scan.

    Returns:
        A list of URL strings; empty when no URL is present.
    """
    trailing_punctuation = ".,;:!?'\""
    closing_to_opening = {")": "(", "]": "[", "}": "{", ">": "<"}

    urls = []
    for candidate in re.findall(r'https?://\S+', text):
        while candidate:
            last_char = candidate[-1]
            if last_char in trailing_punctuation:
                candidate = candidate[:-1]
            elif (
                last_char in closing_to_opening
                and candidate.count(closing_to_opening[last_char]) < candidate.count(last_char)
            ):
                # More closers than openers: the bracket belongs to the
                # surrounding prose, not to the URL itself.
                candidate = candidate[:-1]
            else:
                break
        urls.append(candidate)
    return urls
33
+
34
+ # Function to fetch image from URL
35
def fetch_image_from_url(url):
    """Download *url* and decode it as a PIL image.

    Only ``http://`` and ``https://`` URLs are fetched; anything else (for
    example ``file://`` paths typed into the URL box) is rejected without
    touching the network or the filesystem.

    Args:
        url: Address of the image to download.

    Returns:
        The decoded ``PIL.Image.Image``, or ``None`` when the URL is not an
        http(s) URL, the download fails, or the payload is not a readable
        image.
    """
    if not isinstance(url, str):
        return None
    url = url.strip()
    if not url.lower().startswith(("http://", "https://")):
        return None

    try:
        # The timeout keeps a stalled server from hanging the app; the context
        # manager closes the connection once the body has been read.
        with urllib.request.urlopen(url, timeout=15) as response:
            image_data = response.read()
        image = Image.open(io.BytesIO(image_data))
        # Image.open is lazy; decode now so corrupt data is reported here
        # rather than later inside the captioning model.
        image.load()
        return image
    except Exception:
        # Best-effort by design: callers treat None as "could not fetch".
        return None
43
+
44
+ # Function to convert video to 30 FPS
45
def convert_video_to_30fps(video_path):
    """Re-encode the video at *video_path* into a 30 FPS ``mp4v`` temp file.

    Source frames are duplicated or dropped as needed so that the output keeps
    the source's duration; a source that is already 30 FPS (or whose frame
    rate cannot be read) is copied frame for frame.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the newly created ``.mp4`` file. The caller owns the file and
        is responsible for deleting it. If the source cannot be opened the
        returned file is empty.
    """
    target_fps = 30  # Desired FPS

    # Reserve the output path and close the handle immediately so that only
    # OpenCV has the file open while writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as reserved:
        converted_video_path = reserved.name

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            return converted_video_path

        source_fps = cap.get(cv2.CAP_PROP_FPS)
        if not source_fps > 0:  # also true for NaN
            source_fps = target_fps

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Output format
        out = cv2.VideoWriter(converted_video_path, fourcc, target_fps, (width, height))

        frames_read = 0
        frames_written = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames_read += 1
            # Number of output frames that should exist once this source frame
            # has been shown; write (or skip) frames to catch up to it.
            frames_due = int(round(frames_read * target_fps / source_fps))
            while frames_written < frames_due:
                out.write(frame)
                frames_written += 1
    finally:
        cap.release()
        if out is not None:
            out.release()

    return converted_video_path
66
+
67
+ # Function to extract frames from a 30 FPS video at 1-second intervals
68
def extract_frames(video_stream):
    """Sample roughly one frame per second from an in-memory video.

    The stream is spooled to a temporary ``.mp4`` file (OpenCV reads from a
    path), the frame rate reported by the container is used as the sampling
    interval, and the first frame of every interval is kept. The first frame
    of the video is always sampled, so clips shorter than one second still
    yield a frame. The temporary file is deleted before returning.

    Args:
        video_stream: Binary file-like object positioned at the start of the
            video data; it is read to the end.

    Returns:
        A list of RGB ``PIL.Image.Image`` frames; empty if the video could not
        be decoded.
    """
    frames = []
    temp_video_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    temp_video_file_path = temp_video_file.name
    try:
        with temp_video_file:
            temp_video_file.write(video_stream.read())

        cap = cv2.VideoCapture(temp_video_file_path)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Fall back to 30 when the container reports no usable frame rate,
            # and never let the interval reach 0 (it is used as a modulus).
            frame_interval = max(int(round(fps)), 1) if fps > 0 else 30

            frame_index = 0
            while True:
                success, frame = cap.read()
                if not success:
                    break
                if frame_index % frame_interval == 0:  # Extract one frame per second
                    frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
                frame_index += 1
        finally:
            cap.release()
    finally:
        try:
            os.remove(temp_video_file_path)
        except OSError:
            # Cleanup is best-effort; a leftover temp file must not mask the result.
            pass

    return frames
91
+
92
+ # Function to generate captions for a list of frames
93
def generate_captions(frames):
    """Caption each frame with the module-level ``captioning_model``.

    Frames are processed in order. A frame whose model output is empty, or
    whose first result has no ``'generated_text'`` entry, contributes nothing.

    Args:
        frames: Iterable of images accepted by ``captioning_model``.

    Returns:
        List of caption strings, one per successfully captioned frame.
    """
    model_outputs = (captioning_model(image) for image in frames)
    return [
        output[0]['generated_text']
        for output in model_outputs
        if output and 'generated_text' in output[0]
    ]
101
+
102
+ # Function to generate caption for a single image
103
def generate_caption_for_image(image):
    """Caption a single image with the module-level ``captioning_model``.

    Args:
        image: Image accepted by ``captioning_model``.

    Returns:
        The ``'generated_text'`` of the first model result, or the string
        ``"No caption generated."`` when the model returns nothing usable.
    """
    model_output = captioning_model(image)
    if not model_output or 'generated_text' not in model_output[0]:
        return "No caption generated."
    return model_output[0]['generated_text']
108
+
109
+ # Function to summarize the captions
110
def summarize_captions(captions):
    """Condense a list of captions into one summary string.

    The non-empty captions are joined with spaces and passed to the
    module-level ``summarizer`` pipeline. Input longer than the model accepts
    is truncated by the tokenizer instead of raising.

    Args:
        captions: Iterable of caption strings.

    Returns:
        The summary text, or ``"No caption generated."`` when there is no
        caption text to summarize.
    """
    combined_captions = " ".join(caption for caption in captions if caption).strip()
    if not combined_captions:
        # Nothing to summarize; do not call the model on an empty string.
        return "No caption generated."

    summary = summarizer(
        combined_captions,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,  # long videos can exceed the model's input limit
    )
    return summary[0]['summary_text']
114
+
115
+ # Function to download Twitter video using yt-dlp
116
def download_twitter_video(url):
    """Download the video attached to a Twitter/X post into memory.

    yt-dlp is used only to resolve the direct media URL (nothing is written to
    disk); the media itself is then fetched with ``requests``.

    Args:
        url: Address of the post, on either ``x.com`` or ``twitter.com``.

    Returns:
        An ``io.BytesIO`` holding the video bytes, or ``None`` when the media
        URL cannot be resolved or the download does not return HTTP 200.
        Failures are reported to the page with ``st.error``.
    """
    # Rewrite only the host: a plain str.replace would also alter any other
    # "x.com" substring in the URL (e.g. a handle or path containing "fox.com").
    url = re.sub(r'^(https?://)(?:www\.)?x\.com(?=/|$)', r'\1twitter.com', url.strip())
    ydl_opts = {
        'format': 'best',
        'quiet': True,
        'noplaylist': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=False)

        video_url = (info_dict or {}).get("url")
        if not video_url:
            st.error("An error occurred: no direct video URL was found for this post.")
            return None

        # The timeout keeps a stalled CDN from hanging the app indefinitely.
        response = requests.get(video_url, timeout=60)
        if response.status_code == 200:
            return io.BytesIO(response.content)
        return None
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None
137
+
138
+ # Function to process URLs in a DataFrame
139
def process_urls_in_dataframe(df):
    """Caption every URL found in any cell of *df*.

    Each non-null cell is converted to text and scanned with ``extract_urls``.
    URLs on ``x.com`` / ``twitter.com`` are treated as videos (downloaded,
    sampled, captioned and summarized); every other URL is treated as an
    image. Each successful result is appended to the results CSV as soon as it
    is produced, one row per URL. Failures are reported with ``st.error`` and
    skipped.

    Args:
        df: DataFrame whose cells may contain URLs.

    Returns:
        List of ``{"URL": ..., "Caption": ...}`` dicts, one per captioned URL.
    """
    video_prefixes = ("https://x.com", "https://twitter.com")
    results = []

    for _, row in df.iterrows():
        for cell in row:
            if not pd.notna(cell):
                continue
            for url in extract_urls(str(cell)):
                if url.startswith(video_prefixes):
                    st.write(f"Processing video URL: {url}")
                    video_stream = download_twitter_video(url)
                    if not video_stream:
                        st.error(f"Failed to fetch video: {url}")
                        continue
                    frames = extract_frames(video_stream)
                    if not frames:
                        st.error(f"Failed to extract frames from video: {url}")
                        continue
                    caption = summarize_captions(generate_captions(frames))
                else:
                    st.write(f"Processing image URL: {url}")
                    image = fetch_image_from_url(url)
                    if not image:
                        st.error(f"Failed to fetch image: {url}")
                        continue
                    caption = generate_caption_for_image(image)

                entry = {"URL": url, "Caption": caption}
                results.append(entry)
                # Persist only the new row: the CSV is opened in append mode,
                # so writing the whole list again would duplicate earlier rows.
                save_results_to_csv([entry])

    return results
168
+
169
+ # Function to save results to a CSV file
170
def save_results_to_csv(results):
    """Append *results* to ``captions_results.csv`` in the working directory.

    The header row is written only when the file does not exist yet or is
    empty; otherwise the rows are appended below the existing content.

    Args:
        results: List of dicts, one per row (the app uses the keys ``"URL"``
            and ``"Caption"``). An empty list is a no-op, so no headerless
            file is ever created.
    """
    if not results:
        return

    file_path = "captions_results.csv"
    needs_header = not os.path.isfile(file_path) or os.path.getsize(file_path) == 0
    # Append mode creates the file when it is missing, so one call covers both cases.
    pd.DataFrame(results).to_csv(file_path, index=False, mode='a', header=needs_header)
177
+
178
+ # Streamlit app
179
st.title("Captioning Application")

# Section to process uploaded CSV or Excel files
st.subheader("Process URLs from File")
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"])

if uploaded_file is not None:
    st.write("Processing file...")
    df = None
    try:
        # Compare case-insensitively so "DATA.CSV" is not parsed as Excel.
        if uploaded_file.name.lower().endswith(".csv"):
            df = pd.read_csv(uploaded_file)
        else:
            df = pd.read_excel(uploaded_file)
    except Exception as e:
        # Top-level UI boundary: report an unreadable file instead of crashing the page.
        st.error(f"Could not read the uploaded file: {e}")

    if df is not None:
        results = process_urls_in_dataframe(df)

        if results:
            st.write(f"Processed {len(results)} URLs from the file.")
            st.write("Results saved to captions_results.csv")
        else:
            st.write("No URLs found or processed.")
199
+
200
+ # Section to process URLs for images and videos
201
st.subheader("Process URLs Directly")

# Upload image URL
image_url = st.text_input("Enter Image URL:")
if image_url:
    st.write(f"Processing Image URL: {image_url}")
    image = fetch_image_from_url(image_url)
    if image:
        caption = generate_caption_for_image(image)
        st.image(image, caption="Uploaded Image", use_column_width=True)
        st.write(f"Caption: {caption}")
        # Collect results in a list of dictionaries
        results = [{"URL": image_url, "Caption": caption}]

        # Save the results to the CSV file
        save_results_to_csv(results)
        st.success("Results saved to captions_results.csv")
    else:
        # Tell the user why nothing appeared instead of failing silently.
        st.error("Failed to fetch image.")
218
+
219
+
220
+
221
+ # Upload video URL
222
video_url = st.text_input("Enter Video URL:")
if video_url:
    st.write(f"Processing Video URL: {video_url}")
    # Accept both hostnames: the error message below promises Twitter support,
    # so a twitter.com link must not be rejected.
    if video_url.startswith(("https://x.com", "https://twitter.com")):
        video_stream = download_twitter_video(video_url)
        if video_stream:
            frames = extract_frames(video_stream)
            if frames:
                captions = generate_captions(frames)
                summary = summarize_captions(captions)
                st.write(f"Caption: {summary}")
                # Collect results in a list of dictionaries
                results = [{"URL": video_url, "Caption": summary}]

                # Save the results to the CSV file
                save_results_to_csv(results)
                st.success("Results saved to captions_results.csv")
            else:
                st.error("Failed to extract frames from video.")
        else:
            st.error("Failed to fetch video.")
    else:
        st.error("Only Twitter video URLs are supported.")
246
+
247
+ # Section to process local files
248
st.subheader("Process Local Files")

uploaded_local_file = st.file_uploader(
    "Upload a local image or video file",
    type=["jpg", "jpeg", "png", "mp4"],
)

if uploaded_local_file is not None:
    # Branch on the MIME type reported for the upload.
    if uploaded_local_file.type.startswith("image"):
        picture = Image.open(uploaded_local_file)
        picture_caption = generate_caption_for_image(picture)
        st.image(picture, caption="Uploaded Image", use_column_width=True)
        st.write(f"Caption: {picture_caption}")
    elif uploaded_local_file.type.startswith("video"):
        # Copy the upload into its own buffer before sampling frames from it.
        sampled_frames = extract_frames(io.BytesIO(uploaded_local_file.read()))
        if not sampled_frames:
            st.error("Failed to extract frames from video.")
        else:
            frame_captions = generate_captions(sampled_frames)
            captions_digest = summarize_captions(frame_captions)
            st.video(uploaded_local_file)
            st.write(f"Summary of Captions: {captions_digest}")

# Standing prompt shown at the bottom of the page.
st.write("Upload a file or enter a URL to start processing.")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ requests
4
+ urllib3
5
+ Pillow
6
+ transformers
7
+ opencv-python-headless
8
+ yt-dlp