Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,6 @@
|
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
import plotly.express as px
|
5 |
-
import plotly.figure_factory as ff
|
6 |
import subprocess
|
7 |
import threading
|
8 |
import re
|
@@ -84,14 +83,12 @@ def parse_time(time_str):
|
|
84 |
"""
|
85 |
if pd.isna(time_str):
|
86 |
return 0
|
87 |
-
|
88 |
time_str = str(time_str).strip()
|
89 |
if not time_str:
|
90 |
return 0
|
91 |
|
92 |
# If the string contains a colon, assume mm:ss or hh:mm:ss format.
|
93 |
if ":" in time_str:
|
94 |
-
# Remove non-digit/colon characters.
|
95 |
clean_str = re.sub(r"[^\d:]", "", time_str)
|
96 |
parts = clean_str.split(":")
|
97 |
try:
|
@@ -122,11 +119,16 @@ df["Video_length_seconds"] = df["video length"].apply(parse_time)
|
|
122 |
# -----------------------------
|
123 |
# Compute Aggregated Insights
|
124 |
# -----------------------------
|
|
|
125 |
avg_time_by_model = df.groupby("whisper model")["Time_required_seconds"].mean().reset_index()
|
|
|
|
|
126 |
avg_time_by_target = df.groupby("Target Audio")["Time_required_seconds"].mean().reset_index()
|
127 |
|
128 |
# Label each row's run type from the "run" column: values containing "First" become "First Run"; all others become "Subsequent Run".
|
129 |
df["Run_type"] = df["run"].apply(lambda x: "First Run" if "First" in str(x) else "Subsequent Run")
|
|
|
|
|
130 |
run_counts = df.groupby(["whisper model", "Run_type"]).size().reset_index(name="count")
|
131 |
|
132 |
# -----------------------------
|
@@ -228,20 +230,6 @@ fig_corr = px.imshow(
|
|
228 |
labels=dict(color="Correlation")
|
229 |
)
|
230 |
|
231 |
-
# # 3D Scatter Plot: Whisper Model vs Processing Time vs Video Length
|
232 |
-
# fig_model_time_video = px.scatter_3d(
|
233 |
-
# df,
|
234 |
-
# x="whisper model",
|
235 |
-
# y="Time_required_seconds",
|
236 |
-
# z="Video_length_seconds",
|
237 |
-
# color="whisper model",
|
238 |
-
# title="Whisper Model vs Processing Time vs Video Length",
|
239 |
-
# labels={
|
240 |
-
# "whisper model": "Whisper Model",
|
241 |
-
# "Time_required_seconds": "Processing Time (seconds)",
|
242 |
-
# "Video_length_seconds": "Video Length (seconds)"
|
243 |
-
# }
|
244 |
-
# )
|
245 |
# 3D Scatter Plot: Whisper Model vs Processing Time vs Video Length
|
246 |
fig_model_time_video = px.scatter_3d(
|
247 |
df,
|
@@ -256,17 +244,23 @@ fig_model_time_video = px.scatter_3d(
|
|
256 |
"Video_length_seconds": "Video Length (seconds)"
|
257 |
}
|
258 |
)
|
259 |
-
|
260 |
-
# Update the layout to enlarge the graph in the UI
|
261 |
fig_model_time_video.update_layout(height=1000, width=1200)
|
262 |
|
263 |
-
#
|
264 |
-
#
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
# -----------------------------
|
271 |
# Build the Streamlit App Layout
|
272 |
# -----------------------------
|
@@ -315,6 +309,9 @@ st.plotly_chart(fig_corr, use_container_width=True)
|
|
315 |
st.subheader("Whisper Model vs Processing Time vs Video Length (3D)")
|
316 |
st.plotly_chart(fig_model_time_video, use_container_width=True)
|
317 |
|
|
|
|
|
|
|
318 |
# -----------------------------
|
319 |
# Optional: Start LocalTunnel for Public Access
|
320 |
# -----------------------------
|
@@ -330,7 +327,6 @@ def start_localtunnel(port=8501):
|
|
330 |
stderr=subprocess.PIPE,
|
331 |
text=True,
|
332 |
)
|
333 |
-
|
334 |
def read_tunnel_output(process):
|
335 |
while True:
|
336 |
line = process.stdout.readline()
|
@@ -340,11 +336,10 @@ def start_localtunnel(port=8501):
|
|
340 |
if "your url is:" in line.lower():
|
341 |
public_url = line.split("your url is:")[-1].strip()
|
342 |
st.success(f"LocalTunnel URL: {public_url}")
|
343 |
-
|
344 |
thread = threading.Thread(target=read_tunnel_output, args=(proc,), daemon=True)
|
345 |
thread.start()
|
346 |
except Exception as e:
|
347 |
st.error(f"Error starting LocalTunnel: {e}")
|
348 |
|
349 |
# Uncomment the following line to start LocalTunnel when the app runs.
|
350 |
-
# start_localtunnel(port=8501)
|
|
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
import plotly.express as px
|
|
|
5 |
import subprocess
|
6 |
import threading
|
7 |
import re
|
|
|
83 |
"""
|
84 |
if pd.isna(time_str):
|
85 |
return 0
|
|
|
86 |
time_str = str(time_str).strip()
|
87 |
if not time_str:
|
88 |
return 0
|
89 |
|
90 |
# If the string contains a colon, assume mm:ss or hh:mm:ss format.
|
91 |
if ":" in time_str:
|
|
|
92 |
clean_str = re.sub(r"[^\d:]", "", time_str)
|
93 |
parts = clean_str.split(":")
|
94 |
try:
|
|
|
119 |
# -----------------------------
|
120 |
# Compute Aggregated Insights
|
121 |
# -----------------------------
|
122 |
+
# Overall average processing time by whisper model
|
123 |
avg_time_by_model = df.groupby("whisper model")["Time_required_seconds"].mean().reset_index()
|
124 |
+
|
125 |
+
# Average processing time by target audio
|
126 |
avg_time_by_target = df.groupby("Target Audio")["Time_required_seconds"].mean().reset_index()
|
127 |
|
128 |
# Label each row's run type from the "run" column: values containing "First" become "First Run"; all others become "Subsequent Run".
|
129 |
df["Run_type"] = df["run"].apply(lambda x: "First Run" if "First" in str(x) else "Subsequent Run")
|
130 |
+
|
131 |
+
# Run counts by whisper model and run type
|
132 |
run_counts = df.groupby(["whisper model", "Run_type"]).size().reset_index(name="count")
|
133 |
|
134 |
# -----------------------------
|
|
|
230 |
labels=dict(color="Correlation")
|
231 |
)
|
232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
# 3D Scatter Plot: Whisper Model vs Processing Time vs Video Length
|
234 |
fig_model_time_video = px.scatter_3d(
|
235 |
df,
|
|
|
244 |
"Video_length_seconds": "Video Length (seconds)"
|
245 |
}
|
246 |
)
|
247 |
+
# Enlarge the 3D graph layout
|
|
|
248 |
fig_model_time_video.update_layout(height=1000, width=1200)
|
249 |
|
250 |
+
# New Graph:
|
251 |
+
# Average processing time of first runs only, grouped by whisper model and by the raw "video length" column value
|
252 |
+
first_run_df = df[df["Run_type"] == "First Run"]
|
253 |
+
avg_time_first_run = first_run_df.groupby(["whisper model", "video length"])["Time_required_seconds"].mean().reset_index()
|
254 |
+
fig_first_run = px.bar(
|
255 |
+
avg_time_first_run,
|
256 |
+
x="whisper model",
|
257 |
+
y="Time_required_seconds",
|
258 |
+
color="video length",
|
259 |
+
barmode="group",
|
260 |
+
title="First Run Average Processing Time by Whisper Model (Grouped by Video Duration)",
|
261 |
+
labels={"Time_required_seconds": "Avg Time (seconds)", "whisper model": "Whisper Model"}
|
262 |
+
)
|
263 |
+
|
264 |
# -----------------------------
|
265 |
# Build the Streamlit App Layout
|
266 |
# -----------------------------
|
|
|
309 |
st.subheader("Whisper Model vs Processing Time vs Video Length (3D)")
|
310 |
st.plotly_chart(fig_model_time_video, use_container_width=True)
|
311 |
|
312 |
+
st.subheader("First Run Avg Processing Time by Whisper Model and Video Duration")
|
313 |
+
st.plotly_chart(fig_first_run, use_container_width=True)
|
314 |
+
|
315 |
# -----------------------------
|
316 |
# Optional: Start LocalTunnel for Public Access
|
317 |
# -----------------------------
|
|
|
327 |
stderr=subprocess.PIPE,
|
328 |
text=True,
|
329 |
)
|
|
|
330 |
def read_tunnel_output(process):
|
331 |
while True:
|
332 |
line = process.stdout.readline()
|
|
|
336 |
if "your url is:" in line.lower():
|
337 |
public_url = line.split("your url is:")[-1].strip()
|
338 |
st.success(f"LocalTunnel URL: {public_url}")
|
|
|
339 |
thread = threading.Thread(target=read_tunnel_output, args=(proc,), daemon=True)
|
340 |
thread.start()
|
341 |
except Exception as e:
|
342 |
st.error(f"Error starting LocalTunnel: {e}")
|
343 |
|
344 |
# Uncomment the following line to start LocalTunnel when the app runs.
|
345 |
+
# start_localtunnel(port=8501)
|