# Hugging Face file-viewer residue, kept as comments so the module parses:
# mfarre's picture
# mfarre HF staff
# v0
# 8a53a03
# raw
# history blame
# 25.6 kB
import gradio as gr
import logging
import json
import os
from typing import Dict, Any, List
from itertools import groupby
# Configure logging: INFO level on the root logger, plus a module-level logger
# that the functions below use for their error/info messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Relative folders read at runtime: '<video_id>.mp4' files are looked up in
# video_folder and the matching '<video_id>.json' files in metadata_folder.
video_folder = 'video/'
metadata_folder = 'metadata/'
def load_video_list() -> List[Dict[str, str]]:
    """Build the list of selectable videos.

    Every ``*.mp4`` file in ``video_folder`` that has a matching
    ``<video_id>.json`` in ``metadata_folder`` contributes one entry. A video
    whose metadata cannot be read (I/O error, invalid JSON, missing
    ``content_metadata``) is logged and skipped instead of aborting the whole
    listing.

    Returns:
        A list of ``{"video_id": ..., "title": ...}`` dicts. The IDs in
        ``custom_order`` come first, in that order; all other videos follow,
        sorted by title.
    """
    video_list = []
    for filename in os.listdir(video_folder):
        if not filename.endswith('.mp4'):
            continue
        video_id = os.path.splitext(filename)[0]
        metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
        if not os.path.exists(metadata_path):
            continue
        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(metadata_path, 'r', encoding='utf-8') as f:
                content = json.load(f)['content_metadata']
            # A null/empty title would break the title sort and the radio label.
            title = content.get('title') or 'Untitled'
        except (OSError, ValueError, KeyError, TypeError, AttributeError):
            logger.exception(f"Skipping video with unreadable metadata: {video_id}")
            continue
        video_list.append({"video_id": video_id, "title": title})

    # Define the custom order for the first five videos
    custom_order = ['7BhJmDPB7RU', 'PrAwsi3Ldzo', '3rhsSPxQ39c', 'P7WnJZ55sgc', 'g9GtUQs7XUM']
    rank = {video_id: position for position, video_id in enumerate(custom_order)}
    # Videos outside the custom order share one rank placed after all of them,
    # so they end up ordered by title among themselves.
    unranked = len(custom_order) + 1

    video_list.sort(key=lambda item: (rank.get(item['video_id'], unranked), item['title']))
    return video_list
def score_to_emoji(score):
    """Translate a numeric score into one of five emoji.

    The buckets use exclusive upper bounds: below 0.2, below 0.4, below 0.6,
    below 0.8, and everything else.
    """
    buckets = (
        (0.2, "😴"),
        (0.4, "🙂"),
        (0.6, "😊"),
        (0.8, "😃"),
    )
    for upper_bound, emoji in buckets:
        if score < upper_bound:
            return emoji
    return "🤩"
def load_metadata(video_id: str) -> Dict[str, Any]:
    """Load the ``content_metadata`` section of a video's metadata file.

    Args:
        video_id: Base name of the ``<video_id>.json`` file in ``metadata_folder``.

    Returns:
        The value stored under the top-level ``content_metadata`` key.

    Raises:
        FileNotFoundError: The metadata file does not exist.
        json.JSONDecodeError: The file is not valid JSON.
        KeyError: The document has no ``content_metadata`` key.

    Each failure is logged before being re-raised.
    """
    metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
    try:
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(metadata_path, 'r', encoding='utf-8') as f:
            document = json.load(f)
    except FileNotFoundError:
        logger.error(f"Metadata file not found for video ID: {video_id}")
        raise
    except json.JSONDecodeError:
        logger.error(f"Invalid JSON in metadata file for video ID: {video_id}")
        raise

    try:
        return document['content_metadata']
    except (KeyError, TypeError):
        logger.error(f"Missing 'content_metadata' in metadata file for video ID: {video_id}")
        raise
def timestamp_to_seconds(timestamp: str) -> float:
    """Convert a clock-style timestamp to seconds.

    Accepts ``"HH:MM:SS(.fff)"`` as well as the shorter ``"MM:SS"`` and plain
    ``"SS"`` forms, so a bare number of seconds (e.g. ``str(duration)``) also
    parses.

    Args:
        timestamp: The timestamp text.

    Returns:
        The number of seconds as a float, or ``0.0`` (after logging an error)
        when the value cannot be parsed.
    """
    try:
        parts = timestamp.split(':')
        if len(parts) > 3:
            raise ValueError(f"too many fields in {timestamp!r}")
        *leading, seconds = parts
        # Pair the fields right-to-left with their unit: minutes, then hours.
        whole = sum(int(field) * unit for field, unit in zip(reversed(leading), (60, 3600)))
        return whole + float(seconds)
    except (ValueError, AttributeError):
        # AttributeError covers non-string input such as None.
        logger.error(f"Invalid timestamp format: {timestamp}")
        return 0.0
def format_timestamp(timestamp: str) -> str:
    """Format an ``"HH:MM:SS(.fff)"`` timestamp for display.

    Args:
        timestamp: The raw timestamp; may be empty or ``None`` for events that
            have no start or end time.

    Returns:
        ``"MM:SS"`` when the hour field is zero, ``"H:MM:SS"`` otherwise
        (fractional seconds are truncated). An empty string is returned for an
        empty timestamp, and for a malformed one after logging an error.
    """
    if not timestamp:
        # A missing timestamp is normal for untimed events; nothing to report.
        return ""
    try:
        h, m, s = timestamp.split(':')
        hours, minutes, seconds = int(h), int(m), int(float(s))
    except (ValueError, AttributeError, OverflowError):
        logger.error(f"Invalid timestamp format: {timestamp}")
        return ""
    if hours:
        # Keep the hour so timestamps past the first hour stay unambiguous.
        return f"{hours}:{minutes:02d}:{seconds:02d}"
    return f"{minutes:02d}:{seconds:02d}"
def _scene_names(names) -> str:
    """Join the usable names, dropping empty entries and the literal string 'None'."""
    return ", ".join(name for name in names if name and name != 'None')


def _scene_time_span(item: Dict[str, Any], key: str):
    """Return ``(start, end)`` from the timestamp dict stored under *key*.

    Yields ``('', '')`` when the value is missing or is not a dict.
    """
    span = item.get(key)
    if not isinstance(span, dict):
        return '', ''
    return span.get('start_timestamp', ''), span.get('end_timestamp', '')


def _scene_timestamp_link(raw_timestamp, prefix: str = '') -> str:
    """Render a clickable timestamp anchor, or '' when there is nothing to show."""
    label = format_timestamp(raw_timestamp) if raw_timestamp else ''
    if not label:
        return ''
    return f'{prefix}<a href="#" class="timestamp-link" data-timestamp="{raw_timestamp}">{label}</a>'


def _collect_scene_events(scene: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Flatten the per-category scene data into a single list of event dicts.

    Each event has the keys ``timestamp_start``, ``timestamp_end``, ``type``
    and ``description``. Entries without a usable description (empty or the
    literal string 'None') are left out.
    """
    sources = [
        ('Activities', scene.get('activities') or []),
        ('Props', scene.get('props') or []),
        ('Mood', [scene.get('mood') or {}]),
        ('Narrative Progression', scene.get('narrativeProgression') or []),
        ('Video Editing Details', scene.get('videoEditingDetails') or []),
        ('Thematic Elements', [{'description': scene.get('thematicElements', '')}]),
        ('Contextual Relevance', [{'description': scene.get('contextualRelevance', '')}]),
        ('Character Interaction', scene.get('characterInteraction') or []),
    ]

    events = []
    for data_type, items in sources:
        for item in items:
            if isinstance(item, str):
                # Plain strings carry no timing information.
                if item and item != 'None':
                    events.append({
                        'timestamp_start': '',
                        'timestamp_end': '',
                        'type': data_type,
                        'description': item,
                    })
                continue
            if not isinstance(item, dict):
                continue

            start_time = ''
            end_time = ''
            description = ''
            if data_type == 'Activities':
                start_time, end_time = _scene_time_span(item, 'timestamp')
                description = item.get('description', '')
            elif data_type == 'Props':
                start_time, end_time = _scene_time_span(item, 'timestamp')
                description = item.get('name', '')
            elif data_type == 'Video Editing Details':
                # This category stores its range under the plural key.
                start_time, end_time = _scene_time_span(item, 'timestamps')
                description = item.get('description', '')
            elif data_type == 'Mood':
                description = item.get('description', '')
                # Each key moment becomes its own 'Mood Change' event.
                for mood_change in item.get('keyMoments') or []:
                    if not isinstance(mood_change, dict):
                        continue
                    change_description = mood_change.get('changeDescription', '')
                    if change_description and change_description != 'None':
                        events.append({
                            'timestamp_start': mood_change.get('timestamp', ''),
                            'timestamp_end': '',
                            'type': 'Mood Change',
                            'description': change_description,
                        })
            elif data_type == 'Character Interaction':
                detail = item.get('description', '')
                # Validate the interaction text itself: prefixing the character
                # names first would make an empty description look non-empty.
                if detail and detail != 'None':
                    characters = _scene_names(item.get('characters') or [])
                    description = f"{characters}: {detail}" if characters else detail
            else:
                start_time = item.get('timestamp', '')
                description = item.get('description', '')

            if description and description != 'None':
                events.append({
                    'timestamp_start': start_time,
                    'timestamp_end': end_time,
                    'type': data_type,
                    'description': description,
                })
    return events


def create_scene_table(scene: Dict[str, Any]) -> str:
    """Render one scene as an HTML block with a table of its timed events.

    The heading shows the scene id and title, followed by the dynamism and
    audio-visual correlation scores (as emoji) and the cast. The table lists
    activities, props, mood, mood changes, narrative progression, editing
    details, thematic elements, contextual relevance and character
    interactions, sorted by their raw start timestamp (untimed rows first).

    Args:
        scene: The scene dictionary from the video metadata.

    Returns:
        The HTML fragment for the scene.
    """
    # A JSON null score is treated like a missing one.
    dynamism_score = scene.get('dynamismScore') or 0
    av_correlation = scene.get('audioVisualCorrelation') or 0
    cast = _scene_names(scene.get('cast') or [])

    events = _collect_scene_events(scene)
    # Stable sort on the raw timestamp text; events without one sort first.
    events.sort(key=lambda event: event['timestamp_start'] or '')

    parts = [
        '\n<div class="scene-container">\n'
        f"<h3>Scene {scene.get('sceneId', 'Unknown')}: {scene.get('title', '')}</h3>\n"
        f'<p>Dynamism: {score_to_emoji(dynamism_score)} '
        f'Audio-visual correlation: {score_to_emoji(av_correlation)} Cast: {cast}</p>\n'
        '<table class="metadata-table">\n'
        '<tr>\n'
        '<th>Timestamp</th>\n'
        '<th>Type</th>\n'
        '<th>Description</th>\n'
        '</tr>\n'
    ]
    for event in events:
        start_link = _scene_timestamp_link(event['timestamp_start'])
        end_link = _scene_timestamp_link(event['timestamp_end'], prefix=' - ')
        parts.append(
            '\n<tr>\n'
            f'<td>{start_link}{end_link}</td>\n'
            f"<td>{event['type']}</td>\n"
            f"<td>{event['description']}</td>\n"
            '</tr>\n'
        )
    parts.append('\n</table>\n</div>\n')
    return "".join(parts)
def create_storylines_table(storylines: Dict[str, Any]) -> str:
    """Render the storyline description and its scene list as a one-row HTML table.

    Args:
        storylines: Dict read for the ``description`` and ``scenes`` keys.

    Returns:
        The HTML fragment for the storylines section.
    """
    description = storylines.get('description', 'No description available')
    scenes_involved = ', '.join(str(scene_id) for scene_id in storylines.get('scenes', []))
    return (
        '\n<div class="storylines-container">\n'
        '<h3>Storylines</h3>\n'
        '<table class="metadata-table">\n'
        '<tr>\n'
        '<th>Storyline</th>\n'
        '<th>Scenes Involved</th>\n'
        '</tr>\n'
        '\n<tr>\n'
        f'<td>{description}</td>\n'
        f'<td>{scenes_involved}</td>\n'
        '</tr>\n'
        '\n</table>\n'
        '</div>\n'
    )
def create_qa_section(qa_list: List[Dict[str, str]]) -> str:
    """Render question/answer pairs as an HTML chat-style block.

    Args:
        qa_list: Dicts read for the ``question`` and ``answer`` keys; a missing
            key renders as empty text.

    Returns:
        The HTML fragment for the Q&A section.
    """
    parts = [
        '\n<div class="qa-container">\n'
        '<h3>Q&A</h3>\n'
        '<div class="chat-discussion">\n'
    ]
    for qa in qa_list:
        question = qa.get('question', '')
        answer = qa.get('answer', '')
        parts.append(
            f'\n<div class="question">{question}</div>\n'
            f'<div class="answer">{answer}</div>\n'
        )
    parts.append('\n</div>\n</div>\n')
    return "".join(parts)
def create_trimming_suggestions(suggestions: List[Dict[str, Any]]) -> str:
    """Render the trimming suggestions as an HTML table.

    Each row shows the suggestion's time range as clickable timestamp links and
    its description. A link is only emitted when its timestamp formats to
    visible text, matching how the scene tables treat missing timestamps.

    Args:
        suggestions: Dicts read for ``timestamps`` (with ``start_timestamp`` /
            ``end_timestamp``) and ``description``.

    Returns:
        The HTML fragment for the trimming-suggestions section.
    """
    parts = [
        '\n<div class="trimming-suggestions-container">\n'
        '<h3>Trimming Suggestions</h3>\n'
        '<table class="metadata-table">\n'
        '<tr>\n'
        '<th>Timestamp</th>\n'
        '<th>Description</th>\n'
        '</tr>\n'
    ]
    for suggestion in suggestions:
        span = suggestion.get('timestamps')
        if not isinstance(span, dict):
            # Missing or null range: render the row without links.
            span = {}
        start_time = span.get('start_timestamp', '')
        end_time = span.get('end_timestamp', '')
        # Skip formatting empty values so they are not reported as malformed.
        start_formatted = format_timestamp(start_time) if start_time else ''
        end_formatted = format_timestamp(end_time) if end_time else ''

        start_link = ''
        if start_formatted:
            start_link = (
                f'<a href="#" class="timestamp-link" data-timestamp="{start_time}">'
                f'{start_formatted}</a>'
            )
        end_link = ''
        if end_formatted:
            end_link = (
                f' - <a href="#" class="timestamp-link" data-timestamp="{end_time}">'
                f'{end_formatted}</a>'
            )

        description = suggestion.get('description', '')
        parts.append(
            '\n<tr>\n'
            '<td>\n'
            f'{start_link}\n'
            f'{end_link}\n'
            '</td>\n'
            f'<td>{description}</td>\n'
            '</tr>\n'
        )
    parts.append('\n</table>\n</div>\n')
    return "".join(parts)
def create_filmstrip(scenes: List[Dict[str, Any]], video_duration: float) -> str:
    """Render the filmstrip: one positioned marker per scene plus a scrubbing needle.

    Each marker's left offset and width are the scene's start and length as a
    percentage of ``video_duration``.

    Args:
        scenes: Scene dicts read for ``timestamps`` (``start_timestamp`` /
            ``end_timestamp``) and ``title``.
        video_duration: Total duration in seconds used to scale the markers.

    Returns:
        The filmstrip HTML. When ``video_duration`` is not positive no markers
        are drawn (percentages would be undefined); the container and needle
        are still returned.
    """
    parts = [
        '\n<div id="filmstrip-inner" style="position: relative; width: 100%; height: 100%;" '
        f'data-duration="{video_duration}">\n'
    ]
    if video_duration > 0:
        for scene in scenes:
            span = scene.get('timestamps') or {}
            start_raw = span.get('start_timestamp')
            end_raw = span.get('end_timestamp')
            # A scene without a start begins at 0; one without an end runs to
            # the end of the video.
            start_time = timestamp_to_seconds(start_raw) if start_raw else 0.0
            end_time = timestamp_to_seconds(end_raw) if end_raw else video_duration
            left_pos = (start_time / video_duration) * 100
            width = ((end_time - start_time) / video_duration) * 100
            title = scene.get('title', '')
            parts.append(
                '\n<div class="scene-marker" style="position: absolute; '
                f'left: {left_pos}%; width: {width}%; height: 100%; '
                'background-color: rgba(0, 0, 255, 0.2); border-right: 1px solid blue; '
                'overflow: hidden;">\n'
                '<div class="scene-title" style="font-size: 10px; word-wrap: break-word; '
                f'padding: 2px;">{title}</div>\n'
                '</div>\n'
            )
    parts.append(
        '\n<div id="scrubbing-needle" style="position: absolute; width: 2px; height: 100%; '
        'background-color: red; top: 0; left: 0; pointer-events: none;"></div>\n'
        '</div>\n'
    )
    return "".join(parts)
# def generate_correlation_scores(metadata: Dict[str, Any]) -> str:
# dynamism_score = metadata.get('dynamismscore', 0)
# av_correlation = metadata.get('audiovisualcorrelation', 0)
# def score_to_emoji(score):
# if score < 0.2:
# return "😴"
# elif score < 0.4:
# return "🙂"
# elif score < 0.6:
# return "😊"
# elif score < 0.8:
# return "😃"
# else:
# return "🤩"
# return f"""
# <div class="correlation-scores">
# <p>Dynamism: {score_to_emoji(dynamism_score)} ({dynamism_score:.2f})</p>
# <p>Audio-Visual Correlation: {score_to_emoji(av_correlation)} ({av_correlation:.2f})</p>
# </div>
# """
def process_video(video_id: str):
    """Build everything the UI shows for one video.

    Loads the video's metadata, checks that the ``.mp4`` exists, and renders
    the info header, character table, per-scene tables, storylines, Q&A,
    trimming suggestions and the filmstrip.

    Args:
        video_id: Base name shared by the video file and its metadata file.

    Returns:
        A ``(video_path, filmstrip_html, metadata_html)`` tuple. On failure the
        tuple is ``(None, "", <error message>)`` so the UI can display the
        problem instead of raising. A video without scenes, or whose last scene
        has no usable end timestamp, still renders its metadata; only the
        filmstrip is left empty.
    """
    try:
        metadata = load_metadata(video_id)
        video_path = os.path.join(video_folder, f"{video_id}.mp4")
        if not os.path.exists(video_path):
            logger.error(f"Video file not found: {video_path}")
            return None, "", f"Error: Video file not found for ID {video_id}"

        # Character List Table
        character_rows = "".join(
            f"\n<tr>\n<td>{character.get('name', '')}</td>\n"
            f"<td>{character.get('description', '')}</td>\n</tr>\n"
            for character in metadata.get('characterList') or []
        )
        character_table = (
            '\n<h3>Characters</h3>\n'
            '<table class="metadata-table">\n'
            '<tr>\n'
            '<th>Character</th>\n'
            '<th>Description</th>\n'
            '</tr>\n'
            + character_rows
            + '</table>'
        )

        additional_data = (
            '\n<div class="video-info">\n'
            f"<h2>{metadata.get('title', 'Untitled')}</h2>\n"
            '<p><strong>Description:</strong> '
            f"{metadata.get('description', 'No description available')}</p>\n"
            '</div>\n'
            f'{character_table}\n'
        )

        scenes = metadata.get('scenes') or []
        scenes_output = "".join(create_scene_table(scene) for scene in scenes)
        storylines_output = create_storylines_table(metadata.get('storylines') or {})
        qa_output = create_qa_section(metadata.get('qAndA') or [])
        trimming_suggestions_output = create_trimming_suggestions(
            metadata.get('trimmingSuggestions') or []
        )

        # Generate filmstrip HTML. The end of the last scene is used as the
        # video duration; without a positive duration there is nothing to scale
        # the markers against, so the filmstrip is skipped.
        filmstrip_html = ""
        if scenes:
            last_span = scenes[-1].get('timestamps') or {}
            video_duration = timestamp_to_seconds(last_span.get('end_timestamp', '0:00:00'))
            if video_duration > 0:
                filmstrip_html = create_filmstrip(scenes, video_duration)
            else:
                logger.warning(f"No usable duration for video ID {video_id}; filmstrip skipped")
        else:
            logger.warning(f"No scenes in metadata for video ID {video_id}; filmstrip skipped")

        logger.info("Video processing completed successfully")
        metadata_html = (
            additional_data
            + scenes_output
            + storylines_output
            + qa_output
            + trimming_suggestions_output
        )
        return video_path, filmstrip_html, metadata_html
    except Exception as e:
        # UI boundary: report the failure in the page rather than crashing the handler.
        logger.exception(f"Error processing video: {str(e)}")
        return None, "", f"Error processing video: {str(e)}"
# Stylesheet passed to gr.Blocks(css=...). The '#...' selectors target the
# elem_id values assigned in the layout below (header, logo, header-content,
# top-panel, video-list-column, video-list, video-column, filmstrip-container,
# scrollable-content, metadata-container) and the overlay created by the JS;
# the class selectors style the HTML fragments produced by the create_* helpers.
css = """
body {
margin: 0;
padding: 0;
font-family: Arial, sans-serif;
overflow: hidden;
}
.container {
display: flex;
flex-direction: column;
height: 100vh;
}
#header {
display: flex;
align-items: center;
padding: 10px;
background-color: white;
}
#logo {
width: auto;
height: 150px;
margin-right: 20px;
box-shadow: none !important;
border: none !important;
background: none !important;
object-fit: contain;
}
#header-content {
flex-grow: 1;
display: flex;
justify-content: space-between;
align-items: center;
}
#header-content h1 {
margin: 0;
font-size: 45px;
font-weight: bold;
}
#header-content a {
font-size: 18px;
color: #0066cc;
text-decoration: none;
}
#header-content a:hover {
text-decoration: underline;
}
#top-panel {
position: sticky;
top: 10vh;
background-color: white;
z-index: 100;
padding: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
display: flex;
height: 35vh;
overflow: hidden;
}
#video-list-column {
display: flex;
flex-direction: column;
max-height: 100%;
width: 30%;
overflow-y: auto;
}
#video-list label {
display: block;
width: 100%;
}
#video-column {
display: flex;
flex-direction: column;
max-height: 100%;
overflow: hidden;
width: 70%;
}
#video-column > div:first-child {
display: flex;
flex-direction: column;
height: calc(100% - 100px);
}
#video-column video {
max-height: 100%;
object-fit: contain;
width: 100%;
margin: 0;
}
#filmstrip-container {
width: 100%;
height: 80px !important;
background-color: #f0f0f0;
position: relative;
overflow: hidden;
cursor: pointer;
margin-top: 0;
}
#filmstrip-container > div,
#filmstrip-container > div > div,
#filmstrip-container > div > div > div {
height: 100% !important;
}
#scrollable-content {
flex-grow: 1;
overflow-y: auto;
padding: 20px;
height: calc(55vh - 40px);
}
#metadata-container {
margin-top: 20px;
}
.content-samples {
display: flex;
flex-direction: column;
overflow-y: auto;
max-height: 100%;
}
.content-samples > .wrap {
display: flex;
flex-direction: column;
}
.content-samples .hidden {
display: none !important;
}
.content-samples > .wrap > .wrap {
display: flex !important;
flex-direction: column !important;
}
.content-samples label {
padding: 10px;
cursor: pointer;
border-bottom: 1px solid #ddd;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.content-samples label:hover {
background-color: #f0f0f0;
}
.video-info {
margin-bottom: 20px;
}
.scene-container {
margin-bottom: 30px;
}
.metadata-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
.metadata-table th, .metadata-table td {
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}
.metadata-table th {
background-color: #f2f2f2;
}
.metadata-table tr:nth-child(even) {
background-color: #f9f9f9;
}
.timestamp-link {
color: #0066cc;
text-decoration: none;
cursor: pointer;
}
.timestamp-link:hover {
text-decoration: underline;
}
.chat-discussion {
background-color: #f0f0f0;
border-radius: 10px;
padding: 15px;
margin-bottom: 20px;
}
.question {
font-weight: bold;
margin-bottom: 5px;
}
.answer {
margin-bottom: 15px;
padding-left: 15px;
}
.correlation-scores {
font-size: 18px;
margin-bottom: 20px;
}
#reinitialization-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.5);
display: flex;
justify-content: center;
align-items: center;
z-index: 9999;
color: white;
font-size: 24px;
font-weight: bold;
}
@media (max-width: 768px) {
#header {
flex-direction: column;
align-items: flex-start;
}
#header-content h1 {
font-size: 24px;
}
#header-content p {
font-size: 14px;
}
#logo {
align-self: flex-end;
margin-top: 10px;
}
#top-panel {
flex-direction: column;
}
#video-list-column, #video-column {
width: 100%;
}
}
.icon-buttons button {
display: none !important;
}
"""
# Client-side script passed to gr.Blocks(head=...). It wires the filmstrip
# (needle follows playback, click seeks), makes '.timestamp-link' anchors seek
# the video by parsing their 'data-timestamp' as H:M:S, and shows a
# 'Loading assets...' overlay while re-initialising after the <video> src
# changes (detected via a MutationObserver plus a 1 s polling interval).
# NOTE(review): initializeFilmstrip/initializeTimestampLinks add listeners on
# every re-initialisation and never remove them — confirm whether elements that
# survive a video switch accumulate duplicate handlers.
js = """
<script>
(function() {
let isReinitializing = false;
let lastVideoSrc = null;
function showOverlay() {
let overlay = document.getElementById('reinitialization-overlay');
if (!overlay) {
overlay = document.createElement('div');
overlay.id = 'reinitialization-overlay';
overlay.style.position = 'fixed';
overlay.style.top = '0';
overlay.style.left = '0';
overlay.style.width = '100%';
overlay.style.height = '100%';
overlay.style.backgroundColor = 'rgba(0, 0, 0, 0.5)';
overlay.style.display = 'flex';
overlay.style.justifyContent = 'center';
overlay.style.alignItems = 'center';
overlay.style.zIndex = '9999';
const message = document.createElement('div');
message.textContent = 'Loading assets...';
message.style.color = 'white';
message.style.fontSize = '24px';
message.style.fontWeight = 'bold';
overlay.appendChild(message);
document.body.appendChild(overlay);
}
overlay.style.display = 'flex';
}
function hideOverlay() {
const overlay = document.getElementById('reinitialization-overlay');
if (overlay) {
overlay.style.display = 'none';
}
}
function initializeFilmstrip() {
var videoElement = document.querySelector('video');
var filmstripContainer = document.getElementById('filmstrip-container');
var filmstripInner = filmstripContainer ? filmstripContainer.querySelector('#filmstrip-inner') : null;
var scrubbingNeedle = document.getElementById('scrubbing-needle');
if (!videoElement || !filmstripContainer || !filmstripInner || !scrubbingNeedle) {
return;
}
var videoDuration = parseFloat(filmstripInner.getAttribute('data-duration') || videoElement.duration);
videoElement.addEventListener('timeupdate', function() {
var progress = videoElement.currentTime / videoDuration;
scrubbingNeedle.style.left = (progress * 100) + '%';
});
filmstripContainer.addEventListener('click', function(event) {
var rect = filmstripContainer.getBoundingClientRect();
var clickPosition = (event.clientX - rect.left) / rect.width;
videoElement.currentTime = clickPosition * videoDuration;
});
}
function initializeTimestampLinks() {
var videoElement = document.querySelector('video');
var links = document.querySelectorAll('.timestamp-link');
if (!videoElement || links.length === 0) {
return;
}
links.forEach(function(link) {
link.addEventListener('click', function(e) {
e.preventDefault();
var timestamp = this.getAttribute('data-timestamp');
var parts = timestamp.split(':');
var seconds = parseInt(parts[0], 10) * 3600 + parseInt(parts[1], 10) * 60 + parseFloat(parts[2]);
videoElement.currentTime = seconds;
});
});
}
function initializeEverything() {
if (isReinitializing) {
return;
}
isReinitializing = true;
showOverlay();
const videoElement = document.querySelector('video');
if (videoElement) {
const onCanPlay = function() {
videoElement.removeEventListener('canplay', onCanPlay);
initializeFilmstrip();
initializeTimestampLinks();
isReinitializing = false;
hideOverlay();
};
videoElement.addEventListener('canplay', onCanPlay);
// If the video is already loaded, trigger the event manually
if (videoElement.readyState >= 3) {
videoElement.dispatchEvent(new Event('canplay'));
}
} else {
// If there's no video element, just initialize other components
initializeFilmstrip();
initializeTimestampLinks();
isReinitializing = false;
hideOverlay();
}
}
function checkForVideoChanges() {
const videoElement = document.querySelector('video');
if (videoElement && videoElement.src !== lastVideoSrc) {
lastVideoSrc = videoElement.src;
showOverlay();
setTimeout(initializeEverything, 100);
}
}
// Set up a MutationObserver to watch for changes in the entire document
const contentObserver = new MutationObserver((mutations) => {
checkForVideoChanges();
});
contentObserver.observe(document.body, {
childList: true,
subtree: true,
attributes: true,
attributeFilter: ['src']
});
// Periodically check for video changes
setInterval(checkForVideoChanges, 1000);
// Initialize everything when the DOM is ready
document.addEventListener('DOMContentLoaded', initializeEverything);
// Also try to initialize after a short delay, in case DOMContentLoaded has already fired
setTimeout(initializeEverything, 1000);
})();
</script>
"""
# UI layout, event wiring and entry point.
# NOTE(review): the nesting of the rows/columns below was reconstructed from
# the CSS selectors (e.g. '#video-column video', '#video-list-column') because
# the original indentation was not available — confirm against the deployed app.
with gr.Blocks(css=css, head=js) as iface:
    with gr.Row(elem_id="header"):
        with gr.Column(scale=1):
            gr.Image("logo.png", elem_id="logo", show_label=False, interactive=False)
        with gr.Column(elem_id="header-content", scale=10):
            gr.Markdown(
                "\n"
                "# Exploration space\n"
                "## [🔗 Dataset](https://huggingface.co/datasets/HuggingFaceFV/finevideo)\n"
            )

    with gr.Row(elem_id="top-panel"):
        with gr.Column(scale=1, elem_id="video-list-column"):
            video_list_data = load_video_list()
            video_list = gr.Radio(
                label="Content Samples",
                choices=[video["title"] for video in video_list_data],
                elem_id="video-list",
                value=None,
                container=False
            )
        with gr.Column(scale=2, elem_id="video-column"):
            video_output = gr.Video(label="Video", elem_id="video-player")
            filmstrip_output = gr.HTML(elem_id="filmstrip-container")

    with gr.Row(elem_id="scrollable-content"):
        metadata_output = gr.HTML(elem_id="metadata-container")

    # The radio reports the selected title, so map titles back to video IDs.
    # When two videos share a title the first one in the list wins.
    title_to_video_id: Dict[str, str] = {}
    for video in video_list_data:
        title_to_video_id.setdefault(video["title"], video["video_id"])

    def wrapped_process_video(title: str) -> tuple:
        """Resolve the selected title to a video ID and render that video.

        Returns the ``(video_path, filmstrip_html, metadata_html)`` tuple for
        the three output components; empty outputs when nothing is selected,
        and an error message in the metadata slot when the title is unknown.
        """
        if not title:
            return None, "", ""
        video_id = title_to_video_id.get(title)
        if video_id is None:
            logger.error(f"No video found for title: {title}")
            return None, "", f"Error: No video found for title {title}"
        logger.info(f"Processing video with ID: {video_id}")
        video_path, filmstrip_html, metadata_html = process_video(video_id)
        return video_path, filmstrip_html, metadata_html

    video_list.change(
        fn=wrapped_process_video,
        inputs=[video_list],
        outputs=[video_output, filmstrip_output, metadata_output]
    )

if __name__ == "__main__":
    iface.launch()