File size: 4,850 Bytes
c9440c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import json
import gradio as gr
from google import genai
import pandas as pd
import os
import re
import concurrent.futures
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Initialize the GenAI client with the API key.
# NOTE(review): os.getenv returns None when GOOGLE_API_KEY is unset; the
# client is still constructed and every API call would fail later — confirm
# the .env file defines GOOGLE_API_KEY.
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

def analyze_single_video(video_path):
    """Analyze a single video's emotions with the Gemini model.

    Sends the raw video bytes plus a JSON-schema prompt to the model and
    parses the JSON it returns.

    Args:
        video_path: Filesystem path to an .mp4 video.

    Returns:
        dict with "Vocal", "Verbal" and "Vision" keys (each holding per-emotion
        scores and timestamp reasons), or None if reading the file, the API
        call, or JSON parsing fails.
    """
    prompt = """
    Detect emotion from this video and classify into 3 categories: happy, sad, normal. Return only JSON format without any extra text.

    Return this JSON schema:

    {
      "Vocal": {
        "sad_score": (%),
        "happy_score": (%),
        "normal_score": (%),
        "sad_reason": (list of timestamps),
        "happy_reason": (list of timestamps),
        "normal_reason": (list of timestamps)
      },
      "Verbal": {
        "sad_score": (%),
        "happy_score": (%),
        "normal_score": (%),
        "sad_reason": (list of timestamps),
        "happy_reason": (list of timestamps),
        "normal_reason": (list of timestamps)
      },
      "Vision": {
        "sad_score": (%),
        "happy_score": (%),
        "normal_score": (%),
        "sad_reason": (list of timestamps),
        "happy_reason": (list of timestamps),
        "normal_reason": (list of timestamps)
      }
    }

    Reasons (sad_reason, happy_reason, normal_reason) should be a list of beginning-ending timestamps. For example: ['0:11-0:14', '0:23-0:25', '0:27-0:29']
    """

    try:
        with open(video_path, 'rb') as video_file:
            video_bytes = video_file.read()

        print(f"Processing: {video_path}")

        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[{"text": prompt}, {"inline_data": {"data": video_bytes, "mime_type": "video/mp4"}}],
            config={"http_options": {"timeout": 60000}}
        )

        # response.text may be None (e.g. a blocked/empty response) — guard
        # before stripping so we fall into the except path with a clear error.
        response_text = (response.text or "").strip()
        # Strip an optional markdown code fence; the "json" label is optional
        # because the model sometimes emits a bare ``` fence.
        json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', response_text)
        json_string = json_match.group(1).strip() if json_match else response_text
        result = json.loads(json_string)

        return result

    except Exception as e:
        # Best-effort per-video handling: log and signal failure with None so
        # one bad video does not abort the whole batch.
        print(f"Error processing {video_path}: {e}")
        return None

def process_multiple_videos(video_paths):
    """Analyze every video concurrently and tabulate the emotion scores.

    Fans the per-video analysis out over a thread pool, flattens each valid
    result into one row per (category, emotion) pair, and saves the table to
    "emotion_results.csv" and "emotion_results.xlsx".

    Args:
        video_paths: Iterable of paths to .mp4 files.

    Returns:
        pandas.DataFrame with columns title, category, emotion, score, reasons.
    """
    rows = []

    # Threads suit this fan-out: each task mostly waits on file and network I/O.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        analyses = list(pool.map(analyze_single_video, video_paths))

    for path, analysis in zip(video_paths, analyses):
        if analysis is None:
            # Analysis failed for this video; it was already logged upstream.
            continue

        title = os.path.basename(path)
        print(f"Processing result for {title}: {analysis}")

        try:
            for category in ('Verbal', 'Vocal', 'Vision'):
                section = analysis[category]
                for emotion in ('normal', 'happy', 'sad'):
                    rows.append({
                        'title': title,
                        'category': category,
                        'emotion': emotion,
                        'score': section.get(f"{emotion}_score", 0),
                        # Serialize the timestamp list so it survives CSV round-trips.
                        'reasons': json.dumps(section.get(f"{emotion}_reason", [])),
                    })
        except KeyError as err:
            print(f"Skipping invalid result for {title} due to missing key: {err}")

    # Persist the flattened table in both formats and hand it back.
    df = pd.DataFrame(rows)
    df.to_csv("emotion_results.csv", index=False)
    df.to_excel("emotion_results.xlsx", index=False)
    return df

def gradio_interface(video_paths):
    """Gradio callback: validate the uploads, run the batch analysis.

    Args:
        video_paths: List of uploaded files — Gradio temp-file objects
            (exposing .name) or plain path strings.

    Returns:
        Tuple of (results DataFrame, path to the saved CSV file).

    Raises:
        ValueError: If no upload is an existing .mp4 file.
    """
    # Gradio may hand us temp-file wrappers or plain strings; normalize to paths.
    paths = [file.name if hasattr(file, 'name') else file for file in video_paths]
    # Keep existing files with an .mp4 extension. The check is case-insensitive
    # so uploads named e.g. "CLIP.MP4" are no longer silently dropped.
    paths = [p for p in paths if os.path.isfile(p) and p.lower().endswith(".mp4")]

    if not paths:
        raise ValueError("No valid video files were provided.")

    df = process_multiple_videos(paths)

    # Save the DataFrame as CSV and return it
    csv_file = "emotion_results.csv"
    df.to_csv(csv_file, index=False)

    return df, csv_file

# Gradio interface definition: a multiple-file .mp4 upload widget feeding
# gradio_interface, which returns the results table and a downloadable CSV.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(file_types=[".mp4"], label="Upload one or more videos", file_count="multiple"),
    outputs=[gr.DataFrame(), gr.File(label="Download CSV")],
    title="Batch Video Emotion Analyzer",
    description="Upload multiple videos to analyze their emotions across verbal, vocal, and visual channels."
)

# Launch the interface.
# NOTE(review): share=True exposes the app via a public Gradio tunnel URL —
# confirm that is intended before deploying.
iface.launch(share=True)