Spaces:

kavehtaheri
/

hotspotv1

Sleeping

App Files Files Community

kavehtaheri committed on Jul 24

Commit

c157612

verified ·

1 Parent(s): a9bf664

Create app.py

Browse files

Files changed (1) hide show

app.py +254 -0

app.py ADDED Viewed

	@@ -0,0 +1,254 @@

+import gradio as gr
+import requests
+import google.generativeai as genai
+import os
+import json
+import time
+from moviepy.editor import VideoFileClip
+# --- 1. CONFIGURATION & CONSTANTS ---
+# Load API keys from Hugging Face Space secrets
+ONE_API_KEY ="268976:66f4f58a2a905"
+ONE_API_URL ="https://api.one-api.ir/chatbot/v1/gpt4o/"
+GEMINI_API_KEY ="AIzaSyAKI92YawOKQ1-HRLmvaryMEWk_y4alJgA"
+# --- MASTER PROMPTS ---
+# Prompt for Mode 1: "Viral Spot for Shorts (< 3 mins)"
+# This template is filled with str.format(transcript_content=...), so
+# {transcript_content} is the only placeholder; the doubled braces {{ }} around
+# the JSON example are escapes that render as literal { } in the final prompt.
+# The orchestrator reads "final_clip_start_seconds" and "final_clip_end_seconds"
+# (required) plus "reasoning" and "clip_title_suggestion" (optional) from the reply.
+PROMPT_SHORTS_MODE = """
+You are an expert producer of viral short-form content for platforms like YouTube Shorts, TikTok, and Instagram Reels. Your task is to analyze a transcript and identify the single most impactful, hook-worthy segment that can stand alone as a compelling video under 3 minutes, ideally around 60-90 seconds.
+PRIORITIES:
+1.  **Strong Hook:** The segment must start immediately with a shocking statement, a controversial question, or a highly emotional moment.
+2.  **Single, Clear Point:** The clip should focus on ONE idea. Do not try to tell a complex story.
+3.  **High-Energy & Pacing:** Identify a section where the speaker is energetic or the topic is intense.
+4.  **Clear Payoff:** The segment must have a quick, satisfying conclusion or punchline.
+Analyze the provided SRT transcript and find the absolute best segment that fits these criteria. The timestamps in the SRT file are in the format HH:MM:SS,ms.
+**Input SRT Content:**
+{transcript_content}
+**Instructions:**
+Your output MUST be a single, valid JSON object and nothing else. Do not include any text before or after the JSON object. The times must be in total seconds.
+{{
+  "clip_title_suggestion": "A catchy, clickbait-style title for this short clip.",
+  "reasoning": "Briefly explain why this segment is perfect for a short video, referencing the hook and payoff.",
+  "final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
+  "final_clip_end_seconds": <The precise end time in total seconds from the SRT>
+}}
+"""
+# Prompt for Mode 2: "Viral Narrative Clip (5-12 mins)"
+# Used by the orchestrator whenever the selected mode is not the Shorts mode.
+# Filled with str.format(transcript_content=...): {transcript_content} is the
+# only placeholder, and the doubled braces {{ }} are escapes that render as
+# literal { } around the JSON example. The reply must carry
+# "final_clip_start_seconds" and "final_clip_end_seconds"; "reasoning" and
+# "narrative_summary" are read when present.
+PROMPT_NARRATIVE_MODE = """
+You are an expert video editor and storyteller. Your task is to analyze a transcript to find the most compelling and cohesive narrative segment that is between 5 and 12 minutes long.
+METHODOLOGY:
+1.  **Identify the Peak Moment:** First, locate the single most insightful, profound, or climactic moment in the entire conversation. This is your anchor.
+2.  **Establish the Narrative Arc:**
+    *   **Find the Setup:** Trace backward from the peak moment to find the true beginning of this story or argument. Where is the context given? Where does the thesis for this specific sub-topic begin?
+    *   **Find the Resolution:** Trace forward from the peak moment to find the natural conclusion of this story or argument. Where does the speaker summarize the point or transition away?
+3.  **Ensure Cohesion & Duration:** The final selected segment MUST flow as a complete, self-contained story. Adjust the start and end boundaries to ensure the final duration is between 5 and 12 minutes while preserving the narrative integrity.
+Analyze the provided SRT transcript using this methodology. The timestamps in the SRT file are in the format HH:MM:SS,ms.
+**Input SRT Content:**
+{transcript_content}
+**Instructions:**
+Your output MUST be a single, valid JSON object and nothing else. Do not include any text before or after the JSON object. The times must be in total seconds.
+{{
+  "narrative_summary": "A one-sentence summary of the story told in the extracted clip.",
+  "reasoning": "Explain why this segment works as a standalone narrative, mentioning the peak moment and how the start/end points provide a full arc.",
+  "final_clip_start_seconds": <The precise start time in total seconds from the SRT>,
+  "final_clip_end_seconds": <The precise end time in total seconds from the SRT>
+}}
+"""
+# --- 2. LLM AGENT WRAPPER FUNCTIONS ---
+def call_gpt4o_agent(transcript_content, prompt_template):
+    """Makes an API call to a OneAPI-compatible endpoint for GPT-4o."""
+    if not ONE_API_KEY or not ONE_API_URL:
+        raise ValueError("ONE_API_KEY and ONE_API_URL secrets are not set in the Hugging Face Space.")
+    headers = {
+        "Authorization": f"Bearer {ONE_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    final_prompt = prompt_template.format(transcript_content=transcript_content)
+    # Standard OpenAI payload format
+    payload = {
+        "model": "gpt-4o",
+        "messages": [{"role": "user", "content": final_prompt}],
+        "temperature": 0.2, # Lower temperature for more predictable, structured output
+        "response_format": {"type": "json_object"} # Use JSON mode for reliability
+    }
+    try:
+        response = requests.post(ONE_API_URL, headers=headers, json=payload, timeout=180) # 3 minute timeout
+        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+        # The content is usually in the 'choices' list
+        result = response.json()
+        return result['choices'][0]['message']['content']
+    except requests.exceptions.RequestException as e:
+        return f"Error in API call: {str(e)}"
+    except KeyError:
+        return f"Error: Unexpected response format from API. Full response: {response.text}"
+def call_gemini_agent(transcript_content, prompt_template):
+    """Makes an API call to the Google Gemini API."""
+    if not GEMINI_API_KEY:
+        raise ValueError("GEMINI_API_KEY secret is not set in the Hugging Face Space.")
+    try:
+        genai.configure(api_key=GEMINI_API_KEY)
+        # Configure the model to return JSON directly
+        generation_config = genai.GenerationConfig(
+            response_mime_type="application/json",
+            temperature=0.2
+        )
+        model = genai.GenerativeModel(
+            'gemini-1.5-flash-latest',
+            generation_config=generation_config
+        )
+        final_prompt = prompt_template.format(transcript_content=transcript_content)
+        response = model.generate_content(final_prompt)
+        return response.text.strip()
+    except Exception as e:
+        return f"Error calling Gemini API: {str(e)}"
+# --- 3. CORE ORCHESTRATOR FUNCTION ---
+def generate_viral_clip(video_file, srt_file, analysis_mode, llm_agent, progress=gr.Progress()):
+    """
+    Main function to orchestrate the analysis and clipping process.
+    """
+    # --- Input Validation ---
+    if not video_file or not srt_file:
+        return "Error: Please upload both a video file and an SRT file.", None
+    if not ONE_API_KEY or not ONE_API_URL or not GEMINI_API_KEY:
+        return "Error: API keys are not configured correctly in the Space secrets.", None
+    try:
+        # --- Step 1: Read SRT Content ---
+        progress(0.1, desc="Reading SRT file...")
+        with open(srt_file.name, 'r', encoding='utf-8') as f:
+            transcript_content = f.read()
+        # --- Step 2: Select Prompt based on Mode ---
+        progress(0.2, desc="Preparing analysis prompt...")
+        if analysis_mode == "Viral Spot for Shorts (< 3 mins)":
+            prompt_template = PROMPT_SHORTS_MODE
+        else:
+            prompt_template = PROMPT_NARRATIVE_MODE
+        # --- Step 3: Call Selected LLM Agent ---
+        progress(0.4, desc=f"Calling {llm_agent} for analysis... (This may take a minute)")
+        if llm_agent == "GPT-4o (via OneAPI)":
+            llm_response_str = call_gpt4o_agent(transcript_content, prompt_template)
+        else: # Google Gemini
+            llm_response_str = call_gemini_agent(transcript_content, prompt_template)
+        # --- Step 4: Parse Structured JSON Response ---
+        progress(0.7, desc="Parsing AI response...")
+        if llm_response_str.startswith("Error"):
+             return llm_response_str, None
+        try:
+            # Clean the response just in case the LLM adds markdown backticks
+            cleaned_response = llm_response_str.strip().replace("```json", "").replace("```", "")
+            parsed_response = json.loads(cleaned_response)
+            start_time = float(parsed_response['final_clip_start_seconds'])
+            end_time = float(parsed_response['final_clip_end_seconds'])
+            reasoning = parsed_response.get('reasoning', 'No reasoning provided.')
+            # Format a nice summary for the user
+            summary = (f"✅ Analysis Complete!\n\n"
+                       f"Reasoning: {reasoning}\n\n"
+                       f"Title Suggestion: {parsed_response.get('clip_title_suggestion', 'N/A')}\n"
+                       f"Narrative Summary: {parsed_response.get('narrative_summary', 'N/A')}\n\n"
+                       f"Clipping video from {time.strftime('%H:%M:%S', time.gmtime(start_time))} to {time.strftime('%H:%M:%S', time.gmtime(end_time))}.")
+        except (json.JSONDecodeError, KeyError, TypeError) as e:
+            error_msg = f"Error: Failed to parse the AI's response. It may not be valid JSON.\n\nError Details: {str(e)}\n\nRaw AI Response:\n---\n{llm_response_str}"
+            return error_msg, None
+        # --- Step 5: Trim Original Video using Timestamps ---
+        progress(0.8, desc="Clipping video... (This can be slow for large files)")
+        output_filename = "viral_clip.mp4"
+        with VideoFileClip(video_file.name) as video:
+            # Check if times are within video duration
+            if end_time > video.duration:
+                end_time = video.duration
+                summary += f"\n\n⚠️ Warning: End time was beyond video duration, adjusted to {end_time:.2f}s."
+            new_clip = video.subclip(start_time, end_time)
+            new_clip.write_videofile(output_filename, codec="libx264", audio_codec="aac")
+        progress(1.0, desc="Done!")
+        return summary, output_filename
+    except Exception as e:
+        return f"An unexpected error occurred: {str(e)}", None
+# --- 4. GRADIO UI DEFINITION ---
+# Build the page declaratively: components are created in display order inside
+# the Blocks / Row / Column context managers, so statement order is the layout.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # 🎬 Multi-Agent Viral Video Extractor
+        This tool uses a powerful LLM agent (GPT-4o or Gemini) to analyze a video transcript and automatically clip the most viral segment.
+        **⚠️ Important Setup:**
+        1.  This Hugging Face Space must have `ONE_API_KEY`, `ONE_API_URL`, and `GEMINI_API_KEY` configured in its **Settings > Secrets**.
+        2.  The process involves uploading large files and intensive AI analysis, so please be patient.
+        """
+    )
+    with gr.Row():
+        # Left column: the four inputs and the submit button.
+        with gr.Column(scale=1):
+            # NOTE(review): confirm which value type gr.Video and gr.File hand
+            # to the callback in the installed Gradio version (path string vs.
+            # file object); generate_viral_clip reads `.name` on both.
+            video_input = gr.Video(label="1. Upload Original Video")
+            srt_input = gr.File(label="2. Upload English SRT File", file_types=['.srt'])
+            # The choice strings below are compared verbatim inside
+            # generate_viral_clip, so they must stay in sync with it.
+            mode_input = gr.Radio(
+                label="3. Select Analysis Mode",
+                choices=["Viral Spot for Shorts (< 3 mins)", "Viral Narrative Clip (5-12 mins)"],
+                value="Viral Narrative Clip (5-12 mins)"
+            )
+            agent_input = gr.Radio(
+                label="4. Select AI Agent",
+                choices=["GPT-4o (via OneAPI)", "Google Gemini"],
+                value="Google Gemini"
+            )
+            submit_button = gr.Button("🚀 Generate Viral Clip", variant="primary")
+        # Right column: read-only outputs.
+        with gr.Column(scale=2):
+            summary_output = gr.Textbox(label="Analysis Summary", lines=10, interactive=False)
+            video_output = gr.Video(label="Generated Clip", interactive=False)
+    # `inputs` are passed positionally, matching the first four parameters of
+    # generate_viral_clip; its (message, clip_path) return fills `outputs`.
+    submit_button.click(
+        fn=generate_viral_clip,
+        inputs=[video_input, srt_input, mode_input, agent_input],
+        outputs=[summary_output, video_output],
+        #api_name="generate_clip" # Uncomment to create an API endpoint
+    )
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch(debug=True)