# soundscene-v1 / app.py
# Uploaded by yunusakkaya
# Commit c48c207 (verified): "Update app.py"
import av
import numpy as np
import torch
import streamlit as st
from PIL import Image
from torchvision.transforms import Resize, ToTensor, Compose
from transformers import AutoImageProcessor, AutoTokenizer, VisionEncoderDecoderModel
from diffusers import StableAudioPipeline
import soundfile as sf
import stripe
import time # For simulating processing time
import firebase_admin
from firebase_admin import credentials, auth
from huggingface_hub import login
import os
import json
# Read the Firebase service-account JSON from the Hugging Face secret "firebase".
firebase_json_string = os.getenv("firebase")
if not firebase_json_string:
    # Fail fast with an actionable message; json.loads(None) would otherwise
    # raise an opaque TypeError.
    raise RuntimeError(
        'The "firebase" secret is not set; it must contain the Firebase '
        "service-account JSON."
    )
firebase_credentials = json.loads(firebase_json_string)

# Hugging Face access token, stored under the secret name "token1".
hf_token = os.getenv("token1")
if hf_token:
    login(hf_token)
# Without a token, login() would fall back to prompting for one interactively,
# which cannot work inside a server process, so the call is skipped instead.

# Initialise the Firebase Admin SDK only if no app has been registered yet,
# so repeated executions of this script do not re-initialise it.
if not firebase_admin._apps:
    cred = credentials.Certificate(firebase_credentials)
    firebase_admin.initialize_app(cred)
# -----------------------------
# 1. Device Configuration
# -----------------------------
# Default to the CPU and switch to the GPU only when CUDA is usable.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
# -----------------------------
# 2. Streamlit Page Configuration and Styling
# -----------------------------
st.set_page_config(
    page_title="SoundScene.ai",
    page_icon=":musical_note:",
    layout="wide",
)

# Dark theme and custom styles, injected as one raw <style> element.
_BASE_STYLES = """
<style>
/* General Body Styles */
body {
background-color: #0e1117;
color: #ffffff;
}
/* Title Styles */
.stTitle {
color: #1e90ff;
text-align: center;
}
/* Sidebar Styles */
.stSidebar {
background-color: #1c1e26;
}
/* Button Styles */
.stButton>button {
background-color: #1e90ff;
color: #ffffff;
}
.stButton>button:hover {
background-color: #0d6efd;
}
/* Header Styles */
.stHeader {
color: #1e90ff;
}
/* Footer Styles */
footer {
visibility: hidden;
}
</style>
"""
st.markdown(_BASE_STYLES, unsafe_allow_html=True)
# -----------------------------
# 4. Stripe Configuration
# -----------------------------
# Read the Stripe secret key from the environment (e.g. a Space secret named
# STRIPE_SECRET_KEY) instead of hard-coding a placeholder in the source.
# The value is None when the secret is not configured.
stripe.api_key = os.getenv("STRIPE_SECRET_KEY")
# -----------------------------
# 5. Session State Initialization
# -----------------------------
# Seed every session key exactly once; keys that already exist keep their value.
_SESSION_DEFAULTS = {
    "user": None,
    "auth_mode": "Login",  # default mode must match a radio option exactly
    "conversion_count": 0,
    "subscribed": False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# Authentication logic
def login_user(email, password):
    """Verify an email/password pair and store the user in the session.

    The Firebase Admin SDK can look up accounts but cannot check passwords, so
    the previous implementation logged in anyone who knew a registered email.
    Credentials are now verified through the Firebase Auth REST endpoint
    ``accounts:signInWithPassword``. That endpoint needs the project's Web API
    key, which is read from the ``FIREBASE_WEB_API_KEY`` environment variable.

    Args:
        email (str): Email address entered by the user.
        password (str): Password entered by the user.

    Returns:
        None. On success ``st.session_state.user`` is set to a dict with the
        keys ``uid`` and ``email``; on failure an error is shown and the
        session is left unchanged.
    """
    import urllib.error
    import urllib.parse
    import urllib.request

    if not email or not password:
        st.error("Login failed: email and password are required.")
        return

    api_key = os.getenv("FIREBASE_WEB_API_KEY")
    if not api_key:
        # Fail closed: without the key the password cannot be verified.
        st.error("Login failed: password verification is not configured.")
        return

    endpoint = (
        "https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword?"
        + urllib.parse.urlencode({"key": api_key})
    )
    body = json.dumps(
        {"email": email, "password": password, "returnSecureToken": True}
    ).encode("utf-8")
    request = urllib.request.Request(
        endpoint,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            payload = json.load(response)
        signed_in = {"uid": payload["localId"], "email": payload["email"]}
    except urllib.error.HTTPError as e:
        # Rejected credentials come back as an HTTP error with a JSON body.
        try:
            reason = json.load(e)["error"]["message"]
        except (ValueError, KeyError, TypeError):
            reason = str(e)
        st.error(f"Login failed: {reason}")
        return
    except Exception as e:
        st.error(f"Login failed: {str(e)}")
        return

    st.session_state.user = signed_in
    st.success("Successfully logged in!")
def register_user(email, password):
    """Create a Firebase account and report the outcome in the UI.

    Args:
        email (str): Email address for the new account.
        password (str): Password for the new account.

    Returns:
        None. A success or error message is rendered with Streamlit.
    """
    try:
        auth.create_user(email=email, password=password)
    except Exception as e:
        st.error(f"Registration failed: {str(e)}")
    else:
        # The old code also assigned st.session_state.auth_mode here. This
        # function runs after the radio widget with key="auth_mode" has been
        # created, and Streamlit rejects writes to a widget's key at that
        # point. The resulting exception was caught above and shown as
        # "Registration failed" even though the account had been created.
        st.success("Registration successful! You can now log in.")
# Authentication interface: the sign-in form is shown only while nobody is logged in.
if st.session_state.user is None:
    with st.sidebar:
        st.subheader("Please authenticate to use the application.")
        # Toggle between Login and Register
        auth_mode = st.radio(
            "Select an option",
            ("Login", "Register"),
            key="auth_mode",
        )
        email = st.text_input("Email", key="sidebar_email")
        password = st.text_input("Password", type="password", key="sidebar_password")
        # Only the button belonging to the selected mode is rendered.
        if auth_mode == "Login" and st.button("Log In"):
            login_user(email, password)
        elif auth_mode == "Register" and st.button("Register"):
            register_user(email, password)

# Main app (accessible regardless of login status)
if st.session_state.user:
    with st.sidebar:
        st.success(f"Logged in as: {st.session_state.user['email']}")
        if st.button("Log Out"):
            # Forget the user; the rest of the session state is kept.
            st.session_state.user = None
@st.cache_resource
def load_models():
    """Load the captioning and audio models and move them to ``device``.

    The result is cached by ``st.cache_resource``.

    Returns:
        tuple: ``(image_processor, tokenizer, video_model, audio_pipe)``.
    """
    # Use half precision only on CUDA. diffusers warns that float16 pipelines
    # are not expected to run on the CPU, so fall back to float32 there.
    audio_dtype = torch.float16 if device == "cuda" else torch.float32
    audio_pipe = StableAudioPipeline.from_pretrained(
        "stabilityai/stable-audio-open-1.0",
        use_auth_token=hf_token,
        torch_dtype=audio_dtype,
    )
    audio_pipe = audio_pipe.to(device)
    image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    video_model = VisionEncoderDecoderModel.from_pretrained(
        "Neleac/timesformer-gpt2-video-captioning"
    ).to(device)
    return image_processor, tokenizer, video_model, audio_pipe


image_processor, tokenizer, video_model, audio_pipe = load_models()
# -----------------------------
# 7. Preprocessing Functions
# -----------------------------
# Helper that resizes video frames
def preprocess_frame(frame, target_size=(224, 224)):
    """
    Turn one raw video frame into a resized PIL image.

    Args:
        frame (np.ndarray): Video frame.
        target_size (tuple): Desired size (height, width).

    Returns:
        PIL.Image: The resized frame.

    Raises:
        ValueError: If the frame is not a 3-dimensional array whose last
            axis has length 3.
    """
    # Non-uint8 data is clipped to 0..255 and cast; uint8 data passes through.
    pixels = frame if frame.dtype == np.uint8 else np.clip(frame, 0, 255).astype(np.uint8)

    has_three_channels = pixels.ndim == 3 and pixels.shape[2] == 3
    if not has_three_channels:
        raise ValueError(f"Expected frame with 3 channels, got shape {pixels.shape}")

    return Resize(target_size)(Image.fromarray(pixels))
def preprocess_frames(frames, target_size=(224, 224), num_frames=8):
    """
    Resize the leading frames of a clip, skipping any frame that fails.

    Args:
        frames (list of np.ndarray): Video frames.
        target_size (tuple): Desired size (height, width).
        num_frames (int): Maximum number of frames taken from the front of
            ``frames``.

    Returns:
        list of PIL.Image: The frames that were preprocessed successfully.
    """
    usable = []
    for position, raw_frame in enumerate(frames[:num_frames], start=1):
        try:
            image = preprocess_frame(raw_frame, target_size)
            # Debugging: Display frame properties
            st.write(f"Frame {position}: size={image.size}, mode={image.mode}")
            usable.append(image)
        except Exception as e:
            # A bad frame is reported and dropped instead of aborting the clip.
            st.warning(f"Skipping frame {position} due to error: {e}")
    return usable
# with st.sidebar:
# st.title("Login")
# email = st.text_input("Email", key="login_email")
# password = st.text_input("Password", type="password", key="login_password")
# login_btn = st.button("Login")
# logout_btn = st.button("Logout")
# if login_btn:
# try:
# user = auth.sign_in_with_email_and_password(email, password)
# st.session_state.user = user
# st.success("Logged in successfully!")
# except:
# st.error("Invalid login credentials.")
# elif logout_btn:
# st.session_state.user = None
# st.success("Logged out successfully!")
# if st.session_state.user:
# st.sidebar.write(f"Welcome, {st.session_state.user['email']}!")
# -----------------------------
# 9. Navigation Menu
# -----------------------------
# Divider followed by the page selector, both rendered in the sidebar.
st.sidebar.markdown("---")
page = st.sidebar.selectbox("Menu", ["Home", "About", "Pricing", "Contact"])
# -----------------------------
# 10. Membership Control and Payment Integration
# -----------------------------
def handle_subscription():
    """Tell the user that subscriptions cannot be purchased yet."""
    unavailable_notice = "Subscription service is currently not available."
    st.warning(unavailable_notice)
# -----------------------------
# 11. Main Content Rendering Based on Navigation
# -----------------------------
def _generate_waveform(prompt, duration_s):
    """Run the audio pipeline for ``prompt`` and return the first waveform.

    Args:
        prompt (str): Text describing the sound to generate.
        duration_s (int | float): Requested audio length in seconds.

    Returns:
        np.ndarray: First generated waveform, transposed and converted to
        float32 on the CPU so it can be handed to ``soundfile``.
    """
    # A fixed seed keeps the result reproducible for a given prompt.
    generator = torch.Generator(device).manual_seed(0)
    audio = audio_pipe(
        prompt=prompt,
        negative_prompt="Low quality.",
        num_inference_steps=50,
        audio_end_in_s=duration_s,
        num_waveforms_per_prompt=1,
        generator=generator,
    ).audios
    return audio[0].T.float().cpu().numpy()


def _encode_wav(samples):
    """Encode ``samples`` as WAV in memory and return the raw bytes.

    Encoding in memory avoids the fixed "generated_audio.wav" file on disk,
    which would be overwritten by every session that generates audio.
    """
    import io

    buffer = io.BytesIO()
    sf.write(buffer, samples, audio_pipe.vae.sampling_rate, format="WAV")
    return buffer.getvalue()


def _render_audio_result(wav_bytes):
    """Play the generated audio and offer a download according to the user's plan."""
    st.audio(wav_bytes, format="audio/wav")
    if not st.session_state.user:
        st.warning("Please log in to download the generated sound.")
    elif st.session_state.subscribed or st.session_state.conversion_count <= 10:
        # Pass the audio bytes themselves. The old code passed the file *path*
        # string, so the downloaded file contained the text of the path.
        st.download_button(
            "Download Sound",
            wav_bytes,
            file_name="soundscene_output.wav",
            mime="audio/wav",
        )
    else:
        st.warning(
            "You have reached the download limit. Please subscribe for unlimited downloads.")


def _extract_frames(container, video_stream, clip_len):
    """Decode up to ``clip_len`` evenly spaced RGB frames from the video.

    Args:
        container: Opened PyAV input container.
        video_stream: The video stream of ``container`` being sampled.
        clip_len (int): Number of frames the captioning model expects.

    Returns:
        list of np.ndarray: Selected frames in "rgb24" format; empty when the
        video yields no frames.
    """
    seg_len = video_stream.frames
    if not seg_len:
        # PyAV reports 0 when the container does not record a frame count,
        # so count the frames by decoding the stream once.
        seg_len = sum(1 for _ in container.decode(video=0))
    if not seg_len:
        return []
    if clip_len > seg_len:
        st.warning(f"Video has only {seg_len} frames, but the model expects {clip_len} frames.")
        clip_len = seg_len
    # Select evenly spaced frame indices
    indices = set(
        np.linspace(0, seg_len - 1, num=clip_len, endpoint=True).astype(np.int64).tolist()
    )
    last_index = max(indices)
    frames = []
    container.seek(0)
    for i, frame in enumerate(container.decode(video=0)):
        if i in indices:
            frames.append(frame.to_ndarray(format="rgb24"))
        if i >= last_index:
            # Every wanted frame has been collected; stop decoding early.
            break
    return frames


if page == "Home":
    # -----------------------------
    # 12. Home Page Content
    # -----------------------------
    st.title("Welcome to SoundScene.ai")
    st.markdown(
        """
<p style='text-align: center;'>
Transform your videos into stunning soundscapes with advanced AI models.
Learn more about our <a href="#About" style="color: #1e90ff;">features</a> and <a href="#Pricing" style="color: #1e90ff;">pricing</a> options.
</p>
""",
        unsafe_allow_html=True
    )
    st.header("Create Audio")
    creation_method = st.radio(
        "Choose how you want to create audio:",
        ("Upload Video", "Input Comment"),
        horizontal=True
    )
    # -----------------------------
    # 13. Audio Creation via Comment
    # -----------------------------
    if creation_method == "Input Comment":
        st.subheader("Create Audio via Comment")
        user_comment = st.text_input("Enter your comment here:")
        # Sound Duration Input
        default_duration = 10  # Default to 10 seconds
        # stable-audio-open-1.0 generates at most about 47 seconds of audio;
        # longer requests are rejected by the pipeline.
        max_duration = 47
        user_duration = st.number_input(
            "Select sound duration (seconds):",
            min_value=1,
            max_value=max_duration,
            value=default_duration,
            step=1
        )
        if st.button("Generate Sound"):
            if not user_comment:
                st.error("Please enter a comment to generate sound.")
            else:
                try:
                    progress_bar = st.progress(0)
                    progress_text = st.empty()
                    with st.spinner("Generating sound from your comment..."):
                        progress_text.text("Generating audio...")
                        progress_bar.progress(20)
                        samples = _generate_waveform(user_comment, user_duration)
                        progress_bar.progress(60)
                        progress_text.text("Finalizing audio...")
                        wav_bytes = _encode_wav(samples)
                        progress_bar.progress(80)
                        progress_text.text("Almost done...")
                    # Update conversion count
                    st.session_state.conversion_count += 1
                    progress_bar.progress(100)
                    progress_text.text("Audio generation complete!")
                    _render_audio_result(wav_bytes)
                    # Leave the completion message visible briefly, then clear it.
                    time.sleep(1)
                    progress_text.empty()
                    progress_bar.empty()
                except Exception as e:
                    st.error(f"An error occurred while generating audio: {e}")
    # -----------------------------
    # 14. Audio Creation via Video Upload
    # -----------------------------
    elif creation_method == "Upload Video":
        st.subheader("Create Audio via Video Upload")
        uploaded_video = st.file_uploader("Upload a video", type=["mp4", "avi", "mov"])
        if uploaded_video:
            container = None
            try:
                # Rewind in case the upload buffer was already read before.
                uploaded_video.seek(0)
                # Open the video using PyAV
                container = av.open(uploaded_video)
                video_stream = container.streams.video[0]
                # container.duration is expressed in av.time_base units, not in
                # the stream's own time base, and may be unavailable.
                if container.duration is None:
                    video_duration = None
                    st.write("Video Duration: unknown")
                else:
                    video_duration = container.duration / av.time_base
                    st.write(f"Video Duration: {video_duration:.2f} seconds")
                # Sound Duration Selection. The default is clamped into the
                # slider range; a value outside [min, max] makes st.slider raise.
                max_sound_duration = 10
                if video_duration is None:
                    default_sound_duration = max_sound_duration
                else:
                    default_sound_duration = min(max_sound_duration, max(1, int(video_duration)))
                sound_duration = st.slider(
                    "Select sound duration (seconds):",
                    min_value=1,
                    max_value=max_sound_duration,
                    value=default_sound_duration,
                    step=1
                )
                if st.button("Process Video"):
                    try:
                        progress_bar = st.progress(0)
                        progress_text = st.empty()
                        with st.spinner("Processing your video..."):
                            # Step 1: Extracting frames
                            progress_text.text("Extracting frames from video...")
                            progress_bar.progress(10)
                            clip_len = video_model.config.encoder.num_frames
                            frames = _extract_frames(container, video_stream, clip_len)
                            if not frames:
                                st.error("No frames were extracted from the video.")
                                st.stop()
                            progress_bar.progress(30)
                            progress_text.text("Preprocessing frames...")
                            processed_frames = preprocess_frames(
                                frames, target_size=(224, 224), num_frames=len(frames)
                            )
                            if not processed_frames:
                                st.error("No valid frames to process after preprocessing.")
                                st.stop()
                            progress_bar.progress(50)
                            # Step 2: Generating caption
                            progress_text.text("Preprocessing the video...")
                            pixel_values = image_processor(
                                images=processed_frames, return_tensors="pt"
                            ).pixel_values.to(device)
                            gen_kwargs = {"min_length": 10, "max_length": 20, "num_beams": 8}
                            tokens = video_model.generate(pixel_values, **gen_kwargs)
                            caption = tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
                            st.write(f"Generated Caption: {caption}")
                            progress_bar.progress(70)
                            # Step 3: Generating audio based on caption
                            progress_text.text("Generating sound based on the video...")
                            samples = _generate_waveform(caption, sound_duration)
                            progress_bar.progress(90)
                            progress_text.text("Finalizing audio...")
                            wav_bytes = _encode_wav(samples)
                        # Update conversion count
                        st.session_state.conversion_count += 1
                        progress_bar.progress(100)
                        progress_text.text("Audio generation complete!")
                        _render_audio_result(wav_bytes)
                        # Leave the completion message visible briefly, then clear it.
                        time.sleep(1)
                        progress_text.empty()
                        progress_bar.empty()
                    except Exception as e:
                        st.error(f"An error occurred while processing the video: {e}")
            except Exception as e:
                st.error(f"Failed to open the video file: {e}")
            finally:
                # Release the demuxer/decoder resources on every run.
                if container is not None:
                    container.close()
    # -----------------------------
    # 15. Conversion Count and Subscription Prompt
    # -----------------------------
    if st.session_state.conversion_count >= 10 and not st.session_state.subscribed:
        st.warning("You have used your 10 free conversions. Please subscribe to process more content.")
        if st.button("Subscribe for $5/month (Currently not available)"):
            handle_subscription()
elif page == "About":
# -----------------------------
# 16. About Page Content
# -----------------------------
st.title("About SoundScene.ai")
st.markdown(
"""
<p style='text-align: center;'>
SoundScene.ai leverages cutting-edge AI technologies to transform your visual content into immersive soundscapes. Whether you're a content creator, filmmaker, or enthusiast, our platform empowers you to add rich audio dimensions to your videos effortlessly.
</p>
<br>
<h3>Features:</h3>
<ul>
<li>Generate audio from video content with precise synchronization.</li>
<li>Create custom soundscapes based on textual descriptions.</li>
<li>Download and share your unique audio creations.</li>
<li>Secure subscription plans to cater to your needs.</li>
</ul>
<br>
<h3>Base Models From Huggingface:</h3>
<p>
<li>Image Processing: MCG-NJU/videomae-base</li>
<li>Tokenizer: gpt2</li>
<li>Video Model: Neleac/timesformer-gpt2-video-captioning</li>
<li>Audio Model = stabilityai/stable-audio-open-1.0</li>
</p>
<a href="https://huggingface.co/stabilityai/stable-audio-open-1.0" target="_blank" style="color: #1e90ff;"> stable-audio-open-1.0 </a>
<a href="https://huggingface.co/Neleac/SpaceTimeGPT" target="_blank" style="color: #1e90ff;"> SpaceTimeGPT </a>
""",
unsafe_allow_html=True
)
elif page == "Pricing":
# -----------------------------
# 17. Pricing Page Content
# -----------------------------
st.title("Pricing")
st.markdown(
"""
<p style='text-align: center;'>
Choose a plan that fits your needs and start transforming your content today!
</p>
""",
unsafe_allow_html=True
)
# Pricing Cards
col1, col2, col3 = st.columns(3)
with col1:
st.markdown(
"""
<div style="background-color:#1c1e26;padding:20px;border-radius:10px;">
<h3 style="text-align:center;">Free</h3>
<p style="text-align:center; font-size:24px;">$0/month</p>
<ul style="color:#ffffff;">
<li>Up to 10 conversions per month</li>
<li>Basic Support</li>
</ul>
<div style="text-align:center;">
<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;">Select</button>
</div>
</div>
""",
unsafe_allow_html=True
)
with col2:
st.markdown(
"""
<div style="background-color:#1c1e26;padding:20px;border-radius:10px;">
<h3 style="text-align:center;">Pro</h3>
<p style="text-align:center; font-size:24px;">$5/month (Currently not available)</p>
<ul style="color:#ffffff;">
<li>Unlimited conversions</li>
<li>Priority Support</li>
<li>Access to Premium Features</li>
</ul>
<div style="text-align:center;">
<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;" onclick="window.location.href='#'">Select</button>
</div>
</div>
""",
unsafe_allow_html=True
)
with col3:
st.markdown(
"""
<div style="background-color:#1c1e26;padding:20px;border-radius:10px;">
<h3 style="text-align:center;">Enterprise</h3>
<p style="text-align:center; font-size:24px;">Contact Us</p>
<ul style="color:#ffffff;">
<li>Custom Solutions</li>
<li>Dedicated Support</li>
<li>Scalable Infrastructure</li>
</ul>
<div style="text-align:center;">
<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;" onclick="window.location.href='#'">Contact</button>
</div>
</div>
""",
unsafe_allow_html=True
)
elif page == "Contact":
# -----------------------------
# 18. Contact Page Content
# -----------------------------
st.title("Contact Us")
st.markdown(
"""
<p style='text-align: center;'>
We'd love to hear from you! Whether you have a question about features, trials, pricing, or anything else, our team is ready to answer all your questions.
</p>
""",
unsafe_allow_html=True
)
# Contact Form
with st.form("contact_form"):
name = st.text_input("Your Name")
email = st.text_input("Your Email")
message = st.text_area("Your Message")
submitted = st.form_submit_button("Send")
if submitted:
if not name or not email or not message:
st.error("Please fill out all fields.")
else:
# Here you can integrate with an email service or database to store the messages
st.success("Thank you for reaching out! We'll get back to you shortly.")
# Contact Details
st.markdown("---")
st.markdown(
"""
<h3>Social Media Accounts</h3>
<a href="https://www.linkedin.com/in/yunusakkaya/" target="_blank" style="color: #1e90ff;"> Linkedin </a>
<a href="https://medium.com/@yunus-akkaya" target="_blank" style="color: #1e90ff;"> Medium </a>
""",
unsafe_allow_html=True
)
# -----------------------------
# 19. Conversion Count and Subscription Prompt (Moved to Home Page)
# -----------------------------
# Note: This section is already handled within the "Home" page based on conversion count.
# -----------------------------
# 20. Security and Best Practices Notes
# -----------------------------
# Note:
# - Replace placeholder API keys with secure methods (e.g., environment variables or Streamlit secrets).
# - Ensure proper configuration of Firebase and Stripe if enabling authentication and payment features.
# - Validate and sanitize all user inputs to enhance security.
# - Customize the success_url and cancel_url in the Stripe checkout session to match your deployment URLs.
# -----------------------------
# 21. Custom Footer
# -----------------------------
# Adds a copyright bar styled with position: fixed at the bottom of the page;
# it is emitted on every page of the app.
_FOOTER_HTML = """
<style>
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: #1c1e26;
color: white;
text-align: center;
padding: 10px 0;
}
</style>
<div class="footer">
<p>© 2024 SoundScene.ai. All rights reserved.</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)