Spaces:

yunusakkaya
/

soundscene-v1

Running

App Files Files Community

soundscene-v1 / app.py

yunusakkaya

Update app.py

c48c207 verified about 2 months ago

raw

history blame contribute delete

27 kB

	import io
	import json
	import os
	import time # For simulating processing time

	import av
	import numpy as np
	import torch
	import streamlit as st
	from PIL import Image
	from torchvision.transforms import Resize, ToTensor, Compose
	from transformers import AutoImageProcessor, AutoTokenizer, VisionEncoderDecoderModel
	from diffusers import StableAudioPipeline
	import soundfile as sf
	import stripe
	import firebase_admin
	from firebase_admin import credentials, auth
	from huggingface_hub import login

	# Read the Firebase service-account JSON from the "firebase" secret.
	firebase_json_string = os.getenv("firebase")
	if not firebase_json_string:
	    # json.loads(None) would only raise an opaque TypeError; name the missing secret instead.
	    raise RuntimeError('Secret "firebase" is not set; it must hold the Firebase service-account JSON.')
	firebase_credentials = json.loads(firebase_json_string)

	hf_token = os.getenv("token1")  # Name under which the token is stored in the secret manager
	if not hf_token:
	    # Without a token, login() tries to prompt interactively, which cannot work on a server.
	    raise RuntimeError('Secret "token1" is not set; it must hold a Hugging Face access token.')
	login(hf_token)

	# Initialise the Firebase Admin SDK only if no app has been registered yet.
	if not firebase_admin._apps:
	    cred = credentials.Certificate(firebase_credentials)
	    firebase_admin.initialize_app(cred)
	# -----------------------------
	# 1. Device Configuration
	# -----------------------------
	# Prefer the GPU when torch reports one, otherwise stay on the CPU.
	device = "cpu"
	if torch.cuda.is_available():
	    device = "cuda"

	# -----------------------------
	# 2. Streamlit Page Configuration and Styling
	# -----------------------------
	st.set_page_config(page_title="SoundScene.ai", page_icon=":musical_note:", layout="wide")

	# Dark theme and custom styles, injected once as a raw <style> element.
	_BASE_CSS = """
	<style>
	/* General Body Styles */
	body {
	background-color: #0e1117;
	color: #ffffff;
	}
	/* Title Styles */
	.stTitle {
	color: #1e90ff;
	text-align: center;
	}
	/* Sidebar Styles */
	.stSidebar {
	background-color: #1c1e26;
	}
	/* Button Styles */
	.stButton>button {
	background-color: #1e90ff;
	color: #ffffff;
	}
	.stButton>button:hover {
	background-color: #0d6efd;
	}
	/* Header Styles */
	.stHeader {
	color: #1e90ff;
	}
	/* Footer Styles */
	footer {
	visibility: hidden;
	}
	</style>
	"""
	st.markdown(_BASE_CSS, unsafe_allow_html=True)

	# -----------------------------
	# 4. Stripe Configuration
	# -----------------------------
	# Take the secret key from the environment instead of a literal committed to the repository.
	# When STRIPE_SECRET_KEY is unset the key is None and any Stripe API call will be rejected.
	stripe.api_key = os.getenv("STRIPE_SECRET_KEY")

	# -----------------------------
	# 5. Session State Initialization
	# -----------------------------
	# Seed each per-session value once; values already present survive script reruns.
	_SESSION_DEFAULTS = {
	    "user": None,
	    "auth_mode": "Login",  # Default mode must match the radio option exactly
	    "conversion_count": 0,
	    "subscribed": False,
	}
	for _state_key, _state_default in _SESSION_DEFAULTS.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default


	# Authentication logic
	def login_user(email, password):
	    """Verify the credentials with Firebase and store the user in the session.

	    The Firebase Admin SDK cannot validate passwords, so looking the account
	    up by e-mail alone (the previous behaviour) signed in anyone who knew an
	    existing address. The password is now checked through the Firebase Auth
	    REST endpoint ``accounts:signInWithPassword``, which requires the
	    project's Web API key in the ``firebase_api_key`` secret.

	    Args:
	        email: E-mail address entered in the sidebar.
	        password: Password entered in the sidebar.

	    Returns:
	        None. On success ``st.session_state.user`` becomes
	        ``{"uid": ..., "email": ...}`` and a success message is shown;
	        on any failure an error is shown and the session is left unchanged.
	    """
	    import urllib.error
	    import urllib.parse
	    import urllib.request

	    if not email or not password:
	        st.error("Login failed: please enter both email and password.")
	        return

	    api_key = os.getenv("firebase_api_key")
	    if not api_key:
	        # Fail closed: without the key the password cannot be verified at all.
	        st.error("Login failed: password verification is not configured.")
	        return

	    request = urllib.request.Request(
	        "https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword?key="
	        + urllib.parse.quote(api_key, safe=""),
	        data=json.dumps(
	            {"email": email, "password": password, "returnSecureToken": True}
	        ).encode("utf-8"),
	        headers={"Content-Type": "application/json"},
	    )
	    try:
	        with urllib.request.urlopen(request, timeout=10) as response:
	            account = json.load(response)
	    except urllib.error.HTTPError as e:
	        # Firebase reports rejected credentials as an HTTP error with a JSON body.
	        try:
	            reason = json.load(e)["error"]["message"]
	        except Exception:
	            reason = f"HTTP {e.code}"
	        st.error(f"Login failed: {reason}")
	        return
	    except Exception as e:
	        st.error(f"Login failed: {str(e)}")
	        return

	    st.session_state.user = {"uid": account["localId"], "email": account["email"]}
	    st.success("Successfully logged in!")


	def register_user(email, password):
	    """Create a Firebase account for the given credentials.

	    Args:
	        email: E-mail address for the new account.
	        password: Password for the new account.

	    Returns:
	        None. Shows a success message when the account was created and an
	        error message carrying the Firebase exception text otherwise.
	    """
	    try:
	        auth.create_user(email=email, password=password)
	    except Exception as e:
	        st.error(f"Registration failed: {str(e)}")
	        return
	    # Do not assign st.session_state.auth_mode here: the sidebar radio with
	    # key="auth_mode" already exists in this run, and Streamlit raises when a
	    # widget-bound key is modified after the widget is created. That exception
	    # used to be caught above and reported as a failed registration even
	    # though the account had been created.
	    st.success("Registration successful! You can now log in.")


	# Authentication interface: sidebar form shown only while nobody is signed in.
	if st.session_state.user is None:
	    _sidebar = st.sidebar
	    _sidebar.subheader("Please authenticate to use the application.")

	    # Toggle between Login and Register; the choice is kept in session state under "auth_mode".
	    auth_mode = _sidebar.radio("Select an option", ("Login", "Register"), key="auth_mode")

	    email = _sidebar.text_input("Email", key="sidebar_email")
	    password = _sidebar.text_input("Password", type="password", key="sidebar_password")

	    # Each mode maps to the label of its submit button and the handler it triggers.
	    _auth_actions = {
	        "Login": ("Log In", login_user),
	        "Register": ("Register", register_user),
	    }
	    _button_label, _auth_handler = _auth_actions[auth_mode]
	    if _sidebar.button(_button_label):
	        _auth_handler(email, password)

	# Main app (accessible regardless of login status)
	# While a user is signed in, show who it is and offer a log-out button.
	if st.session_state.user:
	    st.sidebar.success(f"Logged in as: {st.session_state.user['email']}")
	    if st.sidebar.button("Log Out"):
	        st.session_state.user = None
	        # The login form above was skipped and the banner above was already
	        # drawn for this run, so restart the script to show the signed-out UI.
	        # Streamlit versions differ in which rerun function they expose.
	        _rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
	        if _rerun is not None:
	            _rerun()


	@st.cache_resource
	def load_models():
	    """Load the captioning and audio models once and cache them.

	    Returns:
	        tuple: ``(image_processor, tokenizer, video_model, audio_pipe)`` where
	        ``video_model`` and ``audio_pipe`` have been moved to ``device``.
	    """
	    # Half precision only on the GPU; on the CPU load full-precision weights.
	    dtype = torch.float16 if device == "cuda" else torch.float32
	    audio_pipe = StableAudioPipeline.from_pretrained(
	        "stabilityai/stable-audio-open-1.0",
	        token=hf_token,  # replaces the deprecated use_auth_token argument
	        torch_dtype=dtype,
	    )
	    audio_pipe = audio_pipe.to(device)

	    image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
	    tokenizer = AutoTokenizer.from_pretrained("gpt2")
	    video_model = VisionEncoderDecoderModel.from_pretrained(
	        "Neleac/timesformer-gpt2-video-captioning"
	    ).to(device)

	    return image_processor, tokenizer, video_model, audio_pipe


	image_processor, tokenizer, video_model, audio_pipe = load_models()


	# -----------------------------
	# 7. Preprocessing Functions
	# -----------------------------

	# Resize a single frame to the size expected by the captioning model.
	def preprocess_frame(frame, target_size=(224, 224)):
	    """Convert one video frame to a resized PIL image.

	    Args:
	        frame (np.ndarray or array-like): Frame of shape (height, width, 3).
	            Non-uint8 data is clipped to [0, 255] and cast to uint8.
	        target_size (tuple): Desired size (height, width).

	    Returns:
	        PIL.Image: The resized frame.

	    Raises:
	        ValueError: If the frame is not a 3-dimensional array with 3 channels.
	    """
	    # Accept array-likes too, so the shape check below reports bad input
	    # as ValueError instead of failing on a missing ``dtype`` attribute.
	    frame = np.asarray(frame)

	    # Ensure the frame is in uint8 format
	    if frame.dtype != np.uint8:
	        frame = np.clip(frame, 0, 255).astype(np.uint8)

	    # Check frame dimensions
	    if frame.ndim != 3 or frame.shape[2] != 3:
	        raise ValueError(f"Expected frame with 3 channels, got shape {frame.shape}")

	    return Resize(target_size)(Image.fromarray(frame))


	def preprocess_frames(frames, target_size=(224, 224), num_frames=8):
	    """Resize the first ``num_frames`` frames, skipping any that fail.

	    Args:
	        frames (list of np.ndarray): Video frames.
	        target_size (tuple): Desired size (height, width).
	        num_frames (int): Maximum number of frames to take from the front of ``frames``.

	    Returns:
	        list of PIL.Image: The frames that were converted successfully, in order.
	    """
	    kept = []
	    for position, raw_frame in enumerate(frames[:num_frames], start=1):
	        try:
	            image = preprocess_frame(raw_frame, target_size)
	            # Debugging: Display frame properties
	            st.write(f"Frame {position}: size={image.size}, mode={image.mode}")
	            kept.append(image)
	        except Exception as e:
	            # Best effort: one bad frame is reported in the UI and does not abort the rest.
	            st.warning(f"Skipping frame {position} due to error: {e}")
	    return kept


	# with st.sidebar:
	# st.title("Login")
	# email = st.text_input("Email", key="login_email")
	# password = st.text_input("Password", type="password", key="login_password")
	# login_btn = st.button("Login")
	# logout_btn = st.button("Logout")

	# if login_btn:
	# try:
	# user = auth.sign_in_with_email_and_password(email, password)
	# st.session_state.user = user
	# st.success("Logged in successfully!")
	# except:
	# st.error("Invalid login credentials.")
	# elif logout_btn:
	# st.session_state.user = None
	# st.success("Logged out successfully!")

	# if st.session_state.user:
	# st.sidebar.write(f"Welcome, {st.session_state.user['email']}!")

	# -----------------------------
	# 9. Navigation Menu
	# -----------------------------
	# A divider followed by the page selector, both rendered in the sidebar.
	st.sidebar.markdown("---")
	page = st.sidebar.selectbox("Menu", ["Home", "About", "Pricing", "Contact"])


	# -----------------------------
	# 10. Membership Control and Payment Integration
	# -----------------------------
	def handle_subscription():
	    """Tell the user that subscribing is not possible yet; no payment is started."""
	    notice = "Subscription service is currently not available."
	    st.warning(notice)


	# -----------------------------
	# 11. Main Content Rendering Based on Navigation
	# -----------------------------
	_HOME_INTRO_HTML = """
	<p style='text-align: center;'>
	Transform your videos into stunning soundscapes with advanced AI models.
	Learn more about our <a href="#About" style="color: #1e90ff;">features</a> and <a href="#Pricing" style="color: #1e90ff;">pricing</a> options.
	</p>
	"""

	_FREE_CONVERSIONS = 10  # conversions allowed before a subscription is requested
	_MAX_VIDEO_SOUND_SECONDS = 10  # upper bound of the duration slider for video uploads


	def _synthesize_wav(prompt, duration_s):
	    """Generate audio for ``prompt`` and return it as the bytes of a WAV file."""
	    # Fixed seed: the same prompt and duration reproduce the same clip.
	    generator = torch.Generator(device).manual_seed(0)
	    waveforms = audio_pipe(
	        prompt=prompt,
	        negative_prompt="Low quality.",
	        num_inference_steps=50,
	        audio_end_in_s=duration_s,
	        num_waveforms_per_prompt=1,
	        generator=generator,
	    ).audios
	    samples = waveforms[0].T.float().cpu().numpy()
	    # Encode in memory rather than into one fixed "generated_audio.wav" path,
	    # so two sessions generating at the same time cannot overwrite each other.
	    buffer = io.BytesIO()
	    sf.write(buffer, samples, audio_pipe.vae.sampling_rate, format="WAV")
	    return buffer.getvalue()


	def _present_audio(wav_bytes):
	    """Play the clip and show either the download button or why it is withheld."""
	    st.audio(wav_bytes, format="audio/wav")
	    if not st.session_state.user:
	        st.warning("Please log in to download the generated sound.")
	    elif st.session_state.subscribed or st.session_state.conversion_count <= _FREE_CONVERSIONS:
	        # Pass the audio bytes themselves: a str argument is downloaded as text,
	        # which previously produced a file containing only the path name.
	        st.download_button(
	            "Download Sound",
	            wav_bytes,
	            file_name="soundscene_output.wav",
	            mime="audio/wav",
	        )
	    else:
	        st.warning(
	            "You have reached the download limit. Please subscribe for unlimited downloads.")


	def _probe_duration_seconds(container, video_stream):
	    """Return the video length in seconds, or 0.0 when the file does not report one."""
	    # Container durations are expressed in av.time_base units, not in the stream's time base.
	    if container.duration is not None:
	        return float(container.duration) / av.time_base
	    if video_stream.duration is not None and video_stream.time_base is not None:
	        return float(video_stream.duration * video_stream.time_base)
	    return 0.0


	def _sample_frames(container, video_stream, clip_len):
	    """Decode up to ``clip_len`` evenly spaced RGB frames.

	    Returns ``(frames, clip_len)``; ``clip_len`` is lowered when the video is shorter.
	    """
	    seg_len = video_stream.frames
	    if seg_len <= 0:
	        # The stream reports no frame count; count by decoding once.
	        seg_len = sum(1 for _ in container.decode(video=0))
	    if clip_len > seg_len:
	        st.warning(f"Video has only {seg_len} frames, but the model expects {clip_len} frames.")
	        clip_len = seg_len

	    # Select evenly spaced frame indices
	    wanted = set(
	        np.linspace(0, seg_len - 1, num=clip_len, endpoint=True).astype(np.int64).tolist()
	    )

	    frames = []
	    container.seek(0)
	    for index, frame in enumerate(container.decode(video=0)):
	        if index in wanted:
	            frames.append(frame.to_ndarray(format="rgb24"))
	            if len(frames) == len(wanted):
	                break  # nothing further is needed; skip decoding the rest
	    return frames, clip_len


	if page == "Home":
	    # -----------------------------
	    # 12. Home Page Content
	    # -----------------------------
	    st.title("Welcome to SoundScene.ai")
	    st.markdown(_HOME_INTRO_HTML, unsafe_allow_html=True)

	    st.header("Create Audio")
	    creation_method = st.radio(
	        "Choose how you want to create audio:",
	        ("Upload Video", "Input Comment"),
	        horizontal=True
	    )

	    # -----------------------------
	    # 13. Audio Creation via Comment
	    # -----------------------------
	    if creation_method == "Input Comment":
	        st.subheader("Create Audio via Comment")
	        user_comment = st.text_input("Enter your comment here:")

	        # Sound duration: 10 seconds by default, at most 60.
	        user_duration = st.number_input(
	            "Select sound duration (seconds):",
	            min_value=1,
	            max_value=60,
	            value=10,
	            step=1
	        )

	        if st.button("Generate Sound"):
	            if not user_comment:
	                st.error("Please enter a comment to generate sound.")
	            else:
	                try:
	                    progress_bar = st.progress(0)
	                    progress_text = st.empty()

	                    with st.spinner("Generating sound from your comment..."):
	                        progress_text.text("Generating audio...")
	                        progress_bar.progress(20)

	                        wav_bytes = _synthesize_wav(user_comment, user_duration)

	                        progress_bar.progress(80)
	                        progress_text.text("Almost done...")

	                        # Update conversion count
	                        st.session_state.conversion_count += 1

	                        progress_bar.progress(100)
	                        progress_text.text("Audio generation complete!")

	                    _present_audio(wav_bytes)

	                    # Leave the final status visible briefly, then clear it.
	                    time.sleep(1)
	                    progress_text.empty()
	                    progress_bar.empty()

	                except Exception as e:
	                    st.error(f"An error occurred while generating audio: {e}")

	    # -----------------------------
	    # 14. Audio Creation via Video Upload
	    # -----------------------------
	    elif creation_method == "Upload Video":
	        st.subheader("Create Audio via Video Upload")
	        uploaded_video = st.file_uploader("Upload a video", type=["mp4", "avi", "mov"])

	        if uploaded_video:
	            try:
	                # Open the video using PyAV; the context manager closes it on every exit path.
	                with av.open(uploaded_video) as container:
	                    video_stream = container.streams.video[0]
	                    video_duration = _probe_duration_seconds(container, video_stream)
	                    st.write(f"Video Duration: {video_duration:.2f} seconds")

	                    # The default must lie inside the slider range, otherwise Streamlit
	                    # raises; clamp it for clips shorter than 1 s or longer than the cap.
	                    sound_duration = st.slider(
	                        "Select sound duration (seconds):",
	                        min_value=1,
	                        max_value=_MAX_VIDEO_SOUND_SECONDS,
	                        value=min(_MAX_VIDEO_SOUND_SECONDS, max(1, int(video_duration))),
	                        step=1
	                    )

	                    if st.button("Process Video"):
	                        try:
	                            progress_bar = st.progress(0)
	                            progress_text = st.empty()

	                            with st.spinner("Processing your video..."):
	                                # Step 1: Extracting frames
	                                progress_text.text("Extracting frames from video...")
	                                frames, clip_len = _sample_frames(
	                                    container,
	                                    video_stream,
	                                    video_model.config.encoder.num_frames,
	                                )
	                                if not frames:
	                                    st.error("No frames were extracted from the video.")
	                                    st.stop()

	                                progress_bar.progress(30)
	                                progress_text.text("Preprocessing frames...")

	                                processed_frames = preprocess_frames(
	                                    frames, target_size=(224, 224), num_frames=clip_len
	                                )
	                                if not processed_frames:
	                                    st.error("No valid frames to process after preprocessing.")
	                                    st.stop()

	                                progress_bar.progress(50)

	                                # Step 2: Generating caption
	                                progress_text.text("Preprocessing the video...")
	                                pixel_values = image_processor(
	                                    images=processed_frames, return_tensors="pt"
	                                ).pixel_values.to(device)
	                                gen_kwargs = {"min_length": 10, "max_length": 20, "num_beams": 8}
	                                tokens = video_model.generate(pixel_values, **gen_kwargs)
	                                caption = tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
	                                st.write(f"Generated Caption: {caption}")
	                                progress_bar.progress(70)

	                                # Step 3: Generating audio based on caption
	                                progress_text.text("Generating sound based on the video...")
	                                wav_bytes = _synthesize_wav(caption, sound_duration)

	                                progress_bar.progress(90)
	                                progress_text.text("Finalizing audio...")

	                                # Update conversion count
	                                st.session_state.conversion_count += 1

	                                progress_bar.progress(100)
	                                progress_text.text("Audio generation complete!")

	                            _present_audio(wav_bytes)

	                            # Leave the final status visible briefly, then clear it.
	                            time.sleep(1)
	                            progress_text.empty()
	                            progress_bar.empty()

	                        except Exception as e:
	                            st.error(f"An error occurred while processing the video: {e}")
	            except Exception as e:
	                st.error(f"Failed to open the video file: {e}")

	    # -----------------------------
	    # 15. Conversion Count and Subscription Prompt
	    # -----------------------------
	    if st.session_state.conversion_count >= _FREE_CONVERSIONS and not st.session_state.subscribed:
	        st.warning("You have used your 10 free conversions. Please subscribe to process more content.")
	        if st.button("Subscribe for $5/month (Currently not available)"):
	            handle_subscription()

	# -----------------------------
	# 16. About Page Content
	# -----------------------------
	_ABOUT_HTML = """
	<p style='text-align: center;'>
	SoundScene.ai leverages cutting-edge AI technologies to transform your visual content into immersive soundscapes. Whether you're a content creator, filmmaker, or enthusiast, our platform empowers you to add rich audio dimensions to your videos effortlessly.
	</p>
	<br>
	<h3>Features:</h3>
	<ul>
	<li>Generate audio from video content with precise synchronization.</li>
	<li>Create custom soundscapes based on textual descriptions.</li>
	<li>Download and share your unique audio creations.</li>
	<li>Secure subscription plans to cater to your needs.</li>
	</ul>
	<br>
	<h3>Base Models From Huggingface:</h3>
	<p>
	<li>Image Processing: MCG-NJU/videomae-base</li>
	<li>Tokenizer: gpt2</li>
	<li>Video Model: Neleac/timesformer-gpt2-video-captioning</li>
	<li>Audio Model = stabilityai/stable-audio-open-1.0</li>


	</p>

	<a href="https://huggingface.co/stabilityai/stable-audio-open-1.0" target="_blank" style="color: #1e90ff;"> stable-audio-open-1.0 </a>

	<a href="https://huggingface.co/Neleac/SpaceTimeGPT" target="_blank" style="color: #1e90ff;"> SpaceTimeGPT </a>
	"""

	# `page` holds exactly one menu entry, so this test does not depend on the other pages.
	if page == "About":
	    st.title("About SoundScene.ai")
	    st.markdown(_ABOUT_HTML, unsafe_allow_html=True)

	# -----------------------------
	# 17. Pricing Page Content
	# -----------------------------
	_PRICING_INTRO_HTML = """
	<p style='text-align: center;'>
	Choose a plan that fits your needs and start transforming your content today!
	</p>
	"""

	# One HTML card per plan, in left-to-right display order.
	_PRICING_CARDS = (
	    """
	<div style="background-color:#1c1e26;padding:20px;border-radius:10px;">
	<h3 style="text-align:center;">Free</h3>
	<p style="text-align:center; font-size:24px;">$0/month</p>
	<ul style="color:#ffffff;">
	<li>Up to 10 conversions per month</li>
	<li>Basic Support</li>
	</ul>
	<div style="text-align:center;">
	<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;">Select</button>
	</div>
	</div>
	""",
	    """
	<div style="background-color:#1c1e26;padding:20px;border-radius:10px;">
	<h3 style="text-align:center;">Pro</h3>
	<p style="text-align:center; font-size:24px;">$5/month (Currently not available)</p>
	<ul style="color:#ffffff;">
	<li>Unlimited conversions</li>
	<li>Priority Support</li>
	<li>Access to Premium Features</li>
	</ul>
	<div style="text-align:center;">
	<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;" onclick="window.location.href='#'">Select</button>
	</div>
	</div>
	""",
	    """
	<div style="background-color:#1c1e26;padding:20px;border-radius:10px;">
	<h3 style="text-align:center;">Enterprise</h3>
	<p style="text-align:center; font-size:24px;">Contact Us</p>
	<ul style="color:#ffffff;">
	<li>Custom Solutions</li>
	<li>Dedicated Support</li>
	<li>Scalable Infrastructure</li>
	</ul>
	<div style="text-align:center;">
	<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;" onclick="window.location.href='#'">Contact</button>
	</div>
	</div>
	""",
	)

	# `page` holds exactly one menu entry, so this test does not depend on the other pages.
	if page == "Pricing":
	    st.title("Pricing")
	    st.markdown(_PRICING_INTRO_HTML, unsafe_allow_html=True)

	    # Pricing cards: one column per plan.
	    for _column, _card_html in zip(st.columns(3), _PRICING_CARDS):
	        _column.markdown(_card_html, unsafe_allow_html=True)

	# -----------------------------
	# 18. Contact Page Content
	# -----------------------------
	_CONTACT_INTRO_HTML = """
	<p style='text-align: center;'>
	We'd love to hear from you! Whether you have a question about features, trials, pricing, or anything else, our team is ready to answer all your questions.
	</p>
	"""

	_CONTACT_LINKS_HTML = """
	<h3>Social Media Accounts</h3>
	<a href="https://www.linkedin.com/in/yunusakkaya/" target="_blank" style="color: #1e90ff;"> Linkedin </a>

	<a href="https://medium.com/@yunus-akkaya" target="_blank" style="color: #1e90ff;"> Medium </a>

	"""

	# `page` holds exactly one menu entry, so this test does not depend on the other pages.
	if page == "Contact":
	    st.title("Contact Us")
	    st.markdown(_CONTACT_INTRO_HTML, unsafe_allow_html=True)

	    # Contact form: all three fields are required.
	    with st.form("contact_form"):
	        sender_name = st.text_input("Your Name")
	        sender_email = st.text_input("Your Email")
	        sender_message = st.text_area("Your Message")
	        if st.form_submit_button("Send"):
	            if sender_name and sender_email and sender_message:
	                # Here you can integrate with an email service or database to store the messages;
	                # at present the message is only acknowledged, not sent or saved.
	                st.success("Thank you for reaching out! We'll get back to you shortly.")
	            else:
	                st.error("Please fill out all fields.")

	    # Contact details
	    st.markdown("---")
	    st.markdown(_CONTACT_LINKS_HTML, unsafe_allow_html=True)

	# -----------------------------
	# 19. Conversion Count and Subscription Prompt (Moved to Home Page)
	# -----------------------------
	# Note: This section is already handled within the "Home" page based on conversion count.

	# -----------------------------
	# 20. Security and Best Practices Notes
	# -----------------------------
	# Note:
	# - Replace placeholder API keys with secure methods (e.g., environment variables or Streamlit secrets).
	# - Ensure proper configuration of Firebase and Stripe if enabling authentication and payment features.
	# - Validate and sanitize all user inputs to enhance security.
	# - Customize the success_url and cancel_url in the Stripe checkout session to match your deployment URLs.

	# -----------------------------
	# 21. Footer
	# -----------------------------
	# Custom footer pinned to the bottom of the viewport, rendered on every page.
	_FOOTER_HTML = """
	<style>
	.footer {
	position: fixed;
	left: 0;
	bottom: 0;
	width: 100%;
	background-color: #1c1e26;
	color: white;
	text-align: center;
	padding: 10px 0;
	}
	</style>
	<div class="footer">
	<p>© 2024 SoundScene.ai. All rights reserved.</p>
	</div>
	"""
	st.markdown(_FOOTER_HTML, unsafe_allow_html=True)