Spaces:
Running
Running
import av | |
import numpy as np | |
import torch | |
import streamlit as st | |
from PIL import Image | |
from torchvision.transforms import Resize, ToTensor, Compose | |
from transformers import AutoImageProcessor, AutoTokenizer, VisionEncoderDecoderModel | |
from diffusers import StableAudioPipeline | |
import soundfile as sf | |
import stripe | |
import time # For simulating processing time | |
import firebase_admin | |
from firebase_admin import credentials, auth | |
from huggingface_hub import login | |
import os | |
import json | |
# Load the Firebase service-account JSON from the Hugging Face Space secrets.
firebase_json_string = os.getenv("firebase")
if not firebase_json_string:
    # Fail fast with an actionable message instead of the opaque TypeError
    # that json.loads(None) would raise.
    raise RuntimeError(
        'The "firebase" secret is not set; it must contain the Firebase '
        "service-account JSON."
    )
firebase_credentials = json.loads(firebase_json_string)

# Hugging Face access token, stored under the secret name "token1".
hf_token = os.getenv("token1")
if hf_token:
    login(hf_token)
# NOTE: when the token is absent, login() is skipped on purpose; called with
# None it falls back to an interactive prompt, which a hosted app cannot answer.

# Initialise the Firebase Admin SDK only when no app has been registered yet,
# so repeated executions of this script do not call initialize_app() twice.
if not firebase_admin._apps:
    cred = credentials.Certificate(firebase_credentials)
    firebase_admin.initialize_app(cred)
# -----------------------------
# 1. Device Configuration
# -----------------------------
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# -----------------------------
# 2. Streamlit Page Configuration and Styling
# -----------------------------
st.set_page_config(
    page_title="SoundScene.ai",
    page_icon=":musical_note:",
    layout="wide",
)

# Dark theme and custom styles, injected as a raw <style> element.
_THEME_CSS = """
    <style>
    /* General Body Styles */
    body {
    background-color: #0e1117;
    color: #ffffff;
    }
    /* Title Styles */
    .stTitle {
    color: #1e90ff;
    text-align: center;
    }
    /* Sidebar Styles */
    .stSidebar {
    background-color: #1c1e26;
    }
    /* Button Styles */
    .stButton>button {
    background-color: #1e90ff;
    color: #ffffff;
    }
    .stButton>button:hover {
    background-color: #0d6efd;
    }
    /* Header Styles */
    .stHeader {
    color: #1e90ff;
    }
    /* Footer Styles */
    footer {
    visibility: hidden;
    }
    </style>
    """
st.markdown(_THEME_CSS, unsafe_allow_html=True)
# -----------------------------
# 4. Stripe Configuration
# -----------------------------
# Read the Stripe secret key from the environment so the real key never has
# to be committed to the source. The original placeholder is kept as the
# fallback, so behaviour is unchanged when the variable is not set.
stripe.api_key = os.getenv("STRIPE_SECRET_KEY", "YOUR_STRIPE_SECRET_KEY")
# -----------------------------
# 5. Session State Initialization
# -----------------------------
# Default value for every session key the app relies on. A key is only
# seeded when it is not already present in st.session_state.
_SESSION_DEFAULTS = {
    "user": None,
    "auth_mode": "Login",  # Default mode must match radio option exactly
    "conversion_count": 0,
    "subscribed": False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# Authentication logic
def _sign_in_with_password(email, password):
    """Verify an email/password pair with the Firebase Auth REST API.

    The Firebase Admin SDK cannot validate passwords, so the check is done
    through the ``accounts:signInWithPassword`` endpoint, which needs the
    project's Web API key (read from the ``firebase_web_api_key`` secret).

    Args:
        email (str): Account email address.
        password (str): Plain-text password entered by the user.

    Returns:
        dict: Decoded JSON response; ``localId`` holds the user's uid.

    Raises:
        RuntimeError: If the Web API key secret is not configured.
        ValueError: If the endpoint rejects the request with HTTP 400
            (unknown email, wrong password, disabled account, ...).
        urllib.error.URLError: On network failures or other HTTP errors.
    """
    import urllib.error
    import urllib.parse
    import urllib.request

    api_key = os.getenv("firebase_web_api_key")
    if not api_key:
        # Fail closed: without the key the password cannot be checked, and
        # logging the user in anyway would be an authentication bypass.
        raise RuntimeError(
            'password verification is not configured (set the '
            '"firebase_web_api_key" secret).'
        )

    endpoint = (
        "https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword?"
        + urllib.parse.urlencode({"key": api_key})
    )
    payload = json.dumps(
        {"email": email, "password": password, "returnSecureToken": True}
    ).encode("utf-8")
    request = urllib.request.Request(
        endpoint,
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            return json.load(response)
    except urllib.error.HTTPError as err:
        if err.code == 400:
            # Deliberately generic so the message does not reveal whether
            # the email address is registered.
            raise ValueError("Invalid email or password.") from err
        raise


def login_user(email, password):
    """Log a user in after verifying the password, reporting in the UI.

    On success ``st.session_state.user`` is set to a dict with the keys
    ``uid`` and ``email``; on failure an error message is shown and the
    session is left unchanged.

    Args:
        email (str): Account email address.
        password (str): Plain-text password entered by the user.
    """
    if not email or not password:
        st.error("Login failed: email and password are required.")
        return
    try:
        account = _sign_in_with_password(email, password)
        st.session_state.user = {
            "uid": account["localId"],
            "email": account.get("email", email),
        }
        st.success("Successfully logged in!")
    except Exception as e:
        st.error(f"Login failed: {str(e)}")


def register_user(email, password):
    """Create a Firebase account and schedule a switch to the login form.

    Args:
        email (str): Email address for the new account.
        password (str): Password for the new account.
    """
    try:
        auth.create_user(email=email, password=password)
    except Exception as e:
        st.error(f"Registration failed: {str(e)}")
        return
    st.success("Registration successful! You can now log in.")
    # The "auth_mode" radio already exists in this script run, and Streamlit
    # forbids assigning to a widget's key after the widget is instantiated.
    # Leave a flag instead; it is applied before the radio is built on the
    # next run.
    st.session_state["_switch_to_login"] = True


# Authentication interface
if st.session_state.user is None:
    st.sidebar.subheader("Please authenticate to use the application.")

    # Apply a pending "switch to Login" request from a successful
    # registration; this has to happen before the radio is created.
    if st.session_state.pop("_switch_to_login", False):
        st.session_state.auth_mode = "Login"

    # Toggle between Login and Register
    auth_mode = st.sidebar.radio(
        "Select an option",
        ("Login", "Register"),
        key="auth_mode",
    )
    email = st.sidebar.text_input("Email", key="sidebar_email")
    password = st.sidebar.text_input(
        "Password", type="password", key="sidebar_password"
    )
    if auth_mode == "Login":
        if st.sidebar.button("Log In"):
            login_user(email, password)
    elif auth_mode == "Register":
        if st.sidebar.button("Register"):
            register_user(email, password)

# Main app (accessible regardless of login status)
# This is a separate `if`, not an `else`, so the status appears in the same
# run in which login_user() succeeded.
if st.session_state.user:
    st.sidebar.success(f"Logged in as: {st.session_state.user['email']}")
    if st.sidebar.button("Log Out"):
        # Only the user entry is cleared; the rest of the session state
        # (conversion count, subscription flag) is kept.
        st.session_state.user = None
@st.cache_resource
def load_models():
    """Load the captioning and audio-generation models.

    The function is wrapped in ``st.cache_resource`` so the models are
    loaded once and reused, instead of being re-created every time
    Streamlit re-executes the script.

    Returns:
        tuple: ``(image_processor, tokenizer, video_model, audio_pipe)``.
    """
    # Half precision is only requested on CUDA; on CPU use float32.
    audio_dtype = torch.float16 if device == "cuda" else torch.float32
    audio_pipe = StableAudioPipeline.from_pretrained(
        "stabilityai/stable-audio-open-1.0",
        token=hf_token,  # `use_auth_token` is the deprecated spelling
        torch_dtype=audio_dtype,
    )
    audio_pipe = audio_pipe.to(device)
    image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    video_model = VisionEncoderDecoderModel.from_pretrained(
        "Neleac/timesformer-gpt2-video-captioning"
    ).to(device)
    return image_processor, tokenizer, video_model, audio_pipe


image_processor, tokenizer, video_model, audio_pipe = load_models()
# -----------------------------
# 7. Preprocessing Functions
# -----------------------------
# Helper that resizes a single frame.
def preprocess_frame(frame, target_size=(224, 224)):
    """
    Convert one raw video frame into a resized PIL image.

    Args:
        frame (np.ndarray): Frame pixels; must be 3-dimensional with a last
            axis of length 3. Non-uint8 data is clipped to 0..255 and cast.
        target_size (tuple): Size handed to torchvision's ``Resize``.

    Returns:
        PIL.Image: The resized frame.

    Raises:
        ValueError: If the frame is not 3-dimensional with 3 channels.
    """
    pixels = frame
    # Bring the data into uint8 before building the image.
    if pixels.dtype != np.uint8:
        pixels = np.clip(pixels, 0, 255).astype(np.uint8)

    # Reject anything that is not an H x W x 3 array.
    has_three_channels = pixels.ndim == 3 and pixels.shape[2] == 3
    if not has_three_channels:
        raise ValueError(f"Expected frame with 3 channels, got shape {pixels.shape}")

    return Resize(target_size)(Image.fromarray(pixels))
def preprocess_frames(frames, target_size=(224, 224), num_frames=8):
    """
    Run ``preprocess_frame`` over the first ``num_frames`` frames.

    Frames that fail preprocessing are reported with ``st.warning`` and
    left out of the result; each successful frame's size and mode are
    written to the page.

    Args:
        frames (list of np.ndarray): Video frames.
        target_size (tuple): Size forwarded to ``preprocess_frame``.
        num_frames (int): Maximum number of frames to process.

    Returns:
        list of PIL.Image: The frames that were preprocessed successfully.
    """
    images = []
    for number, raw_frame in enumerate(frames[:num_frames], start=1):
        try:
            image = preprocess_frame(raw_frame, target_size)
            # Debugging: Display frame properties
            st.write(f"Frame {number}: size={image.size}, mode={image.mode}")
            images.append(image)
        except Exception as e:
            st.warning(f"Skipping frame {number} due to error: {e}")
    return images
# with st.sidebar: | |
# st.title("Login") | |
# email = st.text_input("Email", key="login_email") | |
# password = st.text_input("Password", type="password", key="login_password") | |
# login_btn = st.button("Login") | |
# logout_btn = st.button("Logout") | |
# if login_btn: | |
# try: | |
# user = auth.sign_in_with_email_and_password(email, password) | |
# st.session_state.user = user | |
# st.success("Logged in successfully!") | |
# except: | |
# st.error("Invalid login credentials.") | |
# elif logout_btn: | |
# st.session_state.user = None | |
# st.success("Logged out successfully!") | |
# if st.session_state.user: | |
# st.sidebar.write(f"Welcome, {st.session_state.user['email']}!") | |
# -----------------------------
# 9. Navigation Menu
# -----------------------------
# Sidebar separator followed by the page selector; `page` drives the
# content rendered in the main area.
_MENU_PAGES = ["Home", "About", "Pricing", "Contact"]
st.sidebar.markdown("---")
page = st.sidebar.selectbox("Menu", _MENU_PAGES)
# -----------------------------
# 10. Membership Control and Payment Integration
# -----------------------------
def handle_subscription():
    """Show a warning that subscriptions cannot be purchased yet."""
    notice = "Subscription service is currently not available."
    st.warning(notice)
# -----------------------------
# 11. Main Content Rendering Based on Navigation
# -----------------------------
_MAX_VIDEO_SOUND_SECONDS = 10  # upper bound of the video-mode duration slider
_FREE_CONVERSION_LIMIT = 10  # conversions allowed without a subscription


def _run_audio_pipeline(prompt, duration_s):
    """Generate one waveform for *prompt* and return it as a NumPy array.

    Args:
        prompt (str): Text description of the desired sound.
        duration_s (int | float): Requested clip length in seconds.

    Returns:
        np.ndarray: The first generated waveform, transposed and converted
        to float32 on the CPU.
    """
    # Fixed seed: the same prompt and duration reuse the same generator state.
    generator = torch.Generator(device).manual_seed(0)
    audio = audio_pipe(
        prompt=prompt,
        negative_prompt="Low quality.",
        num_inference_steps=50,
        audio_end_in_s=duration_s,
        num_waveforms_per_prompt=1,
        generator=generator,
    ).audios
    return audio[0].T.float().cpu().numpy()


def _encode_wav(samples):
    """Encode *samples* as WAV and return the file content as bytes.

    The audio is kept in memory rather than written to a fixed file name,
    so concurrent sessions cannot overwrite each other's output.
    """
    import io

    buffer = io.BytesIO()
    sf.write(buffer, samples, audio_pipe.vae.sampling_rate, format="WAV")
    return buffer.getvalue()


def _show_generated_audio(wav_bytes):
    """Play the generated clip and offer the download if the user may have it.

    Downloads require a logged-in user who is either subscribed or still
    within the free conversion quota.
    """
    st.audio(wav_bytes, format="audio/wav")
    if not st.session_state.user:
        st.warning("Please log in to download the generated sound.")
    elif (
        st.session_state.subscribed
        or st.session_state.conversion_count <= _FREE_CONVERSION_LIMIT
    ):
        # Pass the audio bytes themselves; a path string would be downloaded
        # as literal text.
        st.download_button(
            "Download Sound",
            wav_bytes,
            file_name="soundscene_output.wav",
            mime="audio/wav",
        )
    else:
        st.warning(
            "You have reached the download limit. Please subscribe for unlimited downloads."
        )


def _video_duration_seconds(container, video_stream):
    """Return the video length in seconds, or 0.0 when it is unknown."""
    # The container-level duration is expressed in av.time_base units, not
    # in the stream's own time base.
    if container.duration is not None:
        return float(container.duration) / av.time_base
    if video_stream.duration is not None and video_stream.time_base is not None:
        return float(video_stream.duration * video_stream.time_base)
    return 0.0


def _sample_video_frames(container, video_stream, clip_len):
    """Decode up to *clip_len* evenly spaced RGB frames from the video.

    Returns:
        list of np.ndarray: The selected frames in ``rgb24`` layout; empty
        when the video contains no decodable frames.
    """
    total = video_stream.frames
    if not total:
        # The stream does not report a frame count; count by decoding once.
        container.seek(0)
        total = sum(1 for _ in container.decode(video=0))
    if clip_len > total:
        st.warning(f"Video has only {total} frames, but the model expects {clip_len} frames.")
        clip_len = total
    if clip_len <= 0:
        return []

    # Select evenly spaced frame indices
    wanted = {int(i) for i in np.linspace(0, total - 1, num=clip_len, endpoint=True)}
    last_wanted = max(wanted)
    frames = []
    container.seek(0)
    for index, frame in enumerate(container.decode(video=0)):
        if index in wanted:
            frames.append(frame.to_ndarray(format="rgb24"))
        if index >= last_wanted:
            break  # nothing left to collect; skip decoding the tail
    return frames


if page == "Home":
    # -----------------------------
    # 12. Home Page Content
    # -----------------------------
    st.title("Welcome to SoundScene.ai")
    st.markdown(
        """
        <p style='text-align: center;'>
        Transform your videos into stunning soundscapes with advanced AI models.
        Learn more about our <a href="#About" style="color: #1e90ff;">features</a> and <a href="#Pricing" style="color: #1e90ff;">pricing</a> options.
        </p>
        """,
        unsafe_allow_html=True,
    )
    st.header("Create Audio")
    creation_method = st.radio(
        "Choose how you want to create audio:",
        ("Upload Video", "Input Comment"),
        horizontal=True,
    )

    # -----------------------------
    # 13. Audio Creation via Comment
    # -----------------------------
    if creation_method == "Input Comment":
        st.subheader("Create Audio via Comment")
        user_comment = st.text_input("Enter your comment here:")
        # Sound Duration Input
        default_duration = 10  # Default to 10 seconds
        max_duration = 60  # Maximum allowed duration
        user_duration = st.number_input(
            "Select sound duration (seconds):",
            min_value=1,
            max_value=max_duration,
            value=default_duration,
            step=1,
        )
        if st.button("Generate Sound"):
            if not user_comment.strip():
                st.error("Please enter a comment to generate sound.")
            else:
                try:
                    progress_bar = st.progress(0)
                    progress_text = st.empty()
                    with st.spinner("Generating sound from your comment..."):
                        # Step 1: Generating audio
                        progress_text.text("Generating audio...")
                        progress_bar.progress(20)
                        samples = _run_audio_pipeline(user_comment, user_duration)
                        progress_bar.progress(60)
                        progress_text.text("Finalizing audio...")
                        wav_bytes = _encode_wav(samples)
                        progress_bar.progress(80)
                        progress_text.text("Almost done...")
                    # Update conversion count
                    st.session_state.conversion_count += 1
                    progress_bar.progress(100)
                    progress_text.text("Audio generation complete!")
                    _show_generated_audio(wav_bytes)
                    # Leave the completion message visible briefly, then
                    # remove the progress widgets.
                    time.sleep(1)
                    progress_text.empty()
                    progress_bar.empty()
                except Exception as e:
                    st.error(f"An error occurred while generating audio: {e}")

    # -----------------------------
    # 14. Audio Creation via Video Upload
    # -----------------------------
    elif creation_method == "Upload Video":
        st.subheader("Create Audio via Video Upload")
        uploaded_video = st.file_uploader("Upload a video", type=["mp4", "avi", "mov"])
        if uploaded_video:
            try:
                # Start from the beginning of the upload in case an earlier
                # script run already read from it.
                uploaded_video.seek(0)
                # Open the video using PyAV; the context manager closes the
                # container even when processing stops early.
                with av.open(uploaded_video) as container:
                    if not container.streams.video:
                        raise ValueError("the file does not contain a video stream")
                    video_stream = container.streams.video[0]
                    video_duration = _video_duration_seconds(container, video_stream)
                    st.write(f"Video Duration: {video_duration:.2f} seconds")
                    # Sound Duration Selection. The default is clamped into
                    # the slider range; a value outside it makes st.slider
                    # raise.
                    sound_duration = st.slider(
                        "Select sound duration (seconds):",
                        min_value=1,
                        max_value=_MAX_VIDEO_SOUND_SECONDS,
                        value=min(max(int(video_duration), 1), _MAX_VIDEO_SOUND_SECONDS),
                        step=1,
                    )
                    if st.button("Process Video"):
                        try:
                            progress_bar = st.progress(0)
                            progress_text = st.empty()
                            with st.spinner("Processing your video..."):
                                # Step 1: Extracting frames
                                progress_text.text("Extracting frames from video...")
                                clip_len = video_model.config.encoder.num_frames
                                progress_bar.progress(10)
                                frames = _sample_video_frames(container, video_stream, clip_len)
                                if not frames:
                                    st.error("No frames were extracted from the video.")
                                    st.stop()
                                progress_bar.progress(30)
                                progress_text.text("Preprocessing frames...")
                                processed_frames = preprocess_frames(
                                    frames, target_size=(224, 224), num_frames=len(frames)
                                )
                                if not processed_frames:
                                    st.error("No valid frames to process after preprocessing.")
                                    st.stop()
                                progress_bar.progress(50)

                                # Step 2: Generating caption
                                progress_text.text("Generating caption...")
                                pixel_values = image_processor(
                                    images=processed_frames, return_tensors="pt"
                                ).pixel_values.to(device)
                                gen_kwargs = {"min_length": 10, "max_length": 20, "num_beams": 8}
                                tokens = video_model.generate(pixel_values, **gen_kwargs)
                                caption = tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
                                st.write(f"Generated Caption: {caption}")
                                progress_bar.progress(70)

                                # Step 3: Generating audio based on caption
                                progress_text.text("Generating sound based on the video...")
                                samples = _run_audio_pipeline(
                                    caption, min(sound_duration, _MAX_VIDEO_SOUND_SECONDS)
                                )
                                progress_bar.progress(90)
                                progress_text.text("Finalizing audio...")
                                wav_bytes = _encode_wav(samples)
                            # Update conversion count
                            st.session_state.conversion_count += 1
                            progress_bar.progress(100)
                            progress_text.text("Audio generation complete!")
                            _show_generated_audio(wav_bytes)
                            # Leave the completion message visible briefly,
                            # then remove the progress widgets.
                            time.sleep(1)
                            progress_text.empty()
                            progress_bar.empty()
                        except Exception as e:
                            st.error(f"An error occurred while processing the video: {e}")
            except Exception as e:
                st.error(f"Failed to open the video file: {e}")

    # -----------------------------
    # 15. Conversion Count and Subscription Prompt
    # -----------------------------
    if (
        st.session_state.conversion_count >= _FREE_CONVERSION_LIMIT
        and not st.session_state.subscribed
    ):
        st.warning("You have used your 10 free conversions. Please subscribe to process more content.")
        if st.button("Subscribe for $5/month (Currently not available)"):
            handle_subscription()
elif page == "About": | |
# ----------------------------- | |
# 16. About Page Content | |
# ----------------------------- | |
st.title("About SoundScene.ai") | |
st.markdown( | |
""" | |
<p style='text-align: center;'> | |
SoundScene.ai leverages cutting-edge AI technologies to transform your visual content into immersive soundscapes. Whether you're a content creator, filmmaker, or enthusiast, our platform empowers you to add rich audio dimensions to your videos effortlessly. | |
</p> | |
<br> | |
<h3>Features:</h3> | |
<ul> | |
<li>Generate audio from video content with precise synchronization.</li> | |
<li>Create custom soundscapes based on textual descriptions.</li> | |
<li>Download and share your unique audio creations.</li> | |
<li>Secure subscription plans to cater to your needs.</li> | |
</ul> | |
<br> | |
<h3>Base Models From Huggingface:</h3> | |
<p> | |
<li>Image Processing: MCG-NJU/videomae-base</li> | |
<li>Tokenizer: gpt2</li> | |
<li>Video Model: Neleac/timesformer-gpt2-video-captioning</li> | |
<li>Audio Model = stabilityai/stable-audio-open-1.0</li> | |
</p> | |
<a href="https://huggingface.co/stabilityai/stable-audio-open-1.0" target="_blank" style="color: #1e90ff;"> stable-audio-open-1.0 </a> | |
<a href="https://huggingface.co/Neleac/SpaceTimeGPT" target="_blank" style="color: #1e90ff;"> SpaceTimeGPT </a> | |
""", | |
unsafe_allow_html=True | |
) | |
elif page == "Pricing": | |
# ----------------------------- | |
# 17. Pricing Page Content | |
# ----------------------------- | |
st.title("Pricing") | |
st.markdown( | |
""" | |
<p style='text-align: center;'> | |
Choose a plan that fits your needs and start transforming your content today! | |
</p> | |
""", | |
unsafe_allow_html=True | |
) | |
# Pricing Cards | |
col1, col2, col3 = st.columns(3) | |
with col1: | |
st.markdown( | |
""" | |
<div style="background-color:#1c1e26;padding:20px;border-radius:10px;"> | |
<h3 style="text-align:center;">Free</h3> | |
<p style="text-align:center; font-size:24px;">$0/month</p> | |
<ul style="color:#ffffff;"> | |
<li>Up to 10 conversions per month</li> | |
<li>Basic Support</li> | |
</ul> | |
<div style="text-align:center;"> | |
<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;">Select</button> | |
</div> | |
</div> | |
""", | |
unsafe_allow_html=True | |
) | |
with col2: | |
st.markdown( | |
""" | |
<div style="background-color:#1c1e26;padding:20px;border-radius:10px;"> | |
<h3 style="text-align:center;">Pro</h3> | |
<p style="text-align:center; font-size:24px;">$5/month (Currently not available)</p> | |
<ul style="color:#ffffff;"> | |
<li>Unlimited conversions</li> | |
<li>Priority Support</li> | |
<li>Access to Premium Features</li> | |
</ul> | |
<div style="text-align:center;"> | |
<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;" onclick="window.location.href='#'">Select</button> | |
</div> | |
</div> | |
""", | |
unsafe_allow_html=True | |
) | |
with col3: | |
st.markdown( | |
""" | |
<div style="background-color:#1c1e26;padding:20px;border-radius:10px;"> | |
<h3 style="text-align:center;">Enterprise</h3> | |
<p style="text-align:center; font-size:24px;">Contact Us</p> | |
<ul style="color:#ffffff;"> | |
<li>Custom Solutions</li> | |
<li>Dedicated Support</li> | |
<li>Scalable Infrastructure</li> | |
</ul> | |
<div style="text-align:center;"> | |
<button style="background-color:#1e90ff; color:#ffffff; padding:10px 20px; border:none; border-radius:5px;" onclick="window.location.href='#'">Contact</button> | |
</div> | |
</div> | |
""", | |
unsafe_allow_html=True | |
) | |
elif page == "Contact": | |
# ----------------------------- | |
# 18. Contact Page Content | |
# ----------------------------- | |
st.title("Contact Us") | |
st.markdown( | |
""" | |
<p style='text-align: center;'> | |
We'd love to hear from you! Whether you have a question about features, trials, pricing, or anything else, our team is ready to answer all your questions. | |
</p> | |
""", | |
unsafe_allow_html=True | |
) | |
# Contact Form | |
with st.form("contact_form"): | |
name = st.text_input("Your Name") | |
email = st.text_input("Your Email") | |
message = st.text_area("Your Message") | |
submitted = st.form_submit_button("Send") | |
if submitted: | |
if not name or not email or not message: | |
st.error("Please fill out all fields.") | |
else: | |
# Here you can integrate with an email service or database to store the messages | |
st.success("Thank you for reaching out! We'll get back to you shortly.") | |
# Contact Details | |
st.markdown("---") | |
st.markdown( | |
""" | |
<h3>Social Media Accounts</h3> | |
<a href="https://www.linkedin.com/in/yunusakkaya/" target="_blank" style="color: #1e90ff;"> Linkedin </a> | |
<a href="https://medium.com/@yunus-akkaya" target="_blank" style="color: #1e90ff;"> Medium </a> | |
""", | |
unsafe_allow_html=True | |
) | |
# ----------------------------- | |
# 19. Conversion Count and Subscription Prompt (Moved to Home Page) | |
# ----------------------------- | |
# Note: This section is already handled within the "Home" page based on conversion count. | |
# ----------------------------- | |
# 20. Security and Best Practices Notes | |
# ----------------------------- | |
# Note: | |
# - Replace placeholder API keys with secure methods (e.g., environment variables or Streamlit secrets). | |
# - Ensure proper configuration of Firebase and Stripe if enabling authentication and payment features. | |
# - Validate and sanitize all user inputs to enhance security. | |
# - Customize the success_url and cancel_url in the Stripe checkout session to match your deployment URLs. | |
# ----------------------------- | |
# 21. Footer (Hidden) | |
# ----------------------------- | |
# Optional: Add a custom footer if desired | |
st.markdown( | |
""" | |
<style> | |
.footer { | |
position: fixed; | |
left: 0; | |
bottom: 0; | |
width: 100%; | |
background-color: #1c1e26; | |
color: white; | |
text-align: center; | |
padding: 10px 0; | |
} | |
</style> | |
<div class="footer"> | |
<p>© 2024 SoundScene.ai. All rights reserved.</p> | |
</div> | |
""", | |
unsafe_allow_html=True | |
) | |