# Models import torch from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration from pyannote.audio import Pipeline # Audio Manipulation import audioread from pydub import AudioSegment, silence import yt_dlp from yt_dlp import DownloadError # Others import pandas as pd from datetime import timedelta import os import streamlit as st import time import pickle def config(): st.set_page_config(page_title="Speech to Text / 음성을 텍스트로", page_icon="📝") # Create a data directory to store our audio files if not os.path.exists("whisper_app/data"): os.makedirs("whisper_app/data") # Initialize session state variables if 'page_index' not in st.session_state: st.session_state['page_index'] = -1 # Handle which page should be displayed (token page, home page, results page, rename page) st.session_state['txt_transcript'] = "" # Save the transcript as .txt so we can display it again on the results page st.session_state["process"] = [] # Save the results obtained so we can display them again on the results page st.session_state['srt_txt'] = "" # Save the transcript in a subtitles case to display it on the results page st.session_state['srt_token'] = 0 # Is subtitles parameter enabled or not st.session_state['audio_file'] = None # Save the audio file provided by the user so we can display it again on the results page st.session_state["start_time"] = 0 # Default audio player starting point (0s) st.session_state["summary"] = "" # Save the summary of the transcript so we can display it on the results page st.session_state["number_of_speakers"] = 0 # Save the number of speakers detected in the conversation (diarization) st.session_state["chosen_mode"] = 0 # Save the mode chosen by the user (Diarization or not, timestamps or not) st.session_state["btn_token_list"] = [] # List of tokens that indicates what options are activated to adapt the display on results page st.session_state["my_HF_token"] = "ACCESS_TOKEN_GOES_HERE" # User's Token that allows the use of the diarization model st.session_state["disable"] = True # Default appearance of the button to change your token # Display Text and CSS st.title("Speech to Text App / 음성을 텍스트로 앱 📝") st.markdown("""