File size: 3,396 Bytes
fc48446 0cc6b2a 4cc910e 0cc6b2a fc48446 4fcc6b4 0cc6b2a 4fcc6b4 fc48446 0cc6b2a fc48446 4fcc6b4 0cc6b2a 4fcc6b4 0cc6b2a 4fcc6b4 fc48446 4fcc6b4 4cc910e 4fcc6b4 0cc6b2a 4fcc6b4 fc48446 df4ad39 fc48446 0cc6b2a fc48446 0cc6b2a fc48446 4fcc6b4 fc48446 0cc6b2a 4fcc6b4 fc48446 0cc6b2a 4fcc6b4 df4ad39 4cc910e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import streamlit as st
import importlib.util
import langdetect
try:
    # Make sure the accelerate library is available before pulling in the
    # model stack; without it the app reports the problem and halts.
    accelerate_spec = importlib.util.find_spec("accelerate")
    if accelerate_spec is None:
        st.error("缺少 'accelerate' 库,请安装该库以加载 FP8 量化模型。可以使用 'pip install accelerate' 进行安装。")
        st.stop()

    # Standard-library helpers used by the functions and page script below.
    import io
    import os
    import tempfile

    # Third-party packages: model pipelines and text-to-speech.
    from transformers import pipeline
    from gtts import gTTS
except ImportError as import_error:
    # Show the import failure in the page and stop running the script.
    st.error(f"导入库时出错: {import_error}")
    st.stop()
# function part
# img2text
@st.cache_resource(show_spinner=False)
def _load_image_captioner():
    """Build the BLIP image-captioning pipeline once and reuse it.

    Streamlit re-executes the whole script on every widget interaction;
    caching the pipeline avoids reloading the model on each of those runs.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def img2text(url):
    """Generate a text caption for an image.

    Args:
        url: Image reference handed straight to the image-to-text pipeline
            (the page script below passes a local temporary file path).

    Returns:
        The ``generated_text`` of the first pipeline result, or ``None`` if
        loading the model or running it raised; in that case the error is
        reported on the page with ``st.error``.
    """
    try:
        image_to_text_model = _load_image_captioner()
        return image_to_text_model(url)[0]["generated_text"]
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        st.error(f"图像描述生成出错: {e}")
        return None
# text2story
@st.cache_resource(show_spinner=False)
def _load_story_generator():
    """Build the text-generation pipeline once and reuse it.

    Streamlit re-executes the whole script on every widget interaction;
    caching the pipeline avoids reloading the model on each of those runs.
    """
    # NOTE(review): trust_remote_code=True lets code shipped in the model
    # repository run locally; confirm this repository is trusted.
    return pipeline("text-generation", model="perplexity-ai/r1-1776", trust_remote_code=True)


def text2story(text):
    """Expand a short text prompt into a story.

    Args:
        text: Prompt passed to the text-generation pipeline (the page script
            below passes the image caption).

    Returns:
        The ``generated_text`` of the single requested sequence (generated
        with ``max_length=200``), or ``None`` if loading the model or
        running it raised; in that case the error is reported on the page
        with ``st.error``.
    """
    try:
        story_generator = _load_story_generator()
        results = story_generator(text, max_length=200, num_return_sequences=1)
        return results[0]['generated_text']
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        st.error(f"故事生成出错: {e}")
        return None
# text2audio
def text2audio(story_text):
    """Convert story text to speech and return it as an in-memory stream.

    The language is detected with ``langdetect``; when detection fails the
    function falls back to English. Synthesis happens in one place, inside
    the generic handler, so a failure during the fallback attempt is
    reported rather than escaping the function.

    Args:
        story_text: The text to be spoken.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding the audio written
        by gTTS, or ``None`` if anything other than language detection
        failed; in that case the error is reported with ``st.error``.
    """
    try:
        # Detect the language of the story. Only this call is covered by
        # the narrow handler, so the fallback does not run synthesis inside
        # an ``except`` clause.
        try:
            detected_lang = langdetect.detect(story_text)
        except langdetect.LangDetectException:
            st.error("无法检测故事的语言,默认使用英语进行语音合成。")
            detected_lang = 'en'

        tts = gTTS(text=story_text, lang=detected_lang)
        audio_file = io.BytesIO()
        tts.write_to_fp(audio_file)
        # Rewind so the consumer reads the audio from the beginning.
        audio_file.seek(0)
        return audio_file
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        st.error(f"文本转语音出错: {e}")
        return None
# Page script: upload an image, caption it, turn the caption into a story,
# and offer the story as audio.
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    print(uploaded_file)

    # Write the upload to a temporary file so img2text can be given a path.
    # delete=False keeps the file after the handle is closed; it is removed
    # in the ``finally`` clause below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        temp_file.write(uploaded_file.getvalue())
        temp_file_path = temp_file.name

    try:
        st.image(uploaded_file, caption="Uploaded Image",
                 use_container_width=True)  # switched to use_container_width

        # Stage 1: Image to Text
        st.text('Processing img2text...')
        scenario = img2text(temp_file_path)
        if scenario:
            st.write(scenario)

            # Stage 2: Text to Story
            st.text('Generating a story...')
            story = text2story(scenario)
            if story:
                st.write(story)

                # Stage 3: Story to Audio data
                st.text('Generating audio data...')
                audio_data = text2audio(story)
                if audio_data:
                    # Play button
                    # NOTE(review): a button click makes Streamlit re-run
                    # this script, so the stages above presumably execute
                    # again before the player is shown; consider keeping
                    # results in st.session_state.
                    if st.button("Play Audio"):
                        st.audio(audio_data,
                                 format="audio/mpeg",
                                 start_time=0)
    finally:
        # Delete the temporary file even when a stage above raised, so
        # failed runs do not leave files behind; report (rather than
        # propagate) a failure to delete it.
        try:
            os.remove(temp_file_path)
        except OSError as e:
            st.error(f"删除临时文件时出错: {e}")