Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
import requests
|
3 |
import os
|
4 |
-
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
|
5 |
-
import torch
|
6 |
-
import soundfile as sf
|
7 |
|
8 |
# API keys for other features (optional)
|
9 |
Image_Token = os.getenv('Image_generation')
|
@@ -39,16 +37,18 @@ content_models = {
|
|
39 |
# Load the translation model and tokenizer locally
|
40 |
@st.cache_resource
|
41 |
def load_translation_model():
|
42 |
-
model
|
43 |
-
|
|
|
44 |
return model, tokenizer
|
45 |
|
46 |
# Function to perform translation locally
|
47 |
def translate_text_local(text):
|
48 |
model, tokenizer = load_translation_model()
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
52 |
return translated_text
|
53 |
|
54 |
# Function to query Groq content generation model (optional)
|
@@ -77,9 +77,9 @@ def generate_image_prompt(english_text):
|
|
77 |
"model": "mixtral-8x7b-32768",
|
78 |
"messages": [
|
79 |
{"role": "system", "content": "You are a professional Text to image prompt generator."},
|
80 |
-
{"role": "user", "content": f"Create a text to image generation prompt about {english_text} within
|
81 |
],
|
82 |
-
"max_tokens":
|
83 |
}
|
84 |
response = requests.post("https://api.groq.com/openai/v1/chat/completions", json=payload, headers=Image_Prompt)
|
85 |
if response.status_code == 200:
|
@@ -99,28 +99,47 @@ def generate_image(image_prompt, model_url):
|
|
99 |
st.error(f"Image Generation Error {response.status_code}: {response.text}")
|
100 |
return None
|
101 |
|
102 |
-
# New Function to generate speech from text using VitsModel
|
103 |
-
def generate_speech(text):
|
104 |
-
model = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
105 |
-
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
106 |
-
|
107 |
-
inputs = tokenizer(text, return_tensors="pt")
|
108 |
-
|
109 |
-
# Generate the speech waveform
|
110 |
-
with torch.no_grad():
|
111 |
-
output = model(**inputs).waveform
|
112 |
-
|
113 |
-
# Save the waveform as an audio file
|
114 |
-
audio_path = "output.wav"
|
115 |
-
sf.write(audio_path, output.numpy().flatten(), 16000)
|
116 |
-
|
117 |
-
return audio_path
|
118 |
-
|
119 |
# User Guide Section
|
120 |
def show_user_guide():
|
121 |
st.title("FusionMind User Guide")
|
122 |
st.write("""
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
""")
|
125 |
|
126 |
# Main Streamlit app
|
@@ -169,14 +188,6 @@ def main():
|
|
169 |
if content_output:
|
170 |
st.success(content_output)
|
171 |
|
172 |
-
# Step 4: Generate speech from the content
|
173 |
-
st.write("### Generated Speech:")
|
174 |
-
with st.spinner('Generating speech...'):
|
175 |
-
audio_path = generate_speech(content_output)
|
176 |
-
audio_file = open(audio_path, 'rb')
|
177 |
-
audio_bytes = audio_file.read()
|
178 |
-
st.audio(audio_bytes, format="audio/wav")
|
179 |
-
|
180 |
# Step 3: Generate Image from the prompt (optional)
|
181 |
st.write("### Generated Image:")
|
182 |
with st.spinner('Generating image...'):
|
|
|
1 |
import streamlit as st
|
2 |
import requests
|
3 |
import os
|
4 |
+
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
|
|
|
|
|
5 |
|
6 |
# API keys for other features (optional)
|
7 |
Image_Token = os.getenv('Image_generation')
|
|
|
37 |
# Load the translation model and tokenizer locally
|
38 |
@st.cache_resource
def load_translation_model():
    """Load the mBART-50 many-to-one translation model and its tokenizer.

    The function is wrapped in ``st.cache_resource``; a spinner is shown
    while both objects are fetched from the same pretrained checkpoint.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Model and tokenizer must come from the same checkpoint.
    checkpoint = "facebook/mbart-large-50-many-to-one-mmt"
    with st.spinner('Loading translation model... Please wait.'):
        model = MBartForConditionalGeneration.from_pretrained(checkpoint)
        tokenizer = MBart50TokenizerFast.from_pretrained(checkpoint)
        return model, tokenizer
|
44 |
|
45 |
# Function to perform translation locally
|
46 |
def translate_text_local(text):
    """Translate ``text`` to English with the locally loaded mBART-50 model.

    Args:
        text: Source-language text to translate.

    Returns:
        The decoded English translation of the single input sequence, or an
        empty string when ``text`` is empty or contains only whitespace.
    """
    # Nothing to translate: avoid loading and running the model at all.
    if not text or not text.strip():
        return ""

    model, tokenizer = load_translation_model()
    with st.spinner('Translation is in progress... Please wait.'):
        # Inputs longer than 512 tokens are truncated rather than rejected.
        inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
        # NOTE(review): tokenizer.src_lang is never set before encoding, so the
        # tokenizer's default source-language code is used for every input --
        # confirm this is intended for Tamil / other non-English text.
        translated_tokens = model.generate(
            **inputs,
            # Force decoding to start with the English language token.
            forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"],
        )
        # A single input was encoded, so take the first decoded sequence.
        translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    return translated_text
|
53 |
|
54 |
# Function to query Groq content generation model (optional)
|
|
|
77 |
"model": "mixtral-8x7b-32768",
|
78 |
"messages": [
|
79 |
{"role": "system", "content": "You are a professional Text to image prompt generator."},
|
80 |
+
{"role": "user", "content": f"Create a text to image generation prompt about {english_text} within 150 tokens."}
|
81 |
],
|
82 |
+
"max_tokens": 150
|
83 |
}
|
84 |
response = requests.post("https://api.groq.com/openai/v1/chat/completions", json=payload, headers=Image_Prompt)
|
85 |
if response.status_code == 200:
|
|
|
99 |
st.error(f"Image Generation Error {response.status_code}: {response.text}")
|
100 |
return None
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
# User Guide Section
|
103 |
def show_user_guide():
|
104 |
st.title("FusionMind User Guide")
|
105 |
st.write("""
|
106 |
+
### Welcome to the FusionMind User Guide!
|
107 |
+
|
108 |
+
### How to use this app:
|
109 |
+
|
110 |
+
1. **Input Tamil Text**:
|
111 |
+
- You can either select one of the suggested Tamil phrases or input your own text. The app primarily focuses on Tamil inputs, but it supports a wide range of other languages as well (see the list below).
|
112 |
+
|
113 |
+
2. **Generate Translations**:
|
114 |
+
- Once you've input your text, the app will automatically translate it to English. The translation model is a **many-to-one model**, meaning it can take input from various languages and translate it into English.
|
115 |
+
|
116 |
+
3. **Generate Educational Content**:
|
117 |
+
- After translating the text into English, the app will generate **educational content** based on the translated input. You can adjust the creativity of the content generation using the temperature slider, and control the length of the output with the token limit setting.
|
118 |
+
|
119 |
+
4. **Generate Images**:
|
120 |
+
- In addition to generating content, the app can also generate an **image** related to the translated content. You don’t need to worry about creating complex image prompts—FusionMind includes an automatic **image prompt generator** that will convert your input into a well-defined image prompt, ensuring better image generation results.
|
121 |
+
|
122 |
+
---
|
123 |
+
|
124 |
+
### Features:
|
125 |
+
|
126 |
+
- **Multilingual Translation**:
|
127 |
+
- FusionMind supports a **many-to-one translation model**, so you can input text in a wide variety of languages, not just Tamil. Below are the supported languages:
|
128 |
+
|
129 |
+
- **Arabic (ar_AR)**, **Czech (cs_CZ)**, **German (de_DE)**, **English (en_XX)**, **Spanish (es_XX)**, **Estonian (et_EE)**, **Finnish (fi_FI)**, **French (fr_XX)**, **Gujarati (gu_IN)**, **Hindi (hi_IN)**, **Italian (it_IT)**, **Japanese (ja_XX)**, **Kazakh (kk_KZ)**, **Korean (ko_KR)**, **Lithuanian (lt_LT)**, **Latvian (lv_LV)**, **Burmese (my_MM)**, **Nepali (ne_NP)**, **Dutch (nl_XX)**, **Romanian (ro_RO)**, **Russian (ru_RU)**, **Sinhala (si_LK)**, **Turkish (tr_TR)**, **Vietnamese (vi_VN)**, **Chinese (zh_CN)**, **Afrikaans (af_ZA)**, **Azerbaijani (az_AZ)**, **Bengali (bn_IN)**, **Persian (fa_IR)**, **Hebrew (he_IL)**, **Croatian (hr_HR)**, **Indonesian (id_ID)**, **Georgian (ka_GE)**, **Khmer (km_KH)**, **Macedonian (mk_MK)**, **Malayalam (ml_IN)**, **Mongolian (mn_MN)**, **Marathi (mr_IN)**, **Polish (pl_PL)**, **Pashto (ps_AF)**, **Portuguese (pt_XX)**, **Swedish (sv_SE)**, **Swahili (sw_KE)**, **Tamil (ta_IN)**, **Telugu (te_IN)**, **Thai (th_TH)**, **Tagalog (tl_XX)**, **Ukrainian (uk_UA)**, **Urdu (ur_PK)**, **Xhosa (xh_ZA)**, **Galician (gl_ES)**, **Slovene (sl_SI)**.
|
130 |
+
|
131 |
+
- **Temperature Adjustment**:
|
132 |
+
- You can adjust the **temperature** of the content generation. A **higher temperature** makes the content more creative and varied, while a **lower temperature** generates more focused and deterministic responses.
|
133 |
+
|
134 |
+
- **Token Limit**:
|
135 |
+
- Set the **maximum number of tokens** for content generation. This allows you to control the length of the generated educational content.
|
136 |
+
|
137 |
+
- **Auto-Generated Image Prompts**:
|
138 |
+
- One of the unique features of FusionMind is the **auto-generated image prompts**. Even if you're not experienced in creating detailed prompts for image generation, the app will take care of this for you. It automatically converts the translated text or content into a well-defined prompt that produces more accurate and high-quality images.
|
139 |
+
|
140 |
+
---
|
141 |
+
|
142 |
+
Enjoy the multimodal experience with **FusionMind** and explore its powerful translation, content generation, and image generation features!
|
143 |
""")
|
144 |
|
145 |
# Main Streamlit app
|
|
|
188 |
if content_output:
|
189 |
st.success(content_output)
|
190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
# Step 3: Generate Image from the prompt (optional)
|
192 |
st.write("### Generated Image:")
|
193 |
with st.spinner('Generating image...'):
|