Santhosh1325 committed on
Commit
dd19ac8
·
verified ·
1 Parent(s): 11adc11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -36
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import streamlit as st
2
  import requests
3
  import os
4
- from transformers import MBartForConditionalGeneration, MBart50TokenizerFast, VitsModel, AutoTokenizer
5
- import torch
6
- import soundfile as sf
7
 
8
  # API keys for other features (optional)
9
  Image_Token = os.getenv('Image_generation')
@@ -39,16 +37,18 @@ content_models = {
39
  # Load the translation model and tokenizer locally
40
  @st.cache_resource
41
  def load_translation_model():
42
- model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
43
- tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
 
44
  return model, tokenizer
45
 
46
  # Function to perform translation locally
47
  def translate_text_local(text):
48
  model, tokenizer = load_translation_model()
49
- inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
50
- translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"])
51
- translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
 
52
  return translated_text
53
 
54
  # Function to query Groq content generation model (optional)
@@ -77,9 +77,9 @@ def generate_image_prompt(english_text):
77
  "model": "mixtral-8x7b-32768",
78
  "messages": [
79
  {"role": "system", "content": "You are a professional Text to image prompt generator."},
80
- {"role": "user", "content": f"Create a text to image generation prompt about {english_text} within 30 tokens."}
81
  ],
82
- "max_tokens": 30
83
  }
84
  response = requests.post("https://api.groq.com/openai/v1/chat/completions", json=payload, headers=Image_Prompt)
85
  if response.status_code == 200:
@@ -99,28 +99,47 @@ def generate_image(image_prompt, model_url):
99
  st.error(f"Image Generation Error {response.status_code}: {response.text}")
100
  return None
101
 
102
- # New Function to generate speech from text using VitsModel
103
- def generate_speech(text):
104
- model = VitsModel.from_pretrained("facebook/mms-tts-eng")
105
- tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
106
-
107
- inputs = tokenizer(text, return_tensors="pt")
108
-
109
- # Generate the speech waveform
110
- with torch.no_grad():
111
- output = model(**inputs).waveform
112
-
113
- # Save the waveform as an audio file
114
- audio_path = "output.wav"
115
- sf.write(audio_path, output.numpy().flatten(), 16000)
116
-
117
- return audio_path
118
-
119
  # User Guide Section
120
  def show_user_guide():
121
  st.title("FusionMind User Guide")
122
  st.write("""
123
- ... [content unchanged] ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  """)
125
 
126
  # Main Streamlit app
@@ -169,14 +188,6 @@ def main():
169
  if content_output:
170
  st.success(content_output)
171
 
172
- # Step 4: Generate speech from the content
173
- st.write("### Generated Speech:")
174
- with st.spinner('Generating speech...'):
175
- audio_path = generate_speech(content_output)
176
- audio_file = open(audio_path, 'rb')
177
- audio_bytes = audio_file.read()
178
- st.audio(audio_bytes, format="audio/wav")
179
-
180
  # Step 3: Generate Image from the prompt (optional)
181
  st.write("### Generated Image:")
182
  with st.spinner('Generating image...'):
 
1
  import streamlit as st
2
  import requests
3
  import os
4
+ from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 
 
5
 
6
  # API keys for other features (optional)
7
  Image_Token = os.getenv('Image_generation')
 
37
  # Load the translation model and tokenizer locally
38
  @st.cache_resource
39
  def load_translation_model():
40
+ with st.spinner('Loading translation model... Please wait.'):
41
+ model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
42
+ tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
43
  return model, tokenizer
44
 
45
  # Function to perform translation locally
46
  def translate_text_local(text):
47
  model, tokenizer = load_translation_model()
48
+ with st.spinner('Translation is on progress... Please wait.'):
49
+ inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
50
+ translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"])
51
+ translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
52
  return translated_text
53
 
54
  # Function to query Groq content generation model (optional)
 
77
  "model": "mixtral-8x7b-32768",
78
  "messages": [
79
  {"role": "system", "content": "You are a professional Text to image prompt generator."},
80
+ {"role": "user", "content": f"Create a text to image generation prompt about {english_text} within 150 tokens."}
81
  ],
82
+ "max_tokens": 150
83
  }
84
  response = requests.post("https://api.groq.com/openai/v1/chat/completions", json=payload, headers=Image_Prompt)
85
  if response.status_code == 200:
 
99
  st.error(f"Image Generation Error {response.status_code}: {response.text}")
100
  return None
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  # User Guide Section
103
  def show_user_guide():
104
  st.title("FusionMind User Guide")
105
  st.write("""
106
+ ### Welcome to the FusionMind User Guide!
107
+
108
+ ### How to use this app:
109
+
110
+ 1. **Input Tamil Text**:
111
+ - You can either select one of the suggested Tamil phrases or input your own text. The app primarily focuses on Tamil inputs, but it supports a wide range of other languages as well (see the list below).
112
+
113
+ 2. **Generate Translations**:
114
+ - Once you've input your text, the app will automatically translate it to English. The translation model is a **many-to-one model**, meaning it can take input from various languages and translate it into English.
115
+
116
+ 3. **Generate Educational Content**:
117
+ - After translating the text into English, the app will generate **educational content** based on the translated input. You can adjust the creativity of the content generation using the temperature slider, and control the length of the output with the token limit setting.
118
+
119
+ 4. **Generate Images**:
120
+ - In addition to generating content, the app can also generate an **image** related to the translated content. You don’t need to worry about creating complex image prompts—FusionMind includes an automatic **image prompt generator** that will convert your input into a well-defined image prompt, ensuring better image generation results.
121
+
122
+ ---
123
+
124
+ ### Features:
125
+
126
+ - **Multilingual Translation**:
127
+ - FusionMind supports a **many-to-one translation model**, so you can input text in a wide variety of languages, not just Tamil. Below are the supported languages:
128
+
129
+ - **Arabic (ar_AR)**, **Czech (cs_CZ)**, **German (de_DE)**, **English (en_XX)**, **Spanish (es_XX)**, **Estonian (et_EE)**, **Finnish (fi_FI)**, **French (fr_XX)**, **Gujarati (gu_IN)**, **Hindi (hi_IN)**, **Italian (it_IT)**, **Japanese (ja_XX)**, **Kazakh (kk_KZ)**, **Korean (ko_KR)**, **Lithuanian (lt_LT)**, **Latvian (lv_LV)**, **Burmese (my_MM)**, **Nepali (ne_NP)**, **Dutch (nl_XX)**, **Romanian (ro_RO)**, **Russian (ru_RU)**, **Sinhala (si_LK)**, **Turkish (tr_TR)**, **Vietnamese (vi_VN)**, **Chinese (zh_CN)**, **Afrikaans (af_ZA)**, **Azerbaijani (az_AZ)**, **Bengali (bn_IN)**, **Persian (fa_IR)**, **Hebrew (he_IL)**, **Croatian (hr_HR)**, **Indonesian (id_ID)**, **Georgian (ka_GE)**, **Khmer (km_KH)**, **Macedonian (mk_MK)**, **Malayalam (ml_IN)**, **Mongolian (mn_MN)**, **Marathi (mr_IN)**, **Polish (pl_PL)**, **Pashto (ps_AF)**, **Portuguese (pt_XX)**, **Swedish (sv_SE)**, **Swahili (sw_KE)**, **Tamil (ta_IN)**, **Telugu (te_IN)**, **Thai (th_TH)**, **Tagalog (tl_XX)**, **Ukrainian (uk_UA)**, **Urdu (ur_PK)**, **Xhosa (xh_ZA)**, **Galician (gl_ES)**, **Slovene (sl_SI)**.
130
+
131
+ - **Temperature Adjustment**:
132
+ - You can adjust the **temperature** of the content generation. A **higher temperature** makes the content more creative and varied, while a **lower temperature** generates more focused and deterministic responses.
133
+
134
+ - **Token Limit**:
135
+ - Set the **maximum number of tokens** for content generation. This allows you to control the length of the generated educational content.
136
+
137
+ - **Auto-Generated Image Prompts**:
138
+ - One of the unique features of FusionMind is the **auto-generated image prompts**. Even if you're not experienced in creating detailed prompts for image generation, the app will take care of this for you. It automatically converts the translated text or content into a well-defined prompt that produces more accurate and high-quality images.
139
+
140
+ ---
141
+
142
+ Enjoy the multimodal experience with **FusionMind** and explore its powerful translation, content generation, and image generation features!
143
  """)
144
 
145
  # Main Streamlit app
 
188
  if content_output:
189
  st.success(content_output)
190
 
 
 
 
 
 
 
 
 
191
  # Step 3: Generate Image from the prompt (optional)
192
  st.write("### Generated Image:")
193
  with st.spinner('Generating image...'):