Spaces:

Santhosh1325
/

FusionMind_TransArt_V2

Sleeping

App Files Community

Santhosh1325 committed on Sep 29, 2024

Commit

dd19ac8

verified ·

1 Parent(s): 11adc11

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -36

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
 import streamlit as st
 import requests
 import os
-from transformers import MBartForConditionalGeneration, MBart50TokenizerFast, VitsModel, AutoTokenizer
-import torch
-import soundfile as sf
 # API keys for other features (optional)
 Image_Token = os.getenv('Image_generation')
@@ -39,16 +37,18 @@ content_models = {
 # Load the translation model and tokenizer locally
 @st.cache_resource
 def load_translation_model():
-    model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
-    tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
     return model, tokenizer
 # Function to perform translation locally
 def translate_text_local(text):
     model, tokenizer = load_translation_model()
-    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
-    translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"])
-    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
     return translated_text
 # Function to query Groq content generation model (optional)
@@ -77,9 +77,9 @@ def generate_image_prompt(english_text):
         "model": "mixtral-8x7b-32768",
         "messages": [
             {"role": "system", "content": "You are a professional Text to image prompt generator."},
-            {"role": "user", "content": f"Create a text to image generation prompt about {english_text} within 30 tokens."}
         ],
-        "max_tokens": 30
     }
     response = requests.post("https://api.groq.com/openai/v1/chat/completions", json=payload, headers=Image_Prompt)
     if response.status_code == 200:
@@ -99,28 +99,47 @@ def generate_image(image_prompt, model_url):
         st.error(f"Image Generation Error {response.status_code}: {response.text}")
         return None
-# New Function to generate speech from text using VitsModel
-def generate_speech(text):
-    model = VitsModel.from_pretrained("facebook/mms-tts-eng")
-    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
-    inputs = tokenizer(text, return_tensors="pt")
-    # Generate the speech waveform
-    with torch.no_grad():
-        output = model(**inputs).waveform
-    # Save the waveform as an audio file
-    audio_path = "output.wav"
-    sf.write(audio_path, output.numpy().flatten(), 16000)
-    return audio_path
 # User Guide Section
 def show_user_guide():
     st.title("FusionMind User Guide")
     st.write("""
-        ... [content unchanged] ...
     """)
 # Main Streamlit app
@@ -169,14 +188,6 @@ def main():
                     if content_output:
                         st.success(content_output)
-                        # Step 4: Generate speech from the content
-                        st.write("### Generated Speech:")
-                        with st.spinner('Generating speech...'):
-                            audio_path = generate_speech(content_output)
-                            audio_file = open(audio_path, 'rb')
-                            audio_bytes = audio_file.read()
-                            st.audio(audio_bytes, format="audio/wav")
                 # Step 3: Generate Image from the prompt (optional)
                 st.write("### Generated Image:")
                 with st.spinner('Generating image...'):

 import streamlit as st
 import requests
 import os
+from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 # API keys for other features (optional)
 Image_Token = os.getenv('Image_generation')
 # Load the translation model and tokenizer locally
 @st.cache_resource
 def load_translation_model():
+    with st.spinner('Loading translation model... Please wait.'):
+        model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
+        tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
     return model, tokenizer
 # Function to perform translation locally
 def translate_text_local(text):
     model, tokenizer = load_translation_model()
+    with st.spinner('Translation is on progress... Please wait.'):
+        inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
+        translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"])
+        translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
     return translated_text
 # Function to query Groq content generation model (optional)
         "model": "mixtral-8x7b-32768",
         "messages": [
             {"role": "system", "content": "You are a professional Text to image prompt generator."},
+            {"role": "user", "content": f"Create a text to image generation prompt about {english_text} within 150 tokens."}
         ],
+        "max_tokens": 150
     }
     response = requests.post("https://api.groq.com/openai/v1/chat/completions", json=payload, headers=Image_Prompt)
     if response.status_code == 200:
         st.error(f"Image Generation Error {response.status_code}: {response.text}")
         return None
 # User Guide Section
 def show_user_guide():
     st.title("FusionMind User Guide")
     st.write("""
+### Welcome to the FusionMind User Guide!
+### How to use this app:
+1. **Input Tamil Text**:
+   - You can either select one of the suggested Tamil phrases or input your own text. The app primarily focuses on Tamil inputs, but it supports a wide range of other languages as well (see the list below).
+2. **Generate Translations**:
+   - Once you've input your text, the app will automatically translate it to English. The translation model is a **many-to-one model**, meaning it can take input from various languages and translate it into English.
+3. **Generate Educational Content**:
+   - After translating the text into English, the app will generate **educational content** based on the translated input. You can adjust the creativity of the content generation using the temperature slider, and control the length of the output with the token limit setting.
+4. **Generate Images**:
+   - In addition to generating content, the app can also generate an **image** related to the translated content. You don’t need to worry about creating complex image prompts—FusionMind includes an automatic **image prompt generator** that will convert your input into a well-defined image prompt, ensuring better image generation results.
+---
+### Features:
+- **Multilingual Translation**:
+   - FusionMind supports a **many-to-one translation model**, so you can input text in a wide variety of languages, not just Tamil. Below are the supported languages:
+     - **Arabic (ar_AR)**, **Czech (cs_CZ)**, **German (de_DE)**, **English (en_XX)**, **Spanish (es_XX)**, **Estonian (et_EE)**, **Finnish (fi_FI)**, **French (fr_XX)**, **Gujarati (gu_IN)**, **Hindi (hi_IN)**, **Italian (it_IT)**, **Japanese (ja_XX)**, **Kazakh (kk_KZ)**, **Korean (ko_KR)**, **Lithuanian (lt_LT)**, **Latvian (lv_LV)**, **Burmese (my_MM)**, **Nepali (ne_NP)**, **Dutch (nl_XX)**, **Romanian (ro_RO)**, **Russian (ru_RU)**, **Sinhala (si_LK)**, **Turkish (tr_TR)**, **Vietnamese (vi_VN)**, **Chinese (zh_CN)**, **Afrikaans (af_ZA)**, **Azerbaijani (az_AZ)**, **Bengali (bn_IN)**, **Persian (fa_IR)**, **Hebrew (he_IL)**, **Croatian (hr_HR)**, **Indonesian (id_ID)**, **Georgian (ka_GE)**, **Khmer (km_KH)**, **Macedonian (mk_MK)**, **Malayalam (ml_IN)**, **Mongolian (mn_MN)**, **Marathi (mr_IN)**, **Polish (pl_PL)**, **Pashto (ps_AF)**, **Portuguese (pt_XX)**, **Swedish (sv_SE)**, **Swahili (sw_KE)**, **Tamil (ta_IN)**, **Telugu (te_IN)**, **Thai (th_TH)**, **Tagalog (tl_XX)**, **Ukrainian (uk_UA)**, **Urdu (ur_PK)**, **Xhosa (xh_ZA)**, **Galician (gl_ES)**, **Slovene (sl_SI)**.
+- **Temperature Adjustment**:
+   - You can adjust the **temperature** of the content generation. A **higher temperature** makes the content more creative and varied, while a **lower temperature** generates more focused and deterministic responses.
+- **Token Limit**:
+   - Set the **maximum number of tokens** for content generation. This allows you to control the length of the generated educational content.
+- **Auto-Generated Image Prompts**:
+   - One of the unique features of FusionMind is the **auto-generated image prompts**. Even if you're not experienced in creating detailed prompts for image generation, the app will take care of this for you. It automatically converts the translated text or content into a well-defined prompt that produces more accurate and high-quality images.
+---
+Enjoy the multimodal experience with **FusionMind** and explore its powerful translation, content generation, and image generation features!
     """)
 # Main Streamlit app
                     if content_output:
                         st.success(content_output)
                 # Step 3: Generate Image from the prompt (optional)
                 st.write("### Generated Image:")
                 with st.spinner('Generating image...'):