saranbalan committed on
Commit
10734c0
·
verified ·
1 Parent(s): 806ef4b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # prompt: add the theme featrue in above Audio Transcription, Translation, and Sentiment Analysis app and also need selectbox for sentiment and generated image
2
+ import whisper
3
+ import os
4
+ import gradio as gr
5
+ from groq import Groq
6
+ from deep_translator import GoogleTranslator
7
+ import pickle
8
+ from diffusers import StableDiffusionPipeline
9
+ import matplotlib.pyplot as plt
10
+ import torch
11
+
12
+
13
# --- Groq client configuration ----------------------------------------------
# SECURITY: this key was committed to a public repository and must be rotated.
# Prefer supplying it via the GROQ_API_KEY environment variable; the literal
# below is kept only as a backward-compatible fallback.
api_key = os.environ.get("GROQ_API_KEY", "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")
client = Groq(api_key=api_key)

# Diffusion model identifiers. model_id1 is the one loaded on demand inside
# process_audio(); model_id2 is unused here and kept for experimentation.
model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
model_id2 = "stabilityai/stable-diffusion-xl-base-1.0"

# NOTE(review): the original script eagerly loaded a StableDiffusionPipeline
# and generated a hard-coded test image at import time. That work was dead
# code — process_audio() constructs its own pipeline and the module-level
# `pipe`/`prompt`/`image` were never referenced again — and float16 weights
# are not supported for inference on CPU, so the eager load was removed to
# make app startup fast and reliable.
28
+
29
# Transcribe a Tamil audio file, translate it to English, and optionally
# illustrate the translation with Stable Diffusion.
def process_audio(audio_path, image_option):
    """Run the transcription -> translation -> (optional) image pipeline.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path to the uploaded audio file (Gradio ``type="filepath"``).
    image_option : str
        Either ``"Generate Image"`` or ``"Skip Image"`` (from the dropdown).

    Returns
    -------
    tuple
        Always exactly three items — ``(tamil_text, translation, image)`` —
        matching the three Gradio output components. On failure, the slot for
        the failed stage carries an error message and later slots are ``None``.
        (The original returned FOUR items on the early-error paths, which made
        Gradio raise a "too many output values" error; fixed here.)
    """
    if audio_path is None:
        # No file uploaded: keep the 3-tuple shape expected by the UI.
        return "Please upload an audio file.", None, None

    # Step 1: transcribe the audio with Groq's hosted Whisper model.
    try:
        with open(audio_path, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), file.read()),
                model="whisper-large-v3",
                language="ta",
                response_format="verbose_json",
            )
        tamil_text = transcription.text
    except Exception as e:
        return f"An error occurred during transcription: {str(e)}", None, None

    # Step 2: translate Tamil -> English.
    try:
        translator = GoogleTranslator(source='ta', target='en')
        translation = translator.translate(tamil_text)
    except Exception as e:
        return tamil_text, f"An error occurred during translation: {str(e)}", None

    # Step 3: optionally generate an image from the English translation.
    # The pipeline is loaded lazily so "Skip Image" requests stay cheap.
    image = None
    if image_option == "Generate Image":
        try:
            pipe = StableDiffusionPipeline.from_pretrained(
                "dreamlike-art/dreamlike-diffusion-1.0",
                # float16 kernels are unavailable on CPU ("Half" not
                # implemented); float32 is required for CPU inference.
                torch_dtype=torch.float32,
                use_safetensors=True,
            )
            pipe = pipe.to("cpu")
            image = pipe(translation).images[0]
        except Exception as e:
            # NOTE(review): the error string lands in the gr.Image slot, as in
            # the original; gr.Image cannot render text, so the UI will show
            # an empty image on this path.
            return tamil_text, translation, f"An error occurred during image generation: {str(e)}"

    return tamil_text, translation, image
68
+
69
# Build the Gradio UI: audio upload + options on the left, results on the right.
with gr.Blocks(theme=gr.themes.Base()) as iface:
    gr.Markdown("# Audio Transcription, Translation, and image Generate")

    with gr.Row():
        # Input column: audio file, image-generation toggle, submit button.
        with gr.Column():
            uploaded_audio = gr.Audio(type="filepath", label="Upload Audio File")
            generation_choice = gr.Dropdown(
                ["Generate Image", "Skip Image"],
                label="Image Generation",
                value="Generate Image",
            )
            run_button = gr.Button("Process Audio")

        # Output column: transcription, translation, and generated image.
        with gr.Column():
            transcription_box = gr.Textbox(label="Tamil Transcription")
            translation_box = gr.Textbox(label="English Translation")
            generated_image = gr.Image(label="Generated Image")

    # Wire the button to the processing pipeline; the three outputs line up
    # with the three values process_audio yields.
    run_button.click(
        fn=process_audio,
        inputs=[uploaded_audio, generation_choice],
        outputs=[transcription_box, translation_box, generated_image],
    )

# Launch the interface
iface.launch()