awacke1 committed on
Commit
37c6135
•
1 Parent(s): 8f04cdd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -63
app.py CHANGED
@@ -15,6 +15,13 @@ from audio_recorder_streamlit import audio_recorder
15
  import json
16
  from openai import OpenAI
17
  from dotenv import load_dotenv
 
 
 
 
 
 
 
18
 
19
  # Page config
20
  st.set_page_config(
@@ -65,7 +72,26 @@ st.markdown("""
65
 
66
  # Load environment variables
67
  load_dotenv()
68
- client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  # Bike Collections
71
  bike_collections = {
@@ -86,14 +112,6 @@ bike_collections = {
86
  Lighting: Natural starlight with subtle rim lighting
87
  Color palette: Deep blues, silver highlights, cosmic purples""",
88
  "emoji": "✨"
89
- },
90
- "Moonlit Hopper": {
91
- "prompt": """A sleek black bike mid-hop over a moonlit meadow.
92
- Full moon illuminating misty surroundings with fireflies dancing around.
93
- Camera angle: Side profile with slight low angle
94
- Lighting: Soft moonlight with atmospheric fog
95
- Color palette: Silver blues, soft whites, deep shadows""",
96
- "emoji": "πŸŒ™"
97
  }
98
  },
99
  "Nature-Inspired Collection 🌲": {
@@ -104,36 +122,142 @@ bike_collections = {
104
  Lighting: Natural forest lighting with sun rays
105
  Color palette: Forest greens, golden sunlight, deep shadows""",
106
  "emoji": "πŸ¦—"
107
- },
108
- "Onyx Leapfrog": {
109
- "prompt": """A bike with obsidian-black finish jumping over a sparkling creek.
110
- Water reflection creates mirror effect with ripples from the leap.
111
- Camera angle: Low angle from water level
112
- Lighting: Golden hour side lighting
113
- Color palette: Deep blacks, water blues, forest greens""",
114
- "emoji": "🐸"
115
  }
116
  }
117
  }
118
 
119
  # File handling functions
120
  def generate_filename(prompt, file_type):
121
- """Generate a safe filename from prompt and timestamp"""
122
  central = pytz.timezone('US/Central')
123
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
124
  replaced_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt)
125
  safe_prompt = re.sub(r'\s+', ' ', replaced_prompt).strip()[:240]
126
  return f"{safe_date_time}_{safe_prompt}.{file_type}"
127
 
128
- def save_file(content, filename, is_binary=False):
129
- """Save content to file with proper mode"""
130
- mode = 'wb' if is_binary else 'w'
131
- with open(filename, mode) as f:
132
- f.write(content)
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  return filename
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def process_video(video_path, seconds_per_frame=1):
136
- """Extract frames and audio from video"""
137
  base64Frames = []
138
  video = cv2.VideoCapture(video_path)
139
  total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -164,6 +288,7 @@ def process_video(video_path, seconds_per_frame=1):
164
  return base64Frames, audio_path
165
 
166
  def create_media_gallery():
 
167
  st.header("🎬 Media Gallery")
168
 
169
  tabs = st.tabs(["πŸ–ΌοΈ Images", "🎡 Audio", "πŸŽ₯ Video", "🎨 Scene Generator"])
@@ -176,18 +301,31 @@ def create_media_gallery():
176
  with cols[idx % 3]:
177
  st.image(image_file)
178
  st.caption(os.path.basename(image_file))
 
 
 
 
 
 
 
179
 
180
  with tabs[1]:
181
  audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
182
  for audio_file in audio_files:
183
  with st.expander(f"🎡 {os.path.basename(audio_file)}"):
184
  st.audio(audio_file)
 
 
185
 
186
  with tabs[2]:
187
  video_files = glob.glob("*.mp4")
188
  for video_file in video_files:
189
  with st.expander(f"πŸŽ₯ {os.path.basename(video_file)}"):
190
  st.video(video_file)
 
 
 
 
191
 
192
  with tabs[3]:
193
  for collection_name, bikes in bike_collections.items():
@@ -208,51 +346,14 @@ def main():
208
 
209
  # Main navigation
210
  tab_main = st.radio("Choose Action:",
211
- ["πŸ“Έ Upload Media", "🎬 View Gallery", "🎨 Generate Scene"],
212
  horizontal=True)
213
 
214
  if tab_main == "πŸ“Έ Upload Media":
215
  col1, col2 = st.columns(2)
216
 
217
  with col1:
218
- # Image upload
219
  uploaded_image = st.file_uploader("Upload Image", type=['png', 'jpg'])
220
  if uploaded_image:
221
  st.image(uploaded_image)
222
- prompt = st.text_input("Image Description:")
223
- if st.button("Process Image"):
224
- filename = generate_filename(prompt, uploaded_image.type.split('/')[-1])
225
- save_file(uploaded_image.getvalue(), filename, is_binary=True)
226
- st.success(f"Saved as {filename}")
227
-
228
- with col2:
229
- # Audio/Video upload
230
- uploaded_media = st.file_uploader("Upload Audio/Video", type=['mp3', 'wav', 'mp4'])
231
- if uploaded_media:
232
- if uploaded_media.type.startswith('audio'):
233
- st.audio(uploaded_media)
234
- else:
235
- st.video(uploaded_media)
236
- if st.button("Save Media"):
237
- filename = generate_filename("media", uploaded_media.type.split('/')[-1])
238
- save_file(uploaded_media.getvalue(), filename, is_binary=True)
239
- st.success(f"Saved as {filename}")
240
-
241
- elif tab_main == "🎬 View Gallery":
242
- create_media_gallery()
243
-
244
- else: # Generate Scene
245
- st.header("🎨 Scene Generator")
246
- selected_collection = st.selectbox("Choose Collection", list(bike_collections.keys()))
247
- selected_bike = st.selectbox("Choose Bike", list(bike_collections[selected_collection].keys()))
248
-
249
- bike_details = bike_collections[selected_collection][selected_bike]
250
- st.markdown(f"""
251
- <div class='scene-card'>
252
- <h3>{bike_details['emoji']} {selected_bike}</h3>
253
- <p>{bike_details['prompt']}</p>
254
- </div>
255
- """, unsafe_allow_html=True)
256
-
257
- if __name__ == "__main__":
258
- main()
 
15
  import json
16
  from openai import OpenAI
17
  from dotenv import load_dotenv
18
+ from huggingface_hub import InferenceClient
19
+ from bs4 import BeautifulSoup
20
+ import textract
21
+ from xml.etree import ElementTree as ET
22
+ from urllib.parse import quote
23
+ import time
24
+ from collections import deque
25
 
26
  # Page config
27
  st.set_page_config(
 
72
 
73
  # Load environment variables
74
  load_dotenv()
75
+
76
# Build the shared OpenAI client from credentials supplied via the environment.
client = OpenAI(
    api_key=os.getenv('OPENAI_API_KEY'),
    organization=os.getenv('OPENAI_ORG_ID'),
)

# Seed session state only when a key is absent, so values already stored in
# the session are left untouched.
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = "gpt-4o-2024-05-13"
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Hugging Face endpoint settings and the request headers derived from them.
API_URL = os.getenv('API_URL')
HF_KEY = os.getenv('HF_KEY')
headers = {
    "Authorization": f"Bearer {HF_KEY}",
    "Content-Type": "application/json",
}
95
 
96
  # Bike Collections
97
  bike_collections = {
 
112
  Lighting: Natural starlight with subtle rim lighting
113
  Color palette: Deep blues, silver highlights, cosmic purples""",
114
  "emoji": "✨"
 
 
 
 
 
 
 
 
115
  }
116
  },
117
  "Nature-Inspired Collection 🌲": {
 
122
  Lighting: Natural forest lighting with sun rays
123
  Color palette: Forest greens, golden sunlight, deep shadows""",
124
  "emoji": "πŸ¦—"
 
 
 
 
 
 
 
 
125
  }
126
  }
127
  }
128
 
129
  # File handling functions
130
  def generate_filename(prompt, file_type):
131
+ """Generate a safe filename using the prompt and file type."""
132
  central = pytz.timezone('US/Central')
133
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
134
  replaced_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt)
135
  safe_prompt = re.sub(r'\s+', ' ', replaced_prompt).strip()[:240]
136
  return f"{safe_date_time}_{safe_prompt}.{file_type}"
137
 
138
def create_and_save_file(content, file_type="md", prompt=None, is_image=False, should_save=True):
    """Write ``content`` to a generated filename and return that filename.

    Args:
        content: Text (``str``) or raw data (``bytes``/``bytearray``) to store.
        file_type: Extension for the generated filename. For ``"md"`` text the
            first markdown heading in ``content``, if any, names the file.
        prompt: Optional prompt. Used to name the file and, for text content
            saved with ``is_image=False``, written ahead of the content.
        is_image: When true, the content is written as-is without the prompt.
        should_save: When false, nothing is written.

    Returns:
        The filename written, or ``None`` when ``should_save`` is false.
    """
    if not should_save:
        return None

    is_binary = isinstance(content, (bytes, bytearray))

    # The filename helper runs text regexes, so raw bytes can never be used as
    # the naming source; fall back to a fixed label when no prompt is given.
    if prompt:
        name_source = prompt
    elif is_binary:
        name_source = "untitled"
    else:
        name_source = content
    filename = generate_filename(name_source, file_type)

    if file_type == "md" and not is_binary:
        title_from_content = extract_markdown_title(content)
        if title_from_content:
            filename = generate_filename(title_from_content, file_type)

    if is_binary:
        # Binary payloads (e.g. audio) must bypass text encoding entirely.
        with open(filename, "wb") as f:
            f.write(content)
        return filename

    with open(filename, "w", encoding="utf-8") as f:
        if is_image or prompt is None:
            # With no prompt there is nothing to prepend; concatenating None
            # would raise TypeError.
            f.write(content)
        else:
            f.write(prompt + "\n\n" + content)

    return filename
157
 
158
def extract_markdown_title(content):
    """Return the text of the first markdown heading in ``content``.

    A heading is a line whose first non-blank characters are one or more ``#``
    followed by text on the same line. All leading hashes are stripped, so
    ``## Section`` yields ``Section``.

    Args:
        content: Markdown text to scan.

    Returns:
        The heading text with surrounding whitespace removed, or ``None`` when
        ``content`` is not a string or contains no heading with text.
    """
    if not isinstance(content, str):
        return None
    # ``#+(?!#)`` consumes the whole run of hashes; ``[ \t]*`` (rather than
    # ``\s*``) keeps the match on one line so an empty "#" line cannot borrow
    # the following line as its title.
    title_match = re.search(r'^[ \t]*#+(?!#)[ \t]*(.+)', content, re.MULTILINE)
    if not title_match:
        return None
    title = title_match.group(1).strip()
    return title or None
164
+
165
+ # HTML5 Speech Synthesis
166
@st.cache_resource
def SpeechSynthesis(result):
    """Render an HTML5 widget that reads ``result`` aloud in the browser.

    The widget is a textarea pre-filled with ``result`` plus a button that
    passes the textarea's value to the browser's ``speechSynthesis`` API.

    Args:
        result: Text to place in the textarea. It is HTML-escaped before being
            embedded, so markup inside it (for example ``</textarea>``) is
            shown as text instead of altering the page.
    """
    # Local import keeps this block self-contained.
    from html import escape

    safe_text = escape(str(result))
    documentHTML5 = f'''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Read It Aloud</title>
        <script type="text/javascript">
            function readAloud() {{
                const text = document.getElementById("textArea").value;
                const speech = new SpeechSynthesisUtterance(text);
                window.speechSynthesis.speak(speech);
            }}
        </script>
    </head>
    <body>
        <h1>🔊 Read It Aloud</h1>
        <textarea id="textArea" rows="10" cols="80">{safe_text}</textarea>
        <br>
        <button onclick="readAloud()">🔊 Read Aloud</button>
    </body>
    </html>
    '''
    st.components.v1.html(documentHTML5, width=1280, height=300)
190
+
191
+ # Process functions for different media types
192
def process_text(text_input):
    """Send a chat turn to the configured OpenAI model and show the reply.

    Does nothing when ``text_input`` is empty. Otherwise the user turn is
    appended to ``st.session_state.messages`` and echoed, the full message
    history is sent to the model, and the reply is displayed, saved as a
    markdown file, and appended to the history.

    Args:
        text_input: The user's message.
    """
    if not text_input:
        return

    history = st.session_state.messages
    history.append({"role": "user", "content": text_input})

    with st.chat_message("user"):
        st.markdown(text_input)

    with st.chat_message("assistant"):
        # Forward only the role/content pair of every stored message.
        conversation = []
        for entry in history:
            conversation.append({"role": entry["role"], "content": entry["content"]})

        completion = client.chat.completions.create(
            model=st.session_state["openai_model"],
            messages=conversation,
            stream=False,
        )
        return_text = completion.choices[0].message.content
        st.write("Assistant: " + return_text)

        create_and_save_file(return_text, file_type="md", prompt=text_input)
        history.append({"role": "assistant", "content": return_text})
214
+
215
def process_image(image_input, user_prompt):
    """Ask the configured OpenAI model about an image and return its answer.

    Args:
        image_input: A filesystem path (``str``), raw image bytes, or an object
            exposing ``getvalue()`` that returns the image bytes.
        user_prompt: The question or instruction sent alongside the image.

    Returns:
        The text content of the model's first choice.
    """
    if isinstance(image_input, str):
        with open(image_input, "rb") as image_file:
            image_input = image_file.read()
    elif hasattr(image_input, "getvalue"):
        image_input = image_input.getvalue()

    # Label the data URL with the real format by checking the file signature;
    # anything unrecognised keeps the previous "image/png" label.
    header = bytes(image_input[:12])
    if header.startswith(b"\xff\xd8\xff"):
        mime_type = "image/jpeg"
    elif header.startswith(b"GIF8"):
        mime_type = "image/gif"
    elif header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        mime_type = "image/webp"
    else:
        mime_type = "image/png"

    base64_image = base64.b64encode(image_input).decode("utf-8")

    response = client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {
                    "url": f"data:{mime_type};base64,{base64_image}"
                }}
            ]}
        ],
        temperature=0.0,
    )

    return response.choices[0].message.content
238
+
239
def process_audio(audio_input, text_input=''):
    """Transcribe audio with Whisper, show the transcript, and save the audio.

    The transcript is appended to ``st.session_state.messages`` as a user
    message, rendered in an assistant chat bubble with a read-aloud widget, and
    the audio bytes are written to a file named after the transcript.

    Args:
        audio_input: A filesystem path (``str``), raw audio bytes, or an object
            exposing ``getvalue()`` (and optionally ``name``).
        text_input: Unused; kept so existing callers remain valid.
    """
    # Normalise every accepted input to (upload name, raw bytes). The name
    # gives the transcription upload a filename with an extension.
    if isinstance(audio_input, str):
        upload_name = os.path.basename(audio_input)
        with open(audio_input, "rb") as file:
            audio_bytes = file.read()
    elif isinstance(audio_input, (bytes, bytearray)):
        upload_name = "audio.wav"
        audio_bytes = bytes(audio_input)
    else:
        upload_name = getattr(audio_input, "name", None) or "audio.wav"
        audio_bytes = audio_input.getvalue()

    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=(upload_name, audio_bytes),
    )

    st.session_state.messages.append({"role": "user", "content": transcription.text})

    with st.chat_message("assistant"):
        st.markdown(transcription.text)
        SpeechSynthesis(transcription.text)

    # Keep the source extension when there is one; default to "wav" as before.
    extension = os.path.splitext(upload_name)[1].lstrip(".") or "wav"
    filename = generate_filename(transcription.text, extension)
    # Audio is binary, so write it directly in binary mode.
    with open(filename, "wb") as audio_file:
        audio_file.write(audio_bytes)
258
+
259
  def process_video(video_path, seconds_per_frame=1):
260
+ """Process video files for frame extraction and audio."""
261
  base64Frames = []
262
  video = cv2.VideoCapture(video_path)
263
  total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
 
288
  return base64Frames, audio_path
289
 
290
  def create_media_gallery():
291
+ """Create the media gallery interface."""
292
  st.header("🎬 Media Gallery")
293
 
294
  tabs = st.tabs(["πŸ–ΌοΈ Images", "🎡 Audio", "πŸŽ₯ Video", "🎨 Scene Generator"])
 
301
  with cols[idx % 3]:
302
  st.image(image_file)
303
  st.caption(os.path.basename(image_file))
304
+
305
+ # Add prompt input for GPT-4o analysis
306
+ prompt = st.text_input(f"Analyze image {idx}",
307
+ "Describe this image in detail and list key elements.")
308
+ if st.button(f"Analyze {idx}"):
309
+ analysis = process_image(image_file, prompt)
310
+ st.markdown(analysis)
311
 
312
  with tabs[1]:
313
  audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
314
  for audio_file in audio_files:
315
  with st.expander(f"🎡 {os.path.basename(audio_file)}"):
316
  st.audio(audio_file)
317
+ if st.button(f"Transcribe {audio_file}"):
318
+ process_audio(audio_file)
319
 
320
  with tabs[2]:
321
  video_files = glob.glob("*.mp4")
322
  for video_file in video_files:
323
  with st.expander(f"πŸŽ₯ {os.path.basename(video_file)}"):
324
  st.video(video_file)
325
+ if st.button(f"Analyze {video_file}"):
326
+ frames, audio = process_video(video_file)
327
+ if audio:
328
+ st.audio(audio)
329
 
330
  with tabs[3]:
331
  for collection_name, bikes in bike_collections.items():
 
346
 
347
  # Main navigation
348
  tab_main = st.radio("Choose Action:",
349
+ ["πŸ“Έ Upload Media", "🎬 View Gallery", "🎨 Generate Scene", "πŸ€– Chat"],
350
  horizontal=True)
351
 
352
  if tab_main == "πŸ“Έ Upload Media":
353
  col1, col2 = st.columns(2)
354
 
355
  with col1:
 
356
  uploaded_image = st.file_uploader("Upload Image", type=['png', 'jpg'])
357
  if uploaded_image:
358
  st.image(uploaded_image)
359
+ prompt = st