awacke1 committed on
Commit
993d89a
·
verified ·
1 Parent(s): f0ab781

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +401 -300
app.py CHANGED
@@ -150,6 +150,14 @@ def preprocess_text(text):
150
  text = re.sub(r'[^\x00-\x7F]+', '', text)
151
  return text.strip()
152
 
 
 
 
 
 
 
 
 
153
  # =============================================================================
154
  # ───────────── COSMOS DB FUNCTIONS ─────────────
155
  # =============================================================================
@@ -408,8 +416,14 @@ def display_file_editor(file_path):
408
  col1, col2 = st.columns([1, 5])
409
  with col1:
410
  if st.button("πŸ’Ύ Save"):
411
- if save_file_content(file_path, new_content):
412
- st.session_state.file_content[file_path] = new_content
 
 
 
 
 
 
413
  st.success("Saved! πŸŽ‰")
414
  time.sleep(1)
415
  st.rerun()
@@ -507,12 +521,74 @@ def show_sidebar_data_grid():
507
  })
508
  df = pd.DataFrame(data)
509
  st.sidebar.markdown("### πŸ“Š Data Grid")
510
- st.sidebar.dataframe(df)
511
  except Exception as e:
512
  st.sidebar.error(f"Data grid error: {str(e)}")
513
  else:
514
  st.sidebar.info("No container selected for data grid.")
515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  # =============================================================================
517
  # ───────────── VIDEO & AUDIO UI FUNCTIONS ─────────────
518
  # =============================================================================
@@ -629,57 +705,6 @@ def add_video_generation_ui(container):
629
  except Exception as e:
630
  st.error(f"Upload error: {str(e)}")
631
 
632
- # =============================================================================
633
- # ───────────── AI SAMPLES SIDEBAR (Processed as a Python List) ─────────────
634
- # =============================================================================
635
- def display_ai_samples():
636
- ai_samples = [
637
- {
638
- "name": "FullTextContains",
639
- "description": "Query using FullTextContains",
640
- "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "bicycle")'
641
- },
642
- {
643
- "name": "FullTextContainsAll",
644
- "description": "Query using FullTextContainsAll",
645
- "query": 'SELECT TOP 10 * FROM c WHERE FullTextContainsAll(c.text, "red", "bicycle")'
646
- },
647
- {
648
- "name": "FullTextContainsAny",
649
- "description": "Query using FullTextContainsAny",
650
- "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "red") AND FullTextContainsAny(c.text, "bicycle", "skateboard")'
651
- },
652
- {
653
- "name": "FullTextScore",
654
- "description": "Query using FullTextScore (order by relevance)",
655
- "query": 'SELECT TOP 10 * FROM c ORDER BY RANK FullTextScore(c.text, ["bicycle", "mountain"])'
656
- },
657
- {
658
- "name": "Vector Search with Score",
659
- "description": "Example vector search snippet",
660
- "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5)\nfor result, score in results:\n print(result.json(), score)'
661
- },
662
- {
663
- "name": "Vector Search with Filtering",
664
- "description": "Example vector search with a filter",
665
- "query": 'pre_filter = {"conditions": [{"property": "metadata.page", "operator": "$eq", "value": 0}]}\nresults = vector_search.similarity_search_with_score(query="Your query", k=5, pre_filter=pre_filter)'
666
- },
667
- {
668
- "name": "Hybrid Search",
669
- "description": "Example hybrid search snippet",
670
- "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5, query_type=CosmosDBQueryType.HYBRID)'
671
- }
672
- ]
673
- st.sidebar.markdown("### πŸ€– AI Samples")
674
- st.sidebar.info("πŸš€ Get started with our AI samples! Time free access to get started today.")
675
- sample_names = [sample["name"] for sample in ai_samples]
676
- selected_sample_name = st.sidebar.selectbox("Select an AI Sample", sample_names)
677
- selected_sample = next((s for s in ai_samples if s["name"] == selected_sample_name), None)
678
- if selected_sample:
679
- st.sidebar.markdown(f"**{selected_sample['name']}**: {selected_sample['description']}")
680
- lang = "sql" if "FullText" in selected_sample["name"] else "python"
681
- st.sidebar.code(selected_sample["query"], language=lang)
682
-
683
  # =============================================================================
684
  # ───────────── NEW ITEM & FIELD FUNCTIONS
685
  # =============================================================================
@@ -704,6 +729,10 @@ def new_item_default(container):
704
  def auto_save_edit():
705
  try:
706
  edited_str = st.session_state.doc_editor
 
 
 
 
707
  edited_doc = json.loads(edited_str)
708
  container = st.session_state.current_container
709
  container.upsert_item(edited_doc)
@@ -793,260 +822,336 @@ def display_langchain_functions():
793
  st.sidebar.write(f"{func['name']}: {func['comment']}")
794
 
795
  # =============================================================================
796
- # ───────────── OPTIONAL: SIDEBAR DATA GRID (Records with formatted timestamps)
797
  # =============================================================================
798
- # (This feature is now integrated above via show_sidebar_data_grid().)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
799
 
800
  # =============================================================================
801
- # ───────────── ASYNC TTS & ARXIV FUNCTIONS (Optional Features)
802
  # =============================================================================
803
- import asyncio
804
- import edge_tts
805
- from streamlit_marquee import streamlit_marquee
806
- from collections import Counter
807
-
808
- class PerformanceTimer:
809
- def __init__(self, operation_name: str):
810
- self.operation_name = operation_name
811
- self.start_time = None
812
- def __enter__(self):
813
- self.start_time = time.time()
814
- return self
815
- def __exit__(self, exc_type, exc_val, exc_tb):
816
- pass
 
 
817
 
818
- async def async_edge_tts_generate(text: str, voice: str, rate: int = 0, pitch: int = 0, file_format: str = "mp3"):
819
- with PerformanceTimer("tts_generation") as timer:
820
- text = text.replace("\n", " ").strip()
821
- if not text:
822
- return None, 0
823
- cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
824
- if cache_key in st.session_state.get('audio_cache', {}):
825
- return st.session_state['audio_cache'][cache_key], 0
 
 
 
 
 
826
  try:
827
- rate_str = f"{rate:+d}%"
828
- pitch_str = f"{pitch:+d}Hz"
829
- communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
830
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
831
- filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
832
- await communicate.save(filename)
833
- st.session_state.setdefault('audio_cache', {})[cache_key] = filename
834
- return filename, time.time() - timer.start_time
835
- except Exception as e:
836
- st.error(f"Error generating audio: {str(e)}")
837
- return None, 0
838
-
839
- def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
840
- result = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
841
- if isinstance(result, tuple):
842
- return result[0]
843
- return result
844
-
845
- async def async_save_qa_with_audio(question: str, answer: str):
846
- with PerformanceTimer("qa_save") as timer:
847
- md_file = create_file(question, answer, "md")
848
- audio_file = None
849
- if st.session_state.get('enable_audio', True):
850
- audio_text = f"{question}\n\nAnswer: {answer}"
851
- audio_file, _ = await async_edge_tts_generate(audio_text, voice=st.session_state.get('tts_voice', "en-US-AriaNeural"), file_format=st.session_state.get('audio_format', "mp3"))
852
- return md_file, audio_file, time.time() - timer.start_time, 0
853
-
854
- def save_qa_with_audio(question, answer, voice=None):
855
- if not voice:
856
- voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
857
- md_file = create_file(question, answer, "md")
858
- audio_text = f"{question}\n\nAnswer: {answer}"
859
- audio_file = speak_with_edge_tts(audio_text, voice=voice, file_format=st.session_state.get('audio_format', "mp3"))
860
- return md_file, audio_file
861
-
862
- def play_and_download_audio(file_path, file_type="mp3"):
863
- if file_path and os.path.exists(file_path):
864
- st.audio(file_path)
865
- dl_link = get_download_link(file_path, file_type=file_type)
866
- st.markdown(dl_link, unsafe_allow_html=True)
867
 
868
- def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
869
- cache_key = f"dl_{file_path}"
870
- if cache_key in st.session_state.get('download_link_cache', {}):
871
- return st.session_state['download_link_cache'][cache_key]
872
  try:
873
- with open(file_path, "rb") as f:
874
- b64 = base64.b64encode(f.read()).decode()
875
- filename = os.path.basename(file_path)
876
- if file_type == "mp3":
877
- link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎡 Download {filename}</a>'
878
- elif file_type == "wav":
879
- link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">πŸ”Š Download {filename}</a>'
880
- elif file_type == "md":
881
- link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">πŸ“ Download {filename}</a>'
 
 
 
 
 
 
 
 
882
  else:
883
- link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">Download {filename}</a>'
884
- st.session_state.setdefault('download_link_cache', {})[cache_key] = link
885
- return link
 
 
 
 
 
 
 
886
  except Exception as e:
887
- st.error(f"Error creating download link: {str(e)}")
888
- return ""
889
 
890
- # =============================================================================
891
- # ───────────── RESEARCH / ARXIV FUNCTIONS (Optional Features)
892
- # =============================================================================
893
- def parse_arxiv_refs(ref_text: str):
894
- if not ref_text:
895
- return []
896
- results = []
897
- current_paper = {}
898
- lines = ref_text.split('\n')
899
- for i, line in enumerate(lines):
900
- if line.count('|') == 2:
901
- if current_paper:
902
- results.append(current_paper)
903
- if len(results) >= 20:
904
- break
905
- try:
906
- header_parts = line.strip('* ').split('|')
907
- date = header_parts[0].strip()
908
- title = header_parts[1].strip()
909
- url_match = re.search(r'(https://arxiv.org/\S+)', line)
910
- url = url_match.group(1) if url_match else f"paper_{len(results)}"
911
- current_paper = {
912
- 'date': date,
913
- 'title': title,
914
- 'url': url,
915
- 'authors': '',
916
- 'summary': '',
917
- 'full_audio': None,
918
- 'download_base64': '',
919
- }
920
- except Exception as e:
921
- st.warning(f"Error parsing paper header: {str(e)}")
922
- current_paper = {}
923
- continue
924
- elif current_paper:
925
- if not current_paper['authors']:
926
- current_paper['authors'] = line.strip('* ')
927
- else:
928
- if current_paper['summary']:
929
- current_paper['summary'] += ' ' + line.strip()
930
  else:
931
- current_paper['summary'] = line.strip()
932
- if current_paper:
933
- results.append(current_paper)
934
- return results[:20]
935
-
936
- def create_paper_links_md(papers):
937
- lines = ["# Paper Links\n"]
938
- for i, p in enumerate(papers, start=1):
939
- lines.append(f"{i}. **{p['title']}** β€” [Arxiv Link]({p['url']})")
940
- return "\n".join(lines)
941
-
942
- def generate_pdf_link(url: str) -> str:
943
- if "abs" in url:
944
- pdf_url = url.replace("abs", "pdf")
945
- if not pdf_url.endswith(".pdf"):
946
- pdf_url += ".pdf"
947
- return pdf_url
948
- return url
949
-
950
- def generate_5min_feature_markdown(paper: dict) -> str:
951
- title = paper.get('title', '')
952
- summary = paper.get('summary', '')
953
- authors = paper.get('authors', '')
954
- date = paper.get('date', '')
955
- url = paper.get('url', '')
956
- pdf_link = generate_pdf_link(url)
957
- title_wc = len(title.split())
958
- summary_wc = len(summary.split())
959
- high_info_terms = [term for term in summary.split()[:5]]
960
- terms_str = ", ".join(high_info_terms)
961
- rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
962
- mermaid_code = "```mermaid\nflowchart TD\n"
963
- for i in range(len(high_info_terms) - 1):
964
- mermaid_code += f' T{i+1}["{high_info_terms[i]}"] --> T{i+2}["{high_info_terms[i+1]}"]\n'
965
- mermaid_code += "```"
966
- md = f"""
967
- ## {title}
968
-
969
- **Authors:** {authors}
970
- **Date:** {date}
971
- **Word Count (Title):** {title_wc} | **Word Count (Summary):** {summary_wc}
 
 
 
 
 
 
 
 
 
 
 
 
972
 
973
- **Links:** [Abstract]({url}) | [PDF]({pdf_link})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
974
 
975
- **High Info Terms:** {terms_str}
976
- **ROUGE Score:** {rouge_score}%
 
 
 
 
 
 
 
 
 
 
 
977
 
978
- ### Mermaid Graph of Key Concepts
979
- {mermaid_code}
 
 
 
 
 
 
 
 
 
980
 
981
- ---
982
- """
983
- return md
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984
 
985
- def create_detailed_paper_md(papers: list) -> str:
986
- md_parts = ["# Detailed Research Paper Summary\n"]
987
- for idx, paper in enumerate(papers, start=1):
988
- md_parts.append(generate_5min_feature_markdown(paper))
989
- return "\n".join(md_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
990
 
991
  # =============================================================================
992
- # ─────────────────────────────────────────────────────────
993
- # MAIN AI LOOKUP FUNCTION (Optional Features)
994
  # =============================================================================
995
- def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
996
- start = time.time()
997
- ai_constitution = """
998
- You are a medical and machine learning review board expert...
999
- """
1000
- # 1) Claude API call
1001
- import anthropic
1002
- client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY_3"))
1003
- user_input = q
1004
- response = client.messages.create(
1005
- model="claude-3-sonnet-20240229",
1006
- max_tokens=1000,
1007
- messages=[{"role": "user", "content": user_input}]
1008
- )
1009
- st.write("Claude's reply 🧠:")
1010
- st.markdown(response.content[0].text)
1011
- result = response.content[0].text
1012
- create_file(q, result, "md")
1013
- md_file, audio_file = save_qa_with_audio(q, result)
1014
- st.subheader("πŸ“ Main Response Audio")
1015
- play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))
1016
- if useArxiv:
1017
- q = q + result
1018
- st.write('Running Arxiv RAG with Claude inputs.')
1019
- client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
1020
- refs = client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
1021
- result = f"πŸ”Ž {q}\n\n{refs}"
1022
- md_file, audio_file = save_qa_with_audio(q, result)
1023
- st.subheader("πŸ“ Main Response Audio")
1024
- play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))
1025
- papers = parse_arxiv_refs(refs)
1026
- if papers:
1027
- paper_links = create_paper_links_md(papers)
1028
- links_file = create_file(q, paper_links, "md")
1029
- st.markdown(paper_links)
1030
- detailed_md = create_detailed_paper_md(papers)
1031
- detailed_file = create_file(q, detailed_md, "md")
1032
- st.markdown(detailed_md)
1033
- if useArxivAudio:
1034
- asyncio.run(async_edge_tts_generate("Sample text", st.session_state.get('tts_voice', "en-US-AriaNeural")))
1035
- st.write("Displaying Papers:")
1036
- # (Optional: call functions to display papers)
1037
- else:
1038
- st.warning("No papers found.")
1039
- response2 = client.messages.create(
1040
- model="claude-3-sonnet-20240229",
1041
- max_tokens=1000,
1042
- messages=[{"role": "user", "content": q + '\n\nUse the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with working code.'}]
1043
- )
1044
- r2 = response2.content[0].text
1045
- st.write("Claude's reply 🧠:")
1046
- st.markdown(r2)
1047
- elapsed = time.time() - start
1048
- st.write(f"**Total Elapsed:** {elapsed:.2f} s")
1049
- return result
1050
 
1051
  # =============================================================================
1052
  # ───────────── MAIN FUNCTION ─────────────
@@ -1089,17 +1194,11 @@ def main():
1089
  else:
1090
  st.warning("No container selected!")
1091
  st.sidebar.markdown("## πŸ” Vector Search")
1092
- search_keyword = st.sidebar.text_input("Search Keyword", key="vector_search_keyword")
1093
- if st.sidebar.button("Search"):
1094
- if st.session_state.get("current_container"):
1095
- results = vector_keyword_search(search_keyword, st.session_state.current_container)
1096
- st.sidebar.write(f"Found {len(results)} results:")
1097
- for res in results:
1098
- st.sidebar.code(json.dumps(res, indent=2), language="json")
1099
- else:
1100
- st.warning("No container selected for search!")
1101
  show_sidebar_data_grid()
1102
  display_langchain_functions()
 
 
1103
  try:
1104
  if st.session_state.get("client") is None:
1105
  st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
@@ -1170,9 +1269,11 @@ def main():
1170
  num_docs = st.slider("Docs", 1, 20, 1)
1171
  documents_to_display = documents[:num_docs] if total_docs > num_docs else documents
1172
  st.sidebar.info(f"Showing {len(documents_to_display)} docs")
1173
- view_options = ['Markdown', 'Code', 'Run AI', 'Clone', 'New']
1174
  selected_view = st.sidebar.selectbox("View", view_options, index=1)
1175
- if selected_view == 'Markdown':
 
 
1176
  st.markdown("#### πŸ“„ Markdown")
1177
  if documents:
1178
  doc = documents[st.session_state.current_index]
@@ -1334,4 +1435,4 @@ def main():
1334
  show_sidebar_data_grid()
1335
 
1336
  if __name__ == "__main__":
1337
- main()
 
150
  text = re.sub(r'[^\x00-\x7F]+', '', text)
151
  return text.strip()
152
 
153
+ # NEW: Sanitize JSON text before saving (remove problematic control characters)
154
def sanitize_json_text(text):
    """Make hand-edited JSON text parseable by cleaning up control characters.

    Control characters other than newline, carriage return and tab are
    removed everywhere. Newline, carriage return and tab are rewritten to
    their JSON escape sequences only when they occur inside a string literal,
    where JSON forbids them in raw form. Between tokens they are legal
    whitespace and are left untouched, so pretty-printed documents stay valid.

    Args:
        text: JSON source text, possibly containing raw control characters.

    Returns:
        The cleaned text. It is not guaranteed to be valid JSON; callers
        should still run it through ``json.loads``.
    """
    escapes = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}
    pieces = []
    in_string = False
    after_backslash = False
    for ch in text:
        # Drop every C0 control character except the three handled below.
        if ch < " " and ch not in escapes:
            continue
        if not in_string:
            if ch == '"':
                in_string = True
            pieces.append(ch)
            continue
        if ch in escapes:
            # Raw newline/CR/tab inside a string literal: must be escaped.
            pieces.append(escapes[ch])
            after_backslash = False
            continue
        if after_backslash:
            # Character following a backslash never terminates the string.
            after_backslash = False
        elif ch == "\\":
            after_backslash = True
        elif ch == '"':
            in_string = False
        pieces.append(ch)
    return "".join(pieces)
160
+
161
  # =============================================================================
162
  # ───────────── COSMOS DB FUNCTIONS ─────────────
163
  # =============================================================================
 
416
  col1, col2 = st.columns([1, 5])
417
  with col1:
418
  if st.button("πŸ’Ύ Save"):
419
+ sanitized = sanitize_json_text(new_content)
420
+ try:
421
+ json.loads(sanitized)
422
+ except Exception as e:
423
+ st.error(f"Sanitization failed: {str(e)}")
424
+ return
425
+ if save_file_content(file_path, sanitized):
426
+ st.session_state.file_content[file_path] = sanitized
427
  st.success("Saved! πŸŽ‰")
428
  time.sleep(1)
429
  st.rerun()
 
521
  })
522
  df = pd.DataFrame(data)
523
  st.sidebar.markdown("### πŸ“Š Data Grid")
524
+ st.sidebar.dataframe(df[["Name", "Timestamp"]])
525
  except Exception as e:
526
  st.sidebar.error(f"Data grid error: {str(e)}")
527
  else:
528
  st.sidebar.info("No container selected for data grid.")
529
 
530
+ # =============================================================================
531
+ # ───────────── SEARCH RESULTS DISPLAY (Editable Code Editors)
532
+ # =============================================================================
533
def display_search_results(keyword, container):
    """Show keyword search hits as editable JSON documents.

    Each hit returned by ``vector_keyword_search(keyword, container)`` is
    rendered in its own expander with a text area holding the document as
    indented JSON and a save button that upserts the edited document.

    Args:
        keyword: Search term forwarded to ``vector_keyword_search``.
        container: Object passed to ``vector_keyword_search`` and used for
            ``upsert_item(body=...)`` when a result is saved.
    """
    # NOTE(review): this function is defined more than once in this file;
    # the last definition is the one Python keeps.
    results = vector_keyword_search(keyword, container)
    st.markdown("### πŸ” Search Results")
    seen_ids = set()
    for position, res in enumerate(results):
        doc_id = res.get("id", "")
        # Widget keys must be unique; fall back to the position when the id
        # is missing or has already been used by an earlier result.
        if doc_id and doc_id not in seen_ids:
            key_suffix = doc_id
        else:
            key_suffix = f"{doc_id}_{position}"
        seen_ids.add(doc_id)
        with st.expander(f"Result {doc_id}"):
            edited = st.text_area(
                "Edit Document",
                value=json.dumps(res, indent=2),
                key=f"search_{key_suffix}",
            )
            if st.button(f"πŸ’Ύ Save changes for {doc_id}", key=f"save_search_{key_suffix}"):
                try:
                    updated_doc = json.loads(edited)
                    container.upsert_item(body=updated_doc)
                except Exception as e:
                    st.error(f"Error saving {doc_id}: {str(e)}")
                else:
                    st.success(f"Updated {doc_id}!")
                    # st.rerun() is what the rest of this file uses.
                    st.rerun()
549
+
550
+ # =============================================================================
551
+ # ───────────── DOCUMENTS LIST VIEW (Editable List with Sorting)
552
+ # =============================================================================
553
def edit_documents_list(container):
    """Render a sortable, editable table of documents and save edits.

    Documents come from ``get_documents(container)``. The table shows the
    name, a preview of the content (first 100 characters plus "...") and a
    formatted timestamp. On save, only the fields the user actually changed
    are written back through ``update_record(container, doc)``.

    Args:
        container: Object passed to ``get_documents`` and ``update_record``.
    """
    # NOTE(review): this function is defined more than once in this file;
    # the last definition is the one Python keeps.
    records = get_documents(container)
    sort_option = st.selectbox("Sort by", ["Timestamp", "Name"], key="sort_option")
    if sort_option == "Name":
        # Tolerate missing / None / non-string names when sorting.
        records.sort(key=lambda r: str(r.get("name") or "").lower())
    else:
        records.sort(key=lambda r: r.get("timestamp", ""), reverse=True)

    data = []
    for rec in records:
        ts = rec.get("timestamp", "")
        try:
            formatted = datetime.fromisoformat(ts).strftime("%I:%M %p %m/%d/%Y")
        except (TypeError, ValueError):
            # Not an ISO timestamp: show the raw value.
            formatted = ts
        content = rec.get("content", "")
        data.append({
            "ID": rec.get("id", ""),
            "Name": rec.get("name", ""),
            "Content": content[:100] + "..." if content else "",
            "Timestamp": formatted,
        })

    # Explicit columns keep the column selection valid for an empty list.
    df = pd.DataFrame(data, columns=["ID", "Name", "Content", "Timestamp"])
    edited_df = st.data_editor(df[["Name", "Content", "Timestamp"]], key="docs_editor", num_rows="dynamic")

    if st.button("πŸ’Ύ Save List Changes"):
        originals = dict(enumerate(data))
        docs_by_id = {}
        for rec in records:
            # First record wins for a repeated id.
            docs_by_id.setdefault(rec.get("id"), rec)
        for idx, row in edited_df.iterrows():
            original = originals.get(idx)
            if original is None:
                # Row added in the editor; there is no stored document for it.
                continue
            name_changed = row["Name"] != original["Name"]
            content_changed = row["Content"] != original["Content"]
            if not (name_changed or content_changed):
                continue
            doc_id = original["ID"]
            doc = docs_by_id.get(doc_id)
            if not doc:
                continue
            if name_changed:
                doc["name"] = row["Name"]
            if content_changed:
                # Only overwrite content when the preview cell was edited, so
                # a rename never replaces the full content with the preview.
                doc["content"] = row["Content"]
            success, message = update_record(container, doc)
            if success:
                st.success(f"Updated {doc_id} πŸ‘")
            else:
                st.error(f"Error updating {doc_id}: {message}")
        # st.rerun() is what the rest of this file uses.
        st.rerun()
591
+
592
  # =============================================================================
593
  # ───────────── VIDEO & AUDIO UI FUNCTIONS ─────────────
594
  # =============================================================================
 
705
  except Exception as e:
706
  st.error(f"Upload error: {str(e)}")
707
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
708
  # =============================================================================
709
  # ───────────── NEW ITEM & FIELD FUNCTIONS
710
  # =============================================================================
 
729
  def auto_save_edit():
730
  try:
731
  edited_str = st.session_state.doc_editor
732
+ try:
733
+ json.loads(edited_str)
734
+ except Exception:
735
+ edited_str = sanitize_json_text(edited_str)
736
  edited_doc = json.loads(edited_str)
737
  container = st.session_state.current_container
738
  container.upsert_item(edited_doc)
 
822
  st.sidebar.write(f"{func['name']}: {func['comment']}")
823
 
824
  # =============================================================================
825
+ # ───────────── SIDEBAR DATA GRID (Editable Names Grid)
826
  # =============================================================================
827
def edit_names_grid(container):
    """Render an editable name/timestamp grid in the sidebar and save renames.

    Documents come from ``get_documents(container)``. When the save button is
    pressed, every row whose name differs from the loaded value is written
    back through ``update_record(container, doc)``.

    Args:
        container: Object passed to ``get_documents`` and ``update_record``.
    """
    records = get_documents(container)
    data = []
    for rec in records:
        ts = rec.get("timestamp", "")
        try:
            formatted = datetime.fromisoformat(ts).strftime("%I:%M %p %m/%d/%Y")
        except (TypeError, ValueError):
            # Not an ISO timestamp: show the raw value.
            formatted = ts
        data.append({
            "ID": rec.get("id", ""),
            "Name": rec.get("name", ""),
            "Timestamp": formatted,
        })

    # Explicit columns keep the column selection valid for an empty list.
    df = pd.DataFrame(data, columns=["ID", "Name", "Timestamp"])
    edited_df = st.sidebar.data_editor(df[["Name", "Timestamp"]], key="names_editor", num_rows="dynamic")

    if st.sidebar.button("πŸ’Ύ Save Name Changes"):
        originals = dict(enumerate(data))
        docs_by_id = {}
        for rec in records:
            # First record wins for a repeated id.
            docs_by_id.setdefault(rec.get("id"), rec)
        for idx, row in edited_df.iterrows():
            original = originals.get(idx)
            if original is None:
                # Row added in the editor; there is no stored document for it.
                continue
            if row["Name"] == original["Name"]:
                continue
            doc_id = original["ID"]
            doc = docs_by_id.get(doc_id)
            if not doc:
                continue
            doc["name"] = row["Name"]
            success, message = update_record(container, doc)
            if success:
                st.sidebar.success(f"Updated Name for {doc_id} to '{row['Name']}'")
            else:
                st.sidebar.error(f"Update error for {doc_id}: {message}")
        # st.rerun() is what the rest of this file uses.
        st.rerun()
858
 
859
  # =============================================================================
860
+ # ───────────── SEARCH RESULTS DISPLAY (Editable Code Editors)
861
  # =============================================================================
862
def display_search_results(keyword, container):
    """Show keyword search hits as editable JSON documents.

    Each hit returned by ``vector_keyword_search(keyword, container)`` is
    rendered in its own expander with a text area holding the document as
    indented JSON and a save button that upserts the edited document.

    Args:
        keyword: Search term forwarded to ``vector_keyword_search``.
        container: Object passed to ``vector_keyword_search`` and used for
            ``upsert_item(body=...)`` when a result is saved.
    """
    # NOTE(review): this function is defined more than once in this file;
    # the last definition is the one Python keeps.
    results = vector_keyword_search(keyword, container)
    st.markdown("### πŸ” Search Results")
    seen_ids = set()
    for position, res in enumerate(results):
        doc_id = res.get("id", "")
        # Widget keys must be unique; fall back to the position when the id
        # is missing or has already been used by an earlier result.
        if doc_id and doc_id not in seen_ids:
            key_suffix = doc_id
        else:
            key_suffix = f"{doc_id}_{position}"
        seen_ids.add(doc_id)
        with st.expander(f"Result {doc_id}"):
            edited = st.text_area(
                "Edit Document",
                value=json.dumps(res, indent=2),
                key=f"search_{key_suffix}",
            )
            if st.button(f"πŸ’Ύ Save changes for {doc_id}", key=f"save_search_{key_suffix}"):
                try:
                    updated_doc = json.loads(edited)
                    container.upsert_item(body=updated_doc)
                except Exception as e:
                    st.error(f"Error saving {doc_id}: {str(e)}")
                else:
                    st.success(f"Updated {doc_id}!")
                    # st.rerun() is what the rest of this file uses.
                    st.rerun()
878
 
879
+ # =============================================================================
880
+ # ───────────── DOCUMENTS LIST VIEW (Editable List with Sorting)
881
+ # =============================================================================
882
def edit_documents_list(container):
    """Render a sortable, editable table of documents and save edits.

    Documents come from ``get_documents(container)``. The table shows the
    name, a preview of the content (first 100 characters plus "...") and a
    formatted timestamp. On save, only the fields the user actually changed
    are written back through ``update_record(container, doc)``.

    Args:
        container: Object passed to ``get_documents`` and ``update_record``.
    """
    # NOTE(review): this function is defined more than once in this file;
    # the last definition is the one Python keeps.
    records = get_documents(container)
    sort_option = st.selectbox("Sort by", ["Timestamp", "Name"], key="sort_option")
    if sort_option == "Name":
        # Tolerate missing / None / non-string names when sorting.
        records.sort(key=lambda r: str(r.get("name") or "").lower())
    else:
        records.sort(key=lambda r: r.get("timestamp", ""), reverse=True)

    data = []
    for rec in records:
        ts = rec.get("timestamp", "")
        try:
            formatted = datetime.fromisoformat(ts).strftime("%I:%M %p %m/%d/%Y")
        except (TypeError, ValueError):
            # Not an ISO timestamp: show the raw value.
            formatted = ts
        content = rec.get("content", "")
        data.append({
            "ID": rec.get("id", ""),
            "Name": rec.get("name", ""),
            "Content": content[:100] + "..." if content else "",
            "Timestamp": formatted,
        })

    # Explicit columns keep the column selection valid for an empty list.
    df = pd.DataFrame(data, columns=["ID", "Name", "Content", "Timestamp"])
    edited_df = st.data_editor(df[["Name", "Content", "Timestamp"]], key="docs_editor", num_rows="dynamic")

    if st.button("πŸ’Ύ Save List Changes"):
        originals = dict(enumerate(data))
        docs_by_id = {}
        for rec in records:
            # First record wins for a repeated id.
            docs_by_id.setdefault(rec.get("id"), rec)
        for idx, row in edited_df.iterrows():
            original = originals.get(idx)
            if original is None:
                # Row added in the editor; there is no stored document for it.
                continue
            name_changed = row["Name"] != original["Name"]
            content_changed = row["Content"] != original["Content"]
            if not (name_changed or content_changed):
                continue
            doc_id = original["ID"]
            doc = docs_by_id.get(doc_id)
            if not doc:
                continue
            if name_changed:
                doc["name"] = row["Name"]
            if content_changed:
                # Only overwrite content when the preview cell was edited, so
                # a rename never replaces the full content with the preview.
                doc["content"] = row["Content"]
            success, message = update_record(container, doc)
            if success:
                st.success(f"Updated {doc_id} πŸ‘")
            else:
                st.error(f"Error updating {doc_id}: {message}")
        # st.rerun() is what the rest of this file uses.
        st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
920
 
921
+ # =============================================================================
922
+ # ───────────── VIDEO & AUDIO UI FUNCTIONS ─────────────
923
+ # =============================================================================
924
def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
    """Load an image, fit it inside ``target_size`` and pad it with white.

    The image is converted to RGB, scaled to fit within ``target_size`` while
    keeping its aspect ratio, rounded down to even dimensions, and pasted
    centred onto a white canvas of exactly ``target_size``.

    Args:
        file_data: Raw ``bytes``, a file-like object with ``read`` (rewound
            first when it also has ``seek``), or a ``PIL.Image.Image``.
        target_size: ``(width, height)`` of the returned canvas.

    Returns:
        The padded RGB image, or ``None`` after reporting the error through
        ``st.error`` when loading or processing fails.
    """
    try:
        st.write("Preprocessing image...")
        if isinstance(file_data, bytes):
            img = Image.open(io.BytesIO(file_data))
        elif hasattr(file_data, 'read'):
            if hasattr(file_data, 'seek'):
                file_data.seek(0)
            img = Image.open(file_data)
        elif isinstance(file_data, Image.Image):
            img = file_data
        else:
            raise ValueError(f"Unsupported input: {type(file_data)}")
        if img.mode != 'RGB':
            img = img.convert('RGB')

        target_width, target_height = target_size[0], target_size[1]
        aspect_ratio = img.size[0] / img.size[1]
        if aspect_ratio > target_width / target_height:
            # Relatively wider than the target: width is the limiting side.
            new_width = target_width
            new_height = int(new_width / aspect_ratio)
        else:
            new_height = target_height
            new_width = int(new_height * aspect_ratio)
        # Round down to even sizes, but never to zero: a very thin or very
        # flat image would otherwise make resize() fail.
        new_width = max(2, (new_width // 2) * 2)
        new_height = max(2, (new_height // 2) * 2)

        resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
        final_img = Image.new('RGB', target_size, (255, 255, 255))
        paste_x = (target_width - new_width) // 2
        paste_y = (target_height - new_height) // 2
        final_img.paste(resized_img, (paste_x, paste_y))
        return final_img
    except Exception as e:
        st.error(f"Image error: {str(e)}")
        return None
957
 
958
def add_video_generation_ui(container):
    """Render the image-to-video UI and optionally record the result.

    The user uploads an image and picks motion, FPS and an optional seed.
    The image is preprocessed with ``validate_and_preprocess_image``, sent to
    the "awacke1/stable-video-diffusion" endpoint through ``Client.predict``,
    and the returned video is copied to a timestamped ``.mp4`` in the working
    directory and displayed. When ``container`` is truthy, a record describing
    the video is stored via ``insert_record``.

    Args:
        container: Passed to ``insert_record``; when falsy no record is saved.
    """
    st.markdown("### πŸŽ₯ Video Gen")
    col1, col2 = st.columns([2, 1])
    with col1:
        uploaded_file = st.file_uploader("Upload Image πŸ–ΌοΈ", type=['png', 'jpg', 'jpeg'])
    with col2:
        st.markdown("#### Params")
        motion = st.slider("🌊 Motion", 1, 255, 127)
        fps = st.slider("🎬 FPS", 1, 30, 6)
        with st.expander("Advanced"):
            use_custom = st.checkbox("Custom Seed")
            seed = st.number_input("Seed", value=int(time.time() * 1000)) if use_custom else None
    if uploaded_file is None:
        return
    try:
        file_data = uploaded_file.read()
        preview1, preview2 = st.columns(2)
        with preview1:
            st.write("Original")
            st.image(Image.open(io.BytesIO(file_data)), use_column_width=True)
        with preview2:
            proc_img = validate_and_preprocess_image(io.BytesIO(file_data))
            if proc_img:
                st.write("Processed")
                st.image(proc_img, use_column_width=True)
            else:
                st.error("Preprocess failed")
                return
        if st.button("πŸŽ₯ Generate"):
            with st.spinner("Generating video..."):
                # Reserve a temp path and close the handle right away, so the
                # file can be written and re-read by name on every platform.
                with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
                    temp_path = temp_file.name
                try:
                    # Saving happens inside the try so the temp file is
                    # removed even when writing the PNG fails.
                    proc_img.save(temp_path, format='PNG')
                    client = Client("awacke1/stable-video-diffusion", hf_token=os.environ.get("HUGGINGFACE_TOKEN"))
                    result = client.predict(
                        image=temp_path,
                        seed=seed if seed is not None else int(time.time() * 1000),
                        randomize_seed=seed is None,
                        motion_bucket_id=motion,
                        fps_id=fps,
                        api_name="/video"
                    )
                    if result and isinstance(result, tuple) and len(result) >= 1:
                        video_path = result[0].get('video') if isinstance(result[0], dict) else None
                        if video_path and os.path.exists(video_path):
                            video_filename = f"generated_video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
                            shutil.copy(video_path, video_filename)
                            st.success("Video generated! πŸŽ‰")
                            st.video(video_filename)
                            if container:
                                video_record = {
                                    "id": generate_unique_id(),
                                    "pk": generate_unique_id(),
                                    "type": "generated_video",
                                    "filename": video_filename,
                                    "seed": seed if seed is not None else "random",
                                    "motion": motion,
                                    "fps": fps,
                                    "timestamp": datetime.now().isoformat()
                                }
                                success, message = insert_record(container, video_record)
                                if success:
                                    st.success("DB record saved!")
                                else:
                                    st.error(f"DB error: {message}")
                        else:
                            st.error("Invalid result format")
                    else:
                        st.error("No result returned")
                except Exception as e:
                    st.error(f"Video gen error: {str(e)}")
                finally:
                    try:
                        os.unlink(temp_path)
                        st.write("Temp file removed")
                    except Exception as e:
                        st.warning(f"Cleanup error: {str(e)}")
    except Exception as e:
        st.error(f"Upload error: {str(e)}")
1036
 
1037
+ # =============================================================================
1038
+ # ───────────── NEW ITEM & FIELD FUNCTIONS
1039
+ # =============================================================================
1040
def new_item_default(container):
    """Insert a placeholder sample document into ``container``.

    The document gets a freshly generated id (also used as its ``pk``),
    fixed starter name/content, the current timestamp and type ``sample``.

    Returns:
        The inserted document dict when ``insert_record`` reports success,
        otherwise ``None`` (an error is shown in the UI).
    """
    doc_id = generate_unique_id()
    sample_doc = dict(
        id=doc_id,
        pk=doc_id,
        name="New Sample Document",
        content="Start editing your document here...",
        timestamp=datetime.now().isoformat(),
        type="sample",
    )
    ok, msg = insert_record(container, sample_doc)
    if not ok:
        st.error("Error creating new item: " + msg)
        return None
    st.success("New sample document created! ✨")
    return sample_doc
1057
 
1058
def auto_save_edit():
    """Persist the JSON currently held in ``st.session_state.doc_editor``.

    The editor text is parsed as JSON; if that fails it is passed through
    ``sanitize_json_text`` and parsed once more. The resulting document is
    upserted into ``st.session_state.current_container``.

    All failures are reported with ``st.error`` rather than raised, since
    this runs as a UI action and must not abort the script.
    """
    container = st.session_state.get("current_container")
    if container is None:
        # Without this guard the user only sees a cryptic NoneType
        # attribute error from the upsert call below.
        st.error("Auto-save error: no container selected")
        return
    try:
        edited_str = st.session_state.doc_editor
        try:
            # Parse once and keep the result instead of parsing twice.
            edited_doc = json.loads(edited_str)
        except ValueError:
            edited_doc = json.loads(sanitize_json_text(edited_str))
        if not isinstance(edited_doc, dict):
            raise ValueError("document must be a JSON object")
        container.upsert_item(edited_doc)
        st.success("Auto-saved! 💾")
    except Exception as e:  # UI boundary: surface any failure to the user
        st.error(f"Auto-save error: {str(e)}")
1071
 
1072
def add_field_to_doc():
    """Add a key/value pair to the document in the editor and save it.

    Reads the field name and value from ``st.session_state.new_field_key``
    and ``st.session_state.new_field_value``, inserts them into the JSON in
    ``st.session_state.doc_editor``, writes the updated JSON back to the
    editor state and then calls ``auto_save_edit``.

    Errors are reported with ``st.error``; nothing is raised.
    """
    key = st.session_state.new_field_key
    value = st.session_state.new_field_value
    if not key or not str(key).strip():
        # An empty name would silently create a "" field in the document.
        st.error("Error adding field: field name is required")
        return
    try:
        raw = st.session_state.doc_editor
        try:
            doc = json.loads(raw)
        except ValueError:
            # Same fallback auto_save_edit applies to the editor text.
            doc = json.loads(sanitize_json_text(raw))
        if not isinstance(doc, dict):
            raise ValueError("document must be a JSON object")
        doc[key] = value
        st.session_state.doc_editor = json.dumps(doc, indent=2)
        # auto_save_edit reports its own failures through st.error.
        auto_save_edit()
        st.success(f"Added field {key} 👍")
    except Exception as e:  # UI boundary: surface any failure to the user
        st.error(f"Error adding field: {str(e)}")
1083
 
1084
+ # =============================================================================
1085
+ # ───────────── SEARCH RESULTS DISPLAY (Editable Code Editors)
1086
+ # =============================================================================
1087
def display_search_results(keyword, container):
    """Show search hits for ``keyword`` as editable JSON documents.

    Calls ``vector_keyword_search(keyword, container)`` and renders every
    hit inside an expander containing a text area with the document's JSON
    and a save button. Saving parses the edited JSON, upserts it into
    ``container`` and reruns the app.

    Args:
        keyword: Search text forwarded to ``vector_keyword_search``.
        container: Container object providing ``upsert_item``.
    """
    results = vector_keyword_search(keyword, container) or []
    st.markdown("### 🔍 Search Results")
    if not results:
        st.info("No matching documents found.")
        return
    for idx, res in enumerate(results):
        doc_id = res.get("id", "")
        # Keep the original key for documents with an id; fall back to the
        # position so id-less hits do not collide on one widget key.
        key_suffix = doc_id or f"row{idx}"
        with st.expander(f"Result {doc_id}"):
            edited = st.text_area(
                "Edit Document",
                value=json.dumps(res, indent=2),
                key=f"search_{key_suffix}",
            )
            if st.button(f"💾 Save changes for {doc_id}", key=f"save_search_{key_suffix}"):
                try:
                    updated_doc = json.loads(edited)
                    if not isinstance(updated_doc, dict):
                        raise ValueError("document must be a JSON object")
                    container.upsert_item(body=updated_doc)
                except Exception as e:  # UI boundary: report, do not crash
                    st.error(f"Error saving {doc_id}: {str(e)}")
                else:
                    st.success(f"Updated {doc_id}!")
                    # st.rerun() replaces the deprecated experimental_rerun
                    # and is kept outside the try so the rerun signal is
                    # never mistaken for a save failure.
                    st.rerun()
1103
 
1104
+ # =============================================================================
1105
+ # ───────────── DOCUMENTS LIST VIEW (Editable List with Sorting)
1106
+ # =============================================================================
1107
def edit_documents_list(container):
    """Render the container's documents as an editable, sortable table.

    Documents come from ``get_documents(container)`` and are sorted either
    by name or by timestamp (newest first). The table shows the name, a
    preview of the content (first 100 characters) and a formatted
    timestamp. "Save List Changes" writes edited names/contents back with
    ``update_record``.

    Args:
        container: Container passed to ``get_documents``/``update_record``.
    """
    records = get_documents(container)
    sort_option = st.selectbox("Sort by", ["Timestamp", "Name"], key="sort_option")
    if sort_option == "Name":
        # `or ""` guards against documents whose name is stored as null.
        records.sort(key=lambda r: str(r.get("name") or "").lower())
    else:
        records.sort(key=lambda r: str(r.get("timestamp") or ""), reverse=True)

    # `data[i]` always describes `records[i]`; the save loop relies on it.
    data = []
    for rec in records:
        ts = rec.get("timestamp", "")
        try:
            formatted = datetime.fromisoformat(ts).strftime("%I:%M %p %m/%d/%Y")
        except (TypeError, ValueError):
            formatted = ts  # not an ISO timestamp; show it unchanged
        content = rec.get("content", "")
        data.append({
            "ID": rec.get("id", ""),
            "Name": rec.get("name", ""),
            "Content": str(content)[:100] + "..." if content else "",
            "Timestamp": formatted,
        })

    # Explicit columns keep the selection below valid for an empty list.
    df = pd.DataFrame(data, columns=["ID", "Name", "Content", "Timestamp"])
    edited_df = st.data_editor(
        df[["Name", "Content", "Timestamp"]], key="docs_editor", num_rows="dynamic"
    )

    if st.button("💾 Save List Changes"):
        all_saved = True
        for idx, row in edited_df.iterrows():
            try:
                original = data[idx]
            except (IndexError, TypeError):
                # Row added in the editor: there is no document behind it.
                continue
            name_changed = row["Name"] != original["Name"]
            content_changed = row["Content"] != original["Content"]
            if not (name_changed or content_changed):
                continue
            doc = records[idx]
            doc_id = original["ID"]
            if name_changed:
                doc["name"] = row["Name"]
            if content_changed:
                # Only overwrite content the user actually edited, so a
                # rename does not replace the full text with the preview.
                doc["content"] = row["Content"]
            success, message = update_record(container, doc)
            if success:
                st.success(f"Updated {doc_id} 👍")
            else:
                all_saved = False
                st.error(f"Error updating {doc_id}: {message}")
        if all_saved:
            # Skip the rerun after a failure so the error stays visible.
            st.rerun()
1145
 
1146
  # =============================================================================
1147
+ # ───────────── SEARCH DOCUMENTS UI (Enter Key triggers search)
 
1148
  # =============================================================================
1149
def search_documents_ui(container):
    """Render the sidebar search form and show results for the last query.

    The form lets Enter (or the button) submit the keyword. The submitted
    keyword is remembered in ``st.session_state`` so results are rendered
    on later reruns too; otherwise the save buttons inside the results
    could never fire, because clicking one reruns the script with the
    form no longer in its "submitted" state. A consequence is that the
    search is executed again on each rerun while a keyword is active.

    Args:
        container: Container to search; may be ``None`` when nothing is
            selected, in which case a warning is shown instead.
    """
    with st.sidebar.form("search_form"):
        keyword = st.text_input("Search Keyword", key="search_keyword")
        submitted = st.form_submit_button("🔍 Search")
    if submitted:
        # Submitting an empty keyword clears the active search.
        st.session_state["active_search_keyword"] = keyword
    active_keyword = st.session_state.get("active_search_keyword", "")
    if not active_keyword:
        return
    if container is None:
        st.sidebar.warning("Select a container before searching.")
        return
    display_search_results(active_keyword, container)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1155
 
1156
  # =============================================================================
1157
  # ───────────── MAIN FUNCTION ─────────────
 
1194
  else:
1195
  st.warning("No container selected!")
1196
  st.sidebar.markdown("## πŸ” Vector Search")
1197
+ search_documents_ui(st.session_state.get("current_container"))
 
 
 
 
 
 
 
 
1198
  show_sidebar_data_grid()
1199
  display_langchain_functions()
1200
+ if st.session_state.get("current_container"):
1201
+ edit_names_grid(st.session_state.current_container)
1202
  try:
1203
  if st.session_state.get("client") is None:
1204
  st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
 
1269
  num_docs = st.slider("Docs", 1, 20, 1)
1270
  documents_to_display = documents[:num_docs] if total_docs > num_docs else documents
1271
  st.sidebar.info(f"Showing {len(documents_to_display)} docs")
1272
+ view_options = ['List', 'Markdown', 'Code', 'Run AI', 'Clone', 'New']
1273
  selected_view = st.sidebar.selectbox("View", view_options, index=1)
1274
+ if selected_view == 'List':
1275
+ edit_documents_list(container)
1276
+ elif selected_view == 'Markdown':
1277
  st.markdown("#### πŸ“„ Markdown")
1278
  if documents:
1279
  doc = documents[st.session_state.current_index]
 
1435
  show_sidebar_data_grid()
1436
 
1437
  if __name__ == "__main__":
1438
+ main()