Huzaifa367 committed
Commit 2f49f39 (verified) · Parent(s): d434ae0

Update pages/jarvis.py

Files changed (1):
  1. pages/jarvis.py +114 -47
pages/jarvis.py CHANGED
@@ -1,48 +1,115 @@
  import streamlit as st
- from transformers import pipeline
- import edge_tts
- import tempfile
-
- # Initialize voice chat pipeline and other settings
- default_lang = "en"
- system_instructions = "[SYSTEM] Answer as Real Jarvis JARVIS, Made by 'Tony Stark', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if You are the character Jarvis, made by 'Tony Stark.' The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
- client = pipeline("text2text-generation", model="mistralai/Mixtral-8x7B-Instruct-v0.1")
-
- # Function to transcribe audio
- def transcribe(audio):
-     lang = "en"
-     text = "Sample transcription"  # Replace with your transcription logic
-     return text
-
- # Function to generate Jarvis response
- def generate_jarvis_response(prompt):
-     formatted_prompt = system_instructions + prompt + "[JARVIS]"
-     output = client(formatted_prompt)
-     response_text = output[0]['generated_text']
-     communicate = edge_tts.Communicate(response_text)
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-         tmp_path = tmp_file.name
-         communicate.save(tmp_path)
-     return tmp_path
-
- # Streamlit UI
- st.title("JARVIS⚡ - Personal Assistant of Tony Stark")
- st.markdown("Voice Chat with your personal assistant")
-
- # Voice chat interface
- audio_file = st.file_uploader("Voice Chat (BETA) - Upload audio file", type=["mp3", "wav"])
- if audio_file:
-     audio_path = transcribe(audio_file)
-     response_path = generate_jarvis_response(audio_path)
-     st.audio(response_path)
-
- # Text-based interaction
- st.markdown("### Text-based Interaction")
- prompt_text = st.text_input("Prompt", "What is Wikipedia?")
- if st.button("Generate Response"):
-     response_path = generate_jarvis_response(prompt_text)
-     st.audio(response_path)
-
- # # Additional links
- # st.markdown("### Try Other Models")
- # st.markdown("[Instant ](https://huggingface.co/spaces/)")
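Two issues in the removed version are worth noting: Mixtral-8x7B is a decoder-only model, so the matching transformers pipeline task is "text-generation" rather than "text2text-generation", and edge_tts.Communicate.save() is an async coroutine, so calling it without an event loop never actually writes the audio file. A minimal corrected sketch of the text-to-speech helper (the name text_to_speech is illustrative, not from the commit):

import asyncio
import tempfile

import edge_tts

def text_to_speech(text: str) -> str:
    # edge_tts emits MP3 by default, so use a matching suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    # Communicate.save() is a coroutine; run it to completion explicitly
    asyncio.run(edge_tts.Communicate(text).save(tmp_path))
    return tmp_path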
  import streamlit as st
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores import Chroma
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.llms import HuggingFacePipeline
+ from langchain.chains import ConversationChain
+ from langchain.memory import ConversationBufferMemory
+ from langchain_community.llms import HuggingFaceEndpoint
+ from pathlib import Path
+ import chromadb
+ from unidecode import unidecode
+ from transformers import AutoTokenizer
+ import transformers
+ import torch
+ import tqdm
+ import accelerate
+ import re
+ import tempfile  # needed: Streamlit uploads must be written to disk for PyPDFLoader
+
+ # Models offered in the UI; `list_llm_simple` was referenced below but never
+ # defined in this file, so this single-entry default is an assumption
+ list_llm_simple = ["mistralai/Mixtral-8x7B-Instruct-v0.1"]
+
+ # Function to load PDF documents and create doc splits
+ def load_doc(list_file_path, chunk_size, chunk_overlap):
+     loaders = [PyPDFLoader(x) for x in list_file_path]
+     pages = []
+     for loader in loaders:
+         pages.extend(loader.load())
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=chunk_size,
+         chunk_overlap=chunk_overlap
+     )
+     doc_splits = text_splitter.split_documents(pages)
+     return doc_splits
+
+ # Function to create vector database
+ def create_db(splits, collection_name):
+     embedding = HuggingFaceEmbeddings()
+     new_client = chromadb.EphemeralClient()
+     vectordb = Chroma.from_documents(
+         documents=splits,
+         embedding=embedding,
+         client=new_client,
+         collection_name=collection_name,
+     )
+     return vectordb
+
+ # Derive a Chroma-safe collection name from a file name; this helper was
+ # called below but missing from the file, so this body is a best guess
+ def create_collection_name(filepath):
+     name = unidecode(Path(filepath).stem)
+     name = re.sub(r"[^A-Za-z0-9_-]", "-", name)[:50]
+     return name or "pdf-collection"
+
+ # Initialize Langchain LLM chain
+ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
+     if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
+         llm = HuggingFaceEndpoint(
+             repo_id=llm_model,
+             temperature=temperature,
+             max_new_tokens=max_tokens,
+             top_k=top_k,
+             load_in_8bit=True,
+         )
+     else:
+         # Add other LLM models initialization conditions here...
+         raise ValueError(f"Unsupported LLM model: {llm_model}")
+     memory = ConversationBufferMemory(
+         memory_key="chat_history",
+         output_key='answer',
+         return_messages=True
+     )
+     retriever = vector_db.as_retriever()
+     qa_chain = ConversationalRetrievalChain.from_llm(
+         llm,
+         retriever=retriever,
+         chain_type="stuff",
+         memory=memory,
+         return_source_documents=True,
+         verbose=False,
+     )
+     return qa_chain
+
+ # Function to process uploaded PDFs and initialize the database
+ def process_documents(list_file_obj, chunk_size, chunk_overlap):
+     # Streamlit's UploadedFile objects live in memory; .name is only the file
+     # name, not a path, so write each upload to a temp file for PyPDFLoader
+     list_file_path = []
+     for uploaded in list_file_obj:
+         if uploaded is None:
+             continue
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+             tmp.write(uploaded.getbuffer())
+             list_file_path.append(tmp.name)
+     collection_name = create_collection_name(list_file_obj[0].name)
+     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
+     vector_db = create_db(doc_splits, collection_name)
+     return vector_db
+
+ # Streamlit app
+ def main():
+     st.title("PDF-based Chatbot")
+     st.write("Ask any questions about your PDF documents")
+
+     # Step 1: Upload PDF documents
+     uploaded_files = st.file_uploader("Upload your PDF documents (single or multiple)", type=["pdf"], accept_multiple_files=True)
+
+     # Step 2: Process documents and initialize vector database
+     if uploaded_files:
+         chunk_size = st.slider("Chunk size", min_value=100, max_value=1000, value=600, step=20)
+         chunk_overlap = st.slider("Chunk overlap", min_value=10, max_value=200, value=40, step=10)
+         if st.button("Generate Vector Database"):
+             # Keep the database in session_state: plain locals do not survive
+             # Streamlit's rerun on every button click
+             st.session_state["vector_db"] = process_documents(uploaded_files, chunk_size, chunk_overlap)
+             st.success("Vector database generated successfully!")
+
+     # Step 3: Initialize QA chain with selected LLM model
+     st.header("Initialize Question Answering (QA) Chain")
+     llm_model = st.selectbox("Choose LLM Model", list_llm_simple)
+     temperature = st.slider("Temperature", min_value=0.01, max_value=1.0, value=0.7, step=0.1)
+     max_tokens = st.slider("Max Tokens", min_value=224, max_value=4096, value=1024, step=32)
+     top_k = st.slider("Top-k Samples", min_value=1, max_value=10, value=3, step=1)
+     if st.button("Initialize QA Chain"):
+         if "vector_db" not in st.session_state:
+             st.error("Generate the vector database first.")
+         else:
+             st.session_state["qa_chain"] = initialize_llmchain(llm_model, temperature, max_tokens, top_k, st.session_state["vector_db"])
+             st.success("QA Chain initialized successfully!")
+
+     # Step 4: Chatbot interaction
+     st.header("Chatbot")
+     message = st.text_input("Type your message here")
+     if st.button("Submit"):
+         if "qa_chain" not in st.session_state:
+             st.error("Initialize the QA chain first.")
+         else:
+             # ConversationalRetrievalChain expects {"question": ...} and,
+             # with this memory config, returns its reply under "answer"
+             response = st.session_state["qa_chain"]({"question": message})
+             st.write(f"Chatbot Response: {response['answer']}")
+
+ if __name__ == "__main__":
+     main()
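A minimal console sketch of the same pipeline, assuming the functions above are importable, that the PDF path and question are placeholders, and that a HUGGINGFACEHUB_API_TOKEN is set for the HuggingFaceEndpoint call:

splits = load_doc(["example.pdf"], chunk_size=600, chunk_overlap=40)
vector_db = create_db(splits, create_collection_name("example.pdf"))
qa_chain = initialize_llmchain(
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.7, max_tokens=1024, top_k=3, vector_db=vector_db,
)
result = qa_chain({"question": "What is this document about?"})
print(result["answer"])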