Lakshita336 committed
Commit 72c1b8c · verified · 1 Parent(s): d61a041

Update utils.py

Files changed (1):
  1. utils.py +72 -72
utils.py CHANGED
@@ -1,72 +1,72 @@
- from sentence_transformers import SentenceTransformer
- from langchain_community.vectorstores import Chroma
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain_community.llms import HuggingFaceHub
- import openai
- import streamlit as st
- import re
-
- #openai_api_key = "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"
- model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
- # Define the embedding function using HuggingFaceEmbeddings
- embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
-
- vectordb = Chroma(persist_directory= r"C:\Users\Lakshita\PycharmProjects\trial_bot\vector db", #enter chroma directory
-                   embedding_function=embeddings)
- #index = pinecone.Index('langchain-chatbot')
-
- # Create a retriever from the Chroma object
- retriever = vectordb.as_retriever()
-
- def find_match(input_text):
-     # Retrieve relevant documents based on the input query
-     docs = retriever.get_relevant_documents(input_text)
-
-     match_texts = [doc.page_content for doc in docs]
-
-     # Return the concatenated texts of the relevant documents
-     return "\n".join(match_texts)
-
-
- from transformers import pipeline
-
- # Load the text generation pipeline from Hugging Face
- text_generator = pipeline("text-generation", model="gpt2")
-
- def query_refiner(conversation, query):
-     # Formulate the prompt for the model
-     prompt = f"Given the following user query and conversation log, formulate a question that would be the most relevant to provide the user with an answer from a knowledge base.\n\nCONVERSATION LOG: \n{conversation}\n\nQuery: {query}\n\nRefined Query:"
-
-     # Generate the response using the Hugging Face model
-     response = text_generator(prompt, max_length=256, temperature=0.7, top_p=1.0, pad_token_id=text_generator.tokenizer.eos_token_id)
-
-     # Extract the refined query from the response
-     refined_query = response[0]['generated_text'].split('Refined Query:')[-1].strip()
-
-     return refined_query
-
-
- def get_conversation_string():
-     conversation_string = ""
-     for i in range(len(st.session_state['responses'])-1):
-
-         conversation_string += "Human: "+st.session_state['requests'][i] + "\n"
-         conversation_string += "Bot: "+ st.session_state['responses'][i+1] + "\n"
-     return conversation_string
-
-
- """
- from openai import OpenAI
- from audio_recorder_streamlit import audio_recorder
-
- client=OpenAI(api_key="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")
-
- def speech_to_text(audio_data):
-     with open(audio_data, "rb") as audio_file:
-         transcript = client.audio.transcriptions.create(
-             model="whisper-1",
-             response_format="text",
-             file=audio_file
-         )
-     return transcript
- """
 
+ from sentence_transformers import SentenceTransformer
+ from langchain_community.vectorstores import Chroma
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.llms import HuggingFaceHub
+ import openai
+ import streamlit as st
+ import re
+
+ #openai_api_key = "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"
+ model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
+ # Define the embedding function using HuggingFaceEmbeddings
+ embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
+
+ vectordb = Chroma(persist_directory= r"\vector db", #enter chroma directory
+                   embedding_function=embeddings)
+ #index = pinecone.Index('langchain-chatbot')
+
+ # Create a retriever from the Chroma object
+ retriever = vectordb.as_retriever()
+
+ def find_match(input_text):
+     # Retrieve relevant documents based on the input query
+     docs = retriever.get_relevant_documents(input_text)
+
+     match_texts = [doc.page_content for doc in docs]
+
+     # Return the concatenated texts of the relevant documents
+     return "\n".join(match_texts)
+
+
+ from transformers import pipeline
+
+ # Load the text generation pipeline from Hugging Face
+ text_generator = pipeline("text-generation", model="gpt2")
+
+ def query_refiner(conversation, query):
+     # Formulate the prompt for the model
+     prompt = f"Given the following user query and conversation log, formulate a question that would be the most relevant to provide the user with an answer from a knowledge base.\n\nCONVERSATION LOG: \n{conversation}\n\nQuery: {query}\n\nRefined Query:"
+
+     # Generate the response using the Hugging Face model
+     response = text_generator(prompt, max_length=256, temperature=0.7, top_p=1.0, pad_token_id=text_generator.tokenizer.eos_token_id)
+
+     # Extract the refined query from the response
+     refined_query = response[0]['generated_text'].split('Refined Query:')[-1].strip()
+
+     return refined_query
+
+
+ def get_conversation_string():
+     conversation_string = ""
+     for i in range(len(st.session_state['responses'])-1):
+
+         conversation_string += "Human: "+st.session_state['requests'][i] + "\n"
+         conversation_string += "Bot: "+ st.session_state['responses'][i+1] + "\n"
+     return conversation_string
+
+
+ """
+ from openai import OpenAI
+ from audio_recorder_streamlit import audio_recorder
+
+ client=OpenAI(api_key="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")
+
+ def speech_to_text(audio_data):
+     with open(audio_data, "rb") as audio_file:
+         transcript = client.audio.transcriptions.create(
+             model="whisper-1",
+             response_format="text",
+             file=audio_file
+         )
+     return transcript
+ """