Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -1,72 +1,72 @@
|
|
1 |
-
from sentence_transformers import SentenceTransformer
|
2 |
-
from langchain_community.vectorstores import Chroma
|
3 |
-
from langchain_community.embeddings import HuggingFaceEmbeddings
|
4 |
-
from langchain_community.llms import HuggingFaceHub
|
5 |
-
import openai
|
6 |
-
import streamlit as st
|
7 |
-
import re
|
8 |
-
|
9 |
-
#openai_api_key = "sk-REDACTED"  # leaked key removed — revoke it and load from an environment variable
|
10 |
-
model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
|
11 |
-
# Define the embedding function using HuggingFaceEmbeddings
|
12 |
-
embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
|
13 |
-
|
14 |
-
vectordb = Chroma(persist_directory= r"
|
15 |
-
embedding_function=embeddings)
|
16 |
-
#index = pinecone.Index('langchain-chatbot')
|
17 |
-
|
18 |
-
# Create a retriever from the Chroma object
|
19 |
-
retriever = vectordb.as_retriever()
|
20 |
-
|
21 |
-
def find_match(input_text):
|
22 |
-
# Retrieve relevant documents based on the input query
|
23 |
-
docs = retriever.get_relevant_documents(input_text)
|
24 |
-
|
25 |
-
match_texts = [doc.page_content for doc in docs]
|
26 |
-
|
27 |
-
# Return the concatenated texts of the relevant documents
|
28 |
-
return "\n".join(match_texts)
|
29 |
-
|
30 |
-
|
31 |
-
from transformers import pipeline
|
32 |
-
|
33 |
-
# Load the text generation pipeline from Hugging Face
|
34 |
-
text_generator = pipeline("text-generation", model="gpt2")
|
35 |
-
|
36 |
-
def query_refiner(conversation, query):
|
37 |
-
# Formulate the prompt for the model
|
38 |
-
prompt = f"Given the following user query and conversation log, formulate a question that would be the most relevant to provide the user with an answer from a knowledge base.\n\nCONVERSATION LOG: \n{conversation}\n\nQuery: {query}\n\nRefined Query:"
|
39 |
-
|
40 |
-
# Generate the response using the Hugging Face model
|
41 |
-
response = text_generator(prompt, max_length=256, temperature=0.7, top_p=1.0, pad_token_id=text_generator.tokenizer.eos_token_id)
|
42 |
-
|
43 |
-
# Extract the refined query from the response
|
44 |
-
refined_query = response[0]['generated_text'].split('Refined Query:')[-1].strip()
|
45 |
-
|
46 |
-
return refined_query
|
47 |
-
|
48 |
-
|
49 |
-
def get_conversation_string():
|
50 |
-
conversation_string = ""
|
51 |
-
for i in range(len(st.session_state['responses'])-1):
|
52 |
-
|
53 |
-
conversation_string += "Human: "+st.session_state['requests'][i] + "\n"
|
54 |
-
conversation_string += "Bot: "+ st.session_state['responses'][i+1] + "\n"
|
55 |
-
return conversation_string
|
56 |
-
|
57 |
-
|
58 |
-
"""
|
59 |
-
from openai import OpenAI
|
60 |
-
from audio_recorder_streamlit import audio_recorder
|
61 |
-
|
62 |
-
client=OpenAI(api_key="sk-REDACTED")  # leaked key removed — revoke it and load from an environment variable
|
63 |
-
|
64 |
-
def speech_to_text(audio_data):
|
65 |
-
with open(audio_data, "rb") as audio_file:
|
66 |
-
transcript = client.audio.transcriptions.create(
|
67 |
-
model="whisper-1",
|
68 |
-
response_format="text",
|
69 |
-
file=audio_file
|
70 |
-
)
|
71 |
-
return transcript
|
72 |
-
"""
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
import openai
import streamlit as st
import re

# SECURITY FIX: an OpenAI API key was previously committed here in a comment.
# It has been removed — the leaked key must be revoked. Load credentials from
# the environment (e.g. os.environ["OPENAI_API_KEY"]) instead of source code.

# Sentence-embedding model; the same model name is reused for the LangChain
# embedding wrapper below so queries embed consistently with the stored vectors.
model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')

# Define the embedding function using HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/multi-qa-MiniLM-L6-cos-v1')

# Load the persisted Chroma collection.
# NOTE(review): r"\vector db" is a drive-root-relative Windows path — confirm
# it matches the directory where the vector DB was actually built.
vectordb = Chroma(persist_directory=r"\vector db",  # enter chroma directory
                  embedding_function=embeddings)
#index = pinecone.Index('langchain-chatbot')

# Create a retriever from the Chroma object
retriever = vectordb.as_retriever()
|
20 |
+
|
21 |
+
def find_match(input_text):
    """Return the text of documents relevant to *input_text*, newline-joined.

    Queries the module-level Chroma retriever and concatenates the
    ``page_content`` of every returned document.
    """
    relevant_docs = retriever.get_relevant_documents(input_text)
    return "\n".join(doc.page_content for doc in relevant_docs)
|
29 |
+
|
30 |
+
|
31 |
+
from transformers import pipeline

# Load the text generation pipeline from Hugging Face (small GPT-2 model)
text_generator = pipeline("text-generation", model="gpt2")

def query_refiner(conversation, query):
    """Rewrite *query* into a standalone question using the conversation log.

    Parameters:
        conversation (str): "Human:"/"Bot:" transcript of the chat so far.
        query (str): the user's latest raw query.

    Returns:
        str: the model's output after the "Refined Query:" marker.
    """
    # Formulate the prompt for the model
    prompt = f"Given the following user query and conversation log, formulate a question that would be the most relevant to provide the user with an answer from a knowledge base.\n\nCONVERSATION LOG: \n{conversation}\n\nQuery: {query}\n\nRefined Query:"

    # BUG FIX: max_length=256 capped prompt + completion together, so any
    # sizeable conversation log exhausted the budget and generation failed or
    # produced nothing. max_new_tokens budgets only the completion.
    # do_sample=True is required for temperature/top_p to take effect at all
    # (they are ignored under the default greedy decoding).
    response = text_generator(
        prompt,
        max_new_tokens=64,
        do_sample=True,
        temperature=0.7,
        top_p=1.0,
        pad_token_id=text_generator.tokenizer.eos_token_id,
    )

    # Extract the refined query from the generated continuation
    refined_query = response[0]['generated_text'].split('Refined Query:')[-1].strip()
    return refined_query
|
47 |
+
|
48 |
+
|
49 |
+
def get_conversation_string():
    """Build a "Human:/Bot:" transcript from the Streamlit session state.

    Pairs request *i* with response *i+1* — presumably ``responses[0]`` is an
    initial bot greeting with no matching request (TODO confirm against the
    app that populates session state).
    """
    turns = []
    for idx in range(len(st.session_state['responses']) - 1):
        turns.append("Human: " + st.session_state['requests'][idx] + "\n")
        turns.append("Bot: " + st.session_state['responses'][idx + 1] + "\n")
    return "".join(turns)
|
56 |
+
|
57 |
+
|
58 |
+
"""
|
59 |
+
from openai import OpenAI
|
60 |
+
from audio_recorder_streamlit import audio_recorder
|
61 |
+
|
62 |
+
client=OpenAI(api_key="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")
|
63 |
+
|
64 |
+
def speech_to_text(audio_data):
|
65 |
+
with open(audio_data, "rb") as audio_file:
|
66 |
+
transcript = client.audio.transcriptions.create(
|
67 |
+
model="whisper-1",
|
68 |
+
response_format="text",
|
69 |
+
file=audio_file
|
70 |
+
)
|
71 |
+
return transcript
|
72 |
+
"""
|