# Page-scrape residue (not part of the program), preserved as comments:
#   Status: Runtime error
#   File size: 4,726 Bytes
#   Revision: f3405cb
import os
import openai
import sys
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import GitLoader
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
import datetime
import shutil
def loader(url: str, branch: str, file_filter: str):
    """Clone a Git repository and load the files selected by ``file_filter``.

    Args:
        url: Clone URL of the repository.
        branch: Branch to check out.
        file_filter: Comma-separated file-name suffixes to keep, for example
            ``".py,.md"``. Whitespace around each suffix is ignored and empty
            entries are dropped; if no suffix remains, every file is kept.

    Returns:
        The documents produced by ``GitLoader.load()`` for the kept files.
    """
    repo_path = "./github_repo"
    # Start from a clean directory so a checkout left over from a previous
    # run (possibly of a different repository) is never reused.
    if os.path.exists(repo_path):
        shutil.rmtree(repo_path)

    suffixes = tuple(
        part.strip() for part in file_filter.split(",") if part.strip()
    )

    def keep_file(file_path: str) -> bool:
        # The whole repository is still cloned; this only limits which files
        # are turned into documents.
        return not suffixes or file_path.endswith(suffixes)

    git_loader = GitLoader(
        clone_url=url,
        repo_path=repo_path,
        branch=branch,
        file_filter=keep_file,
    )
    return git_loader.load()
def split_data(data):
    """Split loaded documents into chunks with a recursive character splitter.

    Chunk size and overlap are not set here, so the splitter's own defaults
    apply.

    Args:
        data: Documents to split, as returned by ``loader``.

    Returns:
        The chunks produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        length_function=len,  # Measure chunk length in characters.
        add_start_index=True,  # Record each chunk's start position in metadata.
    )
    return splitter.split_documents(data)
def ingest_chunks(chunks):
    """Embed the chunks into an in-memory vector store and remove the clone.

    Args:
        chunks: Document chunks, as returned by ``split_data``.

    Returns:
        A ``DocArrayInMemorySearch`` vector store built from ``chunks``.
    """
    repo_path = "./github_repo"
    try:
        embedding = OpenAIEmbeddings()
        vector_store = DocArrayInMemorySearch.from_documents(chunks, embedding)
    finally:
        # The documents are already in memory, so the on-disk clone is no
        # longer needed; remove it even when embedding fails.
        if os.path.exists(repo_path):
            shutil.rmtree(repo_path)
    return vector_store
def retreival(vector_store):
    """Build the conversational retrieval chain that answers user questions.

    Args:
        vector_store: Vector store holding the embedded chunks; it must
            provide ``as_retriever``.

    Returns:
        A ``ConversationalRetrievalChain`` with conversation memory whose
        result includes ``"answer"`` and the source documents.
    """
    # Select the model: the pinned snapshot is only used before the cut-off
    # date, after which the generic model name is used.
    if datetime.date.today() < datetime.date(2023, 9, 2):
        llm_name = "gpt-3.5-turbo-0301"
    else:
        llm_name = "gpt-3.5-turbo"

    llm = ChatOpenAI(model=llm_name, temperature=0)

    # Prompt used by the "stuff" step that combines the retrieved documents.
    template = """
You're a code summarisation assistant. Given the following extracted parts of a long document and a question, create a final answer with "CODE SNIPPETS" from "SOURCE DOCUMENTS".
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "CODE SNIPPETS" from "SOURCE DOCUMENTS" part in your answer.
QUESTION: {question}
CONTEXT: {context}
"""
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )

    # Because the chain also returns source documents, the memory has to be
    # told which input and output keys to record.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        input_key="question",
        output_key="answer",
        return_messages=True,
    )

    # search_type can be "similarity" or "mmr"; a contextual compression
    # retriever could be used here instead.
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",  # Can also be "refine" or "map_reduce".
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
        combine_docs_chain_kwargs={"prompt": prompt},
    )
class ConversationalResponse:
    """Question-answering system over the files of a Git repository.

    Construction runs the whole pipeline: load the repository, split it into
    chunks, embed the chunks into a vector store and build the retrieval
    chain. Calling the instance with a question returns the chain's answer.
    """

    def __init__(self, url: str, branch: str, file_filter: str):
        """Load the repository and build the retrieval chain.

        Args:
            url: Clone URL of the repository.
            branch: Branch to check out.
            file_filter: Comma-separated file-name suffixes to load.
        """
        self.url = url
        self.branch = branch
        self.file_filter = file_filter
        self.data = loader(self.url, self.branch, self.file_filter)
        self.chunks = split_data(self.data)
        self.vector_store = ingest_chunks(self.chunks)
        self.chain_type = "stuff"
        self.k = 5
        self.chain = retreival(self.vector_store)

    def __call__(self, question: str) -> str:
        """Run the chain on ``question`` and return its ``"answer"`` entry."""
        result = self.chain(question)
        return result["answer"]