cmagganas committed on
Commit
a2f6a14
1 Parent(s): 61df78d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -13
app.py CHANGED
@@ -1,9 +1,10 @@
1
  import chainlit as cl
2
  from langchain.embeddings.openai import OpenAIEmbeddings
3
  from langchain.document_loaders.csv_loader import CSVLoader
4
- from langchain.embeddings import CacheBackedEmbeddings
 
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.vectorstores import FAISS
7
  from langchain.chains import RetrievalQA
8
  from langchain.chat_models import ChatOpenAI
9
  from langchain.storage import LocalFileStore
@@ -14,22 +15,29 @@ from langchain.prompts.chat import (
14
  )
15
  import chainlit as cl
16
 
17
- import build_langchain_vector_store
18
-
19
- build_langchain_vector_store
20
 
21
- from langchain.vectorstores import Chroma
22
- from langchain.embeddings import OpenAIEmbeddings
23
  import openai
24
- import os
25
 
26
- openai.api_key = os.getenv("OPENAI_API_KEY")
27
  openai.api_base = 'https://api.openai.com/v1' # default
28
 
 
29
  embedding_model_name = "text-embedding-ada-002"
 
 
 
 
 
 
 
30
  embedding_model = OpenAIEmbeddings(model=embedding_model_name)
 
 
 
 
31
  read_vector_store = Chroma(
32
- persist_directory="langchain-chroma-pulze-docs", embedding_function=embedding_model
33
- )
34
- query_results = read_vector_store.similarity_search("How do I use Pulze?")
35
- print(query_results[0].page_content)
 
1
  import chainlit as cl
2
  from langchain.embeddings.openai import OpenAIEmbeddings
3
  from langchain.document_loaders.csv_loader import CSVLoader
4
+ from langchain.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings
5
+ from langchain.embeddings import OpenAIEmbeddings
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.vectorstores import Chroma #, FAISS
8
  from langchain.chains import RetrievalQA
9
  from langchain.chat_models import ChatOpenAI
10
  from langchain.storage import LocalFileStore
 
15
  )
16
  import chainlit as cl
17
 
18
+ from build_langchain_vector_store import chunk_docs, load_gitbook_docs, tiktoken_len
 
 
19
 
 
 
20
  import openai
21
+ # import os
22
 
23
+ # openai.api_key = os.getenv("OPENAI_API_KEY")
24
  openai.api_base = 'https://api.openai.com/v1' # default
25
 
26
+ docs_url = "https://docs.pulze.ai/"
27
  embedding_model_name = "text-embedding-ada-002"
28
+ langchain_documents = load_gitbook_docs(docs_url)
29
+ chunked_langchain_documents = chunk_docs(
30
+ langchain_documents,
31
+ tokenizer=encoding_for_model(embedding_model_name),
32
+ chunk_size=200,
33
+ )
34
+
35
  embedding_model = OpenAIEmbeddings(model=embedding_model_name)
36
+ shutil.rmtree(args.persist_path, ignore_errors=True)
37
+ vector_store = Chroma.from_documents(
38
+ chunked_langchain_documents, embedding=embedding_model, persist_directory=args.persist_path
39
+ )
40
  read_vector_store = Chroma(
41
+ persist_directory=args.persist_path, embedding_function=embedding_model
42
+ )
43
+ print(read_vector_store.similarity_search("How do I use Pulze?"))