timep12345 committed on
Commit
5643bbe
·
1 Parent(s): 5799184

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -5,7 +5,7 @@ import json
5
  from langchain.document_loaders import DataFrameLoader
6
  from langchain.text_splitter import CharacterTextSplitter
7
  from langchain.llms import HuggingFaceHub
8
- from langchain.embeddings import HuggingFaceEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import RetrievalQA
11
 
@@ -39,11 +39,7 @@ def url_changes(url, pages_to_visit, urls_to_scrape, repo_id):
39
  documents = loader.load()
40
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
41
  texts = text_splitter.split_documents(documents)
42
- #"snunlp/KR-SBERT-V40K-klueNLI-augSTS"
43
- emb_model = "all-MiniLM-L6-v2"
44
- embeddings = HuggingFaceEmbeddings(
45
- model_name=emb_model
46
- )
47
  db = Chroma.from_documents(texts, embeddings)
48
  retriever = db.as_retriever()
49
  llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0.1, "max_new_tokens":250})
 
5
  from langchain.document_loaders import DataFrameLoader
6
  from langchain.text_splitter import CharacterTextSplitter
7
  from langchain.llms import HuggingFaceHub
8
+ from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import RetrievalQA
11
 
 
39
  documents = loader.load()
40
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
41
  texts = text_splitter.split_documents(documents)
42
+ embeddings = SentenceTransformerEmbeddings(model_name="jhgan/ko-sroberta-multitask")
 
 
 
 
43
  db = Chroma.from_documents(texts, embeddings)
44
  retriever = db.as_retriever()
45
  llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0.1, "max_new_tokens":250})