Spaces: Sleeping
update
app.py CHANGED

(The removed/re-added line pairs in the diff below appear to differ only in whitespace, which is why each one reads the same on both sides.)
@@ -9,7 +9,6 @@ from pinecone import Pinecone
 from huggingface_hub import hf_hub_download
 @st.cache_resource()
 def load_model():
-
     # from google.colab import userdata
     model_name_or_path = "CompendiumLabs/bge-large-en-v1.5-gguf"
     model_basename = 'bge-large-en-v1.5-f32.gguf'
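Note: @st.cache_resource() makes Streamlit run load_model() once per process and hand the cached result to every rerun, so the GGUF files are downloaded and loaded only on the first request. A minimal sketch of the embedding half of this function, assuming only streamlit, huggingface_hub, and llama-cpp-python (the helper name load_embedder is hypothetical):

import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

@st.cache_resource()  # cache across Streamlit reruns: download and load happen once
def load_embedder():
    # Fetch the GGUF file into the local HF cache and return its path.
    model_path = hf_hub_download(
        repo_id="CompendiumLabs/bge-large-en-v1.5-gguf",
        filename="bge-large-en-v1.5-f32.gguf",
    )
    # embedding=True exposes create_embedding() instead of text generation.
    return Llama(model_path, embedding=True)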
@@ -20,10 +19,10 @@ def load_model():
     model = Llama(model_path, embedding=True)

     st.success("Loaded NLP model from Hugging Face!")  # 👈 Show a success message
-    apikey = st.secrets["apikey"]
+    apikey = st.secrets["apikey"]
     pc = Pinecone(api_key=apikey)
     index = pc.Index("law")
-
+
     # pc = Pinecone(api_key=api_key)
     # index = pc.Index("law")
     model_2_name = "TheBloke/zephyr-7B-beta-GGUF"
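Note: the apikey line in this hunk reads the Pinecone key from Streamlit's secrets store instead of hardcoding it in app.py. On a Space the value is configured in the Space's secret settings; for a local run the same lookup is served from a .streamlit/secrets.toml file. A minimal sketch of the pattern, with the index name "law" taken from the diff:

import streamlit as st
from pinecone import Pinecone

# Locally, st.secrets reads .streamlit/secrets.toml, e.g. a line: apikey = "..."
apikey = st.secrets["apikey"]  # the key itself never appears in the repo
pc = Pinecone(api_key=apikey)  # Pinecone v3+ client object
index = pc.Index("law")        # handle to the prebuilt index of law texts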
@@ -31,7 +30,7 @@ def load_model():
     model_path_model = hf_hub_download(
         repo_id=model_2_name,
         filename=model_2base_name,
-    )
+    )
     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
     llm = LlamaCpp(
         model_path=model_path_model,
@@ -39,13 +38,13 @@ def load_model():
         max_tokens=2500,
         top_p=1,
         callback_manager=callback_manager,
-        verbose=True,
+        verbose=True,
         n_ctx=2048,
         n_threads=2,  # Verbose is required to pass to the callback manager
     )
-    st.success("loaded the second NLP model from Hugging Face!")
+    st.success("loaded the second NLP model from Hugging Face!")
+

-
     # prompt_template = "<|system|>\
     # </s>\
     # <|user|>\
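Note: the inline comment that ends up after n_threads ("Verbose is required to pass to the callback manager") actually describes verbose=True two lines above it. The setup is LangChain's llama-cpp wrapper with stdout streaming; below is a self-contained sketch, assuming the langchain-community import path and, since the diff never shows model_2base_name's value, a hypothetical Q4_K_M file from the same repo:

from huggingface_hub import hf_hub_download
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp

model_path_model = hf_hub_download(
    repo_id="TheBloke/zephyr-7B-beta-GGUF",
    filename="zephyr-7b-beta.Q4_K_M.gguf",  # assumed; the real model_2base_name is not in the hunks
)
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
    model_path=model_path_model,
    max_tokens=2500,
    top_p=1,
    callback_manager=callback_manager,  # streams generated tokens to stdout
    verbose=True,                       # required when passing a callback manager
    n_ctx=2048,                         # prompt + completion must fit in 2048 tokens
    n_threads=2,                        # CPU threads used for inference
)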
@@ -56,7 +55,7 @@ def load_model():

     return model, llm, index

-
+
 st.title("Please ask your question on Lithuanian rules for foreigners.")
 model, llm, index = load_model()

@@ -66,7 +65,7 @@ if question != "":
     query = model.create_embedding(question)
     st.write(query)
     q = query['data'][0]['embedding']
-
+
     response = index.query(
         vector=q,
         top_k=1,
@@ -75,4 +74,4 @@ if question != "":
     )
     response_t = response['matches'][0]['metadata']['text']
     st.write(response_t)
-    st.header("Answer:")
+    st.header("Answer:")
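Note: taken together, the hunks after load_model() form a plain retrieve-and-display loop: embed the question with the BGE model, fetch the single nearest chunk from Pinecone, and show it before the "Answer:" header (the LLM call that produces the answer lies outside the hunks shown). A condensed sketch of that flow, assuming st.text_input feeds question and that the elided middle of index.query(...) passes include_metadata=True, which the metadata lookup at the end requires:

question = st.text_input("Your question")  # assumed source of question
if question != "":
    # llama-cpp-python returns an OpenAI-style dict; the vector sits in data[0].
    query = model.create_embedding(question)
    q = query['data'][0]['embedding']
    response = index.query(
        vector=q,
        top_k=1,                # only the closest chunk is used as context
        include_metadata=True,  # assumed; matches[0]['metadata'] needs it
    )
    response_t = response['matches'][0]['metadata']['text']
    st.write(response_t)
    st.header("Answer:")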