import logging
import sys

# Log llama_index activity to stdout (INFO level) so progress is visible in the app logs.
# (A second StreamHandler here would duplicate every message, so basicConfig alone is enough.)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

# Load every file in the current directory as source documents for the index.
documents = SimpleDirectoryReader("./").load_data()

from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

llm = LlamaCPP(
    # Pass a URL to a GGUF model and it will be downloaded automatically
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    # Optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Keep the context window a little below the 4096-token limit to leave
    # wiggle room for prompt formatting and the generated tokens.
    context_window=3900,
    # kwargs passed through to __call__()
    generate_kwargs={},
    # kwargs passed through to __init__();
    # n_gpu_layers=-1 offloads all layers to the GPU (requires a GPU build of llama-cpp-python)
    model_kwargs={"n_gpu_layers": -1},
    # Transform inputs into the Llama 2 / Mistral-Instruct [INST] prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
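
# Note: if no GPU is available in the runtime, set model_kwargs={"n_gpu_layers": 0}
# so llama.cpp runs entirely on the CPU.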

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding

# Wrap a Hugging Face sentence-embedding model for use by llama_index.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="thenlper/gte-large")
)
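
# Illustrative sanity check (an assumption, not in the original app):
# gte-large produces 1024-dimensional embeddings.
# vec = embed_model.get_text_embedding("hello world")
# assert len(vec) == 1024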

# Bundle the LLM and embedding model; documents are split into 256-token chunks.
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)

# Embed the documents and build an in-memory vector index.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
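
# Optional sketch (an assumption, not part of the original app): persist the index
# to disk so documents are not re-embedded on every restart.
# index.storage_context.persist(persist_dir="./storage")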

# Streamlit front end: put this code in an app file (e.g., app.py) and run it
# with `streamlit run app.py`.
import streamlit as st
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Note: this GPT-2 helper is defined but never called; main() below answers
# queries through the LlamaIndex query engine instead.
def generate_response(prompt):
    model_name = "gpt2"
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    output = model.generate(input_ids, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

def main():
    st.title("Cloudflare RAG")

    # User input
    user_input = st.text_input("Enter your message:")

    if user_input:
        # Answer the question with the RAG pipeline built above
        query_engine = index.as_query_engine()
        response = query_engine.query(user_input)

        # Display the response (query() returns a Response object, so cast it to str)
        st.text_area("Response:", str(response), height=100)

if __name__ == "__main__":
    main()
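
# A further suggestion (not in the original app): Streamlit re-executes this script
# on every interaction, so moving the model/index setup into a function decorated
# with @st.cache_resource would avoid reloading the LLM on each rerun.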