from llama_index.core import SimpleDirectoryReader, GPTListIndex, GPTVectorStoreIndex, PromptHelper
from llama_index.core import StorageContext, load_index_from_storage, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
#from llama_index import LLMPredictor, PromptHelper
#from langchain.chat_models import ChatOpenAI
#from langchain_community.embeddings import OpenAIEmbeddings  # unused; LlamaIndex uses its own OpenAI embeddings by default
import gradio as gr
import os
import openai
from gradio.themes.utils import colors, fonts, sizes

#os.environ['OPENAI_API_KEY'] = api_key

# System instructions for the (currently disabled) plain-ChatGPT fallback further below.
messages = [
    {"role": "system", "content": "follow the 4 instructions below for your outputs:"},
    {"role": "system", "content": "1. make sure all expressions are compatible with Polish"},
    {"role": "system", "content": "2. use Polish only for outputs"},
    {"role": "system", "content": "3. if you cannot answer, reply that you do not have enough information"},
    {"role": "system", "content": "4. do not make up any answer if you do not know the answer"},
]

# Web pages the index is meant to cover. GPTVectorStoreIndex.from_documents has no `urls`
# parameter, so they are kept as a separate list here (see the sketch below construct_index).
SOURCE_URLS = [
    'https://trio.house/',
    'https://trio.house/kontakt/',
    'https://trio.house/o-nas/',
    'https://trio.house/w-sprzedazy/',
    'https://trio.house/dzialki/',
    'https://trio.house/zainwestuj-z-nami/',
    'https://trio.house/potrzebujesz-konsultacji-rynku-nieruchomosci/',
    'https://trio.house/potrzebujesz-remontu/',
    'https://trio.house/potrzebujesz-projektu-wnetrza/',
    'https://trio.house/potrzebujesz-mebli-na-wymiar/',
    'https://trio.house/potrzebujesz-kredytu-na-zakup-nieruchomosci/',
    'https://trio.house/makroekonomia/',
    'https://trio.house/rynek-nieruchomosci/',
    'https://trio.house/2023/05/24/deweloperzy-buduja-coraz-mniej/',
    'https://trio.house/2023/04/27/prognozy-na-2023-2025-co-nas-czeka/',
    'https://trio.house/2023/04/18/wycinka-drzew-na-wlasnej-dzialce-w-2023/',
    'https://trio.house/2023/04/03/lipiec-rozpoczynamy-juz-w-kwietniu/',
    'https://trio.house/2023/04/03/zmiany-w-podatku-od-czynnosci-cywilnoprawnych/',
    'https://trio.house/2023/03/23/czy-aby-napewno-najdrozsze-mieszkania-sa-w-stolicy/',
    'https://trio.house/2023/06/15/rekomendacja-s-korzystniejsza-dla-bezpiecznego-kredytu-2/',
    'https://trio.house/2023/07/20/warszawski-rynek-nieruchomosci-mieszkaniowych-na-6-biegu/',
    'https://livesmarter.pl/najlepsze-lokaty-maj-2023/',
    'https://www.money.pl/gospodarka/inflacja-maj-2023-r-finalny-odczyt-gus-6909186710817344a.html',
    'https://ksiegowosc.infor.pl/wiadomosci/5754337,oprocentowanie-lokat-bankowych-i-kont-oszczednosciowych-2023-koniec-maja-poczatek-czerwca-tabela.html#:~:text=7%2C05%25%20%2D%20takie%20jest,proc.',
]


def construct_index(directory_path):
    max_input_size = 4096     # context_window
    num_outputs = 512         # num_output
    max_chunk_overlap = 0.05  # chunk_overlap_ratio
    chunk_size_limit = 1000
    temperature = 0.1         # only used by the commented-out LLMPredictor below

    prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap,
                                 chunk_size_limit=chunk_size_limit)
    # llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=temperature, model_name="gpt-3.5-turbo-instruct", max_tokens=num_outputs))

    documents = SimpleDirectoryReader(directory_path).load_data()

    # index = GPTVectorStoreIndex(documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    index = GPTVectorStoreIndex.from_documents(documents, prompt_helper=prompt_helper)

    # persist_dir is a directory, despite the ".json" name
    index.storage_context.persist(persist_dir='index.json')

    return index
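
# Hedged sketch (not wired into the app): SimpleDirectoryReader only reads local files, so if
# the pages in SOURCE_URLS should actually end up in the index they have to be fetched
# explicitly. This assumes the optional llama-index-readers-web package is installed;
# load_web_documents and the example call below are illustrative, not part of the original flow.
def load_web_documents(urls=SOURCE_URLS):
    # Imported lazily so the app still starts when the optional reader package is absent.
    from llama_index.readers.web import SimpleWebPageReader
    # html_to_text=True strips markup so only the page text gets embedded.
    return SimpleWebPageReader(html_to_text=True).load_data(urls)

# Example (inside construct_index, instead of the documents-only call):
# index = GPTVectorStoreIndex.from_documents(documents + load_web_documents(), prompt_helper=prompt_helper)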

def chatbotCustom(query):
    # Reload the persisted index rather than rebuilding it on every question.
    storage_context = StorageContext.from_defaults(persist_dir="index.json")
    index = load_index_from_storage(storage_context)

    # query_engine = index.as_query_engine()
    # response = query_engine.query(query, similarity_top_k=5, response_mode="tree_summarize")
    ## response = index.query(query, similarity_top_k=5, response_mode="tree_summarize")

    # configure retriever
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=10,
    )

    # configure response synthesizer (response_mode belongs here, not on the retriever)
    response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

    # assemble query engine
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
        node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
    )

    response = query_engine.query(query)
    return response.response


# def chatbotGPT(input):
#     if input:
#         messages.append({"role": "user", "content": input})
#         chat = openai.ChatCompletion.create(
#             model="gpt-3.5-turbo-instruct", messages=messages
#         )
#         reply = chat.choices[0].message.content
#         messages.append({"role": "assistant", "content": reply})
#         return reply


def clear():
    return None, None


theme = gr.themes.Default(
    font=[gr.themes.GoogleFont("Roboto"), "sans-serif"],
    primary_hue="neutral", secondary_hue="neutral", neutral_hue="neutral",
).set(
    button_primary_background_fill="#3FCCA5",
    button_primary_background_fill_dark="#3FCCA5",
    button_primary_text_color="#003F62",
    body_background_fill="#FFFFFF",
    body_background_fill_dark="#FFFFFF",
)

with gr.Blocks(theme=theme) as trioGPT:
    inputs = gr.Textbox(lines=4, elem_id="inputs", label="Zadaj mi pytanie")  # , elem_classes="textbox"
    outputs = gr.Textbox(label="Odpowiedź", elem_id="outputs")  # , elem_classes="textbox"
    with gr.Row():
        submit_btn = gr.Button("Wyślij", variant="primary")
        clear_btn = gr.Button("Wyczyść")
    submit_btn.click(chatbotCustom, inputs=inputs, outputs=outputs)
    clear_btn.click(fn=clear, inputs=None, outputs=[inputs, outputs])

# Build (or rebuild) the index from the local "data" directory before starting the UI.
index = construct_index("data")

trioGPT.launch()  # launch(share=True) to expose a public link
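
# Hedged sketch: the commented-out chatbotGPT above calls openai.ChatCompletion.create, which
# no longer exists in openai>=1.0. If that plain-ChatGPT fallback is ever revived, the current
# client API looks roughly like this (the model name is an assumption, and the function is not
# wired into the Gradio UI):
#
# from openai import OpenAI
# client = OpenAI()  # reads OPENAI_API_KEY from the environment
#
# def chatbotGPT(prompt):
#     if prompt:
#         messages.append({"role": "user", "content": prompt})
#         chat = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
#         reply = chat.choices[0].message.content
#         messages.append({"role": "assistant", "content": reply})
#         return reply
#     return None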