# Page header captured with the file (not part of the program):
#   christophebourguignat's picture
#   Update app.py
#   14c56e1 verified
import pandas as pd
import gradio as gr
from datasets import load_dataset
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain_mistralai.chat_models import ChatMistralAI
# Single source of truth pairing each insurer's display name with the dataset
# that holds its contract text. Insertion order is the order shown in the UI.
_DATASET_BY_INSURER = {
    "Caisse d'Epargne": "zelros/pj-ce",
    "Direct Assurance": "zelros/pj-da",
    "Groupama": "zelros/pj-groupama",
    "Société Générale": "zelros/pj-sg",
    "La Banque Postale": "zelros/pj-lbp",
    "MAIF": "zelros/pj-maif",
    "Crédit Agricole": "zelros/pj-ca",
}

# Parallel lists: dataset_names[i] is the dataset for insurers[i].
dataset_names = list(_DATASET_BY_INSURER.values())
insurers = list(_DATASET_BY_INSURER.keys())

# Filled at start-up: insurer display name -> vector store for that insurer.
db_dict = {}
def _build_qa_chain(insurer, gpt):
    """Return a RetrievalQA chain over the vector store of one insurer.

    The chat model is ChatMistralAI when ``gpt`` is "mistral-large-latest"
    and ChatOpenAI for any other model name. The retriever is asked for the
    8 closest chunks (``search_kwargs={'k': 8}``).

    Raises:
        KeyError: if ``insurer`` has no entry in ``db_dict``.
    """
    if gpt == "mistral-large-latest":
        llm = ChatMistralAI(model=gpt)
    else:
        llm = ChatOpenAI(model_name=gpt)
    retriever = db_dict[insurer].as_retriever(search_kwargs={'k': 8})
    return RetrievalQA.from_chain_type(llm, retriever=retriever)


def llm_response(question, insurer1, insurer2, gpt):
    """Answer the same question against the documents of two insurers.

    Args:
        question: The user's question, passed to each chain as the "query".
        insurer1: Key into ``db_dict`` for the first insurer.
        insurer2: Key into ``db_dict`` for the second insurer.
        gpt: Model name; selects the chat model used by both chains.

    Returns:
        A 2-tuple ``(answer_for_insurer1, answer_for_insurer2)`` taken from
        the "result" field of each chain's output.

    Raises:
        KeyError: if either insurer has no entry in ``db_dict``.
    """
    # Build both chains before querying so an unknown insurer fails before
    # any question is sent to a model.
    chains = [_build_qa_chain(insurer, gpt) for insurer in (insurer1, insurer2)]
    # `invoke` replaces calling the chain object directly, which LangChain
    # has deprecated.
    first, second = (chain.invoke({"query": question})['result'] for chain in chains)
    return first, second
# Sample questions (in French) offered below the form.
_EXAMPLE_QUESTIONS = [
    "Qui contacter en cas besoin ?",
    "Les problèmes de divorce sont-ils couverts ?",
    "En cas violences conjugales, puis-je être assisté ?",
    "Les problèmes d'usurpation d'identité sont-ils couverts ?",
    "Quel est le montant maximum couvert pour un litige ?",
    "Quels frais sont pris en charges, et quels frais ne sont pas pris en charge ?",
    "En tant que membre d'une association, suis-je couvert ?",
    "J'ai un litige concernant un brevet et un sujet de propriété intellectuelle, suis-je couvert ?",
    "Quels sont les moments où le contrat peut être résilié ?",
]

# One row per example: [question, insurer 1, insurer 2, LLM]. Only the
# question is supplied; the other three slots are None.
examples = [[question, None, None, None] for question in _EXAMPLE_QUESTIONS]
# Build one FAISS vector store per insurer and register it in db_dict.

# dataset_names and insurers are matched by position, so a length mismatch
# would silently pair or drop entries; fail loudly instead.
if len(dataset_names) != len(insurers):
    raise ValueError("dataset_names and insurers must have the same length")

# Neither the splitter nor the embedding client depends on the dataset, so
# they are created once rather than on every iteration.
text_splitter = CharacterTextSplitter(chunk_size=5000, chunk_overlap=0)
embeddings = OpenAIEmbeddings()

for insurer, name in zip(insurers, dataset_names):
    dataset = load_dataset(name)
    df = dataset['train'].to_pandas()
    # NOTE(review): title and content are concatenated with no separator, so
    # the last word of the title runs into the first word of the content.
    # Confirm whether a space/newline is wanted before changing the index.
    df['text'] = df["title"] + df["content"]
    loader = DataFrameLoader(df, 'text')
    documents = loader.load()
    texts = text_splitter.split_documents(documents)
    db = FAISS.from_documents(texts, embeddings)
    db_dict[insurer] = db
# Input widgets, in the order llm_response receives them:
# question, insurer1, insurer2, gpt.
question_box = gr.Textbox(
    label="Question",
    info="More examples below :)",
    value="Quel est le montant maximum couvert pour un litige ?",
)
first_insurer_picker = gr.Dropdown(
    choices=insurers,
    label="Insurer 1",
    value="Société Générale",
    info="More insurers available soon !",
)
second_insurer_picker = gr.Dropdown(
    choices=insurers,
    label="Insurer 2",
    value="La Banque Postale",
)
model_picker = gr.Dropdown(
    choices=["gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06", "mistral-large-latest"],
    label="LLM",
    value="gpt-4o-2024-08-06",
    info="Compare gpt-4o-mini, gpt-4o, and mistral-large!",
)

# Output widgets, one per answer returned by llm_response.
first_answer_box = gr.Textbox(label="Answer insurer 1")
second_answer_box = gr.Textbox(label="Answer insurer 2")

# Assemble the comparison UI around llm_response.
demo = gr.Interface(
    fn=llm_response,
    inputs=[question_box, first_insurer_picker, second_insurer_picker, model_picker],
    outputs=[first_answer_box, second_answer_box],
    title="Towards more accessible and inclusive insurances",
    description="### <center>Compare french legal protection insurances - Research project, non-binding information - Please refer to a professional for advice.",
    examples=examples,
    cache_examples=False,
)

# Start the app as soon as the module runs.
demo.launch()