|
import pandas as pd |
|
import gradio as gr |
|
from datasets import load_dataset |
|
from langchain.document_loaders import DataFrameLoader |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.vectorstores import FAISS |
|
from langchain.embeddings import OpenAIEmbeddings |
|
from langchain.chains import RetrievalQA |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain_mistralai.chat_models import ChatMistralAI |
|
|
|
# Dataset identifier -> insurer display name.  Insertion order matters: it
# fixes the order of both lists derived below.
_DATASET_TO_INSURER = {
    "zelros/pj-ce": "Caisse d'Epargne",
    "zelros/pj-da": "Direct Assurance",
    "zelros/pj-groupama": "Groupama",
    "zelros/pj-sg": "Société Générale",
    "zelros/pj-lbp": "La Banque Postale",
    "zelros/pj-maif": "MAIF",
    "zelros/pj-ca": "Crédit Agricole",
}

# Parallel lists: dataset_names[i] is the dataset loaded for insurers[i].
dataset_names = list(_DATASET_TO_INSURER.keys())
insurers = list(_DATASET_TO_INSURER.values())

# Insurer display name -> vector store; populated by the indexing loop.
db_dict = {}
|
|
|
def _build_qa_chain(llm, insurer):
    """Return a RetrievalQA chain over the vector store registered for *insurer*."""
    # k=8: ask the retriever for the eight closest chunks.
    retriever = db_dict[insurer].as_retriever(search_kwargs={'k': 8})
    return RetrievalQA.from_chain_type(llm, retriever=retriever)


def llm_response(question, insurer1, insurer2, gpt):
    """Answer *question* against the documents of two insurers.

    Args:
        question: The user's question, passed to each chain as its "query".
        insurer1: Key into ``db_dict`` selecting the first vector store.
        insurer2: Key into ``db_dict`` selecting the second vector store.
        gpt: Model name. ``"mistral-large-latest"`` is served by
            ``ChatMistralAI``; any other value is passed to ``ChatOpenAI``.

    Returns:
        A 2-tuple ``(answer_for_insurer1, answer_for_insurer2)`` holding the
        ``'result'`` field of each chain's output.

    Raises:
        KeyError: If either insurer has no entry in ``db_dict``.
    """
    # Both chains use the same model and settings, so one client is enough.
    if gpt == "mistral-large-latest":
        llm = ChatMistralAI(model=gpt)
    else:
        llm = ChatOpenAI(model_name=gpt)

    # Build both chains before querying so that an unknown insurer fails
    # before any LLM call is made.
    qa_chain1 = _build_qa_chain(llm, insurer1)
    qa_chain2 = _build_qa_chain(llm, insurer2)

    # `invoke` is the supported entry point; calling the chain object
    # directly (`chain({...})`) is deprecated in LangChain.
    payload = {"query": question}
    return qa_chain1.invoke(payload)['result'], qa_chain2.invoke(payload)['result']
|
|
|
# Sample questions offered below the form.
_EXAMPLE_QUESTIONS = (
    "Qui contacter en cas besoin ?",
    "Les problèmes de divorce sont-ils couverts ?",
    "En cas violences conjugales, puis-je être assisté ?",
    "Les problèmes d'usurpation d'identité sont-ils couverts ?",
    "Quel est le montant maximum couvert pour un litige ?",
    "Quels frais sont pris en charges, et quels frais ne sont pas pris en charge ?",
    "En tant que membre d'une association, suis-je couvert ?",
    "J'ai un litige concernant un brevet et un sujet de propriété intellectuelle, suis-je couvert ?",
    "Quels sont les moments où le contrat peut être résilié ?",
)

# One row per example, one column per input of the interface
# (question, insurer 1, insurer 2, LLM).  Only the question is provided;
# the three remaining columns are None.
examples = [[question, None, None, None] for question in _EXAMPLE_QUESTIONS]
|
|
|
# Build one FAISS vector store per insurer and register it in db_dict.

# Neither the splitter nor the embedding client depends on the dataset being
# processed, so they are created once rather than once per iteration.
text_splitter = CharacterTextSplitter(chunk_size=5000, chunk_overlap=0)
embeddings = OpenAIEmbeddings()

# dataset_names and insurers are parallel lists; iterate over them in
# lockstep instead of indexing one with the position in the other.
for name, insurer in zip(dataset_names, insurers):
    dataset = load_dataset(name)

    df = dataset['train'].to_pandas()
    # Join title and content with a newline so the title does not run
    # straight into the first word of the body.  fillna("") keeps a row with
    # a missing title or content from turning the whole text into NaN.
    df['text'] = df["title"].fillna("") + "\n" + df["content"].fillna("")

    # Each row becomes one document whose page content is the 'text' column.
    documents = DataFrameLoader(df, 'text').load()
    texts = text_splitter.split_documents(documents)

    db_dict[insurer] = FAISS.from_documents(texts, embeddings)
|
|
|
# --- Input widgets, in the positional order expected by llm_response ---
question_box = gr.Textbox(
    label="Question",
    info="More examples below :)",
    value="Quel est le montant maximum couvert pour un litige ?",
)
insurer1_dropdown = gr.Dropdown(
    choices=insurers,
    label="Insurer 1",
    value="Société Générale",
    info="More insurers available soon !",
)
insurer2_dropdown = gr.Dropdown(
    choices=insurers,
    label="Insurer 2",
    value="La Banque Postale",
)
llm_dropdown = gr.Dropdown(
    choices=["gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06", "mistral-large-latest"],
    label="LLM",
    value="gpt-4o-2024-08-06",
    info="Compare gpt-4o-mini, gpt-4o, and mistral-large!",
)

# --- Output widgets: one answer box per compared insurer ---
answer1_box = gr.Textbox(label="Answer insurer 1")
answer2_box = gr.Textbox(label="Answer insurer 2")

# Wire the widgets to llm_response.  Example outputs are not cached.
demo = gr.Interface(
    fn=llm_response,
    inputs=[question_box, insurer1_dropdown, insurer2_dropdown, llm_dropdown],
    outputs=[answer1_box, answer2_box],
    title='Towards more accessible and inclusive insurances',
    description='### <center>Compare french legal protection insurances - Research project, non-binding information - Please refer to a professional for advice.',
    examples=examples,
    cache_examples=False,
)

demo.launch()