OCRonos-Vintage-CPU

Running

App Files Files Community

Pclanglais committed on May 11, 2024

Commit

9e80596

verified ·

1 Parent(s): f2019a4

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -169

app.py CHANGED Viewed

@@ -13,176 +13,99 @@ import pandas as pd
 from chromadb.config import Settings
 from chromadb.utils import embedding_functions
-device = "cuda:0"
-sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="intfloat/multilingual-e5-base", device = "cuda")
-client = chromadb.PersistentClient(path="mfs_vector")
-collection = client.get_collection(name="sp_expanded", embedding_function = sentence_transformer_ef)
 # Define the device
-device = "cuda" if torch.cuda.is_available() else "cpu"
 #Define variables
 temperature=0.2
 max_new_tokens=1000
 top_p=0.92
 repetition_penalty=1.7
-model_name = "AgentPublic/Guillaume-Tell"
-llm = LLM(model_name, max_model_len=4096)
 #Vector search over the database
-def vector_search(collection, text):
-    results = collection.query(
-        query_texts=[text],
-        n_results=5,
-    )
-    document = []
-    document_html = []
-    id_list = ""
-    list_elm = 0
-    for ids in results["ids"][0]:
-        first_link = str(results["metadatas"][0][list_elm]["identifier"])
-        first_title = results["documents"][0][list_elm]
-        list_elm = list_elm+1
-        document.append(first_link + " : " + first_title)
-        document_html.append('<div class="source" id="' + first_link + '"><p><b>' + first_link + "</b> : " + first_title + "</div>")
-    document = "\n\n".join(document)
-    document_html = '<div id="source_listing">' + "".join(document_html) + "</div>"
-    # Replace this with the actual implementation of the vector search
-    return document, document_html
-#CSS for references formatting
-css = """
-.generation {
-    margin-left:2em;
-    margin-right:2em;
-    size:1.2em;
-}
-:target {
-    background-color: #CCF3DF; /* Change the text color to red */
-  }
-.source {
-    float:left;
-    max-width:17%;
-    margin-left:2%;
-}
-.tooltip {
-    position: relative;
-    cursor: pointer;
-    font-variant-position: super;
-    color: #97999b;
-  }
-  .tooltip:hover::after {
-    content: attr(data-text);
-    position: absolute;
-    left: 0;
-    top: 120%; /* Adjust this value as needed to control the vertical spacing between the text and the tooltip */
-    white-space: pre-wrap; /* Allows the text to wrap */
-    width: 500px; /* Sets a fixed maximum width for the tooltip */
-    max-width: 500px; /* Ensures the tooltip does not exceed the maximum width */
-    z-index: 1;
-    background-color: #f9f9f9;
-    color: #000;
-    border: 1px solid #ddd;
-    border-radius: 5px;
-    padding: 5px;
-    display: block;
-    box-shadow: 0 4px 8px rgba(0,0,0,0.1); /* Optional: Adds a subtle shadow for better visibility */
-  }"""
-#Courtesy of ChatGPT
-def format_references(text):
-    # Define start and end markers for the reference
-    ref_start_marker = '<ref text="'
-    ref_end_marker = '</ref>'
-    # Initialize an empty list to hold parts of the text
-    parts = []
-    current_pos = 0
-    ref_number = 1
-    # Loop until no more reference start markers are found
-    while True:
-        start_pos = text.find(ref_start_marker, current_pos)
-        if start_pos == -1:
-            # No more references found, add the rest of the text
-            parts.append(text[current_pos:])
-            break
-        # Add text up to the start of the reference
-        parts.append(text[current_pos:start_pos])
-        # Find the end of the reference text attribute
-        end_pos = text.find('">', start_pos)
-        if end_pos == -1:
-            # Malformed reference, break to avoid infinite loop
-            break
-        # Extract the reference text
-        ref_text = text[start_pos + len(ref_start_marker):end_pos].replace('\n', ' ').strip()
-        ref_text_encoded = ref_text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
-        # Find the end of the reference tag
-        ref_end_pos = text.find(ref_end_marker, end_pos)
-        if ref_end_pos == -1:
-            # Malformed reference, break to avoid infinite loop
-            break
-        # Extract the reference ID
-        ref_id = text[end_pos + 2:ref_end_pos].strip()
-        # Create the HTML for the tooltip
-        tooltip_html = f'<span class="tooltip" data-refid="{ref_id}" data-text="{ref_id}: {ref_text_encoded}"><a href="#{ref_id}">[' + str(ref_number) +']</a></span>'
-        parts.append(tooltip_html)
-        # Update current_pos to the end of the current reference
-        current_pos = ref_end_pos + len(ref_end_marker)
-        ref_number = ref_number + 1
-    # Join and return the parts
-    parts = ''.join(parts)
-    return parts
-# Class to encapsulate the Mistral chatbot
-class MistralChatBot:
-    def __init__(self, system_prompt="Le dialogue suivant est une conversation"):
-        self.system_prompt = system_prompt
-    def predict(self, user_message):
-        fiches, fiches_html = vector_search(collection, user_message)
-        sampling_params = SamplingParams(temperature=.7, top_p=.95, max_tokens=2000, presence_penalty = 1.5, stop = ["``"])
-        detailed_prompt = """<|im_start|>system
-        Tu es Albert, le chatbot des Maisons France Service qui donne des réponses sourcées.<|im_end|>
-        <|im_start|>user
-        Ecrit un texte référencé en réponse à cette question : """ + user_message + """
-        Les références doivent être citées de cette manière : texte rédigé<ref text=\"[passage pertinent dans la référence]\">[\"identifiant de la référence\"]</ref>Si les références ne permettent pas de répondre, qu'il n'y a pas de réponse.
-        Les cinq références disponibles : """ + fiches + "<|im_end|>\n<|im_start|>assistant\n"
-        print(detailed_prompt)
-        prompts = [detailed_prompt]
-        outputs = llm.generate(prompts, sampling_params, use_tqdm = False)
-        generated_text = outputs[0].outputs[0].text
-        generated_text = '<h2 style="text-align:center">Réponse</h3>\n<div class="generation">' + format_references(generated_text) + "</div>"
-        fiches_html = '<h2 style="text-align:center">Sources</h3>\n' + fiches_html
-        return generated_text, fiches_html
-# Create the Mistral chatbot instance
-mistral_bot = MistralChatBot()
 # Define the Gradio interface
-title = "Guillaume-Tell"
-description = "Le LLM répond à des questions administratives sur l'éducation nationale à partir de sources fiables."
 examples = [
     [
         "Qui peut bénéficier de l'AIP?",  # user_message
@@ -190,27 +113,12 @@ examples = [
     ]
 ]
-additional_inputs=[
-    gr.Slider(
-        label="Température",
-        value=0.2,  # Default value
-        minimum=0.05,
-        maximum=1.0,
-        step=0.05,
-        interactive=True,
-        info="Des valeurs plus élevées donne plus de créativité, mais aussi d'étrangeté",
-    ),
-]
 demo = gr.Blocks()
 with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=css) as demo:
-    gr.HTML("""<h1 style="text-align:center">Albert (Guillaume-Tell)</h1>""")
-    text_input = gr.Textbox(label="Votre question ou votre instruction.", type="text", lines=1)
-    text_button = gr.Button("Interroger Albert")
-    text_output = gr.HTML(label="La réponse d'Albert")
-    embedding_output = gr.HTML(label="Les sources utilisées")
-    text_button.click(mistral_bot.predict, inputs=text_input, outputs=[text_output, embedding_output])
 if __name__ == "__main__":
     demo.queue().launch()

 from chromadb.config import Settings
 from chromadb.utils import embedding_functions
+# Embedding model used to encode incoming queries (see vector_search).
+model = BGEM3FlagModel('BAAI/bge-m3',
+                       use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
+# Precomputed embedding matrix loaded from disk.
+embeddings = np.load("embeddings_with_api.npy")
+# Table read from JSON; only its "text_with_context" column is used below.
+embeddings_data = pd.read_json("embeddings_tchap.json")
+# presumably embeddings_text[i] is the text behind embeddings[i] -- TODO confirm the two files are row-aligned
+embeddings_text = embeddings_data["text_with_context"].tolist()
 # Define the device
+#device = "cuda" if torch.cuda.is_available() else "cpu"
 #Define variables
 temperature=0.2
 max_new_tokens=1000
 top_p=0.92
 repetition_penalty=1.7
+#model_name = "Pclanglais/Tchap"
+#llm = LLM(model_name, max_model_len=4096)
 #Vector search over the database
+def vector_search(sentence_query):
+    query_embedding = model.encode(sentence_query,
+                            batch_size=12,
+                            max_length=256, # If you don't need such a long length, you can set a smaller value to speed up the encoding process.
+                            )['dense_vecs']
+    # Reshape the query embedding to fit the cosine_similarity function requirements
+    query_embedding_reshaped = query_embedding.reshape(1, -1)
+    # Compute cosine similarities
+    similarities = cosine_similarity(query_embedding_reshaped, embeddings)
+    # Find the index of the closest document (highest similarity)
+    closest_doc_index = np.argmax(similarities)
+    # Closest document's embedding
+    closest_doc_embedding = sentences_1[closest_doc_index]
+    return closest_doc_embedding
+class StopOnTokens(StoppingCriteria):
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+        stop_ids = [29, 0]
+        for stop_id in stop_ids:
+            if input_ids[0][-1] == stop_id:
+                return True
+        return False
+def predict(message, history):
+    text = vector_search(message)
+    message = message + "\n\n### Source ###\n"
+    history_transformer_format = history + [[message, ""]]
+    stop = StopOnTokens()
+    messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]])
+                for item in history_transformer_format])
+    return messages
+def predict_alt(message, history):
+    """Stream a generated reply for *message*, yielding the text so far.
+
+    The history plus the new message is flattened into one prompt using
+    "\\n<human>:" / "\\n<bot>:" markers, tokenized, and passed to
+    ``model.generate`` on a background thread; decoded pieces are read
+    from a ``TextIteratorStreamer`` and accumulated.
+
+    Yields:
+        The accumulated reply after each streamed piece that is not '<'.
+
+    NOTE(review): ``tokenizer`` is not defined in the visible part of the
+    file, and the module-level ``model`` shown there is a BGEM3FlagModel
+    embedding model -- confirm a generative model/tokenizer is in scope
+    before wiring this function to the UI.
+    """
+    history_transformer_format = history + [[message, ""]]
+    stop = StopOnTokens()
+    messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]])
+                for item in history_transformer_format])
+    # NOTE(review): the device is hard-coded to "cuda" here.
+    model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
+    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        model_inputs,
+        streamer=streamer,
+        max_new_tokens=1024,
+        do_sample=True,
+        top_p=0.95,
+        top_k=1000,
+        temperature=1.0,
+        num_beams=1,
+        stopping_criteria=StoppingCriteriaList([stop])
+        )
+    # Generation runs on a separate thread so this generator can consume the streamer while it fills.
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+    partial_message = ""
+    for new_token in streamer:
+        # Pieces that are exactly '<' are skipped and never reach the output.
+        if new_token != '<':
+            partial_message += new_token
+            yield partial_message
 # Define the Gradio interface
+title = "Tchap"
+description = "Le chatbot du service public"
 examples = [
     [
         "Qui peut bénéficier de l'AIP?",  # user_message
     ]
 ]
 demo = gr.Blocks()
 with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=css) as demo:
+    gr.HTML("""<h1 style="text-align:center">Albert-Tchap</h1>""")
+    gr.ChatInterface(predict).launch()
 if __name__ == "__main__":
     demo.queue().launch()