File size: 9,759 Bytes
883ccea
 
 
 
 
 
 
 
 
 
 
 
 
 
1a64d83
883ccea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b241b67
883ccea
b241b67
 
 
883ccea
b241b67
 
883ccea
b241b67
 
883ccea
b241b67
 
883ccea
 
 
 
b241b67
883ccea
b241b67
 
883ccea
b241b67
 
883ccea
b241b67
883ccea
 
 
 
 
 
 
b241b67
 
883ccea
 
 
b241b67
883ccea
b241b67
 
883ccea
b241b67
 
883ccea
b241b67
 
883ccea
 
 
b241b67
1a64d83
fa77dd2
 
 
883ccea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a64d83
883ccea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a64d83
 
 
 
 
 
 
 
 
883ccea
 
1a64d83
 
2490a95
1a64d83
 
 
 
 
e624303
 
1a64d83
e624303
1a64d83
 
 
883ccea
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import time
import traceback
import gradio as gr
import os
import asyncio
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from haystack import Pipeline, Document
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder
from haystack.document_stores.types import DuplicatePolicy
from haystack.components.writers import DocumentWriter
import requests
import json




# Module-level setup: seed the MongoDB Atlas collection with the sample plots
# and assemble the retrieval-augmented generation (RAG) pipeline used by the chat.
# Everything runs inside one try block so that a missing API key or cluster URI
# is reported on the console instead of aborting the import of this module.
try:

    documents = [
        Document(content="In the vibrant streets of Paris, Jean, a struggling painter, discovers an ancient secret hidden within his family’s artwork. As he unravels the mystery, he must navigate the treacherous art world, facing rivals and allies alike, on a journey that could redefine his destiny and restore a lost masterpiece."),
        Document(content="Mark, a visionary software developer in Berlin, stumbles upon a groundbreaking algorithm that could change the tech world forever. But as he delves deeper, he finds himself entangled in a web of corporate espionage, challenging his ethics and risking everything he has worked for."),
        Document(content="Giorgio, a dedicated archaeologist in Rome, uncovers an artifact that challenges the foundation of Roman history. His quest for truth leads him through ancient ruins and modern intrigue, as he battles to protect his discovery from those who wish to suppress it."),
        Document(content="Lily, an ambitious journalist in New York, receives a mysterious diary that leads her on an investigation into a century-old mystery involving a forbidden romance and a family's hidden secrets."),
        Document(content="Elena, a young physicist in Geneva, discovers a paradox within the laws of quantum mechanics that could alter our understanding of the universe. Her journey to prove her theory brings her face to face with dark matter and the very fabric of space and time."),
        Document(content="Alex, a disillusioned detective in London, finds himself on the trail of a serial killer who leaves cryptic clues rooted in historical events. As he pieces together the puzzle, he uncovers a conspiracy that could shake the foundations of the British monarchy."),
        Document(content="Nora, a marine biologist in the Great Barrier Reef, discovers a new species of coral with extraordinary regenerative properties. Her fight to protect her discovery from exploitation leads her into a conflict with powerful interests determined to harness the coral's powers for themselves."),
        Document(content="Simon, an expert linguist in Cairo, deciphers an ancient scroll that reveals the location of a hidden chamber beneath the Sphinx. His search for the chamber propels him into a world of mystery and danger, as he confronts a secret society with its own agenda."),
        Document(content="Amelia, a renowned chef in Tokyo, embarks on a quest to rediscover a lost recipe that was once the hallmark of her family's restaurant. Her journey takes her across Japan, delving into the country's culinary history and challenging her beliefs about cooking and tradition."),
        Document(content="Victor, a retired astronaut in Houston, is contacted by an alien intelligence through a mysterious transmission. As he seeks to uncover the truth, he is drawn into an interstellar adventure that reveals the universe's greatest mysteries and humanity's place among the stars.")
    ]

    # Initializing a document store in MongoDB Atlas to store the narrative documents.
    document_store = MongoDBAtlasDocumentStore(
        database_name="sample_mflix",  # Specifies the database name.
        collection_name="haystack_embedded_movies",  # Specifies the collection name where documents will be stored.
        vector_search_index="default",  # The search index for vectorized content.
    )

    # Setting up a document writer to handle the insertion of documents into the MongoDB collection.
    # DuplicatePolicy.SKIP is passed so re-running the app does not fail on already-stored plots.
    doc_writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)

    # Initializing a document embedder to convert text content into vectorized form.
    doc_embedder = OpenAIDocumentEmbedder()

    # Creating a pipeline for indexing documents. The pipeline includes embedding and writing documents.
    indexing_pipe = Pipeline()
    indexing_pipe.add_component(instance=doc_embedder, name="doc_embedder")
    indexing_pipe.add_component(instance=doc_writer, name="doc_writer")

    # Connecting the components of the pipeline for document flow.
    indexing_pipe.connect("doc_embedder.documents", "doc_writer.documents")

    # Running the pipeline with the list of documents to index them in MongoDB.
    indexing_pipe.run({"doc_embedder": {"documents": documents}})

    # Template for generating prompts for a movie recommendation engine.
    # The query line starts with "\n" (it used to be the invalid escape "\Q",
    # which left a stray backslash in the prompt and triggers a SyntaxWarning
    # on recent Python versions).
    prompt_template = """
        You are a movie recommendation engine use the following context documents.\nDocuments:
        {% for doc in documents %}
            {{ doc.content }}
        {% endfor %}
        
        \nQuery: {{query}}
        \nAnswer:
    """

    # Setting up a retrieval-augmented generation (RAG) pipeline for generating responses.
    rag_pipeline = Pipeline()
    rag_pipeline.add_component("text_embedder", OpenAITextEmbedder())

    # Adding a component for retrieving related documents from MongoDB based on the query embedding.
    rag_pipeline.add_component(instance=MongoDBAtlasEmbeddingRetriever(document_store=document_store,top_k=15), name="retriever")

    # Building prompts based on retrieved documents to be used for generating responses.
    rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")

    # Adding a language model generator to produce the final text output.
    rag_pipeline.add_component(instance=OpenAIGenerator(), name="llm")

    # Connecting the components of the RAG pipeline to ensure proper data flow.
    rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
    rag_pipeline.connect("retriever", "prompt_builder.documents")
    rag_pipeline.connect("prompt_builder", "llm")

# Exception handling to catch and display errors raised while building or running the pipelines.
except Exception:
    # Capture the traceback here: the handler previously printed a name that
    # was never assigned, so a setup failure surfaced as a NameError instead
    # of the real error.
    error_message = traceback.format_exc()
    print("An error occurred: \n" + error_message)


def get_movies(message, history):
    """Chat handler: run the RAG pipeline on *message* and stream the reply.

    The first reply from the ``llm`` component is yielded as a growing prefix,
    one extra character per step with a short pause in between. *history* is
    accepted but not used.

    If anything goes wrong, the traceback is printed to the console and a
    single help message about the required secrets is yielded instead.
    """
    try:
        # The same user text feeds both the embedder and the prompt.
        pipeline_inputs = {
            "text_embedder": {"text": message},
            "prompt_builder": {"query": message},
        }
        result = rag_pipeline.run(pipeline_inputs)
        reply_text = result['llm']['replies'][0]

        # Emit "a", "ab", "abc", ... so each step shows the full text so far.
        for shown in range(1, len(reply_text) + 1):
            time.sleep(0.03)
            yield reply_text[:shown]

    except Exception:
        error_message = traceback.format_exc()
        print("An error occurred: \n" + error_message)
        yield "Please clone the repo and add your open ai key as well as your MongoDB Atlas URI in the Secret Section of you Space\n OPENAI_API_KEY (your Open AI key) and MONGODB_ATLAS_CLUSTER_URI (0.0.0.0/0 whitelisted instance with Vector index created) \n\n For more information : https://mongodb.com/products/platform/atlas-vector-search"
        
# Table shown in the UI: a single "Plot" column with one single-cell row per
# sample document.
headers = ["Plot"]
data_for_dataframe = [[plot_doc.content] for plot_doc in documents]

def update_movie_data(new_data):
    """
    Index the plots held in the Dataframe and return the data unchanged.

    Each non-blank string in the 'Plot' column is converted into a Document
    and sent through the indexing pipeline.

    Args:
        new_data: pandas DataFrame with a 'Plot' column.

    Returns:
        The same ``new_data`` object, so the table keeps its contents.

    Raises:
        KeyError: if ``new_data`` has no 'Plot' column.
    """
    print(new_data)

    new_documents = []
    # Read the column directly; only the 'Plot' values are needed.
    for plot_content in new_data['Plot']:
        # Skip blank or missing cells (empty strings, None, NaN): they carry
        # no text to embed.
        if not isinstance(plot_content, str) or not plot_content.strip():
            continue
        print(plot_content)
        new_documents.append(Document(content=plot_content))

    # With nothing usable to index, leave the pipeline (and the remote calls
    # behind it) alone.
    if new_documents:
        indexing_pipe.run({"doc_embedder": {"documents": new_documents}})

    return new_data

def fetch_url_data(url, timeout=10):
    """Download *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request can block indefinitely, and this function is
            called while the UI is being built.

    Returns:
        The response text on success; otherwise the string
        ``"Error: <exception>"`` describing the request failure (connection
        problems, timeouts and unsuccessful HTTP status codes alike).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError if the HTTP request returned an unsuccessful status code
    except requests.RequestException as e:
        return f"Error: {e}"
    return response.text
        
# Build the Gradio UI: a "Demo" tab (plot table, update button, chat) and a
# "Code" tab that displays this app's source fetched from the Hub.
with gr.Blocks() as demo:
    with gr.Tab("Demo"):
        gr.Markdown("## Movie Plot Viewer")
        movie_table = gr.Dataframe(
            value=data_for_dataframe,
            headers=headers,
            interactive=False,
        )
        submit_button = gr.Button("Update Data")
        # Unused greeting kept for reference:
        ## value=[(None, "Hi, I'm a MongoDB and Haystack based question and answer bot 🤖, I can help you answer on the knowledge base above…")]
        gr.ChatInterface(
            get_movies,
            examples=[
                "What characters are from Rome?",
                "Combine 3 plots of your choice",
                "List all characters",
            ],
            title="Atlas Vector Search Chat",
            description="This small chat uses a similarity search to find relevant plots as listed above, it uses MongoDB Atlas and Haystack integaration: https://haystack.deepset.ai/integrations/mongodb",
            submit_btn="Search",
        ).queue()

        # Clicking the button sends the table through update_movie_data and
        # writes the returned value back into the same table.
        submit_button.click(
            fn=update_movie_data,
            inputs=[movie_table],
            outputs=[movie_table],
        )
    with gr.Tab("Code"):
        gr.Code(
            label="Code",
            language="python",
            value=fetch_url_data('https://huggingface.co/spaces/MongoDB/Haystack-MongoDB-Integration-Chat/raw/main/app.py'),
        )

        

    

# Launch the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()