|
import os |
|
import streamlit as st |
|
from llama_index.core import Settings |
|
from llama_index.core import VectorStoreIndex, Document |
|
from llama_index.embeddings.gemini import GeminiEmbedding |
|
from llama_index.llms.gemini import Gemini |
|
from llama_index.core import DocumentSummaryIndex |
|
from llama_index.embeddings.fastembed import FastEmbedEmbedding |
|
import google.generativeai as genai |
|
import os |
|
import PyPDF2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
# Global LlamaIndex configuration, applied once when the module is imported.
#
# Only one embedding model can be active on Settings at a time. The FastEmbed
# "BAAI/bge-small-en-v1.5" model is the one that takes effect, so it is the
# only one constructed here (a Gemini embedding assigned before it would be
# discarded immediately by this assignment).
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Gemini model registered as the global LLM on Settings. The API key is read
# from the GOOGLE_API_KEY environment variable (None if it is not set).
Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")

# Stand-alone, lower-temperature Gemini client. It is not referenced by the
# code visible in this file; kept because other modules may import it.
llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
|
|
|
|
|
def load_data(document_text):
    """Wrap the given text in a single Document and index it.

    Args:
        document_text: Full text of the uploaded document.

    Returns:
        The VectorStoreIndex built from that one document.
    """
    docs = [Document(text=document_text)]
    return VectorStoreIndex.from_documents(docs)
|
|
|
|
|
# Fallback report outline. generate_report() substitutes this text into the
# prompt whenever the user-supplied report format is blank. Each section name
# is followed by a one-line description of what that section should contain.
DEFAULT_REPORT_FORMAT = """
Title Page
Includes the report title, author's name, and date.
Abstract
A concise summary of the report, covering the background, objectives, methodology, key findings, and conclusions.
Table of Contents
Lists sections and subsections with corresponding page numbers for easy navigation.
Introduction
Provides background information, defines the scope of the report, and states the objectives.
Literature Review
Reviews relevant literature and previous research related to the report topic.
Methodology/Approach
Details the methods used to gather data or conduct experiments, including design and analytical techniques.
Results and Discussion
Presents findings in a clear format, often using tables, figures, and charts, followed by a discussion interpreting these results.
Conclusions
Summarizes the main findings and their implications, often linking back to the report's objectives.
Recommendations
Suggests actions based on the findings, highlighting potential future work or improvements.
References
Lists all sources cited in the report, adhering to a specific referencing style.
Appendices
Contains supplementary material that supports the main text, such as raw data, detailed calculations, or additional figures.
"""
|
|
|
|
|
def generate_report(index, report_format, additional_info):
    """Ask the index's query engine to write a report about the document.

    Args:
        index: Object exposing ``as_query_engine()``. The engine it returns
            must provide ``query(prompt)`` whose result has a ``response``
            attribute.
        report_format: Desired report outline. When it is ``None`` or
            contains only whitespace, ``DEFAULT_REPORT_FORMAT`` is used and
            an informational notice is shown through Streamlit.
        additional_info: Extra context to weave into the report. ``None`` is
            treated as an empty string so the literal text "None" never
            leaks into the prompt.

    Returns:
        The ``response`` attribute of the query result.
    """
    query_engine = index.as_query_engine()

    # Guard against None as well as blank input before calling .strip().
    if not (report_format or "").strip():
        report_format = DEFAULT_REPORT_FORMAT
        st.info("Using default report format.")

    if additional_info is None:
        additional_info = ""

    response = query_engine.query(f"""
    You are a professional report writer. Your task is to create a comprehensive report based on the entire document provided.

    First, thoroughly analyze and summarize the entire document. Then, use the input text to create a well-structured report following the format below:

    Report Format:
    {report_format}

    Additional Information:
    {additional_info}

    Even if the input is shallow, generate a report
    Guidelines:
    1. Ensure you comprehend and summarize the entire document before starting the report.
    2. The report should be comprehensive, covering all major points from the document.
    3. Adapt the provided format as necessary to best fit the content and context of the document.
    4. Incorporate any additional information provided into the relevant sections of the report.
    5. Use clear, professional language throughout the report.
    6. Provide specific examples or data from the document to support your analysis and conclusions.
    7. If the document contains technical information, explain it in a way that's accessible to a general audience.

    Generate a thorough, well-structured report that captures the essence of the entire document.
    """)
    return response.response
|
|
|
|
|
def _extract_text(uploaded_file):
    """Return the text content of an uploaded PDF or plain-text file."""
    if uploaded_file.type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # A page without a text layer may yield None or an empty string
        # (depending on the PyPDF2 version), so fall back to "" per page and
        # join once instead of growing a string with +=.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    # Undecodable bytes are replaced rather than raising, so a text file that
    # is not valid UTF-8 does not crash the app.
    return uploaded_file.getvalue().decode("utf-8", errors="replace")


def main():
    """Render the Streamlit page: upload a document and generate a report.

    The page collects a PDF/TXT upload, an optional report format and
    optional additional context. When the "Generate Report" button is
    pressed, the document text is indexed with ``load_data`` and the report
    produced by ``generate_report`` is displayed.
    """
    st.title("AI Report Writer")
    st.write("Upload your document and our AI will generate a comprehensive report based on its contents!")

    uploaded_file = st.file_uploader("Choose a file (PDF or TXT)", type=["txt", "pdf"])
    report_format = st.text_area(
        "Enter the desired report format (optional)",
        height=150,
        help="Leave blank to use a default template",
    )
    additional_info = st.text_area("Enter any additional information or context for the report", height=100)

    # Nothing to do until a file has been provided.
    if uploaded_file is None:
        st.warning("Please upload a file to generate a report.")
        return

    document_text = _extract_text(uploaded_file)

    if st.button("Generate Report"):
        # An image-only PDF or an empty file gives the model nothing to read.
        if not document_text.strip():
            st.error("No readable text could be extracted from the uploaded file.")
            return

        st.write("Analyzing document and generating report...")
        index = load_data(document_text)
        report = generate_report(index, report_format, additional_info)

        st.write("## Generated Report")
        st.write(report)
|
|
|
# Run the app only when this file is executed as the entry script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()