|
import asyncio |
|
import re |
|
from pydantic_ai.result import RunResult |
|
import streamlit as st |
|
from pydantic_ai import Agent, RunContext, Tool |
|
from pydantic_ai.models.groq import GroqModel |
|
import nest_asyncio |
|
from pydantic_ai.messages import ModelMessage |
|
import pdfplumber |
|
from transformers import pipeline |
|
import torch |
|
import os |
|
import presentation as customClass |
|
from streamlit_pdf_viewer import pdf_viewer |
|
from dataclasses import dataclass |
|
|
|
# Groq API key taken from the API_KEY environment variable (None when unset).
api_key = os.environ.get("API_KEY")

# Per-page text of the uploaded PDF; extract_data() appends to this list.
data = []

# Not referenced anywhere in the visible code.
last_message = ""

# Not referenced anywhere in the visible code.
result_data: list[customClass.PPT] = []

# Chat model handed to the pydantic_ai Agent built in ppt_content().
model = GroqModel("llama-3.2-90b-vision-preview", api_key=api_key)

# Summarization pipeline, constructed eagerly when the module is loaded.
# NOTE(review): nothing in the visible code calls it - confirm it is still
# needed, since building it presumably loads the model weights on every start.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
|
|
def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """Split *text* into chunks of at most *max_tokens* words.

    A "token" here is a whitespace-separated word as produced by
    ``str.split()``, not a model-tokenizer token. Runs of whitespace
    (including newlines) collapse to single spaces in the output.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        The chunks in order; an empty list when *text* contains no words.

    Raises:
        ValueError: If *max_tokens* is smaller than 1.
    """
    # A zero step makes range() fail with an unrelated message and a negative
    # step silently yields no chunks at all, so reject both explicitly.
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be at least 1, got {max_tokens}")

    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
|
|
|
def return_data() -> str:
    """Return the text extracted from the uploaded PDF, one page after another.

    Returns:
        The non-empty entries of the module-level ``data`` list joined with
        newlines; an empty string when nothing has been extracted.
    """
    # This function is registered as an agent tool in ppt_content().
    # NOTE(review): pydantic_ai is understood to use a tool's docstring as its
    # description for the model - keep the summary line above meaningful.
    #
    # Joining with "" glued the last word of a page to the first word of the
    # next one, and a None entry would make str.join raise; separate pages
    # with a newline and skip empty entries instead.
    return "\n".join(page_text for page_text in data if page_text)
|
|
|
@dataclass
class SupportDependencies:
    """Dependency container holding a single string field.

    NOTE(review): not referenced by any other code visible in this file.
    """

    # Meaning of the value is not established by the visible code.
    db: str
|
|
|
# System prompt for the slide-writing agent, passed as a sequence of strings.
# NOTE(review): the prompt announces "4 parts" but lists three; it is runtime
# text and is reproduced unchanged - confirm the intended wording.
_PPT_SYSTEM_PROMPT = (
    "You are an expert in making power-point presentations",
    "Create 5 slides",
    "Title Slide: short intro about the presentation",
    "Methodology Slide: Summarize the methodology in detail",
    "Results Slide: Present key findings in detail in simple text and bullet points.",
    "Discussion Slide: Summarize the implications and limitations.",
    "Conclusion Slide: State the overall conclusion.",
    "Each slide should be separate",
    "Each slide should have 4 parts:",
    "1. Title: title of the slide",
    "2. Text: the precise and short description or narrative content of the slide. This should include key information, explanations, or supporting arguments. Keep it concise yet informative to avoid overwhelming the audience.",
    "3. Bullet point: A list of bullet points summarizing key information on the slide. Each bullet point should be short, precise, and highlight a specific aspect of the slide's topic. Ideally, limit to 3-5 points.",
)


async def ppt_content(data, *, max_chunks: int = 3):
    """Ask the agent to build a presentation from the extracted PDF text.

    The page texts are merged, split into word chunks with
    ``split_into_token_chunks`` and sent to the agent one chunk per request.
    Each request carries the message history of the previous one, so the last
    result reflects every chunk that was sent.

    Args:
        data: Iterable of page texts; empty or ``None`` entries are skipped.
        max_chunks: Maximum number of leading chunks sent to the model. The
            default of 3 matches the previously hard-coded limit; values
            below 1 send nothing.

    Returns:
        The ``data`` of the final agent result (also printed), or ``None``
        when there is no text to send.
    """
    agent = Agent(
        model,
        result_type=customClass.PPT,
        tools=[return_data],
        system_prompt=_PPT_SYSTEM_PROMPT,
    )

    # Join pages with a space: the text is re-split on whitespace anyway, and
    # an empty separator fused the last word of a page with the next page's
    # first word.
    document = " ".join(page_text for page_text in data if page_text)
    chunks = split_into_token_chunks(document)[:max(max_chunks, 0)]

    # Without any text there is no result to report; the previous code hit an
    # unbound ``result`` variable at the final print in this case.
    if not chunks:
        return None

    history: list[ModelMessage] = []
    result = None
    for chunk in chunks:
        # Await the async API instead of calling run_sync(), which blocks and
        # re-enters the event loop that is already running this coroutine.
        result = await agent.run(
            user_prompt=f"Create me a powerpoint presentation from {chunk}",
            message_history=history,
        )
        history = result.all_messages()

    print(result.data)
    return result.data
|
|
|
def ai_ppt(data):
    """Run the ``ppt_content`` coroutine to completion from synchronous code.

    Args:
        data: Forwarded unchanged as the ``data`` keyword argument of
            ``ppt_content``.
    """
    slide_job = ppt_content(data=data)
    asyncio.run(slide_job)
|
|
|
def extract_data(feed):
    """Append the text of every page of a PDF to the module-level ``data`` list.

    Pages are appended in document order. Entries already present in ``data``
    are kept, so repeated calls accumulate text.

    Args:
        feed: The PDF source handed to ``pdfplumber.open`` (``main`` passes
            the Streamlit uploaded-file object).
    """
    with pdfplumber.open(feed) as pdf:
        # NOTE(review): some pdfplumber releases are understood to return None
        # for a page without extractable text. Store "" in that case so the
        # later str.join calls over ``data`` cannot fail on a None entry.
        data.extend(page.extract_text() or "" for page in pdf.pages)
|
|
|
def main():
    """Render the page: PDF upload, the "Make PPT" button and a PDF preview."""
    pdf_upload = st.file_uploader('Choose your .pdf file', type="pdf")

    # Nothing below applies until a file has been uploaded.
    if pdf_upload is None:
        return

    # Fills the module-level ``data`` list with the text of each page.
    extract_data(pdf_upload)

    make_ppt_clicked = st.button("Make PPT")
    if make_ppt_clicked:
        ai_ppt(data)

    # Show the uploaded document itself below the button.
    pdf_viewer(input=pdf_upload.getvalue(), width=700)
|
|
|
if __name__ == '__main__':
    # asyncio is already imported at the top of the file and is not used in
    # this block, so the redundant re-import was dropped.
    # Patch asyncio before the app starts.
    # NOTE(review): presumably so event loops can be re-entered - confirm it
    # is still required.
    nest_asyncio.apply()
    main()
|
|