# ppt / app.py
# samvish's picture
# Update app.py
# c8ecbd4 verified
# raw
# history blame
# 3.77 kB
import asyncio
import os
import re
import pdfplumber
import streamlit as st
import torch
from transformers import pipeline
from dataclasses import dataclass
from streamlit_pdf_viewer import pdf_viewer
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.models.groq import GroqModel
from pydantic_ai.messages import ModelMessage
import presentation as customClass
import nest_asyncio
# Load API key
# The credential is read from the API_KEY environment variable; a missing or
# empty value aborts start-up immediately.
api_key = os.environ.get("API_KEY")
if api_key is None or api_key == "":
    raise ValueError("API_KEY is not set in the environment variables.")
# Text fragments extracted from the most recently processed PDF. Rebound by
# extract_data() and read by return_data().
data: list[str] = []
# Container declared for generated presentations. NOTE(review): nothing in the
# visible source reads or appends to it — confirm whether it is still needed.
result_data: list[customClass.PPT] = []
# Initialize models
# Groq chat model passed to the pydantic_ai Agent built in ppt_content().
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key)
# Hugging Face summarization pipeline, constructed whenever this module runs.
# NOTE(review): nothing in the visible source calls `summarizer` — confirm
# whether it is still needed.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """Split text into chunks of at most ``max_tokens`` words.

    "Tokens" here are whitespace-separated words as produced by
    ``str.split()``, not model tokens. Because the words are re-joined with a
    single space, runs of whitespace (including newlines) inside a chunk
    collapse to one space.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        The chunks in their original order. Empty or whitespace-only input
        yields an empty list.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    # A negative step would make range() empty and silently drop all text,
    # and a zero step fails with an unrelated-looking message, so reject both
    # explicitly.
    if max_tokens < 1:
        raise ValueError(
            f"max_tokens must be a positive integer, got {max_tokens!r}"
        )

    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
def return_data() -> str:
    """Returns concatenated extracted data."""
    # Registered as an agent tool in ppt_content(); the docstring above is kept
    # verbatim because pydantic_ai presumably exposes it to the model as the
    # tool description — confirm before rewording.
    # The module-level `data` list is looked up at call time, so the result
    # reflects whatever extract_data() stored most recently.
    line_break = "\n"
    return line_break.join(data)
@dataclass
class SupportDependencies:
    """Dependency container holding a single string field.

    NOTE(review): not referenced anywhere else in the visible source.
    """

    # Meaning of the string is not shown in this file (name suggests a
    # database handle or identifier — confirm with the author).
    db: str
# System prompt for the slide-writing agent; text kept verbatim.
_PPT_SYSTEM_PROMPT = """
You are an expert in creating PowerPoint presentations.
Create 5 slides:
1. Title Slide: Introduction about the presentation.
2. Methodology Slide: Summarize the methodology in detail.
3. Results Slide: Present key findings in bullet points.
4. Discussion Slide: Summarize implications and limitations.
5. Conclusion Slide: State the overall conclusion.
Each slide should have:
- Title: Clear and concise.
- Text: Short and informative explanation.
- Bullet Points: 3-5 summarized key takeaways.
"""


async def ppt_content(data):
    """Generate PowerPoint content from extracted text using the AI agent.

    The fragments in ``data`` are joined with newlines and cut into
    word-bounded chunks. The first chunk starts the presentation; every later
    chunk is sent as a follow-up request carrying the message history of the
    previous run, so the model keeps extending the same presentation.

    Args:
        data: Sequence of text fragments (e.g. one string per PDF page).

    Returns:
        The ``data`` attribute of the final agent run (the agent is configured
        with ``result_type=customClass.PPT``). The same value is printed to
        stdout.

    Raises:
        ValueError: If ``data`` is empty or contains no words at all.
    """
    if not data:
        raise ValueError("No valid text found for PowerPoint generation.")

    # Whitespace-only fragments produce zero chunks; fail with the same
    # domain error instead of an IndexError on the first chunk.
    chunks = split_into_token_chunks("\n".join(data))
    if not chunks:
        raise ValueError("No valid text found for PowerPoint generation.")

    agent = Agent(
        model,
        result_type=customClass.PPT,
        tools=[return_data],
        system_prompt=_PPT_SYSTEM_PROMPT,
    )

    first_chunk, *remaining_chunks = chunks
    message_history: list[ModelMessage] = []

    # Inside a coroutine the async API must be awaited; run_sync() drives an
    # event loop itself and only works from a running loop when the loop has
    # been patched (nest_asyncio).
    result = await agent.run(
        user_prompt=f"Create a PowerPoint presentation from {first_chunk}",
        message_history=message_history,
    )
    for chunk in remaining_chunks:
        result = await agent.run(
            user_prompt=f"Continue creating the PowerPoint presentation from {chunk}",
            message_history=result.all_messages(),
        )

    print(result.data)
    return result.data
def ai_ppt(data):
    """Run the asynchronous PowerPoint generation to completion.

    A dedicated event loop is created for the call and is always closed
    afterwards, so repeated invocations do not accumulate open loops.

    Args:
        data: Text fragments forwarded to ``ppt_content``.

    Returns:
        Whatever ``ppt_content`` returns.

    Raises:
        Exception: Any error raised by ``ppt_content`` propagates unchanged.
    """
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(ppt_content(data=data))
    finally:
        # Unset the current loop before closing it so later code in this
        # thread never picks up a closed loop.
        asyncio.set_event_loop(None)
        loop.close()
def extract_data(feed):
    """Replace the module-level ``data`` list with the text of each PDF page.

    ``feed`` is handed to ``pdfplumber.open``. Pages whose extracted text is
    empty or ``None`` are skipped.
    """
    global data
    # Start from a fresh list before the PDF is opened so a failed open never
    # leaves text from a previous document behind.
    data = []
    with pdfplumber.open(feed) as document:
        page_texts = (page.extract_text() for page in document.pages)
        # filter(None, ...) keeps only truthy (non-empty) strings.
        data.extend(filter(None, page_texts))
def main():
    """Render the Streamlit page: upload a PDF, generate slides, preview the PDF."""
    st.title("AI-Powered PowerPoint Generator")
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

    # Nothing else to render until a file has been provided.
    if uploaded_file is None:
        return

    extract_data(uploaded_file)

    generate_clicked = st.button("Generate PPT")
    if generate_clicked:
        try:
            ai_ppt(data)
            st.success("PowerPoint generation completed!")
        except Exception as e:
            # Top-level UI boundary: show the failure on the page.
            st.error(f"Error generating PPT: {e}")

    # Display PDF
    pdf_viewer(input=uploaded_file.getvalue(), width=700)
if __name__ == "__main__":
    # Patch asyncio via nest_asyncio before any event-loop work begins.
    nest_asyncio.apply()
    main()