# Spaces: Running
import asyncio | |
import os | |
import re | |
import pdfplumber | |
import streamlit as st | |
import torch | |
from transformers import pipeline | |
from dataclasses import dataclass | |
from streamlit_pdf_viewer import pdf_viewer | |
from pydantic_ai import Agent, RunContext, Tool | |
from pydantic_ai.models.groq import GroqModel | |
from pydantic_ai.messages import ModelMessage | |
import presentation as customClass | |
import nest_asyncio | |
# Read the API credential from the environment and refuse to start without it.
api_key = os.environ.get("API_KEY")
if api_key is None or api_key == "":
    raise ValueError("API_KEY is not set in the environment variables.")

# Shared module state. `data` is replaced by extract_data() and read by
# return_data(); `result_data` is declared here but not written to anywhere in
# the visible source.
data = []
result_data: list[customClass.PPT] = []

# Model handles, built once at import time: the Groq chat model handed to the
# agent, and a Hugging Face summarization pipeline.
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """Split *text* into chunks of at most *max_tokens* words.

    A "token" here is a whitespace-separated word as produced by
    ``str.split()``; no model tokenizer is involved. Words inside a chunk are
    re-joined with single spaces, so the original whitespace (including
    newlines) is not preserved.

    Args:
        text: The string to split.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        The chunks in order. Empty or whitespace-only *text* yields ``[]``.

    Raises:
        ValueError: If *max_tokens* is smaller than 1.
    """
    # A step of 0 makes range() fail with an unrelated message, and a negative
    # step would silently yield no chunks at all, dropping the whole text.
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens!r}")

    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
def return_data() -> str:
    """Returns concatenated extracted data."""
    # NOTE: this function is handed to the Agent via ``tools=[return_data]`` in
    # ppt_content(). The docstring above is kept verbatim because a tool
    # framework may surface it to the model as the tool description -- confirm
    # before rewording it.
    line_break = "\n"
    # Reads the module-level `data` list (one entry per extracted page).
    return line_break.join(data)
class SupportDependencies:
    # Annotation-only container: declares a single string attribute and assigns
    # no value, so instances do not get a `db` attribute automatically.
    # NOTE(review): nothing in the visible source references this class, and
    # `dataclass` is imported at the top of the file but not applied here --
    # confirm whether a @dataclass decorator was intended.
    db: str
async def ppt_content(data):
    """Generate PowerPoint content from extracted text using the AI agent.

    The page texts are joined, split into word chunks with
    ``split_into_token_chunks`` and fed to the agent one chunk at a time. Each
    follow-up request carries the full message history of the previous run, so
    the model keeps refining the same presentation.

    Args:
        data: List of text strings (one per extracted PDF page).

    Returns:
        The structured result of the final agent run (``result.data``,
        requested as ``customClass.PPT``). It is also printed to stdout, as
        before.

    Raises:
        ValueError: If *data* is empty or contains only whitespace.
    """
    if not data:
        raise ValueError("No valid text found for PowerPoint generation.")

    # Pages that hold only whitespace produce zero chunks; reject them here
    # rather than failing below with an IndexError on the first chunk.
    full_text = "\n".join(data)
    if not full_text.strip():
        raise ValueError("No valid text found for PowerPoint generation.")

    # NOTE: the `return_data` tool reads the module-level `data` list, not the
    # `data` argument of this function.
    agent = Agent(
        model,
        result_type=customClass.PPT,
        tools=[return_data],
        system_prompt="""
        You are an expert in creating PowerPoint presentations.
        Create 5 slides:
        1. Title Slide: Introduction about the presentation.
        2. Methodology Slide: Summarize the methodology in detail.
        3. Results Slide: Present key findings in bullet points.
        4. Discussion Slide: Summarize implications and limitations.
        5. Conclusion Slide: State the overall conclusion.
        Each slide should have:
        - Title: Clear and concise.
        - Text: Short and informative explanation.
        - Bullet Points: 3-5 summarized key takeaways.
        """
    )

    chunks = split_into_token_chunks(full_text)
    message_history: list[ModelMessage] = []

    # This is a coroutine, so await the agent's async API instead of calling
    # the blocking run_sync(), which would try to re-enter the running loop.
    result = await agent.run(
        user_prompt=f"Create a PowerPoint presentation from {chunks[0]}",
        message_history=message_history,
    )
    for chunk in chunks[1:]:
        result = await agent.run(
            user_prompt=f"Continue creating the PowerPoint presentation from {chunk}",
            message_history=result.all_messages(),
        )

    print(result.data)
    return result.data
def ai_ppt(data):
    """Run the PowerPoint generation coroutine to completion on a private event loop.

    A fresh loop is created for every call, so this works from a thread that
    has no event loop of its own.

    Args:
        data: List of extracted text strings, forwarded to ``ppt_content``.

    Returns:
        Whatever ``ppt_content`` returns.

    Raises:
        Any exception raised by ``ppt_content`` propagates unchanged.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(ppt_content(data=data))
    finally:
        # Previously the loop was never closed, leaking one loop per call and
        # leaving it installed as the thread's current loop. Tear it down the
        # way asyncio.run() does: finish async generators, detach, then close.
        try:
            loop.run_until_complete(loop.shutdown_asyncgens())
        finally:
            asyncio.set_event_loop(None)
            loop.close()
def extract_data(feed):
    """Read a PDF and store the text of each page in the module-level `data` list.

    `data` is rebound to a new empty list before the PDF is opened, so earlier
    contents are discarded. Pages for which ``extract_text()`` returns a falsy
    value (``None`` or an empty string) are skipped.

    Args:
        feed: Whatever ``pdfplumber.open`` accepts; ``main`` passes the
            Streamlit uploaded-file object.
    """
    global data
    data = []  # Start from a clean slate on every extraction
    with pdfplumber.open(feed) as document:
        page_texts = (page.extract_text() for page in document.pages)
        # filter(None, ...) keeps only truthy page texts.
        data.extend(filter(None, page_texts))
def main():
    """Render the Streamlit page.

    Shows a PDF uploader. Once a file is present, its text is extracted on
    every run, a "Generate PPT" button triggers the AI generation (errors are
    reported in the page instead of being raised), and the uploaded PDF is
    previewed below.
    """
    st.title("AI-Powered PowerPoint Generator")
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

    # Nothing else to render until a file has been uploaded.
    if uploaded_file is None:
        return

    extract_data(uploaded_file)

    generate_clicked = st.button("Generate PPT")
    if generate_clicked:
        try:
            ai_ppt(data)
            st.success("PowerPoint generation completed!")
        except Exception as e:
            # UI boundary: surface any failure to the user.
            st.error(f"Error generating PPT: {e}")

    # Display PDF
    pdf_viewer(input=uploaded_file.getvalue(), width=700)
# Script entry point: runs only when this file is executed directly.
if __name__ == '__main__':
    # nest_asyncio.apply() is called before the app starts.
    # NOTE(review): presumably this is to allow event loops to be re-entered
    # during generation -- confirm it is still required.
    nest_asyncio.apply()
    main()