# Source: app.py, commit 24ccd6f (verified), "Update app.py", by ajibs75
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
import torch
from transformers import pipeline
import PyPDF2
from docx import Document
import io
# Application object; title and description are passed to FastAPI as metadata.
app = FastAPI(
    title="Text Summarization API",
    description="API for summarizing text and documents using Falcon's text summarization model",
)

# Configure CORS
# NOTE(review): no CORS middleware registration is visible in this view even
# though CORSMiddleware is imported. Confirm the intended origins/methods and
# register it with app.add_middleware(CORSMiddleware, ...) if it is required.

# Initialize the summarization pipeline
# Use the GPU when torch reports one; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
summarization_pipe = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
    device=device,
)
def extract_text_from_pdf(file_bytes: bytes) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_bytes: Raw bytes of the PDF document.

    Returns:
        The text extracted from each page, joined with no separator. A page
        that yields no text contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    # ``or ""`` keeps a page with no extractable text from breaking the
    # concatenation; join avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def extract_text_from_docx(file_bytes):
    """Return the text of a DOCX document, one paragraph per line.

    The document is opened from an in-memory buffer built from
    ``file_bytes``; each paragraph's text is followed by a newline.
    """
    document = Document(io.BytesIO(file_bytes))
    lines = [para.text + "\n" for para in document.paragraphs]
    return "".join(lines)
async def summarize_text(text: str = Form(...)):
    """Summarize text input.

    Args:
        text: The text to summarize, supplied as a required form field.

    Returns:
        ``{"summary": <str>}`` on success, or ``{"error": <str>}`` when the
        input is empty/blank or the summarization pipeline raises.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; confirm it is registered on the FastAPI app.

    # Blank (whitespace-only) input carries nothing to summarize either.
    if not text or not text.strip():
        return {"error": "Please provide text to summarize"}

    # Report pipeline failures as an error payload, matching the file handler.
    try:
        summary = summarization_pipe(text)
    except Exception as e:
        return {"error": f"Error processing text: {str(e)}"}
    return {"summary": summary[0]['summary_text']}
async def summarize_file(file: UploadFile = File(...)):
    """Summarize text from a PDF or DOCX file.

    Args:
        file: The uploaded document; its type is decided from the filename
            extension (case-insensitive).

    Returns:
        ``{"summary": <str>}`` on success, or ``{"error": <str>}`` when the
        format is unsupported, no text could be extracted, or processing
        raises.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view; confirm it is registered on the FastAPI app.
    try:
        # The filename can be absent; treat that as an unsupported format
        # instead of failing on ``None.lower()``.
        file_name = (file.filename or "").lower()
        if file_name.endswith('.pdf'):
            extract = extract_text_from_pdf
        elif file_name.endswith('.docx'):
            extract = extract_text_from_docx
        else:
            return {"error": "Unsupported file format. Please upload a PDF or DOCX file."}

        # Read the upload only once the format is known to be supported.
        contents = await file.read()
        text = extract(contents)

        # A document of empty paragraphs extracts to newlines only, so check
        # for blank text rather than just the empty string.
        if not text or not text.strip():
            return {"error": "Could not extract text from the file"}

        summary = summarization_pipe(text)
        return {"summary": summary[0]['summary_text']}
    except Exception as e:
        # Boundary handler: surface any parsing/model failure to the client.
        return {"error": f"Error processing file: {str(e)}"}
# Script entry point: serve the app with uvicorn on port 7860 when this file
# is executed directly.
if "__main__" == __name__:
    from uvicorn import run as serve

    # NOTE(review): host is an empty string here; confirm whether an explicit
    # bind address (e.g. "0.0.0.0") was intended.
    serve(app, host="", port=7860)