# Spaces: Sleeping
from fastapi import FastAPI, UploadFile, File, Form | |
from fastapi.middleware.cors import CORSMiddleware | |
import torch | |
from transformers import pipeline | |
import PyPDF2 | |
from docx import Document | |
import io | |
# FastAPI application instance, created with its title and description metadata.
app = FastAPI(
    title="Text Summarization API",
    description=(
        "API for summarizing text and documents "
        "using Falcon's text summarization model"
    ),
)
# Configure CORS
# The API accepts requests from any origin. Credentials are left disabled:
# per the FastAPI CORS documentation, wildcard origins/methods/headers cannot
# be combined with allow_credentials=True (origins must then be listed
# explicitly). If cookies or Authorization headers are ever required, replace
# the "*" origin with an explicit allow-list before re-enabling credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Initialize the summarization pipeline
# Use the GPU when torch reports CUDA as available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Module-level pipeline object used by the summarization handlers below.
summarization_pipe = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
    device=device,
)
def extract_text_from_pdf(file_bytes):
    """Extract the text of every page of a PDF document.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        The text of all pages joined with newlines, so the last word of one
        page is not fused with the first word of the next. Pages that yield
        no text are skipped; a document with no extractable text returns "".
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    # Filtering falsy results drops empty pages (and tolerates a None result)
    # so the joined text stays empty when nothing could be extracted.
    return "\n".join(chunk for chunk in page_texts if chunk)
def extract_text_from_docx(file_bytes):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        file_bytes: Raw bytes of the DOCX file.

    Returns:
        The text of each entry in the document's ``paragraphs``, each
        followed by a newline character.
    """
    document = Document(io.BytesIO(file_bytes))
    return "".join(f"{para.text}\n" for para in document.paragraphs)
async def summarize_text(text: str = Form(...)):
    """Summarize text submitted as a form field.

    Args:
        text: The text to summarize (required form field).

    Returns:
        ``{"summary": <str>}`` with the model's summary, or
        ``{"error": <str>}`` when the input is empty or only whitespace.
    """
    # NOTE(review): no route decorator is visible in this view of the file --
    # confirm this handler is registered on `app`.
    # A whitespace-only string is truthy, so strip before the emptiness check
    # to avoid sending blank input to the model.
    if not text or not text.strip():
        return {"error": "Please provide text to summarize"}
    summary = summarization_pipe(text)
    return {"summary": summary[0]['summary_text']}
async def summarize_file(file: UploadFile = File(...)):
    """Summarize the text of an uploaded PDF or DOCX file.

    Args:
        file: The uploaded file; its format is chosen from the filename
            extension (``.pdf`` or ``.docx``, case-insensitive).

    Returns:
        ``{"summary": <str>}`` on success, otherwise ``{"error": <str>}``
        when the format is unsupported, no text could be extracted, or
        processing raised an exception.
    """
    # NOTE(review): no route decorator is visible in this view of the file --
    # confirm this handler is registered on `app`.
    # An upload may arrive without a filename; treat that as unsupported
    # instead of failing on None.lower().
    file_name = (file.filename or "").lower()

    # Pick the extractor before reading so unsupported uploads are rejected
    # without buffering their contents.
    if file_name.endswith('.pdf'):
        extract = extract_text_from_pdf
    elif file_name.endswith('.docx'):
        extract = extract_text_from_docx
    else:
        return {"error": "Unsupported file format. Please upload a PDF or DOCX file."}

    try:
        text = extract(await file.read())
        # The DOCX extractor emits a newline per paragraph, so a document of
        # blank paragraphs is non-empty but holds nothing to summarize.
        if not text or not text.strip():
            return {"error": "Could not extract text from the file"}
        summary = summarization_pipe(text)
        return {"summary": summary[0]['summary_text']}
    except Exception as e:
        # Request boundary: report any read/parse/model failure to the client.
        return {"error": f"Error processing file: {e}"}
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 7860.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)