import gradio as gr
import torch
from transformers import BertTokenizer, BertModel
from fastapi import FastAPI
from pydantic import BaseModel
from pypdf import PdfReader  # assumed extra dependency for reading PDF text

app = FastAPI()

# Load the pre-trained BERT model and tokenizer once at startup
# instead of on every request
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)
model.eval()


class TextClassificationRequest(BaseModel):
    text: str


def extract_features(text: str) -> list:
    # Preprocess the input text
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        max_length=512,
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",
    )
    # Use the pre-trained BERT model to extract features from the input text
    with torch.no_grad():
        outputs = model(**inputs)
    # Use the [CLS] token embedding as a sentence-level feature vector
    features = outputs.last_hidden_state[:, 0, :]
    return features.tolist()


@app.post("/classify")
async def classify_text(request: TextClassificationRequest):
    return {"features": extract_features(request.text)}


def classify_pdf(pdf_path: str) -> str:
    # Pull the text out of the uploaded PDF, then run it through BERT
    reader = PdfReader(pdf_path)
    text = "\n".join(page.extract_text() or "" for page in reader.pages)
    return str(extract_features(text))


# Create a Gradio interface; "pdf" is not a valid Gradio input type,
# so use a File component restricted to PDF uploads (Gradio 4+ assumed)
interface = gr.Interface(
    fn=classify_pdf,
    inputs=gr.File(type="filepath", file_types=[".pdf"]),
    outputs="text",
    title="PDF Text Classification",
    description="Upload a PDF file to classify its text",
)

# Mount the Gradio UI on the FastAPI app so the /classify endpoint and the
# interface are served together (interface.launch() alone would not expose
# the FastAPI route)
app = gr.mount_gradio_app(app, interface, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
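A minimal client sketch for the /classify route, assuming the app is running locally on port 7860 as configured above; the URL and payload shape mirror the TextClassificationRequest model defined in this file.

import requests

# Send a text sample to the /classify endpoint and inspect the returned features
response = requests.post(
    "http://localhost:7860/classify",
    json={"text": "An example sentence to embed with BERT."},
)
print(response.json()["features"][0][:5])  # first few dimensions of the [CLS] vector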