# Standard library
import json
import re
import string

# Third-party
from transformers import pipeline
import numpy as np
import torch
import transformers
import pandas as pd
from numpy.random import seed
import emoji
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

# Seed NumPy's global random generator with a fixed value.
seed(1)

# Module-level NLTK helper objects.
stemmer = PorterStemmer()

# Fetch the NLTK resources named below at import time.
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('stopwords')

lemmatizer = WordNetLemmatizer()

# NOTE: this rebinds the module-level name `stopwords` (previously the corpus
# reader imported from nltk.corpus) to the list of English stop words.
stopwords = nltk.corpus.stopwords.words('english')

import gradio as gr

# Text-classification pipeline used by the request handler.
pipe = pipeline("text-classification", model="dsmsb/16class_12k_newtest1618_xlm_roberta_base_27nov_v2_8epoch")

def classify(text):
    """Run the module-level ``pipe`` on ``text`` and wrap its result.

    Args:
        text: Input forwarded unchanged to ``pipe``.

    Returns:
        A dict with the single key ``"class"`` whose value is whatever
        ``pipe`` returns when called with ``top_k=2``.
    """
    output = pipe(text, top_k=2)
    return {"class": output}

# Gradio UI wiring: one text box in, one text box out, backed by `classify`.
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces are deprecated in
# newer Gradio releases in favour of `gr.Textbox` -- confirm the pinned Gradio
# version before upgrading.
# NOTE(review): the labels below ("pdf link", "OCR Text") do not appear to
# match a text classifier -- confirm the intended wording.
inputs = gr.inputs.Textbox(label="pdf link")
outputs = gr.outputs.Textbox(label="OCR Text")

demo = gr.Interface(fn=classify, inputs=inputs, outputs=outputs)

# Start the Gradio app.
demo.launch()