prashant
ver0.2 udfpreprocess update
49a314a
raw
history blame
1.61 kB
import glob, os, sys;
sys.path.append('../udfPreprocess')
#import helper
import udfPreprocess.docPreprocessing as pre
import udfPreprocess.cleaning as clean
#import needed libraries
import seaborn as sns
from pandas import DataFrame
from keybert import KeyBERT
from transformers import pipeline
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
import pandas as pd
import docx
from docx.shared import Inches
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE
import tempfile
import sqlite3
import logging
logger = logging.getLogger(__name__)
import configparser
@st.cache(allow_output_mutation=True)
def load_sdgClassifier():
classifier = pipeline("text-classification", model= "jonas/sdg_classifier_osdg")
logging.info("Loading classifier")
return classifier
def sdg_classification(par_list):
logging.info("running SDG classifiication")
config = configparser.ConfigParser()
config.read_file(open('udfPreprocess/paramconfig.cfg'))
threshold = float(config.get('sdg','THRESHOLD'))
classifier = load_sdgClassifier()
labels = classifier(par_list)
labels_= [(l['label'],l['score']) for l in labels]
# df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
df2['text'] = par_list
df2 = df2.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
df2.index += 1
df2 =df2[df2['Relevancy']>threshold]
x = df2['SDG'].value_counts()
df3 = df2.copy()
df3= df3.drop(['Relevancy'], axis = 1)
return df3, x